|
|
@@ -1,42 +1,51 @@
|
|
|
+//! BamReader is a library for parsing reads from BAM files.
|
|
|
+
|
|
|
use std::{fs::File, io::{Read, BufRead}};
|
|
|
use bgzip::{BGZFReader, BGZFError};
|
|
|
|
|
|
+
|
|
|
+/// Stores the possible values of a base in a read's sequence.
|
|
|
#[derive(Debug)]
|
|
|
pub enum BamNucleotid {
|
|
|
Equal, A, C, M, G, R, S, V, T, W, Y, H, K, D, B, N
|
|
|
}
|
|
|
+/// The sequence of the read.
|
|
|
#[derive(Debug)]
|
|
|
pub struct BamSequence(Vec<BamNucleotid>);
|
|
|
|
|
|
impl BamSequence {
|
|
|
+ /// Returns a new, empty BamSequence.
|
|
|
pub fn new() -> BamSequence {
|
|
|
BamSequence(Vec::new())
|
|
|
}
|
|
|
- pub fn get_nt(e: &u8) -> BamNucleotid {
|
|
|
- match e {
|
|
|
- 0 => BamNucleotid::Equal,
|
|
|
- 1 => BamNucleotid::A,
|
|
|
- 2 => BamNucleotid::C,
|
|
|
- 3 => BamNucleotid::M,
|
|
|
- 4 => BamNucleotid::G,
|
|
|
- 5 => BamNucleotid::R,
|
|
|
- 6 => BamNucleotid::S,
|
|
|
- 7 => BamNucleotid::V,
|
|
|
- 8 => BamNucleotid::T,
|
|
|
- 9 => BamNucleotid::W,
|
|
|
- 10 => BamNucleotid::Y,
|
|
|
- 11 => BamNucleotid::H,
|
|
|
- 12 => BamNucleotid::K,
|
|
|
- 13 => BamNucleotid::D,
|
|
|
- 14 => BamNucleotid::B,
|
|
|
- 15 => BamNucleotid::N,
|
|
|
- _ => panic!("Parsing error")
|
|
|
- }
|
|
|
- }
|
|
|
+ /// Parses a u8 value from the BAM buffer into a BamNucleotid
|
|
|
+ /// and appends it to the BamSequence. Panics if the value is greater than 15.
|
|
|
pub fn push(&mut self, e: &u8) {
|
|
|
- self.0.push(BamSequence::get_nt(e))
|
|
|
+ self.0.push(
|
|
|
+ match e {
|
|
|
+ 0 => BamNucleotid::Equal,
|
|
|
+ 1 => BamNucleotid::A,
|
|
|
+ 2 => BamNucleotid::C,
|
|
|
+ 3 => BamNucleotid::M,
|
|
|
+ 4 => BamNucleotid::G,
|
|
|
+ 5 => BamNucleotid::R,
|
|
|
+ 6 => BamNucleotid::S,
|
|
|
+ 7 => BamNucleotid::V,
|
|
|
+ 8 => BamNucleotid::T,
|
|
|
+ 9 => BamNucleotid::W,
|
|
|
+ 10 => BamNucleotid::Y,
|
|
|
+ 11 => BamNucleotid::H,
|
|
|
+ 12 => BamNucleotid::K,
|
|
|
+ 13 => BamNucleotid::D,
|
|
|
+ 14 => BamNucleotid::B,
|
|
|
+ 15 => BamNucleotid::N,
|
|
|
+ _ => panic!("Parsing error")
|
|
|
+ }
|
|
|
+ )
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+/// Stores the possible types of tag values.
|
|
|
#[derive(Debug, Clone)]
|
|
|
pub enum TagValue {
|
|
|
Int(i8),
|
|
|
@@ -49,21 +58,31 @@ pub enum TagValue {
|
|
|
Str(String)
|
|
|
}
|
|
|
|
|
|
+/// A single BAM read, made up of the following fields:
|
|
|
+/// ref_id,
|
|
|
+/// pos,
|
|
|
+/// mapq,
|
|
|
+/// flag,
|
|
|
+/// read_name,
|
|
|
+/// cigar,
|
|
|
+/// sequence,
|
|
|
+/// phred,
|
|
|
+/// tags
|
|
|
pub struct BamRead {
|
|
|
- ref_id: i32,
|
|
|
- pos: i32,
|
|
|
- mapq: u8,
|
|
|
- flag: u16,
|
|
|
- read_name: String,
|
|
|
- cigar: Vec<(String, u32)>,
|
|
|
- sequence: BamSequence,
|
|
|
- phred: Vec<u8>,
|
|
|
- tags: Vec<(String, TagValue)>
|
|
|
+ pub ref_id: i32,
|
|
|
+ pub pos: i32,
|
|
|
+ pub mapq: u8,
|
|
|
+ pub flag: u16,
|
|
|
+ pub read_name: String,
|
|
|
+ pub cigar: Vec<(String, u32)>,
|
|
|
+ pub sequence: BamSequence,
|
|
|
+ pub phred: Vec<u8>,
|
|
|
+ pub tags: Vec<(String, TagValue)>
|
|
|
}
|
|
|
|
|
|
pub struct BamReader {
|
|
|
- reader: BGZFReader<File>,
|
|
|
- references: Vec<(String, u32)>
|
|
|
+ pub reader: BGZFReader<File>,
|
|
|
+ pub references: Vec<(String, u32)>
|
|
|
}
|
|
|
|
|
|
impl BamReader {
|
|
|
@@ -304,20 +323,20 @@ mod tests {
|
|
|
use super::*;
|
|
|
|
|
|
#[test]
|
|
|
- fn it_works() {
|
|
|
+ fn it_works() {
|
|
|
let bam_path = "/Users/steimle/Documents/Programmes/sv-finder/betya.bam";
|
|
|
let flags: Vec<u16> = vec![81, 161, 97, 145, 65, 129, 113, 177];
|
|
|
+ let n_reads = 100;
|
|
|
+ let first_read_name = "NB501645:337:HCCMVAFX2:3:11505:17842:2102_GATGGGACGG".to_string();
|
|
|
|
|
|
let bam_reader = BamReader::new(bam_path).unwrap();
|
|
|
|
|
|
let reads = bam_reader
|
|
|
- .filter(|br| flags.contains(&br.flag))
|
|
|
- .take(100)
|
|
|
+ .filter(|bam_read| flags.contains(&bam_read.flag))
|
|
|
+ .take(n_reads)
|
|
|
.collect::<Vec<BamRead>>();
|
|
|
-
|
|
|
- assert_eq!(
|
|
|
- reads[0].read_name,
|
|
|
- "NB501645:337:HCCMVAFX2:3:11505:17842:2102_GATGGGACGG".to_string()
|
|
|
- );
|
|
|
+
|
|
|
+ println!("The size of {} reads is {} bytes.", n_reads, std::mem::size_of_val(&*reads));
|
|
|
+ assert_eq!(reads.first().unwrap().read_name, first_read_name);
|
|
|
}
|
|
|
}
|