Thomas 2 年之前
父节点
当前提交
307a093706
共有 2 个文件被更改，包括 60 次插入和 41 次删除
  1. 二进制
      .DS_Store
  2. 60 41
      src/lib.rs

二进制
.DS_Store


+ 60 - 41
src/lib.rs

@@ -1,42 +1,51 @@
+//! BamReader is a library for parsing reads from BAM files.
+
 use std::{fs::File, io::{Read, BufRead}};
 use bgzip::{BGZFReader, BGZFError};
 
+
+/// Possible values of one position in a read's sequence.
 #[derive(Debug)]
 pub enum BamNucleotid {
     Equal, A, C, M, G, R, S, V, T, W, Y, H, K, D, B, N
 }
+/// The sequence of the read.
 #[derive(Debug)]
 pub struct BamSequence(Vec<BamNucleotid>);
 
 impl BamSequence {
+    /// Returns a new, empty `BamSequence`.
     pub fn new() -> BamSequence {
         BamSequence(Vec::new())
     }
-    pub fn get_nt(e: &u8) -> BamNucleotid {
-        match e {
-            0  => BamNucleotid::Equal,
-            1  => BamNucleotid::A,
-            2  => BamNucleotid::C,
-            3  => BamNucleotid::M,
-            4  => BamNucleotid::G,
-            5  => BamNucleotid::R,
-            6  => BamNucleotid::S,
-            7  => BamNucleotid::V,
-            8  => BamNucleotid::T,
-            9  => BamNucleotid::W,
-            10 => BamNucleotid::Y,
-            11 => BamNucleotid::H,
-            12 => BamNucleotid::K,
-            13 => BamNucleotid::D,
-            14 => BamNucleotid::B,
-            15 => BamNucleotid::N,
-            _  => panic!("Parsing error")
-        }
-    }
+    /// Parses a `u8` code from the BAM buffer (0 to 15) into a `BamNucleotid`
+    /// and appends it to the `BamSequence`; panics on any other value.
     pub fn push(&mut self, e: &u8) {
-        self.0.push(BamSequence::get_nt(e))
+        self.0.push(
+            match e {
+                0  => BamNucleotid::Equal,
+                1  => BamNucleotid::A,
+                2  => BamNucleotid::C,
+                3  => BamNucleotid::M,
+                4  => BamNucleotid::G,
+                5  => BamNucleotid::R,
+                6  => BamNucleotid::S,
+                7  => BamNucleotid::V,
+                8  => BamNucleotid::T,
+                9  => BamNucleotid::W,
+                10 => BamNucleotid::Y,
+                11 => BamNucleotid::H,
+                12 => BamNucleotid::K,
+                13 => BamNucleotid::D,
+                14 => BamNucleotid::B,
+                15 => BamNucleotid::N,
+                _  => panic!("Parsing error")
+            }
+        )
     }
 }
+
+/// Possible types of a tag's value.
 #[derive(Debug, Clone)]
 pub enum TagValue {
     Int(i8),
@@ -49,21 +58,31 @@ pub enum TagValue {
     Str(String)
 }
 
+/// Fields of a read:
+/// - `ref_id`
+/// - `pos`
+/// - `mapq`
+/// - `flag`
+/// - `read_name`
+/// - `cigar`
+/// - `sequence`
+/// - `phred`
+/// - `tags`
 pub struct BamRead {
-    ref_id: i32,
-    pos: i32,
-    mapq: u8,
-    flag: u16,
-    read_name: String,
-    cigar: Vec<(String, u32)>,
-    sequence: BamSequence,
-    phred: Vec<u8>,
-    tags: Vec<(String, TagValue)>
+    pub ref_id: i32,
+    pub pos: i32,
+    pub mapq: u8,
+    pub flag: u16,
+    pub read_name: String,
+    pub cigar: Vec<(String, u32)>,
+    pub sequence: BamSequence,
+    pub phred: Vec<u8>,
+    pub tags: Vec<(String, TagValue)>
 }
 
 pub struct BamReader {
-    reader: BGZFReader<File>,
-    references: Vec<(String, u32)>
+    pub reader: BGZFReader<File>,
+    pub references: Vec<(String, u32)>
 }
 
 impl BamReader {
@@ -304,20 +323,20 @@ mod tests {
     use super::*;
 
     #[test]
-    fn it_works() {
+    fn it_works() { 
         let bam_path = "/Users/steimle/Documents/Programmes/sv-finder/betya.bam";
         let flags: Vec<u16> = vec![81, 161, 97, 145, 65, 129, 113, 177];
+        let n_reads = 100;
+        let first_read_name = "NB501645:337:HCCMVAFX2:3:11505:17842:2102_GATGGGACGG".to_string();
         
         let bam_reader = BamReader::new(bam_path).unwrap();
         
         let reads = bam_reader
-        .filter(|br| flags.contains(&br.flag))
-        .take(100)
+        .filter(|bam_read| flags.contains(&bam_read.flag))
+        .take(n_reads)
         .collect::<Vec<BamRead>>();
-
-        assert_eq!(
-            reads[0].read_name,
-            "NB501645:337:HCCMVAFX2:3:11505:17842:2102_GATGGGACGG".to_string()
-        );
+        
+        println!("The size of {} reads is {} bytes.", n_reads, std::mem::size_of_val(&*reads));
+        assert_eq!(reads.first().unwrap().read_name, first_read_name);
     }
 }