Thomas 3 年之前
父節點
當前提交
d4bf785797
共有 1 個文件被更改,包括 14 次插入32 次删除
  1. 14 32
      src/lib.rs

+ 14 - 32
src/lib.rs

@@ -1,4 +1,4 @@
-use std::{fs, io::{Read, BufRead}};
+use std::{fs::File, io::{Read, BufRead}};
 use bgzip::{BGZFReader, BGZFError};
 
 #[derive(Debug)]
@@ -50,25 +50,25 @@ pub enum TagValue {
 }
 
 pub struct BamRead {
-    ref_id: i32,                  // ref_id
-    pos: i32,                     // pos
-    mapq: u8,                     // mapq
-    flag: u16,                    // flag
-    read_name: String,            // read_name
-    cigar: Vec<(String, u32)>,    // cigar
-    sequence: BamSequence,        // sequence
-    phred: Vec<u8>,               // phred
-    tags: Vec<(String, TagValue)> // tags
+    ref_id: i32,
+    pos: i32,
+    mapq: u8,
+    flag: u16,
+    read_name: String,
+    cigar: Vec<(String, u32)>,
+    sequence: BamSequence,
+    phred: Vec<u8>,
+    tags: Vec<(String, TagValue)>
 }
 
 pub struct BamReader {
-    reader: BGZFReader<fs::File>,
+    reader: BGZFReader<File>,
     references: Vec<(String, u32)>
 }
 
 impl BamReader {
     pub fn new(bam_path: &str) -> Result<BamReader, BGZFError> {
-        let bam = fs::File::open(bam_path).unwrap();
+        let bam = File::open(bam_path).unwrap();
         let reader = BGZFReader::new(bam);
         let mut bam_reader = BamReader { reader, references: Vec::new() };
         bam_reader.read_header()?;
@@ -128,12 +128,10 @@ impl BamReader {
         let mut buffer_1 = [0u8; 1];
         let mut buffer_2 = [0u8; 2];
         let mut buffer_4 = [0u8; 4];
-        // let mut reader = &self.reader;
+
         // Total length of the alignment record, excluding this field
         self.reader.read_exact(&mut buffer_4)?;
-        // if r.is_err() { break; }; // triggered by eof
         let block_size: u32 = u32::from_ne_bytes(buffer_4.clone().try_into().unwrap());
-        // println!("block size {}", block_size);
 
         let pos_start = self.reader.bgzf_pos();
 
@@ -141,66 +139,52 @@ impl BamReader {
         // without a mapping position
         self.reader.read_exact(&mut buffer_4)?;
         let ref_id: i32 = i32::from_ne_bytes(buffer_4.try_into().expect("Parse error"));
-        // println!("ref_id {}", ref_id);
-        // println!("ref_name {}", references[ref_id as usize].0);
 
         // 0-based leftmost coordinate (= POS − 1)
         self.reader.read_exact(&mut buffer_4)?;
         let pos: i32 = i32::from_ne_bytes(buffer_4.try_into().expect("Parse error"));
-        // println!("pos {}", pos);
 
         // Length of read name below (= length(QNAME) + 1)
         self.reader.read_exact(&mut buffer_1)?;
         let l_read_name: u8 = u8::from_ne_bytes(buffer_1.try_into().expect("Parse error"));
-        // println!("l_read_name {}", l_read_name);
 
         // Mapping quality (=MAPQ)
         self.reader.read_exact(&mut buffer_1)?;
         let mapq: u8 = u8::from_ne_bytes(buffer_1.try_into().expect("Parse error"));
-        // println!("mapq {}", mapq);
 
         // BAI index bin
         self.reader.read_exact(&mut buffer_2)?;
         // let bin: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
-        // println!("bin {}", bin);
 
         // Number of operations in CIGAR,
         self.reader.read_exact(&mut buffer_2)?;
         let n_cigar_op: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
-        // println!("n_cigar_op {}", n_cigar_op);
 
         // Bitwise flags (= FLAG)^29
         self.reader.read_exact(&mut buffer_2)?;
         let flag: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
-        
-        // println!("flag {}", flag);
 
         // Length of SEQ 
         self.reader.read_exact(&mut buffer_4)?;
         let l_seq: u32 = u32::from_ne_bytes(buffer_4.try_into().unwrap());
-        // println!("l_seq {}", l_seq);
 
         // Ref-ID of the next segment (−1 ≤ next refID < n ref) 
         self.reader.read_exact(&mut buffer_4)?;
         // let next_ref_id: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
-        // println!("next_ref_id {}", next_ref_id);
 
         // 0-based leftmost pos of the next segment  (= PNEXT − 1)
         self.reader.read_exact(&mut buffer_4)?;
         // let next_pos: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
-        // println!("next_pos {}", next_pos);
 
         // Template length (= TLEN)
         self.reader.read_exact(&mut buffer_4)?;
         // let tlen: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
-        // println!("tlen {}", tlen);
 
         // Read name, NUL-terminated (QNAME with trailing ‘\0’)^30
         let mut buffer_read_name = vec![0u8; l_read_name as usize];
         self.reader.read_exact(&mut buffer_read_name)?;
         buffer_read_name.pop();
         let read_name = String::from_utf8(buffer_read_name).expect("Can't read the read name");
-        // println!("read_name {}", read_name);
 
         // CIGAR: op len<<4|op. ‘MIDNSHP=X’→‘012345678’ 
         let l_buffer_cigar = (n_cigar_op as usize) * 4;
@@ -225,7 +209,6 @@ impl BamReader {
             }
         })
         .collect();
-        // println!("cigar {:?}", cigar);
 
         let l_buffer_seq = (l_seq as usize + 1) / 2;
         let mut buffer_seq = vec![0u8; l_buffer_seq];
@@ -237,7 +220,6 @@ impl BamReader {
             sequence.push(&left);
             if right != 0 { sequence.push(&right) };
         }
-        // println!("sequence {:?}", sequence);
         
         let mut phred = vec![0; l_seq as usize];
         self.reader.read_exact(&mut phred)?;
@@ -332,7 +314,7 @@ mod tests {
         
         let reads = bam_reader
         .filter(|br| flags.contains(&br.flag))
-        .take(1000)
+        .take(100)
         .collect::<Vec<BamRead>>();
 
         assert_eq!(