|
@@ -1,4 +1,4 @@
|
|
|
-use std::{fs, io::{Read, BufRead}};
|
|
|
|
|
|
|
+use std::{fs::File, io::{Read, BufRead}};
|
|
|
use bgzip::{BGZFReader, BGZFError};
|
|
use bgzip::{BGZFReader, BGZFError};
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
#[derive(Debug)]
|
|
@@ -50,25 +50,25 @@ pub enum TagValue {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub struct BamRead {
|
|
pub struct BamRead {
|
|
|
- ref_id: i32, // ref_id
|
|
|
|
|
- pos: i32, // pos
|
|
|
|
|
- mapq: u8, // mapq
|
|
|
|
|
- flag: u16, // flag
|
|
|
|
|
- read_name: String, // read_name
|
|
|
|
|
- cigar: Vec<(String, u32)>, // cigar
|
|
|
|
|
- sequence: BamSequence, // sequence
|
|
|
|
|
- phred: Vec<u8>, // phred
|
|
|
|
|
- tags: Vec<(String, TagValue)> // tags
|
|
|
|
|
|
|
+ ref_id: i32,
|
|
|
|
|
+ pos: i32,
|
|
|
|
|
+ mapq: u8,
|
|
|
|
|
+ flag: u16,
|
|
|
|
|
+ read_name: String,
|
|
|
|
|
+ cigar: Vec<(String, u32)>,
|
|
|
|
|
+ sequence: BamSequence,
|
|
|
|
|
+ phred: Vec<u8>,
|
|
|
|
|
+ tags: Vec<(String, TagValue)>
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub struct BamReader {
|
|
pub struct BamReader {
|
|
|
- reader: BGZFReader<fs::File>,
|
|
|
|
|
|
|
+ reader: BGZFReader<File>,
|
|
|
references: Vec<(String, u32)>
|
|
references: Vec<(String, u32)>
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl BamReader {
|
|
impl BamReader {
|
|
|
pub fn new(bam_path: &str) -> Result<BamReader, BGZFError> {
|
|
pub fn new(bam_path: &str) -> Result<BamReader, BGZFError> {
|
|
|
- let bam = fs::File::open(bam_path).unwrap();
|
|
|
|
|
|
|
+ let bam = File::open(bam_path).unwrap();
|
|
|
let reader = BGZFReader::new(bam);
|
|
let reader = BGZFReader::new(bam);
|
|
|
let mut bam_reader = BamReader { reader, references: Vec::new() };
|
|
let mut bam_reader = BamReader { reader, references: Vec::new() };
|
|
|
bam_reader.read_header()?;
|
|
bam_reader.read_header()?;
|
|
@@ -128,12 +128,10 @@ impl BamReader {
|
|
|
let mut buffer_1 = [0u8; 1];
|
|
let mut buffer_1 = [0u8; 1];
|
|
|
let mut buffer_2 = [0u8; 2];
|
|
let mut buffer_2 = [0u8; 2];
|
|
|
let mut buffer_4 = [0u8; 4];
|
|
let mut buffer_4 = [0u8; 4];
|
|
|
- // let mut reader = &self.reader;
|
|
|
|
|
|
|
+
|
|
|
// Total length of the alignment record, excluding this field
|
|
// Total length of the alignment record, excluding this field
|
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
|
- // if r.is_err() { break; }; // triggered by eof
|
|
|
|
|
let block_size: u32 = u32::from_ne_bytes(buffer_4.clone().try_into().unwrap());
|
|
let block_size: u32 = u32::from_ne_bytes(buffer_4.clone().try_into().unwrap());
|
|
|
- // println!("block size {}", block_size);
|
|
|
|
|
|
|
|
|
|
let pos_start = self.reader.bgzf_pos();
|
|
let pos_start = self.reader.bgzf_pos();
|
|
|
|
|
|
|
@@ -141,66 +139,52 @@ impl BamReader {
|
|
|
// without a mapping position
|
|
// without a mapping position
|
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
|
let ref_id: i32 = i32::from_ne_bytes(buffer_4.try_into().expect("Parse error"));
|
|
let ref_id: i32 = i32::from_ne_bytes(buffer_4.try_into().expect("Parse error"));
|
|
|
- // println!("ref_id {}", ref_id);
|
|
|
|
|
- // println!("ref_name {}", references[ref_id as usize].0);
|
|
|
|
|
|
|
|
|
|
// 0-based leftmost coordinate (= POS − 1)
|
|
// 0-based leftmost coordinate (= POS − 1)
|
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
|
let pos: i32 = i32::from_ne_bytes(buffer_4.try_into().expect("Parse error"));
|
|
let pos: i32 = i32::from_ne_bytes(buffer_4.try_into().expect("Parse error"));
|
|
|
- // println!("pos {}", pos);
|
|
|
|
|
|
|
|
|
|
// Length of read name below (= length(QNAME) + 1)
|
|
// Length of read name below (= length(QNAME) + 1)
|
|
|
self.reader.read_exact(&mut buffer_1)?;
|
|
self.reader.read_exact(&mut buffer_1)?;
|
|
|
let l_read_name: u8 = u8::from_ne_bytes(buffer_1.try_into().expect("Parse error"));
|
|
let l_read_name: u8 = u8::from_ne_bytes(buffer_1.try_into().expect("Parse error"));
|
|
|
- // println!("l_read_name {}", l_read_name);
|
|
|
|
|
|
|
|
|
|
// Mapping quality (=MAPQ)
|
|
// Mapping quality (=MAPQ)
|
|
|
self.reader.read_exact(&mut buffer_1)?;
|
|
self.reader.read_exact(&mut buffer_1)?;
|
|
|
let mapq: u8 = u8::from_ne_bytes(buffer_1.try_into().expect("Parse error"));
|
|
let mapq: u8 = u8::from_ne_bytes(buffer_1.try_into().expect("Parse error"));
|
|
|
- // println!("mapq {}", mapq);
|
|
|
|
|
|
|
|
|
|
// BAI index bin
|
|
// BAI index bin
|
|
|
self.reader.read_exact(&mut buffer_2)?;
|
|
self.reader.read_exact(&mut buffer_2)?;
|
|
|
// let bin: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
|
|
// let bin: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
|
|
|
- // println!("bin {}", bin);
|
|
|
|
|
|
|
|
|
|
// Number of operations in CIGAR,
|
|
// Number of operations in CIGAR,
|
|
|
self.reader.read_exact(&mut buffer_2)?;
|
|
self.reader.read_exact(&mut buffer_2)?;
|
|
|
let n_cigar_op: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
|
|
let n_cigar_op: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
|
|
|
- // println!("n_cigar_op {}", n_cigar_op);
|
|
|
|
|
|
|
|
|
|
// Bitwise flags (= FLAG)^29
|
|
// Bitwise flags (= FLAG)^29
|
|
|
self.reader.read_exact(&mut buffer_2)?;
|
|
self.reader.read_exact(&mut buffer_2)?;
|
|
|
let flag: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
|
|
let flag: u16 = u16::from_ne_bytes(buffer_2.try_into().expect("Parse error"));
|
|
|
-
|
|
|
|
|
- // println!("flag {}", flag);
|
|
|
|
|
|
|
|
|
|
// Length of SEQ
|
|
// Length of SEQ
|
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
|
let l_seq: u32 = u32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
let l_seq: u32 = u32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
|
- // println!("l_seq {}", l_seq);
|
|
|
|
|
|
|
|
|
|
// Ref-ID of the next segment (−1 ≤ next refID < n ref)
|
|
// Ref-ID of the next segment (−1 ≤ next refID < n ref)
|
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
|
// let next_ref_id: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
// let next_ref_id: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
|
- // println!("next_ref_id {}", next_ref_id);
|
|
|
|
|
|
|
|
|
|
// 0-based leftmost pos of the next segment (= PNEXT − 1)
|
|
// 0-based leftmost pos of the next segment (= PNEXT − 1)
|
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
|
// let next_pos: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
// let next_pos: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
|
- // println!("next_pos {}", next_pos);
|
|
|
|
|
|
|
|
|
|
// Template length (= TLEN)
|
|
// Template length (= TLEN)
|
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
self.reader.read_exact(&mut buffer_4)?;
|
|
|
// let tlen: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
// let tlen: i32 = i32::from_ne_bytes(buffer_4.try_into().unwrap());
|
|
|
- // println!("tlen {}", tlen);
|
|
|
|
|
|
|
|
|
|
// Read name, NUL-terminated (QNAME with trailing ‘\0’)^30
|
|
// Read name, NUL-terminated (QNAME with trailing ‘\0’)^30
|
|
|
let mut buffer_read_name = vec![0u8; l_read_name as usize];
|
|
let mut buffer_read_name = vec![0u8; l_read_name as usize];
|
|
|
self.reader.read_exact(&mut buffer_read_name)?;
|
|
self.reader.read_exact(&mut buffer_read_name)?;
|
|
|
buffer_read_name.pop();
|
|
buffer_read_name.pop();
|
|
|
let read_name = String::from_utf8(buffer_read_name).expect("Can't read the read name");
|
|
let read_name = String::from_utf8(buffer_read_name).expect("Can't read the read name");
|
|
|
- // println!("read_name {}", read_name);
|
|
|
|
|
|
|
|
|
|
// CIGAR: op len<<4|op. ‘MIDNSHP=X’→‘012345678’
|
|
// CIGAR: op len<<4|op. ‘MIDNSHP=X’→‘012345678’
|
|
|
let l_buffer_cigar = (n_cigar_op as usize) * 4;
|
|
let l_buffer_cigar = (n_cigar_op as usize) * 4;
|
|
@@ -225,7 +209,6 @@ impl BamReader {
|
|
|
}
|
|
}
|
|
|
})
|
|
})
|
|
|
.collect();
|
|
.collect();
|
|
|
- // println!("cigar {:?}", cigar);
|
|
|
|
|
|
|
|
|
|
let l_buffer_seq = (l_seq as usize + 1) / 2;
|
|
let l_buffer_seq = (l_seq as usize + 1) / 2;
|
|
|
let mut buffer_seq = vec![0u8; l_buffer_seq];
|
|
let mut buffer_seq = vec![0u8; l_buffer_seq];
|
|
@@ -237,7 +220,6 @@ impl BamReader {
|
|
|
sequence.push(&left);
|
|
sequence.push(&left);
|
|
|
if right != 0 { sequence.push(&right) };
|
|
if right != 0 { sequence.push(&right) };
|
|
|
}
|
|
}
|
|
|
- // println!("sequence {:?}", sequence);
|
|
|
|
|
|
|
|
|
|
let mut phred = vec![0; l_seq as usize];
|
|
let mut phred = vec![0; l_seq as usize];
|
|
|
self.reader.read_exact(&mut phred)?;
|
|
self.reader.read_exact(&mut phred)?;
|
|
@@ -332,7 +314,7 @@ mod tests {
|
|
|
|
|
|
|
|
let reads = bam_reader
|
|
let reads = bam_reader
|
|
|
.filter(|br| flags.contains(&br.flag))
|
|
.filter(|br| flags.contains(&br.flag))
|
|
|
- .take(1000)
|
|
|
|
|
|
|
+ .take(100)
|
|
|
.collect::<Vec<BamRead>>();
|
|
.collect::<Vec<BamRead>>();
|
|
|
|
|
|
|
|
assert_eq!(
|
|
assert_eq!(
|