| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533 |
- use anyhow::{Context, Ok, Result};
- use rust_htslib::{
- bam,
- bam::{ext::BamRecordExtensions, record, Read, Record},
- };
- pub fn get_hts_nt_pileup(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- with_next_ins: bool,
- ) -> Result<Vec<u8>> {
- let stop = start + 1;
- let mut bases = Vec::new();
- bam.fetch((chr, start, stop))?;
- let mut bam_pileup = Vec::new();
- for p in bam.pileup() {
- let pileup = p.context(format!(
- "Can't pilup bam at position {}:{}-{}",
- chr, start, stop
- ))?;
- let position = pileup.pos() as i32;
- if position == start {
- for alignment in pileup.alignments() {
- match alignment.indel() {
- bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
- bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
- _ => {
- let record = alignment.record();
- if record.seq_len() > 0 {
- if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
- bases.push(b);
- }
- } else {
- if alignment.is_del() {
- bases.push(b'D');
- }
- }
- }
- }
- }
- }
- }
- Ok(bases)
- }
- pub fn hts_base_at(
- record: &rust_htslib::bam::record::Record,
- at_pos: u32,
- with_next_ins: bool,
- ) -> Result<Option<u8>> {
- use rust_htslib::bam::record::Cigar;
- let cigar = record.cigar();
- let seq = record.seq();
- let pos = cigar.pos() as u32;
- let mut read_i = 0u32;
- let at_pos = at_pos - 1;
- let mut ref_pos = pos;
- if ref_pos > at_pos {
- return Ok(None);
- }
- for (id, op) in cigar.iter().enumerate() {
- let (add_read, add_ref) = match *op {
- Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
- Cigar::Ins(len) => (len, 0),
- Cigar::Del(len) => (0, len),
- Cigar::RefSkip(len) => (0, len),
- Cigar::SoftClip(len) => (len, 0),
- Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
- };
- // If at the end of the op len and next op is Ins return I
- if with_next_ins && ref_pos + add_read == at_pos + 1 {
- if let Some(Cigar::Ins(_)) = cigar.get(id + 1) {
- return Ok(Some(b'I'));
- }
- }
- if ref_pos + add_ref > at_pos {
- // Handle deletions directly
- if let Cigar::Del(_) = *op {
- return Ok(Some(b'D'));
- } else if let Cigar::RefSkip(_) = op {
- return Ok(None);
- } else {
- let diff = at_pos - ref_pos;
- let p = read_i + diff;
- return Ok(Some(seq[p as usize]));
- }
- }
- read_i += add_read;
- ref_pos += add_ref;
- }
- Ok(None)
- }
- pub fn get_n_start(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- ) -> Result<usize> {
- bam.fetch((chr, start, stop))?;
- // let mut start_positions = Vec::new();
- let mut n_start = 0;
- for read in bam.records() {
- let record = read.context(format!("eRR"))?;
- let rstart = record.pos() as i32;
- if rstart >= start && rstart < stop {
- if record.mapq() > 40 {
- n_start += 1;
- }
- // start_positions.push(rstart);
- }
- }
- Ok(n_start)
- // Ok(start_positions.len())
- }
- pub fn get_n_start_end_qual(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- mapq: u8,
- ) -> Result<usize> {
- bam.fetch((chr, start, stop))?;
- // let mut start_positions = Vec::new();
- let mut n_start = 0;
- for read in bam.records() {
- let record = read.context(format!("eRR"))?;
- let rstart = record.pos() as i32;
- let rend = record.reference_end() as i32;
- if rstart >= start && rstart < stop || rend >= start && rend < stop {
- if record.mapq() >= mapq {
- n_start += 1;
- }
- }
- }
- Ok(n_start)
- }
- pub fn get_start_end_qual(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- mapq: u8,
- ) -> Result<Vec<i32>> {
- bam.fetch((chr, start - 1, stop))?;
- let length = stop - start;
- let mut results = Vec::new();
- results.resize(length as usize, 0);
- for read in bam.records() {
- let record = read.context(format!("Error while parsing record"))?;
- let rstart = record.pos() as i32;
- // let rstart = record.reference_start() as i32;
- let rend = record.reference_end() as i32;
- if rstart >= start && rstart < stop {
- // println!("start {rstart}");
- if record.mapq() >= mapq {
- let index = rstart - start;
- *results.get_mut(index as usize).unwrap() += 1;
- }
- }
- if rend >= start && rend < stop {
- // println!("{rend}");
- if record.mapq() >= mapq {
- let index = rend - start;
- *results.get_mut(index as usize).unwrap() += 1;
- }
- }
- }
- Ok(results)
- }
- pub fn get_start_end_qual_rec(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- mapq: u8,
- ) -> Result<(Vec<Vec<Record>>, Vec<Vec<Record>>)> {
- bam.fetch((chr, start - 1, stop))?;
- let length = stop - start;
- let mut results_start: Vec<Vec<Record>> = Vec::new();
- results_start.resize(length as usize, vec![]);
- let mut results_end: Vec<Vec<Record>> = Vec::new();
- results_end.resize(length as usize, vec![]);
- for read in bam.records() {
- let record = read.context(format!("Error while parsing record"))?;
- let rstart = record.pos() as i32;
- let rend = record.reference_end() as i32;
- if rstart >= start && rstart < stop {
- if record.mapq() >= mapq {
- let index = rstart - start;
- let u = results_start.get_mut(index as usize).unwrap();
- u.push(record.clone());
- }
- }
- if rend >= start && rend < stop {
- if record.mapq() >= mapq {
- let index = rend - start;
- let u = results_end.get_mut(index as usize).unwrap();
- u.push(record.clone());
- }
- }
- }
- Ok((results_start, results_end))
- }
- pub fn primary_record(bam_path: &str, record: &Record) -> Option<Record> {
- let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
- if record.is_supplementary() {
- let qname = record.qname();
- let mut positions: Vec<(&str, i32)> = Vec::new();
- match record.aux(b"SA") {
- std::result::Result::Ok(v) => match v {
- record::Aux::String(v) => {
- positions = v
- .split(";")
- .filter(|s| s.len() != 0)
- .map(|s| {
- let s = s.split(",").take(2).collect::<Vec<&str>>();
- let chr = *s.get(0).unwrap();
- let position: i32 = s.get(1).unwrap().parse().unwrap();
- (chr, position)
- })
- .collect();
- }
- _ => (),
- },
- Err(_) => (),
- };
- for (chr, start) in positions {
- bam.fetch((chr, start, start + 1)).unwrap();
- let records = bam.records();
- for record in records {
- let record = record.unwrap();
- // String::from_utf8(record.qname().to_vec()).unwrap().as_str()
- if qname == record.qname() {
- if !record.is_supplementary() {
- return Some(record.clone());
- }
- }
- }
- }
- }
- None
- }
- pub fn range_depths(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- ) -> Result<Vec<u32>> {
- bam.fetch((chr, start, stop))?;
- let mut depths = vec![0];
- depths.resize((stop - start) as usize, 0);
- for p in bam.pileup() {
- let pileup = p.context(format!("eRR"))?;
- let rstart = pileup.pos() as i32;
- if rstart >= start && rstart < stop {
- // depths.push(pileup.depth());
- let v = depths
- .get_mut((rstart - start) as usize)
- .context(format!("Errrr {}", rstart - start))?;
- *v = pileup.depth();
- } else if rstart >= stop {
- break;
- }
- }
- Ok(depths)
- }
- pub fn range_depths_qual(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- mapq: u8,
- ) -> Result<Vec<usize>> {
- bam.fetch((chr, start, stop))?;
- let mut depths = vec![0];
- depths.resize((stop - start) as usize, 0);
- for p in bam.pileup() {
- let pileup = p.context(format!("eRR"))?;
- let rstart = pileup.pos() as i32;
- if rstart >= start && rstart < stop {
- let v = depths
- .get_mut((rstart - start) as usize)
- .context(format!("Errrr {}", rstart - start))?;
- *v = pileup
- .alignments()
- .filter(|e| e.record().mapq() >= mapq)
- .count();
- } else if rstart >= stop {
- break;
- }
- }
- Ok(depths)
- }
- pub fn range_len_qual(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- mapq: u8,
- ) -> Result<Vec<usize>> {
- bam.fetch((chr, start, stop))?;
- let mut depths = vec![0];
- depths.resize((stop - start) as usize, 0);
- for p in bam.pileup() {
- let pileup = p.context(format!("eRR"))?;
- let rstart = pileup.pos() as i32;
- if rstart >= start && rstart < stop {
- let v = depths
- .get_mut((rstart - start) as usize)
- .context(format!("Errrr {}", rstart - start))?;
- *v = pileup
- .alignments()
- .filter(|e| e.record().mapq() >= mapq)
- .map(|e| e.record().seq_len())
- .sum();
- } else if rstart >= stop {
- break;
- }
- }
- Ok(depths)
- }
- pub fn swap_by_primary(bam_path: &str, records: Vec<Record>) -> Vec<Record> {
- let mut res_records = Vec::new();
- for record in records {
- if let Some(record) = primary_record(bam_path, &record) {
- res_records.push(record);
- } else {
- res_records.push(record);
- }
- }
- res_records
- }
- pub fn scan_sa(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- stop: i32,
- mapq: u8,
- ) -> Result<(Vec<Vec<Record>>, Vec<Vec<Record>>)> {
- bam.fetch((chr, start - 1, stop))?;
- let length = stop - start;
- let mut results_start: Vec<Vec<Record>> = Vec::new();
- results_start.resize(length as usize, vec![]);
- let mut results_end: Vec<Vec<Record>> = Vec::new();
- results_end.resize(length as usize, vec![]);
- for read in bam.records() {
- let record = read.context(format!("Error while parsing record"))?;
- let rstart = record.pos() as i32;
- let rend = record.reference_end() as i32;
- if rstart >= start && rstart < stop {
- if record.mapq() >= mapq && record.aux(b"SA").is_ok() {
-
- let index = rstart - start;
- let u = results_start.get_mut(index as usize).unwrap();
- u.push(record.clone());
- }
- }
- if rend >= start && rend < stop && record.aux(b"SA").is_ok() {
- if record.mapq() >= mapq {
- let index = rend - start;
- let u = results_end.get_mut(index as usize).unwrap();
- u.push(record.clone());
- }
- }
- }
- Ok((results_start, results_end))
- }
- pub fn records_at_base(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- with_next_ins: bool,
- ) -> Result<Vec<(Record, u8)>> {
- let stop = start + 1;
- let mut bases = Vec::new();
- bam.fetch((chr, start, stop))?;
- let mut bam_pileup = Vec::new();
- for p in bam.pileup() {
- let pileup = p.context(format!(
- "Can't pilup bam at position {}:{}-{}",
- chr, start, stop
- ))?;
- let position = pileup.pos() as i32;
- if position == start {
- for alignment in pileup.alignments() {
- match alignment.indel() {
- bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
- bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
- _ => {
- let record = alignment.record();
- if record.seq_len() > 0 {
- if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
- bases.push((record, b));
- }
- } else {
- if alignment.is_del() {
- bases.push((record, b'D'));
- }
- }
- }
- }
- }
- }
- }
- Ok(bases)
- }
- pub fn qnames_at_base(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- with_next_ins: bool,
- ) -> Result<Vec<(String, u8)>> {
- let stop = start + 1;
- let mut bases = Vec::new();
- bam.fetch((chr, start, stop))?;
- let mut bam_pileup = Vec::new();
- for p in bam.pileup() {
- let pileup = p.context(format!(
- "Can't pilup bam at position {}:{}-{}",
- chr, start, stop
- ))?;
- let position = pileup.pos() as i32;
- if position == start {
- for alignment in pileup.alignments() {
- match alignment.indel() {
- bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
- bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
- _ => {
- let record = alignment.record();
- let qname = String::from_utf8(record.qname().to_vec())?;
- if record.seq_len() > 0 {
- if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
- bases.push((qname, b));
- }
- } else {
- if alignment.is_del() {
- bases.push((qname, b'D'));
- }
- }
- }
- }
- }
- }
- }
- Ok(bases)
- }
- #[cfg(test)]
- mod tests {
- // Note this useful idiom: importing names from outer (for mod tests) scope.
- use super::*;
- #[test]
- fn test_se() {
- let bam_path = "/data/longreads_basic_pipe/CAMARA/diag/CAMARA_diag_hs1.bam";
- let chr = "chr9";
- let start = 21839796;
- let mapq = 50;
- let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
- // let a = get_start_end_qual(&mut bam, chr, start, start + 1, mapq).unwrap();
- // println!("{a:?}");
- let res = get_start_end_qual_rec(&mut bam, chr, start, start + 1, mapq).unwrap();
- let (_, e) = res;
- let res_records = swap_by_primary(bam_path, e.get(0).unwrap().clone());
- println!("{res_records:?}");
- println!("{}", res_records.len());
- assert!(res_records.len() == 12);
- }
- #[test]
- fn test_sa() {
- let bam_path = "/data/longreads_basic_pipe/CAMARA/diag/CAMARA_diag_hs1.bam";
- let chr = "chr9";
- let start = 21839796;
- let mapq = 50;
- let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
- let a = get_start_end_qual(&mut bam, chr, start, start + 1, mapq).unwrap().pop().unwrap();
- let (_, e) = get_start_end_qual_rec(&mut bam, chr, start, start + 1, mapq).unwrap();
- let res_records = swap_by_primary(bam_path, e.get(0).unwrap().clone());
- let (_, e) = scan_sa(&mut bam, chr, start, start + 1, mapq).unwrap();
- let res_records_sa = swap_by_primary(bam_path, e.get(0).unwrap().clone());
- assert_eq!(a as usize, res_records.len());
- assert_eq!(a as usize, res_records_sa.len());
- }
- }
|