use anyhow::{Context, Ok, Result}; use rust_htslib::{ bam, bam::{ext::BamRecordExtensions, record, Read, Record}, }; pub fn get_hts_nt_pileup( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, with_next_ins: bool, ) -> Result> { let stop = start + 1; let mut bases = Vec::new(); bam.fetch((chr, start, stop))?; let mut bam_pileup = Vec::new(); for p in bam.pileup() { let pileup = p.context(format!( "Can't pilup bam at position {}:{}-{}", chr, start, stop ))?; let position = pileup.pos() as i32; if position == start { for alignment in pileup.alignments() { match alignment.indel() { bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'), bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'), _ => { let record = alignment.record(); if record.seq_len() > 0 { if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? { bases.push(b); } } else { if alignment.is_del() { bases.push(b'D'); } } } } } } } Ok(bases) } pub fn hts_base_at( record: &rust_htslib::bam::record::Record, at_pos: u32, with_next_ins: bool, ) -> Result> { use rust_htslib::bam::record::Cigar; let cigar = record.cigar(); let seq = record.seq(); let pos = cigar.pos() as u32; let mut read_i = 0u32; let at_pos = at_pos - 1; let mut ref_pos = pos; if ref_pos > at_pos { return Ok(None); } for (id, op) in cigar.iter().enumerate() { let (add_read, add_ref) = match *op { Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len), Cigar::Ins(len) => (len, 0), Cigar::Del(len) => (0, len), Cigar::RefSkip(len) => (0, len), Cigar::SoftClip(len) => (len, 0), Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0), }; // If at the end of the op len and next op is Ins return I if with_next_ins && ref_pos + add_read == at_pos + 1 { if let Some(Cigar::Ins(_)) = cigar.get(id + 1) { return Ok(Some(b'I')); } } if ref_pos + add_ref > at_pos { // Handle deletions directly if let Cigar::Del(_) = *op { return Ok(Some(b'D')); } else if let Cigar::RefSkip(_) = op { return Ok(None); } else { let diff = at_pos - ref_pos; let p = read_i + diff; return Ok(Some(seq[p as usize])); } } read_i += add_read; ref_pos += add_ref; } Ok(None) } pub fn get_n_start( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, ) -> Result { bam.fetch((chr, start, stop))?; // let mut start_positions = Vec::new(); let mut n_start = 0; for read in bam.records() { let record = read.context(format!("eRR"))?; let rstart = record.pos() as i32; if rstart >= start && rstart < stop { if record.mapq() > 40 { n_start += 1; } // start_positions.push(rstart); } } Ok(n_start) // Ok(start_positions.len()) } pub fn get_n_start_end_qual( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, mapq: u8, ) -> Result { bam.fetch((chr, start, stop))?; // let mut start_positions = Vec::new(); let mut n_start = 0; for read in bam.records() { let record = read.context(format!("eRR"))?; let rstart = record.pos() as i32; let rend = record.reference_end() as i32; if rstart >= start && rstart < stop || rend >= start && rend < stop { if record.mapq() >= mapq { n_start += 1; } } } Ok(n_start) } pub fn get_start_end_qual( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, mapq: u8, ) -> Result> { bam.fetch((chr, start - 1, stop))?; let length = stop - start; let mut results = Vec::new(); results.resize(length as usize, 0); for read in bam.records() { let record = read.context(format!("Error while parsing record"))?; let rstart = record.pos() as i32; // let rstart = record.reference_start() as i32; let rend = record.reference_end() as i32; if rstart >= start && rstart < stop { // println!("start {rstart}"); if record.mapq() >= mapq { let index = rstart - start; *results.get_mut(index as usize).unwrap() += 1; } } if rend >= start && rend < stop { // println!("{rend}"); if record.mapq() >= mapq { let index = rend - start; *results.get_mut(index as usize).unwrap() += 1; } } } Ok(results) } pub fn get_start_end_qual_rec( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, mapq: u8, ) -> Result<(Vec>, Vec>)> { bam.fetch((chr, start - 1, stop))?; let length = stop - start; let mut results_start: Vec> = Vec::new(); results_start.resize(length as usize, vec![]); let mut results_end: Vec> = Vec::new(); results_end.resize(length as usize, vec![]); for read in bam.records() { let record = read.context(format!("Error while parsing record"))?; let rstart = record.pos() as i32; let rend = record.reference_end() as i32; if rstart >= start && rstart < stop { if record.mapq() >= mapq { let index = rstart - start; let u = results_start.get_mut(index as usize).unwrap(); u.push(record.clone()); } } if rend >= start && rend < stop { if record.mapq() >= mapq { let index = rend - start; let u = results_end.get_mut(index as usize).unwrap(); u.push(record.clone()); } } } Ok((results_start, results_end)) } pub fn primary_record(bam_path: &str, record: &Record) -> Option { let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap(); if record.is_supplementary() { let qname = record.qname(); let mut positions: Vec<(&str, i32)> = Vec::new(); match record.aux(b"SA") { std::result::Result::Ok(v) => match v { record::Aux::String(v) => { positions = v .split(";") .filter(|s| s.len() != 0) .map(|s| { let s = s.split(",").take(2).collect::>(); let chr = *s.get(0).unwrap(); let position: i32 = s.get(1).unwrap().parse().unwrap(); (chr, position) }) .collect(); } _ => (), }, Err(_) => (), }; for (chr, start) in positions { bam.fetch((chr, start, start + 1)).unwrap(); let records = bam.records(); for record in records { let record = record.unwrap(); // String::from_utf8(record.qname().to_vec()).unwrap().as_str() if qname == record.qname() { if !record.is_supplementary() { return Some(record.clone()); } } } } } None } pub fn range_depths( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, ) -> Result> { bam.fetch((chr, start, stop))?; let mut depths = vec![0]; depths.resize((stop - start) as usize, 0); for p in bam.pileup() { let pileup = p.context(format!("eRR"))?; let rstart = pileup.pos() as i32; if rstart >= start && rstart < stop { // depths.push(pileup.depth()); let v = depths .get_mut((rstart - start) as usize) .context(format!("Errrr {}", rstart - start))?; *v = pileup.depth(); } else if rstart >= stop { break; } } Ok(depths) } pub fn range_depths_qual( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, mapq: u8, ) -> Result> { bam.fetch((chr, start, stop))?; let mut depths = vec![0]; depths.resize((stop - start) as usize, 0); for p in bam.pileup() { let pileup = p.context(format!("eRR"))?; let rstart = pileup.pos() as i32; if rstart >= start && rstart < stop { let v = depths .get_mut((rstart - start) as usize) .context(format!("Errrr {}", rstart - start))?; *v = pileup .alignments() .filter(|e| e.record().mapq() >= mapq) .count(); } else if rstart >= stop { break; } } Ok(depths) } pub fn range_len_qual( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, mapq: u8, ) -> Result> { bam.fetch((chr, start, stop))?; let mut depths = vec![0]; depths.resize((stop - start) as usize, 0); for p in bam.pileup() { let pileup = p.context(format!("eRR"))?; let rstart = pileup.pos() as i32; if rstart >= start && rstart < stop { let v = depths .get_mut((rstart - start) as usize) .context(format!("Errrr {}", rstart - start))?; *v = pileup .alignments() .filter(|e| e.record().mapq() >= mapq) .map(|e| e.record().seq_len()) .sum(); } else if rstart >= stop { break; } } Ok(depths) } pub fn swap_by_primary(bam_path: &str, records: Vec) -> Vec { let mut res_records = Vec::new(); for record in records { if let Some(record) = primary_record(bam_path, &record) { res_records.push(record); } else { res_records.push(record); } } res_records } pub fn scan_sa( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, stop: i32, mapq: u8, ) -> Result<(Vec>, Vec>)> { bam.fetch((chr, start - 1, stop))?; let length = stop - start; let mut results_start: Vec> = Vec::new(); results_start.resize(length as usize, vec![]); let mut results_end: Vec> = Vec::new(); results_end.resize(length as usize, vec![]); for read in bam.records() { let record = read.context(format!("Error while parsing record"))?; let rstart = record.pos() as i32; let rend = record.reference_end() as i32; if rstart >= start && rstart < stop { if record.mapq() >= mapq && record.aux(b"SA").is_ok() { let index = rstart - start; let u = results_start.get_mut(index as usize).unwrap(); u.push(record.clone()); } } if rend >= start && rend < stop && record.aux(b"SA").is_ok() { if record.mapq() >= mapq { let index = rend - start; let u = results_end.get_mut(index as usize).unwrap(); u.push(record.clone()); } } } Ok((results_start, results_end)) } pub fn records_at_base( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, with_next_ins: bool, ) -> Result> { let stop = start + 1; let mut bases = Vec::new(); bam.fetch((chr, start, stop))?; let mut bam_pileup = Vec::new(); for p in bam.pileup() { let pileup = p.context(format!( "Can't pilup bam at position {}:{}-{}", chr, start, stop ))?; let position = pileup.pos() as i32; if position == start { for alignment in pileup.alignments() { match alignment.indel() { bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'), bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'), _ => { let record = alignment.record(); if record.seq_len() > 0 { if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? { bases.push((record, b)); } } else { if alignment.is_del() { bases.push((record, b'D')); } } } } } } } Ok(bases) } pub fn qnames_at_base( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, with_next_ins: bool, ) -> Result> { let stop = start + 1; let mut bases = Vec::new(); bam.fetch((chr, start, stop))?; let mut bam_pileup = Vec::new(); for p in bam.pileup() { let pileup = p.context(format!( "Can't pilup bam at position {}:{}-{}", chr, start, stop ))?; let position = pileup.pos() as i32; if position == start { for alignment in pileup.alignments() { match alignment.indel() { bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'), bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'), _ => { let record = alignment.record(); let qname = String::from_utf8(record.qname().to_vec())?; if record.seq_len() > 0 { if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? { bases.push((qname, b)); } } else { if alignment.is_del() { bases.push((qname, b'D')); } } } } } } } Ok(bases) } #[cfg(test)] mod tests { // Note this useful idiom: importing names from outer (for mod tests) scope. use super::*; #[test] fn test_se() { let bam_path = "/data/longreads_basic_pipe/CAMARA/diag/CAMARA_diag_hs1.bam"; let chr = "chr9"; let start = 21839796; let mapq = 50; let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap(); // let a = get_start_end_qual(&mut bam, chr, start, start + 1, mapq).unwrap(); // println!("{a:?}"); let res = get_start_end_qual_rec(&mut bam, chr, start, start + 1, mapq).unwrap(); let (_, e) = res; let res_records = swap_by_primary(bam_path, e.get(0).unwrap().clone()); println!("{res_records:?}"); println!("{}", res_records.len()); assert!(res_records.len() == 12); } #[test] fn test_sa() { let bam_path = "/data/longreads_basic_pipe/CAMARA/diag/CAMARA_diag_hs1.bam"; let chr = "chr9"; let start = 21839796; let mapq = 50; let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap(); let a = get_start_end_qual(&mut bam, chr, start, start + 1, mapq).unwrap().pop().unwrap(); let (_, e) = get_start_end_qual_rec(&mut bam, chr, start, start + 1, mapq).unwrap(); let res_records = swap_by_primary(bam_path, e.get(0).unwrap().clone()); let (_, e) = scan_sa(&mut bam, chr, start, start + 1, mapq).unwrap(); let res_records_sa = swap_by_primary(bam_path, e.get(0).unwrap().clone()); assert_eq!(a as usize, res_records.len()); assert_eq!(a as usize, res_records_sa.len()); } }