|
|
@@ -229,19 +229,32 @@ pub fn load_bam_collection(result_dir: &str) -> BamCollection {
|
|
|
BamCollection { bams }
|
|
|
}
|
|
|
|
|
|
-pub fn bam_compo(file_path: &str, sample_size: usize) -> anyhow::Result<Vec<(String, String, f64)>> {
|
|
|
+pub fn bam_compo(
|
|
|
+ file_path: &str,
|
|
|
+ sample_size: usize,
|
|
|
+) -> anyhow::Result<Vec<(String, String, f64)>> {
|
|
|
let mut bam = rust_htslib::bam::Reader::from_path(file_path)?;
|
|
|
|
|
|
- let mut rgs: HashMap<( String, String ), u64> = HashMap::new();
|
|
|
+ let mut rgs: HashMap<(String, String), u64> = HashMap::new();
|
|
|
for result in bam.records().filter_map(Result::ok).take(sample_size) {
|
|
|
- if let ( rust_htslib::bam::record::Aux::String(s), rust_htslib::bam::record::Aux::String(b)) = ( result.aux(b"RG")?, result.aux(b"fn")? ) {
|
|
|
- *rgs.entry(( s.to_string(), b.to_string())).or_default() += 1;
|
|
|
+ if let (
|
|
|
+ rust_htslib::bam::record::Aux::String(s),
|
|
|
+ rust_htslib::bam::record::Aux::String(b),
|
|
|
+ ) = (result.aux(b"RG")?, result.aux(b"fn")?)
|
|
|
+ {
|
|
|
+ *rgs.entry((s.to_string(), b.to_string())).or_default() += 1;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
Ok(rgs
|
|
|
.into_iter()
|
|
|
- .map(|(k, n)| (k.0.to_string(), k.1.to_string(), n as f64 * 100.0 / sample_size as f64))
|
|
|
+ .map(|(k, n)| {
|
|
|
+ (
|
|
|
+ k.0.to_string(),
|
|
|
+ k.1.to_string(),
|
|
|
+ n as f64 * 100.0 / sample_size as f64,
|
|
|
+ )
|
|
|
+ })
|
|
|
.map(|(rg, f, p)| {
|
|
|
let (a, _) = rg.split_once('_').unwrap();
|
|
|
let (b, _) = f.split_once('_').unwrap();
|
|
|
@@ -722,9 +735,9 @@ pub fn base_at_new(
|
|
|
record: &rust_htslib::bam::Record,
|
|
|
at_pos: i64,
|
|
|
with_next_ins: bool,
|
|
|
-) -> anyhow::Result<Option<PileBase>> {
|
|
|
+) -> Option<PileBase> {
|
|
|
if record.pos() > at_pos {
|
|
|
- return Ok(None);
|
|
|
+ return None;
|
|
|
}
|
|
|
|
|
|
let mut ref_pos = record.pos();
|
|
|
@@ -733,11 +746,13 @@ pub fn base_at_new(
|
|
|
let seq = record.seq().as_bytes(); // already expanded to ASCII (A,C,G,T,N)
|
|
|
|
|
|
for (i, op) in cigar.iter().enumerate() {
|
|
|
- let (consume_r, consume_ref) = match op {
|
|
|
- Cigar::Match(l) | Cigar::Equal(l) | Cigar::Diff(l) => (*l as i64, *l as i64),
|
|
|
- Cigar::Ins(l) | Cigar::SoftClip(l) => (*l as i64, 0),
|
|
|
- Cigar::Del(l) | Cigar::RefSkip(l) => (0, *l as i64),
|
|
|
- _ => (0, 0),
|
|
|
+ let l = op.len() as i64;
|
|
|
+ let (consume_read, consume_ref) = match op.char() {
|
|
|
+ 'M' | '=' | 'X' => (l, l),
|
|
|
+ 'I' | 'S' => (l, 0),
|
|
|
+ 'D' | 'N' => (0, l),
|
|
|
+ 'H' | 'P' => (0, 0),
|
|
|
+ _ => unreachable!(),
|
|
|
};
|
|
|
|
|
|
/* look ahead for an insertion immediately AFTER the queried base */
|
|
|
@@ -745,29 +760,37 @@ pub fn base_at_new(
|
|
|
&& ref_pos + consume_ref == at_pos + 1
|
|
|
&& matches!(cigar.get(i + 1), Some(Cigar::Ins(_)))
|
|
|
{
|
|
|
- return Ok(Some(PileBase::Ins));
|
|
|
+ return Some(PileBase::Ins);
|
|
|
}
|
|
|
|
|
|
+ // Instead of PileBase::Ins, consider
|
|
|
+ // extracting the inserted sequence and returning its length or actual bases:
|
|
|
+ // if let Some(Cigar::Ins(len)) = cigar.get(i + 1) {
|
|
|
+ // let ins_seq = &seq[read_pos as usize + (consume_ref as usize)
|
|
|
+ // ..read_pos as usize + (consume_ref as usize) + (*len as usize)];
|
|
|
+ // return Some(PileBase::Ins(ins_seq.to_vec()));
|
|
|
+ // }
|
|
|
+
|
|
|
/* Does the queried reference coordinate fall inside this CIGAR op? */
|
|
|
if ref_pos + consume_ref > at_pos {
|
|
|
- return Ok(Some(match op {
|
|
|
+ return Some(match op {
|
|
|
Cigar::Del(e) => PileBase::Del((record.qname().to_vec(), *e)),
|
|
|
Cigar::RefSkip(_) => PileBase::Skip,
|
|
|
_ => {
|
|
|
let offset = (at_pos - ref_pos) as usize;
|
|
|
if read_pos as usize + offset >= seq.len() {
|
|
|
- return Ok(None); // out‑of‑range, malformed BAM
|
|
|
+ return None; // out of range, malformed BAM
|
|
|
}
|
|
|
decode(seq[read_pos as usize + offset])
|
|
|
}
|
|
|
- }));
|
|
|
+ });
|
|
|
}
|
|
|
|
|
|
- read_pos += consume_r;
|
|
|
+ read_pos += consume_read;
|
|
|
ref_pos += consume_ref;
|
|
|
}
|
|
|
|
|
|
- Ok(None) // beyond alignment
|
|
|
+ None // beyond alignment
|
|
|
}
|
|
|
|
|
|
/// Return the pile‑up of **all reads** at `chr:pos` (0‑based, inclusive).
|
|
|
@@ -821,7 +844,7 @@ pub fn nt_pileup_new(
|
|
|
}
|
|
|
rust_htslib::bam::pileup::Indel::None => {
|
|
|
// Regular match/mismatch: delegate to `base_at`.
|
|
|
- if let Some(base) = base_at_new(&aln.record(), pos as i64, with_next_ins)? {
|
|
|
+ if let Some(base) = base_at_new(&aln.record(), pos as i64, with_next_ins) {
|
|
|
pile.push(base);
|
|
|
}
|
|
|
}
|