|
|
@@ -2,7 +2,9 @@ use rayon::prelude::*;
|
|
|
use std::{collections::HashMap, usize};
|
|
|
|
|
|
use anyhow::Context;
|
|
|
-use rust_htslib::bam::{ext::BamRecordExtensions, record::Aux, Read, Record};
|
|
|
+use rust_htslib::bam::{ext::BamRecordExtensions, record::Aux, IndexedReader, Read, Record};
|
|
|
+
|
|
|
+use crate::bam::primary_record;
|
|
|
|
|
|
/// Enforce that reads should have unique qnames
|
|
|
#[derive(Debug)]
|
|
|
@@ -11,6 +13,7 @@ pub struct Bin {
|
|
|
pub start: u32, // 0-based inclusive
|
|
|
pub end: u32,
|
|
|
pub reads_store: HashMap<Vec<u8>, Record>,
|
|
|
+ pub bam_reader: IndexedReader,
|
|
|
}
|
|
|
|
|
|
impl Bin {
|
|
|
@@ -39,6 +42,7 @@ impl Bin {
|
|
|
start,
|
|
|
end,
|
|
|
reads_store,
|
|
|
+ bam_reader,
|
|
|
})
|
|
|
}
|
|
|
|
|
|
@@ -53,91 +57,41 @@ impl Bin {
|
|
|
.count()
|
|
|
}
|
|
|
|
|
|
+ pub fn sa_primary(&mut self) -> Vec<Record> { // Returns one `primary_record` result per stored read that carries a string-typed `SA` aux tag.
|
|
|
+ self.reads_store
|
|
|
+ .values() // iterates the HashMap values, so the output order is unspecified
|
|
|
+ .filter(|record| matches!(record.aux(b"SA"), Ok(Aux::String(_)))) // keep only reads whose `SA` tag lookup succeeds and is a string; lookup errors and other aux types are dropped
|
|
|
+ .map(|record| primary_record(&mut self.bam_reader, record.clone())) // the helper receives an owned copy of the record plus this bin's reader; presumably it resolves the primary alignment — TODO confirm in crate::bam
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
+
|
|
|
pub fn max_start_or_end(&self) -> (u32, usize) { // Returns the (position, count) with the most read starts/ends inside the bin, or (0, 0) when no position qualifies.
|
|
|
- let mut starts: HashMap<u32, usize> = HashMap::new();
|
|
|
- let mut ends: HashMap<u32, usize> = HashMap::new();
|
|
|
- self.reads_store.values().for_each(|record| {
|
|
|
+ let mut se: HashMap<u32, usize> = HashMap::new(); // position -> number of read starts and read ends seen there (both share one tally)
|
|
|
+ for record in self.reads_store.values() {
|
|
|
let reference_start = record.reference_start() as u32; // NOTE(review): `as u32` wraps negative or oversized coordinates — confirm inputs are always in range
|
|
|
let reference_end = record.reference_end() as u32;
|
|
|
|
|
|
if reference_start >= self.start && reference_start <= self.end { // both bin bounds are treated as inclusive here
|
|
|
- *starts.entry(reference_start).or_default() += 1;
|
|
|
+ *se.entry(reference_start).or_default() += 1;
|
|
|
}
|
|
|
if reference_end >= self.start && reference_end <= self.end { // same inclusive window applied to the read end
|
|
|
- *ends.entry(reference_end).or_default() += 1;
|
|
|
+ *se.entry(reference_end).or_default() += 1;
|
|
|
}
|
|
|
- });
|
|
|
-
|
|
|
- let max_pos_start = starts.into_iter().max_by_key(|(_, v)| *v);
|
|
|
- let max_pos_end = ends.into_iter().max_by_key(|(_, v)| *v);
|
|
|
-
|
|
|
- if let (Some(s), Some(e)) = (max_pos_start, max_pos_end) {
|
|
|
- if s > e {
|
|
|
- s
|
|
|
- } else {
|
|
|
- e
|
|
|
- }
|
|
|
- } else {
|
|
|
- (0, 0)
|
|
|
}
|
|
|
+
|
|
|
+ let max_pos = se.into_iter().max_by_key(|(_, v)| *v); // highest count wins; equal counts are resolved by HashMap iteration order, which is unspecified
|
|
|
+ max_pos.unwrap_or((0, 0)) // empty tally (no start or end inside the bin) falls back to (0, 0)
|
|
|
}
|
|
|
|
|
|
- // Initiate
|
|
|
- // let mut reads_starts: Vec<Vec<Record>> = Vec::new();
|
|
|
- // reads_starts.resize(length as usize, vec![]);
|
|
|
- // let mut reads_ends: Vec<Vec<Record>> = Vec::new();
|
|
|
- // reads_ends.resize(length as usize, vec![]);
|
|
|
- //
|
|
|
- // for read in bam_reader.records() {
|
|
|
- // let record = read.context(format!("Error while parsing record"))?;
|
|
|
- // // Skip reads with low mapping quality
|
|
|
- // if record.mapq() < mapq {
|
|
|
- // continue;
|
|
|
- // }
|
|
|
- //
|
|
|
- // let read_start = record.reference_start() as u32;
|
|
|
- // let read_end = record.reference_end() as u32;
|
|
|
- //
|
|
|
- // if read_start >= start && read_start < end {
|
|
|
- // let index = read_start - start;
|
|
|
- // let at_pos = reads_starts.get_mut(index as usize).unwrap();
|
|
|
- // at_pos.push(record.clone());
|
|
|
- // }
|
|
|
- //
|
|
|
- // if read_end >= start && read_end < end {
|
|
|
- // let index = read_end - start;
|
|
|
- // let at_pos = reads_ends.get_mut(index as usize).unwrap();
|
|
|
- // at_pos.push(record.clone());
|
|
|
- // }
|
|
|
- // }
|
|
|
-
|
|
|
- // let mut bam_pileup = Vec::new();
|
|
|
- // for p in bam.pileup() {
|
|
|
- // let pileup = p.context(format!(
|
|
|
- // "Can't pilup bam at position {}:{}-{}",
|
|
|
- // chr, start, stop
|
|
|
- // ))?;
|
|
|
- // let position = pileup.pos() as i32;
|
|
|
- // if position == start {
|
|
|
- // for alignment in pileup.alignments() {
|
|
|
- // match alignment.indel() {
|
|
|
- // bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
|
|
|
- // bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
|
|
|
- // _ => {
|
|
|
- // let record = alignment.record();
|
|
|
- // if record.seq_len() > 0 {
|
|
|
- // if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)?
|
|
|
- // {
|
|
|
- // bases.push((record.clone(), b));
|
|
|
- // }
|
|
|
- // } else if alignment.is_del() {
|
|
|
- // bases.push((record.clone(), b'D'));
|
|
|
- // }
|
|
|
- // }
|
|
|
- // }
|
|
|
- // }
|
|
|
- // }
|
|
|
- // }
|
|
|
+ pub fn se_primary(&mut self, pos: u32) -> Vec<Record> { // Returns one `primary_record` result per stored read whose reference start or end equals `pos`.
|
|
|
+ self.reads_store
|
|
|
+ .values() // iterates the HashMap values, so the output order is unspecified
|
|
|
+ .filter(|record| {
|
|
|
+ record.reference_start() as u32 == pos || record.reference_end() as u32 == pos // coordinates are cast with `as u32` before comparison, matching max_start_or_end
|
|
|
+ })
|
|
|
+ .map(|record| primary_record(&mut self.bam_reader, record.clone())) // the helper receives an owned copy of the record plus this bin's reader; presumably it resolves the primary alignment — TODO confirm in crate::bam
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
pub fn scan(
|
|
|
@@ -271,7 +225,13 @@ fn compute_mad(data: &[f64], median: f64) -> f64 {
|
|
|
compute_median(&deviations)
|
|
|
}
|
|
|
|
|
|
-pub fn scan_outliers(bam_path: &str, contig: &str, start: u32, end: u32, length: u32) -> Vec<(u32, usize, f64, bool, f64, bool)> {
|
|
|
+pub fn scan_outliers(
|
|
|
+ bam_path: &str,
|
|
|
+ contig: &str,
|
|
|
+ start: u32,
|
|
|
+ end: u32,
|
|
|
+ length: u32,
|
|
|
+) -> Vec<(u32, usize, f64, bool, f64, bool)> {
|
|
|
let mut starts = Vec::new();
|
|
|
let mut current = start;
|
|
|
while current <= end {
|
|
|
@@ -317,10 +277,12 @@ pub fn scan_outliers(bam_path: &str, contig: &str, start: u32, end: u32, length:
|
|
|
filter_outliers_modified_z_score_with_indices(sa_ratios, indices.clone());
|
|
|
let filtered_se_indices = filter_outliers_modified_z_score_with_indices(se_ratios, indices);
|
|
|
|
|
|
- ratios.iter().map(|(p, n, sa, se)| {
|
|
|
- let sa_outlier = filtered_sa_indices.contains(p);
|
|
|
- let se_outlier = filtered_se_indices.contains(p);
|
|
|
- (*p, *n, *sa, sa_outlier, *se, se_outlier)
|
|
|
-
|
|
|
- }).collect()
|
|
|
+ ratios
|
|
|
+ .iter()
|
|
|
+ .map(|(p, n, sa, se)| {
|
|
|
+ let sa_outlier = filtered_sa_indices.contains(p);
|
|
|
+ let se_outlier = filtered_se_indices.contains(p);
|
|
|
+ (*p, *n, *sa, sa_outlier, *se, se_outlier)
|
|
|
+ })
|
|
|
+ .collect()
|
|
|
}
|