|
|
@@ -14,6 +14,7 @@ pub struct Bin {
|
|
|
pub end: u32,
|
|
|
pub reads_store: HashMap<Vec<u8>, Record>,
|
|
|
pub bam_reader: IndexedReader,
|
|
|
+ pub reads_mean_len: u32,
|
|
|
pub n_low_mapq: u32,
|
|
|
}
|
|
|
|
|
|
@@ -32,6 +33,7 @@ impl Bin {
|
|
|
|
|
|
let mut reads_store: HashMap<Vec<u8>, Record> = HashMap::new();
|
|
|
let mut n_low_mapq = 0;
|
|
|
+ let mut lengths = Vec::new();
|
|
|
for read in bam_reader.records() {
|
|
|
let record = read.context("Error while parsing record")?;
|
|
|
// Skip reads with low mapping quality
|
|
|
@@ -39,6 +41,11 @@ impl Bin {
|
|
|
n_low_mapq += 1;
|
|
|
continue;
|
|
|
}
|
|
|
+ lengths.push(
|
|
|
+ record
|
|
|
+ .reference_end()
|
|
|
+ .saturating_sub(record.reference_start()),
|
|
|
+ );
|
|
|
reads_store.insert(record.qname().to_vec(), record);
|
|
|
}
|
|
|
Ok(Bin {
|
|
|
@@ -48,6 +55,7 @@ impl Bin {
|
|
|
reads_store,
|
|
|
bam_reader,
|
|
|
n_low_mapq,
|
|
|
+ reads_mean_len: (lengths.iter().sum::<i64>() as f64 / lengths.len() as f64) as u32,
|
|
|
})
|
|
|
}
|
|
|
|
|
|
@@ -289,19 +297,14 @@ pub fn scan_outliers(
|
|
|
contig: &str,
|
|
|
start: u32,
|
|
|
end: u32,
|
|
|
- length: u32,
|
|
|
+ bin_length: u32,
|
|
|
) -> Vec<(u32, usize, u32, f64, bool, f64, bool)> {
|
|
|
- let mut starts = Vec::new();
|
|
|
- let mut current = start;
|
|
|
- while current <= end {
|
|
|
- starts.push(current);
|
|
|
- current += length;
|
|
|
- }
|
|
|
+ let starts: Vec<_> = (start..=end).step_by(bin_length as usize).collect();
|
|
|
|
|
|
let ratios: Vec<(u32, usize, u32, f64, f64)> = starts
|
|
|
.into_par_iter()
|
|
|
.filter_map(|start| {
|
|
|
- match Bin::new(bam_path, contig, start, length) {
|
|
|
+ match Bin::new(bam_path, contig, start, bin_length) {
|
|
|
Ok(bin) => {
|
|
|
let n = bin.n_reads();
|
|
|
let (_, se) = bin.max_start_or_end();
|