Thomas 1 month ago
parent
commit
46fd4b31e5
2 changed files with 12 additions and 9 deletions
  1. 11 8
      src/bin.rs
  2. 1 1
      src/lib.rs

+ 11 - 8
src/bin.rs

@@ -14,6 +14,7 @@ pub struct Bin {
     pub end: u32,
     pub reads_store: HashMap<Vec<u8>, Record>,
     pub bam_reader: IndexedReader,
+    pub reads_mean_len: u32,
     pub n_low_mapq: u32,
 }
 
@@ -32,6 +33,7 @@ impl Bin {
 
         let mut reads_store: HashMap<Vec<u8>, Record> = HashMap::new();
         let mut n_low_mapq = 0;
+        let mut lengths = Vec::new();
         for read in bam_reader.records() {
             let record = read.context("Error while parsing record")?;
             // Skip reads with low mapping quality
@@ -39,6 +41,11 @@ impl Bin {
                 n_low_mapq += 1;
                 continue;
             }
+            lengths.push(
+                record
+                    .reference_end()
+                    .saturating_sub(record.reference_start()),
+            );
             reads_store.insert(record.qname().to_vec(), record);
         }
         Ok(Bin {
@@ -48,6 +55,7 @@ impl Bin {
             reads_store,
             bam_reader,
             n_low_mapq,
+            reads_mean_len: (lengths.iter().sum::<i64>() as f64 / lengths.len() as f64) as u32,
         })
     }
 
@@ -289,19 +297,14 @@ pub fn scan_outliers(
     contig: &str,
     start: u32,
     end: u32,
-    length: u32,
+    bin_length: u32,
 ) -> Vec<(u32, usize, u32, f64, bool, f64, bool)> {
-    let mut starts = Vec::new();
-    let mut current = start;
-    while current <= end {
-        starts.push(current);
-        current += length;
-    }
+    let starts: Vec<_> = (start..=end).step_by(bin_length as usize).collect();
 
     let ratios: Vec<(u32, usize, u32, f64, f64)> = starts
         .into_par_iter()
         .filter_map(|start| {
-            match Bin::new(bam_path, contig, start, length) {
+            match Bin::new(bam_path, contig, start, bin_length) {
                 Ok(bin) => {
                     let n = bin.n_reads();
                     let (_, se) = bin.max_start_or_end();

+ 1 - 1
src/lib.rs

@@ -337,7 +337,7 @@ mod tests {
     #[test]
     fn whole() {
         init();
-        let id = "HENAUX";
+        let id = "COIFFET";
         let bam_path = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
         let out_dir = format!("/data/longreads_basic_pipe/{id}/diag/scan");
         par_whole_scan("/data/ref/hs1/chm13v2.0.dict", &bam_path, &out_dir).unwrap();