Преглед на файлове

somatic_depth_quality_ranges bug fix

Thomas преди 2 седмици
родител
ревизия
f6064be4ee
променени са 3 файла, в които са добавени 56 реда и са изтрити 35 реда
  1. 0 19
      src/annotation/mod.rs
  2. 21 16
      src/scan/bin.rs
  3. 35 0
      src/variant/variants_stats.rs

+ 0 - 19
src/annotation/mod.rs

@@ -618,25 +618,6 @@ impl Annotations {
             )
     }
 
-    // pub fn low_shannon_entropy(&mut self, min_shannon_entropy: f64) {
-    //     self.store.iter_mut().for_each(|mut e| {
-    //         let anns = e.value_mut();
-    //         let mut is_low = false;
-    //         anns.iter().for_each(|ann| {
-    //             if let Annotation::ShannonEntropy(ent) = ann {
-    //                 if *ent < min_shannon_entropy
-    //                     && !anns.contains(&Annotation::Callers(_, Sample::Somatic))
-    //                 {
-    //                     is_low = true
-    //                 }
-    //             }
-    //         });
-    //         if is_low {
-    //             anns.push(Annotation::LowEntropy);
-    //         }
-    //     });
-    // }
-    //
     /// Annotate low shannon ent for solo callers (not Somatic)
     pub fn low_shannon_entropy(&mut self, min_shannon_entropy: f64) {
         self.store.iter_mut().for_each(|mut e| {

+ 21 - 16
src/scan/bin.rs

@@ -386,28 +386,33 @@ pub fn parse_bin_record_into<'a>(
     buf: &'a mut BinRowBuf,
     contig_expected: &str,
 ) -> anyhow::Result<(u32, &'a [u32], &'a [u32])> {
-    // Adjust these indices to your actual TSV schema:
     let i_contig = 0usize;
-    let i_start = 1usize;
-    let i_depths = 2usize;
-    let i_lowq = 3usize;
+    let i_start  = 1usize;
+    let i_end    = 2usize;
 
-    let contig = rec.get(i_contig).context("missing contig")?;
-    let contig = std::str::from_utf8(contig).context("non-utf8 contig")?;
-    anyhow::ensure!(
-        contig == contig_expected,
-        "unexpected contig {} (expected {})",
-        contig,
-        contig_expected
-    );
+    // adjust if your file has more/less scalar columns, but for your pasted line:
+    let i_depths = 9usize;   // big CSV list
+    let i_lowq   = 10usize;  // big CSV list
+
+    let contig = std::str::from_utf8(rec.get(i_contig).context("missing contig")?)
+        .context("non-utf8 contig")?;
+    anyhow::ensure!(contig == contig_expected, "unexpected contig");
 
     let start = parse_u32(rec.get(i_start).context("missing start")?)?;
+    let end   = parse_u32(rec.get(i_end).context("missing end")?)?;
 
-    let depths_field = rec.get(i_depths).context("missing depths")?;
-    parse_csv_u32_into(&mut buf.depths, depths_field).context("parse depths")?;
+    parse_csv_u32_into(&mut buf.depths, rec.get(i_depths).context("missing depths")?)
+        .context("parse depths")?;
+    parse_csv_u32_into(&mut buf.lowq, rec.get(i_lowq).context("missing lowq")?)
+        .context("parse lowq")?;
 
-    let lowq_field = rec.get(i_lowq).context("missing lowq")?;
-    parse_csv_u32_into(&mut buf.lowq, lowq_field).context("parse lowq")?;
+    // critical sanity check: end-start+1 should match vector length for per-base bins
+    anyhow::ensure!(
+        (end - start + 1) as usize == buf.depths.len(),
+        "bin width mismatch: {}..{} has width {}, depths has len {}",
+        start, end, end - start + 1, buf.depths.len()
+    );
+    anyhow::ensure!(buf.depths.len() == buf.lowq.len(), "depth/lowq len mismatch");
 
     Ok((start, &buf.depths, &buf.lowq))
 }

+ 35 - 0
src/variant/variants_stats.rs

@@ -978,3 +978,38 @@ pub fn write_glm_rows(all_rows: &[GlmRow], csv_path: &str) -> anyhow::Result<()>
     writer.flush()?;
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use log::info;
+
+    use super::*;
+    use crate::{helpers::test_init, positions::GenomeRangeExt};
+
+    #[test]
+    fn high_depths() -> anyhow::Result<()> {
+        test_init();
+        let config = Config::default();
+
+        let id = "DUMCO";
+
+        let (mut high_depth_ranges, mut low_quality_ranges) =
+            somatic_depth_quality_ranges(id, &config)?;
+        high_depth_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
+        low_quality_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
+
+        let high_depth_ranges = merge_adjacent_ranges(high_depth_ranges);
+
+
+        info!(
+            "High-depth ranges: {} {} bp\nLowQ ranges: {} {} bp",
+            high_depth_ranges.len(),
+            high_depth_ranges.total_len(),
+            low_quality_ranges.len(),
+            low_quality_ranges.total_len(),
+        );
+
+        println!("{:?}", high_depth_ranges.first());
+        Ok(())
+    }
+}