4 дней назад · 37eef933cf
--- a/src/scan/bin.rs
+++ b/src/scan/bin.rs
@@ -61,6 +61,7 @@ impl Bin {
 
															         length: u32,
														
 
															         min_mapq: u8,
														
 
															     ) -> anyhow::Result<Self> {
														
 
															+        anyhow::ensure!(length > 0, "bin length must be > 0");
														
 
															         let end = start + length - 1;
														
 
															         bam_reader
														
@@ -92,6 +93,9 @@ impl Bin {
 
															                     continue; // Not overlapping this bin
														
 
															                 }
														
 
															+                let has_fbinv =
														
 
															+                    fb_inv_from_record(record, &header, min_mapq, Some(150), Some(200)).is_some();
														
 
															+
														
 
															                 // Clone the underlying Record
														
 
															                 reads_store.insert(record.qname().to_vec(), record.clone());
														
@@ -105,7 +109,7 @@ impl Bin {
 
															                         if record.mapq() < min_mapq {
														
 
															                             low_qualities[i] += 1;
														
 
															                         }
														
 
															-                        if let Some(_fbinv) = fb_inv_from_record(record, &header, min_mapq, Some(150), Some(200)) {
														
 
															+                        if has_fbinv {
														
 
															                             fb_invs[i] += 1;
														
 
															                         }
														
 
															                     }
														
@@ -357,6 +361,9 @@ impl Bin {
 
															     }
														
 
															     pub fn mean_coverage_from_depths(&self) -> f64 {
														
 
															+        if self.depths.is_empty() {
														
 
															+            return 0.0;
														
 
															+        }
														
 
															         self.depths.iter().sum::<u32>() as f64 / self.depths.len() as f64
														
 
															     }
														
 
															 }
														
@@ -399,7 +406,8 @@ pub fn parse_bin_record_into<'a>(
 
															     anyhow::ensure!(contig == contig_expected, "unexpected contig");
														
 
															     let start = parse_u32(rec.get(i_start).context("missing start")?)?;
														
 
															-    let end   = parse_u32(rec.get(i_end).context("missing end")?)?;
														
 
															+    let end = parse_u32(rec.get(i_end).context("missing end")?)?;
														
 
															+    anyhow::ensure!(end >= start, "invalid bin coordinates: end < start ({start} > {end})");
														
 
															     parse_csv_u32_into(&mut buf.depths, rec.get(i_depths).context("missing depths")?)
														
 
															         .context("parse depths")?;
														
@@ -548,4 +556,3 @@ impl BinStats {
 
															         })
														
 
															     }
														
 
															 }
														
 
															-
														
--- a/src/scan/scan.rs
+++ b/src/scan/scan.rs
@@ -106,8 +106,17 @@ impl From<&Bin> for BinCount {
 
															     /// A new `BinCount` instance populated with data from the `Bin`.
														
 
															     fn from(bin: &Bin) -> Self {
														
 
															         let n_reads = bin.n_reads();
														
 
															-        let n_reads_float = n_reads as f64;
														
 
															         let (n_sa, n_start, n_end) = bin.count_reads_sa_start_end();
														
 
															+        let (ratio_sa, ratio_start, ratio_end) = if n_reads == 0 {
														
 
															+            (f64::NAN, f64::NAN, f64::NAN)
														
 
															+        } else {
														
 
															+            let n_reads_float = n_reads as f64;
														
 
															+            (
														
 
															+                n_sa as f64 / n_reads_float,
														
 
															+                n_start as f64 / n_reads_float,
														
 
															+                n_end as f64 / n_reads_float,
														
 
															+            )
														
 
															+        };
														
 
															         Self {
														
 
															             contig: bin.contig.clone(),
														
@@ -115,9 +124,9 @@ impl From<&Bin> for BinCount {
 
															             end: bin.end,
														
 
															             n_reads: n_reads as u32,
														
 
															             coverage: bin.mean_coverage_from_depths(),
														
 
															-            ratio_sa: n_sa as f64 / n_reads_float,
														
 
															-            ratio_start: n_start as f64 / n_reads_float,
														
 
															-            ratio_end: n_end as f64 / n_reads_float,
														
 
															+            ratio_sa,
														
 
															+            ratio_start,
														
 
															+            ratio_end,
														
 
															             outlier: None,
														
 
															             depths: bin.depths.clone(),
														
 
															             low_qualities: bin.low_qualities.clone(),
														
@@ -345,6 +354,10 @@ fn build_bin_defs_grid(
 
															     bin_size: u32,
														
 
															     chunk_n_bin: u32,
														
 
															 ) -> (Vec<BinDef>, Vec<Option<usize>>) {
														
 
															+    if bin_size == 0 || chunk_n_bin == 0 {
														
 
															+        return (Vec::new(), Vec::new());
														
 
															+    }
														
 
															+
														
 
															     let n_bin = contig_len / bin_size; // keep your bitwise logic
														
 
															     let n_chunks = n_bin.div_ceil(chunk_n_bin); // keep your bitwise logic
														
@@ -477,6 +490,9 @@ pub fn scan_contig(
 
															     chunk_n_bin: u32,
														
 
															     min_mapq: u8,
														
 
															 ) -> anyhow::Result<Vec<BinCount>> {
														
 
															+    anyhow::ensure!(bin_size > 0, "bin_size must be > 0");
														
 
															+    anyhow::ensure!(chunk_n_bin > 0, "chunk_n_bin must be > 0");
														
 
															+
														
 
															     // Build bin definitions EXACTLY like the old code (bitwise-identical ranges),
														
 
															     // plus a safe mapping from grid-bin index -> defs index.
														
 
															     let (defs, grid_to_def) = build_bin_defs_grid(contig_len, bin_size, chunk_n_bin);
														
@@ -742,7 +758,7 @@ pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Re
 
															                 .with_context(|| format!("failed to create output dir: {out_parent:?}"))?;
														
 
															             let tmp_path = out_parent.join(format!(
														
 
															-                ".{}.{}.{}.tmp",
														
 
															+                ".{}.{}.{}.tmp.gz",
														
 
															                 contig,
														
 
															                 std::process::id(),
														
 
															                 std::thread::current().name().unwrap_or("rayon")
														
@@ -792,20 +808,13 @@ pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Re
 
															 /// method to identify outliers and marks them in the `outlier` field of each `BinCount`.
														
 
															 ///
														
 
															 /// ## Workflow:
														
 
															-/// 1. Wraps the input slice in a `Mutex` for thread-safe access.
														
 
															-/// 2. Defines helper functions to extract specific ratios from `BinCount` objects.
														
 
															-/// 3. Iterates over three outlier types: SA, Start, and End.
														
 
															-/// 4. For each type:
														
 
															-///    a. Filters and collects non-NaN ratios along with their indices.
														
 
															-///    b. Identifies outliers using the `filter_outliers_modified_z_score_with_indices` function.
														
 
															-///    c. Marks identified outliers in the original `BinCount` objects.
														
 
															+/// 1. Materializes `(index, [ratio_sa, ratio_start, ratio_end])` for each bin.
														
 
															+/// 2. For each outlier type, filters non-NaN ratios and runs modified Z-score detection.
														
 
															+/// 3. Marks outlier labels back into `bin_counts` in place.
														
 
															 ///
														
 
															 /// ## Parallelization:
														
 
															-/// - Uses `par_iter()` for parallel processing of `BinCount` objects.
														
 
															-/// - Applies outlier marking in parallel using `par_iter().for_each()`.
														
 
															-///
														
 
															-/// ## Thread Safety:
														
 
															-/// - Uses `Mutex` to ensure thread-safe access to the shared `bin_counts` slice.
														
 
															+/// - Uses Rayon `par_iter()` for ratio extraction/filtering.
														
 
															+/// - Final outlier labels are applied sequentially to the mutable slice.
														
 
															 ///
														
 
															 /// ## Outlier Types:
														
 
															 /// - `BinOutlier::SA`: Outliers in supplementary alignment ratio.
														
@@ -902,7 +911,7 @@ pub fn somatic_scan(id: &str, config: &Config) -> anyhow::Result<()> {
 
															 /// This struct encapsulates:
														
 
															 /// - Initialization and conditional cleanup of prior outputs
														
 
															 /// - Logic for checking whether a re-run is necessary (based on BAM and output timestamps)
														
 
															-/// - Coordinated parallel scanning of both normal and tumoral inputs
														
 
															+/// - Coordinated scanning of both normal and tumoral inputs
														
 
															 #[derive(Debug)]
														
 
															 pub struct SomaticScan {
														
 
															     id: String,
														
@@ -934,8 +943,14 @@ impl Initialize for SomaticScan {
 
															         // Force re-run: clean up previous output directories for both normal and tumoral scans
														
 
															         if somatic_scan.config.somatic_scan_force {
														
 
															-            fs::remove_dir_all(somatic_scan.config.somatic_scan_normal_output_dir(id))?;
														
 
															-            fs::remove_dir_all(somatic_scan.config.somatic_scan_tumoral_output_dir(id))?;
														
 
															+            let normal_out = somatic_scan.config.somatic_scan_normal_output_dir(id);
														
 
															+            let tumoral_out = somatic_scan.config.somatic_scan_tumoral_output_dir(id);
														
 
															+            if Path::new(&normal_out).exists() {
														
 
															+                fs::remove_dir_all(&normal_out)?;
														
 
															+            }
														
 
															+            if Path::new(&tumoral_out).exists() {
														
 
															+                fs::remove_dir_all(&tumoral_out)?;
														
 
															+            }
														
 
															         }
														
 
															         Ok(somatic_scan)
														
@@ -978,7 +993,7 @@ impl ShouldRun for SomaticScan {
 
															 }
														
 
															 impl Run for SomaticScan {
														
 
															-    /// Executes the full scan pipeline in parallel for first normal then tumoral samples.
														
 
															+    /// Executes the full scan pipeline sequentially: normal then tumoral.
														
 
															     ///
														
 
															     /// # Returns
														
 
															     /// An error if the underlying scan function (`par_whole_scan`) fails for either sample.