Kaynağa Gözat

better scan

Thomas 4 gün önce
ebeveyn
işleme
70dc1304d2
3 değiştirilmiş dosya ile 43 ekleme ve 13 silme
  1. 6 3
      src/callers/gatk.rs
  2. 21 4
      src/scan/scan.rs
  3. 16 6
      src/variant/variants_stats.rs

+ 6 - 3
src/callers/gatk.rs

@@ -158,10 +158,10 @@ use crate::{
 ///
 /// The remaining ~8% covers JVM off-heap, native allocations (htslib, PairHMM),
 /// and OS overhead. The previous value of 0.85 (~15% headroom) was the standard GATK recommendation.
-const JVM_HEAP_FRACTION: f64 = 0.85;
+const JVM_HEAP_FRACTION: f64 = 0.92;
 
 /// Number of parallel BED chunks for Mutect2 (default when called from `Run`).
-const DEFAULT_N_PARTS: usize = 30;
+const DEFAULT_N_PARTS: usize = 40;
 
 // ---------------------------------------------------------------------------
 // Struct
@@ -856,6 +856,7 @@ pub fn run_mutect2_chunked(id: &str, config: &Config, n_parts: usize) -> anyhow:
 mod tests {
     use super::*;
     use crate::helpers::test_init;
+    use crate::runners::Run;
 
     #[test]
     fn gatk_version() -> anyhow::Result<()> {
@@ -872,6 +873,8 @@ mod tests {
     fn mutect2_run() -> anyhow::Result<()> {
         test_init();
         let config = Config::default();
-        run_mutect2_chunked("CHALO", &config, 50)
+        Run::run(&mut Mutect2::initialize("CHALO", &config)?)?;
+        // Mutect2::initialize("CHALO", &config)?.run()?;
+        Ok(())
     }
 }

+ 21 - 4
src/scan/scan.rs

@@ -731,12 +731,29 @@ pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Re
             fill_outliers(&mut bins);
 
             debug!("Scan {contig}, writing file");
-            let mut file = get_gz_writer(&out_file, true)
-                .with_context(|| anyhow::anyhow!("failed to open the file: {out_file}"))?;
-            for bin in bins {
-                writeln!(file, "{}", bin.to_tsv_row())?;
+
+            let tmp_file = format!("{out_file}.tmp.{}", std::process::id());
+
+            {
+                let mut file = get_gz_writer(&tmp_file, true)
+                    .with_context(|| format!("failed to open temp BGZF file: {tmp_file}"))?;
+
+                for bin in bins {
+                    writeln!(file, "{}", bin.to_tsv_row())
+                        .with_context(|| format!("failed writing {contig} row to {tmp_file}"))?;
+                }
+
+                file.flush()
+                    .with_context(|| format!("failed flushing BGZF writer: {tmp_file}"))?;
+
+                file.close().with_context(|| {
+                    format!("failed closing/finalizing BGZF writer: {tmp_file}")
+                })?;
             }
 
+            std::fs::rename(&tmp_file, &out_file)
+                .with_context(|| format!("failed atomic rename {tmp_file} -> {out_file}"))?;
+
             Ok(())
         })?;
 

+ 16 - 6
src/variant/variants_stats.rs

@@ -605,8 +605,13 @@ pub fn somatic_depth_quality_ranges(
             let mut line_no = 0usize;
 
             loop {
-                let n_ok = n_tsv.read_byte_record(&mut n_rec)?;
-                let t_ok = t_tsv.read_byte_record(&mut t_rec)?;
+                let n_ok = n_tsv.read_byte_record(&mut n_rec).with_context(|| {
+                    format!("reading normal TSV: {} line {}", normal_path, line_no + 1)
+                })?;
+
+                let t_ok = t_tsv.read_byte_record(&mut t_rec).with_context(|| {
+                    format!("reading tumor TSV: {} line {}", tumor_path, line_no + 1)
+                })?;
 
                 if n_ok || t_ok {
                     line_no += 1;
@@ -992,7 +997,7 @@ mod tests {
         test_init();
         let config = Config::default();
 
-        let id = "DUMCO";
+        let id = "CML2518";
 
         let (mut high_depth_ranges, mut low_quality_ranges) =
             somatic_depth_quality_ranges(id, &config)?;
@@ -1002,7 +1007,6 @@ mod tests {
         // let high_depth_ranges = merge_adjacent_ranges(high_depth_ranges);
         // let low_quality_ranges = merge_adjacent_ranges(low_quality_ranges);
 
-
         info!(
             "High-depth ranges: n={} bp={}\nLowQ ranges: n={} bp={}",
             high_depth_ranges.len(),
@@ -1010,8 +1014,14 @@ mod tests {
             low_quality_ranges.len(),
             low_quality_ranges.total_len(),
         );
-        high_depth_ranges.iter().take(10).for_each(|e| println!("{e:?}"));
-        low_quality_ranges.iter().take(10).for_each(|e| println!("{e:?}"));
+        high_depth_ranges
+            .iter()
+            .take(10)
+            .for_each(|e| println!("{e:?}"));
+        low_quality_ranges
+            .iter()
+            .take(10)
+            .for_each(|e| println!("{e:?}"));
         Ok(())
     }
 }