Jelajahi Sumber

somatic pipeline should run

Thomas 5 bulan lalu
induk
melakukan
1698753921
2 mengubah file dengan 89 tambahan dan 13 penghapusan
  1. 3 2
      src/collection/bam.rs
  2. 86 11
      src/pipes/somatic.rs

+ 3 - 2
src/collection/bam.rs

@@ -212,7 +212,7 @@ impl BamCollection {
 pub fn load_bam_collection(result_dir: &str) -> BamCollection {
     let pattern = format!("{}/*/*/*.bam", result_dir);
     let bams = glob(&pattern)
-        .expect("Failed to read glob pattern")
+        .expect("Failed to read glob pattern.")
         // .par_bridge()
         .filter_map(|entry| {
             match entry {
@@ -233,7 +233,8 @@ pub fn bam_compo(
     file_path: &str,
     sample_size: usize,
 ) -> anyhow::Result<Vec<(String, String, f64)>> {
-    let mut bam = rust_htslib::bam::Reader::from_path(file_path)?;
+    let mut bam = rust_htslib::bam::Reader::from_path(file_path)
+        .map_err(|e| anyhow::anyhow!("Failed to open bam from path: {file_path}\n\t{e}"))?;
 
     let mut rgs: HashMap<(String, String), u64> = HashMap::new();
     for result in bam.records().filter_map(Result::ok).take(sample_size) {

+ 86 - 11
src/pipes/somatic.rs

@@ -1,8 +1,14 @@
 use crate::{
-    annotation::is_gnomad_and_constit_alt, collection::ShouldRun, create_should_run_normal_tumoral, init_solo_callers_normal_tumoral, io::bed::read_bed, positions::GenomeRange, scan::scan::SomaticScan, variant::{
+    annotation::is_gnomad_and_constit_alt,
+    collection::ShouldRun,
+    create_should_run_normal_tumoral, init_solo_callers_normal_tumoral,
+    io::bed::read_bed,
+    positions::GenomeRange,
+    scan::scan::SomaticScan,
+    variant::{
         variant::{run_if_required, ShouldRunBox},
         variants_stats::somatic_depth_quality_ranges,
-    }
+    },
 };
 use itertools::Itertools;
 use log::info;
@@ -165,13 +171,70 @@ impl Initialize for SomaticPipe {
 }
 
 impl ShouldRun for SomaticPipe {
-    fn should_run(&self) -> bool {
-        let tumoral_dir = self.config.tumoral_dir(&self.id);
-        let path_str = format!("{}/{}_somatic_variants.bit", tumoral_dir, self.id);
-        let path = Path::new(&path_str);
 
-        !path.exists()
+    fn should_run(&self) -> bool {
+        // path to the existing “.bit” file
+        let bit_path = self
+            .config
+            .tumoral_dir(&self.id)
+            .join(format!("{}_somatic_variants.bit", self.id));
+
+        // if we can’t read its modification time, re-run
+        let res_meta = match fs::metadata(&bit_path).and_then(|m| m.modified()) {
+            Ok(ts) => ts,
+            Err(_) => return true,
+        };
+
+        // if *either* BAM is newer than the .bit, or we can’t stat it, re-run
+        [ self.config.normal_bam(&self.id),
+          self.config.tumoral_bam(&self.id) ]
+        .iter()
+        .any(|bam| {
+            fs::metadata(bam)
+                .and_then(|m| m.modified())
+                .map_or(true, |ts| ts > res_meta)
+        })
     }
+
+    // fn should_run(&self) -> bool {
+    //     let tumoral_dir = self.config.tumoral_dir(&self.id);
+    //     let path_str = format!("{}/{}_somatic_variants.bit", tumoral_dir, self.id);
+    //     let path = Path::new(&path_str);
+    //
+    //     let normal_bam = self.config.normal_bam(&self.id);
+    //
+    //     if path.exists() {
+    //         if let Ok(Ok(res_metadata)) = path.metadata().map(|r| r.modified()) {
+    //             if let Ok(Ok(normal_bam_m)) =
+    //                 Path::new(&normal_bam).metadata().map(|r| r.modified())
+    //             {
+    //                 if normal_bam_m > res_metadata {
+    //                     return true;
+    //                 }
+    //
+    //                 let tumoral_bam = self.config.tumoral_bam(&self.id);
+    //
+    //                 if let Ok(Ok(tumoral_bam_m)) =
+    //                     Path::new(&tumoral_bam).metadata().map(|r| r.modified())
+    //                 {
+    //                     if tumoral_bam_m > res_metadata {
+    //                         return true;
+    //                     } else {
+    //                         return false;
+    //                     }
+    //                 } else {
+    //                     return true;
+    //                 }
+    //             } else {
+    //                 return true;
+    //             }
+    //         } else {
+    //             return true;
+    //         }
+    //     } else {
+    //         return true;
+    //     }
+    // }
 }
 
 impl Run for SomaticPipe {
@@ -300,7 +363,7 @@ impl Run for SomaticPipe {
 
         info!("ranges: {}", low_quality_ranges.len());
 
-        variants_collections.iter_mut().for_each(|e | {
+        variants_collections.iter_mut().for_each(|e| {
             let _ = e.annotate_with_ranges(&low_quality_ranges, &annotations, Annotation::LowMAPQ);
         });
 
@@ -485,11 +548,23 @@ impl Run for SomaticPipe {
         // Ensure sorted (should already be sorted)
         variants.sort();
 
-        let vntrs: Vec<GenomeRange> = read_bed("/data/ref/hs1/vntrs_chm13.bed")?.iter().map(|r| r.range.clone()).collect();
+        let vntrs: Vec<GenomeRange> = read_bed("/data/ref/hs1/vntrs_chm13.bed")?
+            .iter()
+            .map(|r| r.range.clone())
+            .collect();
         variants.annotate_with_ranges(&vntrs, Some(Annotation::VNTR), 0, Vec::new());
 
-        let repeat_masker: Vec<GenomeRange> = read_bed("/data/ref/hs1/chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed")?.iter().map(|r| r.range.clone()).collect();
-        variants.annotate_with_ranges(&repeat_masker, Some(Annotation::RepeatMasker), 0, Vec::new());
+        let repeat_masker: Vec<GenomeRange> =
+            read_bed("/data/ref/hs1/chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed")?
+                .iter()
+                .map(|r| r.range.clone())
+                .collect();
+        variants.annotate_with_ranges(
+            &repeat_masker,
+            Some(Annotation::RepeatMasker),
+            0,
+            Vec::new(),
+        );
 
         variants.save_to_json(&result_json)?;
         variants.save_to_file(&result_bit)?;