|
|
@@ -1,4 +1,3 @@
|
|
|
-use std::cell::RefCell;
|
|
|
use std::{fmt, fs, io::Write};
|
|
|
|
|
|
use anyhow::Context;
|
|
|
@@ -10,9 +9,12 @@ use rayon::{
|
|
|
};
|
|
|
use rust_htslib::bam::IndexedReader;
|
|
|
|
|
|
+use crate::collection::{Initialize, ShouldRun};
|
|
|
+use crate::helpers::is_file_older;
|
|
|
use crate::io::writers::get_gz_writer;
|
|
|
use crate::math::filter_outliers_modified_z_score_with_indices;
|
|
|
|
|
|
+use crate::runners::Run;
|
|
|
use crate::{config::Config, io::dict::read_dict, scan::bin::Bin};
|
|
|
|
|
|
/// Represents a count of reads in a genomic bin, including various metrics and outlier information.
|
|
|
@@ -261,13 +263,25 @@ impl fmt::Display for BinOutlier {
|
|
|
/// - The dictionary file cannot be read.
|
|
|
/// - A `Bin` object cannot be created for a specific region.
|
|
|
/// - Any I/O operation (e.g., writing results) fails.
|
|
|
-pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
+pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
let bin_size = config.count_bin_size;
|
|
|
let chunk_n_bin = config.count_n_chunks;
|
|
|
+ let bam_path = &config.solo_bam(id, time_point);
|
|
|
+ let out_dir = config.somatic_scan_solo_output_dir(id, time_point);
|
|
|
+
|
|
|
info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
|
|
|
- fs::create_dir_all(out_dir)?;
|
|
|
+ fs::create_dir_all(&out_dir)?;
|
|
|
|
|
|
for (contig, length) in read_dict(&config.dict_file)? {
|
|
|
+ let out_file = config.somatic_scan_solo_count_file(id, time_point, &contig);
|
|
|
+ // let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
|
|
|
+
|
|
|
+ // Skip this file if it already exists and is up-to-date compared to the input BAM,
|
|
|
+ // unless forced by the `somatic_scan_force` flag.
|
|
|
+ if !is_file_older(&out_file, bam_path).unwrap_or(true) && !config.somatic_scan_force {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
let n_bin = length / bin_size;
|
|
|
// Calculate number of chunks using ceiling division
|
|
|
let n_chunks = n_bin.div_ceil(chunk_n_bin);
|
|
|
@@ -321,7 +335,7 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
|
|
|
.collect::<Vec<BinCount>>()
|
|
|
},
|
|
|
)
|
|
|
- .flatten()
|
|
|
+ .flatten()
|
|
|
.collect();
|
|
|
|
|
|
debug!("Scan {contig}, sorting bins");
|
|
|
@@ -330,7 +344,6 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
|
|
|
debug!("Scan {contig}, computing outliers");
|
|
|
fill_outliers(&mut bins);
|
|
|
|
|
|
- let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
|
|
|
debug!("Scan {contig}, writing file");
|
|
|
|
|
|
let mut file = get_gz_writer(&out_file, true)
|
|
|
@@ -342,85 +355,92 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
-thread_local! {
|
|
|
- static BAM_READER: RefCell<Option<IndexedReader>> = const { RefCell::new(None) };
|
|
|
-}
|
|
|
-
|
|
|
-pub fn par_whole_scan_local(out_dir: &str, bam_path: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
- let bin_size = config.count_bin_size;
|
|
|
- let chunk_n_bin = config.count_n_chunks;
|
|
|
- info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
|
|
|
- fs::create_dir_all(out_dir)?;
|
|
|
-
|
|
|
- for (contig, length) in read_dict(&config.dict_file)? {
|
|
|
- let n_bin = length / bin_size;
|
|
|
- let n_chunks = n_bin.div_ceil(chunk_n_bin);
|
|
|
- info!("Scan of contig: {contig}");
|
|
|
-
|
|
|
- let bins: Vec<BinCount> = (0..n_chunks)
|
|
|
- .into_par_iter()
|
|
|
- .flat_map(|i| {
|
|
|
- let chunk_start = i * chunk_n_bin * bin_size;
|
|
|
- let chunk_length = if i == n_chunks - 1 {
|
|
|
- length - chunk_start
|
|
|
- } else {
|
|
|
- chunk_n_bin * bin_size
|
|
|
- };
|
|
|
- let n_bins_in_chunk = chunk_length.div_ceil(bin_size);
|
|
|
-
|
|
|
- // Use thread-local BAM reader
|
|
|
- let result = BAM_READER.with(|reader_cell| {
|
|
|
- let mut reader_ref = reader_cell.borrow_mut();
|
|
|
-
|
|
|
- // Initialize if not already set
|
|
|
- if reader_ref.is_none() {
|
|
|
- let reader = IndexedReader::from_path(bam_path)
|
|
|
- .with_context(|| format!("Failed to open BAM file: {}", bam_path))
|
|
|
- .ok()?; // handle error as Option
|
|
|
- *reader_ref = Some(reader);
|
|
|
- }
|
|
|
-
|
|
|
- let reader = reader_ref.as_mut().unwrap();
|
|
|
-
|
|
|
- // Seek to contig start for this chunk
|
|
|
- let mut bins_in_chunk = Vec::new();
|
|
|
- for j in 0..n_bins_in_chunk {
|
|
|
- let bin_start = chunk_start + j * bin_size;
|
|
|
- let bin_length = std::cmp::min(bin_size, chunk_length - j * bin_size);
|
|
|
- match Bin::new(reader, &contig, bin_start, bin_length, config.bam_min_mapq)
|
|
|
- {
|
|
|
- Ok(bin) => bins_in_chunk.push(BinCount::from(&bin)),
|
|
|
- Err(e) => {
|
|
|
- error!("Failed to get Bin at chunk {i} bin {j}: {e}");
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- Some(bins_in_chunk)
|
|
|
- });
|
|
|
-
|
|
|
- result.into_iter().flatten().collect::<Vec<_>>()
|
|
|
- })
|
|
|
- .collect();
|
|
|
-
|
|
|
- debug!("Scan {contig}, sorting bins");
|
|
|
- let mut bins = bins;
|
|
|
- bins.par_sort_unstable_by(|a, b| a.start.cmp(&b.start));
|
|
|
-
|
|
|
- debug!("Scan {contig}, computing outliers");
|
|
|
- fill_outliers(&mut bins);
|
|
|
-
|
|
|
- let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
|
|
|
- debug!("Scan {contig}, writing file");
|
|
|
-
|
|
|
- let mut file = get_gz_writer(&out_file, true)
|
|
|
- .with_context(|| anyhow::anyhow!("failed to open the file: {out_file}"))?;
|
|
|
- for bin in bins {
|
|
|
- writeln!(file, "{}", bin.to_tsv_row())?;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- Ok(())
|
|
|
-}
|
|
|
+// thread_local! {
|
|
|
+// static BAM_READER: RefCell<Option<IndexedReader>> = const { RefCell::new(None) };
|
|
|
+// }
|
|
|
+//
|
|
|
+// pub fn par_whole_scan_local(out_dir: &str, bam_path: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
+// let bin_size = config.count_bin_size;
|
|
|
+// let chunk_n_bin = config.count_n_chunks;
|
|
|
+// info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
|
|
|
+// fs::create_dir_all(out_dir)?;
|
|
|
+//
|
|
|
+// for (contig, length) in read_dict(&config.dict_file)? {
|
|
|
+// let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
|
|
|
+//
|
|
|
+// // Skip this file if it already exists and is up-to-date compared to the input BAM,
|
|
|
+// // unless forced by the `somatic_scan_force` flag.
|
|
|
+// if !is_file_older(&out_file, bam_path).unwrap_or(true) && !config.somatic_scan_force {
|
|
|
+// continue;
|
|
|
+// }
|
|
|
+//
|
|
|
+// let n_bin = length / bin_size;
|
|
|
+// let n_chunks = n_bin.div_ceil(chunk_n_bin);
|
|
|
+// info!("Scan of contig: {contig}");
|
|
|
+//
|
|
|
+// let bins: Vec<BinCount> = (0..n_chunks)
|
|
|
+// .into_par_iter()
|
|
|
+// .flat_map(|i| {
|
|
|
+// let chunk_start = i * chunk_n_bin * bin_size;
|
|
|
+// let chunk_length = if i == n_chunks - 1 {
|
|
|
+// length - chunk_start
|
|
|
+// } else {
|
|
|
+// chunk_n_bin * bin_size
|
|
|
+// };
|
|
|
+// let n_bins_in_chunk = chunk_length.div_ceil(bin_size);
|
|
|
+//
|
|
|
+// // Use thread-local BAM reader
|
|
|
+// let result = BAM_READER.with(|reader_cell| {
|
|
|
+// let mut reader_ref = reader_cell.borrow_mut();
|
|
|
+//
|
|
|
+// // Initialize if not already set
|
|
|
+// if reader_ref.is_none() {
|
|
|
+// let reader = IndexedReader::from_path(bam_path)
|
|
|
+// .with_context(|| format!("Failed to open BAM file: {}", bam_path))
|
|
|
+// .ok()?; // handle error as Option
|
|
|
+// *reader_ref = Some(reader);
|
|
|
+// }
|
|
|
+//
|
|
|
+// let reader = reader_ref.as_mut().unwrap();
|
|
|
+//
|
|
|
+// // Seek to contig start for this chunk
|
|
|
+// let mut bins_in_chunk = Vec::new();
|
|
|
+// for j in 0..n_bins_in_chunk {
|
|
|
+// let bin_start = chunk_start + j * bin_size;
|
|
|
+// let bin_length = std::cmp::min(bin_size, chunk_length - j * bin_size);
|
|
|
+// match Bin::new(reader, &contig, bin_start, bin_length, config.bam_min_mapq)
|
|
|
+// {
|
|
|
+// Ok(bin) => bins_in_chunk.push(BinCount::from(&bin)),
|
|
|
+// Err(e) => {
|
|
|
+// error!("Failed to get Bin at chunk {i} bin {j}: {e}");
|
|
|
+// }
|
|
|
+// }
|
|
|
+// }
|
|
|
+// Some(bins_in_chunk)
|
|
|
+// });
|
|
|
+//
|
|
|
+// result.into_iter().flatten().collect::<Vec<_>>()
|
|
|
+// })
|
|
|
+// .collect();
|
|
|
+//
|
|
|
+// debug!("Scan {contig}, sorting bins");
|
|
|
+// let mut bins = bins;
|
|
|
+// bins.par_sort_unstable_by(|a, b| a.start.cmp(&b.start));
|
|
|
+//
|
|
|
+// debug!("Scan {contig}, computing outliers");
|
|
|
+// fill_outliers(&mut bins);
|
|
|
+//
|
|
|
+// debug!("Scan {contig}, writing file");
|
|
|
+//
|
|
|
+// let mut file = get_gz_writer(&out_file, true)
|
|
|
+// .with_context(|| anyhow::anyhow!("failed to open the file: {out_file}"))?;
|
|
|
+// for bin in bins {
|
|
|
+// writeln!(file, "{}", bin.to_tsv_row())?;
|
|
|
+// }
|
|
|
+// }
|
|
|
+//
|
|
|
+// Ok(())
|
|
|
+// }
|
|
|
|
|
|
/// Identifies and marks outliers in a slice of `BinCount` objects based on various ratio metrics.
|
|
|
///
|
|
|
@@ -541,3 +561,96 @@ pub fn somatic_scan(id: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
config,
|
|
|
)
|
|
|
}
|
|
|
+
|
|
|
+
|
|
|
+/// A pipeline runner for executing SomaticScan on matched tumor and normal samples.
|
|
|
+///
|
|
|
+/// This struct encapsulates:
|
|
|
+/// - Initialization and conditional cleanup of prior outputs
|
|
|
+/// - Logic for checking whether a re-run is necessary (based on BAM and output timestamps)
|
|
|
+/// - Coordinated parallel scanning of both normal and tumoral inputs
|
|
|
+#[derive(Debug)]
|
|
|
+pub struct SomaticScan {
|
|
|
+ id: String,
|
|
|
+ config: Config,
|
|
|
+}
|
|
|
+
|
|
|
+impl Initialize for SomaticScan {
|
|
|
+ /// Initializes a SomaticScan runner.
|
|
|
+ ///
|
|
|
+ /// If force is enabled in the config, both the normal and tumoral output directories
|
|
|
+ /// are deleted to ensure a clean re-run.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `id` - The sample ID for the scan
|
|
|
+ /// * `config` - Configuration for input/output paths and behavior
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// A fully initialized `SomaticScan` instance ready for execution
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if directory deletion fails during force cleanup.
|
|
|
+ fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
|
|
|
+ info!("Initialize SomaticScan for {id}.");
|
|
|
+
|
|
|
+ let somatic_scan = Self {
|
|
|
+ id: id.to_string(),
|
|
|
+ config,
|
|
|
+ };
|
|
|
+
|
|
|
+ // Force re-run: clean up previous output directories for both normal and tumoral scans
|
|
|
+ if somatic_scan.config.somatic_scan_force {
|
|
|
+            if let Err(e) = fs::remove_dir_all(somatic_scan.config.somatic_scan_normal_output_dir(id)) { if e.kind() != std::io::ErrorKind::NotFound { return Err(e.into()); } }
|
|
|
+            if let Err(e) = fs::remove_dir_all(somatic_scan.config.somatic_scan_tumoral_output_dir(id)) { if e.kind() != std::io::ErrorKind::NotFound { return Err(e.into()); } }
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(somatic_scan)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl ShouldRun for SomaticScan {
|
|
|
+ /// Determines whether SomaticScan should re-run by checking whether
|
|
|
+ /// any of the count output files are outdated or missing relative to the BAMs.
|
|
|
+ fn should_run(&self) -> bool {
|
|
|
+ let mrd_bam_path = &self.config.normal_bam(&self.id);
|
|
|
+ let diag_bam_path = &self.config.tumoral_bam(&self.id);
|
|
|
+
|
|
|
+ match read_dict(&self.config.dict_file) {
|
|
|
+ Ok(dict) => {
|
|
|
+ for (contig, _) in dict {
|
|
|
+ let diag_count_file = self
|
|
|
+ .config
|
|
|
+ .somatic_scan_tumoral_count_file(&self.id, &contig);
|
|
|
+ if is_file_older(&diag_count_file, diag_bam_path).unwrap_or(true) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ let mrd_count_file = self
|
|
|
+ .config
|
|
|
+ .somatic_scan_normal_count_file(&self.id, &contig);
|
|
|
+ if is_file_older(&mrd_count_file, mrd_bam_path).unwrap_or(true) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ false
|
|
|
+ }
|
|
|
+ Err(e) => {
|
|
|
+ error!("Failed to read dict file: {}\n{e}", self.config.dict_file);
|
|
|
+ // Don't run if dict is unreadable
|
|
|
+ false
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Run for SomaticScan {
|
|
|
+ /// Executes the full scan pipeline in parallel for first normal then tumoral samples.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// An error if the underlying scan function (`par_whole_scan`) fails for either sample.
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+ info!("Starting scan for {} normal.", self.id);
|
|
|
+ par_whole_scan(&self.id, &self.config.normal_name, &self.config)?;
|
|
|
+ info!("Starting scan for {} tumoral.", self.id);
|
|
|
+ par_whole_scan(&self.id, &self.config.tumoral_name, &self.config)
|
|
|
+ }
|
|
|
+}
|