Browse Source

macro and function for trait Should Run

Thomas 8 months ago
parent
commit
03ec589273
8 changed files with 563 additions and 112 deletions
  1. 2 0
      src/callers/deep_somatic.rs
  2. 7 3
      src/callers/savana.rs
  3. 4 2
      src/callers/severus.rs
  4. 2 1
      src/collection/mod.rs
  5. 34 0
      src/config.rs
  6. 28 16
      src/pipes/somatic.rs
  7. 197 84
      src/scan/scan.rs
  8. 289 6
      src/variant/variant.rs

+ 2 - 0
src/callers/deep_somatic.rs

@@ -201,5 +201,7 @@ impl Variants for DeepSomatic {
     }
 }
 
+// impl  for DeepSomatic {}
+
 /// Marker trait implementation to signal DeepSomatic supports variant export.
 impl RunnerVariants for DeepSomatic {}

+ 7 - 3
src/callers/savana.rs

@@ -87,8 +87,9 @@ impl Run for Savana {
         if !Path::new(&output_vcf).exists() {
             info!("Running Savana v{}", Savana::version(&self.config)?);
             let output_dir = self.config.savana_output_dir(id);
-            fs::create_dir_all(&output_dir)
-                .with_context(|| format!("Failed to create output dir for Savana run: {output_dir}"))?;
+            fs::create_dir_all(&output_dir).with_context(|| {
+                format!("Failed to create output dir for Savana run: {output_dir}")
+            })?;
 
             // Check for phased germline vcf
             // not required anymore since >= 1.3.0
@@ -157,7 +158,10 @@ impl Run for Savana {
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
         } else {
-            debug!("Savana output already exists for {}, skipping execution.", self.id);
+            debug!(
+                "Savana output already exists for {}, skipping execution.",
+                self.id
+            );
         }
 
         // Keep PASS

+ 4 - 2
src/callers/severus.rs

@@ -128,7 +128,10 @@ impl Run for Severus {
                 .save_to_file(&log_file)
                 .context(format!("Error while writing Severus logs into {log_file}"))?;
         } else {
-            debug!("Severus output VCF already exists for {}, skipping execution.", self.id);
+            debug!(
+                "Severus output VCF already exists for {}, skipping execution.",
+                self.id
+            );
         }
 
         // Filter PASS variants
@@ -239,7 +242,6 @@ impl Variants for Severus {
 
 impl RunnerVariants for Severus {}
 
-
 /// ========================================================================
 
 #[derive(Debug)]

+ 2 - 1
src/collection/mod.rs

@@ -27,7 +27,8 @@ use crate::{
         assembler::{Assembler, AssemblerConfig},
         variants::{RunVariantsAgg, VariantsConfig},
     },
-    runners::Run, scan::scan::{par_whole_scan, par_whole_scan_local},
+    runners::Run,
+    scan::scan::par_whole_scan,
 };
 
 pub mod bam;

+ 34 - 0
src/config.rs

@@ -69,6 +69,7 @@ pub struct Config {
     pub nanomonsv_solo_passed_vcf: String,
     pub somatic_pipe_force: bool,
     pub min_high_quality_depth: u32,
+    pub somatic_scan_force: bool,
 }
 
 // Here comes names that can't be changed from output of tools
@@ -185,6 +186,9 @@ impl Default for Config {
             nanomonsv_solo_passed_vcf: "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
                 .to_string(),
 
+            // Scan
+            somatic_scan_force: false,
+
             // Pipe
             somatic_pipe_force: true,
             somatic_min_constit_depth: 5,
@@ -542,6 +546,36 @@ impl Config {
         )
     }
 
+    // SomaticScan
+    /// Returns the directory holding the SomaticScan count files for one
+    /// sample/timepoint: the `counts` sub-directory of `solo_dir(id, time)`.
+    pub fn somatic_scan_solo_output_dir(&self, id: &str, time: &str) -> String {
+        let solo_dir = self.solo_dir(id, time);
+        format!("{}/counts", solo_dir)
+    }
+
+    /// SomaticScan count directory for the timepoint named by `self.normal_name`.
+    pub fn somatic_scan_normal_output_dir(&self, id: &str) -> String {
+        let time_point = &self.normal_name;
+        self.somatic_scan_solo_output_dir(id, time_point)
+    }
+
+    /// SomaticScan count directory for the timepoint named by `self.tumoral_name`.
+    pub fn somatic_scan_tumoral_output_dir(&self, id: &str) -> String {
+        let time_point = &self.tumoral_name;
+        self.somatic_scan_solo_output_dir(id, time_point)
+    }
+
+    /// Returns the path of the per-contig count file
+    /// (`<counts dir>/<contig>_count.tsv.gz`) for one sample/timepoint.
+    pub fn somatic_scan_solo_count_file(&self, id: &str, time: &str, contig: &str) -> String {
+        let counts_dir = self.somatic_scan_solo_output_dir(id, time);
+        format!("{}/{}_count.tsv.gz", counts_dir, contig)
+    }
+
+    /// Per-contig count file for the timepoint named by `self.normal_name`.
+    pub fn somatic_scan_normal_count_file(&self, id: &str, contig: &str) -> String {
+        let time_point = &self.normal_name;
+        self.somatic_scan_solo_count_file(id, time_point, contig)
+    }
+
+    /// Per-contig count file for the timepoint named by `self.tumoral_name`.
+    pub fn somatic_scan_tumoral_count_file(&self, id: &str, contig: &str) -> String {
+        let time_point = &self.tumoral_name;
+        self.somatic_scan_solo_count_file(id, time_point, contig)
+    }
+
+    // Modkit
     pub fn modkit_summary_file(&self, id: &str, time: &str) -> String {
         self.modkit_summary_file
             .replace("{result_dir}", &self.result_dir)

+ 28 - 16
src/pipes/somatic.rs

@@ -1,3 +1,9 @@
+use crate::{
+    create_should_run_normal_tumoral, init_solo_callers_normal_tumoral,
+    scan::scan::SomaticScan,
+    variant::variant::{run_if_required, ShouldRunBox},
+};
+use anyhow::Context;
 use itertools::Itertools;
 use log::info;
 use std::{
@@ -16,10 +22,10 @@ use crate::{
     },
     collection::{Initialize, InitializeSolo},
     config::Config,
-    init_solo_callers, init_somatic_callers,
+    create_should_run, init_somatic_callers,
     runners::Run,
     variant::{
-        variant::{load_variants, run_variants, CallerBox},
+        variant::{load_variants, CallerBox},
         variant_collection::{ExternalAnnotation, VariantCollection, Variants},
         variants_stats::VariantsStats,
     },
@@ -43,7 +49,7 @@ impl Initialize for Somatic {
 }
 
 #[derive(Debug, Default, Clone)]
-pub struct SomaticStats {
+pub struct SomaticPipeStats {
     pub input: InputStats,
     pub n_constit_germline: usize,
     pub n_low_constit: usize,
@@ -84,7 +90,7 @@ impl InputStats {
     }
 }
 
-impl SomaticStats {
+impl SomaticPipeStats {
     pub fn init(collections: &[VariantCollection]) -> Self {
         Self {
             input: InputStats::from_collections(collections),
@@ -258,30 +264,36 @@ impl Run for Somatic {
         // LongphasePhase::initialize(&id, self.config.clone())?.run()?;
 
         // Initialize variants collections
-        info!("Initialization of callers...");
+        info!("Initialization prerequired pipe components...");
 
-        let mut callers = init_somatic_callers!(
+        let mut to_run_if_req = create_should_run!(
             &id,
             &config,
+            SomaticScan,
             ClairS,
             NanomonSV,
             Severus,
             Savana,
             DeepSomatic
         );
+        to_run_if_req.extend(create_should_run_normal_tumoral!(&id, &config, DeepVariant,));
 
-        callers.extend(init_solo_callers!(
+        info!("Running prerequired pipe components.");
+
+        run_if_required(&mut to_run_if_req)
+            .context("Failed to run a prerequired component of somatic pipe.")?;
+
+        let mut callers = init_somatic_callers!(
             &id,
             &config,
-            DeepVariant,
-            "diag",
-            DeepVariant,
-            "mrd"
-        ));
+            ClairS,
+            NanomonSV,
+            Severus,
+            Savana,
+            DeepSomatic
+        );
 
-        // Loading
-        info!("Running variants callers.");
-        run_variants(&mut callers)?;
+        callers.extend(init_solo_callers_normal_tumoral!(&id, &config, DeepVariant,));
 
         info!("Loading variants.");
         let mut variants_collections = load_variants(&mut callers, &annotations)
@@ -292,7 +304,7 @@ impl Run for Somatic {
             ClairS::initialize(&id, self.config.clone())?.germline(&annotations)?;
         variants_collections.push(clairs_germline);
 
-        let mut somatic_stats = SomaticStats::init(&variants_collections);
+        let mut somatic_stats = SomaticPipeStats::init(&variants_collections);
         info!(
             "Variants collections from {} vcf ({} variants)",
             variants_collections.len(),

+ 197 - 84
src/scan/scan.rs

@@ -1,4 +1,3 @@
-use std::cell::RefCell;
 use std::{fmt, fs, io::Write};
 
 use anyhow::Context;
@@ -10,9 +9,12 @@ use rayon::{
 };
 use rust_htslib::bam::IndexedReader;
 
+use crate::collection::{Initialize, ShouldRun};
+use crate::helpers::is_file_older;
 use crate::io::writers::get_gz_writer;
 use crate::math::filter_outliers_modified_z_score_with_indices;
 
+use crate::runners::Run;
 use crate::{config::Config, io::dict::read_dict, scan::bin::Bin};
 
 /// Represents a count of reads in a genomic bin, including various metrics and outlier information.
@@ -261,13 +263,25 @@ impl fmt::Display for BinOutlier {
 /// - The dictionary file cannot be read.
 /// - A `Bin` object cannot be created for a specific region.
 /// - Any I/O operation (e.g., writing results) fails.
-pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow::Result<()> {
+pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Result<()> {
     let bin_size = config.count_bin_size;
     let chunk_n_bin = config.count_n_chunks;
+    let bam_path = &config.solo_bam(id, time_point);
+    let out_dir = config.somatic_scan_solo_output_dir(id, time_point);
+
     info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
-    fs::create_dir_all(out_dir)?;
+    fs::create_dir_all(&out_dir)?;
 
     for (contig, length) in read_dict(&config.dict_file)? {
+        let out_file = config.somatic_scan_solo_count_file(id, time_point, &contig);
+        // let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
+
+        // Skip this file if it already exists and is up-to-date compared to the input BAM,
+        // unless forced by the `somatic_scan_force` flag.
+        if !is_file_older(&out_file, bam_path).unwrap_or(true) && !config.somatic_scan_force {
+            continue;
+        }
+
         let n_bin = length / bin_size;
         // Calculate number of chunks using ceiling division
         let n_chunks = n_bin.div_ceil(chunk_n_bin);
@@ -321,7 +335,7 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
                         .collect::<Vec<BinCount>>()
                 },
             )
-                .flatten()
+            .flatten()
             .collect();
 
         debug!("Scan {contig}, sorting bins");
@@ -330,7 +344,6 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
         debug!("Scan {contig}, computing outliers");
         fill_outliers(&mut bins);
 
-        let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
         debug!("Scan {contig}, writing file");
 
         let mut file = get_gz_writer(&out_file, true)
@@ -342,85 +355,92 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
     Ok(())
 }
 
-thread_local! {
-    static BAM_READER: RefCell<Option<IndexedReader>> = const { RefCell::new(None) };
-}
-
-pub fn par_whole_scan_local(out_dir: &str, bam_path: &str, config: &Config) -> anyhow::Result<()> {
-    let bin_size = config.count_bin_size;
-    let chunk_n_bin = config.count_n_chunks;
-    info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
-    fs::create_dir_all(out_dir)?;
-
-    for (contig, length) in read_dict(&config.dict_file)? {
-        let n_bin = length / bin_size;
-        let n_chunks = n_bin.div_ceil(chunk_n_bin);
-        info!("Scan of contig: {contig}");
-
-        let bins: Vec<BinCount> = (0..n_chunks)
-            .into_par_iter()
-            .flat_map(|i| {
-                let chunk_start = i * chunk_n_bin * bin_size;
-                let chunk_length = if i == n_chunks - 1 {
-                    length - chunk_start
-                } else {
-                    chunk_n_bin * bin_size
-                };
-                let n_bins_in_chunk = chunk_length.div_ceil(bin_size);
-
-                // Use thread-local BAM reader
-                let result = BAM_READER.with(|reader_cell| {
-                    let mut reader_ref = reader_cell.borrow_mut();
-
-                    // Initialize if not already set
-                    if reader_ref.is_none() {
-                        let reader = IndexedReader::from_path(bam_path)
-                            .with_context(|| format!("Failed to open BAM file: {}", bam_path))
-                            .ok()?; // handle error as Option
-                        *reader_ref = Some(reader);
-                    }
-
-                    let reader = reader_ref.as_mut().unwrap();
-
-                    // Seek to contig start for this chunk
-                    let mut bins_in_chunk = Vec::new();
-                    for j in 0..n_bins_in_chunk {
-                        let bin_start = chunk_start + j * bin_size;
-                        let bin_length = std::cmp::min(bin_size, chunk_length - j * bin_size);
-                        match Bin::new(reader, &contig, bin_start, bin_length, config.bam_min_mapq)
-                        {
-                            Ok(bin) => bins_in_chunk.push(BinCount::from(&bin)),
-                            Err(e) => {
-                                error!("Failed to get Bin at chunk {i} bin {j}: {e}");
-                            }
-                        }
-                    }
-                    Some(bins_in_chunk)
-                });
-
-                result.into_iter().flatten().collect::<Vec<_>>()
-            })
-            .collect();
-
-        debug!("Scan {contig}, sorting bins");
-        let mut bins = bins;
-        bins.par_sort_unstable_by(|a, b| a.start.cmp(&b.start));
-
-        debug!("Scan {contig}, computing outliers");
-        fill_outliers(&mut bins);
-
-        let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
-        debug!("Scan {contig}, writing file");
-
-        let mut file = get_gz_writer(&out_file, true)
-            .with_context(|| anyhow::anyhow!("failed to open the file: {out_file}"))?;
-        for bin in bins {
-            writeln!(file, "{}", bin.to_tsv_row())?;
-        }
-    }
-
-    Ok(())
-}
+// thread_local! {
+//     static BAM_READER: RefCell<Option<IndexedReader>> = const { RefCell::new(None) };
+// }
+//
+// pub fn par_whole_scan_local(out_dir: &str, bam_path: &str, config: &Config) -> anyhow::Result<()> {
+//     let bin_size = config.count_bin_size;
+//     let chunk_n_bin = config.count_n_chunks;
+//     info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
+//     fs::create_dir_all(out_dir)?;
+//
+//     for (contig, length) in read_dict(&config.dict_file)? {
+//         let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
+//
+//         // Skip this file if it already exists and is up-to-date compared to the input BAM,
+//         // unless forced by the `somatic_scan_force` flag.
+//         if !is_file_older(&out_file, bam_path).unwrap_or(true) && !config.somatic_scan_force {
+//             continue;
+//         }
+//
+//         let n_bin = length / bin_size;
+//         let n_chunks = n_bin.div_ceil(chunk_n_bin);
+//         info!("Scan of contig: {contig}");
+//
+//         let bins: Vec<BinCount> = (0..n_chunks)
+//             .into_par_iter()
+//             .flat_map(|i| {
+//                 let chunk_start = i * chunk_n_bin * bin_size;
+//                 let chunk_length = if i == n_chunks - 1 {
+//                     length - chunk_start
+//                 } else {
+//                     chunk_n_bin * bin_size
+//                 };
+//                 let n_bins_in_chunk = chunk_length.div_ceil(bin_size);
+//
+//                 // Use thread-local BAM reader
+//                 let result = BAM_READER.with(|reader_cell| {
+//                     let mut reader_ref = reader_cell.borrow_mut();
+//
+//                     // Initialize if not already set
+//                     if reader_ref.is_none() {
+//                         let reader = IndexedReader::from_path(bam_path)
+//                             .with_context(|| format!("Failed to open BAM file: {}", bam_path))
+//                             .ok()?; // handle error as Option
+//                         *reader_ref = Some(reader);
+//                     }
+//
+//                     let reader = reader_ref.as_mut().unwrap();
+//
+//                     // Seek to contig start for this chunk
+//                     let mut bins_in_chunk = Vec::new();
+//                     for j in 0..n_bins_in_chunk {
+//                         let bin_start = chunk_start + j * bin_size;
+//                         let bin_length = std::cmp::min(bin_size, chunk_length - j * bin_size);
+//                         match Bin::new(reader, &contig, bin_start, bin_length, config.bam_min_mapq)
+//                         {
+//                             Ok(bin) => bins_in_chunk.push(BinCount::from(&bin)),
+//                             Err(e) => {
+//                                 error!("Failed to get Bin at chunk {i} bin {j}: {e}");
+//                             }
+//                         }
+//                     }
+//                     Some(bins_in_chunk)
+//                 });
+//
+//                 result.into_iter().flatten().collect::<Vec<_>>()
+//             })
+//             .collect();
+//
+//         debug!("Scan {contig}, sorting bins");
+//         let mut bins = bins;
+//         bins.par_sort_unstable_by(|a, b| a.start.cmp(&b.start));
+//
+//         debug!("Scan {contig}, computing outliers");
+//         fill_outliers(&mut bins);
+//
+//         debug!("Scan {contig}, writing file");
+//
+//         let mut file = get_gz_writer(&out_file, true)
+//             .with_context(|| anyhow::anyhow!("failed to open the file: {out_file}"))?;
+//         for bin in bins {
+//             writeln!(file, "{}", bin.to_tsv_row())?;
+//         }
+//     }
+//
+//     Ok(())
+// }
 
 /// Identifies and marks outliers in a slice of `BinCount` objects based on various ratio metrics.
 ///
@@ -541,3 +561,96 @@ pub fn somatic_scan(id: &str, config: &Config) -> anyhow::Result<()> {
         config,
     )
 }
+
+
+/// A pipeline runner for executing SomaticScan on matched tumor and normal samples.
+///
+/// This struct encapsulates:
+/// - Initialization and, when `somatic_scan_force` is set, cleanup of prior outputs
+/// - Logic for checking whether a re-run is necessary (count files compared to their BAM)
+/// - Scanning of the normal input, then of the tumoral input, through `par_whole_scan`
+#[derive(Debug)]
+pub struct SomaticScan {
+    id: String,
+    config: Config,
+}
+
+impl Initialize for SomaticScan {
+    /// Initializes a SomaticScan runner.
+    ///
+    /// If `config.somatic_scan_force` is set, both the normal and tumoral count
+    /// output directories are deleted to ensure a clean re-run. A directory that
+    /// does not exist yet (for instance on the very first run) is not an error.
+    ///
+    /// # Arguments
+    /// * `id` - The sample ID for the scan
+    /// * `config` - Configuration for input/output paths and behavior
+    ///
+    /// # Returns
+    /// A fully initialized `SomaticScan` instance ready for execution
+    ///
+    /// # Errors
+    /// Returns an error if an existing output directory cannot be deleted during
+    /// force cleanup.
+    fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
+        info!("Initialize SomaticScan for {id}.");
+
+        let somatic_scan = Self {
+            id: id.to_string(),
+            config,
+        };
+
+        // Force re-run: clean up previous output directories for both normal and tumoral scans
+        if somatic_scan.config.somatic_scan_force {
+            let output_dirs = [
+                somatic_scan.config.somatic_scan_normal_output_dir(id),
+                somatic_scan.config.somatic_scan_tumoral_output_dir(id),
+            ];
+            for dir in output_dirs {
+                match fs::remove_dir_all(&dir) {
+                    Ok(()) => {}
+                    // Nothing to clean: the directory was never created.
+                    Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
+                    Err(e) => {
+                        return Err(anyhow::Error::new(e).context(format!(
+                            "Failed to remove SomaticScan output dir: {dir}"
+                        )));
+                    }
+                }
+            }
+        }
+
+        Ok(somatic_scan)
+    }
+}
+
+impl ShouldRun for SomaticScan {
+    /// Reports whether the scan has to be (re-)run.
+    ///
+    /// For every contig listed in the dict file, the tumoral and then the normal
+    /// count file is compared to its BAM with `is_file_older`; the first file that
+    /// is reported older (or for which the check fails) makes this return `true`.
+    /// If the dict file cannot be read, the error is logged and `false` is returned.
+    fn should_run(&self) -> bool {
+        let normal_bam = self.config.normal_bam(&self.id);
+        let tumoral_bam = self.config.tumoral_bam(&self.id);
+
+        let dict = match read_dict(&self.config.dict_file) {
+            Ok(dict) => dict,
+            Err(e) => {
+                error!("Failed to read dict file: {}\n{e}", self.config.dict_file);
+                // Don't run if dict is unreadable
+                return false;
+            }
+        };
+
+        dict.into_iter().any(|(contig, _)| {
+            let tumoral_counts = self
+                .config
+                .somatic_scan_tumoral_count_file(&self.id, &contig);
+            if is_file_older(&tumoral_counts, &tumoral_bam).unwrap_or(true) {
+                return true;
+            }
+
+            let normal_counts = self
+                .config
+                .somatic_scan_normal_count_file(&self.id, &contig);
+            is_file_older(&normal_counts, &normal_bam).unwrap_or(true)
+        })
+    }
+}
+
+impl Run for SomaticScan {
+    /// Runs `par_whole_scan` for the normal sample first, then for the tumoral one.
+    ///
+    /// # Returns
+    /// An error if `par_whole_scan` fails for either sample; the tumoral scan is
+    /// not attempted when the normal scan fails.
+    fn run(&mut self) -> anyhow::Result<()> {
+        let Self { id, config } = &*self;
+
+        info!("Starting scan for {} normal.", id);
+        par_whole_scan(id, &config.normal_name, config)?;
+
+        info!("Starting scan for {} tumoral.", id);
+        par_whole_scan(id, &config.tumoral_name, config)?;
+
+        Ok(())
+    }
+}

+ 289 - 6
src/variant/variant.rs

@@ -1,5 +1,6 @@
 use crate::{
     annotation::Annotations,
+    collection::{Initialize, ShouldRun},
     helpers::Hash128,
     positions::{GenomePosition, GetGenomePosition, VcfPosition},
     runners::Run,
@@ -7,7 +8,7 @@ use crate::{
 };
 use anyhow::{anyhow, Context};
 use bitcode::{Decode, Encode};
-use log::warn;
+use log::{info, warn};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
@@ -1041,6 +1042,184 @@ pub trait VariantId {
     fn variant_id(&self) -> String;
 }
 
+// pub trait AsAny {
+//     fn as_any(&self) -> &dyn std::any::Any;
+// }
+//
+// impl<T: 'static> AsAny for T {
+//     fn as_any(&self) -> &dyn std::any::Any {
+//         self
+//     }
+// }
+
+/// Umbrella trait for all dynamically executable pipeline runners.
+///
+/// It bundles the bounds required of any component that:
+/// - Can decide whether it needs to run (`ShouldRun`)
+/// - Implements the actual execution logic (`Run`)
+/// - Is `Send + Sync`, so the boxed trait object can be moved or shared across threads
+///
+/// It declares no methods of its own; eligible types get it via the blanket impl and can be boxed as `ShouldRunBox`.
+pub trait ShouldRunTrait: ShouldRun + Run + Send + Sync {}
+
+/// Blanket implementation for all compatible types.
+impl<T> ShouldRunTrait for T where T: ShouldRun + Run + Send + Sync {}
+
+/// A boxed trait object to hold any runner implementing `ShouldRunTrait`.
+pub type ShouldRunBox = Box<dyn ShouldRunTrait>;
+
+/// Macro to initialize and box multiple `ShouldRunTrait` components.
+///
+/// # Arguments
+/// * `$id` - Sample ID (typically a string slice)
+/// * `$config` - Shared configuration object (cloned once per runner)
+/// * `$($runner:ty),+` - One or more runner types implementing `Initialize + ShouldRunTrait`
+///
+/// # Returns
+/// A vector of boxed runner components (`Vec<ShouldRunBox>`)
+///
+/// # Example
+/// ```rust
+/// let modules: Vec<ShouldRunBox> = create_should_run!(
+///     "sample_42",
+///     config,
+///     ClairS,
+///     Savana,
+///     DeepSomatic,
+/// );
+/// ```
+///
+/// # Errors
+/// The expansion applies `?` to each `initialize` call, so it must be used inside a function that returns `anyhow::Result`.
+#[macro_export]
+macro_rules! create_should_run {
+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
+        vec![
+            $(
+                Box::new(<$runner>::initialize($id, $config.clone())?) as ShouldRunBox
+            ),+
+        ]
+    };
+}
+
+/// Macro to initialize and box a list of solo-mode pipeline components that implement `ShouldRunTrait`.
+///
+/// This is typically used for per-timepoint variant callers (e.g., `DeepVariant`),
+/// where each runner is instantiated for a specific sample timepoint (e.g., "tumoral", "normal").
+///
+/// Each entry must be provided as a pair: the type of the runner and the timepoint string expression.
+///
+/// # Arguments
+/// - `$id`: The sample ID (`&str`) passed to each initializer
+/// - `$config`: A `Config` object (must be cloneable)
+/// - `$($runner:ty, $arg:expr),+`: One or more runner types with timepoint arguments (e.g., `&config.tumoral_name`)
+///
+/// # Returns
+/// A `Vec<ShouldRunBox>` with boxed runners initialized per timepoint.
+///
+/// # Example
+/// ```rust
+/// let solo_callers = create_should_run_solo!(
+///     "sample42",
+///     config,
+///     DeepVariant, &config.tumoral_name,
+///     DeepVariant, &config.normal_name,
+/// );
+/// ```
+///
+/// # Notes
+/// Using `config.tumoral_name` and `config.normal_name` is preferred over hardcoded "diag"/"mrd".
+///
+/// # Errors
+/// The expansion applies `?` to each `initialize` call and must be used inside a `Result`-returning context.
+#[macro_export]
+macro_rules! create_should_run_solo {
+    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
+        vec![
+            $(
+                Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as ShouldRunBox
+            ),+
+        ]
+    };
+}
+
+/// Macro to initialize and box a list of pipeline components that must run once per timepoint
+/// (i.e., both "tumoral" and "normal") and implement `ShouldRunTrait`.
+///
+/// This is typically used for variant callers like `DeepVariant` that operate in **solo mode**
+/// but must be run twice — once for the tumoral sample and once for the normal sample.
+///
+/// The macro:
+/// - Calls `.initialize(id, timepoint, config.clone())` twice per type
+/// - Uses `&config.tumoral_name` and `&config.normal_name` as timepoints, in that order
+/// - Returns a flat `Vec<ShouldRunBox>` containing both instances for each type
+///
+/// # Arguments
+/// - `$id`: The sample ID (`&str`)
+/// - `$config`: The configuration object (must expose `tumoral_name` and `normal_name`)
+/// - `$($runner:ty),+`: One or more runner types whose `initialize` takes `(id, timepoint, config)`
+///
+/// # Example
+/// ```rust
+/// let runners = create_should_run_normal_tumoral!(
+///     "sample_42",
+///     config,
+///     DeepVariant,
+///     AnotherCaller,
+/// );
+/// ```
+///
+/// This will expand to:
+/// ```rust
+/// vec![
+///     Box::new(DeepVariant::initialize("sample_42", &config.tumoral_name, config.clone())?) as ShouldRunBox,
+///     Box::new(DeepVariant::initialize("sample_42", &config.normal_name, config.clone())?) as ShouldRunBox,
+///     Box::new(AnotherCaller::initialize("sample_42", &config.tumoral_name, config.clone())?) as ShouldRunBox,
+///     Box::new(AnotherCaller::initialize("sample_42", &config.normal_name, config.clone())?) as ShouldRunBox,
+/// ]
+/// ```
+///
+/// # Errors
+/// The expansion applies `?` to each `initialize` call, so it must be used inside a function that returns `Result`.
+#[macro_export]
+macro_rules! create_should_run_normal_tumoral {
+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
+        vec![
+            $(
+                Box::new(<$runner>::initialize($id, &$config.tumoral_name, $config.clone())?) as ShouldRunBox,
+                Box::new(<$runner>::initialize($id, &$config.normal_name, $config.clone())?) as ShouldRunBox
+            ),+
+        ]
+    };
+}
+
+/// Runs, in slice order, every runner whose `should_run()` returns `true`.
+///
+/// # Arguments
+/// * `iterable` - A mutable slice of boxed `ShouldRunTrait` components
+///
+/// # Returns
+/// * `Ok(())` if all required runners execute successfully
+/// * The first error returned by a runner's `run()` method; later runners are
+///   not executed in that case
+///
+/// # Notes
+/// - Runners that return `false` from `should_run()` are skipped silently.
+pub fn run_if_required(iterable: &mut [ShouldRunBox]) -> anyhow::Result<()> {
+    for runner in iterable.iter_mut() {
+        if !runner.should_run() {
+            // TODO: log the skipped runner once runners expose a name.
+            continue;
+        }
+        runner.run()?;
+    }
+    Ok(())
+}
+
+/// Umbrella trait for variant callers that can be executed (`Run`), can provide
+/// their variants (`Variants`) and are `Send + Sync`.
+///
+/// Used to enable polymorphic handling of both solo and somatic callers in the pipeline.
 pub trait RunnerVariants: Run + Variants + Send + Sync {}
 
 pub type CallerBox = Box<dyn RunnerVariants + Send + Sync>;
@@ -1055,6 +1234,43 @@ macro_rules! init_somatic_callers {
         ]
     };
 }
+
+/// Macro to initialize and box a list of **solo-mode variant callers** for specific timepoints,
+/// where each runner implements `RunnerVariants`.
+///
+/// This is useful for callers like `DeepVariant` that need to be instantiated with a specific
+/// sample timepoint (e.g., `config.tumoral_name` or `config.normal_name`).
+///
+/// Each entry must be a pair: a runner type and a timepoint expression (usually from config).
+///
+/// # Arguments
+/// - `$id`: The sample ID (`&str`)
+/// - `$config`: The configuration object (must be cloneable)
+/// - `$($runner:ty, $arg:expr),+`: One or more `(RunnerType, Timepoint)` pairs
+///
+/// # Returns
+/// A `Vec<CallerBox>` containing initialized, boxed solo-mode variant callers.
+///
+/// # Example
+/// ```rust
+/// let solo_callers = init_solo_callers!(
+///     "sample_42",
+///     config,
+///     DeepVariant, &config.tumoral_name,
+///     DeepVariant, &config.normal_name,
+/// );
+/// ```
+///
+/// This will expand to:
+/// ```rust
+/// vec![
+///     Box::new(DeepVariant::initialize("sample_42", &config.tumoral_name, config.clone())?) as CallerBox,
+///     Box::new(DeepVariant::initialize("sample_42", &config.normal_name, config.clone())?) as CallerBox,
+/// ]
+/// ```
+///
+/// # Errors
+/// This macro uses `?` internally, so it must be used inside a `Result`-returning context.
 #[macro_export]
 macro_rules! init_solo_callers {
     ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
@@ -1066,13 +1282,62 @@ macro_rules! init_solo_callers {
     };
 }
 
-pub fn run_variants(iterable: &mut [CallerBox]) -> anyhow::Result<()> {
-    iterable
-        .iter_mut()
-        .try_for_each(|runner| runner.run())
-        .map_err(|e| anyhow::anyhow!("Error while calling run_variants.\n{e}"))
+/// Macro to initialize and box a list of solo-mode **variant callers** for both `normal` and `tumoral` timepoints.
+///
+/// This is designed for types like `DeepVariant` that implement `RunnerVariants` and require
+/// separate execution for each timepoint. It will:
+/// - Call `.initialize(id, timepoint, config)` for both `&config.tumoral_name` and `&config.normal_name`
+/// - Box the result as `CallerBox`
+///
+/// # Arguments
+/// - `$id`: Sample ID (usually a `&str`)
+/// - `$config`: Cloneable configuration object
+/// - `$($runner:ty),+`: One or more runner types that implement `RunnerVariants`
+///
+/// # Returns
+/// A `Vec<CallerBox>` containing two boxed instances per runner (one for each timepoint).
+///
+/// # Example
+/// ```rust
+/// let solo_callers = init_solo_callers_normal_tumoral!(
+///     "sample_42",
+///     config,
+///     DeepVariant,
+///     OtherSoloCaller,
+/// );
+/// ```
+///
+/// This expands to:
+/// ```rust
+/// vec![
+///     Box::new(DeepVariant::initialize("sample_42", &config.tumoral_name, config.clone())?) as CallerBox,
+///     Box::new(DeepVariant::initialize("sample_42", &config.normal_name, config.clone())?) as CallerBox,
+///     Box::new(OtherSoloCaller::initialize("sample_42", &config.tumoral_name, config.clone())?) as CallerBox,
+///     Box::new(OtherSoloCaller::initialize("sample_42", &config.normal_name, config.clone())?) as CallerBox,
+/// ]
+/// ```
+///
+/// # Errors
+/// The expansion applies `?` to each `initialize` call, so it must be used inside a `Result`-returning context.
+#[macro_export]
+macro_rules! init_solo_callers_normal_tumoral {
+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
+        vec![
+            $(
+                Box::new(<$runner>::initialize($id, &$config.tumoral_name, $config.clone())?) as CallerBox,
+                Box::new(<$runner>::initialize($id, &$config.normal_name, $config.clone())?) as CallerBox
+            ),+
+        ]
+    };
 }
 
+// pub fn run_variants(iterable: &mut [CallerBox]) -> anyhow::Result<()> {
+//     iterable
+//         .iter_mut()
+//         .try_for_each(|runner| runner.run())
+//         .map_err(|e| anyhow::anyhow!("Error while calling run_variants.\n{e}"))
+// }
+
 pub fn load_variants(
     iterable: &mut [CallerBox],
     annotations: &Annotations,
@@ -1091,6 +1356,24 @@ pub fn load_variants(
         .map_err(|e| anyhow::anyhow!("Failed to load variants.\n{e}"))
 }
 
+/// Loads the variants of every element of `iterable` in parallel.
+///
+/// A loader whose `variants()` call fails is logged with `warn!` and left out of
+/// the result, so the returned vector only holds the collections that loaded
+/// successfully.
+pub fn par_load_variants(
+    iterable: &mut [Box<dyn Variants + Send + Sync>],
+    annotations: &Annotations,
+) -> anyhow::Result<Vec<VariantCollection>> {
+    let collections: Vec<VariantCollection> = iterable
+        .par_iter()
+        .filter_map(|loader| match loader.variants(annotations) {
+            Ok(collection) => Some(collection),
+            Err(e) => {
+                warn!("{e}");
+                None
+            }
+        })
+        .collect();
+
+    Ok(collections)
+}
+
 pub fn parallel_intersection<T: Hash + Eq + Clone + Send + Sync>(
     vec1: &[T],
     vec2: &[T],