Browse Source

caller bug // callers should run info // remove_dir_if_exists

Thomas 8 months ago
parent
commit
a5880b4b84

+ 54 - 11
src/annotation/cosmic.rs

@@ -3,32 +3,75 @@ use std::str::FromStr;
 use bitcode::{Decode, Encode};
 use serde::{Deserialize, Serialize};
 
+/// Represents parsed COSMIC (Catalogue Of Somatic Mutations In Cancer) data.
+///
+/// This struct currently holds only the count of times a variant was observed in COSMIC.
 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Encode, Decode)]
 pub struct Cosmic {
+    /// The number of times the variant was recorded in the COSMIC database.
     pub cosmic_cnt: u64,
 }
 
 impl FromStr for Cosmic {
     type Err = anyhow::Error;
 
+    /// Parses a `Cosmic` instance from a semicolon-delimited string.
+    ///
+    /// # Expected Input Format
+    /// The input string must follow the format:
+    ///
+    /// ```text
+    /// <field1>;<field2>;CNT=<number>
+    /// ```
+    ///
+    /// - The input must contain exactly three parts, separated by semicolons (`;`).
+    /// - The third part must be of the form `CNT=<number>`, where `<number>` can be parsed as a `u64`.
+    /// - If the first part contains the word `"MISSING"`, parsing will fail.
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// use your_crate::Cosmic;
+    /// use std::str::FromStr;
+    ///
+    /// let input = "ID1;info;CNT=42";
+    /// let cosmic = Cosmic::from_str(input).unwrap();
+    /// assert_eq!(cosmic.cosmic_cnt, 42);
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// - Returns an error if the string does not contain exactly three semicolon-separated parts.
+    /// - Returns an error if `"MISSING"` is found in the first part.
+    /// - Returns an error if the third part is not in `key=value` format.
+    /// - Returns an error if the value is not a valid `u64`.
     fn from_str(s: &str) -> anyhow::Result<Self> {
-        let vs: Vec<&str> = s.split(";").collect();
+        let s = s.trim();
+        let vs: Vec<&str> = s.split(";").map(str::trim).collect();
         if vs.len() != 3 {
             return Err(anyhow::anyhow!(
-                "Error while parsing Cosmic results not the right number of parts for {s}"
+                "Expected 3 semicolon-separated parts in Cosmic string, got {}: {s}",
+                vs.len()
             ));
         }
 
         if vs[0].contains("MISSING") {
-            Err(anyhow::anyhow!("MISSING values in Cosmic results: {s}"))
-        } else {
-            let v: Vec<&str> = vs[2].split("=").collect();
-
-            Ok(Cosmic {
-                cosmic_cnt: v[1]
-                    .parse()
-                    .map_err(|e| anyhow::anyhow!("Failed to parse COSMIC CNT.\n{e}"))?,
-            })
+            return Err(anyhow::anyhow!("MISSING values in Cosmic results: {s}"));
         }
+
+        let v: Vec<&str> = vs[2].split("=").map(str::trim).collect();
+
+        if v.len() != 2 {
+            return Err(anyhow::anyhow!(
+                "Expected key=value format in third field: {}",
+                vs[2]
+            ));
+        }
+
+        let count = v[1]
+            .parse::<u64>()
+            .map_err(|e| anyhow::anyhow!("Failed to parse COSMIC CNT from '{}': {}", v[1], e))?;
+
+        Ok(Cosmic { cosmic_cnt: count })
     }
 }

+ 16 - 0
src/annotation/mod.rs

@@ -528,3 +528,19 @@ impl VepStats {
 pub trait CallerCat {
     fn caller_cat(&self) -> Annotation;
 }
+
+/// Returns true if the annotations include both:
+/// - a GnomAD entry with AF > 0
+/// - and a ConstitAlt entry with n_alt > 0
+pub fn is_gnomad_and_constit_alt(anns: &[Annotation]) -> bool {
+    let gnomad = anns.iter().any(|a| {
+        matches!(a, Annotation::GnomAD(g) if g.gnomad_af > 0.0)
+    });
+
+    let constit_alt = anns.iter().any(|a| {
+        matches!(a, Annotation::ConstitAlt(n) if *n > 0)
+    });
+
+    gnomad && constit_alt
+}
+

+ 17 - 12
src/callers/clairs.rs

@@ -3,7 +3,7 @@ use crate::{
     collection::{vcf::Vcf, Initialize, ShouldRun},
     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
     config::Config,
-    helpers::{is_file_older, temp_file_path},
+    helpers::{is_file_older, remove_dir_if_exists, temp_file_path},
     io::vcf::read_vcf,
     runners::{run_wait, DockerRun, Run},
     variant::{
@@ -12,7 +12,7 @@ use crate::{
     },
 };
 use anyhow::{Context, Ok};
-use log::{debug, info};
+use log::{debug, info, warn};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use std::{fs, path::Path};
 
@@ -24,6 +24,9 @@ use std::{fs, path::Path};
 /// - Handling and filtering of output VCFs
 /// - Logging and diagnostic tracking
 /// - Integration with variant annotation workflows
+///
+/// # References
+/// - ClairS: <https://github.com/HKU-BAL/ClairS>
 #[derive(Debug, Clone)]
 pub struct ClairS {
     pub id: String,
@@ -34,7 +37,7 @@ pub struct ClairS {
 impl Initialize for ClairS {
     /// Initializes the ClairS runner.
     ///
-    /// This method constructs a `ClairS` instance with logging and configuration setup,
+    /// This method constructs a [`ClairS`] instance with logging and configuration setup,
     /// and ensures the output directory is cleaned up if the results are outdated or force execution is enabled.
     ///
     /// # Arguments
@@ -42,7 +45,7 @@ impl Initialize for ClairS {
     /// * `config` - Pipeline-wide configuration object containing paths, resources, and settings.
     ///
     /// # Returns
-    /// A fully initialized `ClairS` instance ready for execution.
+    /// A fully initialized [`ClairS`] instance ready for execution.
     ///
     /// # Errors
     /// Returns an error if the output directory fails to be removed when necessary.
@@ -61,9 +64,8 @@ impl Initialize for ClairS {
             config,
         };
 
-        let passed_vcf = clairs.config.clairs_passed_vcf(&clairs.id);
-        if (clairs.config.clairs_force && Path::new(&passed_vcf).exists()) || clairs.should_run() {
-            fs::remove_dir_all(clairs.config.clairs_output_dir(&clairs.id))?;
+        if clairs.config.clairs_force || clairs.should_run() {
+            remove_dir_if_exists(&clairs.config.clairs_output_dir(&clairs.id))?;
         }
 
         Ok(clairs)
@@ -74,8 +76,12 @@ impl ShouldRun for ClairS {
     /// Determines whether ClairS should be re-run based on BAM modification timestamps.
     fn should_run(&self) -> bool {
         let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
+        if result {
+            warn!("ClairS should run for id: {}.", self.id);
+        }
+        result
     }
 }
 
@@ -202,7 +208,7 @@ impl Run for ClairS {
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
 
-            fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?;
+            fs::remove_file(&tmp_file).context(format!("Failed to remove temporary file {tmp_file}"))?;
         } else {
             debug!(
                 "ClairS PASSED VCF already exists for {}, skipping execution.",
@@ -231,7 +237,7 @@ impl Variants for ClairS {
     /// * `annotations` - A reference to the global annotations structure used to store variant metadata.
     ///
     /// # Returns
-    /// A `VariantCollection` with the list of variants, the source VCF file, and the associated caller tag.
+    /// A [`VariantCollection`] with the list of variants, the source VCF file, and the associated caller tag.
     ///
     /// # Errors
     /// Will return an error if the VCF file is unreadable, missing, or malformed.
@@ -300,4 +306,3 @@ impl Label for ClairS {
         self.caller_cat().to_string()
     }
 }
-

+ 9 - 8
src/callers/deep_somatic.rs

@@ -9,7 +9,7 @@ use crate::{
     collection::{vcf::Vcf, Initialize, ShouldRun},
     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
     config::Config,
-    helpers::is_file_older,
+    helpers::{is_file_older, remove_dir_if_exists},
     io::vcf::read_vcf,
     runners::{run_wait, DockerRun, Run},
     variant::{
@@ -53,11 +53,8 @@ impl Initialize for DeepSomatic {
             log_dir,
         };
 
-        let passed_vcf = deep_somatic.config.deepsomatic_passed_vcf(&deep_somatic.id);
-        if (deep_somatic.config.deepsomatic_force && Path::new(&passed_vcf).exists())
-            || deep_somatic.should_run()
-        {
-            fs::remove_dir_all(deep_somatic.config.deepsomatic_output_dir(&deep_somatic.id))?;
+        if deep_somatic.config.deepsomatic_force || deep_somatic.should_run() {
+            remove_dir_if_exists(&deep_somatic.config.deepsomatic_output_dir(&deep_somatic.id))?;
         }
 
         Ok(deep_somatic)
@@ -75,8 +72,12 @@ impl Initialize for DeepSomatic {
 impl ShouldRun for DeepSomatic {
     fn should_run(&self) -> bool {
         let passed_vcf = &self.config.deepsomatic_passed_vcf(&self.id);
-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
+        if result {
+            info!("DeepSomatic should run for id: {}.", self.id);
+        }
+        result
     }
 }
 

+ 15 - 11
src/callers/deep_variant.rs

@@ -8,7 +8,7 @@ use crate::{
     collection::{vcf::Vcf, InitializeSolo, ShouldRun},
     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
     config::Config,
-    helpers::is_file_older,
+    helpers::{is_file_older, remove_dir_if_exists},
     io::vcf::read_vcf,
     runners::{run_wait, DockerRun, Run},
     variant::{
@@ -63,14 +63,12 @@ impl InitializeSolo for DeepVariant {
             config,
         };
 
-        let output_vcf_exists = Path::new(
-            &deepvariant
-                .config
-                .deepvariant_solo_output_vcf(&deepvariant.id, &deepvariant.time_point),
-        )
-        .exists();
-        if (deepvariant.config.deepvariant_force && output_vcf_exists) || deepvariant.should_run() {
-            fs::remove_dir_all(deepvariant.config.savana_output_dir(&deepvariant.id))?;
+        if deepvariant.config.deepvariant_force || deepvariant.should_run() {
+            remove_dir_if_exists(
+                &deepvariant
+                    .config
+                    .deepvariant_output_dir(&deepvariant.id, &deepvariant.time_point),
+            )?;
         }
 
         Ok(deepvariant)
@@ -89,7 +87,14 @@ impl ShouldRun for DeepVariant {
             .config
             .deepvariant_solo_passed_vcf(&self.id, &self.time_point);
         let bam = self.config.solo_bam(&self.id, &self.time_point);
-        is_file_older(&passed_vcf, &bam).unwrap_or(true)
+        let result = is_file_older(&passed_vcf, &bam).unwrap_or(true);
+        if result {
+            info!(
+                "DeepVariant should run for: {} {}.",
+                self.id, self.time_point
+            );
+        }
+        result
     }
 }
 
@@ -245,4 +250,3 @@ impl Label for DeepVariant {
         self.caller_cat().to_string()
     }
 }
-

+ 28 - 13
src/callers/nanomonsv.rs

@@ -13,7 +13,7 @@ use crate::{
     collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun},
     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
     config::Config,
-    helpers::is_file_older,
+    helpers::{is_file_older, remove_dir_if_exists},
     io::vcf::read_vcf,
     runners::{run_wait, CommandRun, Run, RunReport},
     variant::{
@@ -56,12 +56,9 @@ impl Initialize for NanomonSV {
             config,
         };
 
-        let passed_vcf = nanomonsv.config.nanomonsv_passed_vcf(&nanomonsv.id);
-        if (nanomonsv.config.nanomonsv_force && Path::new(&passed_vcf).exists())
-            || nanomonsv.should_run()
-        {
-            fs::remove_dir_all(nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "diag"))?;
-            fs::remove_dir_all(nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "mrd"))?;
+        if nanomonsv.config.nanomonsv_force || nanomonsv.should_run() {
+            remove_dir_if_exists(&nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "diag"))?;
+            remove_dir_if_exists(&nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "mrd"))?;
         }
 
         Ok(nanomonsv)
@@ -76,8 +73,12 @@ impl ShouldRun for NanomonSV {
     /// `true` if the passed VCF does not exist or is older than any input BAM.
     fn should_run(&self) -> bool {
         let passed_vcf = self.config.nanomonsv_passed_vcf(&self.id);
-        is_file_older(&passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
-            || is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
+        let result = is_file_older(&passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
+        if result {
+            warn!("NanomonSV should run for id: {}.", self.id);
+        }
+        result
     }
 }
 
@@ -128,6 +129,11 @@ impl Run for NanomonSV {
                     "Error while running NanomonSV get for {mrd_result_vcf}"
                 ))?;
             report.save_to_file(&format!("{}/nanomonsv_get_mrd_", self.log_dir))?;
+        } else {
+            debug!(
+                "NanomonSV `get` results already exists for {} normal, skipping execution.",
+                self.id
+            )
         }
 
         if !Path::new(&diag_result_vcf).exists() {
@@ -143,6 +149,11 @@ impl Run for NanomonSV {
                 "Error while running NanomonSV get for {diag_result_vcf}"
             ))?;
             report.save_to_file(&format!("{}/nanomonsv_get_diag_", self.log_dir))?;
+        } else {
+            debug!(
+                "NanomonSV `get` results already exists for {} tumoral, skipping execution.",
+                self.id
+            )
         }
 
         if !Path::new(&vcf_passed).exists() {
@@ -151,7 +162,12 @@ impl Run for NanomonSV {
                     .context(format!("Can't index {}", vcf_passed))?;
             report
                 .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
-                .context("Can't save report")?;
+                .context("Failed to save report")?;
+        } else {
+            debug!(
+                "NanomonSv PASSED VCF already exists for {}, skipping execution.",
+                self.id
+            )
         }
 
         Ok(())
@@ -179,9 +195,8 @@ impl Variants for NanomonSV {
 
         info!("Loading variants from {}: {}", caller, vcf_passed);
 
-        let variants = read_vcf(&vcf_passed).map_err(|e| {
-            anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed)
-        })?;
+        let variants = read_vcf(&vcf_passed)
+            .map_err(|e| anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed))?;
 
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);

+ 25 - 21
src/callers/savana.rs

@@ -6,7 +6,7 @@ use crate::{
         longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
     },
     config::Config,
-    helpers::is_file_older,
+    helpers::{is_file_older, remove_dir_if_exists},
     io::{readers::get_gz_reader, vcf::read_vcf},
     positions::{num_to_contig, GenomeRange},
     runners::{run_wait, CommandRun, Run},
@@ -17,7 +17,7 @@ use crate::{
 };
 use anyhow::Context;
 use itertools::Itertools;
-use log::{debug, info};
+use log::{debug, info, warn};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{
@@ -62,15 +62,35 @@ impl Initialize for Savana {
             log_dir,
         };
 
-        let output_vcf_exists = Path::new(&savana.config.savana_output_vcf(id)).exists();
-        if (savana.config.savana_force && output_vcf_exists) || savana.should_run() {
-            fs::remove_dir_all(savana.config.savana_output_dir(id))?;
+        // If forced re-run is enabled or a run is needed, remove old output directory
+        if savana.config.savana_force || savana.should_run() {
+            remove_dir_if_exists(&savana.config.savana_output_dir(id))?;
         }
 
         Ok(savana)
     }
 }
 
+impl ShouldRun for Savana {
+    /// Determines whether Savana should be re-run based on whether
+    /// the filtered PASS VCF is older than the input BAMs.
+    ///
+    /// If either input BAM (normal or tumor) is newer than the PASS VCF,
+    /// Savana is considered out of date and should be re-executed.
+    ///
+    /// # Returns
+    /// `true` if an update is needed, or if timestamps can't be checked (file doesn't exist)
+    fn should_run(&self) -> bool {
+        let passed_vcf = &self.config.savana_passed_vcf(&self.id);
+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
+        if result {
+            warn!("Savana should run for id: {}.", self.id);
+        }
+        result
+    }
+}
+
 impl Run for Savana {
     /// Executes the Savana pipeline, including prerequisite phasing and haplotagging steps.
     ///
@@ -181,22 +201,6 @@ impl Run for Savana {
     }
 }
 
-impl ShouldRun for Savana {
-    /// Determines whether Savana should be re-run based on whether
-    /// the filtered PASS VCF is older than the input BAMs.
-    ///
-    /// If either input BAM (normal or tumor) is newer than the PASS VCF,
-    /// Savana is considered out of date and should be re-executed.
-    ///
-    /// # Returns
-    /// `true` if an update is needed, or if timestamps can't be checked (file doesn't exist)
-    fn should_run(&self) -> bool {
-        let passed_vcf = &self.config.savana_passed_vcf(&self.id);
-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
-    }
-}
-
 impl Version for Savana {
     fn version(config: &Config) -> anyhow::Result<String> {
         let savana_args = ["--version"];

+ 27 - 18
src/callers/severus.rs

@@ -6,7 +6,7 @@ use crate::{
         longphase::LongphasePhase,
     },
     config::Config,
-    helpers::is_file_older,
+    helpers::{is_file_older, remove_dir_if_exists},
     io::vcf::read_vcf,
     runners::{run_wait, CommandRun, Run},
     variant::{
@@ -53,15 +53,32 @@ impl Initialize for Severus {
             log_dir,
         };
 
-        let output_vcf_exists = Path::new(&severus.config.severus_output_vcf(id)).exists();
-        if (severus.config.severus_force && output_vcf_exists) || severus.should_run() {
-            fs::remove_dir_all(severus.config.severus_output_dir(id))?;
+        if severus.config.severus_force || severus.should_run() {
+            remove_dir_if_exists(&severus.config.severus_output_dir(id))?;
         }
 
         Ok(severus)
     }
 }
 
+impl ShouldRun for Severus {
+    /// Determines whether Severus should re-run based on whether the PASS VCF
+    /// is older than either the tumor or normal BAM file.
+    ///
+    /// # Returns
+    ///
+    /// `true` if Severus needs to be re-run, otherwise `false`
+    fn should_run(&self) -> bool {
+        let passed_vcf = &self.config.severus_passed_vcf(&self.id);
+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
+        if result {
+            info!("Severus should run for: {}.", self.id);
+        }
+        result
+    }
+}
+
 impl Run for Severus {
     /// Runs the Severus structural variant caller if its output VCF does not already exist.
     ///
@@ -145,26 +162,18 @@ impl Run for Severus {
             report
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
+        } else {
+            debug!(
+                "Severus PASSED VCF already exists for {}, skipping execution.",
+                self.id
+            );
+
         }
 
         Ok(())
     }
 }
 
-impl ShouldRun for Severus {
-    /// Determines whether Severus should re-run based on whether the PASS VCF
-    /// is older than either the tumor or normal BAM file.
-    ///
-    /// # Returns
-    ///
-    /// `true` if Severus needs to be re-run, otherwise `false`
-    fn should_run(&self) -> bool {
-        let passed_vcf = &self.config.severus_passed_vcf(&self.id);
-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
-    }
-}
-
 impl Version for Severus {
     fn version(config: &Config) -> anyhow::Result<String> {
         let args = [

+ 7 - 3
src/config.rs

@@ -59,6 +59,7 @@ pub struct Config {
     pub mask_bed: String,
     pub somatic_min_constit_depth: u16,
     pub somatic_max_alt_constit: u16,
+    pub entropy_seq_len: usize,
     pub min_shannon_entropy: f64,
     pub nanomonsv_bin: String,
     pub nanomonsv_output_dir: String,
@@ -68,6 +69,7 @@ pub struct Config {
     pub nanomonsv_solo_output_dir: String,
     pub nanomonsv_solo_passed_vcf: String,
     pub somatic_pipe_force: bool,
+    pub somatic_pipe_threads: u8,
     pub min_high_quality_depth: u32,
     pub somatic_scan_force: bool,
 }
@@ -190,9 +192,11 @@ impl Default for Config {
             somatic_scan_force: false,
 
             // Pipe
-            somatic_pipe_force: true,
+            somatic_pipe_force: false,
+            somatic_pipe_threads: 150,
             somatic_min_constit_depth: 5,
             somatic_max_alt_constit: 1,
+            entropy_seq_len: 10,
             min_shannon_entropy: 1.0,
 
             min_high_quality_depth: 14,
@@ -214,7 +218,7 @@ impl Default for AlignConfig {
     fn default() -> Self {
         Self {
             dorado_bin: "/data/tools/dorado-0.9.1-linux-x64/bin/dorado".to_string(),
-            dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
+            dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(),
             // since v0.8.0, need to specify cuda devices (exclude the T1000)
             ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
             ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
@@ -344,7 +348,7 @@ impl Config {
     pub fn deepvariant_solo_passed_vcf(&self, id: &str, time: &str) -> String {
         format!(
             "{}/{}_{}_DeepVariant_PASSED.vcf.gz",
-            self.deepvariant_normal_output_dir(id),
+            self.deepvariant_output_dir(id, time),
             id,
             time
         )

+ 11 - 0
src/helpers.rs

@@ -502,3 +502,14 @@ pub fn is_file_older(file1: &str, file2: &str) -> anyhow::Result<bool> {
 
     Ok(mtime1 < mtime2)
 }
+
+pub fn remove_dir_if_exists(dir: &str) -> anyhow::Result<()> {
+    match fs::remove_dir_all(dir) {
+        Ok(_) => {}
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
+        Err(e) => {
+            anyhow::bail!("Failed to remove directory '{}': {}", dir, e);
+        }
+    };
+    Ok(())
+}

+ 166 - 5
src/lib.rs

@@ -1,3 +1,163 @@
+//! # Long-read Somatic Variant Calling and Analysis Framework
+//!
+//! This Rust library provides a modular, parallelizable framework for somatic variant calling, annotation, and interpretation from long-read sequencing data. It is designed to support full pipelines for research and clinical workflows across multiple variant callers and analysis stages.
+//!
+//! ## Key Features
+//!
+//! - **Pipeline Management**: Full orchestration of Dockerized execution pipelines for tools such as ClairS, Nanomonsv, DeepVariant, Savana, Modkit, and Severus.
+//! - **POD5 Demultiplexing and Alignment**: End-to-end support for processing ONT POD5 files:
+//!     - Demux using barcode metadata and custom CSV input
+//!     - POD5 subsetting and organization by flowcell case
+//!     - Integration with basecallers (e.g., Dorado) for read alignment
+//! - **Flexible Configuration**: Centralized configuration system (`Config`, `CollectionsConfig`) for all modules and pipelines.
+//! - **Input Abstraction**: Unified handling of BAM, POD5, and VCF file collections across cohorts and directories.
+//! - **Variant Processing**: Modular loading, filtering, statistical analysis, and annotation of somatic and germline variants.
+//! - **Haplotype Phasing and Methylation**: Support for LongPhase-based phasing and Modkit methylation pileups with support for multi-threaded pileup and aggregation.
+//! - **Parallel Execution**: Uses `rayon` for efficient multicore parallelization over large cohorts and tasks.
+//!
+//! ## Module Highlights
+//!
+//! - `callers`: Interfaces to variant calling tools (ClairS, DeepVariant, Nanomonsv, Savana, etc.)
+//! - `runners`: Pipeline runners (e.g. `Somatic`, `SeverusSolo`, `LongphasePhase`) that manage end-to-end execution.
+//! - `collection`: Organizes input data across BAMs, VCFs, and POD5 files with auto-detection of completed runs.
+//! - `annotation`: VEP line parsing and high-level annotation aggregation.
+//! - `pipes`: Composition modules for executing pipelines across callers and post-processing steps.
+//! - `functions`: Custom logic for genome assembly, entropy estimation, and internal tooling.
+//! - `positions`, `variant`, `helpers`: Utilities for SV modeling, variant filtering, position overlap logic, and helper methods.
+//!
+//! ---
+//!
+//! ## 🧬 Workflow Overview
+//!
+//! ### 1. 📦 From POD5 to BAM Alignment
+//!
+//! - **Demultiplexing**: POD5 files are subset and demuxed using barcodes (via CSV metadata).
+//! - **Flowcell Case Management**: Each sample is identified by a `FlowCellCase` containing its ID, time point, and POD5 directory.
+//! - **Alignment**: The `Dorado` module handles alignment of POD5 reads to the reference genome, producing BAMs.
+//!
+//! ```rust,ignore
+//! let case = FlowCellCase { id: "PATIENT1", time_point: "diag", barcode: "01", pod_dir: "...".into() };
+//! Dorado::init(case, Config::default())?.run_pipe()?;
+//! ```
+//!
+//! ---
+//!
+//! ### 2. 🧬 Variant Calling (BAM ➝ VCF)
+//!
+//! Using the aligned BAMs, multiple variant callers can be run in parallel. The `callers` and `runners` modules support:
+//!
+//! - **ClairS** – somatic small variant calling with LongPhase haplotagging  
+//! - **Nanomonsv** – structural variants (SV)  
+//! - **DeepVariant** – germline small variants  
+//! - **Savana** – SVs and copy number variations (CNV)  
+//! - **Modkit** – methylation pileups  
+//! - **LongPhase** – phasing and modcalling
+//!
+//! All workflows can be triggered per-case or per-cohort using `Collections` or `Somatic` runners.
+//!
+//! ```rust,ignore
+//! ClairS::initialize("PATIENT1", Config::default())?.run()?;
+//! NanomonSV::initialize("PATIENT1", Config::default())?.run()?;
+//! ```
+//!
+//! ---
+//!
+//! ### 3. 📈 Aggregation & Statistics (VCF ➝ JSON / Stats)
+//!
+//! After variant calling:
+//!
+//! - Annotate with VEP (`annotation` module)
+//! - Load and filter with `variant_collection`
+//! - Compute variant and region-level stats (e.g., mutation rates, alteration categories, coding overlaps)
+//!
+//! ```rust,ignore
+//! let variants = Variants::load_from_json("/path/to/somatic_variants.json.gz")?;
+//! let stats = VariantsStats::new(&variants, "PATIENT1", &config)?;
+//! stats.save_to_json("/output/path/stats.json.gz")?;
+//! ```
+//!
+//! ---
+//!
+//! ### 4. 🧠 Intelligent Task Management (`collection` module)
+//!
+//! - Auto-discovers available samples, POD5s, BAMs, and VCFs
+//! - Detects missing outputs and creates task lists
+//! - Tasks are parallelizable using Rayon and can be run on-demand
+//!
+//! ```rust,ignore
+//! let mut collections = Collections::new(CollectionsConfig::default())?;
+//! collections.todo()?;      // Identify missing steps
+//! collections.run()?;       // Run them automatically
+//! ```
+//!
+//! ---
+//!
+//! ## 📁 Module Highlights
+//!
+//! - `callers`: Interfaces to ClairS, DeepVariant, Savana, Nanomonsv, etc.
+//! - `runners`: Pipeline runners like `Somatic` and `LongphasePhase`
+//! - `collection`: Auto-discovery of BAM/VCF/POD5s, task orchestration
+//! - `annotation`: VEP line parsing and transcript-level annotations
+//! - `pipes`: High-level pipelines (e.g., `run_somatic`, `todo_deepvariants`)
+//! - `variant`: Variant structs, filtering, alteration categories
+//! - `positions`, `helpers`, `functions`, `math`: Utility layers
+//!
+//! ---
+//!
+//! ## 🔬 Testing
+//!
+//! Integration tests demonstrate the entire pipeline. Run with logging enabled:
+//!
+//! ```bash
+//! export RUST_LOG=debug
+//! cargo test -- --nocapture
+//! ```
+//!
+//! ---
+//! ## Example Use Cases
+//!
+//! - Full somatic variant calling pipeline on matched tumor/normal samples
+//! - POD5-based pipeline from raw signal to variants
+//! - Aggregation and annotation of SVs across a clinical cohort
+//! - Methylation analysis using nanopore-specific tools
+//! - Variant calling and analysis in large-scale longitudinal studies
+//!
+//! ## Getting Started
+//!
+//! All workflows are initialized from `Config` and driven by the `Collections` structure:
+//!
+//! ```rust,ignore
+//! let config = Config::default();
+//! let collections = Collections::new(CollectionsConfig::default())?;
+//! collections.todo()?;
+//! collections.run()?;
+//! ```
+//!
+//! ## Running Tests
+//!
+//! Run the full suite with logging enabled:
+//!
+//! ```bash
+//! export RUST_LOG=debug
+//! cargo test -- --nocapture
+//! ```
+//!
+//! ## 🔗 References
+//! ### Basecalling and alignment
+//! - Dorado: <https://github.com/nanoporetech/dorado>
+//! ### Variant Callers
+//! - ClairS: <https://github.com/HKU-BAL/ClairS>
+//! - Nanomonsv: <https://github.com/friend1ws/nanomonsv>
+//! - Savana: <https://github.com/cortes-ciriano-lab/savana>
+//! - DeepVariant: <https://github.com/google/deepvariant>
+//! - DeepSomatic: <https://github.com/google/deepsomatic>
+//! - LongPhase: <https://github.com/twolinin/longphase>
+//! - Modkit: <https://github.com/nanoporetech/modkit>
+//! ### Variant annotation
+//! - VEP: <https://www.ensembl.org/info/docs/tools/vep/index.html>
+//!
+//! ---
+
 use std::sync::{Arc, Mutex};
 
 pub mod commands;
@@ -37,7 +197,7 @@ mod tests {
     use helpers::estimate_shannon_entropy;
     use io::bed::read_bed;
     use log::{error, info, warn};
-    use pipes::somatic::Somatic;
+    use pipes::somatic::SomaticPipe;
     use positions::{overlaps_par, GenomePosition, GenomeRange};
     use rayon::prelude::*;
     use runners::Run;
@@ -533,10 +693,11 @@ mod tests {
     #[test]
     fn pipe_somatic() -> anyhow::Result<()> {   
         init();
-        let id = "ADJAGBA";
-        Somatic::initialize(id, Config::default())?.run()
+        let id = "ACHITE";
+        SomaticPipe::initialize(id, Config::default())?.run()
     }
 
+   
     #[test]
     fn overlaps() {
         init();
@@ -697,7 +858,7 @@ mod tests {
                 continue;
             }
 
-            match Somatic::initialize(id, Config::default())?.run() {
+            match SomaticPipe::initialize(id, Config::default())?.run() {
                 Ok(_) => (),
                 Err(e) => error!("{id} {e}"),
             };
@@ -710,7 +871,7 @@ mod tests {
         init();
         let id = "ADJAGBA";
         let config = Config { somatic_pipe_force: true, ..Default::default() };
-        match Somatic::initialize(id, config)?.run() {
+        match SomaticPipe::initialize(id, config)?.run() {
             Ok(_) => (),
             Err(e) => error!("{id} {e}"),
         };

+ 466 - 254
src/pipes/somatic.rs

@@ -1,7 +1,5 @@
 use crate::{
-    create_should_run_normal_tumoral, init_solo_callers_normal_tumoral,
-    scan::scan::SomaticScan,
-    variant::variant::{run_if_required, ShouldRunBox},
+    annotation::is_gnomad_and_constit_alt, collection::ShouldRun, create_should_run_normal_tumoral, init_solo_callers_normal_tumoral, scan::scan::SomaticScan, variant::variant::{run_if_required, ShouldRunBox}
 };
 use anyhow::Context;
 use itertools::Itertools;
@@ -11,7 +9,6 @@ use std::{
     fs::{self, File},
     io::Write,
     path::Path,
-    sync::Arc,
 };
 
 use crate::{
@@ -31,240 +28,172 @@ use crate::{
     },
 };
 
-pub struct Somatic {
+/// Full somatic variant calling pipeline for a single sample (`id`).
+///
+/// Running this pipeline (see its [`Run`] implementation) covers the entire somatic
+/// variant discovery process, starting from raw variant caller outputs (`PASSED VCF`)
+/// and applying multiple filtering and annotation steps to produce high-confidence
+/// somatic variants.
+///
+/// End to end, the pipeline:
+/// - Executes and verifies upstream components if necessary
+/// - Loads variants from multiple callers (tumor and normal samples)
+/// - Applies several annotation and filtering steps (e.g. depth, population frequency, entropy)
+/// - Tracks filtering statistics at each step
+/// - Outputs high-confidence somatic variants in both `.json.gz` and `.bit` formats
+///
+/// ## Output Overview
+/// The final output includes:
+/// - `{tumoral_dir}/{id}_somatic_variants.json.gz`: annotated somatic variants
+/// - `{tumoral_dir}/{id}_somatic_variants.bit`: compact binary variant representation
+/// - `{stats_dir}/`: multiple intermediate JSON files with annotations and statistics
+///
+/// ## Steps
+///
+/// This pipeline performs the following high-level steps:
+///
+/// ### 1. Output Existence Check
+/// If the final JSON result already exists and [`Config::somatic_pipe_force`] is not set,
+/// the pipeline aborts early to avoid overwriting results.
+///
+/// ### 2. Initialization
+/// - Prepares statistics and output directories.
+/// - Initializes variant annotations.
+///
+/// ### 3. Pre-requisite Tool Execution
+/// Runs any required upstream components (e.g., alignment, basecalling, variant callers) if
+/// their outputs are missing, using the [`run_if_required`] logic.
+///
+/// ### 4. Load Variant Collections
+/// - Initializes the configured somatic variant callers and loads their output variants from PASSED VCF.
+/// - Also loads germline variants (from [`ClairS::germline`]) for comparative germline filtering.
+///
+/// ### 5. Statistics Initialization
+/// - Initializes a [`SomaticPipeStats`] object to track the number of variants at each step.
+/// - Captures initial variant counts before filtering.
+/// - Aggregates variant counts from all sources and stores initial annotations for quality control and
+///   comparison before filtering.
+///
+/// ### 6. Filter: Germline & Solo Constitutional Variants
+/// - Removes variants labeled as either Germline or Solo Constitutional, assuming they are unlikely to be somatic.
+/// - Records count of removed variants in [`SomaticPipeStats::n_constit_germline`].
+///
+/// ### 7. Annotation: BAM Read Depth and Alt Allele Counts
+/// - Uses the constitutional BAM file to annotate each variant with read depth and the number of alternate reads observed.
+/// - Flags variants with low depth or excessive alt reads for filtering as specified in [`Config`].
+///
+/// ### 8. Filtering: Low Depth / High Alt Alleles
+/// - Removes variants with low coverage in the constitutional sample, or excessive alt allele support (suggestive of germline origin).
+/// - Updates stats:
+///   - [`SomaticPipeStats::n_low_constit`]
+///   - [`SomaticPipeStats::n_high_alt_constit`]
+///
+/// ### 9. Annotation: Sequence Entropy
+/// Adds Shannon entropy annotation based on the reference sequence context
+/// around each variant (cf. [`Config::entropy_seq_len`]) to flag low-complexity regions (often repetitive).
+///
+/// ### 10. Annotation: External Databases (COSMIC, GnomAD)
+///  - Uses external resources to annotate variants with:
+///    - COSMIC hits (somatic mutation database)
+///    - GnomAD allele frequencies
+///
+/// ### 11. Filtering: GnomAD + Alt Support in Constitutional Sample
+///  - Removes variants that are both present in GnomAD **and** show
+///    alternate allele support in the constitutional BAM.
+///    These are highly likely to be non-somatic germline polymorphisms.
+///  - Updates [`SomaticPipeStats::n_high_alt_constit_gnomad`] stat.
+///
+/// ### 12. Filtering: Low Shannon Entropy
+///  - Removes variants from low-complexity regions with entropy below the configured threshold
+///    (cf. [`Config::min_shannon_entropy`]).
+///  - Updates [`SomaticPipeStats::n_low_entropies`].
+///
+/// ### 13. Annotation: VEP (Variant Effect Predictor)
+///  Adds transcript-level annotations from Ensembl VEP, providing functional consequences,
+///  impacted genes, and regulatory features.
+///
+/// ### 14. Merging
+///  Merges variant collections into a unified [`Variants`] structure,
+///  preserving annotations and applying deduplication logic.
+///
+/// ### 15. Final Statistics and Saving
+///  - Saves final annotation stats, VEP summaries, and variant-level statistics.
+///  - Exports the final somatic variant set to both compressed JSON and `.bit` formats.
+///
+/// # Returns
+/// - `Ok(())` if all steps completed successfully.
+/// - `Err` if any tool fails, if file I/O fails, or if logical conditions are violated (e.g., pre-existing output).
+///
+/// # Errors
+/// - Returns early if the output file already exists and [`Config::somatic_pipe_force`] is `false`.
+/// - Wraps all component-specific errors using `anyhow::Result` with context.
+///
+/// # Side Effects
+/// - Runs external tools conditionally (e.g., [`ClairS`], [`DeepSomatic`]).
+/// - Creates intermediate directories and annotation JSONs for debugging/QC.
+/// - May consume significant compute time depending on the number of callers, annotations, and variants.
+///
+/// # TODOs
+/// - Support compressed intermediate files (`// TODO: GZ !!!`)
+/// - Improve filtering metrics reporting (currently not all filtered variants are tracked in final stats).
+///
+/// # Example Output Files
+/// - `tumoral_dir/sample123_somatic_variants.json.gz`
+/// - `tumoral_dir/sample123_somatic_variants.bit`
+/// - `stats_dir/sample123_annotations_*.json` (intermediate annotation snapshots)
+///
+/// # See Also
+/// - [`Annotations`] – core structure for managing per-variant metadata
+/// - [`Variants`] – the merged final variant structure
+/// - [`SomaticPipeStats`] – used for tracking variant counts throughout filtering
+///
+pub struct SomaticPipe {
+    /// Unique identifier for the sample.
     pub id: String,
+    /// Configuration parameters for the pipeline.
     pub config: Config,
-    pub annotations: Annotations,
 }
 
-impl Initialize for Somatic {
+impl Initialize for SomaticPipe {
+    /// Initializes a new `SomaticPipe` for the given sample `id` and configuration.
     fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
         let id = id.to_string();
-        Ok(Self {
-            id,
-            config,
-            annotations: Annotations::default(),
-        })
+        Ok(Self { id, config })
     }
 }
 
-#[derive(Debug, Default, Clone)]
-pub struct SomaticPipeStats {
-    pub input: InputStats,
-    pub n_constit_germline: usize,
-    pub n_low_constit: usize,
-    pub n_high_alt_constit: usize,
-    pub n_high_alt_constit_gnomad: usize,
-    pub n_low_entropies: usize,
-}
-
-#[derive(Debug, Default, Clone)]
-pub struct InputStats {
-    pub solo_tumor: Vec<(Annotation, usize)>,
-    pub solo_constit: Vec<(Annotation, usize)>,
-    pub germline: Vec<(Annotation, usize)>,
-    pub somatic: Vec<(Annotation, usize)>,
-}
-
-impl InputStats {
-    pub fn from_collections(collections: &[VariantCollection]) -> Self {
-        let mut stats = Self::default();
-        for collection in collections.iter() {
-            match collection.caller {
-                Annotation::Callers(_, Sample::SoloTumor) => stats
-                    .solo_tumor
-                    .push((collection.caller.clone(), collection.variants.len())),
-                Annotation::Callers(_, Sample::SoloConstit) => stats
-                    .solo_constit
-                    .push((collection.caller.clone(), collection.variants.len())),
-                Annotation::Callers(_, Sample::Germline) => stats
-                    .germline
-                    .push((collection.caller.clone(), collection.variants.len())),
-                Annotation::Callers(_, Sample::Somatic) => stats
-                    .somatic
-                    .push((collection.caller.clone(), collection.variants.len())),
-                _ => (),
-            };
-        }
-        stats
+impl ShouldRun for SomaticPipe {
+    fn should_run(&self) -> bool {
+        todo!()
     }
 }
 
-impl SomaticPipeStats {
-    pub fn init(collections: &[VariantCollection]) -> Self {
-        Self {
-            input: InputStats::from_collections(collections),
-            ..Default::default()
-        }
-    }
-
-    pub fn annot_init(&self, stats: &AnnotationsStats, json_path: &str) -> anyhow::Result<()> {
-        let stats: Vec<(Vec<Annotation>, u64)> = stats
-            .categorical
-            .iter()
-            .map(|e| {
-                let anns = e
-                    .key()
-                    .split(" + ")
-                    .map(|k| k.parse())
-                    .collect::<anyhow::Result<Vec<Annotation>>>()
-                    .map_err(|err| {
-                        anyhow::anyhow!("Error while splitting key in AnnotationsStats.\n{err}")
-                    })?;
-                Ok((anns, *e.value()))
-            })
-            .collect::<anyhow::Result<Vec<(Vec<Annotation>, u64)>>>()?;
-
-        let callers_somatic_solo_tumor = [
-            self.input
-                .somatic
-                .iter()
-                .map(|(caller, _)| caller.clone())
-                .collect::<Vec<Annotation>>(),
-            self.input
-                .solo_tumor
-                .iter()
-                .map(|(caller, _)| caller.clone())
-                .collect(),
-        ]
-        .concat();
-
-        let callers_germline_solo_constit = [
-            self.input
-                .germline
-                .iter()
-                .map(|(caller, _)| caller.clone())
-                .collect::<Vec<Annotation>>(),
-            self.input
-                .solo_constit
-                .iter()
-                .map(|(caller, _)| caller.clone())
-                .collect(),
-        ]
-        .concat();
-
-        let mut with_germline: HashMap<String, HashMap<String, u64>> = HashMap::new();
-        stats.iter().for_each(|(anns, v)| {
-            if anns.iter().any(|a| {
-                matches!(
-                    a,
-                    Annotation::Callers(_, Sample::SoloConstit)
-                        | Annotation::Callers(_, Sample::Germline)
-                )
-            }) {
-                let n_by_tumor: Vec<(String, u64)> = callers_somatic_solo_tumor
-                    .iter()
-                    .flat_map(|tumor| {
-                        if anns.contains(tumor) {
-                            vec![(tumor.to_string(), *v)]
-                        } else {
-                            vec![]
-                        }
-                    })
-                    .collect();
-
-                let mut germline_caller: Vec<String> = callers_germline_solo_constit
-                    .iter()
-                    .flat_map(|germ| {
-                        if anns.contains(germ) {
-                            vec![germ.to_string()]
-                        } else {
-                            vec![]
-                        }
-                    })
-                    .collect();
-                germline_caller.sort();
-                let germline_caller = germline_caller.join(" + ");
-
-                n_by_tumor.iter().for_each(|(tumoral_caller, n)| {
-                    if let Some(row) = with_germline.get_mut(tumoral_caller) {
-                        if let Some(col) = row.get_mut(&germline_caller) {
-                            *col += *n;
-                        } else {
-                            row.insert(germline_caller.to_string(), *n);
-                        }
-                    } else {
-                        let mut row = HashMap::new();
-                        row.insert(germline_caller.to_string(), *n);
-                        with_germline.insert(tumoral_caller.to_string(), row);
-                    }
-                });
-            }
-        });
-
-        let mut germlines_callers: Vec<String> = with_germline
-            .iter()
-            .flat_map(|(_, r)| {
-                r.iter()
-                    .map(|(k, _)| k.to_string())
-                    .collect::<Vec<String>>()
-            })
-            .collect();
-        germlines_callers.sort();
-        germlines_callers.dedup();
-
-        let mut json = Vec::new();
-        let mut lines: Vec<String> = with_germline
-            .iter()
-            .map(|(tumor, row)| {
-                json.push(format!(
-                    "{{\"caller_name\": \"{tumor}\", \"germline\": [{}] }}",
-                    germlines_callers
-                        .iter()
-                        .map(|g| {
-                            let v = row.get(g).unwrap_or(&0);
-                            format!("{{\"{g}\": {v}}}")
-                        })
-                        .join(", ")
-                ));
-                format!(
-                    "{tumor}\t{}",
-                    germlines_callers
-                        .iter()
-                        .map(|g| {
-                            let v = row.get(g).unwrap_or(&0);
-                            format!("{g}: {v}")
-                        })
-                        .join("\t")
-                )
-            })
-            .collect();
-        lines.sort();
-        println!("{}", lines.join("\n"));
-
-        let json = format!("[{}]", json.join(", "));
-        let mut file = File::create(json_path)?;
-        file.write_all(json.as_bytes())?;
-
-        Ok(())
-    }
-}
-
-impl Run for Somatic {
+impl Run for SomaticPipe {
+    /// Executes the full somatic variant analysis pipeline.
     fn run(&mut self) -> anyhow::Result<()> {
         let config = self.config.clone();
         let id = self.id.clone();
+        info!("Running somatic pipe for {id}.");
 
+        // Define output paths for the final somatic variants
         let result_json = format!("{}/{id}_somatic_variants.json.gz", config.tumoral_dir(&id));
         let result_bit = format!("{}/{id}_somatic_variants.bit", config.tumoral_dir(&id));
 
         if Path::new(&result_json).exists() && !config.somatic_pipe_force {
-            return Err(anyhow::anyhow!("already exists"));
+            return Err(anyhow::anyhow!(
+                "Somatic Pipe output already exists for {id}."
+            ));
         }
 
-        info!("Running somatic pipe for {id}.");
-        let annotations = Arc::new(self.annotations.clone());
+        let mut annotations = Annotations::default();
 
-        // Stats dir
+        // Create stats directory if it doesn't exist
         let stats_dir = config.somatic_pipe_stats(&id);
         if !Path::new(&stats_dir).exists() {
             fs::create_dir(&stats_dir)?;
         }
-        // TODO: GZ !!!
-        // LongphasePhase::initialize(&id, self.config.clone())?.run()?;
 
-        // Initalize variants collections
-        info!("Initialization prerequired pipe components...");
+        // Initialize and run any pre-required input if necessary
+        info!("Initialization prerequired pipe inputs...");
 
         let mut to_run_if_req = create_should_run!(
             &id,
@@ -276,13 +205,13 @@ impl Run for Somatic {
             Savana,
             DeepSomatic
         );
-        to_run_if_req.extend(create_should_run_normal_tumoral!(&id, &config, DeepVariant,));
+        to_run_if_req.extend(create_should_run_normal_tumoral!(&id, &config, DeepVariant));
 
         info!("Running prerequired pipe components.");
-
         run_if_required(&mut to_run_if_req)
             .context("Failed to run a prerequired component of somatic pipe.")?;
 
+        // Initialize variant callers
         let mut callers = init_somatic_callers!(
             &id,
             &config,
@@ -295,18 +224,21 @@ impl Run for Somatic {
 
         callers.extend(init_solo_callers_normal_tumoral!(&id, &config, DeepVariant,));
 
+        // Load variants from each caller
         info!("Loading variants.");
         let mut variants_collections = load_variants(&mut callers, &annotations)
             .map_err(|e| anyhow::anyhow!("Error while loading variants\n{e}"))?;
 
-        info!("Loading Germline");
+        // Load germline variants using ClairS
+        info!("Loading germline variants.");
         let clairs_germline =
             ClairS::initialize(&id, self.config.clone())?.germline(&annotations)?;
         variants_collections.push(clairs_germline);
 
+        // Initialize statistics
         let mut somatic_stats = SomaticPipeStats::init(&variants_collections);
         info!(
-            "Variants collections from {} vcf ({} variants)",
+            "Variants collections loaded from {} vcf (total of {} variants loaded)",
             variants_collections.len(),
             variants_collections
                 .iter()
@@ -314,8 +246,7 @@ impl Run for Somatic {
                 .sum::<usize>()
         );
 
-        let mut annotations = Arc::try_unwrap(annotations)
-            .map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
+        // Initial annotation stats (caller annotations only)
         let caller_cat_anns = |v: &Annotation| matches!(v, Annotation::Callers(_, _));
         let annot_init = annotations.callers_stat(Some(Box::new(caller_cat_anns)));
         somatic_stats.annot_init(
@@ -324,8 +255,11 @@ impl Run for Somatic {
         )?;
         annot_init.save_to_json(&format!("{stats_dir}/{id}_annotations_01.json"))?;
 
-        // Filter: Variants neither Germline nor SoloConstit
-        info!("Keeping somatic variants (variants neither in solo nor in germline).");
+        // Filter out germline and solo constitutional variants
+        info!(
+            "Keeping somatic variants (variants neither in solo {} nor in germline).",
+            config.normal_name
+        );
         somatic_stats.n_constit_germline =
             annotations.retain_variants(&mut variants_collections, |anns| {
                 !anns.iter().any(|ann| {
@@ -342,7 +276,7 @@ impl Run for Somatic {
                 "{stats_dir}/{id}_annotations_02_post_germline.json"
             ))?;
 
-        // Annotation: BAM depth, n_alt
+        // Annotate with depth and number of alternate reads from constitutional BAM
         info!("Reading Constit BAM file for depth and pileup annotation.");
         variants_collections.iter().try_for_each(|c| {
             c.annotate_with_constit_bam(&annotations, &self.config.normal_bam(&id), 150)
@@ -364,7 +298,7 @@ impl Run for Somatic {
             })))
             .save_to_json(&format!("{stats_dir}/{id}_annotations_03_bam.json"))?;
 
-        // Filter: Remove LowConstitDepth from annotations and variants collections
+        // Filter based on low constitutional depth
         info!(
             "Removing variants when depth in constit bam < {}.",
             self.config.somatic_min_constit_depth
@@ -403,17 +337,22 @@ impl Run for Somatic {
             })))
             .save_to_json(&format!("{stats_dir}/{id}_annotations_04_bam_filter.json"))?;
 
-        // Annotation: Entropy
+        // Annotate variants with sequence entropy
         info!(
             "Entropy annotation from {} sequences.",
             self.config.reference
         );
         variants_collections.iter().for_each(|c| {
-            c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
+            c.annotate_with_sequence_entropy(
+                &annotations,
+                &self.config.reference,
+                self.config.entropy_seq_len,
+                self.config.somatic_pipe_threads,
+            );
         });
 
-        // Annotation: Cosmic and GnomAD
-        info!("Annotation with Cosmic and GnomAD.");
+        // Annotate with external databases like COSMIC and GnomAD
+        info!("Annotation with external databases like COSMIC and GnomAD.");
         variants_collections
             .iter()
             .try_for_each(|c| -> anyhow::Result<()> {
@@ -430,31 +369,11 @@ impl Run for Somatic {
             })))
             .save_to_json(&format!("{stats_dir}/{id}_annotations_05_gnomad.json"))?;
 
-        // Filter: Remove variants in Gnomad and in constit bam
+        // Filter: Remove variants that are present in GnomAD and have alt reads in the constitutional sample
         info!("Filtering out variants in GnomAD and in constit bam at low AF.");
-        somatic_stats.n_high_alt_constit_gnomad =
-            annotations.retain_variants(&mut variants_collections, |anns| {
-                !anns
-                    .iter()
-                    .find_map(|a| {
-                        if let Annotation::GnomAD(gnomad) = a {
-                            Some(gnomad.gnomad_af > 0.0)
-                        } else {
-                            None
-                        }
-                    })
-                    .and_then(|gnomad_condition| {
-                        anns.iter()
-                            .find_map(|a| {
-                                if let Annotation::ConstitAlt(n_alt) = a {
-                                    Some(*n_alt > 0)
-                                } else {
-                                    None
-                                }
-                            })
-                            .map(|constit_alt_condition| gnomad_condition && constit_alt_condition)
-                    })
-                    .unwrap_or(false)
+        somatic_stats.n_high_alt_constit_gnomad = annotations
+            .retain_variants(&mut variants_collections, |anns| {
+                !is_gnomad_and_constit_alt(anns)
             });
 
         info!(
@@ -473,12 +392,13 @@ impl Run for Somatic {
                 "{stats_dir}/{id}_annotations_06_gnomad_filter.json"
             ))?;
 
-        // Annotation low entropy
+        // Annotate low-entropy variants (below `min_shannon_entropy`) before filtering them out
         annotations.low_shannon_entropy(self.config.min_shannon_entropy);
-        // annotations.callers_stat();
 
-        // Filtering low entropy for solo variants.
-        info!("Filtering low entropies");
+        info!(
+            "Filtering out variants with low entropies ({})",
+            config.min_shannon_entropy
+        );
         annotations
             .callers_stat(Some(Box::new(|v| {
                 matches!(v, Annotation::Callers(_, _) | Annotation::LowEntropy)
@@ -489,14 +409,15 @@ impl Run for Somatic {
             .retain_variants(&mut variants_collections, |anns| {
                 !anns.contains(&Annotation::LowEntropy)
             });
+
         annotations
             .callers_stat(Some(Box::new(|v| matches!(v, Annotation::Callers(_, _)))))
             .save_to_json(&format!(
                 "{stats_dir}/{id}_annotations_08_entropy_filter.json"
             ))?;
 
-        // VEP
-        info!("VEP annotation.");
+        // Final VEP annotation
+        info!("Annotation with VEP.");
         variants_collections
             .iter()
             .try_for_each(|c| -> anyhow::Result<()> {
@@ -504,12 +425,14 @@ impl Run for Somatic {
                 ext_annot.annotate_vep(&c.variants, &annotations)?;
                 Ok(())
             })?;
+
         annotations
             .callers_stat(Some(Box::new(caller_cat_anns)))
             .save_to_json(&format!("{stats_dir}/{id}_annotations_09_vep.json"))?;
 
         annotations.vep_stats()?;
 
+        // Merge all variants into a final collection
         let variants = variants_collections.into_iter().fold(
             Variants::default(),
             |mut acc, variants_collection| {
@@ -550,3 +473,292 @@ pub fn const_stats(id: String, config: Config) -> anyhow::Result<()> {
 
     Ok(())
 }
+
+/// Holds statistical data for somatic variant pipeline processing,
+/// including summary counts and input categorization.
+#[derive(Debug, Default, Clone)]
+pub struct SomaticPipeStats {
+    /// Summary of input variant collections grouped by sample type.
+    pub input: InputStats,
+
+    /// Number of variants removed because they were labeled as germline or solo constitutional.
+    pub n_constit_germline: usize,
+
+    /// Number of variants removed because of low read depth in the constitutional sample.
+    pub n_low_constit: usize,
+
+    /// Number of variants in constitutional samples with high alternative allele count.
+    pub n_high_alt_constit: usize,
+
+    /// Number of high-alt constitutional variants that are also found in gnomAD.
+    pub n_high_alt_constit_gnomad: usize,
+
+    /// Number of variants filtered due to low entropy (indicative of low complexity regions).
+    pub n_low_entropies: usize,
+}
+
+impl SomaticPipeStats {
+    /// Initializes a `SomaticPipeStats` object with populated `InputStats` based on the
+    /// provided `VariantCollection`s.
+    pub fn init(collections: &[VariantCollection]) -> Self {
+        Self {
+            input: InputStats::from_collections(collections),
+            ..Default::default()
+        }
+    }
+
+    /// Generates a tumor-vs-germline annotation matrix and writes it to a JSON file.
+    ///
+    /// This method analyzes co-occurrence patterns between tumor and germline/constit
+    /// variant callers by iterating through annotation statistics from `AnnotationsStats`.
+    /// It builds a matrix where each row corresponds to a **tumor caller** and each column
+    /// corresponds to a **germline or constitutional caller**. Each cell contains the number
+    /// of variant calls that were annotated by both the tumor and the germline/constit caller.
+    ///
+    /// In addition to writing a structured JSON file to the specified path, the function also
+    /// prints a tab-separated human-readable matrix to stdout for convenience.
+    ///
+    /// # Parameters
+    ///
+    /// - `stats`: A reference to [`AnnotationsStats`] containing categorical annotations.
+    ///   This object holds a frequency map where keys are combinations of `Annotation`
+    ///   values (serialized as strings) and values are occurrence counts.
+    /// - `json_path`: The path where the resulting JSON output file should be written.
+    ///
+    /// # Output Formats
+    ///
+    /// ## JSON Output (`json_path`)
+    ///
+    /// The output JSON is an **array of tumor caller records**, each containing:
+    /// - `caller_name`: The name of the tumor caller (as a string).
+    /// - `germline`: An array of JSON objects, each with one key-value pair,
+    ///   where the key is a germline/constit caller (or combination) and the value is the count.
+    ///
+    /// ### Example
+    /// ```json
+    /// [
+    ///   {
+    ///     "caller_name": "DeepVariant SoloTumor",
+    ///     "germline": [
+    ///       {"ClairS Germline": 99978},
+    ///       {"ClairS Germline + DeepVariant SoloConstit": 4710570}
+    ///     ]
+    ///   },
+    ///   {
+    ///     "caller_name": "ClairS Somatic",
+    ///     "germline": [
+    ///       {"ClairS Germline": 944},
+    ///       {"ClairS Germline + DeepVariant SoloConstit": 362}
+    ///     ]
+    ///   }
+    /// ]
+    /// ```
+    ///
+    /// ## Console Output (TSV)
+    ///
+    /// A tab-separated matrix is printed to stdout. Each row begins with a tumor caller,
+    /// followed by columns showing germline/constit caller combinations and their counts.
+    ///
+    /// # Notes
+    /// - Tumor callers are collected from `self.input.somatic` and `self.input.solo_tumor`.
+    /// - Germline/constit callers are collected from `self.input.germline` and `self.input.solo_constit`.
+    /// - Keys in the original `AnnotationsStats` map are split using `" + "` and parsed into `Annotation` enums.
+    /// - Germline keys are sorted and joined to form canonical column labels.
+    /// - Tumor and germline annotations are matched by exact `Annotation` equality.
+    ///
+    /// # Errors
+    /// Returns an error if:
+    /// - Any annotation key in `AnnotationsStats` fails to parse into a valid `Annotation`.
+    /// - The JSON output file cannot be created or written to.
+    ///
+    /// # Dependencies
+    /// Requires that `Annotation` implements `FromStr`, `ToString`, `PartialEq`, and `Clone`.
+    ///
+    /// # Example Usage
+    /// ```rust,ignore
+    /// let stats = SomaticPipeStats::init(&collections);
+    /// stats.annot_init(&annotation_stats, "output/matrix.json")?;
+    /// ```
+    pub fn annot_init(&self, stats: &AnnotationsStats, json_path: &str) -> anyhow::Result<()> {
+        // Parse annotations from stats
+        let stats: Vec<(Vec<Annotation>, u64)> = stats
+            .categorical
+            .iter()
+            .map(|e| {
+                let anns = e
+                    .key()
+                    .split(" + ")
+                    .map(|k| k.parse())
+                    .collect::<anyhow::Result<Vec<Annotation>>>()
+                    .map_err(|err| {
+                        anyhow::anyhow!("Error while splitting key in AnnotationsStats.\n{err}")
+                    })?;
+                Ok((anns, *e.value()))
+            })
+            .collect::<anyhow::Result<Vec<(Vec<Annotation>, u64)>>>()?;
+
+        // Collect tumor and germline callers from input stats
+        let callers_somatic_solo_tumor = [
+            self.input
+                .somatic
+                .iter()
+                .map(|(caller, _)| caller.clone())
+                .collect::<Vec<Annotation>>(),
+            self.input
+                .solo_tumor
+                .iter()
+                .map(|(caller, _)| caller.clone())
+                .collect(),
+        ]
+        .concat();
+
+        let callers_germline_solo_constit = [
+            self.input
+                .germline
+                .iter()
+                .map(|(caller, _)| caller.clone())
+                .collect::<Vec<Annotation>>(),
+            self.input
+                .solo_constit
+                .iter()
+                .map(|(caller, _)| caller.clone())
+                .collect(),
+        ]
+        .concat();
+
+        // Build a matrix of tumor vs germline hits
+        let mut with_germline: HashMap<String, HashMap<String, u64>> = HashMap::new();
+        stats.iter().for_each(|(anns, v)| {
+            // Only proceed if this annotation includes a germline/constit sample
+            if anns.iter().any(|a| {
+                matches!(
+                    a,
+                    Annotation::Callers(_, Sample::SoloConstit)
+                        | Annotation::Callers(_, Sample::Germline)
+                )
+            }) {
+                // Find all tumor callers present in this annotation set
+                let n_by_tumor: Vec<(String, u64)> = callers_somatic_solo_tumor
+                    .iter()
+                    .flat_map(|tumor| {
+                        if anns.contains(tumor) {
+                            vec![(tumor.to_string(), *v)]
+                        } else {
+                            vec![]
+                        }
+                    })
+                    .collect();
+
+                // Build a normalized germline key
+                let mut germline_caller: Vec<String> = callers_germline_solo_constit
+                    .iter()
+                    .flat_map(|germ| {
+                        if anns.contains(germ) {
+                            vec![germ.to_string()]
+                        } else {
+                            vec![]
+                        }
+                    })
+                    .collect();
+                germline_caller.sort();
+                let germline_caller = germline_caller.join(" + ");
+
+                // Update matrix: tumor -> germline -> count
+                n_by_tumor.iter().for_each(|(tumoral_caller, n)| {
+                    if let Some(row) = with_germline.get_mut(tumoral_caller) {
+                        if let Some(col) = row.get_mut(&germline_caller) {
+                            *col += *n;
+                        } else {
+                            row.insert(germline_caller.to_string(), *n);
+                        }
+                    } else {
+                        let mut row = HashMap::new();
+                        row.insert(germline_caller.to_string(), *n);
+                        with_germline.insert(tumoral_caller.to_string(), row);
+                    }
+                });
+            }
+        });
+
+        // Extract all unique germline caller labels
+        let mut germlines_callers: Vec<String> = with_germline
+            .iter()
+            .flat_map(|(_, r)| {
+                r.iter()
+                    .map(|(k, _)| k.to_string())
+                    .collect::<Vec<String>>()
+            })
+            .collect();
+        germlines_callers.sort();
+        germlines_callers.dedup();
+
+        // Print a readable tab-separated matrix
+        let mut json = Vec::new();
+        let mut lines: Vec<String> = with_germline
+            .iter()
+            .map(|(tumor, row)| {
+                json.push(format!(
+                    "{{\"caller_name\": \"{tumor}\", \"germline\": [{}] }}",
+                    germlines_callers
+                        .iter()
+                        .map(|g| {
+                            let v = row.get(g).unwrap_or(&0);
+                            format!("{{\"{g}\": {v}}}")
+                        })
+                        .join(", ")
+                ));
+                format!(
+                    "{tumor}\t{}",
+                    germlines_callers
+                        .iter()
+                        .map(|g| {
+                            let v = row.get(g).unwrap_or(&0);
+                            format!("{g}: {v}")
+                        })
+                        .join("\t")
+                )
+            })
+            .collect();
+        lines.sort();
+        println!("{}", lines.join("\n"));
+
+        // Write JSON to file
+        let json = format!("[{}]", json.join(", "));
+        let mut file = File::create(json_path)?;
+        file.write_all(json.as_bytes())?;
+
+        Ok(())
+    }
+}
+
+#[derive(Debug, Default, Clone)]
+pub struct InputStats { // per-sample-type lists of (caller annotation, number of variants in that caller's collection)
+    pub solo_tumor: Vec<(Annotation, usize)>, // callers tagged `Sample::SoloTumor`
+    pub solo_constit: Vec<(Annotation, usize)>, // callers tagged `Sample::SoloConstit`
+    pub germline: Vec<(Annotation, usize)>, // callers tagged `Sample::Germline`
+    pub somatic: Vec<(Annotation, usize)>, // callers tagged `Sample::Somatic`
+}
+
+impl InputStats {
+    /// Groups the callers of `collections` by sample type.
+    ///
+    /// Every collection whose `caller` is an `Annotation::Callers` tagged with a
+    /// `SoloTumor`, `SoloConstit`, `Germline` or `Somatic` sample contributes one
+    /// `(caller, number of variants)` entry to the matching list, in input order.
+    /// Collections carrying any other caller annotation are ignored.
+    pub fn from_collections(collections: &[VariantCollection]) -> Self {
+        let mut grouped = Self::default();
+        for coll in collections {
+            // Pick the destination list first so the caller is cloned only for
+            // collections that are actually recorded.
+            let bucket = match &coll.caller {
+                Annotation::Callers(_, Sample::SoloTumor) => &mut grouped.solo_tumor,
+                Annotation::Callers(_, Sample::SoloConstit) => &mut grouped.solo_constit,
+                Annotation::Callers(_, Sample::Germline) => &mut grouped.germline,
+                Annotation::Callers(_, Sample::Somatic) => &mut grouped.somatic,
+                _ => continue,
+            };
+            bucket.push((coll.caller.clone(), coll.variants.len()));
+        }
+        grouped
+    }
+}

+ 94 - 55
src/variant/variant.rs

@@ -1046,16 +1046,6 @@ pub trait Label {
     fn label(&self) -> String;
 }
 
-// pub trait AsAny {
-//     fn as_any(&self) -> &dyn std::any::Any;
-// }
-//
-// impl<T: 'static> AsAny for T {
-//     fn as_any(&self) -> &dyn std::any::Any {
-//         self
-//     }
-// }
-
 /// A trait alias for all dynamically executable pipeline runners.
 ///
 /// This trait represents any component that:
@@ -1097,15 +1087,22 @@ pub type ShouldRunBox = Box<dyn ShouldRunTrait>;
 /// This macro uses `?`, so it must be called inside a function that returns `anyhow::Result`.
 #[macro_export]
 macro_rules! create_should_run {
-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
-        vec![
-            $(
-                Box::new(<$runner>::initialize($id, $config.clone())?) as ShouldRunBox
-            ),+
-        ]
-    };
+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{ // one or more runner types, optional trailing comma
+        use anyhow::Context; // brings `with_context` into scope for the expansion
+        let mut runners: Vec<ShouldRunBox> = Vec::new(); // filled in the order the runner types are listed
+        $(
+            let runner = <$runner>::initialize($id, $config.clone()) // `$id` / `$config` are re-expanded for every runner
+                .with_context(|| format!(
+                    "Failed to initialize should-run checker {} for {}",
+                    stringify!($runner), $id
+                ))?; // `?` returns from the enclosing function on the first failing runner
+            runners.push(Box::new(runner) as ShouldRunBox);
+        )+
+        runners // value of the macro expression
+    }};
 }
 
+
 /// Macro to initialize and box a list of solo-mode pipeline components that implement `ShouldRunTrait`.
 ///
 /// This is typically used for per-timepoint variant callers (e.g., `DeepVariant`),
@@ -1138,13 +1135,19 @@ macro_rules! create_should_run {
 /// This macro uses `?` and must be called inside a `Result`-returning context.
 #[macro_export]
 macro_rules! create_should_run_solo {
-    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
-        vec![
-            $(
-                Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as ShouldRunBox
-            ),+
-        ]
-    };
+    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {{ // one or more `Runner, arg` pairs, optional trailing comma
+        use anyhow::Context; // brings `with_context` into scope for the expansion
+        let mut runners: Vec<ShouldRunBox> = Vec::new(); // filled in the order the pairs are listed
+        $(
+            let runner = <$runner>::initialize($id, $arg, $config.clone()) // `$arg` is expanded here and again in the message below
+                .with_context(|| format!(
+                    "Failed to initialize solo should-run checker {} for '{}' and arg '{}'",
+                    stringify!($runner), $id, $arg
+                ))?; // `$id` and `$arg` are formatted with `{}`; `?` returns on the first failure
+            runners.push(Box::new(runner) as ShouldRunBox);
+        )+
+        runners // value of the macro expression
+    }};
 }
 
 /// Macro to initialize and box a list of pipeline components that must run once per timepoint
@@ -1187,16 +1190,29 @@ macro_rules! create_should_run_solo {
 /// This macro uses `?`, so it must be called inside a function that returns `Result`.
 #[macro_export]
 macro_rules! create_should_run_normal_tumoral {
-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
-        vec![
-            $(
-                Box::new(<$runner>::initialize($id, &$config.tumoral_name, $config.clone())?) as ShouldRunBox,
-                Box::new(<$runner>::initialize($id, &$config.normal_name, $config.clone())?) as ShouldRunBox
-            ),+
-        ]
-    };
+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{ // one or more runner types, optional trailing comma
+        use anyhow::Context; // brings `with_context` into scope for the expansion
+        let mut runners: Vec<ShouldRunBox> = Vec::new(); // per runner type: tumoral entry first, then normal
+        $(
+            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, $config.clone()) // instance for `$config.tumoral_name`
+                .with_context(|| format!(
+                    "Failed to initialize tumoral should-run checker {} for {}",
+                    stringify!($runner), $id
+                ))?; // `?` returns from the enclosing function on failure
+            runners.push(Box::new(tumoral) as ShouldRunBox);
+
+            let normal = <$runner>::initialize($id, &$config.normal_name, $config.clone()) // instance for `$config.normal_name`
+                .with_context(|| format!(
+                    "Failed to initialize normal should-run checker {} for {}",
+                    stringify!($runner), $id
+                ))?; // `?` returns from the enclosing function on failure
+            runners.push(Box::new(normal) as ShouldRunBox);
+        )+
+        runners // value of the macro expression
+    }};
 }
 
+
 /// Executes each runner in the slice only if `should_run()` returns true.
 ///
 /// # Arguments
@@ -1233,15 +1249,22 @@ pub type CallerBox = Box<dyn RunnerVariants + Send + Sync>;
 
 #[macro_export]
 macro_rules! init_somatic_callers {
-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
-        vec![
-            $(
-                Box::new(<$runner>::initialize($id, $config.clone())?) as CallerBox
-            ),+
-        ]
-    };
+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{ // one or more caller types, optional trailing comma
+        use anyhow::Context; // brings `with_context` into scope for the expansion
+        let mut callers: Vec<CallerBox> = Vec::new(); // filled in the order the caller types are listed
+        $(
+            let caller = <$runner>::initialize($id, $config.clone()) // `$id` / `$config` are re-expanded for every caller
+                .with_context(|| format!(
+                    "Failed to initialize somatic caller {} for {}",
+                    stringify!($runner), $id
+                ))?; // `?` returns from the enclosing function on the first failing caller
+            callers.push(Box::new(caller) as CallerBox);
+        )+
+        callers // value of the macro expression
+    }};
 }
 
+
 /// Macro to initialize and box a list of **solo-mode variant callers** for specific timepoints,
 /// where each runner implements `RunnerVariants`.
 ///
@@ -1280,15 +1303,19 @@ macro_rules! init_somatic_callers {
 /// This macro uses `?` internally, so it must be used inside a `Result`-returning context.
 #[macro_export]
 macro_rules! init_solo_callers {
-    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
-        vec![
-            $(
-                Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as CallerBox
-            ),+
-        ]
-    };
+    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {{ // one or more `Runner, arg` pairs, optional trailing comma
+        // Trait imports are not hygienic in `macro_rules!`: without this `use`,
+        // `.with_context` below only resolves when the call site happens to
+        // import `anyhow::Context` itself.
+        use anyhow::Context;
+        let mut callers: Vec<CallerBox> = Vec::new(); // filled in the order the pairs are listed
+        $(
+            let caller = <$runner>::initialize($id, $arg, $config.clone())
+                .with_context(|| format!(
+                    "Failed to initialize caller {} for {}",
+                    stringify!($runner), $id
+                ))?; // `?` returns from the enclosing function on the first failing caller
+            callers.push(Box::new(caller) as CallerBox);
+        )+
+        callers // value of the macro expression
+    }};
 }
-
 /// Macro to initialize and box a list of solo-mode **variant callers** for both `normal` and `tumoral` timepoints.
 ///
 /// This is designed for types like `DeepVariant` that implement `RunnerVariants` and require
@@ -1328,14 +1355,26 @@ macro_rules! init_solo_callers {
 /// This macro uses `?`, so it must be called inside a `Result`-returning context.
 #[macro_export]
 macro_rules! init_solo_callers_normal_tumoral {
-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
-        vec![
-            $(
-                Box::new(<$runner>::initialize($id, &$config.tumoral_name, $config.clone())?) as CallerBox,
-                Box::new(<$runner>::initialize($id, &$config.normal_name, $config.clone())?) as CallerBox
-            ),+
-        ]
-    };
+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{ // one or more caller types, optional trailing comma
+        use anyhow::Context; // brings `with_context` into scope for the expansion
+        let mut callers: Vec<CallerBox> = Vec::new(); // per caller type: tumoral entry first, then normal
+        $(
+            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, $config.clone()) // instance for `$config.tumoral_name`
+                .with_context(|| format!(
+                    "Failed to initialize tumoral caller {} for {} '{}'",
+                    stringify!($runner), $id, $config.tumoral_name
+                ))?; // message names the runner type, the id and the tumoral name
+            callers.push(Box::new(tumoral) as CallerBox);
+
+            let normal = <$runner>::initialize($id, &$config.normal_name, $config.clone()) // instance for `$config.normal_name`
+                .with_context(|| format!(
+                    "Failed to initialize normal caller {} for {} '{}'",
+                    stringify!($runner), $id, $config.normal_name
+                ))?; // message names the runner type, the id and the normal name
+            callers.push(Box::new(normal) as CallerBox);
+        )+
+        callers // value of the macro expression
+    }};
 }
 
 // pub fn run_variants(iterable: &mut [CallerBox]) -> anyhow::Result<()> {