8 months ago · a5880b4b84
--- a/src/annotation/cosmic.rs
+++ b/src/annotation/cosmic.rs
@@ -3,32 +3,75 @@ use std::str::FromStr;
 
				 use bitcode::{Decode, Encode};
			
 
				 use serde::{Deserialize, Serialize};
			
 
				 
			
 
				+/// Represents parsed COSMIC (Catalogue Of Somatic Mutations In Cancer) data.
			
 
				+///
			
 
				+/// This struct currently holds only the number of times a variant was observed in COSMIC.
			
 
				 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Encode, Decode)]
			
 
				 pub struct Cosmic {
			
 
				+    /// The number of times the variant was recorded in the COSMIC database.
			
 
				     pub cosmic_cnt: u64,
			
 
				 }
			
 
				 
			
 
				 impl FromStr for Cosmic {
			
 
				     type Err = anyhow::Error;
			
 
				 
			
 
				+    /// Parses a `Cosmic` instance from a semicolon-delimited string.
			
 
				+    ///
			
 
				+    /// # Expected Input Format
			
 
				+    /// The input string must follow the format:
			
 
				+    ///
			
 
				+    /// ```text
			
 
				+    /// <field1>;<field2>;CNT=<number>
			
 
				+    /// ```
			
 
				+    ///
			
 
				+    /// - The input must contain exactly three parts, separated by semicolons (`;`).
			
 
				+    /// - The third part must be of the form `<key>=<number>` (normally `CNT=<number>`), where `<number>` can be parsed as a `u64`; the key itself is not checked.
			
 
				+    /// - If the first part contains the word `"MISSING"`, parsing will fail.
			
 
				+    ///
			
 
				+    /// # Examples
			
 
				+    ///
			
 
				+    /// ```ignore
			
 
				+    /// use your_crate::Cosmic;
			
 
				+    /// use std::str::FromStr;
			
 
				+    ///
			
 
				+    /// let input = "ID1;info;CNT=42";
			
 
				+    /// let cosmic = Cosmic::from_str(input).unwrap();
			
 
				+    /// assert_eq!(cosmic.cosmic_cnt, 42);
			
 
				+    /// ```
			
 
				+    ///
			
 
				+    /// # Errors
			
 
				+    ///
			
 
				+    /// - Returns an error if the string does not contain exactly three semicolon-separated parts.
			
 
				+    /// - Returns an error if `"MISSING"` is found in the first part.
			
 
				+    /// - Returns an error if the third part is not in `key=value` format.
			
 
				+    /// - Returns an error if the value is not a valid `u64`.
			
 
				     fn from_str(s: &str) -> anyhow::Result<Self> {
			
 
				-        let vs: Vec<&str> = s.split(";").collect();
			
 
				+        let s = s.trim();
			
 
				+        let vs: Vec<&str> = s.split(";").map(str::trim).collect();
			
 
				         if vs.len() != 3 {
			
 
				             return Err(anyhow::anyhow!(
			
 
				-                "Error while parsing Cosmic results not the right number of parts for {s}"
			
 
				+                "Expected 3 semicolon-separated parts in Cosmic string, got {}: {s}",
			
 
				+                vs.len()
			
 
				             ));
			
 
				         }
			
 
				 
			
 
				         if vs[0].contains("MISSING") {
			
 
				-            Err(anyhow::anyhow!("MISSING values in Cosmic results: {s}"))
			
 
				-        } else {
			
 
				-            let v: Vec<&str> = vs[2].split("=").collect();
			
 
				-
			
 
				-            Ok(Cosmic {
			
 
				-                cosmic_cnt: v[1]
			
 
				-                    .parse()
			
 
				-                    .map_err(|e| anyhow::anyhow!("Failed to parse COSMIC CNT.\n{e}"))?,
			
 
				-            })
			
 
				+            return Err(anyhow::anyhow!("MISSING values in Cosmic results: {s}"));
			
 
				         }
			
 
				+
			
 
				+        let v: Vec<&str> = vs[2].split("=").map(str::trim).collect();
			
 
				+
			
 
				+        if v.len() != 2 {
			
 
				+            return Err(anyhow::anyhow!(
			
 
				+                "Expected key=value format in third field: {}",
			
 
				+                vs[2]
			
 
				+            ));
			
 
				+        }
			
 
				+
			
 
				+        let count = v[1]
			
 
				+            .parse::<u64>()
			
 
				+            .map_err(|e| anyhow::anyhow!("Failed to parse COSMIC CNT from '{}': {}", v[1], e))?;
			
 
				+
			
 
				+        Ok(Cosmic { cosmic_cnt: count })
			
 
				     }
			
 
				 }
			
--- a/src/annotation/mod.rs
+++ b/src/annotation/mod.rs
@@ -528,3 +528,19 @@ impl VepStats {
 
				 pub trait CallerCat {
			
 
				     fn caller_cat(&self) -> Annotation;
			
 
				 }
			
 
				+
			
 
				+/// Returns true if the annotations include both:
			
 
				+/// - a GnomAD entry with AF > 0
			
 
				+/// - and a ConstitAlt entry with n_alt > 0
			
 
				+pub fn is_gnomad_and_constit_alt(anns: &[Annotation]) -> bool {
			
 
				+    let gnomad = anns.iter().any(|a| {
			
 
				+        matches!(a, Annotation::GnomAD(g) if g.gnomad_af > 0.0)
			
 
				+    });
			
 
				+
			
 
				+    let constit_alt = anns.iter().any(|a| {
			
 
				+        matches!(a, Annotation::ConstitAlt(n) if *n > 0)
			
 
				+    });
			
 
				+
			
 
				+    gnomad && constit_alt
			
 
				+}
			
 
				+
			
--- a/src/callers/clairs.rs
+++ b/src/callers/clairs.rs
@@ -3,7 +3,7 @@ use crate::{
 
				     collection::{vcf::Vcf, Initialize, ShouldRun},
			
 
				     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
			
 
				     config::Config,
			
 
				-    helpers::{is_file_older, temp_file_path},
			
 
				+    helpers::{is_file_older, remove_dir_if_exists, temp_file_path},
			
 
				     io::vcf::read_vcf,
			
 
				     runners::{run_wait, DockerRun, Run},
			
 
				     variant::{
			
@@ -12,7 +12,7 @@ use crate::{
 
				     },
			
 
				 };
			
 
				 use anyhow::{Context, Ok};
			
 
				-use log::{debug, info};
			
 
				+use log::{debug, info, warn};
			
 
				 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
			
 
				 use std::{fs, path::Path};
			
 
				 
			
@@ -24,6 +24,9 @@ use std::{fs, path::Path};
 
				 /// - Handling and filtering of output VCFs
			
 
				 /// - Logging and diagnostic tracking
			
 
				 /// - Integration with variant annotation workflows
			
 
				+///
			
 
				+/// # References
			
 
				+/// - ClairS: <https://github.com/HKU-BAL/ClairS>
			
 
				 #[derive(Debug, Clone)]
			
 
				 pub struct ClairS {
			
 
				     pub id: String,
			
@@ -34,7 +37,7 @@ pub struct ClairS {
 
				 impl Initialize for ClairS {
			
 
				     /// Initializes the ClairS runner.
			
 
				     ///
			
 
				-    /// This method constructs a `ClairS` instance with logging and configuration setup,
			
 
				+    /// This method constructs a [`ClairS`] instance with logging and configuration setup,
			
 
				     /// and ensures the output directory is cleaned up if the results are outdated or force execution is enabled.
			
 
				     ///
			
 
				     /// # Arguments
			
@@ -42,7 +45,7 @@ impl Initialize for ClairS {
 
				     /// * `config` - Pipeline-wide configuration object containing paths, resources, and settings.
			
 
				     ///
			
 
				     /// # Returns
			
 
				-    /// A fully initialized `ClairS` instance ready for execution.
			
 
				+    /// A fully initialized [`ClairS`] instance ready for execution.
			
 
				     ///
			
 
				     /// # Errors
			
 
				     /// Returns an error if the output directory fails to be removed when necessary.
			
@@ -61,9 +64,8 @@ impl Initialize for ClairS {
 
				             config,
			
 
				         };
			
 
				 
			
 
				-        let passed_vcf = clairs.config.clairs_passed_vcf(&clairs.id);
			
 
				-        if (clairs.config.clairs_force && Path::new(&passed_vcf).exists()) || clairs.should_run() {
			
 
				-            fs::remove_dir_all(clairs.config.clairs_output_dir(&clairs.id))?;
			
 
				+        if clairs.config.clairs_force || clairs.should_run() {
			
 
				+            remove_dir_if_exists(&clairs.config.clairs_output_dir(&clairs.id))?;
			
 
				         }
			
 
				 
			
 
				         Ok(clairs)
			
@@ -74,8 +76,12 @@ impl ShouldRun for ClairS {
 
				     /// Determines whether ClairS should be re-run based on BAM modification timestamps.
			
 
				     fn should_run(&self) -> bool {
			
 
				         let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
			
 
				-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
			
 
				+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
			
 
				+        if result {
			
 
				+            warn!("ClairS should run for id: {}.", self.id);
			
 
				+        }
			
 
				+        result
			
 
				     }
			
 
				 }
			
 
				 
			
@@ -202,7 +208,7 @@ impl Run for ClairS {
 
				                 .save_to_file(&log_file)
			
 
				                 .context(format!("Error while writing logs into {log_file}"))?;
			
 
				 
			
 
				-            fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?;
			
 
				+            fs::remove_file(&tmp_file).context(format!("Failed to remove temporary file {tmp_file}"))?;
			
 
				         } else {
			
 
				             debug!(
			
 
				                 "ClairS PASSED VCF already exists for {}, skipping execution.",
			
@@ -231,7 +237,7 @@ impl Variants for ClairS {
 
				     /// * `annotations` - A reference to the global annotations structure used to store variant metadata.
			
 
				     ///
			
 
				     /// # Returns
			
 
				-    /// A `VariantCollection` with the list of variants, the source VCF file, and the associated caller tag.
			
 
				+    /// A [`VariantCollection`] with the list of variants, the source VCF file, and the associated caller tag.
			
 
				     ///
			
 
				     /// # Errors
			
 
				     /// Will return an error if the VCF file is unreadable, missing, or malformed.
			
@@ -300,4 +306,3 @@ impl Label for ClairS {
 
				         self.caller_cat().to_string()
			
 
				     }
			
 
				 }
			
 
				-
			
--- a/src/callers/deep_somatic.rs
+++ b/src/callers/deep_somatic.rs
@@ -9,7 +9,7 @@ use crate::{
 
				     collection::{vcf::Vcf, Initialize, ShouldRun},
			
 
				     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
			
 
				     config::Config,
			
 
				-    helpers::is_file_older,
			
 
				+    helpers::{is_file_older, remove_dir_if_exists},
			
 
				     io::vcf::read_vcf,
			
 
				     runners::{run_wait, DockerRun, Run},
			
 
				     variant::{
			
@@ -53,11 +53,8 @@ impl Initialize for DeepSomatic {
 
				             log_dir,
			
 
				         };
			
 
				 
			
 
				-        let passed_vcf = deep_somatic.config.deepsomatic_passed_vcf(&deep_somatic.id);
			
 
				-        if (deep_somatic.config.deepsomatic_force && Path::new(&passed_vcf).exists())
			
 
				-            || deep_somatic.should_run()
			
 
				-        {
			
 
				-            fs::remove_dir_all(deep_somatic.config.deepsomatic_output_dir(&deep_somatic.id))?;
			
 
				+        if deep_somatic.config.deepsomatic_force || deep_somatic.should_run() {
			
 
				+            remove_dir_if_exists(&deep_somatic.config.deepsomatic_output_dir(&deep_somatic.id))?;
			
 
				         }
			
 
				 
			
 
				         Ok(deep_somatic)
			
@@ -75,8 +72,12 @@ impl Initialize for DeepSomatic {
 
				 impl ShouldRun for DeepSomatic {
			
 
				     fn should_run(&self) -> bool {
			
 
				         let passed_vcf = &self.config.deepsomatic_passed_vcf(&self.id);
			
 
				-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
			
 
				+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
			
 
				+        if result {
			
 
				+            info!("DeepSomatic should run for id: {}.", self.id);
			
 
				+        }
			
 
				+        result
			
 
				     }
			
 
				 }
			
 
				 
			
--- a/src/callers/deep_variant.rs
+++ b/src/callers/deep_variant.rs
@@ -8,7 +8,7 @@ use crate::{
 
				     collection::{vcf::Vcf, InitializeSolo, ShouldRun},
			
 
				     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
			
 
				     config::Config,
			
 
				-    helpers::is_file_older,
			
 
				+    helpers::{is_file_older, remove_dir_if_exists},
			
 
				     io::vcf::read_vcf,
			
 
				     runners::{run_wait, DockerRun, Run},
			
 
				     variant::{
			
@@ -63,14 +63,12 @@ impl InitializeSolo for DeepVariant {
 
				             config,
			
 
				         };
			
 
				 
			
 
				-        let output_vcf_exists = Path::new(
			
 
				-            &deepvariant
			
 
				-                .config
			
 
				-                .deepvariant_solo_output_vcf(&deepvariant.id, &deepvariant.time_point),
			
 
				-        )
			
 
				-        .exists();
			
 
				-        if (deepvariant.config.deepvariant_force && output_vcf_exists) || deepvariant.should_run() {
			
 
				-            fs::remove_dir_all(deepvariant.config.savana_output_dir(&deepvariant.id))?;
			
 
				+        if deepvariant.config.deepvariant_force || deepvariant.should_run() {
			
 
				+            remove_dir_if_exists(
			
 
				+                &deepvariant
			
 
				+                    .config
			
 
				+                    .deepvariant_output_dir(&deepvariant.id, &deepvariant.time_point),
			
 
				+            )?;
			
 
				         }
			
 
				 
			
 
				         Ok(deepvariant)
			
@@ -89,7 +87,14 @@ impl ShouldRun for DeepVariant {
 
				             .config
			
 
				             .deepvariant_solo_passed_vcf(&self.id, &self.time_point);
			
 
				         let bam = self.config.solo_bam(&self.id, &self.time_point);
			
 
				-        is_file_older(&passed_vcf, &bam).unwrap_or(true)
			
 
				+        let result = is_file_older(&passed_vcf, &bam).unwrap_or(true);
			
 
				+        if result {
			
 
				+            info!(
			
 
				+                "DeepVariant should run for: {} {}.",
			
 
				+                self.id, self.time_point
			
 
				+            );
			
 
				+        }
			
 
				+        result
			
 
				     }
			
 
				 }
			
 
				 
			
@@ -245,4 +250,3 @@ impl Label for DeepVariant {
 
				         self.caller_cat().to_string()
			
 
				     }
			
 
				 }
			
 
				-
			
--- a/src/callers/nanomonsv.rs
+++ b/src/callers/nanomonsv.rs
@@ -13,7 +13,7 @@ use crate::{
 
				     collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun},
			
 
				     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
			
 
				     config::Config,
			
 
				-    helpers::is_file_older,
			
 
				+    helpers::{is_file_older, remove_dir_if_exists},
			
 
				     io::vcf::read_vcf,
			
 
				     runners::{run_wait, CommandRun, Run, RunReport},
			
 
				     variant::{
			
@@ -56,12 +56,9 @@ impl Initialize for NanomonSV {
 
				             config,
			
 
				         };
			
 
				 
			
 
				-        let passed_vcf = nanomonsv.config.nanomonsv_passed_vcf(&nanomonsv.id);
			
 
				-        if (nanomonsv.config.nanomonsv_force && Path::new(&passed_vcf).exists())
			
 
				-            || nanomonsv.should_run()
			
 
				-        {
			
 
				-            fs::remove_dir_all(nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "diag"))?;
			
 
				-            fs::remove_dir_all(nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "mrd"))?;
			
 
				+        if nanomonsv.config.nanomonsv_force || nanomonsv.should_run() {
			
 
				+            remove_dir_if_exists(&nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "diag"))?;
			
 
				+            remove_dir_if_exists(&nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "mrd"))?;
			
 
				         }
			
 
				 
			
 
				         Ok(nanomonsv)
			
@@ -76,8 +73,12 @@ impl ShouldRun for NanomonSV {
 
				     /// `true` if the passed VCF does not exist or is older than any input BAM.
			
 
				     fn should_run(&self) -> bool {
			
 
				         let passed_vcf = self.config.nanomonsv_passed_vcf(&self.id);
			
 
				-        is_file_older(&passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				-            || is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
			
 
				+        let result = is_file_older(&passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				+            || is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
			
 
				+        if result {
			
 
				+            warn!("NanomonSV should run for id: {}.", self.id);
			
 
				+        }
			
 
				+        result
			
 
				     }
			
 
				 }
			
 
				 
			
@@ -128,6 +129,11 @@ impl Run for NanomonSV {
 
				                     "Error while running NanomonSV get for {mrd_result_vcf}"
			
 
				                 ))?;
			
 
				             report.save_to_file(&format!("{}/nanomonsv_get_mrd_", self.log_dir))?;
			
 
				+        } else {
			
 
				+            debug!(
			
 
				+                "NanomonSV `get` results already exists for {} normal, skipping execution.",
			
 
				+                self.id
			
 
				+            )
			
 
				         }
			
 
				 
			
 
				         if !Path::new(&diag_result_vcf).exists() {
			
@@ -143,6 +149,11 @@ impl Run for NanomonSV {
 
				                 "Error while running NanomonSV get for {diag_result_vcf}"
			
 
				             ))?;
			
 
				             report.save_to_file(&format!("{}/nanomonsv_get_diag_", self.log_dir))?;
			
 
				+        } else {
			
 
				+            debug!(
			
 
				+                "NanomonSV `get` results already exists for {} tumoral, skipping execution.",
			
 
				+                self.id
			
 
				+            )
			
 
				         }
			
 
				 
			
 
				         if !Path::new(&vcf_passed).exists() {
			
@@ -151,7 +162,12 @@ impl Run for NanomonSV {
 
				                     .context(format!("Can't index {}", vcf_passed))?;
			
 
				             report
			
 
				                 .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
			
 
				-                .context("Can't save report")?;
			
 
				+                .context("Failed to save report")?;
			
 
				+        } else {
			
 
				+            debug!(
			
 
				+                "NanomonSv PASSED VCF already exists for {}, skipping execution.",
			
 
				+                self.id
			
 
				+            )
			
 
				         }
			
 
				 
			
 
				         Ok(())
			
@@ -179,9 +195,8 @@ impl Variants for NanomonSV {
 
				 
			
 
				         info!("Loading variants from {}: {}", caller, vcf_passed);
			
 
				 
			
 
				-        let variants = read_vcf(&vcf_passed).map_err(|e| {
			
 
				-            anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed)
			
 
				-        })?;
			
 
				+        let variants = read_vcf(&vcf_passed)
			
 
				+            .map_err(|e| anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed))?;
			
 
				 
			
 
				         variants.par_iter().for_each(|v| {
			
 
				             annotations.insert_update(v.hash(), &add);
			
--- a/src/callers/savana.rs
+++ b/src/callers/savana.rs
@@ -6,7 +6,7 @@ use crate::{
 
				         longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
			
 
				     },
			
 
				     config::Config,
			
 
				-    helpers::is_file_older,
			
 
				+    helpers::{is_file_older, remove_dir_if_exists},
			
 
				     io::{readers::get_gz_reader, vcf::read_vcf},
			
 
				     positions::{num_to_contig, GenomeRange},
			
 
				     runners::{run_wait, CommandRun, Run},
			
@@ -17,7 +17,7 @@ use crate::{
 
				 };
			
 
				 use anyhow::Context;
			
 
				 use itertools::Itertools;
			
 
				-use log::{debug, info};
			
 
				+use log::{debug, info, warn};
			
 
				 use rayon::prelude::*;
			
 
				 use serde::{Deserialize, Serialize};
			
 
				 use std::{
			
@@ -62,15 +62,35 @@ impl Initialize for Savana {
 
				             log_dir,
			
 
				         };
			
 
				 
			
 
				-        let output_vcf_exists = Path::new(&savana.config.savana_output_vcf(id)).exists();
			
 
				-        if (savana.config.savana_force && output_vcf_exists) || savana.should_run() {
			
 
				-            fs::remove_dir_all(savana.config.savana_output_dir(id))?;
			
 
				+        // If forced re-run is enabled or a run is needed, remove old output directory
			
 
				+        if savana.config.savana_force || savana.should_run() {
			
 
				+            remove_dir_if_exists(&savana.config.savana_output_dir(id))?;
			
 
				         }
			
 
				 
			
 
				         Ok(savana)
			
 
				     }
			
 
				 }
			
 
				 
			
 
				+impl ShouldRun for Savana {
			
 
				+    /// Determines whether Savana should be re-run based on whether
			
 
				+    /// the filtered PASS VCF is older than the input BAMs.
			
 
				+    ///
			
 
				+    /// If either input BAM (normal or tumor) is newer than the PASS VCF,
			
 
				+    /// Savana is considered out of date and should be re-executed.
			
 
				+    ///
			
 
				+    /// # Returns
			
 
				+    /// `true` if an update is needed, or if timestamps can't be checked (file doesn't exist)
			
 
				+    fn should_run(&self) -> bool {
			
 
				+        let passed_vcf = &self.config.savana_passed_vcf(&self.id);
			
 
				+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
			
 
				+        if result {
			
 
				+            warn!("Savana should run for id: {}.", self.id);
			
 
				+        }
			
 
				+        result
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 impl Run for Savana {
			
 
				     /// Executes the Savana pipeline, including prerequisite phasing and haplotagging steps.
			
 
				     ///
			
@@ -181,22 +201,6 @@ impl Run for Savana {
 
				     }
			
 
				 }
			
 
				 
			
 
				-impl ShouldRun for Savana {
			
 
				-    /// Determines whether Savana should be re-run based on whether
			
 
				-    /// the filtered PASS VCF is older than the input BAMs.
			
 
				-    ///
			
 
				-    /// If either input BAM (normal or tumor) is newer than the PASS VCF,
			
 
				-    /// Savana is considered out of date and should be re-executed.
			
 
				-    ///
			
 
				-    /// # Returns
			
 
				-    /// `true` if an update is needed, or if timestamps can't be checked (file doesn't exist)
			
 
				-    fn should_run(&self) -> bool {
			
 
				-        let passed_vcf = &self.config.savana_passed_vcf(&self.id);
			
 
				-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				 impl Version for Savana {
			
 
				     fn version(config: &Config) -> anyhow::Result<String> {
			
 
				         let savana_args = ["--version"];
			
--- a/src/callers/severus.rs
+++ b/src/callers/severus.rs
@@ -6,7 +6,7 @@ use crate::{
 
				         longphase::LongphasePhase,
			
 
				     },
			
 
				     config::Config,
			
 
				-    helpers::is_file_older,
			
 
				+    helpers::{is_file_older, remove_dir_if_exists},
			
 
				     io::vcf::read_vcf,
			
 
				     runners::{run_wait, CommandRun, Run},
			
 
				     variant::{
			
@@ -53,15 +53,32 @@ impl Initialize for Severus {
 
				             log_dir,
			
 
				         };
			
 
				 
			
 
				-        let output_vcf_exists = Path::new(&severus.config.severus_output_vcf(id)).exists();
			
 
				-        if (severus.config.severus_force && output_vcf_exists) || severus.should_run() {
			
 
				-            fs::remove_dir_all(severus.config.severus_output_dir(id))?;
			
 
				+        if severus.config.severus_force || severus.should_run() {
			
 
				+            remove_dir_if_exists(&severus.config.severus_output_dir(id))?;
			
 
				         }
			
 
				 
			
 
				         Ok(severus)
			
 
				     }
			
 
				 }
			
 
				 
			
 
				+impl ShouldRun for Severus {
			
 
				+    /// Determines whether Severus should re-run based on whether the PASS VCF
			
 
				+    /// is older than either the tumor or normal BAM file.
			
 
				+    ///
			
 
				+    /// # Returns
			
 
				+    ///
			
 
				+    /// `true` if Severus needs to be re-run, otherwise `false`
			
 
				+    fn should_run(&self) -> bool {
			
 
				+        let passed_vcf = &self.config.severus_passed_vcf(&self.id);
			
 
				+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true);
			
 
				+        if result {
			
 
				+            info!("Severus should run for: {}.", self.id);
			
 
				+        }
			
 
				+        result
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 impl Run for Severus {
			
 
				     /// Runs the Severus structural variant caller if its output VCF does not already exist.
			
 
				     ///
			
@@ -145,26 +162,18 @@ impl Run for Severus {
 
				             report
			
 
				                 .save_to_file(&log_file)
			
 
				                 .context(format!("Error while writing logs into {log_file}"))?;
			
 
				+        } else {
			
 
				+            debug!(
			
 
				+                "Severus PASSED VCF already exists for {}, skipping execution.",
			
 
				+                self.id
			
 
				+            );
			
 
				+
			
 
				         }
			
 
				 
			
 
				         Ok(())
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-impl ShouldRun for Severus {
			
 
				-    /// Determines whether Severus should re-run based on whether the PASS VCF
			
 
				-    /// is older than either the tumor or normal BAM file.
			
 
				-    ///
			
 
				-    /// # Returns
			
 
				-    ///
			
 
				-    /// `true` if Severus needs to be re-run, otherwise `false`
			
 
				-    fn should_run(&self) -> bool {
			
 
				-        let passed_vcf = &self.config.severus_passed_vcf(&self.id);
			
 
				-        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
			
 
				-            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				 impl Version for Severus {
			
 
				     fn version(config: &Config) -> anyhow::Result<String> {
			
 
				         let args = [
			
--- a/src/config.rs
+++ b/src/config.rs
@@ -59,6 +59,7 @@ pub struct Config {
 
				     pub mask_bed: String,
			
 
				     pub somatic_min_constit_depth: u16,
			
 
				     pub somatic_max_alt_constit: u16,
			
 
				+    pub entropy_seq_len: usize,
			
 
				     pub min_shannon_entropy: f64,
			
 
				     pub nanomonsv_bin: String,
			
 
				     pub nanomonsv_output_dir: String,
			
@@ -68,6 +69,7 @@ pub struct Config {
 
				     pub nanomonsv_solo_output_dir: String,
			
 
				     pub nanomonsv_solo_passed_vcf: String,
			
 
				     pub somatic_pipe_force: bool,
			
 
				+    pub somatic_pipe_threads: u8,
			
 
				     pub min_high_quality_depth: u32,
			
 
				     pub somatic_scan_force: bool,
			
 
				 }
			
@@ -190,9 +192,11 @@ impl Default for Config {
 
				             somatic_scan_force: false,
			
 
				 
			
 
				             // Pipe
			
 
				-            somatic_pipe_force: true,
			
 
				+            somatic_pipe_force: false,
			
 
				+            somatic_pipe_threads: 150,
			
 
				             somatic_min_constit_depth: 5,
			
 
				             somatic_max_alt_constit: 1,
			
 
				+            entropy_seq_len: 10,
			
 
				             min_shannon_entropy: 1.0,
			
 
				 
			
 
				             min_high_quality_depth: 14,
			
@@ -214,7 +218,7 @@ impl Default for AlignConfig {
 
				     fn default() -> Self {
			
 
				         Self {
			
 
				             dorado_bin: "/data/tools/dorado-0.9.1-linux-x64/bin/dorado".to_string(),
			
 
				-            dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
			
 
				+            dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(),
			
 
				             // NOTE on `dorado_basecall_arg`: since v0.8.0 the CUDA devices must be specified explicitly (exclude the T1000)
			
 
				             ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
			
 
				             ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
			
@@ -344,7 +348,7 @@ impl Config {
 
				     pub fn deepvariant_solo_passed_vcf(&self, id: &str, time: &str) -> String {
			
 
				         format!(
			
 
				             "{}/{}_{}_DeepVariant_PASSED.vcf.gz",
			
 
				-            self.deepvariant_normal_output_dir(id),
			
 
				+            self.deepvariant_output_dir(id, time),
			
 
				             id,
			
 
				             time
			
 
				         )
			
--- a/src/helpers.rs
+++ b/src/helpers.rs
@@ -502,3 +502,14 @@ pub fn is_file_older(file1: &str, file2: &str) -> anyhow::Result<bool> {
 
				 
			
 
				     Ok(mtime1 < mtime2)
			
 
				 }
			
 
				+
			
 
				+pub fn remove_dir_if_exists(dir: &str) -> anyhow::Result<()> {
			
 
				+    match fs::remove_dir_all(dir) {
			
 
				+        Ok(_) => {}
			
 
				+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
			
 
				+        Err(e) => {
			
 
				+            anyhow::bail!("Failed to remove directory '{}': {}", dir, e);
			
 
				+        }
			
 
				+    };
			
 
				+    Ok(())
			
 
				+}
			
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,163 @@
 
				+//! # Long-read Somatic Variant Calling and Analysis Framework
			
 
				+//!
			
 
				+//! This Rust library provides a modular, parallelizable framework for somatic variant calling, annotation, and interpretation from long-read sequencing data. It is designed to support full pipelines for research and clinical workflows across multiple variant callers and analysis stages.
			
 
				+//!
			
 
				+//! ## Key Features
			
 
				+//!
			
 
				+//! - **Pipeline Management**: Full orchestration of Dockerized execution pipelines for tools such as ClairS, Nanomonsv, DeepVariant, Savana, Modkit, and Severus.
			
 
				+//! - **POD5 Demultiplexing and Alignment**: End-to-end support for processing ONT POD5 files:
			
 
				+//!     - Demux using barcode metadata and custom CSV input
			
 
				+//!     - POD5 subsetting and organization by flowcell case
			
 
				+//!     - Integration with basecallers (e.g., Dorado) for read alignment
			
 
				+//! - **Flexible Configuration**: Centralized configuration system (`Config`, `CollectionsConfig`) for all modules and pipelines.
			
 
				+//! - **Input Abstraction**: Unified handling of BAM, POD5, and VCF file collections across cohorts and directories.
			
 
				+//! - **Variant Processing**: Modular loading, filtering, statistical analysis, and annotation of somatic and germline variants.
			
 
				+//! - **Haplotype Phasing and Methylation**: Support for LongPhase-based phasing and Modkit methylation pileups with support for multi-threaded pileup and aggregation.
			
 
				+//! - **Parallel Execution**: Uses `rayon` for efficient multicore parallelization over large cohorts and tasks.
			
 
				+//!
			
 
				+//! ## Module Highlights
			
 
				+//!
			
 
				+//! - `callers`: Interfaces to variant calling tools (ClairS, DeepVariant, Nanomonsv, Savana, etc...)
			
 
				+//! - `runners`: Pipeline runners (e.g. `Somatic`, `SeverusSolo`, `LongphasePhase`) that manage end-to-end execution.
			
 
				+//! - `collection`: Organizes input data across BAMs, VCFs, and POD5 files with auto-detection of completed runs.
			
 
				+//! - `annotation`: VEP line parsing and high-level annotation aggregation.
			
 
				+//! - `pipes`: Composition modules for executing pipelines across callers and post-processing steps.
			
 
				+//! - `functions`: Custom logic for genome assembly, entropy estimation, and internal tooling.
			
 
				+//! - `positions`, `variant`, `helpers`: Utilities for SV modeling, variant filtering, position overlap logic, and helper methods.
			
 
				+//!
			
 
				+//! ---
			
 
				+//!
			
 
				+//! ## 🧬 Workflow Overview
			
 
				+//!
			
 
				+//! ### 1. 📦 From POD5 to BAM Alignment
			
 
				+//!
			
 
				+//! - **Demultiplexing**: POD5 files are subset and demuxed using barcodes (via CSV metadata).
			
 
				+//! - **Flowcell Case Management**: Each sample is identified by a `FlowCellCase` containing its ID, time point, and POD5 directory.
			
 
				+//! - **Alignment**: The `Dorado` module handles alignment of POD5 reads to reference genome, producing BAMs.
			
 
				+//!
			
 
				+//! ```rust,ignore
			
 
				+//! let case = FlowCellCase { id: "PATIENT1", time_point: "diag", barcode: "01", pod_dir: "...".into() };
			
 
				+//! Dorado::init(case, Config::default())?.run_pipe()?;
			
 
				+//! ```
			
 
				+//!
			
 
				+//! ---
			
 
				+//!
			
 
				+//! ### 2. 🧬 Variant Calling (BAM ➝ VCF)
			
 
				+//!
			
 
				+//! Using the aligned BAMs, multiple variant callers can be run in parallel. The `callers` and `runners` modules support:
			
 
				+//!
			
 
				+//! - **ClairS** – somatic small variant calling with LongPhase haplotagging  
			
 
				+//! - **Nanomonsv** – structural variants (SV)  
			
 
				+//! - **DeepVariant** – germline small variants  
			
 
				+//! - **Savana** – SVs and copy number variations (CNV)  
			
 
				+//! - **Modkit** – methylation pileups  
			
 
				+//! - **LongPhase** – phasing and modcalling
			
 
				+//!
			
 
				+//! All workflows can be triggered per-case or per-cohort using `Collections` or `Somatic` runners.
			
 
				+//!
			
 
				+//! ```rust,ignore
			
 
				+//! ClairS::initialize("PATIENT1", Config::default())?.run()?;
			
 
				+//! NanomonSV::initialize("PATIENT1", Config::default())?.run()?;
			
 
				+//! ```
			
 
				+//!
			
 
				+//! ---
			
 
				+//!
			
 
				+//! ### 3. 📈 Aggregation & Statistics (VCF ➝ JSON / Stats)
			
 
				+//!
			
 
				+//! After variant calling:
			
 
				+//!
			
 
				+//! - Annotate with VEP (`annotation` module)
			
 
				+//! - Load and filter with `variant_collection`
			
 
				+//! - Compute variant and region-level stats (e.g., mutation rates, alteration categories, coding overlaps)
			
 
				+//!
			
 
				+//! ```rust,ignore
			
 
				+//! let variants = Variants::load_from_json("/path/to/somatic_variants.json.gz")?;
			
 
				+//! let stats = VariantsStats::new(&variants, "PATIENT1", &config)?;
			
 
				+//! stats.save_to_json("/output/path/stats.json.gz")?;
			
 
				+//! ```
			
 
				+//!
			
 
				+//! ---
			
 
				+//!
			
 
				+//! ### 4. 🧠 Intelligent Task Management (`collection` module)
			
 
				+//!
			
 
				+//! - Auto-discovers available samples, POD5s, BAMs, and VCFs
			
 
				+//! - Detects missing outputs and creates task lists
			
 
				+//! - Tasks are parallelizable using Rayon and can be run on-demand
			
 
				+//!
			
 
				+//! ```rust,ignore
			
 
				+//! let mut collections = Collections::new(CollectionsConfig::default())?;
			
 
				+//! collections.todo()?;      // Identify missing steps
			
 
				+//! collections.run()?;       // Run them automatically
			
 
				+//! ```
			
 
				+//!
			
 
				+//! ---
			
 
				+//!
			
 
				+//! ## 📁 Module Highlights
			
 
				+//!
			
 
				+//! - `callers`: Interfaces to ClairS, DeepVariant, Savana, Nanomonsv, etc.
			
 
				+//! - `runners`: Pipeline runners like `Somatic` and `LongphasePhase`
			
 
				+//! - `collection`: Auto-discovery of BAM/VCF/POD5s, task orchestration
			
 
				+//! - `annotation`: VEP line parsing and transcript-level annotations
			
 
				+//! - `pipes`: High-level pipelines (e.g., `run_somatic`, `todo_deepvariants`)
			
 
				+//! - `variant`: Variant structs, filtering, alteration categories
			
 
				+//! - `positions`, `helpers`, `functions`, `math`: Utility layers
			
 
				+//!
			
 
				+//! ---
			
 
				+//!
			
 
				+//! ## 🔬 Testing
			
 
				+//!
			
 
				+//! Integration tests demonstrate the entire pipeline. Run with logging enabled:
			
 
				+//!
			
 
				+//! ```bash
			
 
				+//! export RUST_LOG=debug
			
 
				+//! cargo test -- --nocapture
			
 
				+//! ```
			
 
				+//!
			
 
				+//! ---
			
 
				+//! ## Example Use Cases
			
 
				+//!
			
 
				+//! - Full somatic variant calling pipeline on matched tumor/normal samples
			
 
				+//! - POD5-based pipeline from raw signal to variants
			
 
				+//! - Aggregation and annotation of SVs across a clinical cohort
			
 
				+//! - Methylation analysis using nanopore-specific tools
			
 
				+//! - Variant calling and analysis in large-scale longitudinal studies
			
 
				+//!
			
 
				+//! ## Getting Started
			
 
				+//!
			
 
				+//! All workflows are initialized from `Config` and driven by the `Collections` structure:
			
 
				+//!
			
 
				+//! ```rust,ignore
			
 
				+//! let config = Config::default();
			
 
				+//! let collections = Collections::new(CollectionsConfig::default())?;
			
 
				+//! collections.todo()?;
			
 
				+//! collections.run()?;
			
 
				+//! ```
			
 
				+//!
			
 
				+//! ## Running Tests
			
 
				+//!
			
 
				+//! Run the full suite with logging enabled:
			
 
				+//!
			
 
				+//! ```bash
			
 
				+//! export RUST_LOG=debug
			
 
				+//! cargo test -- --nocapture
			
 
				+//! ```
			
 
				+//!
			
 
				+//! ## 🔗 References
			
 
				+//! ### Basecalling and alignment
			
 
				+//! - Dorado: <https://github.com/nanoporetech/dorado>
			
 
				+//! ### Variant callers
			
 
				+//! - ClairS: <https://github.com/HKU-BAL/ClairS>
			
 
				+//! - Nanomonsv: <https://github.com/friend1ws/nanomonsv>
			
 
				+//! - Savana: <https://github.com/cortes-ciriano-lab/savana>
			
 
				+//! - DeepVariant: <https://github.com/google/deepvariant>
			
 
				+//! - DeepSomatic: <https://github.com/google/deepsomatic>
			
 
				+//! - LongPhase: <https://github.com/twolinin/longphase>
			
 
				+//! - Modkit: <https://github.com/nanoporetech/modkit>
			
 
				+//! ### Variant annotation
			
 
				+//! - VEP: <https://www.ensembl.org/info/docs/tools/vep/index.html>
			
 
				+//!
			
 
				+//! ---
			
 
				+
			
 
				 use std::sync::{Arc, Mutex};
			
 
				 
			
 
				 pub mod commands;
			
@@ -37,7 +197,7 @@ mod tests {
 
				     use helpers::estimate_shannon_entropy;
			
 
				     use io::bed::read_bed;
			
 
				     use log::{error, info, warn};
			
 
				-    use pipes::somatic::Somatic;
			
 
				+    use pipes::somatic::SomaticPipe;
			
 
				     use positions::{overlaps_par, GenomePosition, GenomeRange};
			
 
				     use rayon::prelude::*;
			
 
				     use runners::Run;
			
@@ -533,10 +693,11 @@ mod tests {
 
				     #[test]
			
 
				     fn pipe_somatic() -> anyhow::Result<()> {   
			
 
				         init();
			
 
				-        let id = "ADJAGBA";
			
 
				-        Somatic::initialize(id, Config::default())?.run()
			
 
				+        let id = "ACHITE";
			
 
				+        SomaticPipe::initialize(id, Config::default())?.run()
			
 
				     }
			
 
				 
			
 
				+   
			
 
				     #[test]
			
 
				     fn overlaps() {
			
 
				         init();
			
@@ -697,7 +858,7 @@ mod tests {
 
				                 continue;
			
 
				             }
			
 
				 
			
 
				-            match Somatic::initialize(id, Config::default())?.run() {
			
 
				+            match SomaticPipe::initialize(id, Config::default())?.run() {
			
 
				                 Ok(_) => (),
			
 
				                 Err(e) => error!("{id} {e}"),
			
 
				             };
			
@@ -710,7 +871,7 @@ mod tests {
 
				         init();
			
 
				         let id = "ADJAGBA";
			
 
				         let config = Config { somatic_pipe_force: true, ..Default::default() };
			
 
				-        match Somatic::initialize(id, config)?.run() {
			
 
				+        match SomaticPipe::initialize(id, config)?.run() {
			
 
				             Ok(_) => (),
			
 
				             Err(e) => error!("{id} {e}"),
			
 
				         };
			
--- a/src/pipes/somatic.rs
+++ b/src/pipes/somatic.rs
@@ -1,7 +1,5 @@
 
				 use crate::{
			
 
				-    create_should_run_normal_tumoral, init_solo_callers_normal_tumoral,
			
 
				-    scan::scan::SomaticScan,
			
 
				-    variant::variant::{run_if_required, ShouldRunBox},
			
 
				+    annotation::is_gnomad_and_constit_alt, collection::ShouldRun, create_should_run_normal_tumoral, init_solo_callers_normal_tumoral, scan::scan::SomaticScan, variant::variant::{run_if_required, ShouldRunBox}
			
 
				 };
			
 
				 use anyhow::Context;
			
 
				 use itertools::Itertools;
			
@@ -11,7 +9,6 @@ use std::{
 
				     fs::{self, File},
			
 
				     io::Write,
			
 
				     path::Path,
			
 
				-    sync::Arc,
			
 
				 };
			
 
				 
			
 
				 use crate::{
			
@@ -31,240 +28,172 @@ use crate::{
 
				     },
			
 
				 };
			
 
				 
			
 
				-pub struct Somatic {
			
 
				+/// Runs the full somatic variant calling pipeline for a single sample (`id`).
			
 
				+///
			
 
				+/// This pipeline orchestrates the entire somatic variant discovery process,
			
 
				+/// starting from raw variant caller outputs (`PASSED VCF`) and applying multiple filtering
			
 
				+/// and annotation steps to produce high-confidence somatic variants.
			
 
				+///
			
 
				+/// In summary, running the pipeline:
			
 
				+/// - Executes and verifies upstream components if necessary
			
 
				+/// - Loads variants from multiple callers (tumor and normal samples)
			
 
				+/// - Applies several annotation and filtering steps (e.g. depth, population frequency, entropy)
			
 
				+/// - Tracks filtering statistics at each step
			
 
				+/// - Outputs high-confidence somatic variants in both `.json.gz` and `.bit` formats
			
 
				+///
			
 
				+/// ## Output Overview
			
 
				+/// The final output includes:
			
 
				+/// - `{tumoral_dir}/{id}_somatic_variants.json.gz`: annotated somatic variants
			
 
				+/// - `{tumoral_dir}/{id}_somatic_variants.bit`: compact binary variant representation
			
 
				+/// - `{stats_dir}/`: multiple intermediate JSON files with annotations and statistics
			
 
				+///
			
 
				+/// ## Steps
			
 
				+///
			
 
				+/// This pipeline performs the following high-level steps:
			
 
				+///
			
 
				+/// ### 1. Output Existence Check
			
 
				+/// If the final JSON result already exists and [`Config::somatic_pipe_force`] is not set,
			
 
				+/// the pipeline aborts early to avoid overwriting results.
			
 
				+///
			
 
				+/// ### 2. Initialization
			
 
				+/// - Prepares statistics and output directories.
			
 
				+/// - Initializes variant annotations.
			
 
				+///
			
 
				+/// ### 3. Pre-requisite Tool Execution
			
 
				+/// Runs any required upstream components (e.g., alignment, basecalling, variant callers) if
			
 
				+/// their outputs are missing, using the [`run_if_required`] logic.
			
 
				+///
			
 
				+/// ### 4. Load Variant Collections
			
 
				+/// - Initializes the configured somatic variant callers and loads their output variants from PASSED VCF.
			
 
				+/// - Also loads germline variants (from [`ClairS::germline`]) for comparative germline filtering.
			
 
				+///
			
 
				+/// ### 5. Statistics Initialization
			
 
				+/// - Initializes a [`SomaticPipeStats`] object to track the number of variants at each step.
			
 
				+/// - Captures initial variant counts before filtering.
			
 
				+/// - Aggregates variant counts from all sources and stores initial annotations for quality control and
			
 
				+///   comparison before filtering.
			
 
				+///
			
 
				+/// ### 6. Filter: Germline & Solo Constitutional Variants
			
 
				+/// - Removes variants labeled as either Germline or Solo Constitutional, assuming they are unlikely to be somatic.
			
 
				+/// - Records count of removed variants in [`SomaticPipeStats::n_constit_germline`].
			
 
				+///
			
 
				+/// ### 7. Annotation: BAM Read Depth and Alt Allele Counts
			
 
				+/// - Uses the constitutional BAM file to annotate each variant with read depth and the number of alternate reads observed.
			
 
				+/// - Flags variants with low depth or excessive alt reads for filtering as specified in [`Config`].
			
 
				+///
			
 
				+/// ### 8. Filtering: Low Depth / High Alt Alleles
			
 
				+/// - Removes variants with low coverage in the constitutional sample, or excessive alt allele support (suggestive of germline origin).
			
 
				+/// - Updates stats:
			
 
				+///   - [`SomaticPipeStats::n_low_constit`]
			
 
				+///   - [`SomaticPipeStats::n_high_alt_constit`]
			
 
				+///
			
 
				+/// ### 9. Annotation: Sequence Entropy
			
 
				+/// Adds Shannon entropy annotation based on the reference sequence context
			
 
				+/// around each variant (cf. [`Config::entropy_seq_len`]) to flag low-complexity regions (often repetitive).
			
 
				+///
			
 
				+/// ### 10. Annotation: External Databases (COSMIC, GnomAD)
			
 
				+///  - Uses external resources to annotate variants with:
			
 
				+///    - COSMIC hits (somatic mutation database)
			
 
				+///    - GnomAD allele frequencies
			
 
				+///
			
 
				+/// ### 11. Filtering: GnomAD + Alt Support in Constitutional Sample
			
 
				+///  - Removes variants that are both present in GnomAD **and** show
			
 
				+///    alternate allele support in the constitutional BAM.
			
 
				+///    These are highly likely to be non-somatic germline polymorphisms.
			
 
				+///  - Updates [`SomaticPipeStats::n_high_alt_constit_gnomad`] stat.
			
 
				+///
			
 
				+/// ### 12. Filtering: Low Shannon Entropy
			
 
				+///  - Removes variants from low-complexity regions with entropy below the configured threshold
			
 
				+///    (cf. [`Config::min_shannon_entropy`]).
			
 
				+///  - Updates [`SomaticPipeStats::n_low_entropies`].
			
 
				+///
			
 
				+/// ### 13. Annotation: VEP (Variant Effect Predictor)
			
 
				+///  Adds transcript-level annotations from Ensembl VEP, providing functional consequences,
			
 
				+///  impacted genes, and regulatory features.
			
 
				+///
			
 
				+/// ### 14. Merging
			
 
				+///  Merges variant collections into a unified [`Variants`] structure,
			
 
				+///  preserving annotations and applying deduplication logic.
			
 
				+///
			
 
				+/// ### 15. Final Statistics and Saving
			
 
				+///  - Saves final annotation stats, VEP summaries, and variant-level statistics.
			
 
				+///  - Exports the final somatic variant set to both compressed JSON and `.bit` formats.
			
 
				+///
			
 
				+/// # Returns
			
 
				+/// - `Ok(())` if all steps completed successfully.
			
 
				+/// - `Err` if any tool fails, if file I/O fails, or if logical conditions are violated (e.g., pre-existing output).
			
 
				+///
			
 
				+/// # Errors
			
 
				+/// - Returns early if the output file already exists and [`Config::somatic_pipe_force`] is `false`.
			
 
				+/// - Wraps all component-specific errors using `anyhow::Result` with context.
			
 
				+///
			
 
				+/// # Side Effects
			
 
				+/// - Runs external tools conditionally (e.g., [`ClairS`], [`DeepSomatic`]).
			
 
				+/// - Creates intermediate directories and annotation JSONs for debugging/QC.
			
 
				+/// - May consume significant compute time depending on the number of callers, annotations, and variants.
			
 
				+///
			
 
				+/// # TODOs
			
 
				+/// - Support compressed intermediate files (`// TODO: GZ !!!`)
			
 
				+/// - Improve filtering metrics reporting (currently not all filtered variants are tracked in final stats).
			
 
				+///
			
 
				+/// # Example Output Files
			
 
				+/// - `tumoral_dir/sample123_somatic_variants.json.gz`
			
 
				+/// - `tumoral_dir/sample123_somatic_variants.bit`
			
 
				+/// - `stats_dir/sample123_annotations_*.json` (intermediate annotation snapshots)
			
 
				+///
			
 
				+/// # See Also
			
 
				+/// - [`Annotations`] – core structure for managing per-variant metadata
			
 
				+/// - [`Variants`] – the merged final variant structure
			
 
				+/// - [`SomaticPipeStats`] – used for tracking variant counts throughout filtering
			
 
				+///
			
 
				+pub struct SomaticPipe {
			
 
				+    /// Unique identifier for the sample.
			
 
				     pub id: String,
			
 
				+    /// Configuration parameters for the pipeline.
			
 
				     pub config: Config,
			
 
				-    pub annotations: Annotations,
			
 
				 }
			
 
				 
			
 
				-impl Initialize for Somatic {
			
 
				+impl Initialize for SomaticPipe {
			
 
				+    /// Initializes a new `SomaticPipe` for sample `id` with the given configuration.
			
 
				     fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
			
 
				         let id = id.to_string();
			
 
				-        Ok(Self {
			
 
				-            id,
			
 
				-            config,
			
 
				-            annotations: Annotations::default(),
			
 
				-        })
			
 
				+        Ok(Self { id, config })
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-#[derive(Debug, Default, Clone)]
			
 
				-pub struct SomaticPipeStats {
			
 
				-    pub input: InputStats,
			
 
				-    pub n_constit_germline: usize,
			
 
				-    pub n_low_constit: usize,
			
 
				-    pub n_high_alt_constit: usize,
			
 
				-    pub n_high_alt_constit_gnomad: usize,
			
 
				-    pub n_low_entropies: usize,
			
 
				-}
			
 
				-
			
 
				-#[derive(Debug, Default, Clone)]
			
 
				-pub struct InputStats {
			
 
				-    pub solo_tumor: Vec<(Annotation, usize)>,
			
 
				-    pub solo_constit: Vec<(Annotation, usize)>,
			
 
				-    pub germline: Vec<(Annotation, usize)>,
			
 
				-    pub somatic: Vec<(Annotation, usize)>,
			
 
				-}
			
 
				-
			
 
				-impl InputStats {
			
 
				-    pub fn from_collections(collections: &[VariantCollection]) -> Self {
			
 
				-        let mut stats = Self::default();
			
 
				-        for collection in collections.iter() {
			
 
				-            match collection.caller {
			
 
				-                Annotation::Callers(_, Sample::SoloTumor) => stats
			
 
				-                    .solo_tumor
			
 
				-                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				-                Annotation::Callers(_, Sample::SoloConstit) => stats
			
 
				-                    .solo_constit
			
 
				-                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				-                Annotation::Callers(_, Sample::Germline) => stats
			
 
				-                    .germline
			
 
				-                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				-                Annotation::Callers(_, Sample::Somatic) => stats
			
 
				-                    .somatic
			
 
				-                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				-                _ => (),
			
 
				-            };
			
 
				-        }
			
 
				-        stats
			
 
				+impl ShouldRun for SomaticPipe {
			
 
				+    fn should_run(&self) -> bool {
			
 
				+        todo!()
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-impl SomaticPipeStats {
			
 
				-    pub fn init(collections: &[VariantCollection]) -> Self {
			
 
				-        Self {
			
 
				-            input: InputStats::from_collections(collections),
			
 
				-            ..Default::default()
			
 
				-        }
			
 
				-    }
			
 
				-
			
 
				-    pub fn annot_init(&self, stats: &AnnotationsStats, json_path: &str) -> anyhow::Result<()> {
			
 
				-        let stats: Vec<(Vec<Annotation>, u64)> = stats
			
 
				-            .categorical
			
 
				-            .iter()
			
 
				-            .map(|e| {
			
 
				-                let anns = e
			
 
				-                    .key()
			
 
				-                    .split(" + ")
			
 
				-                    .map(|k| k.parse())
			
 
				-                    .collect::<anyhow::Result<Vec<Annotation>>>()
			
 
				-                    .map_err(|err| {
			
 
				-                        anyhow::anyhow!("Error while splitting key in AnnotationsStats.\n{err}")
			
 
				-                    })?;
			
 
				-                Ok((anns, *e.value()))
			
 
				-            })
			
 
				-            .collect::<anyhow::Result<Vec<(Vec<Annotation>, u64)>>>()?;
			
 
				-
			
 
				-        let callers_somatic_solo_tumor = [
			
 
				-            self.input
			
 
				-                .somatic
			
 
				-                .iter()
			
 
				-                .map(|(caller, _)| caller.clone())
			
 
				-                .collect::<Vec<Annotation>>(),
			
 
				-            self.input
			
 
				-                .solo_tumor
			
 
				-                .iter()
			
 
				-                .map(|(caller, _)| caller.clone())
			
 
				-                .collect(),
			
 
				-        ]
			
 
				-        .concat();
			
 
				-
			
 
				-        let callers_germline_solo_constit = [
			
 
				-            self.input
			
 
				-                .germline
			
 
				-                .iter()
			
 
				-                .map(|(caller, _)| caller.clone())
			
 
				-                .collect::<Vec<Annotation>>(),
			
 
				-            self.input
			
 
				-                .solo_constit
			
 
				-                .iter()
			
 
				-                .map(|(caller, _)| caller.clone())
			
 
				-                .collect(),
			
 
				-        ]
			
 
				-        .concat();
			
 
				-
			
 
				-        let mut with_germline: HashMap<String, HashMap<String, u64>> = HashMap::new();
			
 
				-        stats.iter().for_each(|(anns, v)| {
			
 
				-            if anns.iter().any(|a| {
			
 
				-                matches!(
			
 
				-                    a,
			
 
				-                    Annotation::Callers(_, Sample::SoloConstit)
			
 
				-                        | Annotation::Callers(_, Sample::Germline)
			
 
				-                )
			
 
				-            }) {
			
 
				-                let n_by_tumor: Vec<(String, u64)> = callers_somatic_solo_tumor
			
 
				-                    .iter()
			
 
				-                    .flat_map(|tumor| {
			
 
				-                        if anns.contains(tumor) {
			
 
				-                            vec![(tumor.to_string(), *v)]
			
 
				-                        } else {
			
 
				-                            vec![]
			
 
				-                        }
			
 
				-                    })
			
 
				-                    .collect();
			
 
				-
			
 
				-                let mut germline_caller: Vec<String> = callers_germline_solo_constit
			
 
				-                    .iter()
			
 
				-                    .flat_map(|germ| {
			
 
				-                        if anns.contains(germ) {
			
 
				-                            vec![germ.to_string()]
			
 
				-                        } else {
			
 
				-                            vec![]
			
 
				-                        }
			
 
				-                    })
			
 
				-                    .collect();
			
 
				-                germline_caller.sort();
			
 
				-                let germline_caller = germline_caller.join(" + ");
			
 
				-
			
 
				-                n_by_tumor.iter().for_each(|(tumoral_caller, n)| {
			
 
				-                    if let Some(row) = with_germline.get_mut(tumoral_caller) {
			
 
				-                        if let Some(col) = row.get_mut(&germline_caller) {
			
 
				-                            *col += *n;
			
 
				-                        } else {
			
 
				-                            row.insert(germline_caller.to_string(), *n);
			
 
				-                        }
			
 
				-                    } else {
			
 
				-                        let mut row = HashMap::new();
			
 
				-                        row.insert(germline_caller.to_string(), *n);
			
 
				-                        with_germline.insert(tumoral_caller.to_string(), row);
			
 
				-                    }
			
 
				-                });
			
 
				-            }
			
 
				-        });
			
 
				-
			
 
				-        let mut germlines_callers: Vec<String> = with_germline
			
 
				-            .iter()
			
 
				-            .flat_map(|(_, r)| {
			
 
				-                r.iter()
			
 
				-                    .map(|(k, _)| k.to_string())
			
 
				-                    .collect::<Vec<String>>()
			
 
				-            })
			
 
				-            .collect();
			
 
				-        germlines_callers.sort();
			
 
				-        germlines_callers.dedup();
			
 
				-
			
 
				-        let mut json = Vec::new();
			
 
				-        let mut lines: Vec<String> = with_germline
			
 
				-            .iter()
			
 
				-            .map(|(tumor, row)| {
			
 
				-                json.push(format!(
			
 
				-                    "{{\"caller_name\": \"{tumor}\", \"germline\": [{}] }}",
			
 
				-                    germlines_callers
			
 
				-                        .iter()
			
 
				-                        .map(|g| {
			
 
				-                            let v = row.get(g).unwrap_or(&0);
			
 
				-                            format!("{{\"{g}\": {v}}}")
			
 
				-                        })
			
 
				-                        .join(", ")
			
 
				-                ));
			
 
				-                format!(
			
 
				-                    "{tumor}\t{}",
			
 
				-                    germlines_callers
			
 
				-                        .iter()
			
 
				-                        .map(|g| {
			
 
				-                            let v = row.get(g).unwrap_or(&0);
			
 
				-                            format!("{g}: {v}")
			
 
				-                        })
			
 
				-                        .join("\t")
			
 
				-                )
			
 
				-            })
			
 
				-            .collect();
			
 
				-        lines.sort();
			
 
				-        println!("{}", lines.join("\n"));
			
 
				-
			
 
				-        let json = format!("[{}]", json.join(", "));
			
 
				-        let mut file = File::create(json_path)?;
			
 
				-        file.write_all(json.as_bytes())?;
			
 
				-
			
 
				-        Ok(())
			
 
				-    }
			
 
				-}
			
 
				-
			
 
				-impl Run for Somatic {
			
 
				+impl Run for SomaticPipe {
			
 
				+    /// Executes the full somatic variant analysis pipeline.
			
 
				     fn run(&mut self) -> anyhow::Result<()> {
			
 
				         let config = self.config.clone();
			
 
				         let id = self.id.clone();
			
 
				+        info!("Running somatic pipe for {id}.");
			
 
				 
			
 
				+        // Define output paths for the final somatic variants
			
 
				         let result_json = format!("{}/{id}_somatic_variants.json.gz", config.tumoral_dir(&id));
			
 
				         let result_bit = format!("{}/{id}_somatic_variants.bit", config.tumoral_dir(&id));
			
 
				 
			
 
				         if Path::new(&result_json).exists() && !config.somatic_pipe_force {
			
 
				-            return Err(anyhow::anyhow!("already exists"));
			
 
				+            return Err(anyhow::anyhow!(
			
 
				+                "Somatic Pipe output already exists for {id}."
			
 
				+            ));
			
 
				         }
			
 
				 
			
 
				-        info!("Running somatic pipe for {id}.");
			
 
				-        let annotations = Arc::new(self.annotations.clone());
			
 
				+        let mut annotations = Annotations::default();
			
 
				 
			
 
				-        // Stats dir
			
 
				+        // Create stats directory if it doesn't exist
			
 
				         let stats_dir = config.somatic_pipe_stats(&id);
			
 
				         if !Path::new(&stats_dir).exists() {
			
 
				             fs::create_dir(&stats_dir)?;
			
 
				         }
			
 
				-        // TODO: GZ !!!
			
 
				-        // LongphasePhase::initialize(&id, self.config.clone())?.run()?;
			
 
				 
			
 
				-        // Initalize variants collections
			
 
				-        info!("Initialization prerequired pipe components...");
			
 
				+        // Initialize and run any pre-required input if necessary
			
 
				+        info!("Initialization prerequired pipe inputs...");
			
 
				 
			
 
				         let mut to_run_if_req = create_should_run!(
			
 
				             &id,
			
@@ -276,13 +205,13 @@ impl Run for Somatic {
 
				             Savana,
			
 
				             DeepSomatic
			
 
				         );
			
 
				-        to_run_if_req.extend(create_should_run_normal_tumoral!(&id, &config, DeepVariant,));
			
 
				+        to_run_if_req.extend(create_should_run_normal_tumoral!(&id, &config, DeepVariant));
			
 
				 
			
 
				         info!("Running prerequired pipe components.");
			
 
				-
			
 
				         run_if_required(&mut to_run_if_req)
			
 
				             .context("Failed to run a prerequired component of somatic pipe.")?;
			
 
				 
			
 
				+        // Initialize variant callers
			
 
				         let mut callers = init_somatic_callers!(
			
 
				             &id,
			
 
				             &config,
			
@@ -295,18 +224,21 @@ impl Run for Somatic {
 
				 
			
 
				         callers.extend(init_solo_callers_normal_tumoral!(&id, &config, DeepVariant,));
			
 
				 
			
 
				+        // Load variants from each caller
			
 
				         info!("Loading variants.");
			
 
				         let mut variants_collections = load_variants(&mut callers, &annotations)
			
 
				             .map_err(|e| anyhow::anyhow!("Error while loading variants\n{e}"))?;
			
 
				 
			
 
				-        info!("Loading Germline");
			
 
				+        // Load germline variants using ClairS
			
 
				+        info!("Loading germline variants.");
			
 
				         let clairs_germline =
			
 
				             ClairS::initialize(&id, self.config.clone())?.germline(&annotations)?;
			
 
				         variants_collections.push(clairs_germline);
			
 
				 
			
 
				+        // Initialize statistics
			
 
				         let mut somatic_stats = SomaticPipeStats::init(&variants_collections);
			
 
				         info!(
			
 
				-            "Variants collections from {} vcf ({} variants)",
			
 
				+            "Variants collections loaded from {} vcf (total of {} variants loaded)",
			
 
				             variants_collections.len(),
			
 
				             variants_collections
			
 
				                 .iter()
			
@@ -314,8 +246,7 @@ impl Run for Somatic {
 
				                 .sum::<usize>()
			
 
				         );
			
 
				 
			
 
				-        let mut annotations = Arc::try_unwrap(annotations)
			
 
				-            .map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
			
 
				+        // Initial annotation stats (caller annotations only)
			
 
				         let caller_cat_anns = |v: &Annotation| matches!(v, Annotation::Callers(_, _));
			
 
				         let annot_init = annotations.callers_stat(Some(Box::new(caller_cat_anns)));
			
 
				         somatic_stats.annot_init(
			
@@ -324,8 +255,11 @@ impl Run for Somatic {
 
				         )?;
			
 
				         annot_init.save_to_json(&format!("{stats_dir}/{id}_annotations_01.json"))?;
			
 
				 
			
 
				-        // Filter: Variants neither Germline nor SoloConstit
			
 
				-        info!("Keeping somatic variants (variants neither in solo nor in germline).");
			
 
				+        // Filter out germline and solo constitutional variants
			
 
				+        info!(
			
 
				+            "Keeping somatic variants (variants neither in solo {} nor in germline).",
			
 
				+            config.normal_name
			
 
				+        );
			
 
				         somatic_stats.n_constit_germline =
			
 
				             annotations.retain_variants(&mut variants_collections, |anns| {
			
 
				                 !anns.iter().any(|ann| {
			
@@ -342,7 +276,7 @@ impl Run for Somatic {
 
				                 "{stats_dir}/{id}_annotations_02_post_germline.json"
			
 
				             ))?;
			
 
				 
			
 
				-        // Annotation: BAM depth, n_alt
			
 
				+        // Annotate with depth and number of alternate reads from constitutional BAM
			
 
				         info!("Reading Constit BAM file for depth and pileup annotation.");
			
 
				         variants_collections.iter().try_for_each(|c| {
			
 
				             c.annotate_with_constit_bam(&annotations, &self.config.normal_bam(&id), 150)
			
@@ -364,7 +298,7 @@ impl Run for Somatic {
 
				             })))
			
 
				             .save_to_json(&format!("{stats_dir}/{id}_annotations_03_bam.json"))?;
			
 
				 
			
 
				-        // Filter: Remove LowConstitDepth from annotations and variants collections
			
 
				+        // Filter based on low constitutional depth
			
 
				         info!(
			
 
				             "Removing variants when depth in constit bam < {}.",
			
 
				             self.config.somatic_min_constit_depth
			
@@ -403,17 +337,22 @@ impl Run for Somatic {
 
				             })))
			
 
				             .save_to_json(&format!("{stats_dir}/{id}_annotations_04_bam_filter.json"))?;
			
 
				 
			
 
				-        // Annotation: Entropy
			
 
				+        // Annotate variants with sequence entropy
			
 
				         info!(
			
 
				             "Entropy annotation from {} sequences.",
			
 
				             self.config.reference
			
 
				         );
			
 
				         variants_collections.iter().for_each(|c| {
			
 
				-            c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
			
 
				+            c.annotate_with_sequence_entropy(
			
 
				+                &annotations,
			
 
				+                &self.config.reference,
			
 
				+                self.config.entropy_seq_len,
			
 
				+                self.config.somatic_pipe_threads,
			
 
				+            );
			
 
				         });
			
 
				 
			
 
				-        // Annotation: Cosmic and GnomAD
			
 
				-        info!("Annotation with Cosmic and GnomAD.");
			
 
				+        // Annotate with external databases like COSMIC and GnomAD
			
 
				+        info!("Annotation with external databases like COSMIC and GnomAD.");
			
 
				         variants_collections
			
 
				             .iter()
			
 
				             .try_for_each(|c| -> anyhow::Result<()> {
			
@@ -430,31 +369,11 @@ impl Run for Somatic {
 
				             })))
			
 
				             .save_to_json(&format!("{stats_dir}/{id}_annotations_05_gnomad.json"))?;
			
 
				 
			
 
				-        // Filter: Remove variants in Gnomad and in constit bam
			
 
				+        // Filter: Remove variants present in GnomAD and have alt reads in constitutional sample
			
 
				         info!("Filtering out variants in GnomAD and in constit bam at low AF.");
			
 
				-        somatic_stats.n_high_alt_constit_gnomad =
			
 
				-            annotations.retain_variants(&mut variants_collections, |anns| {
			
 
				-                !anns
			
 
				-                    .iter()
			
 
				-                    .find_map(|a| {
			
 
				-                        if let Annotation::GnomAD(gnomad) = a {
			
 
				-                            Some(gnomad.gnomad_af > 0.0)
			
 
				-                        } else {
			
 
				-                            None
			
 
				-                        }
			
 
				-                    })
			
 
				-                    .and_then(|gnomad_condition| {
			
 
				-                        anns.iter()
			
 
				-                            .find_map(|a| {
			
 
				-                                if let Annotation::ConstitAlt(n_alt) = a {
			
 
				-                                    Some(*n_alt > 0)
			
 
				-                                } else {
			
 
				-                                    None
			
 
				-                                }
			
 
				-                            })
			
 
				-                            .map(|constit_alt_condition| gnomad_condition && constit_alt_condition)
			
 
				-                    })
			
 
				-                    .unwrap_or(false)
			
 
				+        somatic_stats.n_high_alt_constit_gnomad = annotations
			
 
				+            .retain_variants(&mut variants_collections, |anns| {
			
 
				+                !is_gnomad_and_constit_alt(anns)
			
 
				             });
			
 
				 
			
 
				         info!(
			
@@ -473,12 +392,13 @@ impl Run for Somatic {
 
				                 "{stats_dir}/{id}_annotations_06_gnomad_filter.json"
			
 
				             ))?;
			
 
				 
			
 
				-        // Annotation low entropy
			
 
				+        // Filter low entropy variants
			
 
				         annotations.low_shannon_entropy(self.config.min_shannon_entropy);
			
 
				-        // annotations.callers_stat();
			
 
				 
			
 
				-        // Filtering low entropy for solo variants.
			
 
				-        info!("Filtering low entropies");
			
 
				+        info!(
			
 
				+            "Filtering out variants with low entropies ({})",
			
 
				+            config.min_shannon_entropy
			
 
				+        );
			
 
				         annotations
			
 
				             .callers_stat(Some(Box::new(|v| {
			
 
				                 matches!(v, Annotation::Callers(_, _) | Annotation::LowEntropy)
			
@@ -489,14 +409,15 @@ impl Run for Somatic {
 
				             .retain_variants(&mut variants_collections, |anns| {
			
 
				                 !anns.contains(&Annotation::LowEntropy)
			
 
				             });
			
 
				+
			
 
				         annotations
			
 
				             .callers_stat(Some(Box::new(|v| matches!(v, Annotation::Callers(_, _)))))
			
 
				             .save_to_json(&format!(
			
 
				                 "{stats_dir}/{id}_annotations_08_entropy_filter.json"
			
 
				             ))?;
			
 
				 
			
 
				-        // VEP
			
 
				-        info!("VEP annotation.");
			
 
				+        // Final VEP annotation
			
 
				+        info!("Annotation with VEP.");
			
 
				         variants_collections
			
 
				             .iter()
			
 
				             .try_for_each(|c| -> anyhow::Result<()> {
			
@@ -504,12 +425,14 @@ impl Run for Somatic {
 
				                 ext_annot.annotate_vep(&c.variants, &annotations)?;
			
 
				                 Ok(())
			
 
				             })?;
			
 
				+
			
 
				         annotations
			
 
				             .callers_stat(Some(Box::new(caller_cat_anns)))
			
 
				             .save_to_json(&format!("{stats_dir}/{id}_annotations_09_vep.json"))?;
			
 
				 
			
 
				         annotations.vep_stats()?;
			
 
				 
			
 
				+        // Merge all variants into a final collection
			
 
				         let variants = variants_collections.into_iter().fold(
			
 
				             Variants::default(),
			
 
				             |mut acc, variants_collection| {
			
@@ -550,3 +473,292 @@ pub fn const_stats(id: String, config: Config) -> anyhow::Result<()> {
 
				 
			
 
				     Ok(())
			
 
				 }
			
 
				+
			
 
				+/// Holds statistical data for somatic variant pipeline processing,
			
 
				+/// including summary counts and input categorization.
			
 
				+#[derive(Debug, Default, Clone)]
			
 
				+pub struct SomaticPipeStats {
			
 
				+    /// Summary of input variant collections grouped by sample type.
			
 
				+    pub input: InputStats,
			
 
				+
			
 
				+    /// Number of variants labeled as both constitutional and germline.
			
 
				+    pub n_constit_germline: usize,
			
 
				+
			
 
				+    /// Number of variants in constitutional samples with low allele frequency.
			
 
				+    pub n_low_constit: usize,
			
 
				+
			
 
				+    /// Number of variants in constitutional samples with high alternative allele count.
			
 
				+    pub n_high_alt_constit: usize,
			
 
				+
			
 
				+    /// Number of high-alt constitutional variants that are also found in gnomAD.
			
 
				+    pub n_high_alt_constit_gnomad: usize,
			
 
				+
			
 
				+    /// Number of variants filtered due to low entropy (indicative of low complexity regions).
			
 
				+    pub n_low_entropies: usize,
			
 
				+}
			
 
				+
			
 
				+impl SomaticPipeStats {
			
 
				+    /// Initializes a `SomaticPipeStats` object with populated `InputStats` based on the
			
 
				+    /// provided `VariantCollection`s.
			
 
				+    pub fn init(collections: &[VariantCollection]) -> Self {
			
 
				+        Self {
			
 
				+            input: InputStats::from_collections(collections),
			
 
				+            ..Default::default()
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /// Generates a tumor-vs-germline annotation matrix and writes it to a JSON file.
			
 
				+    ///
			
 
				+    /// This method analyzes co-occurrence patterns between tumor and germline/constit
			
 
				+    /// variant callers by iterating through annotation statistics from `AnnotationsStats`.
			
 
				+    /// It builds a matrix where each row corresponds to a **tumor caller** and each column
			
 
				+    /// corresponds to a **germline or constitutional caller**. Each cell contains the number
			
 
				+    /// of variant calls that were annotated by both the tumor and the germline/constit caller.
			
 
				+    ///
			
 
				+    /// In addition to writing a structured JSON file to the specified path, the function also
			
 
				+    /// prints a tab-separated human-readable matrix to stdout for convenience.
			
 
				+    ///
			
 
				+    /// # Parameters
			
 
				+    ///
			
 
				+    /// - `stats`: A reference to [`AnnotationsStats`] containing categorical annotations.
			
 
				+    ///   This object holds a frequency map where keys are combinations of `Annotation`
			
 
				+    ///   values (serialized as strings) and values are occurrence counts.
			
 
				+    /// - `json_path`: The path where the resulting JSON output file should be written.
			
 
				+    ///
			
 
				+    /// # Output Formats
			
 
				+    ///
			
 
				+    /// ## JSON Output (`json_path`)
			
 
				+    ///
			
 
				+    /// The output JSON is an **array of tumor caller records**, each containing:
			
 
				+    /// - `caller_name`: The name of the tumor caller (as a string).
			
 
				+    /// - `germline`: An array of JSON objects, each with one key-value pair,
			
 
				+    ///   where the key is a germline/constit caller (or combination) and the value is the count.
			
 
				+    ///
			
 
				+    /// ### Example
			
 
				+    /// ```json
			
 
				+    /// [
			
 
				+    ///   {
			
 
				+    ///     "caller_name": "DeepVariant SoloTumor",
			
 
				+    ///     "germline": [
			
 
				+    ///       {"ClairS Germline": 99978},
			
 
				+    ///       {"ClairS Germline + DeepVariant SoloConstit": 4710570}
			
 
				+    ///     ]
			
 
				+    ///   },
			
 
				+    ///   {
			
 
				+    ///     "caller_name": "ClairS Somatic",
			
 
				+    ///     "germline": [
			
 
				+    ///       {"ClairS Germline": 944},
			
 
				+    ///       {"ClairS Germline + DeepVariant SoloConstit": 362}
			
 
				+    ///     ]
			
 
				+    ///   }
			
 
				+    /// ]
			
 
				+    /// ```
			
 
				+    ///
			
 
				+    /// ## Console Output (TSV)
			
 
				+    ///
			
 
				+    /// A tab-separated matrix is printed to stdout. Each row begins with a tumor caller,
			
 
				+    /// followed by columns showing germline/constit caller combinations and their counts.
			
 
				+    ///
			
 
				+    /// # Notes
			
 
				+    /// - Tumor callers are collected from `self.input.somatic` and `self.input.solo_tumor`.
			
 
				+    /// - Germline/constit callers are collected from `self.input.germline` and `self.input.solo_constit`.
			
 
				+    /// - Keys in the original `AnnotationsStats` map are split using `" + "` and parsed into `Annotation` enums.
			
 
				+    /// - Germline keys are sorted and joined to form canonical column labels.
			
 
				+    /// - Tumor and germline annotations are matched by exact `Annotation` equality.
			
 
				+    ///
			
 
				+    /// # Errors
			
 
				+    /// Returns an error if:
			
 
				+    /// - Any annotation key in `AnnotationsStats` fails to parse into a valid `Annotation`.
			
 
				+    /// - The JSON output file cannot be created or written to.
			
 
				+    ///
			
 
				+    /// # Dependencies
			
 
				+    /// Requires that `Annotation` implements `FromStr`, `ToString`, `PartialEq`, and `Clone`.
			
 
				+    ///
			
 
				+    /// # Example Usage
			
 
				+    /// ```rust
			
 
				+    /// let mut stats = SomaticPipeStats::init(&collections);
			
 
				+    /// stats.annot_init(&annotation_stats, "output/matrix.json")?;
			
 
				+    /// ```
			
 
				+    pub fn annot_init(&self, stats: &AnnotationsStats, json_path: &str) -> anyhow::Result<()> {
			
 
				+        // Parse annotations from stats
			
 
				+        let stats: Vec<(Vec<Annotation>, u64)> = stats
			
 
				+            .categorical
			
 
				+            .iter()
			
 
				+            .map(|e| {
			
 
				+                let anns = e
			
 
				+                    .key()
			
 
				+                    .split(" + ")
			
 
				+                    .map(|k| k.parse())
			
 
				+                    .collect::<anyhow::Result<Vec<Annotation>>>()
			
 
				+                    .map_err(|err| {
			
 
				+                        anyhow::anyhow!("Error while splitting key in AnnotationsStats.\n{err}")
			
 
				+                    })?;
			
 
				+                Ok((anns, *e.value()))
			
 
				+            })
			
 
				+            .collect::<anyhow::Result<Vec<(Vec<Annotation>, u64)>>>()?;
			
 
				+
			
 
				+        // Collect tumor and germline callers from input stats
			
 
				+        let callers_somatic_solo_tumor = [
			
 
				+            self.input
			
 
				+                .somatic
			
 
				+                .iter()
			
 
				+                .map(|(caller, _)| caller.clone())
			
 
				+                .collect::<Vec<Annotation>>(),
			
 
				+            self.input
			
 
				+                .solo_tumor
			
 
				+                .iter()
			
 
				+                .map(|(caller, _)| caller.clone())
			
 
				+                .collect(),
			
 
				+        ]
			
 
				+        .concat();
			
 
				+
			
 
				+        let callers_germline_solo_constit = [
			
 
				+            self.input
			
 
				+                .germline
			
 
				+                .iter()
			
 
				+                .map(|(caller, _)| caller.clone())
			
 
				+                .collect::<Vec<Annotation>>(),
			
 
				+            self.input
			
 
				+                .solo_constit
			
 
				+                .iter()
			
 
				+                .map(|(caller, _)| caller.clone())
			
 
				+                .collect(),
			
 
				+        ]
			
 
				+        .concat();
			
 
				+
			
 
				+        // Build a matrix of tumor vs germline hits
			
 
				+        let mut with_germline: HashMap<String, HashMap<String, u64>> = HashMap::new();
			
 
				+        stats.iter().for_each(|(anns, v)| {
			
 
				+            // Only proceed if this annotation includes a germline/constit sample
			
 
				+            if anns.iter().any(|a| {
			
 
				+                matches!(
			
 
				+                    a,
			
 
				+                    Annotation::Callers(_, Sample::SoloConstit)
			
 
				+                        | Annotation::Callers(_, Sample::Germline)
			
 
				+                )
			
 
				+            }) {
			
 
				+                // Find all tumor callers present in this annotation set
			
 
				+                let n_by_tumor: Vec<(String, u64)> = callers_somatic_solo_tumor
			
 
				+                    .iter()
			
 
				+                    .flat_map(|tumor| {
			
 
				+                        if anns.contains(tumor) {
			
 
				+                            vec![(tumor.to_string(), *v)]
			
 
				+                        } else {
			
 
				+                            vec![]
			
 
				+                        }
			
 
				+                    })
			
 
				+                    .collect();
			
 
				+
			
 
				+                // Build a normalized germline key
			
 
				+                let mut germline_caller: Vec<String> = callers_germline_solo_constit
			
 
				+                    .iter()
			
 
				+                    .flat_map(|germ| {
			
 
				+                        if anns.contains(germ) {
			
 
				+                            vec![germ.to_string()]
			
 
				+                        } else {
			
 
				+                            vec![]
			
 
				+                        }
			
 
				+                    })
			
 
				+                    .collect();
			
 
				+                germline_caller.sort();
			
 
				+                let germline_caller = germline_caller.join(" + ");
			
 
				+
			
 
				+                // Update matrix: tumor -> germline -> count
			
 
				+                n_by_tumor.iter().for_each(|(tumoral_caller, n)| {
			
 
				+                    if let Some(row) = with_germline.get_mut(tumoral_caller) {
			
 
				+                        if let Some(col) = row.get_mut(&germline_caller) {
			
 
				+                            *col += *n;
			
 
				+                        } else {
			
 
				+                            row.insert(germline_caller.to_string(), *n);
			
 
				+                        }
			
 
				+                    } else {
			
 
				+                        let mut row = HashMap::new();
			
 
				+                        row.insert(germline_caller.to_string(), *n);
			
 
				+                        with_germline.insert(tumoral_caller.to_string(), row);
			
 
				+                    }
			
 
				+                });
			
 
				+            }
			
 
				+        });
			
 
				+
			
 
				+        // Extract all unique germline caller labels
			
 
				+        let mut germlines_callers: Vec<String> = with_germline
			
 
				+            .iter()
			
 
				+            .flat_map(|(_, r)| {
			
 
				+                r.iter()
			
 
				+                    .map(|(k, _)| k.to_string())
			
 
				+                    .collect::<Vec<String>>()
			
 
				+            })
			
 
				+            .collect();
			
 
				+        germlines_callers.sort();
			
 
				+        germlines_callers.dedup();
			
 
				+
			
 
				+        // Print a readable tab-separated matrix
			
 
				+        let mut json = Vec::new();
			
 
				+        let mut lines: Vec<String> = with_germline
			
 
				+            .iter()
			
 
				+            .map(|(tumor, row)| {
			
 
				+                json.push(format!(
			
 
				+                    "{{\"caller_name\": \"{tumor}\", \"germline\": [{}] }}",
			
 
				+                    germlines_callers
			
 
				+                        .iter()
			
 
				+                        .map(|g| {
			
 
				+                            let v = row.get(g).unwrap_or(&0);
			
 
				+                            format!("{{\"{g}\": {v}}}")
			
 
				+                        })
			
 
				+                        .join(", ")
			
 
				+                ));
			
 
				+                format!(
			
 
				+                    "{tumor}\t{}",
			
 
				+                    germlines_callers
			
 
				+                        .iter()
			
 
				+                        .map(|g| {
			
 
				+                            let v = row.get(g).unwrap_or(&0);
			
 
				+                            format!("{g}: {v}")
			
 
				+                        })
			
 
				+                        .join("\t")
			
 
				+                )
			
 
				+            })
			
 
				+            .collect();
			
 
				+        lines.sort();
			
 
				+        println!("{}", lines.join("\n"));
			
 
				+
			
 
				+        // Write JSON to file
			
 
				+        let json = format!("[{}]", json.join(", "));
			
 
				+        let mut file = File::create(json_path)?;
			
 
				+        file.write_all(json.as_bytes())?;
			
 
				+
			
 
				+        Ok(())
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Default, Clone)]
			
 
				+pub struct InputStats {
			
 
				+    pub solo_tumor: Vec<(Annotation, usize)>,
			
 
				+    pub solo_constit: Vec<(Annotation, usize)>,
			
 
				+    pub germline: Vec<(Annotation, usize)>,
			
 
				+    pub somatic: Vec<(Annotation, usize)>,
			
 
				+}
			
 
				+
			
 
				+impl InputStats {
			
 
				+    pub fn from_collections(collections: &[VariantCollection]) -> Self {
			
 
				+        let mut stats = Self::default();
			
 
				+        for collection in collections.iter() {
			
 
				+            match collection.caller {
			
 
				+                Annotation::Callers(_, Sample::SoloTumor) => stats
			
 
				+                    .solo_tumor
			
 
				+                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				+                Annotation::Callers(_, Sample::SoloConstit) => stats
			
 
				+                    .solo_constit
			
 
				+                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				+                Annotation::Callers(_, Sample::Germline) => stats
			
 
				+                    .germline
			
 
				+                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				+                Annotation::Callers(_, Sample::Somatic) => stats
			
 
				+                    .somatic
			
 
				+                    .push((collection.caller.clone(), collection.variants.len())),
			
 
				+                _ => (),
			
 
				+            };
			
 
				+        }
			
 
				+        stats
			
 
				+    }
			
 
				+}
			
--- a/src/variant/variant.rs
+++ b/src/variant/variant.rs
@@ -1046,16 +1046,6 @@ pub trait Label {
 
				     fn label(&self) -> String;
			
 
				 }
			
 
				 
			
 
				-// pub trait AsAny {
			
 
				-//     fn as_any(&self) -> &dyn std::any::Any;
			
 
				-// }
			
 
				-//
			
 
				-// impl<T: 'static> AsAny for T {
			
 
				-//     fn as_any(&self) -> &dyn std::any::Any {
			
 
				-//         self
			
 
				-//     }
			
 
				-// }
			
 
				-
			
 
				 /// A trait alias for all dynamically executable pipeline runners.
			
 
				 ///
			
 
				 /// This trait represents any component that:
			
@@ -1097,15 +1087,22 @@ pub type ShouldRunBox = Box<dyn ShouldRunTrait>;
 
				 /// This macro uses `?`, so it must be called inside a function that returns `anyhow::Result`.
			
 
				 #[macro_export]
			
 
				 macro_rules! create_should_run {
			
 
				-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
			
 
				-        vec![
			
 
				-            $(
			
 
				-                Box::new(<$runner>::initialize($id, $config.clone())?) as ShouldRunBox
			
 
				-            ),+
			
 
				-        ]
			
 
				-    };
			
 
				+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{
			
 
				+        use anyhow::Context;
			
 
				+        let mut runners: Vec<ShouldRunBox> = Vec::new();
			
 
				+        $(
			
 
				+            let runner = <$runner>::initialize($id, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize should-run checker {} for {}",
			
 
				+                    stringify!($runner), $id
			
 
				+                ))?;
			
 
				+            runners.push(Box::new(runner) as ShouldRunBox);
			
 
				+        )+
			
 
				+        runners
			
 
				+    }};
			
 
				 }
			
 
				 
			
 
				+
			
 
				 /// Macro to initialize and box a list of solo-mode pipeline components that implement `ShouldRunTrait`.
			
 
				 ///
			
 
				 /// This is typically used for per-timepoint variant callers (e.g., `DeepVariant`),
			
@@ -1138,13 +1135,19 @@ macro_rules! create_should_run {
 
				 /// This macro uses `?` and must be called inside a `Result`-returning context.
			
 
				 #[macro_export]
			
 
				 macro_rules! create_should_run_solo {
			
 
				-    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
			
 
				-        vec![
			
 
				-            $(
			
 
				-                Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as ShouldRunBox
			
 
				-            ),+
			
 
				-        ]
			
 
				-    };
			
 
				+    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {{
			
 
				+        use anyhow::Context;
			
 
				+        let mut runners: Vec<ShouldRunBox> = Vec::new();
			
 
				+        $(
			
 
				+            let runner = <$runner>::initialize($id, $arg, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize solo should-run checker {} for '{}' and arg '{}'",
			
 
				+                    stringify!($runner), $id, $arg
			
 
				+                ))?;
			
 
				+            runners.push(Box::new(runner) as ShouldRunBox);
			
 
				+        )+
			
 
				+        runners
			
 
				+    }};
			
 
				 }
			
 
				 
			
 
				 /// Macro to initialize and box a list of pipeline components that must run once per timepoint
			
@@ -1187,16 +1190,29 @@ macro_rules! create_should_run_solo {
 
				 /// This macro uses `?`, so it must be called inside a function that returns `Result`.
			
 
				 #[macro_export]
			
 
				 macro_rules! create_should_run_normal_tumoral {
			
 
				-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
			
 
				-        vec![
			
 
				-            $(
			
 
				-                Box::new(<$runner>::initialize($id, &$config.tumoral_name, $config.clone())?) as ShouldRunBox,
			
 
				-                Box::new(<$runner>::initialize($id, &$config.normal_name, $config.clone())?) as ShouldRunBox
			
 
				-            ),+
			
 
				-        ]
			
 
				-    };
			
 
				+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{
			
 
				+        use anyhow::Context;
			
 
				+        let mut runners: Vec<ShouldRunBox> = Vec::new();
			
 
				+        $(
			
 
				+            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize tumoral should-run checker {} for {}",
			
 
				+                    stringify!($runner), $id
			
 
				+                ))?;
			
 
				+            runners.push(Box::new(tumoral) as ShouldRunBox);
			
 
				+
			
 
				+            let normal = <$runner>::initialize($id, &$config.normal_name, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize normal should-run checker {} for {}",
			
 
				+                    stringify!($runner), $id
			
 
				+                ))?;
			
 
				+            runners.push(Box::new(normal) as ShouldRunBox);
			
 
				+        )+
			
 
				+        runners
			
 
				+    }};
			
 
				 }
			
 
				 
			
 
				+
			
 
				 /// Executes each runner in the slice only if `should_run()` returns true.
			
 
				 ///
			
 
				 /// # Arguments
			
@@ -1233,15 +1249,22 @@ pub type CallerBox = Box<dyn RunnerVariants + Send + Sync>;
 
				 
			
 
				 #[macro_export]
			
 
				 macro_rules! init_somatic_callers {
			
 
				-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
			
 
				-        vec![
			
 
				-            $(
			
 
				-                Box::new(<$runner>::initialize($id, $config.clone())?) as CallerBox
			
 
				-            ),+
			
 
				-        ]
			
 
				-    };
			
 
				+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{
			
 
				+        use anyhow::Context;
			
 
				+        let mut callers: Vec<CallerBox> = Vec::new();
			
 
				+        $(
			
 
				+            let caller = <$runner>::initialize($id, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize somatic caller {} for {}",
			
 
				+                    stringify!($runner), $id
			
 
				+                ))?;
			
 
				+            callers.push(Box::new(caller) as CallerBox);
			
 
				+        )+
			
 
				+        callers
			
 
				+    }};
			
 
				 }
			
 
				 
			
 
				+
			
 
				 /// Macro to initialize and box a list of **solo-mode variant callers** for specific timepoints,
			
 
				 /// where each runner implements `RunnerVariants`.
			
 
				 ///
			
@@ -1280,15 +1303,19 @@ macro_rules! init_somatic_callers {
 
				 /// This macro uses `?` internally, so it must be used inside a `Result`-returning context.
			
 
				 #[macro_export]
			
 
				 macro_rules! init_solo_callers {
			
 
				-    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
			
 
				-        vec![
			
 
				-            $(
			
 
				-                Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as CallerBox
			
 
				-            ),+
			
 
				-        ]
			
 
				-    };
			
 
				+    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {{
			
 
				+        let mut callers: Vec<CallerBox> = Vec::new();
			
 
				+        $(
			
 
				+            let caller = <$runner>::initialize($id, $arg, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize caller {} for {}",
			
 
				+                    stringify!($runner), $id
			
 
				+                ))?;
			
 
				+            callers.push(Box::new(caller) as CallerBox);
			
 
				+        )+
			
 
				+        callers
			
 
				+    }};
			
 
				 }
			
 
				-
			
 
				 /// Macro to initialize and box a list of solo-mode **variant callers** for both `normal` and `tumoral` timepoints.
			
 
				 ///
			
 
				 /// This is designed for types like `DeepVariant` that implement `RunnerVariants` and require
			
@@ -1328,14 +1355,26 @@ macro_rules! init_solo_callers {
 
				 /// This macro uses `?`, so it must be called inside a `Result`-returning context.
			
 
				 #[macro_export]
			
 
				 macro_rules! init_solo_callers_normal_tumoral {
			
 
				-    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
			
 
				-        vec![
			
 
				-            $(
			
 
				-                Box::new(<$runner>::initialize($id, &$config.tumoral_name, $config.clone())?) as CallerBox,
			
 
				-                Box::new(<$runner>::initialize($id, &$config.normal_name, $config.clone())?) as CallerBox
			
 
				-            ),+
			
 
				-        ]
			
 
				-    };
			
 
				+    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {{
			
 
				+        use anyhow::Context;
			
 
				+        let mut callers: Vec<CallerBox> = Vec::new();
			
 
				+        $(
			
 
				+            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize tumoral caller {} for {} '{}'",
			
 
				+                    stringify!($runner), $id, $config.tumoral_name
			
 
				+                ))?;
			
 
				+            callers.push(Box::new(tumoral) as CallerBox);
			
 
				+
			
 
				+            let normal = <$runner>::initialize($id, &$config.normal_name, $config.clone())
			
 
				+                .with_context(|| format!(
			
 
				+                    "Failed to initialize normal caller {} for {} '{}'",
			
 
				+                    stringify!($runner), $id, $config.normal_name
			
 
				+                ))?;
			
 
				+            callers.push(Box::new(normal) as CallerBox);
			
 
				+        )+
			
 
				+        callers
			
 
				+    }};
			
 
				 }
			
 
				 
			
 
				 // pub fn run_variants(iterable: &mut [CallerBox]) -> anyhow::Result<()> {