Browse Source

Rationalization and documentation of caller functions

Thomas 8 months ago
parent
commit
91cde944f4
11 changed files with 920 additions and 442 deletions
  1. 145 79
      src/callers/clairs.rs
  2. 85 45
      src/callers/deep_somatic.rs
  3. 136 59
      src/callers/deep_variant.rs
  4. 129 67
      src/callers/nanomonsv.rs
  5. 99 56
      src/callers/savana.rs
  6. 106 49
      src/callers/severus.rs
  7. 2 2
      src/collection/mod.rs
  8. 55 1
      src/config.rs
  9. 41 0
      src/helpers.rs
  10. 122 23
      src/runners.rs
  11. 0 61
      src/scan/bin.rs

+ 145 - 79
src/callers/clairs.rs

@@ -1,9 +1,9 @@
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, Initialize},
+    collection::{vcf::Vcf, Initialize, ShouldRun},
     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
     config::Config,
-    helpers::{force_or_not, temp_file_path},
+    helpers::{is_file_older, temp_file_path},
     io::vcf::read_vcf,
     runners::{run_wait, DockerRun, Run},
     variant::{
@@ -16,82 +16,103 @@ use log::{debug, info};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use std::{fs, path::Path};
 
+/// A pipeline runner for executing ClairS on paired tumor and normal samples.
+///
+/// ClairS is a somatic variant caller that uses haplotype tagging from LongPhase.
+/// This struct manages:
+/// - Dockerized execution of the ClairS pipeline
+/// - Handling and filtering of output VCFs
+/// - Logging and diagnostic tracking
+/// - Integration with variant annotation workflows
 #[derive(Debug, Clone)]
 pub struct ClairS {
     pub id: String,
-    pub output_dir: String,
-    pub output_vcf: String,
-    pub output_indels_vcf: String,
-    pub vcf_passed: String,
-    pub diag_bam: String,
-    pub mrd_bam: String,
-    pub log_dir: String,
     pub config: Config,
-    pub clair3_germline_normal: String,
-    pub clair3_germline_tumor: String,
-    pub clair3_germline_passed: String,
+    pub log_dir: String,
 }
 
 impl Initialize for ClairS {
+    /// Initializes the ClairS runner.
+    ///
+    /// This method constructs a `ClairS` instance with logging and configuration setup,
+    /// and ensures the output directory is cleaned up if the results are outdated or force execution is enabled.
+    ///
+    /// # Arguments
+    /// * `id` - The identifier for the sample being analyzed.
+    /// * `config` - Pipeline-wide configuration object containing paths, resources, and settings.
+    ///
+    /// # Returns
+    /// A fully initialized `ClairS` instance ready for execution.
+    ///
+    /// # Errors
+    /// Returns an error if the output directory fails to be removed when necessary.
+    ///
+    /// If the output VCF already exists and is not outdated, initialization skips deletion.
+    /// Otherwise, the output directory is cleared for a fresh run.
     fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         info!("Initialize ClairS for {id}.");
         let log_dir = format!("{}/{}/log/clairs", config.result_dir, &id);
 
-        if !Path::new(&log_dir).exists() {
-            fs::create_dir_all(&log_dir)
-                .context(format!("Failed  to create {log_dir} directory"))?;
-        }
-
-        let output_dir = config.clairs_output_dir(&id);
-        fs::create_dir_all(&output_dir).context(format!("Can't create dir: {output_dir}"))?;
-
-        let (output_vcf, output_indels_vcf) = config.clairs_output_vcfs(&id);
-        let vcf_passed = format!("{output_dir}/{id}_diag_clairs_PASSED.vcf.gz");
-
-        let diag_bam = config.tumoral_bam(&id);
-        let mrd_bam = config.normal_bam(&id);
-
-        let clair3_germline_normal = config.clairs_germline_normal_vcf(&id);
-        let clair3_germline_tumor = config.clairs_germline_tumor_vcf(&id);
-        let clair3_germline_passed = config.clairs_germline_passed_vcf(&id);
-
-        Ok(Self {
+        let clairs = Self {
             id,
-            output_dir,
-            output_vcf,
-            output_indels_vcf,
-            vcf_passed,
-            diag_bam,
-            mrd_bam,
             log_dir,
             config,
-            clair3_germline_normal,
-            clair3_germline_tumor,
-            clair3_germline_passed,
-        })
+        };
+
+        let passed_vcf = clairs.config.clairs_passed_vcf(&clairs.id);
+        if (clairs.config.clairs_force && Path::new(&passed_vcf).exists()) || clairs.should_run() {
+            fs::remove_dir_all(clairs.config.clairs_output_dir(&clairs.id))?;
+        }
+
+        Ok(clairs)
+    }
+}
+
+impl ShouldRun for ClairS {
+    /// Determines whether ClairS should be re-run based on BAM modification timestamps.
+    fn should_run(&self) -> bool {
+        let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
+        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
     }
 }
 
 impl Run for ClairS {
+    /// Executes the ClairS variant calling pipeline and post-processes its output.
+    ///
+    /// # Pipeline Overview
+    /// - Runs ClairS in a Docker container using paired tumor and normal BAMs
+    /// - Generates both somatic and germline variant VCFs
+    /// - Applies bcftools filtering to keep only PASS variants
+    /// - Concatenates separate VCFs (e.g., SNPs and INDELs) into a single somatic file
+    /// - Tracks all operations via logs saved to disk
+    ///
+    /// # Errors
+    /// Returns an error if:
+    /// - Docker execution fails
+    /// - bcftools fails to process or filter VCFs
+    /// - Temporary files can't be removed or written
     fn run(&mut self) -> anyhow::Result<()> {
-        force_or_not(&self.vcf_passed, self.config.clairs_force)?;
-
         // Run Docker command if output VCF doesn't exist
-        if !Path::new(&self.output_vcf).exists() || !Path::new(&self.output_indels_vcf).exists() {
+        let (output_vcf, output_indels_vcf) = self.config.clairs_output_vcfs(&self.id);
+        if !Path::new(&output_vcf).exists() || !Path::new(&output_indels_vcf).exists() {
+            let output_dir = self.config.clairs_output_dir(&self.id);
+            fs::create_dir_all(&output_dir).context(format!("Failed create dir: {output_dir}"))?;
+
             let mut docker_run = DockerRun::new(&[
                 "run",
                 "-d",
                 "-v",
                 "/data:/data",
                 "-v",
-                &format!("{}:{}", self.output_dir, self.output_dir),
+                &format!("{}:{}", output_dir, output_dir),
                 "hkubal/clairs:latest",
                 "/opt/bin/run_clairs",
                 "-T",
-                &self.diag_bam,
+                &self.config.tumoral_bam(&self.id),
                 "-N",
-                &self.mrd_bam,
+                &self.config.normal_bam(&self.id),
                 "-R",
                 &self.config.reference,
                 "-t",
@@ -105,34 +126,40 @@ impl Run for ClairS {
                 "--use_longphase_for_intermediate_haplotagging",
                 "true",
                 "--output_dir",
-                &self.output_dir,
+                &output_dir,
                 "-s",
                 &format!("{}_diag", self.id),
             ]);
-            let report = run_wait(&mut docker_run).context(format!(
-                "Error while running ClairS in docker for  {} and {}",
-                &self.diag_bam, &self.mrd_bam
-            ))?;
+            let report = run_wait(&mut docker_run)
+                .context(format!("Failed to run ClairS for {}", &self.id))?;
 
             let log_file = format!("{}/clairs_", self.log_dir);
             report
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
         } else {
-            debug!("ClairS VCFs exist.");
+            debug!(
+                "ClairS output VCF already exists for {}, skipping execution.",
+                self.id
+            );
         }
 
-        // Germline
-        if !Path::new(&self.clair3_germline_passed).exists() {
+        // Germline PASS
+        let clair3_germline_passed = self.config.clairs_germline_passed_vcf(&self.id);
+        if !Path::new(&clair3_germline_passed).exists() {
+            let clair3_germline_normal = self.config.clairs_germline_normal_vcf(&self.id);
+            // let clair3_germline_tumor = self.config.clairs_germline_tumor_vcf(&self.id);
+
             let report = bcftools_keep_pass(
-                &self.clair3_germline_normal,
-                &self.clair3_germline_passed,
+                &clair3_germline_normal,
+                &clair3_germline_passed,
                 BcftoolsConfig::default(),
             )
             .context(format!(
                 "Error while running bcftools keep PASS for {}",
-                &self.clair3_germline_passed
+                &clair3_germline_passed
             ))?;
+
             let log_file = format!("{}/bcftools_pass_", self.log_dir);
             report
                 .save_to_file(&log_file)
@@ -140,34 +167,35 @@ impl Run for ClairS {
 
             // fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?;
         } else {
-            debug!("ClairS germline VCF exists.");
+            debug!(
+                "ClairS Germline PASSED VCF already exists for {}, skipping execution.",
+                self.id
+            );
         }
 
-        if !Path::new(&self.vcf_passed).exists() {
+        let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
+        if !Path::new(&passed_vcf).exists() {
             // Concat output and indels
             let tmp_file = temp_file_path(".vcf.gz")?.to_str().unwrap().to_string();
             let report = bcftools_concat(
-                vec![
-                    self.output_vcf.to_string(),
-                    self.output_indels_vcf.to_string(),
-                ],
+                vec![output_vcf.to_string(), output_indels_vcf.to_string()],
                 &tmp_file,
                 BcftoolsConfig::default(),
             )
             .context(format!(
-                "Error while running bcftools concat for {} and {}",
-                &self.output_vcf, &self.output_indels_vcf
+                "Failed to run bcftools concat for {} and {}",
+                &output_vcf, &output_indels_vcf
             ))?;
             let log_file = format!("{}/bcftools_concat_", self.log_dir);
             report
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
 
-            let report = bcftools_keep_pass(&tmp_file, &self.vcf_passed, BcftoolsConfig::default())
-                .context(format!(
-                    "Error while running bcftools keep PASS for {}",
-                    &self.output_vcf
-                ))?;
+            let report =
+                bcftools_keep_pass(&tmp_file, passed_vcf, BcftoolsConfig::default()).context(
+                    format!("Error while running bcftools keep PASS for {}", &output_vcf),
+                )?;
+
             let log_file = format!("{}/bcftools_pass_", self.log_dir);
             report
                 .save_to_file(&log_file)
@@ -175,7 +203,10 @@ impl Run for ClairS {
 
             fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?;
         } else {
-            debug!("ClairS PASSED vcf exists.");
+            debug!(
+                "ClairS PASSED VCF already exists for {}, skipping execution.",
+                self.id
+            );
         }
 
         Ok(())
@@ -183,18 +214,34 @@ impl Run for ClairS {
 }
 
 impl CallerCat for ClairS {
+    /// Tags this runner as somatic, used for annotation classification.
     fn caller_cat(&self) -> Annotation {
         Annotation::Callers(Caller::ClairS, Sample::Somatic)
     }
 }
 
 impl Variants for ClairS {
+    /// Loads and annotates somatic variants from the ClairS filtered VCF.
+    ///
+    /// This method reads the filtered PASS VCF file generated by ClairS for somatic variants.
+    /// It tags each variant with the ClairS somatic annotation and adds it to the shared `annotations` map.
+    ///
+    /// # Arguments
+    /// * `annotations` - A reference to the global annotations structure used to store variant metadata.
+    ///
+    /// # Returns
+    /// A `VariantCollection` with the list of variants, the source VCF file, and the associated caller tag.
+    ///
+    /// # Errors
+    /// Will return an error if the VCF file is unreadable, missing, or malformed.
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
-        info!("Loading variants from {}: {}", caller, self.vcf_passed);
-        let variants = read_vcf(&self.vcf_passed)
-            .map_err(|e| anyhow::anyhow!("Failed to read ClairS VCF {}.\n{e}", self.vcf_passed))?;
+        let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
+
+        info!("Loading variants from {}: {}", caller, passed_vcf);
+        let variants = read_vcf(passed_vcf)
+            .map_err(|e| anyhow::anyhow!("Failed to read ClairS VCF {}.\n{e}", passed_vcf))?;
 
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
@@ -203,19 +250,37 @@ impl Variants for ClairS {
 
         Ok(VariantCollection {
             variants,
-            vcf: Vcf::new(self.vcf_passed.clone().into())?,
+            vcf: Vcf::new(passed_vcf.into())?,
             caller,
         })
     }
 }
 
 impl ClairS {
+    /// Loads and annotates germline variants from the ClairS germline output.
+    ///
+    /// This function loads a pre-filtered VCF file containing germline variants called by ClairS.
+    /// It updates the provided `annotations` structure with a tag indicating these are germline variants.
+    ///
+    /// # Arguments
+    /// * `annotations` - A shared annotation structure to update with variant hashes and tags.
+    ///
+    /// # Returns
+    /// A [`VariantCollection`] object containing the loaded variants, associated VCF metadata, and caller category.
+    ///
+    /// # Errors
+    /// Will return an error if the VCF file cannot be read or parsed.
     pub fn germline(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = Annotation::Callers(Caller::ClairS, Sample::Germline);
         let add = vec![caller.clone()];
-        info!("Loading variants from {}: {}", caller, self.vcf_passed);
+        let clair3_germline_passed = &self.config.clairs_germline_passed_vcf(&self.id);
+
+        info!(
+            "Loading variants from {}: {}",
+            caller, clair3_germline_passed
+        );
 
-        let variants = read_vcf(&self.clair3_germline_passed)?;
+        let variants = read_vcf(clair3_germline_passed)?;
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
         });
@@ -223,10 +288,11 @@ impl ClairS {
 
         Ok(VariantCollection {
             variants,
-            vcf: Vcf::new(self.clair3_germline_passed.clone().into())?,
+            vcf: Vcf::new(clair3_germline_passed.into())?,
             caller,
         })
     }
 }
 
+/// Marker trait implementation to signal ClairS supports variant export.
 impl RunnerVariants for ClairS {}

+ 85 - 45
src/callers/deep_somatic.rs

@@ -6,10 +6,10 @@ use rayon::prelude::*;
 
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, Initialize},
+    collection::{vcf::Vcf, Initialize, ShouldRun},
     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
     config::Config,
-    helpers::path_prefix,
+    helpers::is_file_older,
     io::vcf::read_vcf,
     runners::{run_wait, DockerRun, Run},
     variant::{
@@ -18,57 +18,93 @@ use crate::{
     },
 };
 
+/// A pipeline runner for executing DeepSomatic on paired tumor and normal BAM files.
+///
+/// This struct encapsulates the configuration and metadata required to run the DeepSomatic
+/// variant caller inside a Docker container. It integrates into the pipeline ecosystem with:
+/// - Docker-based execution logic
+/// - Output filtering for PASS variants
+/// - Logging and diagnostics tracking
+/// - Annotation tagging for somatic variants
 #[derive(Debug)]
 pub struct DeepSomatic {
     pub id: String,
-    pub output_dir: String,
-    pub output_vcf: String,
-    pub vcf_passed: String,
     pub log_dir: String,
     pub config: Config,
 }
 
 impl Initialize for DeepSomatic {
+    /// Initializes the DeepSomatic runner by setting paths and logging.
+    ///
+    /// # Arguments
+    /// * `id` - Sample ID used for output directory and log tagging.
+    /// * `config` - Shared pipeline configuration.
+    ///
+    /// # Returns
+    /// A ready-to-use `DeepSomatic` runner.
     fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         info!("Initializing DeepSomatic for {id}.");
 
         let log_dir = format!("{}/{}/log/deepsomatic", config.result_dir, &id);
-        if !Path::new(&log_dir).exists() {
-            fs::create_dir_all(&log_dir)
-                .context(format!("Failed  to create {log_dir} directory"))?;
-        }
-
-        let output_dir = config.deepsomatic_output_dir(&id);
-        fs::create_dir_all(&output_dir).context(format!("Can't create dir: {output_dir}"))?;
-
-        let output_vcf = format!(
-            "{}/{}_{}_DeepSomatic.vcf.gz",
-            output_dir, id, config.tumoral_name
-        );
-        let vcf_passed = format!("{}_PASSED.vcf.gz", path_prefix(&output_vcf)?);
-
-        Ok(Self {
+        let deep_somatic = Self {
             id,
             config,
             log_dir,
-            output_dir,
-            output_vcf,
-            vcf_passed,
-        })
+        };
+
+        let passed_vcf = deep_somatic.config.deepsomatic_passed_vcf(&deep_somatic.id);
+        if (deep_somatic.config.deepsomatic_force && Path::new(&passed_vcf).exists())
+            || deep_somatic.should_run()
+        {
+            fs::remove_dir_all(deep_somatic.config.deepsomatic_output_dir(&deep_somatic.id))?;
+        }
+
+        Ok(deep_somatic)
+    }
+}
+
+/// Determines whether DeepSomatic should be re-run based on whether
+/// the filtered PASS VCF is older than the input BAMs.
+///
+/// If either input BAM (normal or tumor) is newer than the PASS VCF,
+/// DeepSomatic is considered out of date and should be re-executed.
+///
+/// # Returns
+/// `true` if an update is needed, or if timestamps can't be checked (file doesn't exist)
+impl ShouldRun for DeepSomatic {
+    fn should_run(&self) -> bool {
+        let passed_vcf = &self.config.deepsomatic_passed_vcf(&self.id);
+        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
     }
 }
 
 impl Run for DeepSomatic {
+    /// Runs DeepSomatic inside Docker and filters resulting variants with bcftools.
+    ///
+    /// # Workflow
+    /// - Creates output directory
+    /// - Executes Docker container for DeepSomatic
+    /// - Applies PASS filtering using bcftools
+    /// - Stores log outputs in gzipped format
+    ///
+    /// # Errors
+    /// Returns an error if any subprocess or file operation fails.
     fn run(&mut self) -> anyhow::Result<()> {
-        if !Path::new(&self.output_vcf).exists() {
+        let output_vcf = self.config.deepsomatic_output_vcf(&self.id);
+        if !Path::new(&output_vcf).exists() {
+            let output_dir = self.config.deepsomatic_output_dir(&self.id);
+            fs::create_dir_all(&output_dir)
+                .context(format!("Failed to create dir: {output_dir}"))?;
+
             let mut docker_run = DockerRun::new(&[
                 "run",
                 "-d",
                 "-v",
                 "/data:/data",
                 "-v",
-                &format!("{}:/output", self.output_dir),
+                &format!("{}:/output", output_dir),
                 &format!("google/deepsomatic:{}", self.config.deepsomatic_bin_version),
                 "run_deepsomatic",
                 &format!("--model_type={}", self.config.deepsomatic_model_type),
@@ -109,25 +145,19 @@ impl Run for DeepSomatic {
         }
 
         // Keep PASS
-        if !Path::new(&self.vcf_passed).exists() {
-            info!("Filtering PASS variants");
-            let report = bcftools_keep_pass(
-                &self.output_vcf,
-                &self.vcf_passed,
-                BcftoolsConfig::default(),
-            )
-            .map_err(|e| {
-                anyhow::anyhow!(
-                    "Error while running bcftools pass for {}.\n{e}",
-                    self.output_vcf
-                )
-            })?;
+        let vcf_passed = self.config.deepsomatic_passed_vcf(&self.id);
+        if !Path::new(&vcf_passed).exists() {
+            info!("Filtering DeepSomatic PASS variants for {}", self.id);
+            let report = bcftools_keep_pass(&output_vcf, &vcf_passed, BcftoolsConfig::default())
+                .map_err(|e| {
+                    anyhow::anyhow!("Error while running bcftools pass for {}.\n{e}", output_vcf)
+                })?;
             report
                 .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
                 .map_err(|e| {
                     anyhow::anyhow!(
                         "Error while saving bcftools report for {}.\n{e}",
-                        self.output_vcf
+                        output_vcf
                     )
                 })?;
         }
@@ -137,29 +167,39 @@ impl Run for DeepSomatic {
 }
 
 impl CallerCat for DeepSomatic {
+    /// Returns a classification tag for this caller, identifying it as somatic.
     fn caller_cat(&self) -> Annotation {
         Annotation::Callers(Caller::DeepSomatic, Sample::Somatic)
     }
 }
 
 impl Variants for DeepSomatic {
+    /// Loads and annotates variants from the DeepSomatic VCF (PASS-filtered).
+    ///
+    /// # Arguments
+    /// * `annotations` - The global annotation map to which variants are added.
+    ///
+    /// # Returns
+    /// A `VariantCollection` containing variants, associated VCF path, and caller category.
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
-        info!("Loading variants from {}: {}", caller, self.vcf_passed);
-        let variants = read_vcf(&self.vcf_passed).map_err(|e| {
-            anyhow::anyhow!("Failed to read DeepSomatic VCF {}.\n{e}", self.vcf_passed)
-        })?;
+        let vcf_passed = self.config.deepsomatic_passed_vcf(&self.id);
+
+        info!("Loading variants from {}: {}", caller, vcf_passed);
+        let variants = read_vcf(&vcf_passed)
+            .map_err(|e| anyhow::anyhow!("Failed to read DeepSomatic VCF {}.\n{e}", vcf_passed))?;
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
         });
         info!("{}, {} variants loaded.", caller, variants.len());
         Ok(VariantCollection {
             variants,
-            vcf: Vcf::new(self.vcf_passed.clone().into())?,
+            vcf: Vcf::new(vcf_passed.into())?,
             caller,
         })
     }
 }
 
+/// Marker trait implementation to signal DeepSomatic supports variant export.
 impl RunnerVariants for DeepSomatic {}

+ 136 - 59
src/callers/deep_variant.rs

@@ -1,14 +1,14 @@
 use anyhow::Context;
-use log::info;
+use log::{debug, info};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use std::{fs, path::Path};
 
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, InitializeSolo},
+    collection::{vcf::Vcf, InitializeSolo, ShouldRun},
     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
     config::Config,
-    helpers::{force_or_not, path_prefix},
+    helpers::is_file_older,
     io::vcf::read_vcf,
     runners::{run_wait, DockerRun, Run},
     variant::{
@@ -17,104 +17,164 @@ use crate::{
     },
 };
 
+/// A pipeline runner for executing DeepVariant on a single sample and a specific time point (e.g., normal or tumor).
+///
+/// This struct holds all necessary metadata, configuration, and output paths to perform variant calling
+/// on a given BAM file using DeepVariant in a Dockerized environment. It also supports:
+/// - Conditional execution via `should_run`
+/// - Cleanup and preparation via `initialize`
+/// - Log tracking
+/// - Integration into annotation and variant aggregation workflows
 #[derive(Debug, Clone)]
 pub struct DeepVariant {
     pub id: String,
-    pub time: String,
-    pub bam: String,
-    pub output_dir: String,
-    pub output_vcf: String,
-    pub vcf_passed: String,
+    pub time_point: String,
     pub log_dir: String,
     pub config: Config,
 }
 
 impl InitializeSolo for DeepVariant {
-    fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
+    /// Initializes the DeepVariant runner for a specific sample and time point.
+    /// If `force` is true or the output is outdated, the previous output directory is removed.
+    /// Initializes the DeepVariant runner for a given ID and time point.
+    ///
+    /// # Arguments
+    /// * `id` - Sample ID
+    /// * `time_point` - Either the normal or tumor label for this run
+    /// * `config` - Global configuration object
+    ///
+    /// # Returns
+    /// A ready-to-run `DeepVariant` instance with proper path resolution.
+    ///
+    /// # Errors
+    /// Will return an error if directory cleanup fails when forced or outdated.
+    fn initialize(id: &str, time_point: &str, config: Config) -> anyhow::Result<Self> {
         let id = id.to_string();
-        let time = time.to_string();
-        info!("Initializing DeepVariant for {id} {time}.");
+        let time_point = time_point.to_string();
 
-        let log_dir = format!("{}/{}/log/deepvariant", config.result_dir, &id);
-        if !Path::new(&log_dir).exists() {
-            fs::create_dir_all(&log_dir)
-                .context(format!("Failed  to create {log_dir} directory"))?;
-        }
-
-        let bam = config.solo_bam(&id, &time);
-        if !Path::new(&bam).exists() {
-            anyhow::bail!("Bam files doesn't exists: {bam}")
-        }
-
-        let output_dir = config.deepvariant_output_dir(&id, &time);
-        fs::create_dir_all(&output_dir).context(format!("Can't create dir: {output_dir}"))?;
+        info!("Initializing DeepVariant for {id} {time_point}.");
 
-        let output_vcf = config.deepvariant_output_vcf(&id, &time);
-        let vcf_passed = format!("{}_PASSED.vcf.gz", path_prefix(&output_vcf)?);
+        let log_dir = format!("{}/{}/log/deepvariant", config.result_dir, &id);
 
-        Ok(Self {
+        let deepvariant = Self {
             id,
-            time,
-            bam,
-            output_dir,
-            output_vcf,
-            vcf_passed,
+            time_point,
             log_dir,
             config,
-        })
+        };
+
+        let output_vcf_exists = Path::new(
+            &deepvariant
+                .config
+                .deepvariant_solo_output_vcf(&deepvariant.id, &deepvariant.time_point),
+        )
+        .exists();
+        if (deepvariant.config.deepvariant_force && output_vcf_exists) || deepvariant.should_run() {
+            fs::remove_dir_all(deepvariant.config.savana_output_dir(&deepvariant.id))?;
+        }
+
+        Ok(deepvariant)
+    }
+}
+
+impl ShouldRun for DeepVariant {
+    /// Returns true if the DeepVariant PASS VCF doesn't exist or is outdated vs input BAM.
+    /// Determines whether DeepVariant should be rerun by comparing input BAM
+    /// modification time to the output VCF.
+    ///
+    /// # Returns
+    /// `true` if the output is outdated or missing.
+    fn should_run(&self) -> bool {
+        let passed_vcf = self
+            .config
+            .deepvariant_solo_passed_vcf(&self.id, &self.time_point);
+        let bam = self.config.solo_bam(&self.id, &self.time_point);
+        is_file_older(&passed_vcf, &bam).unwrap_or(true)
     }
 }
 
 impl Run for DeepVariant {
+    /// Executes DeepVariant inside Docker and filters PASS variants via bcftools.
+    /// Runs DeepVariant inside Docker and filters variants using bcftools.
+    ///
+    /// This function:
+    /// - Creates necessary output directories
+    /// - Executes DeepVariant through Docker if needed
+    /// - Filters for PASS variants
+    /// - Saves logs and handles caching logic via file existence
+    ///
+    /// # Errors
+    /// Returns an error if any external command or file operation fails.
     fn run(&mut self) -> anyhow::Result<()> {
-        force_or_not(&self.vcf_passed, self.config.deepvariant_force)?;
+        let bam = self.config.solo_bam(&self.id, &self.time_point);
+        let output_vcf = self
+            .config
+            .deepvariant_solo_output_vcf(&self.id, &self.time_point);
 
         // Run Docker command if output VCF doesn't exist
-        if !Path::new(&self.output_vcf).exists() {
+        if !Path::new(&output_vcf).exists() {
+            let output_dir = self
+                .config
+                .deepvariant_output_dir(&self.id, &self.time_point);
+
+            fs::create_dir_all(&output_dir)
+                .context(format!("Failed to create dir: {output_dir}"))?;
+
             let mut docker_run = DockerRun::new(&[
                 "run",
                 "-d",
                 "-v",
                 "/data:/data",
                 "-v",
-                &format!("{}:/output", self.output_dir),
+                &format!("{}:/output", output_dir),
                 &format!("google/deepvariant:{}", self.config.deepvariant_bin_version),
                 "/opt/deepvariant/bin/run_deepvariant",
                 &format!("--model_type={}", self.config.deepvariant_model_type),
                 "--ref",
                 &self.config.reference,
                 "--reads",
-                &self.bam,
+                &bam,
                 "--output_vcf",
-                &format!("/output/{}_{}_DeepVariant.vcf.gz", self.id, self.time),
+                &format!("/output/{}_{}_DeepVariant.vcf.gz", self.id, self.time_point),
                 "--output_gvcf",
-                &format!("/output/{}_{}_DeepVariant.g.vcf.gz", self.id, self.time),
+                &format!(
+                    "/output/{}_{}_DeepVariant.g.vcf.gz",
+                    self.id, self.time_point
+                ),
                 &format!("--num_shards={}", self.config.deepvariant_threads),
                 "--logging_dir",
                 "--vcf_stats_report=true",
-                &format!("/output/{}_{}_DeepVariant_logs", self.id, self.time),
+                &format!("/output/{}_{}_DeepVariant_logs", self.id, self.time_point),
                 "--dry_run=false",
                 "--sample_name",
-                &format!("{}_{}", self.id, self.time),
+                &format!("{}_{}", self.id, self.time_point),
             ]);
+
             let report = run_wait(&mut docker_run).context(format!(
                 "Erreur while running DeepVariant for {} {}",
-                self.id, self.time
+                self.id, self.time_point
             ))?;
+
             report
                 .save_to_file(&format!("{}/deepvariant_", self.log_dir))
                 .context("Can't save DeepVariant logs")?;
+        } else {
+            debug!(
+                "DeepVariant output already exists for {} {}, skipping execution.",
+                self.id, self.time_point
+            );
         }
 
-        // Keep PASS
-        if !Path::new(&self.vcf_passed).exists() {
-            info!("Filtering PASS variants");
-            let report = bcftools_keep_pass(
-                &self.output_vcf,
-                &self.vcf_passed,
-                BcftoolsConfig::default(),
-            )
-            .unwrap();
+        let vcf_passed = self
+            .config
+            .deepvariant_solo_passed_vcf(&self.id, &self.time_point);
+        if !Path::new(&vcf_passed).exists() {
+            info!(
+                "Filtering PASS variants for {} {}",
+                self.id, self.time_point
+            );
+            let report =
+                bcftools_keep_pass(&output_vcf, &vcf_passed, BcftoolsConfig::default()).unwrap();
             report
                 .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
                 .unwrap();
@@ -125,38 +185,54 @@ impl Run for DeepVariant {
 }
 
 impl CallerCat for DeepVariant {
+    /// Identifies the caller and whether it applies to a normal or tumor sample,
+    /// determining the category used for variant annotation.
+    ///
+    /// # Returns
+    /// A variant caller classification used for downstream tagging.
     fn caller_cat(&self) -> Annotation {
         let Config {
             normal_name,
             tumoral_name,
             ..
         } = &self.config;
-        if *normal_name == self.time {
+        if *normal_name == self.time_point {
             Annotation::Callers(Caller::DeepVariant, Sample::SoloConstit)
-        } else if *tumoral_name == self.time {
+        } else if *tumoral_name == self.time_point {
             Annotation::Callers(Caller::DeepVariant, Sample::SoloTumor)
         } else {
-            panic!("Error in time_point name: {}", self.time);
+            panic!("Error in time_point name: {}", self.time_point);
         }
     }
 }
 
 impl Variants for DeepVariant {
+    /// Loads and annotates variants from the DeepVariant PASS-filtered VCF,
+    /// registering each variant's caller tag in the shared annotation registry.
+    ///
+    /// # Arguments
+    /// * `annotations` - A mutable registry to tag loaded variants
+    ///
+    /// # Returns
+    /// A `VariantCollection` with variant data, source VCF, and caller identity.
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = self.caller_cat();
-        info!("Loading variants from {}: {}", caller, self.vcf_passed);
-        let variants = read_vcf(&self.vcf_passed)
-            .map_err(|e| anyhow::anyhow!("Failed to read DeepVariant VCF {}.\n{e}", self.vcf_passed))?;
+        let vcf_passed = self
+            .config
+            .deepvariant_solo_passed_vcf(&self.id, &self.time_point);
+        info!("Loading variants from {}: {}", caller, vcf_passed);
+        let variants = read_vcf(&vcf_passed)
+            .map_err(|e| anyhow::anyhow!("Failed to read DeepVariant VCF {}.\n{e}", vcf_passed))?;
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &[caller.clone()]);
         });
         info!("{}, {} variants loaded.", caller, variants.len());
         Ok(VariantCollection {
             variants,
-            vcf: Vcf::new(self.vcf_passed.clone().into()).map_err(|e| {
+            vcf: Vcf::new(vcf_passed.clone().into()).map_err(|e| {
                 anyhow::anyhow!(
                     "Error while creating a VCF representation for {}.\n{e}",
-                    self.vcf_passed
+                    vcf_passed
                 )
             })?,
             caller,
@@ -164,4 +240,5 @@ impl Variants for DeepVariant {
     }
 }
 
+/// Marker trait implementation to signal DeepVariant supports variant export.
 impl RunnerVariants for DeepVariant {}

+ 129 - 67
src/callers/nanomonsv.rs

@@ -10,9 +10,10 @@ use log::{debug, error, info, warn};
 
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, Initialize, InitializeSolo},
+    collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun},
     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
     config::Config,
+    helpers::is_file_older,
     io::vcf::read_vcf,
     runners::{run_wait, CommandRun, Run, RunReport},
     variant::{
@@ -21,63 +22,108 @@ use crate::{
     },
 };
 
+/// Represents the NanomonSV runner, responsible for structural variant calling
+/// from diagnostic and normal BAMs using the NanomonSV tool. This runner initializes,
+/// runs, classifies, and extracts variants from the VCF.
 #[derive(Debug)]
 pub struct NanomonSV {
     pub id: String,
-    pub diag_bam: String,
-    pub mrd_bam: String,
-    pub diag_out_dir: String,
-    pub mrd_out_dir: String,
     pub log_dir: String,
-    pub vcf_passed: String,
     pub config: Config,
 }
 
 impl Initialize for NanomonSV {
+    /// Initializes a new NanomonSV pipeline for the given sample ID and config.
+    ///
+    /// This function prepares the runner instance, computes paths, and conditionally clears previous outputs
+    /// if the runner is set to force mode or the inputs are newer than the last result.
+    ///
+    /// # Arguments
+    /// * `id` - Case ID
+    /// * `config` - Global pipeline configuration
+    ///
+    /// # Returns
+    /// A fully prepared `NanomonSV` instance ready to run.
     fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         info!("Initialize Nanomonsv for {id}.");
-        let log_dir = format!("{}/{}/log/nanomonsv", config.result_dir, &id);
 
-        if !Path::new(&log_dir).exists() {
-            fs::create_dir_all(&log_dir)
-                .context(format!("Failed  to create {log_dir} directory"))?;
-        }
-
-        let diag_bam = config.tumoral_bam(&id);
-        let mrd_bam = config.normal_bam(&id);
-        let diag_out_dir = config.nanomonsv_output_dir(&id, "diag");
-        fs::create_dir_all(&diag_out_dir)?;
-        let mrd_out_dir = config.nanomonsv_output_dir(&id, "mrd");
-        fs::create_dir_all(&mrd_out_dir)?;
+        let log_dir = format!("{}/{}/log/nanomonsv", config.result_dir, &id);
 
-        let vcf_passed = config.nanomonsv_passed_vcf(&id);
-        Ok(Self {
+        let nanomonsv = Self {
             id,
-            diag_bam,
-            mrd_bam,
-            diag_out_dir,
-            mrd_out_dir,
             log_dir,
-            vcf_passed,
             config,
-        })
+        };
+
+        let passed_vcf = nanomonsv.config.nanomonsv_passed_vcf(&nanomonsv.id);
+        if (nanomonsv.config.nanomonsv_force && Path::new(&passed_vcf).exists())
+            || nanomonsv.should_run()
+        {
+            fs::remove_dir_all(nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "diag"))?;
+            fs::remove_dir_all(nanomonsv.config.nanomonsv_output_dir(&nanomonsv.id, "mrd"))?;
+        }
+
+        Ok(nanomonsv)
+    }
+}
+
+impl ShouldRun for NanomonSV {
+    /// Determines whether NanomonSV should re-run based on modification dates
+    /// of the input BAM files compared to the existing PASS-filtered VCF.
+    ///
+    /// # Returns
+    /// `true` if the passed VCF does not exist or is older than any input BAM.
+    fn should_run(&self) -> bool {
+        let passed_vcf = self.config.nanomonsv_passed_vcf(&self.id);
+        is_file_older(&passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
     }
 }
 
 impl Run for NanomonSV {
+    /// Executes the NanomonSV workflow:
+    ///
+    /// 1. Creates output directories
+    /// 2. Runs `parse` in parallel for diagnostic and MRD BAMs
+    /// 3. Runs `nanomonsv get` on each BAM
+    /// 4. Applies `bcftools` to filter the final result to PASS variants
+    ///
+    /// This function is idempotent and skips steps if the expected output already exists.
     fn run(&mut self) -> anyhow::Result<()> {
-        somatic_parse(self)?;
+        let diag_bam = self.config.tumoral_bam(&self.id);
+        let mrd_bam = self.config.normal_bam(&self.id);
+
+        let diag_out_dir = self.config.nanomonsv_output_dir(&self.id, "diag");
+        let mrd_out_dir = self.config.nanomonsv_output_dir(&self.id, "mrd");
+        let vcf_passed = self.config.nanomonsv_passed_vcf(&self.id);
 
-        let diag_out_prefix = format!("{}/{}_diag", self.diag_out_dir, self.id);
-        let mrd_out_prefix = format!("{}/{}_mrd", self.mrd_out_dir, self.id);
+        fs::create_dir_all(&diag_out_dir)?;
+        fs::create_dir_all(&mrd_out_dir)?;
+
+        somatic_parse(
+            &self.id,
+            &diag_bam,
+            &mrd_bam,
+            &diag_out_dir,
+            &mrd_out_dir,
+            &self.config,
+            &self.log_dir,
+        )?;
+
+        let diag_out_prefix = format!("{}/{}_diag", diag_out_dir, self.id);
+        let mrd_out_prefix = format!("{}/{}_mrd", mrd_out_dir, self.id);
 
         let diag_result_vcf = format!("{diag_out_prefix}.nanomonsv.result.vcf");
         let mrd_result_vcf = format!("{mrd_out_prefix}.nanomonsv.result.vcf");
 
         if !Path::new(&mrd_result_vcf).exists() {
-            info!("Nanomonsv get from normal bam: {}.", self.mrd_bam);
-            let report = nanomonsv_get(&self.mrd_bam, &mrd_out_prefix, None, None, &self.config)
+            info!("Nanomonsv get from normal bam: {}.", mrd_bam);
+            let report = nanomonsv_get(&mrd_bam, &mrd_out_prefix, None, None, &self.config)
                 .context(format!(
                     "Error while running NanomonSV get for {mrd_result_vcf}"
                 ))?;
@@ -85,30 +131,26 @@ impl Run for NanomonSV {
         }
 
         if !Path::new(&diag_result_vcf).exists() {
-            info!("NanomonSV get from tumoral bam: {}.", self.diag_bam);
-
+            info!("NanomonSV get from tumoral bam: {}.", diag_bam);
             let report = nanomonsv_get(
-                &self.diag_bam,
+                &diag_bam,
                 &diag_out_prefix,
-                Some(&self.mrd_bam),
+                Some(&mrd_bam),
                 Some(&mrd_out_prefix),
                 &self.config,
             )
             .context(format!(
                 "Error while running NanomonSV get for {diag_result_vcf}"
             ))?;
-            report.save_to_file(&format!("{}/nanomonsv_get_diag_", self.log_dir,))?;
+            report.save_to_file(&format!("{}/nanomonsv_get_diag_", self.log_dir))?;
         }
 
-        if !Path::new(&self.vcf_passed).exists() {
-            let report = bcftools_keep_pass(
-                &diag_result_vcf,
-                &self.vcf_passed,
-                BcftoolsConfig::default(),
-            )
-            .context(format!("Can't index {}", self.vcf_passed))?;
+        if !Path::new(&vcf_passed).exists() {
+            let report =
+                bcftools_keep_pass(&diag_result_vcf, &vcf_passed, BcftoolsConfig::default())
+                    .context(format!("Can't index {}", vcf_passed))?;
             report
-                .save_to_file(&format!("{}/bcftools_pass_", self.log_dir,))
+                .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
                 .context("Can't save report")?;
         }
 
@@ -117,35 +159,48 @@ impl Run for NanomonSV {
 }
 
 impl CallerCat for NanomonSV {
+    /// Identifies this tool as the NanomonSV caller producing somatic variants,
+    /// used for downstream variant classification and traceability.
     fn caller_cat(&self) -> Annotation {
         Annotation::Callers(Caller::NanomonSV, Sample::Somatic)
     }
 }
 
 impl Variants for NanomonSV {
+    /// Loads and annotates the variants from the NanomonSV PASS-filtered VCF.
+    ///
+    /// This method inserts metadata into the global annotation registry,
+    /// helping associate each variant to its caller and sample category.
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
-        info!("Loading variants from {}: {}", caller, self.vcf_passed);
+        let vcf_passed = self.config.nanomonsv_passed_vcf(&self.id);
+
+        info!("Loading variants from {}: {}", caller, vcf_passed);
 
-        let variants = read_vcf(&self.vcf_passed).map_err(|e| {
-            anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", self.vcf_passed)
+        let variants = read_vcf(&vcf_passed).map_err(|e| {
+            anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed)
         })?;
 
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
         });
+
         info!("{}, {} variants loaded.", caller, variants.len());
+
         Ok(VariantCollection {
             variants,
-            vcf: Vcf::new(self.vcf_passed.clone().into())?,
+            vcf: Vcf::new(vcf_passed.into())?,
             caller,
         })
     }
 }
 
+/// Marker implementation to integrate `NanomonSV` into the unified runner interface.
 impl RunnerVariants for NanomonSV {}
 
+/// Solo (single-sample) variant of the NanomonSV runner.
 #[derive(Debug)]
 pub struct NanomonSVSolo {
     pub id: String,
@@ -291,31 +346,39 @@ pub fn nanomonsv_parse(bam: &str, out_prefix: &str, config: &Config) -> anyhow::
     Ok(res)
 }
 
-pub fn somatic_parse(s: &NanomonSV) -> anyhow::Result<()> {
-    let diag_out_prefix = format!("{}/{}_diag", s.diag_out_dir, s.id);
-    let mrd_out_prefix = format!("{}/{}_mrd", s.mrd_out_dir, s.id);
+/// Executes the NanomonSV `parse` step in parallel for both diagnostic and MRD BAMs.
+/// Internally spawns threads that run the parsing script and waits for their completion.
+///
+/// # Arguments
+/// * `id` - Sample ID
+/// * `diag_bam` - Path to diagnostic (tumoral) BAM file
+/// * `mrd_bam` - Path to matched normal BAM file
+/// * `diag_out_dir` - Output directory for diagnostic BAM
+/// * `mrd_out_dir` - Output directory for MRD BAM
+/// * `config` - Shared pipeline configuration
+/// * `log_dir` - Log output directory
+fn somatic_parse(
+    id: &str,
+    diag_bam: &str,
+    mrd_bam: &str,
+    diag_out_dir: &str,
+    mrd_out_dir: &str,
+    config: &Config,
+    log_dir: &str,
+) -> anyhow::Result<()> {
+    let diag_out_prefix = format!("{}/{}_diag", diag_out_dir, id);
+    let mrd_out_prefix = format!("{}/{}_mrd", mrd_out_dir, id);
 
     let diag_info_vcf = format!("{diag_out_prefix}.bp_info.sorted.bed.gz");
     let mrd_info_vcf = format!("{mrd_out_prefix}.bp_info.sorted.bed.gz");
 
     let threads_handles = vec![
-        spawn_parse_thread(
-            &s.diag_bam,
-            &diag_out_prefix,
-            &s.config,
-            &s.log_dir,
-            &diag_info_vcf,
-        )?,
-        spawn_parse_thread(
-            &s.mrd_bam,
-            &mrd_out_prefix,
-            &s.config,
-            &s.log_dir,
-            &mrd_info_vcf,
-        )?,
+        spawn_parse_thread(diag_bam, &diag_out_prefix, config, log_dir, &diag_info_vcf)?,
+        spawn_parse_thread(mrd_bam, &mrd_out_prefix, config, log_dir, &mrd_info_vcf)?,
     ];
 
-    // Wait for all threads to finish and propagate errors
     for handle in threads_handles {
         handle
             .join()
@@ -323,7 +386,6 @@ pub fn somatic_parse(s: &NanomonSV) -> anyhow::Result<()> {
     }
 
     info!("Nanomonsv parsing completed successfully.");
-
     Ok(())
 }
 

+ 99 - 56
src/callers/savana.rs

@@ -1,11 +1,12 @@
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, HasOutputs, Initialize, Version},
+    collection::{vcf::Vcf, Initialize, ShouldRun, Version},
     commands::{
         bcftools::{bcftools_keep_pass, BcftoolsConfig},
         longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
     },
     config::Config,
+    helpers::is_file_older,
     io::{readers::get_gz_reader, vcf::read_vcf},
     positions::{num_to_contig, GenomeRange},
     runners::{run_wait, CommandRun, Run},
@@ -16,7 +17,7 @@ use crate::{
 };
 use anyhow::Context;
 use itertools::Itertools;
-use log::info;
+use log::{debug, info};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{
@@ -26,77 +27,95 @@ use std::{
     str::FromStr,
 };
 
+/// The `Savana` struct orchestrates the haplotype-aware somatic variant calling
+/// pipeline using the Savana tool. It manages initialization, conditional execution,
+/// phasing dependencies, haplotagging, and output filtering.
 #[derive(Debug)]
 pub struct Savana {
     pub id: String,
-    pub passed_vcf: String,
-    pub phased_germline_vcf: String,
-    pub normal_hp_bam: String,
-    pub tumoral_hp_bam: String,
     pub config: Config,
     pub log_dir: String,
 }
 
 impl Initialize for Savana {
+    /// Initializes a new `Savana` instance for a given sample ID and configuration.
+    ///
+    /// If `savana_force` is enabled in [`Config`] and the output VCF exists, or if the run
+    /// should be re-triggered (based on file freshness), the output directory
+    /// will be cleaned.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - Sample identifier
+    /// * `config` - Pipeline execution configuration
+    ///
+    /// # Returns
+    ///
+    /// A new `Savana` instance, or an error if cleanup fails.
     fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
+        info!("Initialize Savana for {id}.");
         let log_dir = format!("{}/{}/log/savana", config.result_dir, id);
-        if !Path::new(&log_dir).exists() {
-            fs::create_dir_all(&log_dir)
-                .context(format!("Failed  to create {log_dir} directory"))?;
-        }
-
-        // Check for phased germline vcf
-        // no required anymore since >= 1.3.0
-        let phased_germline_vcf = config.constit_phased_vcf(id);
-        if !Path::new(&phased_germline_vcf).exists() {
-            LongphasePhase::initialize(id, config.clone())?.run()?;
-        }
-
-        let normal_hp_bam = config.normal_haplotagged_bam(id);
-        let tumoral_hp_bam = config.tumoral_haplotagged_bam(id);
-
-        let passed_vcf = config.savana_passed_vcf(id);
-        fs::create_dir_all(config.savana_output_dir(id))?;
 
-        Ok(Self {
+        let savana = Self {
             id: id.to_string(),
-            passed_vcf,
             config,
             log_dir,
-            phased_germline_vcf,
-            normal_hp_bam,
-            tumoral_hp_bam,
-        })
+        };
+
+        let output_vcf_exists = Path::new(&savana.config.savana_output_vcf(id)).exists();
+        if (savana.config.savana_force && output_vcf_exists) || savana.should_run() {
+            fs::remove_dir_all(savana.config.savana_output_dir(id))?;
+        }
+
+        Ok(savana)
     }
 }
 
 impl Run for Savana {
+    /// Executes the Savana pipeline, including prerequisite phasing and haplotagging steps.
+    ///
+    /// If the output VCF does not already exist, it ensures that the phased VCF and
+    /// haplotagged BAMs exist (or runs the modules to produce them), then runs Savana.
+    /// Finally, it filters the resulting VCF to retain only PASS variants.
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` if the run completes successfully, or an error otherwise.
     fn run(&mut self) -> anyhow::Result<()> {
-        // force_or_not(
-        //     &self.config.savana_output_vcf(&self.id),
-        //     self.config.savana_force,
-        // )?;
-
         let id = &self.id;
         let output_vcf = &self.config.savana_output_vcf(id);
         if !Path::new(&output_vcf).exists() {
             info!("Running Savana v{}", Savana::version(&self.config)?);
+            let output_dir = self.config.savana_output_dir(id);
+            fs::create_dir_all(&output_dir)
+                .with_context(|| format!("Failed to create output dir for Savana run: {output_dir}"))?;
+
+            // Check for phased germline vcf
+            // not required anymore since >= 1.3.0
+            let phased_germline_vcf = self.config.constit_phased_vcf(id);
+            if !Path::new(&phased_germline_vcf).exists() {
+                LongphasePhase::initialize(id, self.config.clone())?.run()?;
+            }
+
+            let normal_hp_bam = self.config.normal_haplotagged_bam(id);
+            let tumoral_hp_bam = self.config.tumoral_haplotagged_bam(id);
+
             // Check for haplotagged bam
-            if !Path::new(&self.normal_hp_bam).exists() {
+            if !Path::new(&normal_hp_bam).exists() {
                 LongphaseHap::new(
                     id,
                     &self.config.normal_bam(id),
-                    &self.phased_germline_vcf,
+                    &phased_germline_vcf,
                     LongphaseConfig::default(),
                 )
                 .run()?;
             }
 
-            if !Path::new(&self.tumoral_hp_bam).exists() {
+            if !Path::new(&tumoral_hp_bam).exists() {
                 LongphaseHap::new(
                     id,
                     &self.config.tumoral_bam(id),
-                    &self.phased_germline_vcf,
+                    &phased_germline_vcf,
                     LongphaseConfig::default(),
                 )
                 .run()?;
@@ -105,15 +124,15 @@ impl Run for Savana {
             let savana_args = [
                 // "run",
                 "--tumour",
-                &self.tumoral_hp_bam,
+                &tumoral_hp_bam,
                 "--normal",
-                &self.normal_hp_bam,
+                &normal_hp_bam,
                 "--outdir",
                 &self.config.savana_output_dir(id),
                 "--ref",
                 &self.config.reference,
                 "--snp_vcf",
-                &self.phased_germline_vcf,
+                &phased_germline_vcf,
                 "--no_blacklist",
                 "--threads",
                 &self.config.savana_threads.to_string(),
@@ -137,29 +156,40 @@ impl Run for Savana {
             report
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
+        } else {
+            debug!("Savana output already exists for {}, skipping execution.", self.id);
         }
 
         // Keep PASS
-        if !Path::new(&self.passed_vcf).exists() && Path::new(output_vcf).exists() {
-            let report =
-                bcftools_keep_pass(output_vcf, &self.passed_vcf, BcftoolsConfig::default())
-                    .context(format!(
-                        "Error while running bcftools keep PASS for {output_vcf}"
-                    ))?;
+        let passed_vcf = self.config.savana_passed_vcf(id);
+        if !Path::new(&passed_vcf).exists() && Path::new(output_vcf).exists() {
+            let report = bcftools_keep_pass(output_vcf, &passed_vcf, BcftoolsConfig::default())
+                .context(format!(
+                    "Error while running bcftools keep PASS for {output_vcf}"
+                ))?;
             let log_file = format!("{}/bcftools_pass", self.log_dir);
             report
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
         }
+
         Ok(())
     }
 }
 
-impl HasOutputs for Savana {
-    fn has_output(&self, id: &str) -> (bool, bool) {
-        let output = &self.config.savana_passed_vcf(id);
-        let passed = &self.config.savana_passed_vcf(id);
-        (Path::new(output).exists(), Path::new(passed).exists())
+impl ShouldRun for Savana {
+    /// Determines whether Savana should be re-run based on whether
+    /// the filtered PASS VCF is older than the input BAMs.
+    ///
+    /// If either input BAM (normal or tumor) is newer than the PASS VCF,
+    /// Savana is considered out of date and should be re-executed.
+    ///
+    /// # Returns
+    /// `true` if an update is needed, or if timestamps can't be checked (file doesn't exist)
+    fn should_run(&self) -> bool {
+        let passed_vcf = &self.config.savana_passed_vcf(&self.id);
+        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
     }
 }
 
@@ -200,19 +230,32 @@ impl CallerCat for Savana {
 }
 
 impl Variants for Savana {
+    /// Loads variants from the PASS-filtered VCF produced by Savana and annotates them.
+    ///
+    /// Reads the VCF file, applies the caller tag via the `Annotations` object, and
+    /// wraps the parsed data into a `VariantCollection`.
+    ///
+    /// # Arguments
+    /// * `annotations` - Global annotation registry shared across callers.
+    ///
+    /// # Returns
+    /// A populated `VariantCollection`, or an error if the VCF fails to parse.
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
-        info!("Loading variants from {}: {}", caller, self.passed_vcf);
-        let variants = read_vcf(&self.passed_vcf)
-            .map_err(|e| anyhow::anyhow!("Failed to read Savana VCF {}.\n{e}", self.passed_vcf))?;
+        let passed_vcf = self.config.savana_passed_vcf(&self.id);
+
+        info!("Loading variants from {}: {}", caller, &passed_vcf);
+        let variants = read_vcf(&passed_vcf)
+            .map_err(|e| anyhow::anyhow!("Failed to read Savana VCF {}.\n{e}", passed_vcf))?;
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
         });
+
         info!("{}, {} variants loaded.", caller, variants.len());
         Ok(VariantCollection {
             variants,
-            vcf: Vcf::new(self.passed_vcf.clone().into())?,
+            vcf: Vcf::new(passed_vcf.clone().into())?,
             caller,
         })
     }

+ 106 - 49
src/callers/severus.rs

@@ -1,11 +1,12 @@
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, HasOutputs, Initialize, InitializeSolo, Version},
+    collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun, Version},
     commands::{
         bcftools::{bcftools_keep_pass_precise, BcftoolsConfig},
         longphase::LongphasePhase,
     },
     config::Config,
+    helpers::is_file_older,
     io::vcf::read_vcf,
     runners::{run_wait, CommandRun, Run},
     variant::{
@@ -18,6 +19,9 @@ use log::{debug, info};
 use rayon::prelude::*;
 use std::{fs, path::Path};
 
+/// Represents a wrapper around the Severus pipeline, responsible for calling structural variants
+/// using phased VCFs and tumor/control BAMs. It handles initialization, conditional execution,
+/// logging, and cleanup.
 #[derive(Debug)]
 pub struct Severus {
     pub id: String,
@@ -26,36 +30,48 @@ pub struct Severus {
 }
 
 impl Initialize for Severus {
+    /// Initializes a new Severus instance for a given sample ID and configuration.
+    ///
+    /// This will create the output log directory path and clean up previous output files
+    /// if the run should be re-executed or `severus_force` is set and an output VCF exists.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - The sample ID
+    /// * `config` - The execution configuration
+    ///
+    /// # Returns
+    ///
+    /// A `Severus` instance wrapped in `Ok`, or an error if setup fails
     fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
         info!("Initialize Severus for {id}.");
-        let output_vcf_exists = Path::new(&config.severus_output_vcf(id)).exists();
-        if config.severus_force && output_vcf_exists {
-            fs::remove_dir_all(config.severus_output_dir(id))?;
-        }
 
         let log_dir = format!("{}/{}/log/severus", config.result_dir, id);
-        if !Path::new(&log_dir).exists() {
-            fs::create_dir_all(&log_dir)
-                .context(format!("Failed  to create {log_dir} directory"))?;
-        }
-
-        let phased_germline_vcf = &config.constit_phased_vcf(id);
-        if !Path::new(phased_germline_vcf).exists() {
-            LongphasePhase::initialize(id, config.clone())?.run()?;
-        }
-        fs::create_dir_all(config.severus_output_dir(id))?;
-
-        Ok(Self {
+        let severus = Self {
             id: id.to_string(),
             config,
             log_dir,
-        })
+        };
+
+        let output_vcf_exists = Path::new(&severus.config.severus_output_vcf(id)).exists();
+        if (severus.config.severus_force && output_vcf_exists) || severus.should_run() {
+            fs::remove_dir_all(severus.config.severus_output_dir(id))?;
+        }
+
+        Ok(severus)
     }
 }
 
 impl Run for Severus {
+    /// Runs the Severus structural variant caller if its output VCF does not already exist.
+    ///
+    /// It also conditionally runs the upstream phasing module (`LongphasePhase`) if the required
+    /// phased VCF is missing. After running Severus, it filters PASS variants using `bcftools`.
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` if everything runs successfully; otherwise, an error with context.
     fn run(&mut self) -> anyhow::Result<()> {
-        // TODO make that version check work
         info!("Running Severus v{}", Severus::version(&self.config)?);
 
         let id = &self.id;
@@ -63,14 +79,23 @@ impl Run for Severus {
         let passed_vcf = &self.config.severus_passed_vcf(id);
 
         if !Path::new(output_vcf).exists() {
-            // Run command if output VCF doesn't exist
+            let constit_phased_vcf = &self.config.constit_phased_vcf(id);
+
+            // Run Longphase if necessary
+            if !Path::new(constit_phased_vcf).exists() {
+                LongphasePhase::initialize(&self.id, self.config.clone())?.run()?;
+            }
+
+            fs::create_dir_all(self.config.severus_output_dir(id))
+                .context("Failed to create Severus output directory")?;
+
             let severus_args = [
                 "--target-bam",
                 &self.config.tumoral_bam(id),
                 "--control-bam",
                 &self.config.normal_bam(id),
                 "--phasing-vcf",
-                &self.config.constit_phased_vcf(id),
+                constit_phased_vcf,
                 "--out-dir",
                 &self.config.severus_output_dir(id),
                 "-t",
@@ -92,6 +117,7 @@ impl Run for Severus {
                 ),
             ];
             let mut cmd_run = CommandRun::new("bash", &args);
+
             let report = run_wait(&mut cmd_run).context(format!(
                 "Error while running `severus.py {}`",
                 args.join(" ")
@@ -100,31 +126,39 @@ impl Run for Severus {
             let log_file = format!("{}/severus_", self.log_dir);
             report
                 .save_to_file(&log_file)
-                .context(format!("Error while writing logs into {log_file}"))?;
+                .context(format!("Error while writing Severus logs into {log_file}"))?;
         } else {
-            debug!("")
+            debug!("Severus output VCF already exists for {}, skipping execution.", self.id);
         }
 
-        // Keep PASS
+        // Filter PASS variants
         if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
-            let report = bcftools_keep_pass_precise(output_vcf, passed_vcf, BcftoolsConfig::default())
-                .context(format!(
-                    "Error while running bcftools keep PASS for {output_vcf}"
-                ))?;
+            let report =
+                bcftools_keep_pass_precise(output_vcf, passed_vcf, BcftoolsConfig::default())
+                    .context(format!(
+                        "Error while running bcftools keep PASS for {output_vcf}"
+                    ))?;
             let log_file = format!("{}/bcftools_pass", self.log_dir);
             report
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
         }
+
         Ok(())
     }
 }
 
-impl HasOutputs for Severus {
-    fn has_output(&self, id: &str) -> (bool, bool) {
-        let output = &self.config.severus_passed_vcf(id);
-        let passed = &self.config.severus_passed_vcf(id);
-        (Path::new(output).exists(), Path::new(passed).exists())
+impl ShouldRun for Severus {
+    /// Determines whether Severus should re-run based on whether the PASS VCF
+    /// is older than either the tumor or normal BAM file.
+    ///
+    /// # Returns
+    ///
+    /// `true` if Severus needs to be re-run, otherwise `false`
+    fn should_run(&self) -> bool {
+        let passed_vcf = &self.config.severus_passed_vcf(&self.id);
+        is_file_older(passed_vcf, &self.config.normal_bam(&self.id)).unwrap_or(true)
+            || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id)).unwrap_or(true)
     }
 }
 
@@ -149,33 +183,52 @@ impl Version for Severus {
 }
 
 impl CallerCat for Severus {
+    /// Returns the annotation category for Severus calls.
+    ///
+    /// This indicates that the variants come from the `Severus` caller
+    /// and are somatic in nature. It is used downstream for caller-specific
+    /// annotation tracking and filtering.
     fn caller_cat(&self) -> Annotation {
         Annotation::Callers(Caller::Severus, Sample::Somatic)
     }
 }
 
 impl Variants for Severus {
+    /// Loads the variant calls from the Severus filtered PASS VCF.
+    ///
+    /// This method reads the VCF file output by Severus (after PASS filtering),
+    /// assigns the appropriate annotation tag to each variant using `caller_cat()`,
+    /// and updates the provided `Annotations` map in parallel.
+    ///
+    /// # Arguments
+    ///
+    /// * `annotations` - A reference to the global annotations map used
+    ///   for aggregating and tracking variant-level metadata across callers.
+    ///
+    /// # Returns
+    ///
+    /// A `VariantCollection` object containing:
+    /// - the parsed variants,
+    /// - the `Vcf` wrapper of the file path,
+    /// - and the caller identity used for tagging.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the VCF file cannot be read or parsed.
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let vcf_passed = self.config.severus_passed_vcf(&self.id);
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
-        info!(
-            "Loading variants from {}: {}",
-            caller, vcf_passed
-        );
+        info!("Loading variants from {}: {}", caller, vcf_passed);
 
-        let variants = read_vcf(&vcf_passed).map_err(|e| {
-            anyhow::anyhow!("Failed to read Severus VCF {}.\n{e}", vcf_passed)
-        })?;
+        let variants = read_vcf(&vcf_passed)
+            .map_err(|e| anyhow::anyhow!("Failed to read Severus VCF {}.\n{e}", vcf_passed))?;
 
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
         });
-        info!(
-            "{}, {} variants loaded.",
-            caller,
-            variants.len()
-        );
+
+        info!("{}, {} variants loaded.", caller, variants.len());
         Ok(VariantCollection {
             variants,
             vcf: Vcf::new(vcf_passed.into())?,
@@ -186,6 +239,9 @@ impl Variants for Severus {
 
 impl RunnerVariants for Severus {}
 
+
+// ========================================================================
+// (plain `//`: a `///` separator here would attach to `SeverusSolo`'s rustdoc)
 #[derive(Debug)]
 pub struct SeverusSolo {
     pub id: String,
@@ -260,10 +316,11 @@ impl Run for SeverusSolo {
 
         // Keep PASS
         if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
-            let report = bcftools_keep_pass_precise(output_vcf, passed_vcf, BcftoolsConfig::default())
-                .context(format!(
-                    "Error while running bcftools keep PASS for {output_vcf}"
-                ))?;
+            let report =
+                bcftools_keep_pass_precise(output_vcf, passed_vcf, BcftoolsConfig::default())
+                    .context(format!(
+                        "Error while running bcftools keep PASS for {output_vcf}"
+                    ))?;
             let log_file = format!("{}/bcftools_pass", self.log_dir);
             report
                 .save_to_file(&log_file)

+ 2 - 2
src/collection/mod.rs

@@ -912,8 +912,8 @@ pub trait InitializeSolo: Sized {
     fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self>;
 }
 
-pub trait HasOutputs {
-    fn has_output(&self, id: &str) -> (bool, bool);
+pub trait ShouldRun {
+    fn should_run(&self) -> bool;
 }
 
 pub trait Version {

+ 55 - 1
src/config.rs

@@ -46,6 +46,7 @@ pub struct Config {
     pub deepsomatic_threads: u8,
     pub deepsomatic_bin_version: String,
     pub deepsomatic_model_type: String,
+    pub deepsomatic_force: bool,
     pub bam_min_mapq: u8,
     pub bam_n_threads: u8,
     pub db_cases_path: String,
@@ -132,6 +133,7 @@ impl Default for Config {
             deepsomatic_threads: 155,
             deepsomatic_bin_version: "1.8.0".to_string(),
             deepsomatic_model_type: "ONT".to_string(),
+            deepsomatic_force: false,
 
             // ClairS
             clairs_output_dir: "{result_dir}/{id}/diag/ClairS".to_string(),
@@ -317,7 +319,7 @@ impl Config {
             .replace("{time}", time)
     }
 
-    pub fn deepvariant_output_vcf(&self, id: &str, time: &str) -> String {
+    pub fn deepvariant_solo_output_vcf(&self, id: &str, time: &str) -> String {
         format!(
             "{}/{}",
             self.deepvariant_output_dir(id, time),
@@ -327,6 +329,31 @@ impl Config {
         .replace("{time}", time)
     }
 
+    pub fn deepvariant_normal_output_dir(&self, id: &str) -> String {
+        self.deepvariant_output_dir(id, &self.normal_name)
+    }
+
+    /// DeepVariant output *directory* for the tumoral sample (counterpart of
+    /// `deepvariant_normal_output_dir`). Fixed: previously returned
+    /// `deepvariant_solo_passed_vcf(...)` — a VCF file path, not a directory.
+    pub fn deepvariant_tumoral_output_dir(&self, id: &str) -> String {
+        self.deepvariant_output_dir(id, &self.tumoral_name)
+    }
+
+    /// Path of the PASS-filtered DeepVariant VCF for sample `id` at `time`
+    /// (normal or tumoral). Fixed: the directory is now resolved with the
+    /// `time` argument instead of being pinned to the normal sample's
+    /// directory, so `deepvariant_tumoral_passed_vcf` lands in the tumoral dir.
+    pub fn deepvariant_solo_passed_vcf(&self, id: &str, time: &str) -> String {
+        format!(
+            "{}/{}_{}_DeepVariant_PASSED.vcf.gz",
+            self.deepvariant_output_dir(id, time),
+            id,
+            time
+        )
+    }
+
+    pub fn deepvariant_normal_passed_vcf(&self, id: &str) -> String {
+        self.deepvariant_solo_passed_vcf(id, &self.normal_name)
+    }
+
+    pub fn deepvariant_tumoral_passed_vcf(&self, id: &str) -> String {
+        self.deepvariant_solo_passed_vcf(id, &self.tumoral_name)
+    }
+
     // DeepSomatic
     pub fn deepsomatic_output_dir(&self, id: &str) -> String {
         self.deepsomatic_output_dir
@@ -335,6 +362,24 @@ impl Config {
             .replace("{time}", &self.tumoral_name)
     }
 
+    pub fn deepsomatic_output_vcf(&self, id: &str) -> String {
+        format!(
+            "{}/{}_{}_DeepSomatic.vcf.gz",
+            self.deepsomatic_output_dir(id),
+            id,
+            self.tumoral_name
+        )
+    }
+
+    /// Path of the PASS-filtered DeepSomatic VCF for sample `id`.
+    /// Fixed: the file is now placed under `deepsomatic_output_dir(id)`,
+    /// matching `deepsomatic_output_vcf`, instead of the DeepVariant
+    /// normal-sample directory (copy-paste error).
+    pub fn deepsomatic_passed_vcf(&self, id: &str) -> String {
+        format!(
+            "{}/{}_{}_DeepSomatic_PASSED.vcf.gz",
+            self.deepsomatic_output_dir(id),
+            id,
+            self.tumoral_name
+        )
+    }
+
     // ClairS
     pub fn clairs_output_dir(&self, id: &str) -> String {
         self.clairs_output_dir
@@ -350,6 +395,15 @@ impl Config {
         )
     }
 
+    pub fn clairs_passed_vcf(&self, id: &str) -> String {
+        format!(
+            "{}/{}_{}_clairs_PASSED.vcf.gz",
+            self.clairs_output_dir(id),
+            id,
+            self.tumoral_name
+        )
+    }
+
     pub fn clairs_germline_normal_vcf(&self, id: &str) -> String {
         let dir = self.clairs_output_dir(id);
         format!("{dir}/{}", *CLAIRS_GERMLINE_NORMAL)

+ 41 - 0
src/helpers.rs

@@ -461,3 +461,44 @@ pub fn list_directories(dir_path: &str) -> std::io::Result<Vec<String>> {
 
     Ok(directories)
 }
+
+/// Returns `true` if the first file is older than the second file,
+/// based on their last modification times.
+///
+/// # Arguments
+///
+/// * `file1` - Path to the first file.
+/// * `file2` - Path to the second file.
+///
+/// # Returns
+///
+/// * `Ok(true)` if `file1` is older than `file2`
+/// * `Ok(false)` if `file1` is not older than `file2`
+/// * `Err(e)` if one of the files cannot be read or metadata cannot be retrieved
+///
+/// # Errors
+///
+/// Returns an error if either file does not exist, cannot be accessed,
+/// or the modification time is unavailable.
+///
+/// # Example
+///
+/// ```ignore
+/// // `ignore`: needs existing files and a `?`-compatible context,
+/// // so it is illustrative only and must not run as a doctest.
+/// if is_file_older("a.txt", "b.txt")? {
+///     println!("a.txt is older than b.txt");
+/// }
+/// ```
+pub fn is_file_older(file1: &str, file2: &str) -> anyhow::Result<bool> {
+    let mtime1 = fs::metadata(file1)
+        .with_context(|| format!("Failed to read metadata for '{}'", file1))?
+        .modified()
+        .with_context(|| format!("Failed to get modified time for '{}'", file1))?;
+
+    let mtime2 = fs::metadata(file2)
+        .with_context(|| format!("Failed to read metadata for '{}'", file2))?
+        .modified()
+        .with_context(|| format!("Failed to get modified time for '{}'", file2))?;
+
+    Ok(mtime1 < mtime2)
+}

+ 122 - 23
src/runners.rs

@@ -1,6 +1,7 @@
 use std::{
-    fs::File,
+    fs::{self},
     io::{BufRead, BufReader, Write},
+    path::Path,
     process::{Child, Command, Stdio},
     sync::{
         mpsc::{self, TryRecvError},
@@ -10,13 +11,12 @@ use std::{
 };
 
 use anyhow::Context;
-use bgzip::{BGZFWriter, Compression};
 use chrono::{DateTime, Utc};
 use log::{info, warn};
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
-use crate::{config::Config, DOCKER_ID};
+use crate::{config::Config, io::writers::get_gz_writer, DOCKER_ID};
 
 /// Trait for running a command.
 pub trait Run {
@@ -41,9 +41,31 @@ pub trait Wait {
     fn wait(&mut self) -> anyhow::Result<()>;
 }
 
+/// A convenience trait alias that bundles `Run`, `Wait`, and `Log` traits
+/// for types that support full command execution lifecycle behavior.
 pub trait RunWait: Run + Wait + Log {}
+
+/// Blanket implementation of `RunWait` for any type that implements `Run + Wait + Log`.
 impl<T: Run + Wait + Log> RunWait for T {}
 
+/// Executes a `RunWait` task by calling `.run()` then `.wait()`, and generates a `RunReport`.
+///
+/// This utility function captures the wall-clock start and end times of the task
+/// and stores its output logs for later inspection or saving.
+///
+/// # Arguments
+///
+/// * `item` - A mutable reference to any struct implementing the `RunWait` trait
+///
+/// # Returns
+///
+/// A `RunReport` struct that includes:
+/// - start and end timestamps (in UTC)
+/// - accumulated stdout/stderr log
+///
+/// # Errors
+///
+/// Returns an error if either the `.run()` or `.wait()` step fails.
 pub fn run_wait<T: RunWait>(item: &mut T) -> anyhow::Result<RunReport> {
     let mut run_report = RunReport {
         start: Utc::now(),
@@ -56,23 +78,51 @@ pub fn run_wait<T: RunWait>(item: &mut T) -> anyhow::Result<RunReport> {
     Ok(run_report)
 }
 
+/// Captures the lifecycle information and log output of a `RunWait` task.
+///
+/// This is useful for tracing, debugging, audit logging, or benchmarking pipeline steps.
 #[derive(Debug, Default, Serialize, Deserialize)]
 pub struct RunReport {
+    /// Timestamp when the task started
     pub start: DateTime<Utc>,
+    /// Timestamp when the task ended
     pub end: DateTime<Utc>,
+    /// Aggregated log output from the task (stdout and stderr)
     pub log: String,
 }
 
+
 impl RunReport {
-    /// Serialize the RunReport to a JSON string and save to file_prefix.log.gz
-    pub fn save_to_file(&self, file_prefix: &str) -> std::io::Result<()> {
-        let json_data = serde_json::to_string_pretty(self).expect("Failed to serialize RunReport");
-        let uuid = Uuid::new_v4().to_string()[..5].to_string();
-        let file_path = format!("{}{}.log.gz", file_prefix, uuid);
-        let file = File::create(&file_path)?;
-        let mut writer = BGZFWriter::new(file, Compression::default());
+    /// Serialize the RunReport to a JSON string and save it to `file_prefix<uuid>.log.gz`.
+    ///
+    /// If the directory part of `file_prefix` does not exist, it will be created automatically.
+    ///
+    /// # Arguments
+    ///
+    /// * `file_prefix` - Path prefix for the output file (can include directory).
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` on success, or a `std::io::Error` if writing fails.
+    pub fn save_to_file(&self, file_prefix: &str) -> anyhow::Result<()> {
+        // Serialize to JSON
+        let json_data = serde_json::to_string_pretty(self)?;
+
+        // Generate short UUID suffix
+        let uuid_suffix = &Uuid::new_v4().to_string()[..5];
+        let file_path = format!("{}{}.log.gz", file_prefix, uuid_suffix);
+
+        // Ensure the parent directory exists
+        if let Some(parent) = Path::new(&file_path).parent() {
+            fs::create_dir_all(parent)?;
+        }
+
+        // Create and write BGZF-compressed file
+        let mut writer = get_gz_writer(&file_path, true)
+            .with_context(|| format!("Failed to get gz writer for: {file_path}"))?;
         writer.write_all(json_data.as_bytes())?;
         writer.close()?;
+
         Ok(())
     }
 }
@@ -110,6 +160,17 @@ impl DockerRun {
 }
 
 impl Run for DockerRun {
+    /// Executes the configured Docker command inside a container.
+    ///
+    /// - Sets a Ctrl-C (SIGINT) handler to stop the container if the process is interrupted.
+    /// - Adds memory limits to the Docker arguments based on configuration.
+    /// - Captures and logs the container ID.
+    /// - Spawns a thread to follow container logs in real-time.
+    ///
+    /// # Returns
+    ///
+    /// * `Ok(())` if the container starts successfully.
+    /// * An `anyhow::Error` if setup or execution fails.
     fn run(&mut self) -> anyhow::Result<()> {
         // Sets up a Ctrl-C handler to stop Docker containers on interrupt.
         ctrlc::try_set_handler(move || {
@@ -120,7 +181,8 @@ impl Run for DockerRun {
                 }
             }
             std::process::exit(1);
-        }).context("Failed to set Ctrl-C handler")?;
+        })
+        .context("Failed to set Ctrl-C handler")?;
 
         // Configures memory limits for the Docker container.
         let c = Config::default();
@@ -189,11 +251,28 @@ impl Run for DockerRun {
 }
 
 impl Wait for DockerRun {
+    /// Waits for the Docker container associated with this `DockerRun` instance to finish.
+    ///
+    /// This method blocks until the container exits, using `docker wait`. It captures the
+    /// exit status and logs a warning if the container did not exit successfully.
+    ///
+    /// After the container exits, its ID is removed from the global `DOCKER_ID` list
+    /// to prevent further cleanup or signal handling on an already-finished container.
+    ///
+    /// # Returns
+    ///
+    /// * `Ok(())` if the wait completes successfully (regardless of container success).
+    /// * An error if the `docker wait` command fails to execute.
+    ///
+    /// # Errors
+    ///
+    /// This function returns an `anyhow::Error` if the underlying `docker wait` command fails to launch.
     fn wait(&mut self) -> anyhow::Result<()> {
         let status = Command::new("docker")
             .args(["wait", &self.container_id])
             .status()
             .expect("Failed to wait on Docker container");
+
         if !status.success() {
             let warn = format!("Docker command failed with status: {status}");
             warn!("{warn}");
@@ -201,6 +280,11 @@ impl Wait for DockerRun {
                 let mut logs_lock = self.logs.lock().unwrap();
                 logs_lock.push_str(&format!("\n{warn}\n"));
             }
+        } else {
+            info!(
+                "Container {} exited successfully with status: {}",
+                self.container_id, status
+            );
         }
 
         let mut container_id_lock = DOCKER_ID.lock().unwrap();
@@ -217,31 +301,34 @@ impl Log for DockerRun {
     }
 }
 
-/// Represents a command to be run, with facilities for execution, monitoring, and logging.
+/// Represents a command to be executed, with streaming log capture,
+/// real-time monitoring, and access to its output.
 pub struct CommandRun {
-    /// The binary or command to be executed
+    /// The binary or command to execute
     pub bin: String,
-    /// The arguments to be passed to the command
+    /// The list of arguments to pass to the command
     pub args: Vec<String>,
-    /// The child process, if the command has been started
+    /// The spawned child process, if started
     pub child: Option<Child>,
-    /// Sender for the command's output channel
+    /// Sender side of a channel used to stream stdout/stderr lines
     pub tx: mpsc::Sender<(String, String)>,
-    /// Receiver for the command's output channel
+    /// Receiver side of the same channel, used during wait to collect output
     pub rx: mpsc::Receiver<(String, String)>,
-    /// Accumulated log of the command's execution
+    /// Accumulated log from stdout and stderr, organized by stream
     pub log: String,
 }
 
 impl CommandRun {
-    /// Creates a new CommandRun instance.
+    /// Creates a new `CommandRun` instance with the given binary and arguments.
     ///
     /// # Arguments
-    /// * `bin` - The binary or command to be executed
-    /// * `args` - The arguments to be passed to the command
+    ///
+    /// * `bin` - The binary or shell command to execute
+    /// * `args` - A slice of arguments to pass to the command
     ///
     /// # Returns
-    /// A new CommandRun instance
+    ///
+    /// A new `CommandRun` instance ready to be started via `.run()`
     pub fn new(bin: &str, args: &[&str]) -> Self {
         let (tx, rx) = mpsc::channel();
 
@@ -257,14 +344,20 @@ impl CommandRun {
 }
 
 impl Run for CommandRun {
+    /// Starts the command and begins capturing stdout and stderr in separate threads.
+    ///
+    /// This method spawns the process and streams its output line-by-line.
+    /// It does not block until the process exits — that’s handled by `.wait()`.
     fn run(&mut self) -> anyhow::Result<()> {
         let info = format!("Running command: {} {}", &self.bin, &self.args.join(" "));
         info!("{info}");
+
         let mut child = Command::new(&self.bin)
             .args(&self.args)
             .stdout(Stdio::piped())
             .stderr(Stdio::piped())
-            .spawn()?;
+            .spawn()
+            .map_err(|e| anyhow::anyhow!("Failed to spawn {}: {e}", self.bin))?;
 
         self.log.push_str(&format!("{info}\n"));
 
@@ -277,6 +370,7 @@ impl Run for CommandRun {
         let tx = self.tx.clone();
         let bin = self.bin.clone();
 
+        // Use scoped threads to avoid leaking threads or missing log lines
         std::thread::scope(|s| {
             s.spawn(|| {
                 for line in stdout_reader.lines().map_while(Result::ok) {
@@ -301,6 +395,10 @@ impl Run for CommandRun {
 }
 
 impl Wait for CommandRun {
+    /// Waits for the command to exit, polling and collecting output in real time.
+    ///
+    /// This method consumes the receiver channel to stream logs and checks the process
+    /// status regularly to know when to finish. Captured logs are accumulated in `self.log`.
     fn wait(&mut self) -> anyhow::Result<()> {
         if let Some(child) = &mut self.child {
             loop {
@@ -331,6 +429,7 @@ impl Wait for CommandRun {
 }
 
 impl Log for CommandRun {
+    /// Returns the accumulated log of the command's stdout and stderr streams.
     fn log(&self) -> String {
         self.log.clone()
     }

+ 0 - 61
src/scan/bin.rs

@@ -117,67 +117,6 @@ impl Bin {
             depths,
         })
     }
-
-    pub fn new_old(
-        bam_reader: &mut IndexedReader,
-        contig: &str,
-        start: u32,
-        length: u32,
-        min_mapq: u8,
-    ) -> anyhow::Result<Self> {
-        // let mut bam_reader = IndexedReader::from_path(bam_path)
-        //     .with_context(|| format!("Can't open BAM file: {}", bam_path))?;
-
-        // Calculate inclusive end position
-        let end = start + length - 1;
-
-        // debug!("Creating bin {}:{}-{}", contig, start, end);
-
-        // Fetch reads in the required region
-        bam_reader
-            .fetch((contig, start as i64, end as i64))
-            .with_context(|| format!("Failed to fetch region {}:{}-{}", contig, start, end))?;
-
-        let mut reads_store: HashMap<Vec<u8>, Record> = HashMap::new();
-        let mut n_low_mapq = 0;
-
-        let mut depths = Vec::new();
-        for pileup in bam_reader.pileup() {
-            let pileup = pileup?;
-            let pos = pileup.pos();
-
-            if pos < start {
-                continue;
-            } else if pos > end {
-                break;
-            }
-
-            let depth = pileup
-                .alignments()
-                .filter(|a| {
-                    let record = a.record();
-                    if record.mapq() < min_mapq {
-                        n_low_mapq += 1;
-                        return false;
-                    }
-                    reads_store.insert(record.qname().to_vec(), record);
-
-                    true
-                })
-                .count();
-            depths.push(depth as u32);
-        }
-
-        Ok(Bin {
-            contig: contig.to_string(),
-            start,
-            end,
-            reads_store,
-            n_low_mapq,
-            depths,
-        })
-    }
-
     /// Returns the total number of reads in this bin.
     pub fn n_reads(&self) -> usize {
         self.reads_store.len()