Thomas 5 days ago
parent
commit
b135d60d4e

+ 28 - 0
AGENTS.md

@@ -0,0 +1,28 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+- Core library lives in `src/`, organized by domain: `collection` (sample discovery), `pipes` and `runners` (pipeline orchestration), `callers` (tool interfaces), `annotation` and `variant` (VEP parsing, variant models), `commands` (external tool wrappers), `io`/`helpers`/`functions` (utilities), `modkit`, and `math`.
+- `jq_filters/` holds jq modules used by the JSON post-processing examples in `README.md`.
+- `pandora-config.example.toml` documents runtime settings; copy and adapt for local runs. Build outputs land in `target/`.
+
+## Build, Test, and Development Commands
+- `cargo build` — compile the library; ensure system deps like minimap2, samtools, dorado, bcftools, modkit, and VEP are available for runtime pipelines.
+- `cargo test -- --nocapture` — run tests with stdout; set `RUST_LOG=debug` when debugging pipelines.
+- `cargo fmt` and `cargo clippy -- -D warnings` — format and lint; run before sending a PR.
+- `cargo doc --open` — generate local docs to understand module APIs.
+
+## Coding Style & Naming Conventions
+- Rust 2021 edition; use 4-space indentation and `snake_case` for functions/vars, `PascalCase` for types, `SCREAMING_SNAKE_CASE` for consts.
+- Prefer `anyhow::Result` with the `?` operator; avoid `unwrap()` in pipeline code—propagate errors with context.
+- Keep modules focused (e.g., orchestration in `runners`, IO logic in `io`, stats in `math`/`variant`). Add small comments when wiring external tools or Docker/Slurm behaviors.
+
+## Testing Guidelines
+- Existing integration tests expect access to shared test data at the path in `TEST_DIR` (see `src/lib.rs`). If unavailable, skip or point to local fixtures when adding new tests.
+- Co-locate unit tests with their modules using Rust’s `#[cfg(test)]` blocks; name tests after the behavior under check (e.g., `loads_bam_metadata`, `filters_low_quality_variants`).
+- Before proposing changes, run `cargo test` and note any environment-specific assumptions (e.g., tool availability, data paths).
+
+## Commit & Pull Request Guidelines
+- Commit messages are short and imperative (e.g., `add somatic stats export`, `fix dorado pod5 paths`).
+- PRs should include: purpose, key changes, commands run (build/test), and any pipeline outputs or log snippets that validate tool integration.
+- Document new external tool requirements or config keys in `README.md` and update `pandora-config.example.toml` when adding settings.
+- Keep changes atomic: separate refactors from functional changes, and prefer small, reviewable diffs.

+ 4 - 2
pandora-config.example.toml

@@ -299,11 +299,14 @@ bcftools_threads = 30
 longphase_bin = "/data/tools/longphase_linux-x64"
 
 # Threads for longphase.
-longphase_threads = 150
+longphase_threads = 40
 
 # Threads for longphase modcall step.
 longphase_modcall_threads = 8
 
+# Force longphase recomputation (haplotagging/phasing).
+longphase_force = false
+
 # Longphase modcall VCF template.
 # {result_dir}, {id}, {time}
 longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"
@@ -402,4 +405,3 @@ samtools_merge_threads = 40
 
 # Threads for `samtools split`.
 samtools_split_threads = 20
-

+ 13 - 4
src/callers/clairs.rs

@@ -160,8 +160,8 @@ use crate::{
     },
     config::Config,
     helpers::{
-        get_genome_sizes, is_file_older, remove_dir_if_exists, split_genome_into_n_regions_exact,
-        temp_file_path,
+        get_genome_sizes, is_file_older, remove_dir_if_exists, singularity_bind_flags,
+        split_genome_into_n_regions_exact, temp_file_path,
     },
     io::vcf::read_vcf,
     pipes::{Initialize, ShouldRun, Version},
@@ -301,10 +301,18 @@ impl JobCommand for ClairS {
 
         let sample_name = format!("{}_diag", self.id);
 
+        let bind_flags = singularity_bind_flags([
+            &self.config.tumoral_bam(&self.id),
+            &self.config.normal_bam(&self.id),
+            &self.config.reference,
+            &output_dir,
+            &self.log_dir,
+            &self.config.tmp_dir,
+        ]);
+
         format!(
             "{singularity_bin} exec \
-             --bind /mnt:/mnt \
-             --bind /home/t_steimle:/home/t_steimle \
+             {binds} \
              --bind {output_dir}:{output_dir} \
              {image} \
              /opt/bin/run_clairs \
@@ -322,6 +330,7 @@ impl JobCommand for ClairS {
              -s {sample_name} \
              {region_arg}",
             singularity_bin = self.config.singularity_bin,
+            binds = bind_flags,
             image = self.config.clairs_image,
             tumor_bam = self.config.tumoral_bam(&self.id),
             normal_bam = self.config.normal_bam(&self.id),

+ 13 - 3
src/callers/deep_somatic.rs

@@ -62,7 +62,8 @@ use crate::{
     },
     config::Config,
     helpers::{
-        get_genome_sizes, is_file_older, remove_dir_if_exists, split_genome_into_n_regions_exact,
+        get_genome_sizes, is_file_older, remove_dir_if_exists, singularity_bind_flags,
+        split_genome_into_n_regions_exact,
     },
     io::vcf::read_vcf,
     pipes::{Initialize, ShouldRun, Version},
@@ -163,6 +164,15 @@ impl JobCommand for DeepSomatic {
             None => format!("{}_DeepSomatic_logs", sample_name_tumor),
         };
 
+        let bind_flags = singularity_bind_flags([
+            &self.config.normal_bam(&self.id),
+            &self.config.tumoral_bam(&self.id),
+            &self.config.reference,
+            &output_dir,
+            &self.log_dir,
+            &self.config.tmp_dir,
+        ]);
+
         let output_vcf = format!(
             "/output/{}",
             Path::new(&output_vcf_path)
@@ -173,8 +183,7 @@ impl JobCommand for DeepSomatic {
 
         format!(
             "{singularity_bin} exec \
-            --bind /mnt:/mnt \
-            --bind /home/t_steimle:/home/t_steimle \
+            {binds} \
             --bind {output_dir}:/output \
             {image} \
             /opt/deepvariant/bin/deepsomtic/run_deepsomatic \
@@ -191,6 +200,7 @@ impl JobCommand for DeepSomatic {
             --sample_name_tumor={sample_name_tumor} \
             --sample_name_normal={sample_name_normal}",
             singularity_bin = self.config.singularity_bin,
+            binds = bind_flags,
             output_dir = output_dir,
             image = self.config.deepsomatic_image,
             model_type = self.config.deepsomatic_model_type,

+ 13 - 3
src/callers/deep_variant.rs

@@ -65,7 +65,8 @@ use crate::{
     },
     config::Config,
     helpers::{
-        get_genome_sizes, is_file_older, remove_dir_if_exists, split_genome_into_n_regions_exact,
+        get_genome_sizes, is_file_older, remove_dir_if_exists, singularity_bind_flags,
+        split_genome_into_n_regions_exact,
     },
     io::vcf::read_vcf,
     pipes::{InitializeSolo, ShouldRun, Version},
@@ -279,10 +280,18 @@ impl JobCommand for DeepVariant {
             Karyotype::XX => "", // No haploid contigs for XX
         };
 
+        let bind_flags = singularity_bind_flags([
+            &bam,
+            &self.config.reference,
+            &self.config.pseudoautosomal_regions_bed,
+            &output_dir,
+            &self.log_dir,
+            &self.config.tmp_dir,
+        ]);
+
         format!(
             "{singularity_bin} exec \
-            --bind /mnt:/mnt \
-            --bind /home/t_steimle:/home/t_steimle \
+            {binds} \
             --bind {output_dir}:/output \
             {image} \
             /opt/deepvariant/bin/run_deepvariant \
@@ -300,6 +309,7 @@ impl JobCommand for DeepVariant {
             --dry_run=false \
             --sample_name={sample_name}",
             singularity_bin = self.config.singularity_bin,
+            binds = bind_flags,
             output_dir = output_dir,
             image = self.config.deepvariant_image,
             model_type = self.config.deepvariant_model_type,

+ 11 - 22
src/callers/mod.rs

@@ -1,24 +1,13 @@
+//! Variant caller integrations wired to the shared runner pattern (local/Slurm via `run!`).
+//! - ClairS — <https://github.com/HKU-BAL/ClairS>
+//! - DeepVariant — <https://github.com/google/deepvariant>
+//! - DeepSomatic — <https://github.com/google/deepsomatic>
+//! - Savana — <https://github.com/cortes-ciriano-lab/savana>
+//! - Severus — <https://github.com/genome-nexus/severus> (structural variants)
+//! - NanomonSV — <https://github.com/friend1ws/nanomonsv>
 pub mod clairs;
 pub mod deep_variant;
-mod deep_somatic;
-// pub mod nanomonsv;
-// pub mod savana;
-// pub mod severus;
-// pub mod deep_somatic;
-
-// #[derive(Debug)]
-// pub enum CallersSolo {
-//     DeepVariant(DeepVariant),
-//     NanomonSV(NanomonSV),
-// }
-//
-// #[derive(Debug)]
-// pub enum CallersSomatic {
-//     ClairS(ClairS)
-// }
-//
-// #[derive(Debug)]
-// pub enum Callers {
-//     CallersSomatic,
-//     CallersSolo,
-// }
+pub mod deep_somatic;
+pub mod savana;
+pub mod severus;
+pub mod nanomonsv;

+ 114 - 60
src/callers/nanomonsv.rs

@@ -1,3 +1,6 @@
+//! NanomonSV structural variant caller orchestration (paired and solo).
+//!
+//! Runs parse/get and PASS filtering through the shared runner interfaces (local/Slurm) using the global `Config`.
 use rayon::prelude::*;
 use std::{
     fs::{self},
@@ -11,12 +14,16 @@ use log::{debug, error, info, warn};
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
     collection::vcf::Vcf,
-    commands::bcftools::{BcftoolsConfig, bcftools_concat, bcftools_keep_pass},
+    commands::{
+        bcftools::{BcftoolsConcat, BcftoolsKeepPass},
+        CapturedOutput, Command as JobCommand, LocalRunner, SlurmParams, SlurmRunner,
+    },
     config::Config,
     helpers::{is_file_older, remove_dir_if_exists},
     io::vcf::read_vcf,
     pipes::{Initialize, InitializeSolo, ShouldRun, Version},
-    runners::{CommandRun, Run, RunReport, run_wait},
+    run,
+    runners::Run,
     variant::{
         variant::{Label, Variants},
         variant_collection::VariantCollection,
@@ -31,6 +38,31 @@ pub struct NanomonSV {
     pub id: String,
     pub log_dir: String,
     pub config: Config,
+
+    // Command args and threads used by the shared runner.
+    job_args: Vec<String>,
+    threads: u8,
+}
+
+impl JobCommand for NanomonSV {
+    fn cmd(&self) -> String {
+        format!("{} {}", self.config.nanomonsv_bin, self.job_args.join(" "))
+    }
+}
+
+impl LocalRunner for NanomonSV {}
+
+impl SlurmRunner for NanomonSV {
+    fn slurm_args(&self) -> Vec<String> {
+        SlurmParams {
+            job_name: Some(format!("nanomonsv_{}", self.id)),
+            cpus_per_task: Some(self.threads as u32),
+            mem: Some("60G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+        .to_args()
+    }
 }
 
 impl Initialize for NanomonSV {
@@ -45,7 +77,7 @@ impl Initialize for NanomonSV {
     ///
     /// # Returns
     /// A fully prepared `NanomonSV` instance ready to run.
-    fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         info!("Initialize Nanomonsv for {id}.");
 
@@ -54,7 +86,9 @@ impl Initialize for NanomonSV {
         let nanomonsv = Self {
             id,
             log_dir,
-            config,
+            config: config.clone(),
+            job_args: Vec::new(),
+            threads: config.nanomonsv_threads,
         };
 
         if nanomonsv.config.nanomonsv_force {
@@ -137,7 +171,7 @@ impl Run for NanomonSV {
                 .context(format!(
                     "Error while running NanomonSV get for {mrd_result_vcf}"
                 ))?;
-            report.save_to_file(&format!("{}/nanomonsv_get_mrd_", self.log_dir))?;
+            report.save_to_file(format!("{}/nanomonsv_get_mrd_", self.log_dir))?;
         } else {
             debug!(
                 "NanomonSV `get` results already exists for {} normal, skipping execution.",
@@ -157,7 +191,7 @@ impl Run for NanomonSV {
             .context(format!(
                 "Error while running NanomonSV get for {diag_result_vcf}"
             ))?;
-            report.save_to_file(&format!("{}/nanomonsv_get_diag_", self.log_dir))?;
+            report.save_to_file(format!("{}/nanomonsv_get_diag_", self.log_dir))?;
         } else {
             debug!(
                 "NanomonSV `get` results already exists for {} tumoral, skipping execution.",
@@ -166,11 +200,12 @@ impl Run for NanomonSV {
         }
 
         if !Path::new(&vcf_passed).exists() {
+            let mut keep =
+                BcftoolsKeepPass::from_config(&self.config, &diag_result_vcf, &vcf_passed);
             let report =
-                bcftools_keep_pass(&diag_result_vcf, &vcf_passed, BcftoolsConfig::default())
-                    .context(format!("Can't index {vcf_passed}"))?;
+                run!(&self.config, &mut keep).context(format!("Can't index {vcf_passed}"))?;
             report
-                .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
+                .save_to_file(format!("{}/bcftools_pass_", self.log_dir))
                 .context("Failed to save report")?;
         } else {
             debug!(
@@ -197,14 +232,16 @@ impl Version for NanomonSV {
     /// # Errors
     /// Returns an error if command execution fails or "Version " not found in output.
     fn version(config: &Config) -> anyhow::Result<String> {
-        let args = ["--version"];
-        let mut cmd_run = CommandRun::new(&config.nanomonsv_bin, &args);
-
-        let report = run_wait(&mut cmd_run).context(format!(
-            "Error while running `NanomonSV {}`",
-            args.join(" ")
-        ))?;
-        let log = report.log;
+        let mut runner = NanomonSV {
+            id: "version".to_string(),
+            log_dir: config.tmp_dir.clone(),
+            config: config.clone(),
+            job_args: vec!["--version".into()],
+            threads: 1,
+        };
+        let out = run!(&runner.config, &mut runner)
+            .context("Error while running `NanomonSV --version`")?;
+        let log = format!("{}{}", out.stdout, out.stderr);
         let start = log
             .find("stdout: nanomonsv ")
             .context("Failed to find 'stdout: nanomonsv ' in the log")?;
@@ -284,7 +321,7 @@ impl InitializeSolo for NanomonSVSolo {
     ///
     /// # Errors
     /// Returns an error if directory creation fails.
-    fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         info!("Initialize Nanomonsv solo for {id} {time}.");
         let log_dir = format!("{}/{}/log/nanomonsv_solo", config.result_dir, &id);
@@ -308,7 +345,7 @@ impl InitializeSolo for NanomonSVSolo {
             out_dir,
             log_dir,
             vcf_passed,
-            config,
+            config: config.clone(),
         })
     }
 }
@@ -352,11 +389,11 @@ impl Run for NanomonSVSolo {
         }
 
         if !Path::new(&self.vcf_passed).exists() {
-            let report =
-                bcftools_keep_pass(&result_vcf, &self.vcf_passed, BcftoolsConfig::default())
-                    .context(format!(
-                        "Error while running bcftools keep PASS for {result_vcf}"
-                    ))?;
+            let mut keep =
+                BcftoolsKeepPass::from_config(&self.config, &result_vcf, &self.vcf_passed);
+            let report = run!(&self.config, &mut keep).context(format!(
+                "Error while running bcftools keep PASS for {result_vcf}"
+            ))?;
 
             let log_file = format!("{}/bcftools_pass_", self.log_dir);
             report
@@ -423,17 +460,26 @@ impl Variants for NanomonSVSolo {
 ///
 /// # Errors
 /// Returns an error if command execution fails.
-pub fn nanomonsv_parse(bam: &str, out_prefix: &str, config: &Config) -> anyhow::Result<RunReport> {
+pub fn nanomonsv_parse(
+    bam: &str,
+    out_prefix: &str,
+    config: &Config,
+) -> anyhow::Result<CapturedOutput> {
     let args = vec![
-        "parse",
-        "--reference_fasta",
-        &config.reference,
-        bam,
-        out_prefix,
+        "parse".to_string(),
+        "--reference_fasta".to_string(),
+        config.reference.clone(),
+        bam.to_string(),
+        out_prefix.to_string(),
     ];
-    let mut cmd_run = CommandRun::new(&config.nanomonsv_bin, &args);
-    let res = run_wait(&mut cmd_run)?;
-    Ok(res)
+    let mut runner = NanomonSV {
+        id: "nanomonsv".to_string(),
+        log_dir: config.tmp_dir.clone(),
+        config: config.clone(),
+        job_args: args,
+        threads: config.nanomonsv_threads,
+    };
+    run!(config, &mut runner)
 }
 
 /// Executes the NanomonSV `parse` step in parallel for both diagnostic and MRD BAMs.
@@ -504,7 +550,7 @@ fn spawn_parse_thread(
                 .with_context(|| format!("Failed to parse BAM: {bam}"))?;
 
             report
-                .save_to_file(&format!("{log_dir}/nanomonsv_parse_{bam}_"))
+                .save_to_file(format!("{log_dir}/nanomonsv_parse_{bam}_"))
                 .with_context(|| format!("Failed to save report for BAM: {bam}"))?;
 
             Ok(())
@@ -529,29 +575,34 @@ pub fn nanomonsv_get(
     ctrl_bam: Option<&str>,
     ctrl_prefix: Option<&str>,
     config: &Config,
-) -> anyhow::Result<RunReport> {
+) -> anyhow::Result<CapturedOutput> {
     let threads = config.nanomonsv_threads.to_string();
-    let mut args = vec!["get"];
+    let mut args: Vec<String> = vec!["get".into()];
 
     if let (Some(ctrl_bam), Some(ctrl_prefix)) = (ctrl_bam, ctrl_prefix) {
         args.extend(vec![
-            "--control_prefix",
-            ctrl_prefix,
-            "--control_bam",
-            ctrl_bam,
+            "--control_prefix".into(),
+            ctrl_prefix.to_string(),
+            "--control_bam".into(),
+            ctrl_bam.to_string(),
         ])
     }
 
     args.extend(vec![
-        "--process",
-        &threads,
-        out_prefix,
-        bam,
-        &config.reference,
+        "--process".into(),
+        threads,
+        out_prefix.to_string(),
+        bam.to_string(),
+        config.reference.clone(),
     ]);
-    let mut cmd_run = CommandRun::new(&config.nanomonsv_bin, &args);
-    let res = run_wait(&mut cmd_run)?;
-    Ok(res)
+    let mut runner = NanomonSV {
+        id: "nanomonsv".to_string(),
+        log_dir: config.tmp_dir.clone(),
+        config: config.clone(),
+        job_args: args,
+        threads: config.nanomonsv_threads,
+    };
+    run!(config, &mut runner)
 }
 
 /// Creates a panel of normals (PON) from MRD NanomonSV results.
@@ -562,7 +613,12 @@ pub fn nanomonsv_get(
 /// Returns an error if directory traversal, filtering, or concatenation fails.
 pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<()> {
     let mut passed_mrd = Vec::new();
-    for mrd_dir in find_nanomonsv_dirs(&PathBuf::from(&config.result_dir), "mrd", 0, 3) {
+    for mrd_dir in find_nanomonsv_dirs(
+        &PathBuf::from(&config.result_dir),
+        &config.normal_name,
+        0,
+        3,
+    ) {
         let mut passed = None;
         let mut passed_csi = None;
         let mut result = None;
@@ -589,36 +645,34 @@ pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<(
             (Some(p), None, None) => {
                 let output = replace_filename_suffix(
                     &p,
-                    "_mrd.nanomonsv.result.vcf",
-                    "_mrd_nanomonsv_PASSED.vcf.gz",
+                    &format!("_{}.nanomonsv.result.vcf", config.normal_name),
+                    &format!("_{}_nanomonsv_PASSED.vcf.gz", config.normal_name),
                 );
                 info!("Do pass for {} to {}", p.display(), output.display());
 
-                if let Err(r) = bcftools_keep_pass(
-                    p.to_str().unwrap(),
-                    output.to_str().unwrap(),
-                    BcftoolsConfig::default(),
-                ) {
+                let mut keep = BcftoolsKeepPass::from_config(config, p, &output);
+                if let Err(r) = run!(config, &mut keep) {
                     error!("{r}");
                 } else {
                     passed_mrd.push(output);
                 }
             }
-            (Some(_), Some(p), None) => warn!("Do csi for {}", p.display()),
+            (Some(_), Some(p), None) => warn!("Prossing csi for {}", p.display()),
             (Some(_), Some(p), Some(_)) => passed_mrd.push(p),
             _ => {} // All files found
         }
     }
 
     println!("{} vcf to concat", passed_mrd.len());
-    bcftools_concat(
+    let mut concat = BcftoolsConcat::from_config(
+        config,
         passed_mrd
             .iter()
             .map(|p| p.to_string_lossy().to_string())
             .collect(),
         pon_path,
-        BcftoolsConfig::default(),
-    )?;
+    );
+    run!(config, &mut concat)?;
 
     Ok(())
 }

+ 123 - 84
src/callers/savana.rs

@@ -1,16 +1,22 @@
+//! Savana somatic variant caller orchestration.
+//!
+//! This module wires Savana execution (haplotagging prerequisites, run, PASS filtering)
+//! through the shared runner interfaces (local/Slurm) using the global `Config`.
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
     collection::vcf::Vcf,
     commands::{
-        bcftools::{bcftools_keep_pass, BcftoolsConfig},
-        longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
+        bcftools::BcftoolsKeepPass,
+        longphase::{LongphaseHap, LongphasePhase},
+        CapturedOutput, Command as JobCommand, LocalRunner, SlurmParams, SlurmRunner,
     },
     config::Config,
     helpers::{is_file_older, remove_dir_if_exists},
     io::{readers::get_gz_reader, vcf::read_vcf},
     pipes::{Initialize, ShouldRun, Version},
     positions::{num_to_contig, GenomeRange},
-    runners::{run_wait, CommandRun, Run},
+    run,
+    runners::Run,
     variant::{
         variant::{Label, Variants},
         variant_collection::VariantCollection,
@@ -36,6 +42,9 @@ pub struct Savana {
     pub id: String,
     pub config: Config,
     pub log_dir: String,
+
+    // Arguments for the Savana command (populated before execution).
+    job_args: Vec<String>,
 }
 
 impl Initialize for Savana {
@@ -53,14 +62,15 @@ impl Initialize for Savana {
     /// # Returns
     ///
     /// A new `Savana` instance, or an error if cleanup fails.
-    fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
         info!("Initialize Savana for {id}.");
         let log_dir = format!("{}/{}/log/savana", config.result_dir, id);
 
         let savana = Self {
             id: id.to_string(),
-            config,
+            config: config.clone(),
             log_dir,
+            job_args: Vec::new(),
         };
 
         // If forced re-run is enabled or a run is needed, remove old output directory
@@ -93,6 +103,32 @@ impl ShouldRun for Savana {
     }
 }
 
+impl JobCommand for Savana {
+    fn cmd(&self) -> String {
+        format!(
+            "source {conda_sh} && conda activate savana && {bin} {args}",
+            conda_sh = self.config.conda_sh,
+            bin = self.config.savana_bin,
+            args = self.job_args.join(" ")
+        )
+    }
+}
+
+impl LocalRunner for Savana {}
+
+impl SlurmRunner for Savana {
+    fn slurm_args(&self) -> Vec<String> {
+        SlurmParams {
+            job_name: Some("savana".into()),
+            cpus_per_task: Some(self.config.savana_threads as u32),
+            mem: Some("120G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}
+
 impl Run for Savana {
     /// Executes the Savana pipeline, including prerequisite phasing and haplotagging steps.
     ///
@@ -104,79 +140,66 @@ impl Run for Savana {
     ///
     /// `Ok(())` if the run completes successfully, or an error otherwise.
     fn run(&mut self) -> anyhow::Result<()> {
-        let id = &self.id;
-        let output_vcf = &self.config.savana_output_vcf(id);
+        let output_vcf = &self.config.savana_output_vcf(&self.id);
         if !Path::new(&output_vcf).exists() {
             info!("Running Savana v{}", Savana::version(&self.config)?);
-            let output_dir = self.config.savana_output_dir(id);
+            let output_dir = self.config.savana_output_dir(&self.id);
             fs::create_dir_all(&output_dir).with_context(|| {
                 format!("Failed to create output dir for Savana run: {output_dir}")
             })?;
 
             // Check for phased germline vcf
             // no required anymore since >= 1.3.0
-            let phased_germline_vcf = self.config.constit_phased_vcf(id);
+            let phased_germline_vcf = self.config.constit_phased_vcf(&self.id);
             if !Path::new(&phased_germline_vcf).exists() {
-                LongphasePhase::initialize(id, self.config.clone())?.run()?;
+                let mut phase = LongphasePhase::initialize(&self.id, &self.config.clone())?;
+                run!(&self.config, &mut phase)?;
             }
 
-            let normal_hp_bam = self.config.normal_haplotagged_bam(id);
-            let tumoral_hp_bam = self.config.tumoral_haplotagged_bam(id);
+            let normal_hp_bam = self.config.normal_haplotagged_bam(&self.id);
+            let tumoral_hp_bam = self.config.tumoral_haplotagged_bam(&self.id);
 
             // Check for haplotagged bam
             if !Path::new(&normal_hp_bam).exists() {
-                LongphaseHap::new(
-                    id,
-                    &self.config.normal_bam(id),
+                let mut normal_hap = LongphaseHap::new(
+                    &self.id,
+                    &self.config.normal_bam(&self.id),
                     &phased_germline_vcf,
-                    LongphaseConfig::default(),
-                )
-                .run()?;
+                    self.config.clone(),
+                );
+                run!(&self.config, &mut normal_hap)?;
             }
 
             if !Path::new(&tumoral_hp_bam).exists() {
-                LongphaseHap::new(
-                    id,
-                    &self.config.tumoral_bam(id),
+                let mut tumoral_hap = LongphaseHap::new(
+                    &self.id,
+                    &self.config.tumoral_bam(&self.id),
                     &phased_germline_vcf,
-                    LongphaseConfig::default(),
-                )
-                .run()?;
+                    self.config.clone(),
+                );
+                run!(&self.config, &mut tumoral_hap)?;
             }
 
-            let savana_args = [
-                // "run",
-                "--tumour",
-                &tumoral_hp_bam,
-                "--normal",
-                &normal_hp_bam,
-                "--outdir",
-                &self.config.savana_output_dir(id),
-                "--ref",
-                &self.config.reference,
-                "--snp_vcf",
-                &phased_germline_vcf,
-                "--no_blacklist",
-                "--threads",
-                &self.config.savana_threads.to_string(),
-            ];
-            let args = [
-                "-c",
-                &format!(
-                    "source {} && conda activate savana && {} {}",
-                    self.config.conda_sh,
-                    self.config.savana_bin,
-                    savana_args.join(" ")
-                ),
+            self.job_args = vec![
+                "--tumour".to_string(),
+                tumoral_hp_bam.clone(),
+                "--normal".to_string(),
+                normal_hp_bam.clone(),
+                "--outdir".to_string(),
+                self.config.savana_output_dir(&self.id),
+                "--ref".to_string(),
+                self.config.reference.clone(),
+                "--snp_vcf".to_string(),
+                phased_germline_vcf.clone(),
+                "--no_blacklist".to_string(),
+                "--threads".to_string(),
+                self.config.savana_threads.to_string(),
             ];
-            let mut cmd_run = CommandRun::new("bash", &args);
-            let report = run_wait(&mut cmd_run).context(format!(
-                "Error while running `severus.py {}`",
-                args.join(" ")
-            ))?;
 
+            let output =
+                run!(&self.config, self).context("Error while running Savana (local or Slurm)")?;
             let log_file = format!("{}/savana_", self.log_dir);
-            report
+            output
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
         } else {
@@ -187,12 +210,13 @@ impl Run for Savana {
         }
 
         // Keep PASS
-        let passed_vcf = self.config.savana_passed_vcf(id);
+        let passed_vcf = self.config.savana_passed_vcf(&self.id);
         if !Path::new(&passed_vcf).exists() && Path::new(output_vcf).exists() {
-            let report = bcftools_keep_pass(output_vcf, &passed_vcf, BcftoolsConfig::default())
-                .context(format!(
-                    "Error while running bcftools keep PASS for {output_vcf}"
-                ))?;
+            let mut keep_pass =
+                BcftoolsKeepPass::from_config(&self.config, output_vcf, &passed_vcf);
+            let report = run!(&self.config, &mut keep_pass).context(format!(
+                "Error while running bcftools keep PASS for {output_vcf}"
+            ))?;
             let log_file = format!("{}/bcftools_pass", self.log_dir);
             report
                 .save_to_file(&log_file)
@@ -209,32 +233,47 @@ impl Version for Savana {
     /// # Errors
     /// Returns an error if command execution fails or "Version " not found in output.
     fn version(config: &Config) -> anyhow::Result<String> {
-        let savana_args = ["--version"];
-        let args = [
-            "-c",
-            &format!(
-                "source {} && conda activate savana && {} {}",
-                config.conda_sh,
-                config.savana_bin,
-                savana_args.join(" ")
-            ),
-        ];
-        let mut cmd_run = CommandRun::new("bash", &args);
-        let report = run_wait(&mut cmd_run)
-            .context(format!("Error while running `savana {}`", args.join(" ")))?;
-        let log = report.log;
-        let start = log
-            .find("Version ")
-            .context("Failed to find 'Version ' in the log")?;
-        let start_index = start + "Version ".len();
-        let end = log[start_index..]
-            .find('\n')
-            .context("Failed to find newline after 'Version '")?;
-        Ok(log[start_index..start_index + end]
-            .to_string()
-            .trim()
-            .to_string())
+        let mut job = Savana {
+            id: "version".into(),
+            config: config.clone(),
+            log_dir: config.tmp_dir.clone(),
+            job_args: vec!["--version".to_string()],
+        };
+        let out =
+            run!(&config, &mut job).context("Error while running `savana --version` (local)")?;
+        parse_savana_version(&out)
+    }
+
+    fn version_slurm(config: &Config) -> anyhow::Result<String> {
+        let mut job = Savana {
+            id: "version".into(),
+            config: config.clone(),
+            log_dir: config.tmp_dir.clone(),
+            job_args: vec!["--version".to_string()],
+        };
+        let out = SlurmRunner::run(&mut job).context("Failed to run Savana --version via Slurm")?;
+        parse_savana_version(&out)
+    }
+}
+
+fn parse_savana_version(out: &CapturedOutput) -> anyhow::Result<String> {
+    let mut log = out.stdout.clone();
+    if !out.stderr.is_empty() {
+        log.push_str(&out.stderr);
+    }
+    if let Some(epilog) = &out.slurm_epilog {
+        log.push_str(&format!("\n{epilog:#?}"));
     }
+
+    let start = log
+        .find("Version ")
+        .context("Failed to find 'Version ' in the log")?;
+    let start_index = start + "Version ".len();
+    let end = log[start_index..]
+        .find(|c: char| c.is_whitespace())
+        .map(|idx| start_index + idx)
+        .unwrap_or_else(|| log.len());
+    Ok(log[start_index..end].to_string())
 }
 
 impl CallerCat for Savana {

+ 112 - 92
src/callers/severus.rs

@@ -1,15 +1,19 @@
+//! Severus structural variant caller orchestration.
+//!
+//! Uses shared runner traits (local/Slurm) with global `Config` and handles PASS filtering via bcftools.
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
     collection::vcf::Vcf,
     commands::{
-        bcftools::{BcftoolsConfig, bcftools_keep_pass_precise},
-        longphase::LongphasePhase,
+        bcftools::BcftoolsKeepPassPrecise, longphase::LongphasePhase,
+        Command as JobCommand, LocalRunner, SlurmParams, SlurmRunner,
     },
     config::Config,
     helpers::{is_file_older, remove_dir_if_exists},
     io::vcf::read_vcf,
     pipes::{Initialize, InitializeSolo, ShouldRun, Version},
-    runners::{CommandRun, Run, run_wait},
+    run,
+    runners::Run,
     variant::{
         variant::{Label, Variants},
         variant_collection::VariantCollection,
@@ -44,13 +48,13 @@ impl Initialize for Severus {
     /// # Returns
     ///
     /// A `Severus` instance wrapped in `Ok`, or an error if setup fails
-    fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
         info!("Initialize Severus for {id}.");
 
         let log_dir = format!("{}/{}/log/severus", config.result_dir, id);
         let severus = Self {
             id: id.to_string(),
-            config,
+            config: config.clone(),
             log_dir,
         };
 
@@ -102,48 +106,42 @@ impl Run for Severus {
 
             // Run Longphase if necessary
             if !Path::new(constit_phased_vcf).exists() {
-                LongphasePhase::initialize(&self.id, self.config.clone())?.run()?;
+                let mut phase = LongphasePhase::initialize(&self.id, &self.config)?;
+                run!(&self.config, &mut phase)?;
             }
 
             fs::create_dir_all(self.config.severus_output_dir(id))
                 .context("Failed to create Severus output directory")?;
 
-            let severus_args = [
-                "--target-bam",
-                &self.config.tumoral_bam(id),
-                "--control-bam",
-                &self.config.normal_bam(id),
-                "--phasing-vcf",
-                constit_phased_vcf,
-                "--out-dir",
-                &self.config.severus_output_dir(id),
-                "-t",
-                &self.config.severus_threads.to_string(),
-                "--write-alignments",
-                "--use-supplementary-tag",
-                "--resolve-overlaps",
-                "--between-junction-ins",
-                "--vntr-bed",
-                &self.config.vntrs_bed,
+            let severus_args: Vec<String> = vec![
+                "--target-bam".into(),
+                self.config.tumoral_bam(id),
+                "--control-bam".into(),
+                self.config.normal_bam(id),
+                "--phasing-vcf".into(),
+                constit_phased_vcf.clone(),
+                "--out-dir".into(),
+                self.config.severus_output_dir(id),
+                "-t".into(),
+                self.config.severus_threads.to_string(),
+                "--write-alignments".into(),
+                "--use-supplementary-tag".into(),
+                "--resolve-overlaps".into(),
+                "--between-junction-ins".into(),
+                "--vntr-bed".into(),
+                self.config.vntrs_bed.clone(),
             ];
-            let args = [
-                "-c",
-                &format!(
-                    "source {} && conda activate severus_env && {} {}",
-                    self.config.conda_sh,
-                    self.config.severus_bin,
-                    severus_args.join(" ")
-                ),
-            ];
-            let mut cmd_run = CommandRun::new("bash", &args);
+            let mut job = SeverusJob {
+                conda_sh: self.config.conda_sh.clone(),
+                severus_bin: self.config.severus_bin.clone(),
+                args: severus_args,
+            };
 
-            let report = run_wait(&mut cmd_run).context(format!(
-                "Error while running `severus.py {}`",
-                args.join(" ")
-            ))?;
+            let output = run!(&self.config, &mut job)
+                .context("Error while running Severus (local/Slurm)")?;
 
             let log_file = format!("{}/severus_", self.log_dir);
-            report
+            output
                 .save_to_file(&log_file)
                 .context(format!("Error while writing Severus logs into {log_file}"))?;
         } else {
@@ -155,11 +153,11 @@ impl Run for Severus {
 
         // Filter PASS variants
         if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
-            let report =
-                bcftools_keep_pass_precise(output_vcf, passed_vcf, BcftoolsConfig::default())
-                    .context(format!(
-                        "Error while running bcftools keep PASS for {output_vcf}"
-                    ))?;
+            let mut keep =
+                BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf);
+            let report = run!(&self.config, &mut keep).context(format!(
+                "Error while running bcftools keep PASS for {output_vcf}"
+            ))?;
             let log_file = format!("{}/bcftools_pass", self.log_dir);
             report
                 .save_to_file(&log_file)
@@ -175,6 +173,39 @@ impl Run for Severus {
     }
 }
 
+#[derive(Debug, Clone)]
+struct SeverusJob {
+    conda_sh: String,
+    severus_bin: String,
+    args: Vec<String>,
+}
+
+impl JobCommand for SeverusJob {
+    fn cmd(&self) -> String {
+        format!(
+            "source {conda_sh} && conda activate severus_env && {bin} {args}",
+            conda_sh = self.conda_sh,
+            bin = self.severus_bin,
+            args = self.args.join(" ")
+        )
+    }
+}
+
+impl LocalRunner for SeverusJob {}
+
+impl SlurmRunner for SeverusJob {
+    fn slurm_args(&self) -> Vec<String> {
+        SlurmParams {
+            job_name: Some("severus".into()),
+            partition: Some("shortq".into()),
+            cpus_per_task: Some(16),
+            mem: Some("120G".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}
+
 impl CallerCat for Severus {
     /// Returns the annotation category for Severus calls.
     ///
@@ -243,21 +274,18 @@ impl Version for Severus {
     /// # Errors
     /// Returns an error if command execution fails or "Version " not found in output.
     fn version(config: &Config) -> anyhow::Result<String> {
-        let args = [
-            "-c",
-            &format!(
-                "source {} && conda activate severus_env && {} {}",
-                config.conda_sh, config.severus_bin, "--version"
-            ),
-        ];
-        let mut cmd_run = CommandRun::new("bash", &args);
-        let report = run_wait(&mut cmd_run).context("Error while running `severus --version`")?;
-        let v = match report.log.split_once(':') {
-            Some((_, value)) => value.trim(),
-            None => anyhow::bail!("Error while parsing `severus --version`"),
+        let mut job = SeverusJob {
+            conda_sh: config.conda_sh.clone(),
+            severus_bin: config.severus_bin.clone(),
+            args: vec!["--version".to_string()],
         };
-
-        Ok(v.to_string())
+        let out = run!(&config, &mut job).context("Error while running `severus --version`")?;
+        let combined = format!("{}{}", out.stdout, out.stderr);
+        let v = combined
+            .split_once(':')
+            .map(|(_, value)| value.trim().to_string())
+            .ok_or_else(|| anyhow::anyhow!("Error while parsing `severus --version`"))?;
+        Ok(v)
     }
 }
 
@@ -282,7 +310,7 @@ impl InitializeSolo for SeverusSolo {
     ///
     /// # Errors
     /// Returns an error if directory creation fails.
-    fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
         let log_dir = format!("{}/{}/log/severus_solo", config.result_dir, id);
         if !Path::new(&log_dir).exists() {
             fs::create_dir_all(&log_dir)
@@ -292,7 +320,7 @@ impl InitializeSolo for SeverusSolo {
         Ok(SeverusSolo {
             id: id.to_string(),
             time: time.to_string(),
-            config,
+            config: config.clone(),
             log_dir,
         })
     }
@@ -314,34 +342,26 @@ impl Run for SeverusSolo {
 
         if !Path::new(output_vcf).exists() {
             // Run command if output VCF doesn't exist
-            let severus_args = [
-                "--target-bam",
-                &self.config.solo_bam(id, time),
-                "--out-dir",
-                &self.config.severus_solo_output_dir(id, time),
-                "-t",
-                &self.config.severus_threads.to_string(),
-                "--write-alignments",
-                "--use-supplementary-tag",
-                "--resolve-overlaps",
-                "--between-junction-ins",
-                "--vntr-bed",
-                &self.config.vntrs_bed,
-            ];
-            let args = [
-                "-c",
-                &format!(
-                    "source {} && conda activate severus_env && {} {}",
-                    self.config.conda_sh,
-                    self.config.severus_bin,
-                    severus_args.join(" ")
-                ),
-            ];
-            let mut cmd_run = CommandRun::new("bash", &args);
-            let report = run_wait(&mut cmd_run).context(format!(
-                "Error while running `severus.py {}`",
-                args.join(" ")
-            ))?;
+            let mut job = SeverusJob {
+                conda_sh: self.config.conda_sh.clone(),
+                severus_bin: self.config.severus_bin.clone(),
+                args: vec![
+                    "--target-bam".into(),
+                    self.config.solo_bam(id, time),
+                    "--out-dir".into(),
+                    self.config.severus_solo_output_dir(id, time),
+                    "-t".into(),
+                    self.config.severus_threads.to_string(),
+                    "--write-alignments".into(),
+                    "--use-supplementary-tag".into(),
+                    "--resolve-overlaps".into(),
+                    "--between-junction-ins".into(),
+                    "--vntr-bed".into(),
+                    self.config.vntrs_bed.clone(),
+                ],
+            };
+            let report =
+                run!(&self.config, &mut job).context("Error while running severus solo")?;
 
             let log_file = format!("{}/severus_", self.log_dir);
             report
@@ -353,11 +373,11 @@ impl Run for SeverusSolo {
 
         // Keep PASS
         if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
-            let report =
-                bcftools_keep_pass_precise(output_vcf, passed_vcf, BcftoolsConfig::default())
-                    .context(format!(
-                        "Error while running bcftools keep PASS for {output_vcf}"
-                    ))?;
+            let mut keep =
+                BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf);
+            let report = run!(&self.config, &mut keep).context(format!(
+                "Error while running bcftools keep PASS for {output_vcf}"
+            ))?;
             let log_file = format!("{}/bcftools_pass", self.log_dir);
             report
                 .save_to_file(&log_file)

+ 159 - 118
src/commands/longphase.rs

@@ -1,43 +1,23 @@
+//! Longphase haplotagging, phasing, and modcall runners.
+//!
+//! All steps use the shared runner traits (local/Slurm) driven by the global `Config`.
 use crate::{
-    pipes::{Initialize, InitializeSolo},
-    commands::bcftools::{bcftools_compress, bcftools_index},
+    commands::bcftools::{BcftoolsCompress, BcftoolsIndex, BcftoolsKeepPass},
+    commands::samtools::SamtoolsIndex,
     config::Config,
     helpers::path_prefix,
-    runners::{run_wait, CommandRun, Run},
+    pipes::{Initialize, InitializeSolo},
+    run,
+    runners::Run,
 };
 use anyhow::Context;
-use duct::cmd;
 use std::{
     fs,
     path::{Path, PathBuf},
 };
 use tracing::info;
 
-use super::{
-    bcftools::{bcftools_keep_pass, BcftoolsConfig},
-    modkit::ModkitSummary,
-};
-
-#[derive(Debug, Clone)]
-pub struct LongphaseConfig {
-    pub bin: String,
-    pub result_dir: String,
-    pub reference: String,
-    pub threads: u8,
-    pub force: bool,
-}
-
-impl Default for LongphaseConfig {
-    fn default() -> Self {
-        Self {
-            bin: "/data/tools/longphase_linux-x64".to_string(),
-            reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
-            result_dir: "/data/longreads_basic_pipe".to_string(),
-            threads: 150,
-            force: true,
-        }
-    }
-}
+use super::modkit::ModkitSummary;
 
 #[derive(Debug)]
 pub struct LongphaseHap {
@@ -45,12 +25,13 @@ pub struct LongphaseHap {
     pub vcf: String,
     pub bam: PathBuf,
     pub bam_hp: PathBuf,
-    pub config: LongphaseConfig,
+    pub config: Config,
     pub log_dir: String,
+    job_args: Vec<String>,
 }
 
 impl LongphaseHap {
-    pub fn new(id: &str, bam: &str, phased_vcf: &str, config: LongphaseConfig) -> Self {
+    pub fn new(id: &str, bam: &str, phased_vcf: &str, config: Config) -> Self {
         let log_dir = format!("{}/{}/log/longphase", config.result_dir, id);
 
         let bam = Path::new(bam);
@@ -65,11 +46,12 @@ impl LongphaseHap {
             log_dir,
             vcf: phased_vcf.to_string(),
             bam_hp: bam_hp.to_path_buf(),
+            job_args: Vec::new(),
         }
     }
 
     pub fn run(&mut self) -> anyhow::Result<()> {
-        if self.config.force && self.bam_hp.exists() {
+        if self.config.longphase_force && self.bam_hp.exists() {
             fs::remove_file(&self.bam_hp)?;
         }
 
@@ -77,42 +59,37 @@ impl LongphaseHap {
             fs::create_dir_all(&self.log_dir).expect("Failed to create output directory");
         }
 
-        // Run command if output VCF doesn't exist
         if !self.bam_hp.exists() {
-            let args = [
-                "haplotag",
-                "-s",
-                &self.vcf,
-                "-b",
-                self.bam.to_str().unwrap(),
-                "-r",
-                &self.config.reference,
-                "-t",
-                &self.config.threads.to_string(),
-                "--tagSupplementary",
-                "-o",
-                self.bam_hp.to_str().unwrap(),
+            self.job_args = vec![
+                "haplotag".to_string(),
+                "-s".to_string(),
+                self.vcf.clone(),
+                "-b".to_string(),
+                self.bam.to_string_lossy().to_string(),
+                "-r".to_string(),
+                self.config.reference.clone(),
+                "-t".to_string(),
+                self.config.threads.to_string(),
+                "--tagSupplementary".to_string(),
+                "-o".to_string(),
+                self.bam_hp.to_string_lossy().to_string(),
             ];
-            let mut cmd_run = CommandRun::new(&self.config.bin, &args);
-            let report = run_wait(&mut cmd_run).context(format!(
-                "Error while running `{} {}`",
-                self.config.bin,
-                args.join(" ")
-            ))?;
+            let report = run!(&self.config, self)
+                .context(format!("Error while running `{}`", self.job_args.join(" ")))?;
 
             let log_file = format!("{}/longphase_", self.log_dir);
             report
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
 
-            let _ = cmd!(
-                "samtools",
-                "index",
-                "-@",
-                &self.config.threads.to_string(),
-                &format!("{}.bam", self.bam_hp.to_str().unwrap())
-            )
-            .run()?;
+            let bam_to_index = format!("{}.bam", self.bam_hp.to_string_lossy());
+            let mut sam_index = SamtoolsIndex {
+                bin: self.config.longphase_bin.clone(),
+                threads: self.config.longphase_threads,
+                bam: bam_to_index.clone(),
+            };
+            run!(&self.config, &mut sam_index)
+                .context(format!("samtools index failed for {bam_to_index}"))?;
         } else {
             info!("Longphase output vcf already exists");
         }
@@ -121,6 +98,27 @@ impl LongphaseHap {
     }
 }
 
+impl crate::commands::Command for LongphaseHap {
+    fn cmd(&self) -> String {
+        format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
+    }
+}
+
+impl crate::commands::LocalRunner for LongphaseHap {}
+
+impl crate::commands::SlurmRunner for LongphaseHap {
+    fn slurm_args(&self) -> Vec<String> {
+        crate::commands::SlurmParams {
+            job_name: Some(format!("longphase_hap_{}", self.id)),
+            cpus_per_task: Some(self.config.longphase_threads as u32),
+            mem: Some("60G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}
+
 // /data/tools/longphase_linux-x64 phase -s ClairS/clair3_normal_tumoral_germline_output.vcf.gz -b CUNY_diag_hs1_hp.bam -r /data/ref/hs1/chm13v2.0.fa -t 155 --ont -o ClairS/clair3_normal_tumoral_germline_output_PS
 #[derive(Debug)]
 pub struct LongphasePhase {
@@ -131,10 +129,11 @@ pub struct LongphasePhase {
     pub config: Config,
     pub log_dir: String,
     pub modcall_vcf: String,
+    job_args: Vec<String>,
 }
 
 impl Initialize for LongphasePhase {
-    fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, config: &crate::config::Config) -> anyhow::Result<Self> {
         let log_dir = format!("{}/{}/log/longphase_phase", config.result_dir, id);
         if !Path::new(&log_dir).exists() {
             fs::create_dir_all(&log_dir)
@@ -147,16 +146,38 @@ impl Initialize for LongphasePhase {
 
         Ok(LongphasePhase {
             id: id.to_string(),
-            config,
+            config: config.clone(),
             log_dir,
             vcf,
             out_prefix,
             bam,
             modcall_vcf,
+            job_args: Vec::new(),
         })
     }
 }
 
+impl crate::commands::Command for LongphasePhase {
+    fn cmd(&self) -> String {
+        format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
+    }
+}
+
+impl crate::commands::LocalRunner for LongphasePhase {}
+
+impl crate::commands::SlurmRunner for LongphasePhase {
+    fn slurm_args(&self) -> Vec<String> {
+        crate::commands::SlurmParams {
+            job_name: Some(format!("longphase_phase_{}", self.id)),
+            cpus_per_task: Some(self.config.longphase_threads as u32),
+            mem: Some("60G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}
+
 impl Run for LongphasePhase {
     fn run(&mut self) -> anyhow::Result<()> {
         info!("Running longphase phase for: {}", self.vcf);
@@ -164,27 +185,26 @@ impl Run for LongphasePhase {
 
         let final_vcf = self.config.constit_phased_vcf(&self.id);
         if !Path::new(&final_vcf).exists() {
-            let args = [
-                "phase",
-                "-s",
-                &self.vcf,
-                "-b",
-                &self.bam,
-                "-r",
-                &self.config.reference,
-                "--mod-file",
-                &self.modcall_vcf,
-                "-t",
-                &self.config.longphase_threads.to_string(),
-                "--ont",
-                "-o",
-                &self.out_prefix,
+            self.job_args = vec![
+                "phase".to_string(),
+                "-s".to_string(),
+                self.vcf.clone(),
+                "-b".to_string(),
+                self.bam.clone(),
+                "-r".to_string(),
+                self.config.reference.clone(),
+                "--mod-file".to_string(),
+                self.modcall_vcf.clone(),
+                "-t".to_string(),
+                self.config.longphase_threads.to_string(),
+                "--ont".to_string(),
+                "-o".to_string(),
+                self.out_prefix.clone(),
             ];
-            let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
-            let report = run_wait(&mut cmd_run).context(format!(
+            let report = run!(&self.config, self).context(format!(
                 "Error while running `{} {}`",
                 self.config.longphase_bin,
-                args.join(" ")
+                self.job_args.join(" ")
             ))?;
 
             let log_file = format!("{}/longphase_phase_", self.log_dir);
@@ -192,12 +212,16 @@ impl Run for LongphasePhase {
                 .save_to_file(&log_file)
                 .context(format!("Error while writing logs into {log_file}"))?;
 
-            bcftools_compress(
-                &format!("{}.vcf", self.out_prefix),
+            let mut compress = BcftoolsCompress::from_config(
+                &self.config,
+                format!("{}.vcf", self.out_prefix),
                 &final_vcf,
-                &BcftoolsConfig::default(),
-            )?;
-            bcftools_index(&final_vcf, &BcftoolsConfig::default())?;
+            );
+            run!(&self.config, &mut compress).context("bcftools compress failed")?;
+
+            let mut index = BcftoolsIndex::from_config(&self.config, &final_vcf);
+            run!(&self.config, &mut index).context("bcftools index failed")?;
+
             fs::remove_file(format!("{}.vcf", self.out_prefix))?;
         }
         Ok(())
@@ -215,10 +239,11 @@ pub struct LongphaseModcallSolo {
     pub log_dir: String,
     pub mod_threshold: f64,
     pub config: Config,
+    job_args: Vec<String>,
 }
 
 impl InitializeSolo for LongphaseModcallSolo {
-    fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         let time = time.to_string();
 
@@ -233,7 +258,7 @@ impl InitializeSolo for LongphaseModcallSolo {
             anyhow::bail!("Bam files doesn't exists: {bam}")
         }
 
-        let mut modkit_summary = ModkitSummary::initialize(&id, &time, config.clone())?;
+        let mut modkit_summary = ModkitSummary::initialize(&id, &time, config)?;
         modkit_summary.load()?;
         let mod_threshold = modkit_summary
             .result
@@ -253,56 +278,72 @@ impl InitializeSolo for LongphaseModcallSolo {
             bam,
             reference: config.reference.to_string(),
             threads: config.longphase_modcall_threads,
-            config,
+            config: config.clone(),
             log_dir,
             mod_threshold,
             prefix,
+            job_args: Vec::new(),
         })
     }
 }
 
+impl crate::commands::Command for LongphaseModcallSolo {
+    fn cmd(&self) -> String {
+        format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
+    }
+}
+
+impl crate::commands::LocalRunner for LongphaseModcallSolo {}
+
+impl crate::commands::SlurmRunner for LongphaseModcallSolo {
+    fn slurm_args(&self) -> Vec<String> {
+        crate::commands::SlurmParams {
+            job_name: Some(format!("longphase_modcall_{}_{}", self.id, self.time)),
+            cpus_per_task: Some(self.threads as u32),
+            mem: Some("60G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}
+
 impl Run for LongphaseModcallSolo {
     fn run(&mut self) -> anyhow::Result<()> {
-        let args = [
-            "modcall",
-            "-b",
-            &self.bam,
-            "-t",
-            &self.threads.to_string(),
-            "-r",
-            &self.reference,
-            "-m",
-            &self.mod_threshold.to_string(),
-            "-o",
-            &self.prefix,
+        self.job_args = vec![
+            "modcall".to_string(),
+            "-b".to_string(),
+            self.bam.clone(),
+            "-t".to_string(),
+            self.threads.to_string(),
+            "-r".to_string(),
+            self.reference.clone(),
+            "-m".to_string(),
+            self.mod_threshold.to_string(),
+            "-o".to_string(),
+            self.prefix.clone(),
         ];
-        let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
-        run_wait(&mut cmd_run)
-            .context(format!(
-                "Error while running `longphase modcall {}`",
-                args.join(" ")
-            ))?
-            .save_to_file(&format!("{}/longphase_modcall_", self.log_dir))
+        let output = run!(&self.config, self).context("Error while running `longphase modcall`")?;
+        output
+            .save_to_file(format!("{}/longphase_modcall_", self.log_dir))
             .context(format!(
                 "Error while writing logs into {}/longphase_modcall",
                 self.log_dir
             ))?;
 
         let vcf = format!("{}.vcf", self.prefix);
-        bcftools_keep_pass(
-            &vcf,
-            &format!("{}.vcf.gz", self.prefix),
-            BcftoolsConfig::default(),
-        )
-        .context(format!(
+        let mut keep_pass =
+            BcftoolsKeepPass::from_config(&self.config, &vcf, format!("{}.vcf.gz", self.prefix));
+        let pass_report = run!(&self.config, &mut keep_pass).context(format!(
             "Can't run BCFtools PASS for LongphaseModcallSolo: {} {}",
             self.id, self.time
-        ))?
-        .save_to_file(&format!("{}/longphase_modcall_pass_", self.log_dir))
-        .context(format!(
-            "Error while writing logs into {}/longphase_modcall_pass",
-            self.log_dir
         ))?;
+        pass_report
+            .save_to_file(format!("{}/longphase_modcall_pass_", self.log_dir))
+            .context(format!(
+                "Error while writing logs into {}/longphase_modcall_pass",
+                self.log_dir
+            ))?;
         fs::remove_file(&vcf).context(format!("Can't remove file: {vcf}"))?;
         Ok(())
     }

+ 1 - 0
src/commands/mod.rs

@@ -3,6 +3,7 @@ pub mod dorado;
 // pub mod longphase;
 pub mod modkit;
 pub mod samtools;
+pub mod longphase;
 // pub mod wakhan;
 
 use std::{

+ 49 - 17
src/commands/wakhan.rs

@@ -1,6 +1,9 @@
 use crate::{
-    commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
-    runners::{run_wait, CommandRun},
+    commands::{
+        bcftools::{BcftoolsKeepPass, BcftoolsConfig},
+        CapturedOutput, Command as JobCommand, LocalRunner, SlurmParams, SlurmRunner,
+    },
+    run,
 };
 use anyhow::Context;
 use std::{fs, path::Path};
@@ -102,15 +105,13 @@ impl Wakhan {
                 "--vntr-bed",
                 &self.config.vntr_bed,
             ];
-            let args = [
-                "-c",
-                &format!("source {} && conda activate severus_env && {} {}", self.config.conda_sh, self.config.bin, severus_args.join(" ")),
-            ];
-            let mut cmd_run = CommandRun::new("bash", &args);
-            let report = run_wait(&mut cmd_run).context(format!(
-                "Error while running `severus.py {}`",
-                args.join(" ")
-            ))?;
+            let mut job = WakhanJob {
+                conda_sh: self.config.conda_sh.clone(),
+                bin: self.config.bin.clone(),
+                args: severus_args.iter().map(|s| s.to_string()).collect(),
+                threads: self.config.threads,
+            };
+            let report = run!(&self.config, &mut job).context("Error while running `severus.py`")?;
 
             let log_file = format!("{}/severus_", self.log_dir);
             report
@@ -122,12 +123,9 @@ impl Wakhan {
 
         // Keep PASS
         if !Path::new(&self.vcf_passed).exists() {
-            let report = bcftools_keep_pass(
-                &self.output_vcf,
-                &self.vcf_passed,
-                BcftoolsConfig::default(),
-            )
-            .context(format!(
+            let mut keep =
+                BcftoolsKeepPass::from_config(&self.config.into_config(), &self.output_vcf, &self.vcf_passed);
+            let report = run!(&self.config.into_config(), &mut keep).context(format!(
                 "Error while running bcftools keep PASS for {}",
                 &self.output_vcf
             ))?;
@@ -139,3 +137,37 @@ impl Wakhan {
         Ok(())
     }
 }
+
+#[derive(Debug)]
+struct WakhanJob {
+    conda_sh: String,
+    bin: String,
+    args: Vec<String>,
+    threads: u8,
+}
+
+impl JobCommand for WakhanJob {
+    fn cmd(&self) -> String {
+        format!(
+            "source {conda_sh} && conda activate severus_env && {bin} {args}",
+            conda_sh = self.conda_sh,
+            bin = self.bin,
+            args = self.args.join(" ")
+        )
+    }
+}
+
+impl LocalRunner for WakhanJob {}
+
+impl SlurmRunner for WakhanJob {
+    fn slurm_args(&self) -> Vec<String> {
+        SlurmParams {
+            job_name: Some("wakhan".into()),
+            cpus_per_task: Some(self.threads as u32),
+            mem: Some("120G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}

+ 3 - 0
src/config.rs

@@ -275,6 +275,9 @@ pub struct Config {
     /// Number of threads for longphase modcall step.
     pub longphase_modcall_threads: u8,
 
+    /// Force longphase recomputation (haplotagging/phasing).
+    pub longphase_force: bool,
+
     /// Template for longphase modcall VCF.
     ///
     /// Placeholders: `{result_dir}`, `{id}`, `{time}`.

+ 43 - 4
src/helpers.rs

@@ -6,7 +6,7 @@ use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 use std::{
     cmp::Ordering,
-    collections::HashMap,
+    collections::{HashMap, HashSet},
     fmt, fs,
     iter::Sum,
     ops::{Add, Div},
@@ -46,7 +46,7 @@ pub fn find_unique_file(dir_path: &str, suffix: &str) -> anyhow::Result<String>
 }
 
 pub fn path_prefix(out: &str) -> anyhow::Result<String> {
-    let out_path = Path::new(&out);
+    let out_path = Path::new(out);
 
     let out_dir = out_path
         .parent()
@@ -79,11 +79,50 @@ pub fn force_or_not(_path: &str, _force: bool) -> anyhow::Result<()> {
     // }
     //
     // if output_exists {
-    //     info!("{} already exists.", path.display())
-    // }
+//     info!("{} already exists.", path.display())
+// }
     Ok(())
 }
 
+/// Builds Singularity bind flags from input/output paths.
+///
+/// - Converts file paths to their parent directory.
+/// - Skips non-existing paths to avoid Singularity errors.
+/// - Canonicalizes and deduplicates directories.
+pub fn singularity_bind_flags<I, P>(paths: I) -> String
+where
+    I: IntoIterator<Item = P>,
+    P: AsRef<Path>,
+{
+    let mut seen = HashSet::new();
+    let mut binds = Vec::new();
+
+    let mut push_dir = |p: &Path| {
+        if p.as_os_str().is_empty() || !p.exists() {
+            return;
+        }
+        let dir = p.canonicalize().unwrap_or_else(|_| p.to_path_buf());
+        if seen.insert(dir.clone()) {
+            binds.push(format!(
+                "--bind {src}:{dst}",
+                src = dir.display(),
+                dst = dir.display()
+            ));
+        }
+    };
+
+    for path_str in paths {
+        let path = path_str.as_ref();
+        if path.is_dir() {
+            push_dir(path);
+        } else if let Some(parent) = path.parent() {
+            push_dir(parent);
+        }
+    }
+
+    binds.join(" ")
+}
+
 use rayon::prelude::*;
 use std::cmp::Ord;