Thomas 3 тижнів тому
батько
коміт
069f6f8b38

+ 49 - 2
pandora-config.example.toml

@@ -13,8 +13,6 @@ tmp_dir = "/mnt/beegfs02/scratch/t_steimle/tmp"
 # Should use Slurm as runner
 slurm_runner = true
 
-# slurm_genome_chunks = 150
-
 # Run cache directory.
 run_cache_dir = "/home/t_steimle/data/prom_runs"
 
@@ -208,6 +206,55 @@ clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"
 # {result_dir}, {id}
 clairs_output_dir = "{result_dir}/{id}/diag/ClairS"
 
+#######################################
+# GATK configuration
+#######################################
+# Path to the GATK container image (Singularity/Apptainer .sif, or a docker:// URI
+# if you pull at runtime).
+#
+# Examples:
+# - "/containers/gatk_4.6.0.0.sif"
+gatk_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/gatk_latest.sif"
+
+# Path to a BED file restricting analysis to target regions (0-based, half-open).
+# Must match contig naming of the reference/BAMs (e.g. "chr9" vs "9").
+#
+# Used for targeted calling (e.g. Mutect2 `-L` or region chunking).
+gatk_bed_path = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_Genes.bed"
+
+# Local single-run CPU threads (non-Slurm execution).
+# Used for full-run Mutect2 or other GATK tools.
+# Currently forwarded to:
+#   - `--native-pair-hmm-threads`
+# (`--reader-threads` is not passed by the pipeline at present.)
+# Should match available cores on the node.
+gatk_threads = 100
+
+# Local single-run memory limit in GB.
+# Used to size Java heap:
+#   `--java-options "-Xmx{mem}g"`
+# Should leave headroom for native memory (PairHMM, buffers).
+gatk_mem_gb = 120
+
+# Per-chunk CPU threads when running chunked under Slurm.
+# Applies to each parallel job independently.
+gatk_slurm_threads = 8
+
+# Per-chunk memory (GB) when running under Slurm.
+# Used both for scheduler request and Java heap sizing per chunk.
+# Must be sufficient for interval-restricted Mutect2.
+gatk_slurm_mem_gb = 32
+
+# If true, force re-run of GATK steps by removing or ignoring existing outputs.
+gatk_force = false
+
+# GATK output directory template.
+# Available placeholders: {result_dir}, {id}, {tumoral_name}
+gatk_output_dir = "{result_dir}/{id}/{tumoral_name}/GATK"
+
+# GATK PASS-filtered VCF path.
+# Available placeholders: {output_dir}, {id}, {tumoral_name}, {reference_name}
+gatk_passed_vcf = "{output_dir}/{id}_{tumoral_name}_{reference_name}_GATK_PASSED.vcf.gz"
+
 #######################################
 # Savana configuration
 #######################################

+ 2 - 0
src/annotation/mod.rs

@@ -269,6 +269,7 @@ pub enum Caller {
     Savana,
     Severus,
     DeepSomatic,
+    Mutect2,
 }
 
 impl fmt::Display for Caller {
@@ -282,6 +283,7 @@ impl fmt::Display for Caller {
             Savana => write!(f, "Savana"),
             Severus => write!(f, "Severus"),
             DeepSomatic => write!(f, "DeepSomatic"),
+            Mutect2 => write!(f, "Mutect2"),
         }
     }
 }

+ 847 - 0
src/callers/gatk.rs

@@ -0,0 +1,847 @@
+//! # GATK Mutect2 Somatic Variant Calling Pipeline
+//!
+//! This module provides a pipeline runner for [GATK Mutect2](https://gatk.broadinstitute.org/),
+//! a somatic variant caller for paired tumor-normal samples using BED-based interval targeting.
+//!
+//! ## Overview
+//!
+//! Mutect2 performs somatic variant calling using:
+//!
+//! - Local assembly-based haplotype caller tuned for somatic mutations
+//! - Paired tumor-normal analysis
+//! - Read orientation artifact filtering (LearnReadOrientationModel + FilterMutectCalls)
+//! - Containerized execution via Singularity
+//!
+//! ## Pipeline Steps (chained per chunk)
+//!
+//! 1. `gatk Mutect2` — raw somatic calls + F1R2 orientation counts
+//! 2. `gatk LearnReadOrientationModel` — artifact prior estimation
+//! 3. `gatk FilterMutectCalls` — apply filters (orientation, contamination, etc.)
+//! 4. `bcftools view -f PASS` — extract PASS-only variants
+//!
+//! Steps 1–3 run as a single chained command inside Singularity.
+//! Step 4 runs as a separate bcftools command.
+//!
+//! ## Requirements
+//!
+//! - Tumor and normal BAMs indexed (`.bai` files present)
+//! - BED interval list accessible (`config.gatk_bed_path`)
+//! - Reference genome with `.fai` and `.dict`
+//! - GATK Singularity image available (`config.gatk_image`)
+//! - `--normal-sample` must match the `@RG SM` tag in the normal BAM header
+//!
+//! ## Execution Modes
+//!
+//! Execution mode is selected via `config.slurm_runner`:
+//!
+//! - **Local** — Single-node execution
+//! - **Slurm** — HPC job submission
+//!
+//! Both modes support chunked parallel execution via [`run_mutect2_chunked`].
+//!
+//! ## Thread and Memory Consistency
+//!
+//! Two separate resource profiles exist:
+//!
+//! - **Local (non-chunked)**: `config.gatk_threads` / `config.gatk_mem_gb` — full machine resources
+//! - **Slurm (chunked)**: `config.gatk_slurm_threads` / `config.gatk_slurm_mem_gb` — per-job allocation
+//!
+//! The struct resolves the correct values via `effective_threads()` / `effective_mem_gb()` based on
+//! whether `part_index` is set. JVM heap (`-Xmx`) is always ~85% of the effective memory to leave
+//! headroom for off-heap and OS overhead.
+//!
+//! ## Output Files
+//!
+//! PASS-filtered somatic variants:
+//! ```text
+//! {result_dir}/{id}/{tumoral_name}/GATK/{id}_{tumoral_name}_{reference_name}_GATK_PASSED.vcf.gz
+//! ```
+//!
+//! ## Usage
+//!
+//! ### Chunked Parallel Execution (Recommended for WGS/WES)
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::gatk::run_mutect2_chunked;
+//! use pandora_lib_promethion::config::Config;
+//!
+//! let config = Config::default();
+//! // Run Mutect2 in 30 parallel chunks over the BED intervals
+//! run_mutect2_chunked("sample_001", &config, 30)?;
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ### Single-Run Execution
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::gatk::Mutect2;
+//! use pandora_lib_promethion::config::Config;
+//! use pandora_lib_promethion::pipes::Initialize;
+//! use pandora_lib_promethion::runners::Run;
+//!
+//! let config = Config::default();
+//! let mut caller = Mutect2::initialize("sample_001", &config)?;
+//!
+//! if caller.should_run() {
+//!     caller.run()?;
+//! }
+//!
+//! // Load somatic variants
+//! let variants = caller.variants(&annotations)?;
+//! println!("Found {} somatic variants", variants.variants.len());
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ## Config Fields Required
+//!
+//! The following must be present in [`Config`]:
+//!
+//! | Field                 | Example                             | Description                              |
+//! |-----------------------|-------------------------------------|------------------------------------------|
+//! | `gatk_image`          | `"/images/gatk_4.6.1.0.sif"`       | Singularity image path                   |
+//! | `gatk_bed_path`       | `"/refs/targets.bed"`               | BED interval list                        |
+//! | `gatk_threads`        | `32`                                | Threads for local (non-chunked) runs     |
+//! | `gatk_mem_gb`         | `120`                               | Memory (GB) for local (non-chunked) runs |
+//! | `gatk_slurm_threads`  | `8`                                 | Threads per Slurm chunk                  |
+//! | `gatk_slurm_mem_gb`   | `32`                                | Memory (GB) per Slurm chunk              |
+//! | `gatk_force`          | `false`                             | Force rerun even if outputs exist        |
+//!
+//! And the following methods:
+//!
+//! - `gatk_output_dir(&self, id: &str) -> String`
+//! - `gatk_passed_vcf(&self, id: &str) -> String`
+//!
+//! ## References
+//!
+//! - [GATK Mutect2 documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360037593851-Mutect2)
+//! - [GATK Best Practices for somatic calling](https://gatk.broadinstitute.org/hc/en-us/articles/360035894731)
+
+use std::{
+    fmt, fs,
+    io::{BufRead, BufReader, Write},
+    path::{Path, PathBuf},
+    process::{Command as ProcessCommand, Stdio},
+};
+
+use anyhow::Context;
+use log::{debug, info};
+use rayon::prelude::*;
+use regex::Regex;
+use uuid::Uuid;
+
+use crate::{
+    annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
+    collection::vcf::Vcf,
+    commands::{
+        bcftools::{BcftoolsConcat, BcftoolsIndex, BcftoolsKeepPass},
+        Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner, SlurmParams,
+        SlurmRunner,
+    },
+    config::Config,
+    helpers::{is_file_older, remove_dir_if_exists, singularity_bind_flags},
+    io::{bed::BedRow, vcf::read_vcf},
+    locker::SampleLock,
+    pipes::{Initialize, ShouldRun, Version},
+    run, run_many,
+    runners::Run,
+    variant::{
+        variant_collection::VariantCollection,
+        vcf_variant::{Label, Variants},
+    },
+};
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/// Fraction of `gatk_mem_gb` allocated to JVM heap (`-Xmx`).
+///
+/// The remaining ~15% covers JVM off-heap, native allocations (htslib, PairHMM),
+/// and OS overhead. This is the standard GATK recommendation.
+const JVM_HEAP_FRACTION: f64 = 0.85;
+
+/// Number of parallel BED chunks for Mutect2 (default when called from `Run`).
+/// Only used by the Slurm branch of `Run::run`; direct callers of
+/// [`run_mutect2_chunked`] choose their own chunk count.
+const DEFAULT_N_PARTS: usize = 30;
+
+// ---------------------------------------------------------------------------
+// Struct
+// ---------------------------------------------------------------------------
+
+/// GATK Mutect2 paired tumor-normal somatic variant caller.
+///
+/// Executes the full Mutect2 pipeline (calling → orientation model → filtering)
+/// with BED-based interval targeting and automatic PASS extraction.
+///
+/// # Fields
+///
+/// - `id` — Sample identifier (e.g., `"34528"`)
+/// - `bed_path` — Path to the BED interval file for this run/chunk
+/// - `log_dir` — Directory for execution logs
+/// - `config` — Global pipeline configuration
+/// - `part_index` — Optional chunk index for parallel execution (1-indexed)
+#[derive(Debug, Clone)]
+pub struct Mutect2 {
+    /// Sample identifier.
+    pub id: String,
+    /// BED interval file for this run (full BED or chunk sub-BED).
+    pub bed_path: PathBuf,
+    /// Directory for log file storage.
+    pub log_dir: String,
+    /// Global pipeline configuration (owned clone; each chunked job carries its own copy).
+    pub config: Config,
+    /// Optional part index for chunked parallel runs (1-indexed).
+    /// When `Some`, the per-chunk Slurm resource profile is selected by
+    /// `effective_threads()` / `effective_mem_gb()`.
+    pub part_index: Option<usize>,
+}
+
+// Human-readable multi-line job summary; logged when chunked jobs are planned
+// (see `run_mutect2_chunked`).
+impl fmt::Display for Mutect2 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        writeln!(f, "🧬 Mutect2")?;
+        writeln!(f, "   Case ID  : {}", self.id)?;
+        writeln!(f, "   BED      : {}", self.bed_path.display())?;
+        writeln!(f, "   Log dir  : {}", self.log_dir)?;
+        writeln!(
+            f,
+            "   Part     : {}",
+            self.part_index
+                .map_or("full".into(), |n| format!("part{n}"))
+        )
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Trait implementations
+// ---------------------------------------------------------------------------
+
+impl Initialize for Mutect2 {
+    /// Builds a full-run (non-chunked) `Mutect2` for `id`, validating the BED
+    /// interval file up front and honoring `gatk_force`.
+    fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
+        let id = id.to_string();
+        info!("Initializing Mutect2 for {id}.");
+
+        let log_dir = format!("{}/{}/log/gatk", config.result_dir, &id);
+
+        // Fail fast: the BED must exist before any (possibly expensive) job is planned.
+        let bed_path = PathBuf::from(&config.gatk_bed_path);
+        anyhow::ensure!(
+            bed_path.exists(),
+            "GATK BED interval file not found: {}",
+            bed_path.display()
+        );
+
+        let mutect2 = Self {
+            id,
+            bed_path,
+            log_dir,
+            config: config.clone(),
+            part_index: None,
+        };
+
+        // `gatk_force` wipes the whole GATK output dir at init time; the missing
+        // outputs then make `should_run()` report stale, forcing a full rerun.
+        if mutect2.config.gatk_force {
+            remove_dir_if_exists(&mutect2.config.gatk_output_dir(&mutect2.id))?;
+        }
+
+        Ok(mutect2)
+    }
+}
+
+impl ShouldRun for Mutect2 {
+    /// Returns `true` when the final PASS VCF is missing or older than any of
+    /// its inputs (normal BAM, tumor BAM, BED intervals).
+    ///
+    /// Each timestamp comparison uses `unwrap_or(true)`, so I/O errors and
+    /// missing files are conservatively treated as "stale — rerun".
+    fn should_run(&self) -> bool {
+        let passed_vcf = &self.config.gatk_passed_vcf(&self.id);
+        let normal_older =
+            is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true).unwrap_or(true);
+        let tumor_older =
+            is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
+        let bed_older = is_file_older(passed_vcf, &self.config.gatk_bed_path, true).unwrap_or(true);
+
+        let result = normal_older || tumor_older || bed_older;
+        if result {
+            info!("Mutect2 should run for id: {}.", self.id);
+        }
+        result
+    }
+}
+
+impl JobCommand for Mutect2 {
+    /// Creates the (part-specific) output directory and the log directory
+    /// before the command is executed.
+    fn init(&mut self) -> anyhow::Result<()> {
+        let output_dir = self.part_output_dir();
+        fs::create_dir_all(&output_dir)
+            .with_context(|| format!("Failed to create dir: {output_dir}"))?;
+        fs::create_dir_all(&self.log_dir)
+            .with_context(|| format!("Failed to create dir: {}", self.log_dir))?;
+        Ok(())
+    }
+
+    /// Builds the full Singularity command string for this run/chunk.
+    fn cmd(&self) -> String {
+        let output_dir = self.part_output_dir();
+        let output_vcf_name = self.output_vcf_filename();
+        let jvm_heap = self.jvm_heap_gb();
+        let bed = self.bed_path.display();
+
+        // Normal sample name must match @RG SM tag in normal BAM header.
+        let sample_name_normal = format!("{}_{}", self.id, self.config.normal_name);
+
+        // Bind every host path the container must see (BAMs, reference, BED,
+        // tmp, logs, output dir).
+        let bind_flags = singularity_bind_flags([
+            &self.config.normal_bam(&self.id),
+            &self.config.tumoral_bam(&self.id),
+            &self.config.reference,
+            &output_dir,
+            &self.log_dir,
+            &self.config.tmp_dir,
+            &self.bed_path.display().to_string(),
+        ]);
+
+        // Three-step pipeline chained inside a single Singularity exec.
+        //
+        // 1. Mutect2          → raw VCF + f1r2 counts
+        // 2. LearnReadOrientationModel → artifact priors
+        // 3. FilterMutectCalls → filtered VCF with PASS/non-PASS annotations
+        let gatk_cmd = format!(
+            "gatk --java-options '-Xmx{jvm_heap}g' Mutect2 \
+                --reference {reference} \
+                --input {tumor_bam} \
+                --input {normal_bam} \
+                --normal-sample {normal_sample} \
+                --intervals {bed} \
+                --native-pair-hmm-threads {threads} \
+                --f1r2-tar-gz /output/f1r2.tar.gz \
+                --output /output/raw.vcf.gz \
+            && gatk --java-options '-Xmx{jvm_heap}g' LearnReadOrientationModel \
+                -I /output/f1r2.tar.gz \
+                -O /output/artifact_priors.tar.gz \
+            && gatk --java-options '-Xmx{jvm_heap}g' FilterMutectCalls \
+                -V /output/raw.vcf.gz \
+                --reference {reference} \
+                --ob-priors /output/artifact_priors.tar.gz \
+                -O /output/{output_vcf_name}",
+            reference = self.config.reference,
+            tumor_bam = self.config.tumoral_bam(&self.id),
+            normal_bam = self.config.normal_bam(&self.id),
+            normal_sample = sample_name_normal,
+            threads = self.effective_threads(),
+        );
+
+        // NOTE(review): `gatk_cmd` is interpolated inside double quotes for
+        // `bash -c "..."`. It only contains single quotes today, but any `"`,
+        // `$`, or whitespace in interpolated paths would break the quoting —
+        // confirm all configured paths are shell-safe.
+        format!(
+            "{singularity_bin} exec \
+            {binds} \
+            --bind {output_dir}:/output \
+            {image} \
+            bash -c \"{gatk_cmd}\"",
+            singularity_bin = self.config.singularity_bin,
+            binds = bind_flags,
+            image = self.config.gatk_image,
+        )
+    }
+}
+
+// Marker impls: local (single + batch) execution uses the default trait behavior.
+impl LocalRunner for Mutect2 {}
+impl LocalBatchRunner for Mutect2 {}
+
+impl SlurmRunner for Mutect2 {
+    fn slurm_args(&self) -> Vec<String> {
+        self.slurm_params().to_args()
+    }
+}
+
+impl SbatchRunner for Mutect2 {
+    /// Slurm resource request for one job. Always uses the per-chunk profile
+    /// (`gatk_slurm_threads` / `gatk_slurm_mem_gb`) — in practice Slurm
+    /// execution only happens via the chunked path, so this is consistent
+    /// with `effective_threads()` / `effective_mem_gb()`.
+    ///
+    /// NOTE(review): partition "shortq" is hard-coded rather than configurable.
+    fn slurm_params(&self) -> SlurmParams {
+        // Suffix the job name with the part index so chunked jobs are distinguishable.
+        let batch_id = self.part_index.map(|i| format!("_{i}")).unwrap_or_default();
+        SlurmParams {
+            job_name: Some(format!("mutect2_{}{}", self.id, batch_id)),
+            cpus_per_task: Some(self.config.gatk_slurm_threads as u32),
+            mem: Some(format!("{}G", self.config.gatk_slurm_mem_gb)),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Private helpers
+// ---------------------------------------------------------------------------
+
+impl Mutect2 {
+    /// Returns effective thread count based on execution context.
+    ///
+    /// - Chunked (Slurm): `config.gatk_slurm_threads` — per-job allocation
+    /// - Non-chunked (local): `config.gatk_threads` — full machine
+    fn effective_threads(&self) -> usize {
+        if self.part_index.is_some() {
+            self.config.gatk_slurm_threads
+        } else {
+            self.config.gatk_threads
+        }
+    }
+
+    /// Returns effective memory in GB based on execution context.
+    ///
+    /// - Chunked (Slurm): `config.gatk_slurm_mem_gb`
+    /// - Non-chunked (local): `config.gatk_mem_gb`
+    fn effective_mem_gb(&self) -> u32 {
+        if self.part_index.is_some() {
+            self.config.gatk_slurm_mem_gb
+        } else {
+            self.config.gatk_mem_gb
+        }
+    }
+
+    /// JVM heap derived from effective memory (~85% to leave room for off-heap + OS).
+    ///
+    /// NOTE(review): the cast truncates toward zero, so an effective memory of
+    /// 1 GB yields `-Xmx0g`. Config values in practice are >= 32 GB, but a
+    /// lower bound (e.g. `.max(1)`) would make this robust to misconfiguration.
+    fn jvm_heap_gb(&self) -> u32 {
+        (self.effective_mem_gb() as f64 * JVM_HEAP_FRACTION) as u32
+    }
+
+    /// Part-specific output directory (or base output dir for non-chunked runs).
+    fn part_output_dir(&self) -> String {
+        let base_dir = self.config.gatk_output_dir(&self.id);
+        match self.part_index {
+            Some(idx) => format!("{base_dir}/part{idx}"),
+            None => base_dir,
+        }
+    }
+
+    /// Filename of the FilterMutectCalls output VCF (inside the part dir).
+    fn output_vcf_filename(&self) -> String {
+        format!("{}_{}_Mutect2.vcf.gz", self.id, self.config.tumoral_name)
+    }
+
+    /// Full host path to the FilterMutectCalls output VCF.
+    fn output_vcf_path(&self) -> String {
+        let output_dir = self.part_output_dir();
+        format!("{output_dir}/{}", self.output_vcf_filename())
+    }
+
+    /// Path to the PASS-only VCF (per-part or final).
+    ///
+    /// Chunked parts derive their PASS path from the part VCF name; the
+    /// non-chunked path is the configured final `gatk_passed_vcf` location.
+    fn passed_vcf_path(&self) -> String {
+        match self.part_index {
+            Some(_) => self.output_vcf_path().replace(".vcf.gz", ".pass.vcf.gz"),
+            None => self.config.gatk_passed_vcf(&self.id),
+        }
+    }
+
+    /// Filter the Mutect2-filtered VCF to keep only PASS variants.
+    ///
+    /// Only used by the local (non-chunked) path; the chunked path builds its
+    /// own `BcftoolsKeepPass` jobs in `run_mutect2_chunked`.
+    fn filter_pass(&self) -> anyhow::Result<()> {
+        let output_vcf = self.output_vcf_path();
+        let vcf_passed = self.passed_vcf_path();
+
+        // Early return also skips re-indexing — assumes an existing PASS VCF
+        // already has its index from a previous successful run.
+        if Path::new(&vcf_passed).exists() {
+            debug!("PASS VCF already exists: {vcf_passed}");
+            return Ok(());
+        }
+
+        info!(
+            "Extracting PASS variants for Mutect2 {} (part: {:?})",
+            self.id, self.part_index
+        );
+
+        let mut cmd = BcftoolsKeepPass::from_config(&self.config, output_vcf, &vcf_passed);
+        let report = run!(&self.config, &mut cmd)
+            .with_context(|| format!("Failed to filter PASS for {}", self.id))?;
+        report
+            .save_to_file(format!("{}/bcftools_pass_", self.log_dir))
+            .context("Failed to save bcftools PASS logs")?;
+
+        let mut cmd = BcftoolsIndex::from_config(&self.config, vcf_passed);
+        let report = run!(&self.config, &mut cmd)
+            .with_context(|| format!("Failed to index PASS VCF for {}", self.id))?;
+        report
+            .save_to_file(format!("{}/bcftools_index_", self.log_dir))
+            .context("Failed to save bcftools index logs")?;
+
+        Ok(())
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Run
+// ---------------------------------------------------------------------------
+
+impl Run for Mutect2 {
+    /// Runs the full Mutect2 pipeline for this sample, guarded by a per-sample lock.
+    ///
+    /// - Slurm mode: delegates to [`run_mutect2_chunked`] with [`DEFAULT_N_PARTS`];
+    ///   the lock stays held for the whole chunked run.
+    /// - Local mode: one full-genome run followed by PASS extraction.
+    fn run(&mut self) -> anyhow::Result<()> {
+        let lock_dir = format!("{}/locks", self.config.result_dir);
+        let _lock = SampleLock::acquire(&lock_dir, &self.id, "mutect2")
+            .with_context(|| format!("Cannot start Mutect2 for {}", self.id))?;
+
+        // Skip entirely when outputs are newer than all inputs.
+        if !self.should_run() {
+            info!("Mutect2 is up-to-date for {}.", self.id);
+            return Ok(());
+        }
+
+        if self.config.slurm_runner {
+            run_mutect2_chunked(&self.id, &self.config, DEFAULT_N_PARTS)
+        } else {
+            run!(&self.config, self)?;
+            self.filter_pass()
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Annotation / Variants
+// ---------------------------------------------------------------------------
+
+impl CallerCat for Mutect2 {
+    /// Mutect2 is categorized as a somatic caller.
+    fn caller_cat(&self) -> Annotation {
+        Annotation::Callers(Caller::Mutect2, Sample::Somatic)
+    }
+}
+
+impl Variants for Mutect2 {
+    /// Loads the final merged PASS VCF and registers each variant hash in
+    /// `annotations` under this caller.
+    ///
+    /// Always reads the configured final `gatk_passed_vcf` path, regardless of
+    /// `part_index` — per-part VCFs are never loaded directly.
+    fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
+        let caller = self.caller_cat();
+        let vcf_passed = self.config.gatk_passed_vcf(&self.id);
+
+        info!("Loading variants from {caller}: {vcf_passed}");
+
+        let variants = read_vcf(&vcf_passed)
+            .with_context(|| format!("Failed to read Mutect2 VCF {vcf_passed}"))?;
+
+        // Annotation store is updated in parallel; insert_update is assumed
+        // to be safe for concurrent use (it is called from par_iter elsewhere too).
+        variants.par_iter().for_each(|v| {
+            annotations.insert_update(v.hash(), std::slice::from_ref(&caller));
+        });
+
+        info!("{caller}: {} variants loaded", variants.len());
+
+        Ok(VariantCollection {
+            variants,
+            vcf: Vcf::new(vcf_passed.into())?,
+            caller,
+        })
+    }
+}
+
+impl Label for Mutect2 {
+    /// Display label, derived from the caller category (e.g. for reports).
+    fn label(&self) -> String {
+        self.caller_cat().to_string()
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Version
+// ---------------------------------------------------------------------------
+
+impl Version for Mutect2 {
+    /// Queries `gatk --version` locally through Singularity and parses the
+    /// version string from the combined output.
+    fn version(config: &Config) -> anyhow::Result<String> {
+        let out = ProcessCommand::new("bash")
+            .arg("-c")
+            .arg(format!(
+                "{} exec {} gatk --version",
+                config.singularity_bin, config.gatk_image
+            ))
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .output()
+            .context("Failed to spawn Singularity process for GATK version")?;
+
+        if !out.status.success() {
+            let combined = format!(
+                "{}{}",
+                String::from_utf8_lossy(&out.stdout),
+                String::from_utf8_lossy(&out.stderr)
+            );
+            anyhow::bail!(
+                "Singularity exec failed with status {}: {}",
+                out.status,
+                combined
+            );
+        }
+
+        // stdout and stderr are combined — the version banner may land on
+        // either stream depending on the GATK/container build.
+        let combined = format!(
+            "{}{}",
+            String::from_utf8_lossy(&out.stdout),
+            String::from_utf8_lossy(&out.stderr)
+        );
+
+        parse_gatk_version(&combined)
+    }
+
+    /// Same query, but submitted as a tiny Slurm job (useful when compute
+    /// nodes, not the login node, have Singularity access).
+    fn version_slurm(config: &Config) -> anyhow::Result<String> {
+        // Ad-hoc single-purpose job type: only needs `cmd()` + Slurm params.
+        struct GatkVersionJob<'a> {
+            config: &'a Config,
+        }
+
+        impl JobCommand for GatkVersionJob<'_> {
+            fn cmd(&self) -> String {
+                format!(
+                    "{} exec {} gatk --version",
+                    self.config.singularity_bin, self.config.gatk_image
+                )
+            }
+        }
+
+        impl SlurmRunner for GatkVersionJob<'_> {
+            fn slurm_args(&self) -> Vec<String> {
+                SlurmParams {
+                    job_name: Some("gatk_version".into()),
+                    partition: Some("shortq".into()),
+                    cpus_per_task: Some(1),
+                    mem: Some("10G".into()),
+                    gres: None,
+                }
+                .to_args()
+            }
+        }
+
+        let mut job = GatkVersionJob { config };
+        let out = crate::commands::SlurmRunner::exec(&mut job)
+            .context("Failed to run GATK --version via Slurm")?;
+
+        // Concatenate stdout + Slurm epilog + stderr before parsing, since the
+        // banner location varies with how Slurm captures output.
+        let mut combined = out.stdout.clone();
+        if let Some(epilog) = &out.slurm_epilog {
+            combined.push_str(&epilog.to_string());
+        }
+        combined.push_str(&out.stderr);
+
+        parse_gatk_version(&combined)
+    }
+}
+
+/// Parses GATK version from `gatk --version` output.
+///
+/// Expected format: `The Genome Analysis Toolkit (GATK) v4.6.1.0`
+fn parse_gatk_version(output: &str) -> anyhow::Result<String> {
+    let re = Regex::new(r"(?m)GATK\)\s*v([^\s]+)").expect("GATK version regex is valid");
+
+    let caps = re
+        .captures(output)
+        .context("Could not parse GATK version from output")?;
+
+    Ok(caps
+        .get(1)
+        .expect("Regex has capture group 1")
+        .as_str()
+        .to_string())
+}
+
+// ---------------------------------------------------------------------------
+// BED I/O helpers
+// ---------------------------------------------------------------------------
+
+/// Reads a BED file into a vector of [`BedRow`], skipping comments and empty lines.
+fn read_bed_file(path: &Path) -> anyhow::Result<Vec<BedRow>> {
+    let file = fs::File::open(path)
+        .with_context(|| format!("Failed to open BED file: {}", path.display()))?;
+    let reader = BufReader::new(file);
+
+    reader
+        .lines()
+        .filter_map(|line| {
+            let line = line.ok()?;
+            let trimmed = line.trim();
+            if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with("track") {
+                None
+            } else {
+                Some(trimmed.parse::<BedRow>())
+            }
+        })
+        .collect::<Result<Vec<_>, _>>()
+        .context("Failed to parse BED file")
+}
+
+/// Writes a slice of [`BedRow`] as BED3 (contig, start, end) to `path`.
+fn write_bed_chunk(rows: &[BedRow], path: &Path) -> anyhow::Result<()> {
+    let mut f = fs::File::create(path)
+        .with_context(|| format!("Failed to create BED chunk: {}", path.display()))?;
+    for row in rows {
+        let r = &row.range;
+        // Adjust field access to match your GenomeRange struct.
+        writeln!(f, "{}\t{}\t{}", r.contig(), r.range.start, r.range.end)?;
+    }
+    Ok(())
+}
+
+/// Splits BED rows into `n` roughly equal chunks.
+///
+/// The last chunk absorbs any remainder rows. Returns at most `n` chunks
+/// (fewer if `rows.len() < n`).
+fn split_bed_rows(rows: &[BedRow], n: usize) -> Vec<&[BedRow]> {
+    if n == 0 || rows.is_empty() {
+        return vec![];
+    }
+    let n = n.min(rows.len());
+    let chunk_size = rows.len() / n;
+    let remainder = rows.len() % n;
+
+    let mut chunks = Vec::with_capacity(n);
+    let mut offset = 0;
+    for i in 0..n {
+        let extra = if i < remainder { 1 } else { 0 };
+        let end = offset + chunk_size + extra;
+        chunks.push(&rows[offset..end]);
+        offset = end;
+    }
+    chunks
+}
+
+// ---------------------------------------------------------------------------
+// Chunked execution
+// ---------------------------------------------------------------------------
+
+/// Merges per-part PASS VCFs into the final PASS VCF.
+fn merge_mutect2_parts(base: &Mutect2, n_parts: usize) -> anyhow::Result<()> {
+    let mut part_pass_paths: Vec<PathBuf> = Vec::with_capacity(n_parts);
+
+    for i in 1..=n_parts {
+        let mut m = base.clone();
+        m.part_index = Some(i);
+        let part_pass = m.passed_vcf_path();
+
+        anyhow::ensure!(
+            Path::new(&part_pass).exists(),
+            "Missing Mutect2 part {i} PASS VCF: {part_pass}"
+        );
+
+        part_pass_paths.push(PathBuf::from(part_pass));
+    }
+
+    let final_passed_vcf = base.config.gatk_passed_vcf(&base.id);
+    let rand = Uuid::new_v4();
+    let final_tmp = format!("{final_passed_vcf}_{rand}.tmp");
+    let final_tmp_csi = format!("{final_passed_vcf}_{rand}.tmp.csi");
+
+    if let Some(parent) = Path::new(&final_passed_vcf).parent() {
+        fs::create_dir_all(parent)?;
+    }
+
+    info!(
+        "Concatenating {} Mutect2 part VCFs into {}",
+        n_parts, final_passed_vcf
+    );
+
+    let mut concat = BcftoolsConcat::from_config(&base.config, part_pass_paths, &final_tmp);
+    run!(&base.config, &mut concat).context("Failed to run bcftools concat for Mutect2 parts")?;
+
+    fs::rename(&final_tmp, &final_passed_vcf)
+        .context("Failed to rename merged Mutect2 PASS VCF")?;
+
+    fs::rename(&final_tmp_csi, format!("{final_passed_vcf}.csi"))
+        .context("Failed to rename merged Mutect2 PASS VCF CSI index")?;
+
+    info!(
+        "Successfully merged {} Mutect2 parts into {}",
+        n_parts, final_passed_vcf
+    );
+
+    Ok(())
+}
+
+/// Runs GATK Mutect2 in parallel chunks over BED intervals, then merges results.
+///
+/// 1. Reads the BED interval file from `config.gatk_bed_path`
+/// 2. Splits intervals into `n_parts` sub-BED files
+/// 3. Runs the full Mutect2 pipeline on each chunk (local or Slurm)
+/// 4. Filters PASS variants per chunk
+/// 5. Concatenates all PASS VCFs into the final output
+///
+/// # Arguments
+///
+/// * `id` — Sample identifier
+/// * `config` — Global pipeline configuration
+/// * `n_parts` — Number of parallel chunks (typically 20–30 for WGS)
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - `n_parts` is 0
+/// - BED file cannot be read or is empty
+/// - Mutect2 execution fails on any chunk
+/// - PASS filtering or VCF merging fails
+pub fn run_mutect2_chunked(id: &str, config: &Config, n_parts: usize) -> anyhow::Result<()> {
+    anyhow::ensure!(n_parts > 0, "n_parts must be > 0");
+
+    let base = Mutect2::initialize(id, config)?;
+
+    // Read and split BED intervals
+    let bed_rows = read_bed_file(&base.bed_path)
+        .with_context(|| format!("Failed to read BED: {}", base.bed_path.display()))?;
+
+    anyhow::ensure!(
+        !bed_rows.is_empty(),
+        "BED file is empty: {}",
+        base.bed_path.display()
+    );
+
+    // May return fewer than n_parts chunks when the BED has fewer rows.
+    let chunks = split_bed_rows(&bed_rows, n_parts);
+    let actual_n_parts = chunks.len();
+
+    info!(
+        "Running Mutect2 in {} parallel parts for {} ({} BED intervals)",
+        actual_n_parts,
+        id,
+        bed_rows.len()
+    );
+
+    // Build jobs: write sub-BEDs and configure each chunk.
+    // Setting `part_index` switches each job to the per-chunk (Slurm)
+    // resource profile and to part-specific output/PASS paths.
+    let mut jobs = Vec::with_capacity(actual_n_parts);
+    for (i, chunk) in chunks.into_iter().enumerate() {
+        let mut job = base.clone();
+        job.part_index = Some(i + 1);
+        job.log_dir = format!("{}/part{}", base.log_dir, i + 1);
+
+        // Write sub-BED into the part output directory
+        let part_dir = job.part_output_dir();
+        fs::create_dir_all(&part_dir)
+            .with_context(|| format!("Failed to create part dir: {part_dir}"))?;
+
+        let sub_bed = PathBuf::from(format!("{part_dir}/intervals.bed"));
+        write_bed_chunk(chunk, &sub_bed)
+            .with_context(|| format!("Failed to write sub-BED for part {}", i + 1))?;
+
+        job.bed_path = sub_bed;
+        info!("Planned Mutect2 job:\n{job}");
+        jobs.push(job);
+    }
+
+    // Run all Mutect2 jobs (local batch or Slurm, depending on config).
+    let outputs = run_many!(config, jobs.clone())?;
+    for output in outputs.iter() {
+        output.save_to_file(format!("{}/mutect2_", base.log_dir))?;
+    }
+
+    // Filter PASS variants for each part.
+    // NOTE(review): unlike `Mutect2::filter_pass`, this path does NOT index
+    // the per-part PASS VCFs before `merge_mutect2_parts` concatenates them —
+    // confirm `BcftoolsConcat` does not require per-input indexes.
+    info!(
+        "Filtering PASS variants for all {} Mutect2 parts",
+        actual_n_parts
+    );
+    let filter_jobs: Vec<_> = jobs
+        .iter()
+        .map(|job| {
+            BcftoolsKeepPass::from_config(&job.config, job.output_vcf_path(), job.passed_vcf_path())
+        })
+        .collect();
+    run_many!(config, filter_jobs)?;
+
+    // Merge all PASS VCFs
+    merge_mutect2_parts(&base, actual_n_parts)?;
+
+    info!(
+        "Mutect2 completed for {}: {} parts merged",
+        id, actual_n_parts
+    );
+
+    Ok(())
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::helpers::test_init;
+
+    // NOTE(review): these are integration tests, not hermetic unit tests —
+    // they require the Singularity image, Slurm access, and the sample data
+    // referenced by `Config::default()`.
+
+    /// Checks that the local and Slurm paths report the same GATK version.
+    #[test]
+    fn gatk_version() -> anyhow::Result<()> {
+        test_init();
+        let vl = Mutect2::version(&Config::default())?;
+        info!("GATK local version: {vl}");
+        let vs = Mutect2::version_slurm(&Config::default())?;
+        info!("GATK slurm version: {vs}");
+        assert_eq!(vl, vs);
+        Ok(())
+    }
+
+    /// End-to-end chunked run (50 parts) against a real sample — long-running.
+    #[test]
+    fn mutect2_run() -> anyhow::Result<()> {
+        test_init();
+        let config = Config::default();
+        run_mutect2_chunked("DUMCO", &config, 50)
+    }
+}

+ 6 - 2
src/callers/mod.rs

@@ -138,8 +138,8 @@ use std::{sync::Arc, thread};
 
 use crate::{
     callers::{
-        clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::NanomonSV,
-        savana::Savana, severus::Severus, straglr::Straglr,
+        clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, gatk::Mutect2,
+        nanomonsv::NanomonSV, savana::Savana, severus::Severus, straglr::Straglr,
     },
     commands::longphase::run_phasing_somatic,
     config::Config,
@@ -151,6 +151,7 @@ use crate::{
 pub mod clairs;
 pub mod deep_somatic;
 pub mod deep_variant;
+pub mod gatk;
 pub mod nanomonsv;
 pub mod savana;
 pub mod severus;
@@ -285,6 +286,9 @@ pub fn run_chunkeds(id: &str, config: &Config) -> anyhow::Result<()> {
     // DeepSomatic - somatic SNV/indels
     DeepSomatic::initialize(id, config)?.run()?;
 
+    // Mutect2 - somatic SNV/indels caller
+    Mutect2::initialize(id, config)?.run()?;
+
     // DeepVariant - germline variants for normal sample
     DeepVariant::initialize(id, &config.normal_name, config)?.run()?;
 

+ 3 - 3
src/collection/bam_stats.rs

@@ -1359,10 +1359,10 @@ mod tests {
 
         let config = Config::default();
 
-        let stats = WGSBamStats::open("36167", "norm", &config)?;
-        println!("{stats}");
-        let stats = WGSBamStats::open("36434", "norm", &config)?;
+        let stats = WGSBamStats::open("CHALO", "diag", &config)?;
         println!("{stats}");
+        // let stats = WGSBamStats::open("36434", "norm", &config)?;
+        // println!("{stats}");
         Ok(())
     }
 }

+ 20 - 8
src/commands/dorado.rs

@@ -4,7 +4,7 @@ use std::{
 };
 
 use anyhow::Context;
-use log::info;
+use log::{debug, info};
 
 use crate::{
     collection::pod5::Pod5,
@@ -95,9 +95,19 @@ impl Command for DoradoBasecall {
         let dorado_arg = &self.dorado_basecall_arg;
         let sequencing_kit = &self.sequencing_kit;
 
+        // Workaround: trimming fails when the run used a non-barcoded kit,
+        // so the sequencing kit is hard-coded below for now.
+        // TODO: detect the kit properly instead of overriding it here.
+        let sequencing_kit = "SQK-NBD114-24";
+        let trim = if sequencing_kit == "SQK-LSK114" {
+            "adapters"
+        } else {
+            "all"
+        };
+
         format!(
-            "{} basecaller --kit-name {sequencing_kit} {dorado_arg} {} --trim all --emit-moves > {}",
+            "{} basecaller --kit-name {sequencing_kit} --trim {} --emit-moves {dorado_arg} --models-directory /home/t_steimle/ref/dorado_models {} > {}",
             dorado_bin.display(),
+            trim,
             pod_dir.display(),
             bam.display()
         )
@@ -147,13 +157,14 @@ impl super::SlurmRunner for DoradoBasecall {
         let (gpu, n) = if let (Some(h100_av), Some(a100_av)) =
             (max_gpu_per_node("h100"), max_gpu_per_node("a100"))
         {
+            debug!("Available H100: {h100_av} and A100: {a100_av}");
             let (gpu, n) = if h100_av >= a100_av {
                 ("h100", h100_av)
             } else {
                 ("a100", a100_av)
             };
 
-            let n = n.max(2);
+            let n = n.clamp(1, 4);
             (gpu, n)
         } else {
             panic!("Are you running slurm with a100 and h100 GPU ?");
@@ -213,7 +224,7 @@ impl DoradoAlign {
                 job_name: Some("dorado_align".into()),
                 cpus_per_task: Some(threads.into()),
                 mem: Some("30G".into()),
-                partition: Some("shortq".into()),
+                partition: Some("mediumq".into()),
                 gres: None,
             },
         }
@@ -283,11 +294,12 @@ mod tests {
         test_init();
 
         let config = Config::default();
+        let tmp_dir = config.tmp_dir.clone();
 
         let mut dca = DoradoBasecall::from_config(
             &config,
-            format!("{}/inputs/pod5/A", TEST_DIR.as_str()),
-            format!("{}/outputs/unaligned_10.bam", TEST_DIR.as_str()),
+            format!("/mnt/beegfs02/scratch/t_steimle/data/runs/20260206_1540_P2I-00461-A_PBI80774_67adb0bc/pod5"),
+            format!("/mnt/beegfs02/scratch/t_steimle/data/36122_unaligned.bam"),
         );
 
         info!("Basecalling");
@@ -306,8 +318,8 @@ mod tests {
 
         let mut dca = DoradoAlign::from_config(
             &config,
-            format!("{}/outputs/unaligned_10.bam", TEST_DIR.as_str()),
-            format!("{}/outputs/10_hs1_sorted.bam", TEST_DIR.as_str()),
+            format!("/mnt/beegfs02/scratch/t_steimle/data/36122_unaligned.bam"),
+            format!("/mnt/beegfs02/scratch/t_steimle/data/36122_aligned.bam"),
         );
 
         info!("Basecalling");

+ 2 - 2
src/commands/samtools.rs

@@ -713,8 +713,8 @@ mod tests {
 
         let sort_1 = SamtoolsSort::from_config(
             &config,
-            "/mnt/beegfs02/scratch/t_steimle/data/wgs/DUMCO/diag/DUMCO_diag_hs1_HP.bam",
-            "/mnt/beegfs02/scratch/t_steimle/data/wgs/DUMCO/diag/DUMCO_diag_hs1_HP_sort.bam",
+            "/mnt/beegfs02/scratch/t_steimle/data/wgs/CHALO/diag/CHALO_diag_hs1.bam",
+            "/mnt/beegfs02/scratch/t_steimle/data/wgs/CHALO/diag/CHALO_diag_hs1ss.bam",
         );
 
         // let sort_2 = SamtoolsSort::from_config(

+ 64 - 0
src/config.rs

@@ -197,6 +197,53 @@ pub struct Config {
     /// Template for ClairS output directory (`{result_dir}`, `{id}`).
     pub clairs_output_dir: String,
 
+    // === GATK configuration ===
+    /// Path to the GATK container image (Singularity/Apptainer .sif, or a docker:// URI
+    /// if you pull at runtime).
+    ///
+    /// Examples:
+    /// - "/containers/gatk_4.6.0.0.sif"
+    /// - "docker://broadinstitute/gatk:latest"
+    pub gatk_image: String,
+
+    /// Path to a BED file restricting analysis to target regions (0-based, half-open).
+    /// Must match contig naming of the reference/BAMs (e.g. "chr9" vs "9").
+    ///
+    /// Used for targeted calling (e.g. Mutect2 `-L` or region chunking).
+    pub gatk_bed_path: String,
+
+    /// Local single-run CPU threads (non-Slurm execution).
+    /// Used for full-run Mutect2 or other GATK tools.
+    /// Typically forwarded to:
+    ///   - `--native-pair-hmm-threads`
+    ///   - `--reader-threads`
+    /// Should match available cores on the node.
+    pub gatk_threads: usize, // e.g. 32
+
+    /// Local single-run memory limit in GB.
+    /// Used to size Java heap:
+    ///   `--java-options "-Xmx{mem}g"`
+    /// Should leave headroom for native memory (PairHMM, buffers).
+    pub gatk_mem_gb: u32, // e.g. 120
+
+    /// Per-chunk CPU threads when running chunked under Slurm.
+    /// Applies to each parallel job independently.
+    pub gatk_slurm_threads: usize, // e.g. 8
+
+    /// Per-chunk memory (GB) when running under Slurm.
+    /// Used both for scheduler request and Java heap sizing per chunk.
+    /// Must be sufficient for interval-restricted Mutect2.
+    pub gatk_slurm_mem_gb: u32, // e.g. 32
+
+    /// If true, force re-run of GATK steps by removing or ignoring existing outputs.
+    pub gatk_force: bool,
+
+    /// Template for GATK output directory
+    /// (placeholders: `{result_dir}`, `{id}`, `{tumoral_name}`).
+    pub gatk_output_dir: String,
+
+    /// Template for the PASS-filtered GATK VCF path
+    /// (placeholders: `{output_dir}`, `{id}`, `{tumoral_name}`, `{reference_name}`).
+    pub gatk_passed_vcf: String,
+
     // === Savana configuration ===
     /// Savana binary name or full path.
     pub savana_bin: String,
@@ -771,6 +818,23 @@ impl Config {
         format!("{dir}/{id}_diag_clair3-germline_PASSED.vcf.gz")
     }
 
+    /// gatk_output_dir = "{result_dir}/{id}/{tumoral_name}/GATK"
+    pub fn gatk_output_dir(&self, id: &str) -> String {
+        self.gatk_output_dir
+            .replace("{result_dir}", &self.result_dir)
+            .replace("{id}", id)
+            .replace("{tumoral_name}", &self.tumoral_name)
+    }
+
+    /// gatk_passed_vcf = "{output_dir}/{id}_{tumoral_name}_{reference_name}_GATK_PASSED.vcf.gz"
+    pub fn gatk_passed_vcf(&self, id: &str) -> String {
+        self.gatk_passed_vcf
+            .replace("{output_dir}", &self.gatk_output_dir(id))
+            .replace("{id}", id)
+            .replace("{tumoral_name}", &self.tumoral_name)
+            .replace("{reference_name}", &self.reference_name)
+    }
+
     /// Paired nanomonsv output directory.
     pub fn nanomonsv_output_dir(&self, id: &str, time: &str) -> String {
         self.nanomonsv_output_dir

+ 4 - 0
src/positions.rs

@@ -487,6 +487,10 @@ impl GenomeRange {
             range: start..end,
         }
     }
+
+    pub fn contig(&self) -> String {
+        num_to_contig(self.contig)
+    }
     /// Creates a `GenomeRange` from 1-based inclusive start and end positions.
     ///
     /// This method is useful when working with data sources that use 1-based coordinates