4 gün önce · d6250eeae7
--- a/pandora-config.example.toml
+++ b/pandora-config.example.toml
@@ -365,6 +365,66 @@ straglr_solo_output_dir = "{result_dir}/{id}/{time}/straglr"
 
				 # Force Straglr recomputation.
			
 
				 straglr_force = false
			
 
				 
			
 
				+#######################################
			
 
				+# CoRAL
			
 
				+#######################################
			
 
				+# Number of CPU threads for the CoRAL reconstruction job.
			
 
				+#
			
 
				+# CoRAL is CPU-bound during breakpoint graph construction and quadratic
			
 
				+# programming cycle extraction. 8–16 threads is sufficient for most
			
 
				+# focal amplification cases; increase for highly complex ecDNA with
			
 
				+# many amplicons.
			
 
				+coral_threads = 16
			
 
				+
			
 
				+# Memory allocation for the CoRAL SLURM job (e.g. `"32G"`).
			
 
				+#
			
 
				+# Memory usage scales with amplicon complexity and BAM depth.
			
 
				+# 32G is sufficient for typical WGS at 30–60×; increase to 64G
			
 
				+# for highly rearranged genomes (chromothripsis, high ecDNA copy number).
			
 
				+coral_slurm_mem = "64G"
			
 
				+
			
 
				+# SLURM partition to use for CoRAL jobs.
			
 
				+#
			
 
				+# CoRAL requires only CPU — do not submit to a GPU partition.
			
 
				+coral_slurm_partition = "shortq"
			
 
				+
			
 
				+# Minimum copy number gain threshold for a segment to be considered
			
 
				+# a focal amplification seed (CoRAL `--gain`).
			
 
				+#
			
 
				+# CoRAL applies this threshold to the raw absolute CN values from the
			
 
				+# cn_segs BED — do NOT pre-correct for purity or ploidy, as this may
			
 
				+# cause entire chromosome arms to exceed the threshold in aneuploid tumours.
			
 
				+#
			
 
				+# Default in CoRAL is 6.0 (diploid assumption). For hyperdiploid tumours
			
 
				+# (e.g. hyperploid ALL, CML blast crisis) consider lowering to 4.0–5.0.
			
 
				+coral_seed_gain = 6.0
			
 
				+
			
 
				+# Minimum size in base pairs for a CN segment to qualify as a seed
			
 
				+# (CoRAL `--min-seed-size`).
			
 
				+#
			
 
				+# Segments below this size are discarded even if they exceed `coral_seed_gain`.
			
 
				+# Two merged proximal segments (see `coral_max_seg_gap`) are evaluated
			
 
				+# against this threshold as a single combined interval.
			
 
				+#
			
 
				+# Default in CoRAL is 100000 (100 kb). Reducing this risks including
			
 
				+# artefactual short high-copy segments; increasing it misses small focal
			
 
				+# amplifications (e.g. narrow EGFR or MYC peaks).
			
 
				+coral_min_seed_size = 100000
			
 
				+
			
 
				+# Maximum gap in base pairs between two proximal CN segments to allow
			
 
				+# merging into a single seed candidate (CoRAL `--max-seg-gap`).
			
 
				+#
			
 
				+# If two amplified segments are separated by a gap smaller than this value,
			
 
				+# they are merged before the `coral_min_seed_size` filter is applied.
			
 
				+# This handles cases where a single focal amplicon is split by a low-coverage
			
 
				+# or diploid bin.
			
 
				+#
			
 
				+# Default in CoRAL is 300000 (300 kb). For haematological cancers with
			
 
				+# compact focal amplifications (e.g. NUP214::ABL1, ABL1 amplification in
			
 
				+# CML blast crisis) a tighter value such as 100000 reduces spurious merging
			
 
				+# of adjacent independent amplicons.
			
 
				+coral_max_seg_gap = 100000
			
 
				+
			
 
				 #######################################
			
 
				 # Marlin
			
 
				 #######################################
			
--- a/src/callers/coral.rs
+++ b/src/callers/coral.rs
@@ -0,0 +1,341 @@
 
				+//! # CoRAL — Focal Amplification Reconstruction
			
 
				+//!
			
 
				+//! Wraps [CoRAL](https://github.com/AmpliconSuite/CoRAL) for ecDNA and focal
			
 
				+//! amplification cycle reconstruction from long-read WGS data.
			
 
				+//!
			
 
				+//! CoRAL takes as input:
			
 
				+//! - A tumour BAM (haplotagged recommended)
			
 
				+//! - A CN segments BED derived from SAVANA absolute CN output
			
 
				+//! - A seeds BED of amplified intervals (CN >= 4 * ploidy)
			
 
				+//!
			
 
				+//! ## Pipeline position
			
 
				+//!
			
 
				+//! CoRAL runs **after** SAVANA CNA fitting. The converter
			
 
				+//! [`savana_to_coral`] produces both required BED files from
			
 
				+//! `{sample}_segmented_absolute_copy_number.tsv` and
			
 
				+//! `{sample}_fitted_purity_ploidy.tsv`.
			
 
				+//!
			
 
				+//! ## Output files
			
 
				+//!
			
 
				+//! | File | Description |
			
 
				+//! |------|-------------|
			
 
				+//! | `coral_out/amplicon*_graph.txt` | Breakpoint graph per amplicon |
			
 
				+//! | `coral_out/amplicon*_cycles.txt` | Cycle decomposition per amplicon |
			
 
				+//!
			
 
				+//! ## Implemented traits
			
 
				+//!
			
 
				+//! - [`Initialize`] — Directory creation, SAVANA → CoRAL BED conversion
			
 
				+//! - [`ShouldRun`] — Skip if output directory already contains graph files
			
 
				+//! - [`JobCommand`] — CoRAL CLI command string
			
 
				+//! - [`LocalRunner`] — Local execution (conda env)
			
 
				+//! - [`SbatchRunner`] — SLURM submission
			
 
				+//! - [`Run`] — Unified execution via `run!` macro
			
 
				+
			
 
				+use std::fs;
			
 
				+use std::path::Path;
			
 
				+
			
 
				+use anyhow::Context;
			
 
				+use log::debug;
			
 
				+use tracing::info;
			
 
				+
			
 
				+use crate::callers::savana::SavanaCN;
			
 
				+use crate::commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner};
			
 
				+use crate::config::Config;
			
 
				+use crate::pipes::{Initialize, ShouldRun};
			
 
				+use crate::run;
			
 
				+use crate::runners::Run;
			
 
				+
			
 
				+// ── CoRALSeed ─────────────────────────────────────────────────────────────────
			
 
				+
			
 
				+/// CoRAL seed interval identification.
			
 
				+///
			
 
				+/// Runs `coral seed` to identify focally amplified genomic intervals
			
 
				+/// from SAVANA absolute CN segments. Produces a `*_CNV_SEEDS.bed` file
			
 
				+/// consumed by [`CoRALReconstruct`].
			
 
				+#[derive(Debug)]
			
 
				+pub struct CoRALSeed {
			
 
				+    pub id: String,
			
 
				+    pub config: Config,
			
 
				+    pub cn_segs_bed: String,
			
 
				+    pub output_dir: String,
			
 
				+    pub log_dir: String,
			
 
				+}
			
 
				+
			
 
				+impl Initialize for CoRALSeed {
			
 
				+    fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
			
 
				+        let id = id.to_string();
			
 
				+        let work_dir = format!("{}/{}/coral/work", config.result_dir, id);
			
 
				+        let output_dir = format!("{}/{}/coral/seed", config.result_dir, id);
			
 
				+        let log_dir = format!("{}/{}/log/coral", config.result_dir, id);
			
 
				+        let cn_segs_bed = format!("{work_dir}/cn_segs.bed");
			
 
				+
			
 
				+        for dir in [&work_dir, &output_dir, &log_dir] {
			
 
				+            fs::create_dir_all(dir)
			
 
				+                .with_context(|| format!("Failed to create directory: {dir}"))?;
			
 
				+        }
			
 
				+
			
 
				+        // Write SAVANA absolute CN → CoRAL cn_segs BED (no purity correction)
			
 
				+        let cn = SavanaCN::parse_file(&id, config)
			
 
				+            .with_context(|| format!("Failed to parse SAVANA CN segments for {id}"))?;
			
 
				+        cn.write_coral_cn_segs(Path::new(&cn_segs_bed))?;
			
 
				+
			
 
				+        Ok(CoRALSeed {
			
 
				+            id,
			
 
				+            config: config.clone(),
			
 
				+            cn_segs_bed,
			
 
				+            output_dir,
			
 
				+            log_dir,
			
 
				+        })
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl ShouldRun for CoRALSeed {
			
 
				+    /// Skip if the seeds BED already exists.
			
 
				+    fn should_run(&self) -> bool {
			
 
				+        !Path::new(&self.seeds_bed()).exists()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl CoRALSeed {
			
 
				+    /// Path of the seeds BED produced by `coral seed`.
			
 
				+    pub fn seeds_bed(&self) -> String {
			
 
				+        format!("{}/seeds_CNV_SEEDS.bed", self.output_dir)
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl JobCommand for CoRALSeed {
			
 
				+    fn cmd(&self) -> String {
			
 
				+        format!(
			
 
				+            "source {conda_sh} && conda activate coral_env && \
			
 
				+                coral seed \
			
 
				+                --cn_segs         {cn_segs} \
			
 
				+                --gain            {gain} \
			
 
				+                --min-seed-size   {min_seed_size} \
			
 
				+                --max-seg-gap     {max_seg_gap} \
			
 
				+                --output-prefix   {}/seeds",
			
 
				+            self.output_dir,
			
 
				+            conda_sh      = self.config.conda_sh,
			
 
				+            cn_segs       = self.cn_segs_bed,
			
 
				+            gain          = self.config.coral_seed_gain,
			
 
				+            min_seed_size = self.config.coral_min_seed_size,
			
 
				+            max_seg_gap   = self.config.coral_max_seg_gap,
			
 
				+        )
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl LocalRunner for CoRALSeed {}
			
 
				+impl SbatchRunner for CoRALSeed {
			
 
				+    fn slurm_params(&self) -> SlurmParams {
			
 
				+        SlurmParams {
			
 
				+            job_name: Some(format!("coral_seed_{}", self.id)),
			
 
				+            partition: Some(self.config.coral_slurm_partition.clone()),
			
 
				+            cpus_per_task: Some(1), // seed step is lightweight
			
 
				+            mem: Some("8G".into()),
			
 
				+            gres: None,
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl SlurmRunner for CoRALSeed {
			
 
				+    fn slurm_args(&self) -> Vec<String> {
			
 
				+        self.slurm_params().to_args()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl Run for CoRALSeed {
			
 
				+    fn run(&mut self) -> anyhow::Result<()> {
			
 
				+        if !self.should_run() {
			
 
				+            debug!("CoRAL seed already exists for {}; skipping.", self.id);
			
 
				+            return Ok(());
			
 
				+        }
			
 
				+
			
 
				+        let report = run!(self.config, self)
			
 
				+            .with_context(|| format!("CoRAL seed failed for: {}", self.id))?;
			
 
				+
			
 
				+        let log_file = format!("{}/coral_seed", self.log_dir);
			
 
				+        report
			
 
				+            .save_to_file(&log_file)
			
 
				+            .with_context(|| format!("Failed to write CoRAL seed log: {log_file}"))?;
			
 
				+
			
 
				+        // Guard: bail if no seeds were produced (no focal amplifications)
			
 
				+        anyhow::ensure!(
			
 
				+            Path::new(&self.seeds_bed()).exists(),
			
 
				+            "CoRAL seed produced no output for {} — no focal amplifications detected \
			
 
				+             at gain={:.1}, min-seed-size={}",
			
 
				+            self.id,
			
 
				+            self.config.coral_seed_gain,
			
 
				+            self.config.coral_min_seed_size,
			
 
				+        );
			
 
				+
			
 
				+        Ok(())
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// ── CoRALReconstruct ──────────────────────────────────────────────────────────
			
 
				+
			
 
				+/// CoRAL focal amplification reconstruction.
			
 
				+///
			
 
				+/// Runs `coral reconstruct` from seed intervals produced by [`CoRALSeed`],
			
 
				+/// building a breakpoint graph and extracting ecDNA cycle candidates.
			
 
				+///
			
 
				+/// Output files per amplicon:
			
 
				+/// - `amplicon*_graph.txt`  — breakpoint graph
			
 
				+/// - `amplicon*_cycles.txt` — cycle decomposition
			
 
				+#[derive(Debug)]
			
 
				+pub struct CoRALReconstruct {
			
 
				+    pub id: String,
			
 
				+    pub config: Config,
			
 
				+    pub tumour_bam: String,
			
 
				+    pub cn_segs_bed: String,
			
 
				+    pub seeds_bed: String,
			
 
				+    pub output_dir: String,
			
 
				+    pub log_dir: String,
			
 
				+}
			
 
				+
			
 
				+impl Initialize for CoRALReconstruct {
			
 
				+    fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
			
 
				+        let seed = CoRALSeed::initialize(id, config)?;
			
 
				+
			
 
				+        // seeds_bed must exist — CoRALSeed::run() must have completed first
			
 
				+        anyhow::ensure!(
			
 
				+            Path::new(&seed.seeds_bed()).exists(),
			
 
				+            "CoRAL seeds BED not found for {id}: {}. \
			
 
				+             Run CoRALSeed first.",
			
 
				+            seed.seeds_bed()
			
 
				+        );
			
 
				+
			
 
				+        let output_dir = format!("{}/{}/coral/reconstruct", config.result_dir, id);
			
 
				+        fs::create_dir_all(&output_dir)
			
 
				+            .with_context(|| format!("Failed to create reconstruct dir: {output_dir}"))?;
			
 
				+
			
 
				+        Ok(CoRALReconstruct {
			
 
				+            id: id.to_string(),
			
 
				+            config: config.clone(),
			
 
				+            tumour_bam: config.tumoral_bam(id),
			
 
				+            cn_segs_bed: seed.cn_segs_bed.clone(),
			
 
				+            seeds_bed: seed.seeds_bed(),
			
 
				+            output_dir,
			
 
				+            log_dir: seed.log_dir,
			
 
				+        })
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl ShouldRun for CoRALReconstruct {
			
 
				+    /// Skip if at least one graph file already exists in the output directory.
			
 
				+    fn should_run(&self) -> bool {
			
 
				+        let out = Path::new(&self.output_dir);
			
 
				+        if !out.exists() {
			
 
				+            return true;
			
 
				+        }
			
 
				+        let has_graph = fs::read_dir(out)
			
 
				+            .map(|entries| {
			
 
				+                entries
			
 
				+                    .filter_map(|e| e.ok())
			
 
				+                    .any(|e| e.file_name().to_string_lossy().ends_with("_graph.txt"))
			
 
				+            })
			
 
				+            .unwrap_or(true);
			
 
				+        !has_graph
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl JobCommand for CoRALReconstruct {
			
 
				+    fn cmd(&self) -> String {
			
 
				+        format!(
			
 
				+            "source {conda_sh} && conda activate coral_env && \
			
 
				+                coral reconstruct \
			
 
				+                --lr-bam          {bam} \
			
 
				+                --cnv-seed        {seeds} \
			
 
				+                --cn_segs         {cn_segs} \
			
 
				+                --ref_genome      {reference} \
			
 
				+                --output-prefix   {}/amplicon \
			
 
				+                --threads         {}",
			
 
				+            self.output_dir,
			
 
				+            self.config.coral_threads,
			
 
				+            conda_sh  = self.config.conda_sh,
			
 
				+            bam       = self.tumour_bam,
			
 
				+            seeds     = self.seeds_bed,
			
 
				+            cn_segs   = self.cn_segs_bed,
			
 
				+            reference = self.config.reference,
			
 
				+        )
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl LocalRunner for CoRALReconstruct {}
			
 
				+impl SbatchRunner for CoRALReconstruct {
			
 
				+    fn slurm_params(&self) -> SlurmParams {
			
 
				+        SlurmParams {
			
 
				+            job_name: Some(format!("coral_reconstruct_{}", self.id)),
			
 
				+            partition: Some(self.config.coral_slurm_partition.clone()),
			
 
				+            cpus_per_task: Some(self.config.coral_threads.into()),
			
 
				+            mem: Some(self.config.coral_slurm_mem.clone()),
			
 
				+            gres: None,
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl SlurmRunner for CoRALReconstruct {
			
 
				+    fn slurm_args(&self) -> Vec<String> {
			
 
				+        self.slurm_params().to_args()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl Run for CoRALReconstruct {
			
 
				+    fn run(&mut self) -> anyhow::Result<()> {
			
 
				+        if !self.should_run() {
			
 
				+            debug!("CoRAL reconstruct already done for {}; skipping.", self.id);
			
 
				+            return Ok(());
			
 
				+        }
			
 
				+
			
 
				+        let report = run!(self.config, self)
			
 
				+            .with_context(|| format!("CoRAL reconstruct failed for: {}", self.id))?;
			
 
				+
			
 
				+        let log_file = format!("{}/coral_reconstruct", self.log_dir);
			
 
				+        report
			
 
				+            .save_to_file(&log_file)
			
 
				+            .with_context(|| format!("Failed to write CoRAL reconstruct log: {log_file}"))?;
			
 
				+
			
 
				+        Ok(())
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// ── Top-level pipe ────────────────────────────────────────────────────────────
			
 
				+
			
 
				+/// Runs the full CoRAL pipeline: seed identification → amplicon reconstruction.
			
 
				+///
			
 
				+/// Steps:
			
 
				+/// 1. [`CoRALSeed`]        — identify focally amplified seed intervals
			
 
				+/// 2. [`CoRALReconstruct`] — build breakpoint graph and extract ecDNA cycles
			
 
				+///
			
 
				+/// Returns `Ok(())` early (without error) if the seed step finds no amplified
			
 
				+/// intervals, since absence of focal amplifications is a valid biological result.
			
 
				+///
			
 
				+/// # Arguments
			
 
				+/// * `id`     - Sample identifier
			
 
				+/// * `config` - Pipeline configuration
			
 
				+pub fn run_coral(id: &str, config: &Config) -> anyhow::Result<()> {
			
 
				+    info!("CoRAL pipeline starting for {id}");
			
 
				+
			
 
				+    // Step 1: seed
			
 
				+    let mut seed = CoRALSeed::initialize(id, config)
			
 
				+        .with_context(|| format!("CoRALSeed::initialize failed for {id}"))?;
			
 
				+
			
 
				+    match seed.run() {
			
 
				+        Ok(()) => {}
			
 
				+        Err(e) if e.to_string().contains("no focal amplifications") => {
			
 
				+            info!("CoRAL: no focal amplifications for {id}; pipeline skipped.");
			
 
				+            return Ok(());
			
 
				+        }
			
 
				+        Err(e) => return Err(e),
			
 
				+    }
			
 
				+
			
 
				+    // Step 2: reconstruct
			
 
				+    let mut reconstruct = CoRALReconstruct::initialize(id, config)
			
 
				+        .with_context(|| format!("CoRALReconstruct::initialize failed for {id}"))?;
			
 
				+
			
 
				+    reconstruct
			
 
				+        .run()
			
 
				+        .with_context(|| format!("CoRALReconstruct::run failed for {id}"))?;
			
 
				+
			
 
				+    info!("CoRAL pipeline complete for {id}");
			
 
				+    Ok(())
			
 
				+}
			
--- a/src/callers/deep_somatic.rs
+++ b/src/callers/deep_somatic.rs
@@ -498,12 +498,23 @@ impl Version for DeepSomatic {
 
				 }
			
 
				 
			
 
				 fn parse_deepsomatic_version(output: &str) -> anyhow::Result<String> {
			
 
				-    let re = Regex::new(r"(?m)DeepVariant version\s+([^\s]+)").expect("Version regex is valid");
			
 
				+    let deepsomatic_re =
			
 
				+        Regex::new(r"(?mi)\bdeepsomatic(?:\s+version)?\s*[:=]?\s*([^\s]+)")
			
 
				+            .expect("DeepSomatic version regex is valid");
			
 
				+    if let Some(caps) = deepsomatic_re.captures(output) {
			
 
				+        return Ok(caps
			
 
				+            .get(1)
			
 
				+            .expect("Regex has capture group 1")
			
 
				+            .as_str()
			
 
				+            .to_string());
			
 
				+    }
			
 
				 
			
 
				-    let caps = re
			
 
				+    // Fallback for containers that only expose the embedded DeepVariant version.
			
 
				+    let deepvariant_re =
			
 
				+        Regex::new(r"(?m)DeepVariant version\s+([^\s]+)").expect("DeepVariant regex is valid");
			
 
				+    let caps = deepvariant_re
			
 
				         .captures(output)
			
 
				         .context("Could not parse DeepSomatic version from output")?;
			
 
				-
			
 
				     Ok(caps
			
 
				         .get(1)
			
 
				         .expect("Regex has capture group 1")
			
--- a/src/callers/mod.rs
+++ b/src/callers/mod.rs
@@ -156,6 +156,7 @@ pub mod nanomonsv;
 
				 pub mod savana;
			
 
				 pub mod severus;
			
 
				 pub mod straglr;
			
 
				+pub mod coral;
			
 
				 
			
 
				 /// Runs all somatic variant callers sequentially for comprehensive multi-caller analysis.
			
 
				 ///
			
@@ -269,7 +270,8 @@ pub fn run_somatic_callers(id: &str, config: &Config) -> anyhow::Result<()> {
 
				         ];
			
 
				 
			
 
				         for h in handles {
			
 
				-            h.join().expect("thread panicked")?;
			
 
				+            h.join()
			
 
				+                .map_err(|_| anyhow::anyhow!("somatic caller thread panicked"))??;
			
 
				         }
			
 
				     } else {
			
 
				         Severus::initialize(id, config)?.run()?;
			
--- a/src/callers/nanomonsv.rs
+++ b/src/callers/nanomonsv.rs
@@ -705,9 +705,8 @@ fn somatic_parse(
 
				     config: &Config,
			
 
				     log_dir: &str,
			
 
				 ) -> anyhow::Result<()> {
			
 
				-    let tumoral_out_prefix = format!("{diag_out_dir}/{id}_diag");
			
 
				-    let norm_out_prefix = format!("{norm_out_dir}/{id}_norm"); // TODO: will break if norm
			
 
				-                                                               // name change
			
 
				+    let tumoral_out_prefix = format!("{diag_out_dir}/{id}_{}", config.tumoral_name);
			
 
				+    let norm_out_prefix = format!("{norm_out_dir}/{id}_{}", config.normal_name);
			
 
				 
			
 
				     let diag_info_vcf = format!("{tumoral_out_prefix}.bp_info.sorted.bed.gz");
			
 
				     let norm_info_vcf = format!("{norm_out_prefix}.bp_info.sorted.bed.gz");
			
--- a/src/callers/savana.rs
+++ b/src/callers/savana.rs
@@ -97,9 +97,10 @@ use itertools::Itertools;
 
				 use log::{debug, info, warn};
			
 
				 use rayon::prelude::*;
			
 
				 use serde::{Deserialize, Serialize};
			
 
				+use std::io::Write;
			
 
				 use std::{
			
 
				-    fs::{self},
			
 
				-    io::BufRead,
			
 
				+    fs::{self, File},
			
 
				+    io::{BufRead, BufWriter},
			
 
				     path::Path,
			
 
				     str::FromStr,
			
 
				 };
			
@@ -291,8 +292,8 @@ impl Run for Savana {
 
				             }
			
 
				 
			
 
				             if !Path::new(&format!("{tumoral_hp_bam}.bai")).exists() {
			
 
				-                let mut normal_index = SamtoolsIndex::from_config(&self.config, &normal_hp_bam);
			
 
				-                normal_index.run()?;
			
 
				+                let mut tumoral_index = SamtoolsIndex::from_config(&self.config, &tumoral_hp_bam);
			
 
				+                tumoral_index.run()?;
			
 
				             }
			
 
				 
			
 
				             self.job_args = vec![
			
@@ -820,15 +821,13 @@ impl SavanaCN {
 
				                     as u64,
			
 
				                 weight: parse_field(parts[6], "weight", line_num)?,
			
 
				                 copy_number: parse_field(parts[7], "copy_number", line_num)?,
			
 
				-                minor_allele_copy_number: if parts[8].is_empty() {
			
 
				-                    None
			
 
				-                } else {
			
 
				-                    Some(parse_field(parts[8], "minor_allele_copy_number", line_num)?)
			
 
				+                minor_allele_copy_number: match parts[8] {
			
 
				+                    "" | "NA" => None,
			
 
				+                    v => Some(parse_field(v, "minor_allele_copy_number", line_num)?),
			
 
				                 },
			
 
				-                mean_baf: if parts[9].is_empty() {
			
 
				-                    None
			
 
				-                } else {
			
 
				-                    Some(parse_field(parts[9], "mean_baf", line_num)?)
			
 
				+                mean_baf: match parts[9] {
			
 
				+                    "" | "NA" => None,
			
 
				+                    v => Some(parse_field(v, "mean_baf", line_num)?),
			
 
				                 },
			
 
				                 no_het_snps: if parts[10].is_empty() {
			
 
				                     0
			
@@ -842,6 +841,130 @@ impl SavanaCN {
 
				 
			
 
				         Ok(Self { segments })
			
 
				     }
			
 
				+
			
 
				+    /// Writes CoRAL `--cn_segs` BED: all segments with absolute CN.
			
 
				+    ///
			
 
				+    /// Format: `chrom\tstart\tend\tcopy_number`
			
 
				+    pub fn write_coral_cn_segs(&self, path: &Path) -> anyhow::Result<()> {
			
 
				+        let mut w = BufWriter::new(
			
 
				+            File::create(path)
			
 
				+                .with_context(|| format!("Cannot create cn_segs BED: {}", path.display()))?,
			
 
				+        );
			
 
				+        for seg in &self.segments {
			
 
				+            writeln!(
			
 
				+                w,
			
 
				+                "{}\t{}\t{}\t{:.4}",
			
 
				+                seg.chromosome, seg.start, seg.end, seg.copy_number
			
 
				+            )?;
			
 
				+        }
			
 
				+        Ok(())
			
 
				+    }
			
 
				+
			
 
				+    /// Writes CoRAL `--seeds` BED: amplified segments only, centromeres excluded.
			
 
				+    ///
			
 
				+    /// Uses a purity-aware threshold so that tumour CN dilution by normal
			
 
				+    /// contamination is accounted for:
			
 
				+    ///
			
 
				+    /// ```text
			
 
				+    /// fitted_CN = purity * tumour_CN + (1 - purity) * ploidy
			
 
				+    /// seed_threshold = purity * min_tumour_cn + (1 - purity) * ploidy
			
 
				+    /// ```
			
 
				+    ///
			
 
				+    /// Centromeric segments (stain `acen` in the cytoband BED) are excluded
			
 
				+    /// regardless of copy number, as they produce artifactual high-depth signal
			
 
				+    /// due to repetitive satellite sequences.
			
 
				+    ///
			
 
				+    /// # Arguments
			
 
				+    /// * `path` - Output BED path
			
 
				+    /// * `purity` - Tumour purity (0..1) from `_fitted_purity_ploidy.tsv`
			
 
				+    /// * `ploidy` - Tumour ploidy from `_fitted_purity_ploidy.tsv`
			
 
				+    /// * `min_tumour_cn` - Minimum tumour CN to qualify as a seed (typically `4 * ploidy`)
			
 
				+    /// * `centromeres` - Parsed centromere intervals from `parse_centromere_intervals()`
			
 
				+    pub fn write_coral_seeds(
			
 
				+        &self,
			
 
				+        path: &Path,
			
 
				+        purity: f64,
			
 
				+        ploidy: f64,
			
 
				+        min_tumour_cn: f64,
			
 
				+        centromeres: &[(String, u64, u64)],
			
 
				+    ) -> anyhow::Result<()> {
			
 
				+        let threshold = purity * min_tumour_cn + (1.0 - purity) * ploidy;
			
 
				+        let mut w = BufWriter::new(
			
 
				+            File::create(path)
			
 
				+                .with_context(|| format!("Cannot create seeds BED: {}", path.display()))?,
			
 
				+        );
			
 
				+        for seg in &self.segments {
			
 
				+            if seg.copy_number >= threshold
			
 
				+                && !overlaps_centromere(&seg.chromosome, seg.start, seg.end, centromeres)
			
 
				+            {
			
 
				+                writeln!(w, "{}\t{}\t{}", seg.chromosome, seg.start, seg.end)?;
			
 
				+            }
			
 
				+        }
			
 
				+        Ok(())
			
 
				+    }
			
 
				+
			
 
				+    /// Returns the number of amplified non-centromeric seed segments.
			
 
				+    pub fn n_seeds(
			
 
				+        &self,
			
 
				+        purity: f64,
			
 
				+        ploidy: f64,
			
 
				+        min_tumour_cn: f64,
			
 
				+        centromeres: &[(String, u64, u64)],
			
 
				+    ) -> usize {
			
 
				+        let threshold = purity * min_tumour_cn + (1.0 - purity) * ploidy;
			
 
				+        self.segments
			
 
				+            .iter()
			
 
				+            .filter(|s| {
			
 
				+                s.copy_number >= threshold
			
 
				+                    && !overlaps_centromere(&s.chromosome, s.start, s.end, centromeres)
			
 
				+            })
			
 
				+            .count()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+/// Returns true if [seg_start, seg_end) overlaps any centromeric interval
			
 
				+/// on the same chromosome.
			
 
				+fn overlaps_centromere(
			
 
				+    chrom: &str,
			
 
				+    start: u64,
			
 
				+    end: u64,
			
 
				+    centromeres: &[(String, u64, u64)],
			
 
				+) -> bool {
			
 
				+    centromeres
			
 
				+        .iter()
			
 
				+        .any(|(c, cs, ce)| c == chrom && start < *ce && end > *cs)
			
 
				+}
			
 
				+
			
 
				+/// Reads SAVANA fitted_purity_ploidy.tsv.
			
 
				+/// Expected format:
			
 
				+///   purity  ploidy  distance  rank
			
 
				+///   0.48    1.98    0.105     1
			
 
				+pub fn read_savana_purity_ploidy(path: &Path) -> anyhow::Result<(f64, f64)> {
			
 
				+    let content = std::fs::read_to_string(path)?;
			
 
				+    let mut lines = content.lines();
			
 
				+
			
 
				+    // Skip header
			
 
				+    lines.next().context("purity/ploidy file is empty")?;
			
 
				+
			
 
				+    let data = lines.next().context("purity/ploidy file has no data row")?;
			
 
				+    let fields: Vec<&str> = data.split('\t').collect();
			
 
				+
			
 
				+    anyhow::ensure!(
			
 
				+        fields.len() >= 2,
			
 
				+        "expected at least 2 columns, got {}",
			
 
				+        fields.len()
			
 
				+    );
			
 
				+
			
 
				+    let purity = fields[0]
			
 
				+        .trim()
			
 
				+        .parse::<f64>()
			
 
				+        .with_context(|| format!("cannot parse purity: '{}'", fields[0]))?;
			
 
				+    let ploidy = fields[1]
			
 
				+        .trim()
			
 
				+        .parse::<f64>()
			
 
				+        .with_context(|| format!("cannot parse ploidy: '{}'", fields[1]))?;
			
 
				+
			
 
				+    Ok((purity, ploidy))
			
 
				 }
			
 
				 
			
 
				 #[cfg(test)]
			
--- a/src/config.rs
+++ b/src/config.rs
@@ -223,7 +223,7 @@ pub struct Config {
 
				     /// Typically forwarded to:
			
 
				     ///   - `--native-pair-hmm-threads`
			
 
				     ///   - `--reader-threads`
			
 
				-    /// Should match available cores on the node.
			
 
				+    ///     Should match available cores on the node.
			
 
				     pub gatk_threads: usize, // e.g. 32
			
 
				 
			
 
				     /// Local single-run memory limit in GB.
			
@@ -282,6 +282,95 @@ pub struct Config {
 
				     /// Template for constitutional phased VCF (`{result_dir}`, `{id}`).
			
 
				     pub germline_phased_vcf: String,
			
 
				 
			
 
				+    // === CoRAL ===
			
 
				+    /// Number of CPU threads for the CoRAL reconstruction job.
			
 
				+    ///
			
 
				+    /// CoRAL is CPU-bound during breakpoint graph construction and quadratic
			
 
				+    /// programming cycle extraction. 8–16 threads is sufficient for most
			
 
				+    /// focal amplification cases; increase for highly complex ecDNA with
			
 
				+    /// many amplicons.
			
 
				+    ///
			
 
				+    /// # TOML
			
 
				+    /// ```toml
			
 
				+    /// coral_threads = 16
			
 
				+    /// ```
			
 
				+    pub coral_threads: u8,
			
 
				+
			
 
				+    /// Memory allocation for the CoRAL SLURM job (e.g. `"32G"`).
			
 
				+    ///
			
 
				+    /// Memory usage scales with amplicon complexity and BAM depth.
			
 
				+    /// 32G is sufficient for typical WGS at 30–60×; increase to 64G
			
 
				+    /// for highly rearranged genomes (chromothripsis, high ecDNA copy number).
			
 
				+    ///
			
 
				+    /// # TOML
			
 
				+    /// ```toml
			
 
				+    /// coral_slurm_mem = "32G"
			
 
				+    /// ```
			
 
				+    pub coral_slurm_mem: String,
			
 
				+
			
 
				+    /// SLURM partition to use for CoRAL jobs.
			
 
				+    ///
			
 
				+    /// CoRAL requires only CPU — do not submit to a GPU partition.
			
 
				+    /// Use the same partition as other CPU-bound callers (e.g. Severus, NanomonSV).
			
 
				+    ///
			
 
				+    /// # TOML
			
 
				+    /// ```toml
			
 
				+    /// coral_slurm_partition = "cpuq"
			
 
				+    /// ```
			
 
				+    pub coral_slurm_partition: String,
			
 
				+
			
 
				+    /// Minimum copy number gain threshold for a segment to be considered
			
 
				+    /// a focal amplification seed (CoRAL `--gain`).
			
 
				+    ///
			
 
				+    /// CoRAL applies this threshold to the raw absolute CN values from the
			
 
				+    /// cn_segs BED — do NOT pre-correct for purity or ploidy, as this may
			
 
				+    /// cause entire chromosome arms to exceed the threshold in aneuploid tumours.
			
 
				+    ///
			
 
				+    /// Default in CoRAL is 6.0 (diploid assumption). For hyperdiploid tumours
			
 
				+    /// (e.g. hyperploid ALL, CML blast crisis) consider lowering to 4.0–5.0.
			
 
				+    ///
			
 
				+    /// # TOML
			
 
				+    /// ```toml
			
 
				+    /// coral_seed_gain = 6.0
			
 
				+    /// ```
			
 
				+    pub coral_seed_gain: f64,
			
 
				+
			
 
				+    /// Minimum size in base pairs for a CN segment to qualify as a seed
			
 
				+    /// (CoRAL `--min-seed-size`).
			
 
				+    ///
			
 
				+    /// Segments below this size are discarded even if they exceed `coral_seed_gain`.
			
 
				+    /// Two merged proximal segments (see `coral_max_seg_gap`) are evaluated
			
 
				+    /// against this threshold as a single combined interval.
			
 
				+    ///
			
 
				+    /// Default in CoRAL is 100000 (100 kb). Reducing this risks including
			
 
				+    /// artefactual short high-copy segments; increasing it misses small focal
			
 
				+    /// amplifications (e.g. narrow EGFR or MYC peaks).
			
 
				+    ///
			
 
				+    /// # TOML
			
 
				+    /// ```toml
			
 
				+    /// coral_min_seed_size = 100000
			
 
				+    /// ```
			
 
				+    pub coral_min_seed_size: u32,
			
 
				+
			
 
				+    /// Maximum gap in base pairs between two proximal CN segments to allow
			
 
				+    /// merging into a single seed candidate (CoRAL `--max-seg-gap`).
			
 
				+    ///
			
 
				+    /// If two amplified segments are separated by a gap smaller than this value,
			
 
				+    /// they are merged before the `coral_min_seed_size` filter is applied.
			
 
				+    /// This handles cases where a single focal amplicon is split by a low-coverage
			
 
				+    /// or diploid bin.
			
 
				+    ///
			
 
				+    /// Default in CoRAL is 300000 (300 kb). For haematological cancers with
			
 
				+    /// compact focal amplifications (e.g. NUP214::ABL1, ABL1 amplification in
			
 
				+    /// CML blast crisis) a tighter value such as 100000 reduces spurious merging
			
 
				+    /// of adjacent independent amplicons.
			
 
				+    ///
			
 
				+    /// # TOML
			
 
				+    /// ```toml
			
 
				+    /// coral_max_seg_gap = 100000
			
 
				+    /// ```
			
 
				+    pub coral_max_seg_gap: u32,
			
 
				+
			
 
				     // === Severus configuration ===
			
 
				     /// Path to Severus main script (`severus.py`).
			
 
				     pub severus_bin: String,
			
@@ -927,6 +1016,36 @@ impl Config {
 
				             .replace("{haplotagged_bam_tag_name}", &self.haplotagged_bam_tag_name)
			
 
				     }
			
 
				 
			
 
				+    /// CoRAl
			
 
				+    pub fn coral_output_dir(&self, id: &str) -> String {
			
 
				+        format!(
			
 
				+            "{}/{}/{}/coral/output",
			
 
				+            self.result_dir, id, self.tumoral_name
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				+    pub fn coral_work_dir(&self, id: &str) -> String {
			
 
				+        format!(
			
 
				+            "{}/{}/{}/coral/work",
			
 
				+            self.result_dir, id, self.tumoral_name
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				+    pub fn savana_fitted_purity_ploidy(&self, id: &str) -> String {
			
 
				+        // adjust the glob pattern to match your actual SAVANA output naming
			
 
				+        format!(
			
 
				+            "{}/{}/{}/savana/{}_*_fitted_purity_ploidy.tsv",
			
 
				+            self.result_dir, id, self.tumoral_name, id
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				+    pub fn savana_segmented_absolute_cn(&self, id: &str) -> String {
			
 
				+        format!(
			
 
				+            "{}/{}/{}/savana/{}_*_segmented_absolute_copy_number.tsv",
			
 
				+            self.result_dir, id, self.tumoral_name, id
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				     /// Severus paired output directory.
			
 
				     pub fn severus_output_dir(&self, id: &str) -> String {
			
 
				         self.severus_output_dir
			
--- a/src/io/bed.rs
+++ b/src/io/bed.rs
@@ -372,6 +372,39 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
				     Ok(())
			
 
				 }
			
 
				 
			
 
				+/// Returns a set of (chrom, start, end) intervals for centromeric regions
			
 
				+/// parsed from a UCSC-format cytoband BED.
			
 
				+///
			
 
				+/// Centromeres are identified by the `acen` stain field (column 4).
			
 
				+///
			
 
				+/// Format: `chrom\tstart\tend\tband_name\tstain`
			
 
				+pub fn parse_centromere_intervals(path: &str) -> anyhow::Result<Vec<(String, u64, u64)>> {
			
 
				+    let reader = BufReader::new(get_reader(path)
			
 
				+        .with_context(|| format!("Cannot open cytobands BED: {path}"))?);
			
 
				+
			
 
				+    let mut intervals = Vec::new();
			
 
				+    for line in reader.lines() {
			
 
				+        let line = line?;
			
 
				+        if line.starts_with('#') || line.is_empty() {
			
 
				+            continue;
			
 
				+        }
			
 
				+        let f: Vec<&str> = line.split('\t').collect();
			
 
				+        anyhow::ensure!(
			
 
				+            f.len() >= 5,
			
 
				+            "Expected 5 columns in cytoband BED, got {}: {line}",
			
 
				+            f.len()
			
 
				+        );
			
 
				+        if f[4].trim() == "acen" {
			
 
				+            intervals.push((
			
 
				+                f[0].to_string(),
			
 
				+                f[1].parse::<u64>().with_context(|| format!("start: {}", f[1]))?,
			
 
				+                f[2].parse::<u64>().with_context(|| format!("end: {}", f[2]))?,
			
 
				+            ));
			
 
				+        }
			
 
				+    }
			
 
				+    Ok(intervals)
			
 
				+}
			
 
				+
			
 
				 #[cfg(test)]
			
 
				 mod tests {
			
 
				     use super::*;
			
--- a/src/variant/variants_stats.rs
+++ b/src/variant/variants_stats.rs
@@ -620,9 +620,17 @@ pub fn somatic_depth_quality_ranges(
 
				                 match (n_ok, t_ok) {
			
 
				                     (false, false) => break,
			
 
				                     (true, false) => {
			
 
				-                        anyhow::bail!("{} has extra lines at {}", normal_path, line_no)
			
 
				+                        anyhow::bail!(
			
 
				+                            "{normal_path} has extra lines at {line_no}; last normal record = {:?}",
			
 
				+                            String::from_utf8_lossy(n_rec.as_slice())
			
 
				+                        )
			
 
				+                    }
			
 
				+                    (false, true) => {
			
 
				+                        anyhow::bail!(
			
 
				+                            "{tumor_path} has extra lines at {line_no}; last tumor record = {:?}",
			
 
				+                            String::from_utf8_lossy(t_rec.as_slice())
			
 
				+                        )
			
 
				                     }
			
 
				-                    (false, true) => anyhow::bail!("{} has extra lines at {}", tumor_path, line_no),
			
 
				                     (true, true) => {
			
 
				                         let (n_start, n_depths, n_lowq) =
			
 
				                             parse_bin_record_into(&n_rec, &mut n_buf, &contig)