//! # CoRAL — Focal Amplification Reconstruction //! //! Wraps [CoRAL](https://github.com/AmpliconSuite/CoRAL) for ecDNA and focal //! amplification cycle reconstruction from long-read WGS data. //! //! CoRAL takes as input: //! - A tumour BAM (haplotagged recommended) //! - A CN segments BED derived from SAVANA absolute CN output //! - A seeds BED of amplified intervals (CN >= 4 * ploidy) //! //! ## Pipeline position //! //! CoRAL runs **after** SAVANA CNA fitting. The converter //! [`savana_to_coral`] produces both required BED files from //! `{sample}_segmented_absolute_copy_number.tsv` and //! `{sample}_fitted_purity_ploidy.tsv`. //! //! ## Output files //! //! | File | Description | //! |------|-------------| //! | `coral_out/amplicon*_graph.txt` | Breakpoint graph per amplicon | //! | `coral_out/amplicon*_cycles.txt` | Cycle decomposition per amplicon | //! //! ## Implemented traits //! //! - [`Initialize`] — Directory creation, SAVANA → CoRAL BED conversion //! - [`ShouldRun`] — Skip if output directory already contains graph files //! - [`JobCommand`] — CoRAL CLI command string //! - [`LocalRunner`] — Local execution (conda env) //! - [`SbatchRunner`] — SLURM submission //! - [`Run`] — Unified execution via `run!` macro use std::fs; use std::path::Path; use anyhow::Context; use log::debug; use tracing::info; use crate::callers::savana::SavanaCN; use crate::commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner}; use crate::config::Config; use crate::pipes::{Initialize, ShouldRun}; use crate::run; use crate::runners::Run; // ── CoRALSeed ───────────────────────────────────────────────────────────────── /// CoRAL seed interval identification. /// /// Runs `coral seed` to identify focally amplified genomic intervals /// from SAVANA absolute CN segments. Produces a `*_CNV_SEEDS.bed` file /// consumed by [`CoRALReconstruct`]. #[derive(Debug)] pub struct CoRALSeed { pub id: String, pub config: Config, pub cn_segs_bed: String, pub output_dir: String, pub log_dir: String, } impl Initialize for CoRALSeed { fn initialize(id: &str, config: &Config) -> anyhow::Result { let id = id.to_string(); let work_dir = format!("{}/{}/coral/work", config.result_dir, id); let output_dir = format!("{}/{}/coral/seed", config.result_dir, id); let log_dir = format!("{}/{}/log/coral", config.result_dir, id); let cn_segs_bed = format!("{work_dir}/cn_segs.bed"); for dir in [&work_dir, &output_dir, &log_dir] { fs::create_dir_all(dir) .with_context(|| format!("Failed to create directory: {dir}"))?; } // Write SAVANA absolute CN → CoRAL cn_segs BED (no purity correction) let cn = SavanaCN::parse_file(&id, config) .with_context(|| format!("Failed to parse SAVANA CN segments for {id}"))?; cn.write_coral_cn_segs(Path::new(&cn_segs_bed))?; Ok(CoRALSeed { id, config: config.clone(), cn_segs_bed, output_dir, log_dir, }) } } impl ShouldRun for CoRALSeed { /// Skip if the seeds BED already exists. fn should_run(&self) -> bool { !Path::new(&self.seeds_bed()).exists() } } impl CoRALSeed { /// Path of the seeds BED produced by `coral seed`. pub fn seeds_bed(&self) -> String { format!("{}/seeds_CNV_SEEDS.bed", self.output_dir) } } impl JobCommand for CoRALSeed { fn cmd(&self) -> String { format!( "source {conda_sh} && conda activate coral_env && \ coral seed \ --cn_segs {cn_segs} \ --gain {gain} \ --min-seed-size {min_seed_size} \ --max-seg-gap {max_seg_gap} \ --output-prefix {}/seeds", self.output_dir, conda_sh = self.config.conda_sh, cn_segs = self.cn_segs_bed, gain = self.config.coral_seed_gain, min_seed_size = self.config.coral_min_seed_size, max_seg_gap = self.config.coral_max_seg_gap, ) } } impl LocalRunner for CoRALSeed {} impl SbatchRunner for CoRALSeed { fn slurm_params(&self) -> SlurmParams { SlurmParams { job_name: Some(format!("coral_seed_{}", self.id)), partition: Some(self.config.coral_slurm_partition.clone()), cpus_per_task: Some(1), // seed step is lightweight mem: Some("8G".into()), gres: None, } } } impl SlurmRunner for CoRALSeed { fn slurm_args(&self) -> Vec { self.slurm_params().to_args() } } impl Run for CoRALSeed { fn run(&mut self) -> anyhow::Result<()> { if !self.should_run() { debug!("CoRAL seed already exists for {}; skipping.", self.id); return Ok(()); } let report = run!(self.config, self) .with_context(|| format!("CoRAL seed failed for: {}", self.id))?; let log_file = format!("{}/coral_seed", self.log_dir); report .save_to_file(&log_file) .with_context(|| format!("Failed to write CoRAL seed log: {log_file}"))?; // Guard: bail if no seeds were produced (no focal amplifications) anyhow::ensure!( Path::new(&self.seeds_bed()).exists(), "CoRAL seed produced no output for {} — no focal amplifications detected \ at gain={:.1}, min-seed-size={}", self.id, self.config.coral_seed_gain, self.config.coral_min_seed_size, ); Ok(()) } } // ── CoRALReconstruct ────────────────────────────────────────────────────────── /// CoRAL focal amplification reconstruction. /// /// Runs `coral reconstruct` from seed intervals produced by [`CoRALSeed`], /// building a breakpoint graph and extracting ecDNA cycle candidates. /// /// Output files per amplicon: /// - `amplicon*_graph.txt` — breakpoint graph /// - `amplicon*_cycles.txt` — cycle decomposition #[derive(Debug)] pub struct CoRALReconstruct { pub id: String, pub config: Config, pub tumour_bam: String, pub cn_segs_bed: String, pub seeds_bed: String, pub output_dir: String, pub log_dir: String, } impl Initialize for CoRALReconstruct { fn initialize(id: &str, config: &Config) -> anyhow::Result { let seed = CoRALSeed::initialize(id, config)?; // seeds_bed must exist — CoRALSeed::run() must have completed first anyhow::ensure!( Path::new(&seed.seeds_bed()).exists(), "CoRAL seeds BED not found for {id}: {}. \ Run CoRALSeed first.", seed.seeds_bed() ); let output_dir = format!("{}/{}/coral/reconstruct", config.result_dir, id); fs::create_dir_all(&output_dir) .with_context(|| format!("Failed to create reconstruct dir: {output_dir}"))?; Ok(CoRALReconstruct { id: id.to_string(), config: config.clone(), tumour_bam: config.tumoral_bam(id), cn_segs_bed: seed.cn_segs_bed.clone(), seeds_bed: seed.seeds_bed(), output_dir, log_dir: seed.log_dir, }) } } impl ShouldRun for CoRALReconstruct { /// Skip if at least one graph file already exists in the output directory. fn should_run(&self) -> bool { let out = Path::new(&self.output_dir); if !out.exists() { return true; } let has_graph = fs::read_dir(out) .map(|entries| { entries .filter_map(|e| e.ok()) .any(|e| e.file_name().to_string_lossy().ends_with("_graph.txt")) }) .unwrap_or(true); !has_graph } } impl JobCommand for CoRALReconstruct { fn cmd(&self) -> String { format!( "source {conda_sh} && conda activate coral_env && \ coral reconstruct \ --lr-bam {bam} \ --cnv-seed {seeds} \ --cn_segs {cn_segs} \ --ref_genome {reference} \ --output-prefix {}/amplicon \ --threads {}", self.output_dir, self.config.coral_threads, conda_sh = self.config.conda_sh, bam = self.tumour_bam, seeds = self.seeds_bed, cn_segs = self.cn_segs_bed, reference = self.config.reference, ) } } impl LocalRunner for CoRALReconstruct {} impl SbatchRunner for CoRALReconstruct { fn slurm_params(&self) -> SlurmParams { SlurmParams { job_name: Some(format!("coral_reconstruct_{}", self.id)), partition: Some(self.config.coral_slurm_partition.clone()), cpus_per_task: Some(self.config.coral_threads.into()), mem: Some(self.config.coral_slurm_mem.clone()), gres: None, } } } impl SlurmRunner for CoRALReconstruct { fn slurm_args(&self) -> Vec { self.slurm_params().to_args() } } impl Run for CoRALReconstruct { fn run(&mut self) -> anyhow::Result<()> { if !self.should_run() { debug!("CoRAL reconstruct already done for {}; skipping.", self.id); return Ok(()); } let report = run!(self.config, self) .with_context(|| format!("CoRAL reconstruct failed for: {}", self.id))?; let log_file = format!("{}/coral_reconstruct", self.log_dir); report .save_to_file(&log_file) .with_context(|| format!("Failed to write CoRAL reconstruct log: {log_file}"))?; Ok(()) } } // ── Top-level pipe ──────────────────────────────────────────────────────────── /// Runs the full CoRAL pipeline: seed identification → amplicon reconstruction. /// /// Steps: /// 1. [`CoRALSeed`] — identify focally amplified seed intervals /// 2. [`CoRALReconstruct`] — build breakpoint graph and extract ecDNA cycles /// /// Returns `Ok(())` early (without error) if the seed step finds no amplified /// intervals, since absence of focal amplifications is a valid biological result. /// /// # Arguments /// * `id` - Sample identifier /// * `config` - Pipeline configuration pub fn run_coral(id: &str, config: &Config) -> anyhow::Result<()> { info!("CoRAL pipeline starting for {id}"); // Step 1: seed let mut seed = CoRALSeed::initialize(id, config) .with_context(|| format!("CoRALSeed::initialize failed for {id}"))?; match seed.run() { Ok(()) => {} Err(e) if e.to_string().contains("no focal amplifications") => { info!("CoRAL: no focal amplifications for {id}; pipeline skipped."); return Ok(()); } Err(e) => return Err(e), } // Step 2: reconstruct let mut reconstruct = CoRALReconstruct::initialize(id, config) .with_context(|| format!("CoRALReconstruct::initialize failed for {id}"))?; reconstruct .run() .with_context(|| format!("CoRALReconstruct::run failed for {id}"))?; info!("CoRAL pipeline complete for {id}"); Ok(()) }