|
|
@@ -0,0 +1,341 @@
|
|
|
+//! # CoRAL — Focal Amplification Reconstruction
|
|
|
+//!
|
|
|
+//! Wraps [CoRAL](https://github.com/AmpliconSuite/CoRAL) for ecDNA and focal
|
|
|
+//! amplification cycle reconstruction from long-read WGS data.
|
|
|
+//!
|
|
|
+//! CoRAL takes as input:
|
|
|
+//! - A tumour BAM (haplotagged recommended)
|
|
|
+//! - A CN segments BED derived from SAVANA absolute CN output
|
|
|
+//! - A seeds BED of amplified intervals (CN >= 4 * ploidy)
|
|
|
+//!
|
|
|
+//! ## Pipeline position
|
|
|
+//!
|
|
|
+//! CoRAL runs **after** SAVANA CNA fitting. The converter
|
|
|
+//! [`savana_to_coral`] produces both required BED files from
|
|
|
+//! `{sample}_segmented_absolute_copy_number.tsv` and
|
|
|
+//! `{sample}_fitted_purity_ploidy.tsv`.
|
|
|
+//!
|
|
|
+//! ## Output files
|
|
|
+//!
|
|
|
+//! | File | Description |
|
|
|
+//! |------|-------------|
|
|
|
+//! | `coral_out/amplicon*_graph.txt` | Breakpoint graph per amplicon |
|
|
|
+//! | `coral_out/amplicon*_cycles.txt` | Cycle decomposition per amplicon |
|
|
|
+//!
|
|
|
+//! ## Implemented traits
|
|
|
+//!
|
|
|
+//! - [`Initialize`] — Directory creation, SAVANA → CoRAL BED conversion
|
|
|
+//! - [`ShouldRun`] — Skip if output directory already contains graph files
|
|
|
+//! - [`JobCommand`] — CoRAL CLI command string
|
|
|
+//! - [`LocalRunner`] — Local execution (conda env)
|
|
|
+//! - [`SbatchRunner`] — SLURM submission
|
|
|
+//! - [`Run`] — Unified execution via `run!` macro
|
|
|
+
|
|
|
+use std::fs;
|
|
|
+use std::path::Path;
|
|
|
+
|
|
|
+use anyhow::Context;
|
|
|
+use log::debug;
|
|
|
+use tracing::info;
|
|
|
+
|
|
|
+use crate::callers::savana::SavanaCN;
|
|
|
+use crate::commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner};
|
|
|
+use crate::config::Config;
|
|
|
+use crate::pipes::{Initialize, ShouldRun};
|
|
|
+use crate::run;
|
|
|
+use crate::runners::Run;
|
|
|
+
|
|
|
+// ── CoRALSeed ─────────────────────────────────────────────────────────────────
|
|
|
+
|
|
|
+/// CoRAL seed interval identification.
|
|
|
+///
|
|
|
+/// Runs `coral seed` to identify focally amplified genomic intervals
|
|
|
+/// from SAVANA absolute CN segments. Produces a `*_CNV_SEEDS.bed` file
|
|
|
+/// consumed by [`CoRALReconstruct`].
|
|
|
+#[derive(Debug)]
|
|
|
+pub struct CoRALSeed {
|
|
|
+ pub id: String,
|
|
|
+ pub config: Config,
|
|
|
+ pub cn_segs_bed: String,
|
|
|
+ pub output_dir: String,
|
|
|
+ pub log_dir: String,
|
|
|
+}
|
|
|
+
|
|
|
+impl Initialize for CoRALSeed {
|
|
|
+ fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
|
|
|
+ let id = id.to_string();
|
|
|
+ let work_dir = format!("{}/{}/coral/work", config.result_dir, id);
|
|
|
+ let output_dir = format!("{}/{}/coral/seed", config.result_dir, id);
|
|
|
+ let log_dir = format!("{}/{}/log/coral", config.result_dir, id);
|
|
|
+ let cn_segs_bed = format!("{work_dir}/cn_segs.bed");
|
|
|
+
|
|
|
+ for dir in [&work_dir, &output_dir, &log_dir] {
|
|
|
+ fs::create_dir_all(dir)
|
|
|
+ .with_context(|| format!("Failed to create directory: {dir}"))?;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Write SAVANA absolute CN → CoRAL cn_segs BED (no purity correction)
|
|
|
+ let cn = SavanaCN::parse_file(&id, config)
|
|
|
+ .with_context(|| format!("Failed to parse SAVANA CN segments for {id}"))?;
|
|
|
+ cn.write_coral_cn_segs(Path::new(&cn_segs_bed))?;
|
|
|
+
|
|
|
+ Ok(CoRALSeed {
|
|
|
+ id,
|
|
|
+ config: config.clone(),
|
|
|
+ cn_segs_bed,
|
|
|
+ output_dir,
|
|
|
+ log_dir,
|
|
|
+ })
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl ShouldRun for CoRALSeed {
|
|
|
+ /// Skip if the seeds BED already exists.
|
|
|
+ fn should_run(&self) -> bool {
|
|
|
+ !Path::new(&self.seeds_bed()).exists()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl CoRALSeed {
|
|
|
+ /// Path of the seeds BED produced by `coral seed`.
|
|
|
+ pub fn seeds_bed(&self) -> String {
|
|
|
+ format!("{}/seeds_CNV_SEEDS.bed", self.output_dir)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl JobCommand for CoRALSeed {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ format!(
|
|
|
+ "source {conda_sh} && conda activate coral_env && \
|
|
|
+ coral seed \
|
|
|
+ --cn_segs {cn_segs} \
|
|
|
+ --gain {gain} \
|
|
|
+ --min-seed-size {min_seed_size} \
|
|
|
+ --max-seg-gap {max_seg_gap} \
|
|
|
+ --output-prefix {}/seeds",
|
|
|
+ self.output_dir,
|
|
|
+ conda_sh = self.config.conda_sh,
|
|
|
+ cn_segs = self.cn_segs_bed,
|
|
|
+ gain = self.config.coral_seed_gain,
|
|
|
+ min_seed_size = self.config.coral_min_seed_size,
|
|
|
+ max_seg_gap = self.config.coral_max_seg_gap,
|
|
|
+ )
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl LocalRunner for CoRALSeed {}
|
|
|
+impl SbatchRunner for CoRALSeed {
|
|
|
+ fn slurm_params(&self) -> SlurmParams {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some(format!("coral_seed_{}", self.id)),
|
|
|
+ partition: Some(self.config.coral_slurm_partition.clone()),
|
|
|
+ cpus_per_task: Some(1), // seed step is lightweight
|
|
|
+ mem: Some("8G".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl SlurmRunner for CoRALSeed {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ self.slurm_params().to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Run for CoRALSeed {
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+ if !self.should_run() {
|
|
|
+ debug!("CoRAL seed already exists for {}; skipping.", self.id);
|
|
|
+ return Ok(());
|
|
|
+ }
|
|
|
+
|
|
|
+ let report = run!(self.config, self)
|
|
|
+ .with_context(|| format!("CoRAL seed failed for: {}", self.id))?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/coral_seed", self.log_dir);
|
|
|
+ report
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .with_context(|| format!("Failed to write CoRAL seed log: {log_file}"))?;
|
|
|
+
|
|
|
+ // Guard: bail if no seeds were produced (no focal amplifications)
|
|
|
+ anyhow::ensure!(
|
|
|
+ Path::new(&self.seeds_bed()).exists(),
|
|
|
+ "CoRAL seed produced no output for {} — no focal amplifications detected \
|
|
|
+ at gain={:.1}, min-seed-size={}",
|
|
|
+ self.id,
|
|
|
+ self.config.coral_seed_gain,
|
|
|
+ self.config.coral_min_seed_size,
|
|
|
+ );
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// ── CoRALReconstruct ──────────────────────────────────────────────────────────
|
|
|
+
|
|
|
+/// CoRAL focal amplification reconstruction.
|
|
|
+///
|
|
|
+/// Runs `coral reconstruct` from seed intervals produced by [`CoRALSeed`],
|
|
|
+/// building a breakpoint graph and extracting ecDNA cycle candidates.
|
|
|
+///
|
|
|
+/// Output files per amplicon:
|
|
|
+/// - `amplicon*_graph.txt` — breakpoint graph
|
|
|
+/// - `amplicon*_cycles.txt` — cycle decomposition
|
|
|
+#[derive(Debug)]
|
|
|
+pub struct CoRALReconstruct {
|
|
|
+ pub id: String,
|
|
|
+ pub config: Config,
|
|
|
+ pub tumour_bam: String,
|
|
|
+ pub cn_segs_bed: String,
|
|
|
+ pub seeds_bed: String,
|
|
|
+ pub output_dir: String,
|
|
|
+ pub log_dir: String,
|
|
|
+}
|
|
|
+
|
|
|
+impl Initialize for CoRALReconstruct {
|
|
|
+ fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
|
|
|
+ let seed = CoRALSeed::initialize(id, config)?;
|
|
|
+
|
|
|
+ // seeds_bed must exist — CoRALSeed::run() must have completed first
|
|
|
+ anyhow::ensure!(
|
|
|
+ Path::new(&seed.seeds_bed()).exists(),
|
|
|
+ "CoRAL seeds BED not found for {id}: {}. \
|
|
|
+ Run CoRALSeed first.",
|
|
|
+ seed.seeds_bed()
|
|
|
+ );
|
|
|
+
|
|
|
+ let output_dir = format!("{}/{}/coral/reconstruct", config.result_dir, id);
|
|
|
+ fs::create_dir_all(&output_dir)
|
|
|
+ .with_context(|| format!("Failed to create reconstruct dir: {output_dir}"))?;
|
|
|
+
|
|
|
+ Ok(CoRALReconstruct {
|
|
|
+ id: id.to_string(),
|
|
|
+ config: config.clone(),
|
|
|
+ tumour_bam: config.tumoral_bam(id),
|
|
|
+ cn_segs_bed: seed.cn_segs_bed.clone(),
|
|
|
+ seeds_bed: seed.seeds_bed(),
|
|
|
+ output_dir,
|
|
|
+ log_dir: seed.log_dir,
|
|
|
+ })
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl ShouldRun for CoRALReconstruct {
|
|
|
+ /// Skip if at least one graph file already exists in the output directory.
|
|
|
+ fn should_run(&self) -> bool {
|
|
|
+ let out = Path::new(&self.output_dir);
|
|
|
+ if !out.exists() {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ let has_graph = fs::read_dir(out)
|
|
|
+ .map(|entries| {
|
|
|
+ entries
|
|
|
+ .filter_map(|e| e.ok())
|
|
|
+ .any(|e| e.file_name().to_string_lossy().ends_with("_graph.txt"))
|
|
|
+ })
|
|
|
+ .unwrap_or(true);
|
|
|
+ !has_graph
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl JobCommand for CoRALReconstruct {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ format!(
|
|
|
+ "source {conda_sh} && conda activate coral_env && \
|
|
|
+ coral reconstruct \
|
|
|
+ --lr-bam {bam} \
|
|
|
+ --cnv-seed {seeds} \
|
|
|
+ --cn_segs {cn_segs} \
|
|
|
+ --ref_genome {reference} \
|
|
|
+ --output-prefix {}/amplicon \
|
|
|
+ --threads {}",
|
|
|
+ self.output_dir,
|
|
|
+ self.config.coral_threads,
|
|
|
+ conda_sh = self.config.conda_sh,
|
|
|
+ bam = self.tumour_bam,
|
|
|
+ seeds = self.seeds_bed,
|
|
|
+ cn_segs = self.cn_segs_bed,
|
|
|
+ reference = self.config.reference,
|
|
|
+ )
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl LocalRunner for CoRALReconstruct {}
|
|
|
+impl SbatchRunner for CoRALReconstruct {
|
|
|
+ fn slurm_params(&self) -> SlurmParams {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some(format!("coral_reconstruct_{}", self.id)),
|
|
|
+ partition: Some(self.config.coral_slurm_partition.clone()),
|
|
|
+ cpus_per_task: Some(self.config.coral_threads.into()),
|
|
|
+ mem: Some(self.config.coral_slurm_mem.clone()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl SlurmRunner for CoRALReconstruct {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ self.slurm_params().to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Run for CoRALReconstruct {
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+ if !self.should_run() {
|
|
|
+ debug!("CoRAL reconstruct already done for {}; skipping.", self.id);
|
|
|
+ return Ok(());
|
|
|
+ }
|
|
|
+
|
|
|
+ let report = run!(self.config, self)
|
|
|
+ .with_context(|| format!("CoRAL reconstruct failed for: {}", self.id))?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/coral_reconstruct", self.log_dir);
|
|
|
+ report
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .with_context(|| format!("Failed to write CoRAL reconstruct log: {log_file}"))?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// ── Top-level pipe ────────────────────────────────────────────────────────────
|
|
|
+
|
|
|
+/// Runs the full CoRAL pipeline: seed identification → amplicon reconstruction.
|
|
|
+///
|
|
|
+/// Steps:
|
|
|
+/// 1. [`CoRALSeed`] — identify focally amplified seed intervals
|
|
|
+/// 2. [`CoRALReconstruct`] — build breakpoint graph and extract ecDNA cycles
|
|
|
+///
|
|
|
+/// Returns `Ok(())` early (without error) if the seed step finds no amplified
|
|
|
+/// intervals, since absence of focal amplifications is a valid biological result.
|
|
|
+///
|
|
|
+/// # Arguments
|
|
|
+/// * `id` - Sample identifier
|
|
|
+/// * `config` - Pipeline configuration
|
|
|
+pub fn run_coral(id: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
+ info!("CoRAL pipeline starting for {id}");
|
|
|
+
|
|
|
+ // Step 1: seed
|
|
|
+ let mut seed = CoRALSeed::initialize(id, config)
|
|
|
+ .with_context(|| format!("CoRALSeed::initialize failed for {id}"))?;
|
|
|
+
|
|
|
+ match seed.run() {
|
|
|
+ Ok(()) => {}
|
|
|
+ Err(e) if e.to_string().contains("no focal amplifications") => {
|
|
|
+ info!("CoRAL: no focal amplifications for {id}; pipeline skipped.");
|
|
|
+ return Ok(());
|
|
|
+ }
|
|
|
+ Err(e) => return Err(e),
|
|
|
+ }
|
|
|
+
|
|
|
+ // Step 2: reconstruct
|
|
|
+ let mut reconstruct = CoRALReconstruct::initialize(id, config)
|
|
|
+ .with_context(|| format!("CoRALReconstruct::initialize failed for {id}"))?;
|
|
|
+
|
|
|
+ reconstruct
|
|
|
+ .run()
|
|
|
+ .with_context(|| format!("CoRALReconstruct::run failed for {id}"))?;
|
|
|
+
|
|
|
+ info!("CoRAL pipeline complete for {id}");
|
|
|
+ Ok(())
|
|
|
+}
|