coral.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. //! # CoRAL — Focal Amplification Reconstruction
  2. //!
  3. //! Wraps [CoRAL](https://github.com/AmpliconSuite/CoRAL) for ecDNA and focal
  4. //! amplification cycle reconstruction from long-read WGS data.
  5. //!
  6. //! CoRAL takes as input:
  7. //! - A tumour BAM (haplotagged recommended)
  8. //! - A CN segments BED derived from SAVANA absolute CN output
  9. //! - A seeds BED of amplified intervals (CN >= 4 * ploidy)
  10. //!
  11. //! ## Pipeline position
  12. //!
  13. //! CoRAL runs **after** SAVANA CNA fitting. The converter
  14. //! [`savana_to_coral`] produces both required BED files from
  15. //! `{sample}_segmented_absolute_copy_number.tsv` and
  16. //! `{sample}_fitted_purity_ploidy.tsv`.
  17. //!
  18. //! ## Output files
  19. //!
  20. //! | File | Description |
  21. //! |------|-------------|
  22. //! | `coral_out/amplicon*_graph.txt` | Breakpoint graph per amplicon |
  23. //! | `coral_out/amplicon*_cycles.txt` | Cycle decomposition per amplicon |
  24. //!
  25. //! ## Implemented traits
  26. //!
  27. //! - [`Initialize`] — Directory creation, SAVANA → CoRAL BED conversion
  28. //! - [`ShouldRun`] — Skip if output directory already contains graph files
  29. //! - [`JobCommand`] — CoRAL CLI command string
  30. //! - [`LocalRunner`] — Local execution (conda env)
  31. //! - [`SbatchRunner`] — SLURM submission
  32. //! - [`Run`] — Unified execution via `run!` macro
  33. use std::fs;
  34. use std::path::Path;
  35. use anyhow::Context;
  36. use log::debug;
  37. use tracing::info;
  38. use crate::callers::savana::SavanaCN;
  39. use crate::commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner};
  40. use crate::config::Config;
  41. use crate::pipes::{Initialize, ShouldRun};
  42. use crate::run;
  43. use crate::runners::Run;
  44. // ── CoRALSeed ─────────────────────────────────────────────────────────────────
  45. /// CoRAL seed interval identification.
  46. ///
  47. /// Runs `coral seed` to identify focally amplified genomic intervals
  48. /// from SAVANA absolute CN segments. Produces a `*_CNV_SEEDS.bed` file
  49. /// consumed by [`CoRALReconstruct`].
  50. #[derive(Debug)]
  51. pub struct CoRALSeed {
  52. pub id: String,
  53. pub config: Config,
  54. pub cn_segs_bed: String,
  55. pub output_dir: String,
  56. pub log_dir: String,
  57. }
  58. impl Initialize for CoRALSeed {
  59. fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
  60. let id = id.to_string();
  61. let work_dir = format!("{}/{}/coral/work", config.result_dir, id);
  62. let output_dir = format!("{}/{}/coral/seed", config.result_dir, id);
  63. let log_dir = format!("{}/{}/log/coral", config.result_dir, id);
  64. let cn_segs_bed = format!("{work_dir}/cn_segs.bed");
  65. for dir in [&work_dir, &output_dir, &log_dir] {
  66. fs::create_dir_all(dir)
  67. .with_context(|| format!("Failed to create directory: {dir}"))?;
  68. }
  69. // Write SAVANA absolute CN → CoRAL cn_segs BED (no purity correction)
  70. let cn = SavanaCN::parse_file(&id, config)
  71. .with_context(|| format!("Failed to parse SAVANA CN segments for {id}"))?;
  72. cn.write_coral_cn_segs(Path::new(&cn_segs_bed))?;
  73. Ok(CoRALSeed {
  74. id,
  75. config: config.clone(),
  76. cn_segs_bed,
  77. output_dir,
  78. log_dir,
  79. })
  80. }
  81. }
  82. impl ShouldRun for CoRALSeed {
  83. /// Skip if the seeds BED already exists.
  84. fn should_run(&self) -> bool {
  85. !Path::new(&self.seeds_bed()).exists()
  86. }
  87. }
  88. impl CoRALSeed {
  89. /// Path of the seeds BED produced by `coral seed`.
  90. pub fn seeds_bed(&self) -> String {
  91. format!("{}/seeds_CNV_SEEDS.bed", self.output_dir)
  92. }
  93. }
  94. impl JobCommand for CoRALSeed {
  95. fn cmd(&self) -> String {
  96. format!(
  97. "source {conda_sh} && conda activate coral_env && \
  98. coral seed \
  99. --cn_segs {cn_segs} \
  100. --gain {gain} \
  101. --min-seed-size {min_seed_size} \
  102. --max-seg-gap {max_seg_gap} \
  103. --output-prefix {}/seeds",
  104. self.output_dir,
  105. conda_sh = self.config.conda_sh,
  106. cn_segs = self.cn_segs_bed,
  107. gain = self.config.coral_seed_gain,
  108. min_seed_size = self.config.coral_min_seed_size,
  109. max_seg_gap = self.config.coral_max_seg_gap,
  110. )
  111. }
  112. }
  113. impl LocalRunner for CoRALSeed {}
  114. impl SbatchRunner for CoRALSeed {
  115. fn slurm_params(&self) -> SlurmParams {
  116. SlurmParams {
  117. job_name: Some(format!("coral_seed_{}", self.id)),
  118. partition: Some(self.config.coral_slurm_partition.clone()),
  119. cpus_per_task: Some(1), // seed step is lightweight
  120. mem: Some("8G".into()),
  121. gres: None,
  122. }
  123. }
  124. }
  125. impl SlurmRunner for CoRALSeed {
  126. fn slurm_args(&self) -> Vec<String> {
  127. self.slurm_params().to_args()
  128. }
  129. }
  130. impl Run for CoRALSeed {
  131. fn run(&mut self) -> anyhow::Result<()> {
  132. if !self.should_run() {
  133. debug!("CoRAL seed already exists for {}; skipping.", self.id);
  134. return Ok(());
  135. }
  136. let report = run!(self.config, self)
  137. .with_context(|| format!("CoRAL seed failed for: {}", self.id))?;
  138. let log_file = format!("{}/coral_seed", self.log_dir);
  139. report
  140. .save_to_file(&log_file)
  141. .with_context(|| format!("Failed to write CoRAL seed log: {log_file}"))?;
  142. // Guard: bail if no seeds were produced (no focal amplifications)
  143. anyhow::ensure!(
  144. Path::new(&self.seeds_bed()).exists(),
  145. "CoRAL seed produced no output for {} — no focal amplifications detected \
  146. at gain={:.1}, min-seed-size={}",
  147. self.id,
  148. self.config.coral_seed_gain,
  149. self.config.coral_min_seed_size,
  150. );
  151. Ok(())
  152. }
  153. }
  154. // ── CoRALReconstruct ──────────────────────────────────────────────────────────
  155. /// CoRAL focal amplification reconstruction.
  156. ///
  157. /// Runs `coral reconstruct` from seed intervals produced by [`CoRALSeed`],
  158. /// building a breakpoint graph and extracting ecDNA cycle candidates.
  159. ///
  160. /// Output files per amplicon:
  161. /// - `amplicon*_graph.txt` — breakpoint graph
  162. /// - `amplicon*_cycles.txt` — cycle decomposition
  163. #[derive(Debug)]
  164. pub struct CoRALReconstruct {
  165. pub id: String,
  166. pub config: Config,
  167. pub tumour_bam: String,
  168. pub cn_segs_bed: String,
  169. pub seeds_bed: String,
  170. pub output_dir: String,
  171. pub log_dir: String,
  172. }
  173. impl Initialize for CoRALReconstruct {
  174. fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
  175. let seed = CoRALSeed::initialize(id, config)?;
  176. // seeds_bed must exist — CoRALSeed::run() must have completed first
  177. anyhow::ensure!(
  178. Path::new(&seed.seeds_bed()).exists(),
  179. "CoRAL seeds BED not found for {id}: {}. \
  180. Run CoRALSeed first.",
  181. seed.seeds_bed()
  182. );
  183. let output_dir = format!("{}/{}/coral/reconstruct", config.result_dir, id);
  184. fs::create_dir_all(&output_dir)
  185. .with_context(|| format!("Failed to create reconstruct dir: {output_dir}"))?;
  186. Ok(CoRALReconstruct {
  187. id: id.to_string(),
  188. config: config.clone(),
  189. tumour_bam: config.tumoral_bam(id),
  190. cn_segs_bed: seed.cn_segs_bed.clone(),
  191. seeds_bed: seed.seeds_bed(),
  192. output_dir,
  193. log_dir: seed.log_dir,
  194. })
  195. }
  196. }
  197. impl ShouldRun for CoRALReconstruct {
  198. /// Skip if at least one graph file already exists in the output directory.
  199. fn should_run(&self) -> bool {
  200. let out = Path::new(&self.output_dir);
  201. if !out.exists() {
  202. return true;
  203. }
  204. let has_graph = fs::read_dir(out)
  205. .map(|entries| {
  206. entries
  207. .filter_map(|e| e.ok())
  208. .any(|e| e.file_name().to_string_lossy().ends_with("_graph.txt"))
  209. })
  210. .unwrap_or(true);
  211. !has_graph
  212. }
  213. }
  214. impl JobCommand for CoRALReconstruct {
  215. fn cmd(&self) -> String {
  216. format!(
  217. "source {conda_sh} && conda activate coral_env && \
  218. coral reconstruct \
  219. --lr-bam {bam} \
  220. --cnv-seed {seeds} \
  221. --cn_segs {cn_segs} \
  222. --ref_genome {reference} \
  223. --output-prefix {}/amplicon \
  224. --threads {}",
  225. self.output_dir,
  226. self.config.coral_threads,
  227. conda_sh = self.config.conda_sh,
  228. bam = self.tumour_bam,
  229. seeds = self.seeds_bed,
  230. cn_segs = self.cn_segs_bed,
  231. reference = self.config.reference,
  232. )
  233. }
  234. }
  235. impl LocalRunner for CoRALReconstruct {}
  236. impl SbatchRunner for CoRALReconstruct {
  237. fn slurm_params(&self) -> SlurmParams {
  238. SlurmParams {
  239. job_name: Some(format!("coral_reconstruct_{}", self.id)),
  240. partition: Some(self.config.coral_slurm_partition.clone()),
  241. cpus_per_task: Some(self.config.coral_threads.into()),
  242. mem: Some(self.config.coral_slurm_mem.clone()),
  243. gres: None,
  244. }
  245. }
  246. }
  247. impl SlurmRunner for CoRALReconstruct {
  248. fn slurm_args(&self) -> Vec<String> {
  249. self.slurm_params().to_args()
  250. }
  251. }
  252. impl Run for CoRALReconstruct {
  253. fn run(&mut self) -> anyhow::Result<()> {
  254. if !self.should_run() {
  255. debug!("CoRAL reconstruct already done for {}; skipping.", self.id);
  256. return Ok(());
  257. }
  258. let report = run!(self.config, self)
  259. .with_context(|| format!("CoRAL reconstruct failed for: {}", self.id))?;
  260. let log_file = format!("{}/coral_reconstruct", self.log_dir);
  261. report
  262. .save_to_file(&log_file)
  263. .with_context(|| format!("Failed to write CoRAL reconstruct log: {log_file}"))?;
  264. Ok(())
  265. }
  266. }
  267. // ── Top-level pipe ────────────────────────────────────────────────────────────
  268. /// Runs the full CoRAL pipeline: seed identification → amplicon reconstruction.
  269. ///
  270. /// Steps:
  271. /// 1. [`CoRALSeed`] — identify focally amplified seed intervals
  272. /// 2. [`CoRALReconstruct`] — build breakpoint graph and extract ecDNA cycles
  273. ///
  274. /// Returns `Ok(())` early (without error) if the seed step finds no amplified
  275. /// intervals, since absence of focal amplifications is a valid biological result.
  276. ///
  277. /// # Arguments
  278. /// * `id` - Sample identifier
  279. /// * `config` - Pipeline configuration
  280. pub fn run_coral(id: &str, config: &Config) -> anyhow::Result<()> {
  281. info!("CoRAL pipeline starting for {id}");
  282. // Step 1: seed
  283. let mut seed = CoRALSeed::initialize(id, config)
  284. .with_context(|| format!("CoRALSeed::initialize failed for {id}"))?;
  285. match seed.run() {
  286. Ok(()) => {}
  287. Err(e) if e.to_string().contains("no focal amplifications") => {
  288. info!("CoRAL: no focal amplifications for {id}; pipeline skipped.");
  289. return Ok(());
  290. }
  291. Err(e) => return Err(e),
  292. }
  293. // Step 2: reconstruct
  294. let mut reconstruct = CoRALReconstruct::initialize(id, config)
  295. .with_context(|| format!("CoRALReconstruct::initialize failed for {id}"))?;
  296. reconstruct
  297. .run()
  298. .with_context(|| format!("CoRALReconstruct::run failed for {id}"))?;
  299. info!("CoRAL pipeline complete for {id}");
  300. Ok(())
  301. }