Thomas 1 giorno fa
parent
commit
b85b028475
3 ha cambiato i file con 72 aggiunte e 15 eliminazioni
  1. 3 0
      pandora-config.example.toml
  2. 67 15
      src/callers/coral.rs
  3. 2 0
      src/config.rs

+ 3 - 0
pandora-config.example.toml

@@ -376,6 +376,9 @@ straglr_force = false
 # many amplicons.
 # many amplicons.
 coral_threads = 16
 coral_threads = 16
 
 
+# CoRAL cloned dir (required...)
+coral_dir = "/home/t_steimle/somatic_pipe_tools/CoRAL"
+
 # Memory allocation for the CoRAL SLURM job (e.g. `"32G"`).
 # Memory allocation for the CoRAL SLURM job (e.g. `"32G"`).
 #
 #
 # Memory usage scales with amplicon complexity and BAM depth.
 # Memory usage scales with amplicon complexity and BAM depth.

+ 67 - 15
src/callers/coral.rs

@@ -32,6 +32,7 @@
 //! - [`Run`] — Unified execution via `run!` macro
 //! - [`Run`] — Unified execution via `run!` macro
 
 
 use std::fs;
 use std::fs;
+use std::io::Write;
 use std::path::Path;
 use std::path::Path;
 
 
 use anyhow::Context;
 use anyhow::Context;
@@ -41,6 +42,8 @@ use tracing::info;
 use crate::callers::savana::SavanaCN;
 use crate::callers::savana::SavanaCN;
 use crate::commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner};
 use crate::commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner};
 use crate::config::Config;
 use crate::config::Config;
+use crate::io::bed::parse_centromere_intervals;
+use crate::io::writers::get_writer;
 use crate::pipes::{Initialize, ShouldRun};
 use crate::pipes::{Initialize, ShouldRun};
 use crate::run;
 use crate::run;
 use crate::runners::Run;
 use crate::runners::Run;
@@ -56,6 +59,7 @@ use crate::runners::Run;
 pub struct CoRALSeed {
 pub struct CoRALSeed {
     pub id: String,
     pub id: String,
     pub config: Config,
     pub config: Config,
+    pub centromeres_file: String,
     pub cn_segs_bed: String,
     pub cn_segs_bed: String,
     pub output_dir: String,
     pub output_dir: String,
     pub log_dir: String,
     pub log_dir: String,
@@ -79,9 +83,36 @@ impl Initialize for CoRALSeed {
             .with_context(|| format!("Failed to parse SAVANA CN segments for {id}"))?;
             .with_context(|| format!("Failed to parse SAVANA CN segments for {id}"))?;
         cn.write_coral_cn_segs(Path::new(&cn_segs_bed))?;
         cn.write_coral_cn_segs(Path::new(&cn_segs_bed))?;
 
 
+        let centromeres_file = Path::new(&config.cytobands_bed)
+            .parent()
+            .ok_or_else(|| {
+                anyhow::anyhow!(
+                    "Failed to parese cytoband file parent {}",
+                    config.cytobands_bed
+                )
+            })?
+            .join("centromeres.bed");
+
+        if !centromeres_file.exists() {
+            let centromeres = parse_centromere_intervals(&config.cytobands_bed)?;
+            let mut writer = get_writer(
+                centromeres_file
+                    .to_str()
+                    .ok_or_else(|| anyhow::anyhow!("invalid UTF-8 in path"))?,
+            )?;
+
+            for (contig, start, end) in centromeres {
+                writeln!(&mut writer, "{contig}\t{start}\t{end}")?;
+            }
+        }
+
         Ok(CoRALSeed {
         Ok(CoRALSeed {
             id,
             id,
             config: config.clone(),
             config: config.clone(),
+            centromeres_file: centromeres_file
+                .to_str()
+                .ok_or_else(|| anyhow::anyhow!("invalid UTF-8 in path"))?
+                .to_string(),
             cn_segs_bed,
             cn_segs_bed,
             output_dir,
             output_dir,
             log_dir,
             log_dir,
@@ -107,18 +138,25 @@ impl JobCommand for CoRALSeed {
     fn cmd(&self) -> String {
     fn cmd(&self) -> String {
         format!(
         format!(
             "source {conda_sh} && conda activate coral_env && \
             "source {conda_sh} && conda activate coral_env && \
-                coral seed \
-                --cn_segs         {cn_segs} \
+                cd {coral_dir} && \
+                poetry run coral seed \
+                --cn-seg         {cn_segs} \
+                --lr-bam {bam_file} \
+                --centromere-file {centromere_file} \
                 --gain            {gain} \
                 --gain            {gain} \
                 --min-seed-size   {min_seed_size} \
                 --min-seed-size   {min_seed_size} \
                 --max-seg-gap     {max_seg_gap} \
                 --max-seg-gap     {max_seg_gap} \
                 --output-prefix   {}/seeds",
                 --output-prefix   {}/seeds",
             self.output_dir,
             self.output_dir,
-            conda_sh      = self.config.conda_sh,
-            cn_segs       = self.cn_segs_bed,
-            gain          = self.config.coral_seed_gain,
+            conda_sh = self.config.conda_sh,
+            coral_dir = self.config.coral_dir,
+            cn_segs = self.cn_segs_bed,
+            bam_file = self.config.tumoral_bam(&self.id),
+            centromere_file = self.centromeres_file,
+            gain = 3,
+            // gain = self.config.coral_seed_gain,
             min_seed_size = self.config.coral_min_seed_size,
             min_seed_size = self.config.coral_min_seed_size,
-            max_seg_gap   = self.config.coral_max_seg_gap,
+            max_seg_gap = self.config.coral_max_seg_gap,
         )
         )
     }
     }
 }
 }
@@ -242,20 +280,22 @@ impl JobCommand for CoRALReconstruct {
     fn cmd(&self) -> String {
     fn cmd(&self) -> String {
         format!(
         format!(
             "source {conda_sh} && conda activate coral_env && \
             "source {conda_sh} && conda activate coral_env && \
-                coral reconstruct \
+                cd {coral_dir} && \
+                poetry run coral reconstruct \
                 --lr-bam          {bam} \
                 --lr-bam          {bam} \
                 --cnv-seed        {seeds} \
                 --cnv-seed        {seeds} \
-                --cn_segs         {cn_segs} \
-                --ref_genome      {reference} \
+                --cn-seg         {cn_segs} \
                 --output-prefix   {}/amplicon \
                 --output-prefix   {}/amplicon \
-                --threads         {}",
+                --solver {solver} \
+                --solver-threads         {}",
             self.output_dir,
             self.output_dir,
             self.config.coral_threads,
             self.config.coral_threads,
-            conda_sh  = self.config.conda_sh,
-            bam       = self.tumour_bam,
-            seeds     = self.seeds_bed,
-            cn_segs   = self.cn_segs_bed,
-            reference = self.config.reference,
+            conda_sh = self.config.conda_sh,
+            coral_dir = self.config.coral_dir,
+            bam = self.tumour_bam,
+            seeds = self.seeds_bed,
+            cn_segs = self.cn_segs_bed,
+            solver = "scip" // Gurobi is not open source
         )
         )
     }
     }
 }
 }
@@ -339,3 +379,15 @@ pub fn run_coral(id: &str, config: &Config) -> anyhow::Result<()> {
     info!("CoRAL pipeline complete for {id}");
     info!("CoRAL pipeline complete for {id}");
     Ok(())
     Ok(())
 }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::helpers::test_init;
+
+    #[test]
+    fn coral_run() -> anyhow::Result<()> {
+        test_init();
+        run_coral("CML2518", &Config::default())
+    }
+}

+ 2 - 0
src/config.rs

@@ -296,6 +296,8 @@ pub struct Config {
     /// ```
     /// ```
     pub coral_threads: u8,
     pub coral_threads: u8,
 
 
+    pub coral_dir: String,
+
     /// Memory allocation for the CoRAL SLURM job (e.g. `"32G"`).
     /// Memory allocation for the CoRAL SLURM job (e.g. `"32G"`).
     ///
     ///
     /// Memory usage scales with amplicon complexity and BAM depth.
     /// Memory usage scales with amplicon complexity and BAM depth.