Thomas 2 ヶ月 前
コミット
30faf74aa1

+ 13 - 0
pandora-config.example.toml

@@ -419,6 +419,19 @@ promethion_runs_metadata_dir = "/data/promethion-runs-metadata"
 # JSON file mapping flowcell IDs / runs for Pandora.
 promethion_runs_input = "/data/pandora-flowcell-id.json"
 
+#######################################
+# VEP configuration
+#######################################
+
+# Path to VEP singularity image
+vep_image = "/home/t_steimle/somatic_pipe_tools/vep_latest.sif"
+
+# Path to the VEP cache directory
+vep_cache_dir = "/home/t_steimle/ref/hs1/vepcache"
+
+# Path to VEP sorted GFF
+vep_gff = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
+
 #######################################
 # Alignment / basecalling (Dorado)
 #######################################

+ 96 - 1
src/annotation/vep.rs

@@ -1,4 +1,4 @@
-use anyhow::anyhow;
+use anyhow::{anyhow, Context};
 use bitcode::{Decode, Encode};
 use hashbrown::HashMap;
 use itertools::Itertools;
@@ -8,10 +8,19 @@ use std::{
     cmp::{Ordering, Reverse},
     fmt::Display,
     io::{BufRead, BufReader},
+    path::PathBuf,
     process::{Command, Stdio},
     str::FromStr,
 };
 
+use crate::{
+    commands::{Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner},
+    config::Config,
+    helpers::singularity_bind_flags,
+    run,
+    runners::Run,
+};
+
 use super::ncbi::NCBIAcc;
 
 /// Represents a single line of output from the Variant Effect Predictor (VEP).
@@ -635,6 +644,76 @@ impl FromStr for VEPExtra {
     }
 }
 
+#[derive(Debug)]
+struct VepJob {
+    in_vcf: PathBuf,
+    out_vcf: PathBuf,
+    config: Config,
+}
+
+impl JobCommand for VepJob {
+    fn cmd(&self) -> String {
+        let bind_flags = singularity_bind_flags([
+            &self.in_vcf.to_string_lossy().to_string(),
+            &self.out_vcf.to_string_lossy().to_string(),
+            &self.config.vep_gff,
+            &self.config.vep_cache_dir,
+            &self.config.reference,
+        ]);
+        let plugins = vec!["SpliceRegion", "Downstream"]
+            .into_iter()
+            .map(|e| format!("--plugin {e}"))
+            .collect::<Vec<String>>()
+            .join(" ");
+        format!(
+            "{singularity_bin} exec \
+                {binds} \
+                {image} vep \
+                --dir_cache {dir_cache} \
+                --cache --offline \
+                --fasta {fasta} \
+                --gff {gff} \
+                --symbol \
+                {plugins} \
+                --hgvs \
+                -i {input} \
+                -o {output}",
+            singularity_bin = self.config.singularity_bin,
+            binds = bind_flags,
+            image = self.config.vep_image,
+            dir_cache = self.config.vep_cache_dir,
+            fasta = self.config.reference,
+            gff = self.config.vep_gff,
+            plugins = plugins,
+            input = self.in_vcf.to_string_lossy(),
+            output = self.out_vcf.to_string_lossy(),
+        )
+    }
+}
+impl LocalRunner for VepJob {}
+impl LocalBatchRunner for VepJob {}
+impl SbatchRunner for VepJob {
+    fn slurm_params(&self) -> crate::commands::SlurmParams {
+        crate::commands::SlurmParams {
+            job_name: Some("VEP".into()),
+            partition: Some("shortq".into()),
+            cpus_per_task: Some(10),
+            mem: Some("30G".into()),
+            gres: None,
+        }
+    }
+}
+
+// impl Run for VepJob {
+//     fn run(&mut self) -> anyhow::Result<()> {
+//         let report = run!(&self.config, self).with_context(|| format!("Failed to filter PASS for {}", self.id))?;
+//         report
+//             .save_to_file(format!("{}/bcftools_pass_", self.config.log_dir))
+//             .context("Can't save bcftools PASS logs")?;;
+//         Ok(())
+//     }
+//     // add code here
+// }
 /// Runs the Variant Effect Predictor (VEP) on a given input file and outputs the results.
 ///
 /// This function executes the VEP tool with specific parameters to annotate genetic variants.
@@ -787,3 +866,19 @@ pub fn get_best_vep(d: &[VEP]) -> anyhow::Result<VEP> {
         .map(|(k, _)| best_impact_veps[*k].clone())
         .ok_or_else(|| anyhow::anyhow!("No valid NCBI accession found"))
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::helpers::test_init;
+
+    #[test]
+    fn vep_run() -> anyhow::Result<()> {
+        test_init();
+        let config = Config::default();
+        let mut vep_job = VepJob { in_vcf: "/mnt/beegfs02/scratch/t_steimle/data/wgs/DUMCO/diag/ClairS/DUMCO_diag_clairs_PASSED.vcf.gz".into(), out_vcf: "/mnt/beegfs02/scratch/t_steimle/data/wgs/DUMCO/diag/TEST_clairs_PASSED_VEP.vcf".into(), config: config.clone() };
+        let r = run!(config, &mut vep_job)?;
+        println!("{r:#?}");
+        Ok(())
+    }
+}

+ 17 - 5
src/callers/clairs.rs

@@ -170,7 +170,10 @@ use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
     collection::vcf::Vcf,
     commands::{
-        Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner, bcftools::{BcftoolsConcat, BcftoolsKeepPass}, samtools::SamtoolsMergeMany
+        bcftools::{BcftoolsConcat, BcftoolsKeepPass},
+        samtools::SamtoolsMergeMany,
+        Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner, SlurmParams,
+        SlurmRunner,
     },
     config::Config,
     helpers::{
@@ -180,7 +183,7 @@ use crate::{
     io::vcf::read_vcf,
     pipes::{Initialize, ShouldRun, Version},
     run, run_many,
-    runners::{Run, RunErr},
+    runners::Run,
     variant::{
         variant_collection::VariantCollection,
         vcf_variant::{Label, Variants},
@@ -407,7 +410,7 @@ impl Run for ClairS {
 
         if self.config.slurm_runner {
             run_clairs_chunked(&self.id, &self.config, 50)?;
-            merge_haplotaged_tmp_bam(&self.config, &self.id)?;
+            // merge_haplotaged_tmp_bam(&self.config, &self.id)?;
         } else {
             run!(&self.config, self)?;
             let mut germline = self.process_germline()?;
@@ -735,7 +738,9 @@ fn merge_clairs_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<()> {
     }
 
     let final_passed_vcf = base.config.clairs_passed_vcf(&base.id);
-    let final_tmp = format!("{final_passed_vcf}.tmp");
+    let rand = Uuid::new_v4();
+    let final_tmp = format!("{final_passed_vcf}_{rand}.tmp");
+    let final_tmp_csi = format!("{final_passed_vcf}_{rand}.tmp.csi");
 
     if let Some(parent) = Path::new(&final_passed_vcf).parent() {
         fs::create_dir_all(parent)?;
@@ -750,6 +755,8 @@ fn merge_clairs_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<()> {
     run!(&base.config, &mut concat).context("Failed to run bcftools concat for ClairS parts")?;
 
     fs::rename(&final_tmp, &final_passed_vcf).context("Failed to rename merged ClairS PASS VCF")?;
+    fs::rename(&final_tmp_csi, format!("{final_passed_vcf}.csi"))
+        .context("Failed to rename merged ClairS PASS VCF CSI")?;
 
     info!(
         "Successfully merged {} ClairS parts into {}",
@@ -778,7 +785,9 @@ fn merge_clairs_germline_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<
     }
 
     let final_passed_vcf = base.config.clairs_germline_passed_vcf(&base.id);
-    let final_tmp = format!("{final_passed_vcf}.tmp");
+    let rand = Uuid::new_v4();
+    let final_tmp = format!("{final_passed_vcf}_{rand}.tmp");
+    let final_tmp_csi = format!("{final_passed_vcf}_{rand}.tmp.csi");
 
     if let Some(parent) = Path::new(&final_passed_vcf).parent() {
         fs::create_dir_all(parent)?;
@@ -796,6 +805,9 @@ fn merge_clairs_germline_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<
     fs::rename(&final_tmp, &final_passed_vcf)
         .context("Failed to rename merged ClairS germline PASS VCF")?;
 
+    fs::rename(&final_tmp_csi, format!("{final_passed_vcf}.csi"))
+        .context("Failed to rename merged ClairS germline PASS VCF CSI")?;
+
     info!(
         "Successfully merged {} ClairS germline parts into {}",
         n_parts, final_passed_vcf

+ 18 - 6
src/callers/deep_somatic.rs

@@ -92,12 +92,13 @@ use log::{debug, info};
 use rayon::prelude::*;
 use regex::Regex;
 use rust_htslib::bam::{self, Read};
+use uuid::Uuid;
 
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
     collection::vcf::Vcf,
     commands::{
-        bcftools::{BcftoolsConcat, BcftoolsKeepPass},
+        bcftools::{BcftoolsConcat, BcftoolsIndex, BcftoolsKeepPass},
         Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner, SlurmParams,
         SlurmRunner,
     },
@@ -111,8 +112,8 @@ use crate::{
     run, run_many,
     runners::Run,
     variant::{
-        vcf_variant::{Label, Variants},
         variant_collection::VariantCollection,
+        vcf_variant::{Label, Variants},
     },
 };
 
@@ -343,14 +344,20 @@ impl DeepSomatic {
             self.id, self.part_index
         );
 
-        let mut cmd = BcftoolsKeepPass::from_config(&self.config, output_vcf, vcf_passed);
+        let mut cmd = BcftoolsKeepPass::from_config(&self.config, output_vcf, &vcf_passed);
         let report = run!(&self.config, &mut cmd)
             .with_context(|| format!("Failed to filter PASS for {}", self.id))?;
-
         report
             .save_to_file(format!("{}/bcftools_pass_", self.log_dir))
             .context("Can't save bcftools PASS logs")?;
 
+        let mut cmd = BcftoolsIndex::from_config(&self.config, vcf_passed);
+        let report = run!(&self.config, &mut cmd)
+            .with_context(|| format!("Failed to index vcf PASS for {}", self.id))?;
+        report
+            .save_to_file(format!("{}/bcftools_index_", self.log_dir))
+            .context("Failed to save bcftools index logs")?;
+
         Ok(())
     }
 }
@@ -359,7 +366,7 @@ impl Run for DeepSomatic {
     fn run(&mut self) -> anyhow::Result<()> {
         if !self.should_run() {
             info!("DeepSomatic is up-to-data.");
-            return Ok(())
+            return Ok(());
         }
 
         if self.config.slurm_runner {
@@ -514,7 +521,9 @@ fn merge_deepsomatic_parts(base: &DeepSomatic, n_parts: usize) -> anyhow::Result
     }
 
     let final_passed_vcf = base.config.deepsomatic_passed_vcf(&base.id);
-    let final_tmp = format!("{final_passed_vcf}.tmp");
+    let rand = Uuid::new_v4();
+    let final_tmp = format!("{final_passed_vcf}_{rand}.tmp");
+    let final_tmp_csi = format!("{final_passed_vcf}_{rand}.tmp.csi");
 
     if let Some(parent) = Path::new(&final_passed_vcf).parent() {
         fs::create_dir_all(parent)?;
@@ -532,6 +541,9 @@ fn merge_deepsomatic_parts(base: &DeepSomatic, n_parts: usize) -> anyhow::Result
     fs::rename(&final_tmp, &final_passed_vcf)
         .context("Failed to rename merged DeepSomatic PASS VCF")?;
 
+    fs::rename(&final_tmp_csi, format!("{final_passed_vcf}.csi"))
+        .context("Failed to rename merged DeepVariant PASS VCF CSI")?;
+
     info!(
         "Successfully merged {} DeepSomatic parts into {}",
         n_parts, final_passed_vcf

+ 9 - 5
src/callers/deep_variant.rs

@@ -87,6 +87,7 @@ use log::{debug, info};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use regex::Regex;
 use rust_htslib::bam::{self, Read};
+use uuid::Uuid;
 use std::{
     fmt, fs,
     path::{Path, PathBuf},
@@ -100,8 +101,7 @@ use crate::{
         vcf::Vcf,
     },
     commands::{
-        bcftools::{BcftoolsConcat, BcftoolsKeepPass},
-        LocalBatchRunner, SlurmRunner,
+        LocalBatchRunner, SlurmRunner, bcftools::{BcftoolsConcat, BcftoolsIndex, BcftoolsKeepPass}
     },
     config::Config,
     helpers::{
@@ -113,8 +113,7 @@ use crate::{
     run, run_many,
     runners::Run,
     variant::{
-        vcf_variant::{Label, Variants},
-        variant_collection::VariantCollection,
+        variant_collection::VariantCollection, vcf_variant::{Label, Variants}
     },
 };
 
@@ -711,7 +710,9 @@ fn merge_deepvariant_parts(base: &DeepVariant, n_parts: usize) -> anyhow::Result
     let final_passed_vcf = base
         .config
         .deepvariant_solo_passed_vcf(&base.id, &base.time_point);
-    let final_tmp = format!("{final_passed_vcf}.tmp");
+    let rand = Uuid::new_v4();
+    let final_tmp = format!("{final_passed_vcf}_{rand}.tmp");
+    let final_tmp_csi = format!("{final_passed_vcf}_{rand}.tmp.csi");
 
     if let Some(parent) = Path::new(&final_passed_vcf).parent() {
         fs::create_dir_all(parent)?;
@@ -730,6 +731,9 @@ fn merge_deepvariant_parts(base: &DeepVariant, n_parts: usize) -> anyhow::Result
     fs::rename(&final_tmp, &final_passed_vcf)
         .context("Failed to rename merged DeepVariant PASS VCF")?;
 
+    fs::rename(&final_tmp_csi, format!("{final_passed_vcf}.csi"))
+        .context("Failed to rename merged DeepVariant PASS VCF CSI")?;
+
     info!(
         "Successfully merged {} parts into {}",
         n_parts, final_passed_vcf

+ 5 - 7
src/callers/mod.rs

@@ -128,10 +128,7 @@ use crate::{
     callers::{
         clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::NanomonSV,
         savana::Savana, severus::Severus, straglr::Straglr,
-    },
-    config::Config,
-    pipes::{Initialize, InitializeSolo},
-    runners::Run,
+    }, commands::longphase::run_phasing_somatic, config::Config, pipes::{Initialize, InitializeSolo}, runners::Run
 };
 
 pub mod clairs;
@@ -210,6 +207,8 @@ pub fn run_somatic_callers(id: &str, config: &Config) -> anyhow::Result<()> {
     // First gives germlines for phasing/haplotagging
     ClairS::initialize(id, config)?.run()?;
 
+    run_phasing_somatic(id, config)?;
+
     // if slurm send jobs in parallel else run caller sequentially
     if config.slurm_runner {
         let config = Arc::new(config.clone());
@@ -253,6 +252,8 @@ pub fn run_somatic_callers(id: &str, config: &Config) -> anyhow::Result<()> {
         Severus::initialize(id, config)?.run()?;
         Savana::initialize(id, config)?.run()?;
         NanomonSV::initialize(id, config)?.run()?;
+        // DeepSomatic - somatic SNV/indels
+        DeepSomatic::initialize(id, config)?.run()?;
     }
 
     // DeepVariant - germline variants for normal sample
@@ -261,9 +262,6 @@ pub fn run_somatic_callers(id: &str, config: &Config) -> anyhow::Result<()> {
     // DeepVariant - germline variants for tumor sample
     DeepVariant::initialize(id, &config.tumoral_name, config)?.run()?;
 
-    // DeepSomatic - somatic SNV/indels
-    DeepSomatic::initialize(id, config)?.run()?;
-
     // Straglr - Short Tandem Repeat (STR) genotyper
     Straglr::initialize(id, config)?.run()?;
 

+ 1 - 1
src/callers/savana.rs

@@ -248,7 +248,7 @@ impl Run for Savana {
 
             // Check for phased germline vcf
             // not required anymore since >= 1.3.0
-            let phased_germline_vcf = self.config.constit_phased_vcf(&self.id);
+            let phased_germline_vcf = self.config.germline_phased_vcf(&self.id);
             if !Path::new(&phased_germline_vcf).exists() {
                 let mut phase = LongphasePhase::initialize(&self.id, &self.config.clone())?;
                 run!(&self.config, &mut phase)?;

+ 1 - 1
src/callers/severus.rs

@@ -208,7 +208,7 @@ impl Run for Severus {
         let passed_vcf = &self.config.severus_passed_vcf(id);
 
         if !Path::new(output_vcf).exists() {
-            let constit_phased_vcf = &self.config.constit_phased_vcf(id);
+            let constit_phased_vcf = &self.config.germline_phased_vcf(id);
 
             // Run Longphase if necessary
             if !Path::new(constit_phased_vcf).exists() {

+ 3 - 2
src/callers/straglr.rs

@@ -134,6 +134,7 @@ use crate::{
 };
 use anyhow::Context;
 use log::{debug, info};
+use uuid::Uuid;
 use std::{
     collections::HashMap,
     fmt,
@@ -1071,8 +1072,8 @@ pub fn run_straglr_chunked(
 
         // Create temporary BED file for this chunk
         let bed_chunk_path = format!(
-            "{}/straglr_chunk_{}_{}_part{}.bed",
-            config.tmp_dir, id, time_point, part_num
+            "{}/straglr_chunk_{}_{}_part{}_{}.bed",
+            config.tmp_dir, id, time_point, part_num, Uuid::new_v4()
         );
 
         let mut bed_chunk_file = File::create(&bed_chunk_path).context(format!(

+ 66 - 5
src/commands/longphase.rs

@@ -20,9 +20,22 @@ use std::{
     path::{Path, PathBuf},
 };
 use tracing::info;
+use uuid::Uuid;
 
 use super::modkit::ModkitSummary;
 
+pub fn run_phasing_somatic(id: &str, config: &Config) -> anyhow::Result<()> {
+    LongphaseModcallSolo::initialize(id, &config.normal_name, config)?.run()?;
+    LongphaseModcallSolo::initialize(id, &config.tumoral_name, config)?.run()?;
+
+    LongphasePhase::initialize(id, config)?.run()?;
+
+    LongphaseHap::initialize(id, &config.normal_name, config)?.run()?;
+    LongphaseHap::initialize(id, &config.tumoral_name, config)?.run()?;
+
+    Ok(())
+}
+
 #[derive(Debug, Clone)]
 pub struct LongphaseHap {
     pub id: String,
@@ -34,12 +47,32 @@ pub struct LongphaseHap {
     job_args: Vec<String>,
 }
 
+impl InitializeSolo for LongphaseHap {
+    fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
+        let log_dir = format!("{}/{}/log/longphase", config.result_dir, id);
+
+        let bam = config.solo_bam(id, time);
+        let bam_hp = config.solo_haplotagged_bam(id, time);
+
+        let phased_vcf = config.germline_phased_vcf(id);
+
+        Ok(Self {
+            id: id.to_string(),
+            vcf: phased_vcf,
+            bam: PathBuf::from(bam),
+            bam_hp: PathBuf::from(bam_hp),
+            config: config.clone(),
+            log_dir,
+            job_args: Vec::new(),
+        })
+    }
+}
+
 impl LongphaseHap {
     pub fn new(id: &str, bam: &str, phased_vcf: &str, config: Config) -> Self {
         let log_dir = format!("{}/{}/log/longphase", config.result_dir, id);
 
         let bam = Path::new(bam);
-        // TODO change that use config.haplotagged_bam_tag_name
         let new_fn = format!("{}_HP", bam.file_stem().unwrap().to_str().unwrap());
         let bam_hp = bam.with_file_name(new_fn);
 
@@ -97,7 +130,8 @@ impl LongphaseHap {
         );
 
         // Temporary directory for per-chromosome BAMs
-        let tmp_dir = Path::new(&self.config.tmp_dir).join("longphase_hap");
+        let tmp_dir =
+            Path::new(&self.config.tmp_dir).join(format!("longphase_hap_{}", Uuid::new_v4()));
         fs::create_dir_all(&tmp_dir)
             .with_context(|| format!("Failed to create tmp dir {}", tmp_dir.display()))?;
 
@@ -153,7 +187,8 @@ impl LongphaseHap {
         }
 
         // Index final BAM
-        let mut sam_index = SamtoolsIndex::from_config(&self.config, &final_bam.to_string_lossy().to_string()) ;
+        let mut sam_index =
+            SamtoolsIndex::from_config(&self.config, final_bam.to_string_lossy().as_ref());
         run!(&self.config, &mut sam_index)
             .with_context(|| format!("samtools index failed for {}", final_bam.display()))?;
 
@@ -201,6 +236,18 @@ impl Run for LongphaseHap {
     }
 }
 
+impl ShouldRun for LongphaseHap {
+    fn should_run(&self) -> bool {
+        let final_bam = PathBuf::from(format!("{}.bam", self.bam_hp.to_string_lossy()));
+        is_file_older(
+            final_bam.to_string_lossy().as_ref(),
+            self.bam.to_string_lossy().as_ref(),
+            true,
+        )
+        .unwrap_or(true)
+    }
+}
+
 impl crate::commands::Command for LongphaseHap {
     fn cmd(&self) -> String {
         format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
@@ -258,7 +305,7 @@ impl Initialize for LongphasePhase {
         }
         let vcf = config.constit_vcf(id);
         let bam = config.normal_bam(id);
-        let out_prefix = path_prefix(&config.constit_phased_vcf(id))?;
+        let out_prefix = path_prefix(&config.germline_phased_vcf(id))?;
         let modcall_vcf = config.longphase_modcall_vcf(id, &config.normal_name);
 
         Ok(LongphasePhase {
@@ -274,6 +321,12 @@ impl Initialize for LongphasePhase {
     }
 }
 
+impl ShouldRun for LongphasePhase {
+    fn should_run(&self) -> bool {
+        is_file_older(&self.config.germline_phased_vcf(&self.id), &self.bam, true).unwrap_or(true)
+    }
+}
+
 impl crate::commands::Command for LongphasePhase {
     fn cmd(&self) -> String {
         format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
@@ -309,6 +362,9 @@ impl crate::commands::SbatchRunner for LongphasePhase {
 
 impl Run for LongphasePhase {
     fn run(&mut self) -> anyhow::Result<()> {
+        if !self.should_run() {
+            info!("Germline phased vcf is up-to-date for {}.", self.id)
+        }
         info!("Running longphase phase for: {}", self.vcf);
         info!("Saving longphase phase results in: {}", self.out_prefix);
 
@@ -318,7 +374,7 @@ impl Run for LongphasePhase {
             modcall.run()?;
         }
 
-        let final_vcf = self.config.constit_phased_vcf(&self.id);
+        let final_vcf = self.config.germline_phased_vcf(&self.id);
         if !Path::new(&final_vcf).exists() {
             self.job_args = vec![
                 "phase".to_string(),
@@ -467,6 +523,11 @@ impl crate::commands::SbatchRunner for LongphaseModcallSolo {
 
 impl Run for LongphaseModcallSolo {
     fn run(&mut self) -> anyhow::Result<()> {
+        if !self.should_run() {
+            info!("ModCall is up-to-date for {}, {}", self.id, self.time);
+            return Ok(());
+        }
+
         self.job_args = vec![
             "modcall".to_string(),
             "-b".to_string(),

+ 11 - 10
src/config.rs

@@ -191,7 +191,7 @@ pub struct Config {
     /// Number of threads for ClairS.
     pub clairs_threads: u8,
 
-    /// ClairS docker tag.
+    /// Path to ClairS singularity image.
     pub clairs_image: String,
 
     /// Force ClairS recomputation.
@@ -365,6 +365,16 @@ pub struct Config {
 
     /// JSON file describing PromethION runs and flowcell IDs.
     pub promethion_runs_input: String,
+
+    // === VEP ===
+    /// Path to VEP singularity image
+    pub vep_image: String,
+
+    /// Path to the VEP cache directory
+    pub vep_cache_dir: String,
+
+    /// Path to VEP sorted GFF
+    pub vep_gff: String,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -902,15 +912,6 @@ impl Config {
         self.clairs_germline_passed_vcf(id)
     }
 
-    /// Constitutional phased VCF path in the tumor directory.
-    pub fn constit_phased_vcf(&self, id: &str) -> String {
-        format!(
-            "{}/{}_variants_constit_phased.vcf.gz",
-            self.tumoral_dir(id),
-            id
-        )
-    }
-
     /// Somatic-scan output directory for a solo run (counts subdir).
     pub fn somatic_scan_solo_output_dir(&self, id: &str, time: &str) -> String {
         format!("{}/counts", self.solo_dir(id, time))

+ 1 - 0
src/pipes/mod.rs

@@ -4,6 +4,7 @@ use crate::{config::Config};
 
 // pub mod somatic;
 pub mod somatic_slurm;
+pub mod somatic;
 
 
 pub trait Initialize: Sized {

+ 33 - 12
src/pipes/somatic.rs

@@ -2,12 +2,12 @@ use crate::{
     annotation::is_gnomad_and_constit_alt,
     create_should_run_normal_tumoral, init_solo_callers_normal_tumoral,
     io::bed::read_bed,
-    pipes::{InitializeSolo, Initialize, ShouldRun},
+    pipes::{Initialize, InitializeSolo, ShouldRun},
     positions::GenomeRange,
     scan::scan::SomaticScan,
     variant::{
-        variant::{run_if_required, ShouldRunBox},
         variants_stats::somatic_depth_quality_ranges,
+        vcf_variant::{run_if_required, ShouldRunBox},
     },
 };
 use itertools::Itertools;
@@ -30,9 +30,9 @@ use crate::{
     create_should_run, init_somatic_callers,
     runners::Run,
     variant::{
-        variant::{load_variants, CallerBox},
         variant_collection::{ExternalAnnotation, VariantCollection, Variants},
         variants_stats::VariantsStats,
+        vcf_variant::{load_variants, CallerBox},
     },
 };
 
@@ -163,9 +163,12 @@ pub struct SomaticPipe {
 
 impl Initialize for SomaticPipe {
     /// Initializes a new `Somatic` instance with default annotations.
-    fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
+    fn initialize(id: &str, config: &crate::config::Config) -> anyhow::Result<Self> {
         let id = id.to_string();
-        Ok(Self { id, config })
+        Ok(Self {
+            id,
+            config: config.clone(),
+        })
     }
 }
 
@@ -231,7 +234,7 @@ impl Run for SomaticPipe {
         let mut to_run_if_req = create_should_run!(
             &id,
             &config,
-            SomaticScan,
+            // SomaticScan,
             ClairS,
             NanomonSV,
             Severus,
@@ -266,7 +269,7 @@ impl Run for SomaticPipe {
         // Load germline variants using ClairS
         info!("Loading germline variants.");
         let clairs_germline =
-            ClairS::initialize(&id, self.config.clone())?.germline(&annotations)?;
+            ClairS::initialize(&id, &self.config.clone())?.germline(&annotations)?;
         variants_collections.push(clairs_germline);
 
         // Initialize statistics
@@ -552,7 +555,7 @@ impl Run for SomaticPipe {
 pub fn const_stats(id: String, config: Config) -> anyhow::Result<()> {
     info!("Loading Germline");
     let annotations = Annotations::default();
-    let clairs_germline = ClairS::initialize(&id, config.clone())?.germline(&annotations)?;
+    let clairs_germline = ClairS::initialize(&id, &config.clone())?.germline(&annotations)?;
 
     info!("Annotation with Cosmic and GnomAD.");
     let ext_annot = ExternalAnnotation::init()?;
@@ -777,10 +780,7 @@ impl SomaticPipeStats {
         // Extract all unique germline caller labels
         let mut germlines_callers: Vec<String> = with_germline
             .iter()
-            .flat_map(|(_, r)| {
-                r.keys().map(|k| k.to_string())
-                    .collect::<Vec<String>>()
-            })
+            .flat_map(|(_, r)| r.keys().map(|k| k.to_string()).collect::<Vec<String>>())
             .collect();
         germlines_callers.sort();
         germlines_callers.dedup();
@@ -899,3 +899,24 @@ impl InputStats {
         stats
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use rayon::ThreadPoolBuilder;
+
+    use super::*;
+    use crate::helpers::test_init;
+
+    #[test]
+    fn somatic_pipe_run() -> anyhow::Result<()> {
+        test_init();
+        let config = Config::default();
+
+        ThreadPoolBuilder::new()
+            .num_threads(config.threads.into())
+            .build_global()?;
+
+        SomaticPipe::initialize("DUMCO", &config)?.run()?;
+        Ok(())
+    }
+}

+ 6 - 6
src/variant/vcf_variant.rs

@@ -2489,7 +2489,7 @@ macro_rules! create_should_run {
         use anyhow::Context;
         let mut runners: Vec<ShouldRunBox> = Vec::new();
         $(
-            let runner = <$runner>::initialize($id, $config.clone())
+            let runner = <$runner>::initialize($id, &$config)
                 .with_context(|| format!(
                     "Failed to initialize should-run checker {} for {}",
                     stringify!($runner), $id
@@ -2591,14 +2591,14 @@ macro_rules! create_should_run_normal_tumoral {
         use anyhow::Context;
         let mut runners: Vec<ShouldRunBox> = Vec::new();
         $(
-            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, $config.clone())
+            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, &$config)
                 .with_context(|| format!(
                     "Failed to initialize tumoral should-run checker {} for {}",
                     stringify!($runner), $id
                 ))?;
             runners.push(Box::new(tumoral) as ShouldRunBox);
 
-            let normal = <$runner>::initialize($id, &$config.normal_name, $config.clone())
+            let normal = <$runner>::initialize($id, &$config.normal_name, &$config)
                 .with_context(|| format!(
                     "Failed to initialize normal should-run checker {} for {}",
                     stringify!($runner), $id
@@ -2650,7 +2650,7 @@ macro_rules! init_somatic_callers {
         use anyhow::Context;
         let mut callers: Vec<CallerBox> = Vec::new();
         $(
-            let caller = <$runner>::initialize($id, $config.clone())
+            let caller = <$runner>::initialize($id, &$config)
                 .with_context(|| format!(
                     "Failed to initialize somatic caller {} for {}",
                     stringify!($runner), $id
@@ -2755,14 +2755,14 @@ macro_rules! init_solo_callers_normal_tumoral {
         use anyhow::Context;
         let mut callers: Vec<CallerBox> = Vec::new();
         $(
-            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, $config.clone())
+            let tumoral = <$runner>::initialize($id, &$config.tumoral_name, &$config)
                 .with_context(|| format!(
                     "Failed to initialize tumoral caller {} for {} '{}'",
                     stringify!($runner), $id, $config.tumoral_name
                 ))?;
             callers.push(Box::new(tumoral) as CallerBox);
 
-            let normal = <$runner>::initialize($id, &$config.normal_name, $config.clone())
+            let normal = <$runner>::initialize($id, &$config.normal_name, &$config)
                 .with_context(|| format!(
                     "Failed to initialize normal caller {} for {} '{}'",
                     stringify!($runner), $id, $config.normal_name