|
|
@@ -5,24 +5,36 @@ use regex::Regex;
|
|
|
use std::{
|
|
|
fs,
|
|
|
path::Path,
|
|
|
- process::{Command, Stdio},
|
|
|
+ process::{Command as ProcessCommand, Stdio},
|
|
|
};
|
|
|
|
|
|
use crate::{
|
|
|
annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
|
|
|
collection::vcf::Vcf,
|
|
|
- commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
|
|
|
+ commands::{
|
|
|
+ bcftools::{bcftools_keep_pass, BcftoolsConfig},
|
|
|
+ run_many_sbatch,
|
|
|
+ },
|
|
|
config::Config,
|
|
|
helpers::{is_file_older, remove_dir_if_exists},
|
|
|
io::vcf::read_vcf,
|
|
|
pipes::{InitializeSolo, ShouldRun, Version},
|
|
|
- runners::{run_wait, DockerRun, Run},
|
|
|
+ runners::Run,
|
|
|
+ slurm_helpers::max_gpu_per_node,
|
|
|
variant::{
|
|
|
variant::{Label, Variants},
|
|
|
variant_collection::VariantCollection,
|
|
|
},
|
|
|
};
|
|
|
|
|
|
+use crate::commands::{
|
|
|
+ CapturedOutput,
|
|
|
+ Command as JobCommand, // your trait
|
|
|
+ Runner as LocalRunner,
|
|
|
+ SbatchRunner,
|
|
|
+ SlurmParams,
|
|
|
+};
|
|
|
+
|
|
|
/// A pipeline runner for executing DeepVariant on a single sample and a specific time point (e.g., normal or tumor).
|
|
|
///
|
|
|
/// This struct holds all necessary metadata, configuration, and output paths to perform variant calling
|
|
|
@@ -35,8 +47,10 @@ use crate::{
|
|
|
pub struct DeepVariant {
|
|
|
pub id: String,
|
|
|
pub time_point: String,
|
|
|
+ pub regions: String,
|
|
|
pub log_dir: String,
|
|
|
pub config: Config,
|
|
|
+ pub part_index: Option<usize>,
|
|
|
}
|
|
|
|
|
|
impl InitializeSolo for DeepVariant {
|
|
|
@@ -61,12 +75,18 @@ impl InitializeSolo for DeepVariant {
|
|
|
info!("Initializing DeepVariant for {id} {time_point}.");
|
|
|
|
|
|
let log_dir = format!("{}/{}/log/deepvariant", config.result_dir, &id);
|
|
|
+ let regions = (1..=22)
|
|
|
+ .map(|i| format!("chr{i}"))
|
|
|
+ .chain(["chrX", "chrY", "chrMT"].into_iter().map(String::from))
|
|
|
+ .collect::<Vec<String>>();
|
|
|
|
|
|
let deepvariant = Self {
|
|
|
id,
|
|
|
time_point,
|
|
|
log_dir,
|
|
|
config,
|
|
|
+ regions: regions.join(","),
|
|
|
+ part_index: None,
|
|
|
};
|
|
|
|
|
|
if deepvariant.config.deepvariant_force {
|
|
|
@@ -104,93 +124,192 @@ impl ShouldRun for DeepVariant {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-impl Run for DeepVariant {
|
|
|
- /// Executes DeepVariant inside Docker and filters PASS variants via bcftools.
|
|
|
- /// Runs DeepVariant inside Docker and filters variants using bcftools.
|
|
|
- ///
|
|
|
- /// This function:
|
|
|
- /// - Creates necessary output directories
|
|
|
- /// - Executes DeepVariant through Docker if needed
|
|
|
- /// - Filters for PASS variants
|
|
|
- /// - Saves logs and handles caching logic via file existence
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Returns an error if any external command or file operation fails.
|
|
|
- fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+impl JobCommand for DeepVariant {
|
|
|
+ fn init(&mut self) -> anyhow::Result<()> {
|
|
|
+ // output dir for DeepVariant
|
|
|
+ let output_dir = self
|
|
|
+ .config
|
|
|
+ .deepvariant_output_dir(&self.id, &self.time_point);
|
|
|
+
|
|
|
+ fs::create_dir_all(&output_dir).context(format!("Failed to create dir: {output_dir}"))?;
|
|
|
+
|
|
|
+ // log dir
|
|
|
+ fs::create_dir_all(&self.log_dir)
|
|
|
+ .context(format!("Failed to create dir: {}", self.log_dir))?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ fn cmd(&self) -> String {
|
|
|
let bam = self.config.solo_bam(&self.id, &self.time_point);
|
|
|
- let output_vcf = self
|
|
|
+ let output_dir = self
|
|
|
.config
|
|
|
- .deepvariant_solo_output_vcf(&self.id, &self.time_point);
|
|
|
+ .deepvariant_output_dir(&self.id, &self.time_point);
|
|
|
+ let output_vcf_path = self.output_vcf_path();
|
|
|
+ let log_subdir = format!("{}_{}_DeepVariant_logs", self.id, self.time_point);
|
|
|
+ let sample_name = format!("{}_{}", self.id, self.time_point);
|
|
|
|
|
|
- // Run Docker command if output VCF doesn't exist
|
|
|
- if !Path::new(&output_vcf).exists() {
|
|
|
- let output_dir = self
|
|
|
- .config
|
|
|
- .deepvariant_output_dir(&self.id, &self.time_point);
|
|
|
-
|
|
|
- fs::create_dir_all(&output_dir)
|
|
|
- .context(format!("Failed to create dir: {output_dir}"))?;
|
|
|
-
|
|
|
- let mut docker_run = DockerRun::new(&[
|
|
|
- "run",
|
|
|
- "-d",
|
|
|
- "-v",
|
|
|
- "/data:/data",
|
|
|
- "-v",
|
|
|
- &format!("{}:/output", output_dir),
|
|
|
- &format!("google/deepvariant:{}", self.config.deepvariant_bin_version),
|
|
|
- "/opt/deepvariant/bin/run_deepvariant",
|
|
|
- &format!("--model_type={}", self.config.deepvariant_model_type),
|
|
|
- "--ref",
|
|
|
- &self.config.reference,
|
|
|
- "--reads",
|
|
|
- &bam,
|
|
|
- "--output_vcf",
|
|
|
- &format!("/output/{}_{}_DeepVariant.vcf.gz", self.id, self.time_point),
|
|
|
- // "--output_gvcf",
|
|
|
- // &format!(
|
|
|
- // "/output/{}_{}_DeepVariant.g.vcf.gz",
|
|
|
- // self.id, self.time_point
|
|
|
- // ),
|
|
|
- &format!("--num_shards={}", self.config.deepvariant_threads),
|
|
|
- "--logging_dir",
|
|
|
- "--vcf_stats_report=true",
|
|
|
- &format!("/output/{}_{}_DeepVariant_logs", self.id, self.time_point),
|
|
|
- "--dry_run=false",
|
|
|
- "--sample_name",
|
|
|
- &format!("{}_{}", self.id, self.time_point),
|
|
|
- ]);
|
|
|
-
|
|
|
- let report = run_wait(&mut docker_run).context(format!(
|
|
|
- "Erreur while running DeepVariant for {} {}",
|
|
|
- self.id, self.time_point
|
|
|
- ))?;
|
|
|
+ format!(
|
|
|
+ "module load singularity-ce && singularity exec --nv \
|
|
|
+ --bind /data:/data \
|
|
|
+ --bind {output_dir}:/output \
|
|
|
+ {image} \
|
|
|
+ /opt/deepvariant/bin/run_deepvariant \
|
|
|
+ --model_type={model_type} \
|
|
|
+ --ref {reference} \
|
|
|
+ --reads {bam} \
|
|
|
+ --regions '{regions}' \
|
|
|
+ --haploid_contigs='chrX,chrY' \
|
|
|
+ --output_vcf {output_vcf} \
|
|
|
+ --num_shards={threads} \
|
|
|
+ --vcf_stats_report=true \
|
|
|
+ --logging_dir /output/{log_dir} \
|
|
|
+ --dry_run=false \
|
|
|
+ --sample_name {sample_name}",
|
|
|
+ output_dir = output_dir,
|
|
|
+ image = self.config.deepvariant_image,
|
|
|
+ model_type = self.config.deepvariant_model_type,
|
|
|
+ reference = self.config.reference,
|
|
|
+ bam = bam,
|
|
|
+ regions = self.regions,
|
|
|
+ // we mount output_dir as /output; just rewrite path here:
|
|
|
+ output_vcf = format!(
|
|
|
+ "/output/{}",
|
|
|
+ Path::new(&output_vcf_path)
|
|
|
+ .file_name()
|
|
|
+ .unwrap()
|
|
|
+ .to_string_lossy()
|
|
|
+ ),
|
|
|
+ threads = self.config.deepvariant_threads,
|
|
|
+ log_dir = log_subdir,
|
|
|
+ sample_name = sample_name,
|
|
|
+ )
|
|
|
+ }
|
|
|
|
|
|
+ fn clean_up(&self) -> anyhow::Result<()> {
|
|
|
+ let output_vcf = self.output_vcf_path();
|
|
|
+ let vcf_passed = self.passed_vcf_path();
|
|
|
+
|
|
|
+ if !Path::new(&vcf_passed).exists() {
|
|
|
+ info!(
|
|
|
+ "Filtering PASS variants for {} {} (part: {:?})",
|
|
|
+ self.id, self.time_point, self.part_index
|
|
|
+ );
|
|
|
+
|
|
|
+ let report = bcftools_keep_pass(&output_vcf, &vcf_passed, BcftoolsConfig::default())?;
|
|
|
report
|
|
|
- .save_to_file(&format!("{}/deepvariant_", self.log_dir))
|
|
|
- .context("Can't save DeepVariant logs")?;
|
|
|
+ .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
|
|
|
+ .context("Can't save bcftools PASS logs")?;
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl LocalRunner for DeepVariant {
|
|
|
+ // default shell() from trait ("bash") is fine
|
|
|
+}
|
|
|
+
|
|
|
+impl SbatchRunner for DeepVariant {
|
|
|
+ fn slurm_params(&self) -> SlurmParams {
|
|
|
+ let gpu = if let (Some(h100_av), Some(a100_av)) =
|
|
|
+ (max_gpu_per_node("h100"), max_gpu_per_node("a100"))
|
|
|
+ {
|
|
|
+ if h100_av > 0 {
|
|
|
+ "h100"
|
|
|
+ } else if a100_av > 0 {
|
|
|
+ "a100"
|
|
|
+ } else {
|
|
|
+ "h100" // waiting for free h100
|
|
|
+ }
|
|
|
} else {
|
|
|
+ panic!("Are you running slurm with a100 and h100 GPU ?");
|
|
|
+ };
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some(format!("deepvariant_{}_{}", self.id, self.time_point)),
|
|
|
+ cpus_per_task: Some(10),
|
|
|
+ mem: Some("60G".into()),
|
|
|
+ partition: Some("gpgpuq".into()),
|
|
|
+ gres: Some(format!("gpu:{gpu}:1")),
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ fn sbatch_extra_args(&self) -> Vec<String> {
|
|
|
+ // if you want anything like --time=... here
|
|
|
+ Vec::new()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl DeepVariant {
|
|
|
+ /// Run DeepVariant directly on the local machine.
|
|
|
+ pub fn run_local(&mut self) -> anyhow::Result<CapturedOutput> {
|
|
|
+ if !self.should_run() {
|
|
|
debug!(
|
|
|
- "DeepVariant output already exists for {} {}, skipping execution.",
|
|
|
+ "DeepVariant output already up-to-date for {} {}, skipping.",
|
|
|
self.id, self.time_point
|
|
|
);
|
|
|
+ return Ok(CapturedOutput::default());
|
|
|
}
|
|
|
|
|
|
- let vcf_passed = self
|
|
|
- .config
|
|
|
- .deepvariant_solo_passed_vcf(&self.id, &self.time_point);
|
|
|
- if !Path::new(&vcf_passed).exists() {
|
|
|
- info!(
|
|
|
- "Filtering PASS variants for {} {}",
|
|
|
+ info!(
|
|
|
+ "Running DeepVariant locally for {} {}",
|
|
|
+ self.id, self.time_point
|
|
|
+ );
|
|
|
+ <Self as LocalRunner>::run(self)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Run DeepVariant via sbatch + tailing slurm-<jobid>.out.
|
|
|
+ pub fn run_sbatch(&mut self) -> anyhow::Result<CapturedOutput> {
|
|
|
+ if !self.should_run() {
|
|
|
+ debug!(
|
|
|
+ "DeepVariant output already up-to-date for {} {}, skipping.",
|
|
|
self.id, self.time_point
|
|
|
);
|
|
|
- let report =
|
|
|
- bcftools_keep_pass(&output_vcf, &vcf_passed, BcftoolsConfig::default()).unwrap();
|
|
|
- report
|
|
|
- .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
|
|
|
- .unwrap();
|
|
|
+ return Ok(CapturedOutput::default());
|
|
|
+ }
|
|
|
+
|
|
|
+ info!(
|
|
|
+ "Submitting DeepVariant via sbatch for {} {}",
|
|
|
+ self.id, self.time_point
|
|
|
+ );
|
|
|
+ <Self as SbatchRunner>::run(self)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Per-part output VCF path.
|
|
|
+ fn output_vcf_path(&self) -> String {
|
|
|
+ let output_dir = self
|
|
|
+ .config
|
|
|
+ .deepvariant_output_dir(&self.id, &self.time_point);
|
|
|
+
|
|
|
+ match self.part_index {
|
|
|
+ Some(idx) => format!(
|
|
|
+ "{output_dir}/{}_{}_DeepVariant.part{idx}.vcf.gz",
|
|
|
+ self.id, self.time_point
|
|
|
+ ),
|
|
|
+ None => format!(
|
|
|
+ "{output_dir}/{}_{}_DeepVariant.vcf.gz",
|
|
|
+ self.id, self.time_point
|
|
|
+ ),
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Per-part PASS VCF path (for part runs) or final PASS VCF (single run).
|
|
|
+ fn passed_vcf_path(&self) -> String {
|
|
|
+ match self.part_index {
|
|
|
+ Some(idx) => {
|
|
|
+ let v = self.output_vcf_path();
|
|
|
+ v.replace(".vcf.gz", &format!(".part{idx}.pass.vcf.gz"))
|
|
|
+ }
|
|
|
+ None => self
|
|
|
+ .config
|
|
|
+ .deepvariant_solo_passed_vcf(&self.id, &self.time_point),
|
|
|
}
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
+impl Run for DeepVariant {
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+ self.run_local()?;
|
|
|
Ok(())
|
|
|
}
|
|
|
}
|
|
|
@@ -259,35 +378,27 @@ impl Label for DeepVariant {
|
|
|
}
|
|
|
|
|
|
impl Version for DeepVariant {
|
|
|
- /// Retrieves the DeepVariant version by running `savana --version` in its conda environment.
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Returns an error if command execution fails or "Version " not found in output.
|
|
|
fn version(config: &Config) -> anyhow::Result<String> {
|
|
|
- let out = Command::new("docker")
|
|
|
- .args([
|
|
|
- "run",
|
|
|
- "--rm",
|
|
|
- "--entrypoint",
|
|
|
- "/opt/deepvariant/bin/run_deepvariant",
|
|
|
- &format!("google/deepvariant:{}", config.deepvariant_bin_version),
|
|
|
- "--version",
|
|
|
- ])
|
|
|
+ let out = ProcessCommand::new("bash")
|
|
|
+ .arg("-lc")
|
|
|
+ .arg(format!(
|
|
|
+ "module load singularity-ce && singularity exec {} /opt/deepvariant/bin/run_deepvariant --version",
|
|
|
+ config.deepvariant_image
|
|
|
+ ))
|
|
|
.stdout(Stdio::piped())
|
|
|
.stderr(Stdio::piped())
|
|
|
.output()
|
|
|
- .context("failed to spawn docker")?;
|
|
|
+ .context("failed to spawn singularity")?;
|
|
|
|
|
|
if !out.status.success() {
|
|
|
let mut log = String::from_utf8_lossy(&out.stdout).to_string();
|
|
|
log.push_str(&String::from_utf8_lossy(&out.stderr));
|
|
|
- anyhow::bail!("docker run failed: {}\n{}", out.status, log);
|
|
|
+ anyhow::bail!("singularity exec failed: {}\n{}", out.status, log);
|
|
|
}
|
|
|
|
|
|
let mut log = String::from_utf8_lossy(&out.stdout).to_string();
|
|
|
log.push_str(&String::from_utf8_lossy(&out.stderr));
|
|
|
|
|
|
- // e.g. “DeepVariant version 1.9.0”
|
|
|
let re = Regex::new(r"(?m)DeepVariant version\s+([^\s]+)")?;
|
|
|
let caps = re
|
|
|
.captures(&log)
|
|
|
@@ -295,3 +406,116 @@ impl Version for DeepVariant {
|
|
|
Ok(caps.get(1).unwrap().as_str().to_string())
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
/// Splits `all_regions` into comma-joined groups, preserving order.
///
/// Produces exactly `min(n, all_regions.len())` groups whose sizes differ by at
/// most one (the first `len % parts` groups get the extra region). Returns an
/// empty vector when `n == 0` or there are no regions.
fn split_regions_into_n(all_regions: &[&str], n: usize) -> Vec<String> {
    let len = all_regions.len();
    if n == 0 || len == 0 {
        return Vec::new();
    }

    // Never create empty groups: at most one group per region.
    let parts = n.min(len);
    let base = len / parts;
    let extra = len % parts;

    let mut chunks = Vec::with_capacity(parts);
    let mut rest = all_regions;
    for i in 0..parts {
        let size = base + usize::from(i < extra);
        let (head, tail) = rest.split_at(size);
        chunks.push(head.join(","));
        rest = tail;
    }
    chunks
}
|
|
|
+
|
|
|
+/// Run DeepVariant in 4 sbatch jobs (by regions), then merge their PASS VCFs
|
|
|
+/// into the final deepvariant_solo_passed_vcf().
|
|
|
+pub fn run_deepvariant_quartered_sbatch_with_merge(
|
|
|
+ id: &str,
|
|
|
+ time_point: &str,
|
|
|
+ config: Config,
|
|
|
+) -> anyhow::Result<CapturedOutput> {
|
|
|
+ let base = DeepVariant::initialize(id, time_point, config)?;
|
|
|
+
|
|
|
+ // if already up-to-date, nothing to do
|
|
|
+ if !base.should_run() {
|
|
|
+ debug!(
|
|
|
+ "DeepVariant PASS VCF already up-to-date for {} {}, skipping.",
|
|
|
+ id, time_point
|
|
|
+ );
|
|
|
+ return Ok(CapturedOutput::default());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 1) split regions into 4 chunks
|
|
|
+ let all_regions: Vec<&str> = base.regions.split(',').collect();
|
|
|
+ let region_chunks = split_regions_into_n(&all_regions, 4);
|
|
|
+
|
|
|
+ // 2) build jobs
|
|
|
+ let mut jobs = Vec::with_capacity(region_chunks.len());
|
|
|
+ for (i, regions) in region_chunks.into_iter().enumerate() {
|
|
|
+ let mut job = base.clone();
|
|
|
+ job.regions = regions;
|
|
|
+ job.part_index = Some(i + 1);
|
|
|
+ job.log_dir = format!("{}/part{}", base.log_dir, i + 1);
|
|
|
+ jobs.push(job);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3) run them concurrently on Slurm
|
|
|
+ let outputs = run_many_sbatch(jobs)?;
|
|
|
+
|
|
|
+ // 4) merge PASS VCFs
|
|
|
+ merge_deepvariant_parts(&base, 4)?;
|
|
|
+
|
|
|
+ // we don’t really have a single merged CapturedOutput;
|
|
|
+ // return an empty one or synthesize something from `outputs`.
|
|
|
+ Ok(CapturedOutput {
|
|
|
+ stdout: String::new(),
|
|
|
+ stderr: String::new(),
|
|
|
+ slurm_epilog: None,
|
|
|
+ })
|
|
|
+}
|
|
|
+
|
|
|
+fn merge_deepvariant_parts(base: &DeepVariant, n_parts: usize) -> anyhow::Result<()> {
|
|
|
+ let mut part_pass_paths = Vec::with_capacity(n_parts);
|
|
|
+
|
|
|
+ for i in 1..=n_parts {
|
|
|
+ let mut dv = base.clone();
|
|
|
+ dv.part_index = Some(i);
|
|
|
+ let part_pass = dv.passed_vcf_path();
|
|
|
+ if !Path::new(&part_pass).exists() {
|
|
|
+ anyhow::bail!("Missing part PASS VCF: {part_pass}");
|
|
|
+ }
|
|
|
+ part_pass_paths.push(part_pass);
|
|
|
+ }
|
|
|
+
|
|
|
+ let final_passed_vcf = base
|
|
|
+ .config
|
|
|
+ .deepvariant_solo_passed_vcf(&base.id, &base.time_point);
|
|
|
+ let final_tmp = format!("{final_passed_vcf}.tmp");
|
|
|
+
|
|
|
+ fs::create_dir_all(
|
|
|
+ Path::new(&final_passed_vcf)
|
|
|
+ .parent()
|
|
|
+ .unwrap_or_else(|| Path::new(".")),
|
|
|
+ )?;
|
|
|
+
|
|
|
+ // bcftools concat parts -> tmp file, then index and rename
|
|
|
+ let concat_cmd = format!(
|
|
|
+ "bcftools concat -O z -o {out_tmp} {parts} && \
|
|
|
+ bcftools index -t {out_tmp}",
|
|
|
+ out_tmp = final_tmp,
|
|
|
+ parts = part_pass_paths.join(" ")
|
|
|
+ );
|
|
|
+
|
|
|
+ let out = ProcessCommand::new("bash")
|
|
|
+ .arg("-lc")
|
|
|
+ .arg(concat_cmd)
|
|
|
+ .stdout(Stdio::piped())
|
|
|
+ .stderr(Stdio::piped())
|
|
|
+ .output()
|
|
|
+ .context("failed to run bcftools concat for DeepVariant parts")?;
|
|
|
+
|
|
|
+ if !out.status.success() {
|
|
|
+ let mut log = String::from_utf8_lossy(&out.stdout).to_string();
|
|
|
+ log.push_str(&String::from_utf8_lossy(&out.stderr));
|
|
|
+ anyhow::bail!("bcftools concat failed: {}\n{}", out.status, log);
|
|
|
+ }
|
|
|
+
|
|
|
+ // move tmp → final
|
|
|
+ fs::rename(&final_tmp, &final_passed_vcf)
|
|
|
+ .context("failed to rename merged DeepVariant PASS VCF")?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+}
|
|
|
+
|