|
|
@@ -1,55 +1,51 @@
|
|
|
use crate::{
|
|
|
- annotation::{Annotation, Annotations, Caller, CallerCat, Sample}, collection::vcf::Vcf, commands::bcftools::{BcftoolsConfig, bcftools_concat, bcftools_keep_pass}, config::Config, helpers::{is_file_older, remove_dir_if_exists, temp_file_path}, io::vcf::read_vcf, pipes::{Initialize, ShouldRun, Version}, runners::{DockerRun, Run, run_wait}, variant::{
|
|
|
+ annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
|
|
|
+ collection::vcf::Vcf,
|
|
|
+ commands::{
|
|
|
+ bcftools::{BcftoolsConcat, BcftoolsKeepPass},
|
|
|
+ CapturedOutput, Command as JobCommand, Runner as LocalRunner, SbatchRunner, SlurmParams,
|
|
|
+ SlurmRunner,
|
|
|
+ },
|
|
|
+ config::Config,
|
|
|
+ helpers::{is_file_older, remove_dir_if_exists, temp_file_path},
|
|
|
+ io::vcf::read_vcf,
|
|
|
+ pipes::{Initialize, ShouldRun, Version},
|
|
|
+ runners::Run,
|
|
|
+ variant::{
|
|
|
variant::{Label, Variants},
|
|
|
variant_collection::VariantCollection,
|
|
|
- }
|
|
|
+ },
|
|
|
};
|
|
|
-use anyhow::{Context, Ok};
|
|
|
+
|
|
|
+use anyhow::Context;
|
|
|
use log::{debug, info, warn};
|
|
|
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
|
|
use regex::Regex;
|
|
|
use std::{
|
|
|
fs,
|
|
|
path::Path,
|
|
|
- process::{Command, Stdio},
|
|
|
+ process::{Command as ProcessCommand, Stdio},
|
|
|
};
|
|
|
|
|
|
/// A pipeline runner for executing ClairS on paired tumor and normal samples.
|
|
|
///
|
|
|
/// ClairS is a somatic variant caller that uses haplotype tagging from LongPhase.
|
|
|
-/// This struct manages:
|
|
|
-/// - Dockerized execution of the ClairS pipeline
|
|
|
-/// - Handling and filtering of output VCFs
|
|
|
-/// - Logging and diagnostic tracking
|
|
|
-/// - Integration with variant annotation workflows
|
|
|
-///
|
|
|
-/// # References
|
|
|
-/// - ClairS: <https://github.com/HKU-BAL/ClairS>
|
|
|
+/// This struct supports:
|
|
|
+/// - Local execution via `run_local`
|
|
|
+/// - Slurm execution via `run_sbatch`
|
|
|
+/// - Optional region restriction via `-r` (for downstream batched runners)
|
|
|
+/// - bcftools post-processing (germline + somatic PASS)
|
|
|
#[derive(Debug, Clone)]
|
|
|
pub struct ClairS {
|
|
|
pub id: String,
|
|
|
pub config: Config,
|
|
|
pub log_dir: String,
|
|
|
+ /// Optional list of regions passed as repeated `-r REGION` args.
|
|
|
+ /// When empty, ClairS runs genome-wide.
|
|
|
+ pub regions: Vec<String>,
|
|
|
}
|
|
|
|
|
|
impl Initialize for ClairS {
|
|
|
- /// Initializes the ClairS runner.
|
|
|
- ///
|
|
|
- /// This method constructs a [`ClairS`] instance with logging and configuration setup,
|
|
|
- /// and ensures the output directory is cleaned up if the results are outdated or force execution is enabled.
|
|
|
- ///
|
|
|
- /// # Arguments
|
|
|
- /// * `id` - The identifier for the sample being analyzed.
|
|
|
- /// * `config` - Pipeline-wide configuration object containing paths, resources, and settings.
|
|
|
- ///
|
|
|
- /// # Returns
|
|
|
- /// A fully initialized [`ClairS`] instance ready for execution.
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Returns an error if the output directory fails to be removed when necessary.
|
|
|
- ///
|
|
|
- /// If the output VCF already exists and is not outdated, initialization skips deletion.
|
|
|
- /// Otherwise, the output directory is cleared for a fresh run.
|
|
|
fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
|
|
|
let id = id.to_string();
|
|
|
|
|
|
@@ -60,6 +56,7 @@ impl Initialize for ClairS {
|
|
|
id,
|
|
|
log_dir,
|
|
|
config,
|
|
|
+ regions: Vec::new(),
|
|
|
};
|
|
|
|
|
|
if clairs.config.clairs_force {
|
|
|
@@ -84,172 +81,287 @@ impl ShouldRun for ClairS {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/* ---------------- JobCommand / LocalRunner / SbatchRunner ---------------- */
|
|
|
+
|
|
|
+impl JobCommand for ClairS {
|
|
|
+ fn init(&mut self) -> anyhow::Result<()> {
|
|
|
+ let output_dir = self.config.clairs_output_dir(&self.id);
|
|
|
+
|
|
|
+ fs::create_dir_all(&output_dir)
|
|
|
+ .with_context(|| format!("Failed create dir: {output_dir}"))?;
|
|
|
+
|
|
|
+ fs::create_dir_all(&self.log_dir)
|
|
|
+ .with_context(|| format!("Failed create dir: {}", self.log_dir))?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ let output_dir = self.config.clairs_output_dir(&self.id);
|
|
|
+
|
|
|
+ // Build repeated -r REGION args if any regions were set (for batched runs)
|
|
|
+ let region_args = if self.regions.is_empty() {
|
|
|
+ String::new()
|
|
|
+ } else {
|
|
|
+ self.regions
|
|
|
+ .iter()
|
|
|
+ .map(|r| format!("-r {r}"))
|
|
|
+ .collect::<Vec<_>>()
|
|
|
+ .join(" ")
|
|
|
+ };
|
|
|
+
|
|
|
+ let sample_name = format!("{}_diag", self.id);
|
|
|
+
|
|
|
+ format!(
|
|
|
+ "{singularity_bin} exec \
|
|
|
+ --bind /data:/data \
|
|
|
+ --bind {output_dir}:{output_dir} \
|
|
|
+ {image} \
|
|
|
+ /opt/bin/run_clairs \
|
|
|
+ -T {tumor_bam} \
|
|
|
+ -N {normal_bam} \
|
|
|
+ -R {reference} \
|
|
|
+ -t {threads} \
|
|
|
+ -p {platform} \
|
|
|
+ --enable_indel_calling \
|
|
|
+ --include_all_ctgs \
|
|
|
+ --print_germline_calls \
|
|
|
+ --enable_clair3_germline_output \
|
|
|
+ --use_longphase_for_intermediate_haplotagging true \
|
|
|
+ --output_dir {output_dir} \
|
|
|
+ -s {sample_name} \
|
|
|
+ {region_args}",
|
|
|
+ singularity_bin = self.config.singularity_bin,
|
|
|
+ image = self.config.clairs_image,
|
|
|
+ tumor_bam = self.config.tumoral_bam(&self.id),
|
|
|
+ normal_bam = self.config.normal_bam(&self.id),
|
|
|
+ reference = self.config.reference,
|
|
|
+ threads = self.config.clairs_threads,
|
|
|
+ platform = self.config.clairs_platform,
|
|
|
+ output_dir = output_dir,
|
|
|
+ sample_name = sample_name,
|
|
|
+ region_args = region_args,
|
|
|
+ )
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl LocalRunner for ClairS {
|
|
|
+ // default shell() is fine ("bash")
|
|
|
+}
|
|
|
+
|
|
|
+impl SbatchRunner for ClairS {
|
|
|
+ fn slurm_params(&self) -> SlurmParams {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some(format!("clairs_{}", self.id)),
|
|
|
+ cpus_per_task: Some(self.config.clairs_threads as u32),
|
|
|
+ mem: Some("60G".into()), // tune as needed
|
|
|
+ partition: Some("batch".into()), // CPU partition, no GPU
|
|
|
+ gres: None, // ClairS does not use GPU
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ fn sbatch_extra_args(&self) -> Vec<String> {
|
|
|
+ Vec::new()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
impl Run for ClairS {
|
|
|
- /// Executes the ClairS variant calling pipeline and post-processes its output.
|
|
|
- ///
|
|
|
- /// # Pipeline Overview
|
|
|
- /// - Runs ClairS in a Docker container using paired tumor and normal BAMs
|
|
|
- /// - Generates both somatic and germline variant VCFs
|
|
|
- /// - Applies bcftools filtering to keep only PASS variants
|
|
|
- /// - Concatenates separate VCFs (e.g., SNPs and INDELs) into a single somatic file
|
|
|
- /// - Tracks all operations via logs saved to disk
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Returns an error if:
|
|
|
- /// - Docker execution fails
|
|
|
- /// - bcftools fails to process or filter VCFs
|
|
|
- /// - Temporary files can't be removed or written
|
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
|
- // Run Docker command if output VCF doesn't exist
|
|
|
- let (output_vcf, output_indels_vcf) = self.config.clairs_output_vcfs(&self.id);
|
|
|
- if !Path::new(&output_vcf).exists() || !Path::new(&output_indels_vcf).exists() {
|
|
|
- let output_dir = self.config.clairs_output_dir(&self.id);
|
|
|
- fs::create_dir_all(&output_dir)
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed create dir: {output_dir}.\n{e}"))?;
|
|
|
-
|
|
|
- let mut docker_run = DockerRun::new(&[
|
|
|
- "run",
|
|
|
- "-d",
|
|
|
- "-v",
|
|
|
- "/data:/data",
|
|
|
- "-v",
|
|
|
- &format!("{output_dir}:{output_dir}"),
|
|
|
- &format!("hkubal/clairs:{}", self.config.clairs_docker_tag),
|
|
|
- "/opt/bin/run_clairs",
|
|
|
- "-T",
|
|
|
- &self.config.tumoral_bam(&self.id),
|
|
|
- "-N",
|
|
|
- &self.config.normal_bam(&self.id),
|
|
|
- "-R",
|
|
|
- &self.config.reference,
|
|
|
- "-t",
|
|
|
- &self.config.clairs_threads.to_string(),
|
|
|
- "-p",
|
|
|
- &self.config.clairs_platform,
|
|
|
- "--enable_indel_calling",
|
|
|
- "--include_all_ctgs",
|
|
|
- "--print_germline_calls",
|
|
|
- "--enable_clair3_germline_output",
|
|
|
- "--use_longphase_for_intermediate_haplotagging",
|
|
|
- "true",
|
|
|
- "--output_dir",
|
|
|
- &output_dir,
|
|
|
- "-s",
|
|
|
- &format!("{}_diag", self.id),
|
|
|
- ]);
|
|
|
- let report = run_wait(&mut docker_run)
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed to run ClairS for {}.\n{e}", &self.id))?;
|
|
|
-
|
|
|
- let log_file = format!("{}/clairs_", self.log_dir);
|
|
|
+ self.run_local()?;
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/* ---------------- Post-processing helpers (germline + somatic) ----------- */
|
|
|
+
|
|
|
+impl ClairS {
|
|
|
+ fn postprocess_local(&self) -> anyhow::Result<()> {
|
|
|
+ // Germline PASS
|
|
|
+ let clair3_germline_passed = self.config.clairs_germline_passed_vcf(&self.id);
|
|
|
+ if !Path::new(&clair3_germline_passed).exists() {
|
|
|
+ let clair3_germline_normal = self.config.clairs_germline_normal_vcf(&self.id);
|
|
|
+
|
|
|
+ let mut cmd = BcftoolsKeepPass::from_config(
|
|
|
+ &self.config,
|
|
|
+ clair3_germline_normal,
|
|
|
+ clair3_germline_passed.clone(),
|
|
|
+ );
|
|
|
+ let report = <BcftoolsKeepPass as LocalRunner>::run(&mut cmd).with_context(|| {
|
|
|
+ format!(
|
|
|
+ "Failed to run `bcftools keep PASS` for {}.",
|
|
|
+ clair3_germline_passed
|
|
|
+ )
|
|
|
+ })?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/bcftools_germline_pass_", self.log_dir);
|
|
|
report
|
|
|
.save_to_file(&log_file)
|
|
|
- .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}.\n{e}"))?;
|
|
|
+ .with_context(|| format!("Error while writing logs into {log_file}"))?;
|
|
|
} else {
|
|
|
debug!(
|
|
|
- "ClairS output VCF already exists for {}, skipping execution.",
|
|
|
+ "ClairS Germline PASSED VCF already exists for {}, skipping.",
|
|
|
self.id
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- // Germline PASS
|
|
|
- let clair3_germline_passed = self.config.clairs_germline_passed_vcf(&self.id);
|
|
|
- if !Path::new(&clair3_germline_passed).exists() {
|
|
|
- let clair3_germline_normal = self.config.clairs_germline_normal_vcf(&self.id);
|
|
|
- // let clair3_germline_tumor = self.config.clairs_germline_tumor_vcf(&self.id);
|
|
|
-
|
|
|
- let report = bcftools_keep_pass(
|
|
|
- &clair3_germline_normal,
|
|
|
- &clair3_germline_passed,
|
|
|
- BcftoolsConfig::default(),
|
|
|
- )
|
|
|
- .map_err(|e| {
|
|
|
- anyhow::anyhow!(
|
|
|
- "Failed to run `bcftools keep PASS` for {}.\n{e}",
|
|
|
- &clair3_germline_passed
|
|
|
+ // Somatic concat + PASS
|
|
|
+ let passed_vcf = self.config.clairs_passed_vcf(&self.id);
|
|
|
+ if !Path::new(&passed_vcf).exists() {
|
|
|
+ let (output_vcf, output_indels_vcf) = self.config.clairs_output_vcfs(&self.id);
|
|
|
+
|
|
|
+ let tmp_file = temp_file_path(".vcf.gz")?.to_str().unwrap().to_string();
|
|
|
+
|
|
|
+ let mut concat = BcftoolsConcat::from_config(
|
|
|
+ &self.config,
|
|
|
+ vec![output_vcf.clone(), output_indels_vcf.clone()],
|
|
|
+ &tmp_file,
|
|
|
+ );
|
|
|
+ let report = <BcftoolsConcat as LocalRunner>::run(&mut concat).with_context(|| {
|
|
|
+ format!(
|
|
|
+ "Failed to run bcftools concat for {} and {}.",
|
|
|
+ output_vcf, output_indels_vcf
|
|
|
)
|
|
|
})?;
|
|
|
|
|
|
+ let log_file = format!("{}/bcftools_concat_", self.log_dir);
|
|
|
+ report
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .with_context(|| format!("Error while writing logs into {log_file}"))?;
|
|
|
+
|
|
|
+ let mut keep_pass =
|
|
|
+ BcftoolsKeepPass::from_config(&self.config, tmp_file.clone(), passed_vcf.clone());
|
|
|
+ let report =
|
|
|
+ <BcftoolsKeepPass as LocalRunner>::run(&mut keep_pass).with_context(|| {
|
|
|
+ format!("Error while running bcftools keep PASS for {}.", output_vcf)
|
|
|
+ })?;
|
|
|
+
|
|
|
let log_file = format!("{}/bcftools_pass_", self.log_dir);
|
|
|
report
|
|
|
.save_to_file(&log_file)
|
|
|
- .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}.\n{e}"))?;
|
|
|
+ .with_context(|| format!("Error while writing logs into {log_file}"))?;
|
|
|
|
|
|
- // fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?;
|
|
|
+ fs::remove_file(&tmp_file)
|
|
|
+ .with_context(|| format!("Failed to remove temporary file {tmp_file}"))?;
|
|
|
} else {
|
|
|
debug!(
|
|
|
- "ClairS Germline PASSED VCF already exists for {}, skipping execution.",
|
|
|
+ "ClairS PASSED VCF already exists for {}, skipping.",
|
|
|
self.id
|
|
|
);
|
|
|
}
|
|
|
|
|
|
- let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ fn postprocess_sbatch(&self) -> anyhow::Result<()> {
|
|
|
+ // Germline PASS via Slurm
|
|
|
+ let clair3_germline_passed = self.config.clairs_germline_passed_vcf(&self.id);
|
|
|
+ if !Path::new(&clair3_germline_passed).exists() {
|
|
|
+ let clair3_germline_normal = self.config.clairs_germline_normal_vcf(&self.id);
|
|
|
+
|
|
|
+ let mut cmd = BcftoolsKeepPass::from_config(
|
|
|
+ &self.config,
|
|
|
+ clair3_germline_normal,
|
|
|
+ clair3_germline_passed.clone(),
|
|
|
+ );
|
|
|
+ let report = SlurmRunner::run(&mut cmd)
|
|
|
+ .context("Failed to run `bcftools keep PASS` on Slurm")?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/bcftools_germline_pass_", self.log_dir);
|
|
|
+ report
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .context("Error while writing logs")?;
|
|
|
+ } else {
|
|
|
+ debug!(
|
|
|
+ "ClairS Germline PASSED VCF already exists for {}, skipping.",
|
|
|
+ self.id
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ // Somatic concat + PASS via Slurm
|
|
|
+ let passed_vcf = self.config.clairs_passed_vcf(&self.id);
|
|
|
if !Path::new(&passed_vcf).exists() {
|
|
|
- // Concat output and indels
|
|
|
+ let (output_vcf, output_indels_vcf) = self.config.clairs_output_vcfs(&self.id);
|
|
|
+
|
|
|
let tmp_file = temp_file_path(".vcf.gz")?.to_str().unwrap().to_string();
|
|
|
- let report = bcftools_concat(
|
|
|
- vec![output_vcf.to_string(), output_indels_vcf.to_string()],
|
|
|
+
|
|
|
+ let mut concat = BcftoolsConcat::from_config(
|
|
|
+ &self.config,
|
|
|
+ vec![output_vcf.clone(), output_indels_vcf.clone()],
|
|
|
&tmp_file,
|
|
|
- BcftoolsConfig::default(),
|
|
|
- )
|
|
|
- .map_err(|e| {
|
|
|
- anyhow::anyhow!(
|
|
|
- "Failed to run bcftools concat for {} and {}.\n{e}",
|
|
|
- &output_vcf,
|
|
|
- &output_indels_vcf
|
|
|
- )
|
|
|
- })?;
|
|
|
+ );
|
|
|
+ let report = SlurmRunner::run(&mut concat)
|
|
|
+ .context("Failed to run bcftools concat for ClairS somatic on Slurm")?;
|
|
|
+
|
|
|
let log_file = format!("{}/bcftools_concat_", self.log_dir);
|
|
|
report
|
|
|
.save_to_file(&log_file)
|
|
|
- .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}\n{e}"))?;
|
|
|
-
|
|
|
- let report = bcftools_keep_pass(&tmp_file, passed_vcf, BcftoolsConfig::default())
|
|
|
- .map_err(|e| {
|
|
|
- anyhow::anyhow!(
|
|
|
- "Error while running bcftools keep PASS for {}.\n{e}",
|
|
|
- &output_vcf
|
|
|
- )
|
|
|
- })?;
|
|
|
+ .context("Error while writing concat logs")?;
|
|
|
+
|
|
|
+ let mut keep_pass =
|
|
|
+ BcftoolsKeepPass::from_config(&self.config, tmp_file.clone(), passed_vcf.clone());
|
|
|
+ let report = SlurmRunner::run(&mut keep_pass)
|
|
|
+ .context("Failed to run bcftools keep PASS for ClairS somatic on Slurm")?;
|
|
|
|
|
|
let log_file = format!("{}/bcftools_pass_", self.log_dir);
|
|
|
report
|
|
|
.save_to_file(&log_file)
|
|
|
- .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}.\n{e}"))?;
|
|
|
+ .context("Error while writing PASS logs")?;
|
|
|
|
|
|
- fs::remove_file(&tmp_file)
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed to remove temporary file {tmp_file}.\n{e}"))?;
|
|
|
+ fs::remove_file(&tmp_file).context("Failed to remove temporary merged VCF")?;
|
|
|
} else {
|
|
|
debug!(
|
|
|
- "ClairS PASSED VCF already exists for {}, skipping execution.",
|
|
|
+ "ClairS PASSED VCF already exists for {}, skipping.",
|
|
|
self.id
|
|
|
);
|
|
|
}
|
|
|
|
|
|
Ok(())
|
|
|
}
|
|
|
+
|
|
|
+ /// Local execution: runs ClairS via `LocalRunner` then post-processes VCFs locally.
|
|
|
+ pub fn run_local(&mut self) -> anyhow::Result<CapturedOutput> {
|
|
|
+ if !self.should_run() {
|
|
|
+ debug!(
|
|
|
+ "ClairS output already up-to-date for {}, skipping.",
|
|
|
+ self.id
|
|
|
+ );
|
|
|
+ return Ok(CapturedOutput::default());
|
|
|
+ }
|
|
|
+
|
|
|
+ info!("Running ClairS locally for {}", self.id);
|
|
|
+ let out = <Self as LocalRunner>::run(self)?;
|
|
|
+
|
|
|
+ self.postprocess_local()?;
|
|
|
+ Ok(out)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Slurm execution: submits ClairS via sbatch (Singularity inside job) then bcftools via Slurm.
|
|
|
+ pub fn run_sbatch(&mut self) -> anyhow::Result<CapturedOutput> {
|
|
|
+ if !self.should_run() {
|
|
|
+ debug!(
|
|
|
+ "ClairS output already up-to-date for {}, skipping.",
|
|
|
+ self.id
|
|
|
+ );
|
|
|
+ return Ok(CapturedOutput::default());
|
|
|
+ }
|
|
|
+
|
|
|
+ info!("Submitting ClairS via sbatch for {}", self.id);
|
|
|
+ let out = <Self as SbatchRunner>::run(self)?;
|
|
|
+
|
|
|
+ self.postprocess_sbatch()?;
|
|
|
+ Ok(out)
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
+/* ---------------- Variant / Label / Version impls ------------------------ */
|
|
|
+
|
|
|
impl CallerCat for ClairS {
|
|
|
- /// Tags this runner as somatic, used for annotation classification.
|
|
|
fn caller_cat(&self) -> Annotation {
|
|
|
Annotation::Callers(Caller::ClairS, Sample::Somatic)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
impl Variants for ClairS {
|
|
|
- /// Loads and annotates somatic variants from the ClairS filtered VCF.
|
|
|
- ///
|
|
|
- /// This method reads the filtered PASS VCF file generated by ClairS for somatic variants.
|
|
|
- /// It tags each variant with the ClairS somatic annotation and adds it to the shared `annotations` map.
|
|
|
- ///
|
|
|
- /// # Arguments
|
|
|
- /// * `annotations` - A reference to the global annotations structure used to store variant metadata.
|
|
|
- ///
|
|
|
- /// # Returns
|
|
|
- /// A [`VariantCollection`] with the list of variants, the source VCF file, and the associated caller tag.
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Will return an error if the VCF file is unreadable, missing, or malformed.
|
|
|
fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
|
|
|
let caller = self.caller_cat();
|
|
|
let add = vec![caller.clone()];
|
|
|
@@ -257,7 +369,7 @@ impl Variants for ClairS {
|
|
|
|
|
|
info!("Loading variants from {caller}: {passed_vcf}");
|
|
|
let variants = read_vcf(passed_vcf)
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed to read ClairS VCF {}.\n{e}", passed_vcf))?;
|
|
|
+ .with_context(|| format!("Failed to read ClairS VCF {passed_vcf}"))?;
|
|
|
|
|
|
variants.par_iter().for_each(|v| {
|
|
|
annotations.insert_update(v.hash(), &add);
|
|
|
@@ -273,26 +385,12 @@ impl Variants for ClairS {
|
|
|
}
|
|
|
|
|
|
impl ClairS {
|
|
|
- /// Loads and annotates germline variants from the ClairS germline output.
|
|
|
- ///
|
|
|
- /// This function loads a pre-filtered VCF file containing germline variants called by ClairS.
|
|
|
- /// It updates the provided `annotations` structure with a tag indicating these are germline variants.
|
|
|
- ///
|
|
|
- /// # Arguments
|
|
|
- /// * `annotations` - A shared annotation structure to update with variant hashes and tags.
|
|
|
- ///
|
|
|
- /// # Returns
|
|
|
- /// A [`VariantCollection`] object containing the loaded variants, associated VCF metadata, and caller category.
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Will return an error if the VCF file cannot be read or parsed.
|
|
|
pub fn germline(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
|
|
|
let caller = Annotation::Callers(Caller::ClairS, Sample::Germline);
|
|
|
let add = vec![caller.clone()];
|
|
|
let clair3_germline_passed = &self.config.clairs_germline_passed_vcf(&self.id);
|
|
|
|
|
|
info!("Loading variants from {caller}: {clair3_germline_passed}");
|
|
|
-
|
|
|
let variants = read_vcf(clair3_germline_passed)?;
|
|
|
variants.par_iter().for_each(|v| {
|
|
|
annotations.insert_update(v.hash(), &add);
|
|
|
@@ -308,36 +406,28 @@ impl ClairS {
|
|
|
}
|
|
|
|
|
|
impl Label for ClairS {
|
|
|
- /// Returns the string label for this caller.
|
|
|
fn label(&self) -> String {
|
|
|
self.caller_cat().to_string()
|
|
|
}
|
|
|
}
|
|
|
|
|
|
impl Version for ClairS {
|
|
|
- /// Retrieves the ClairS version by running `/opt/bin/run_clairs --version` in its docker environment.
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Returns an error if command execution fails or "Version " not found in output.
|
|
|
fn version(config: &Config) -> anyhow::Result<String> {
|
|
|
- let out = Command::new("docker")
|
|
|
- .args([
|
|
|
- "run",
|
|
|
- "--rm",
|
|
|
- "--entrypoint",
|
|
|
- "/opt/bin/run_clairs",
|
|
|
- &format!("hkubal/clairs:{}", config.clairs_docker_tag),
|
|
|
- "--version",
|
|
|
- ])
|
|
|
+ let out = ProcessCommand::new("bash")
|
|
|
+ .arg("-c")
|
|
|
+ .arg(format!(
|
|
|
+ "{} exec {} /opt/bin/run_clairs --version",
|
|
|
+ config.singularity_bin, config.clairs_image
|
|
|
+ ))
|
|
|
.stdout(Stdio::piped())
|
|
|
.stderr(Stdio::piped())
|
|
|
.output()
|
|
|
- .context("failed to spawn docker")?;
|
|
|
+ .context("failed to spawn singularity")?;
|
|
|
|
|
|
if !out.status.success() {
|
|
|
let mut log = String::from_utf8_lossy(&out.stdout).to_string();
|
|
|
log.push_str(&String::from_utf8_lossy(&out.stderr));
|
|
|
- anyhow::bail!("docker run failed: {}\n{}", out.status, log);
|
|
|
+ anyhow::bail!("singularity run failed: {}\n{}", out.status, log);
|
|
|
}
|
|
|
|
|
|
let mut log = String::from_utf8_lossy(&out.stdout).to_string();
|
|
|
@@ -346,7 +436,72 @@ impl Version for ClairS {
|
|
|
let re = Regex::new(r"(?m)run_clairs\s+([^\s]+)")?;
|
|
|
let caps = re
|
|
|
.captures(&log)
|
|
|
- .context("could not parse DeepSomatic version from output")?;
|
|
|
+ .context("could not parse ClairS version from output")?;
|
|
|
+ Ok(caps.get(1).unwrap().as_str().to_string())
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Slurm: run `/opt/bin/run_clairs --version` inside a small Slurm job (via `SlurmRunner`).
|
|
|
+ fn version_slurm(config: &Config) -> anyhow::Result<String> {
|
|
|
+ // Minimal Slurm job just to run the version command
|
|
|
+ struct ClairSVersionJob<'a> {
|
|
|
+ config: &'a Config,
|
|
|
+ }
|
|
|
+
|
|
|
+ impl<'a> JobCommand for ClairSVersionJob<'a> {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ format!(
|
|
|
+ "{} exec {} /opt/bin/run_clairs \
|
|
|
+ --version",
|
|
|
+ self.config.singularity_bin, self.config.clairs_image
|
|
|
+ )
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ impl<'a> SlurmRunner for ClairSVersionJob<'a> {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some("clairs_version".into()),
|
|
|
+ partition: Some("shortq".into()), // adjust to your CPU partition
|
|
|
+ cpus_per_task: Some(1),
|
|
|
+ mem: Some("10G".into()),
|
|
|
+ gres: None, // no GPU
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut job = ClairSVersionJob { config };
|
|
|
+ let out = SlurmRunner::run(&mut job).context("failed to run ClairS --version via Slurm")?;
|
|
|
+
|
|
|
+ // Combine stdout, Slurm epilog (if any), and stderr for parsing
|
|
|
+ let mut log = out.stdout.clone();
|
|
|
+ if let Some(epilog) = &out.slurm_epilog {
|
|
|
+ log.push_str(&epilog.to_string());
|
|
|
+ }
|
|
|
+ log.push_str(&out.stderr);
|
|
|
+
|
|
|
+ let re = Regex::new(r"(?m)run_clairs\s+([^\s]+)")?;
|
|
|
+ let caps = re
|
|
|
+ .captures(&log)
|
|
|
+ .context("could not parse ClairS version from Slurm output")?;
|
|
|
+
|
|
|
Ok(caps.get(1).unwrap().as_str().to_string())
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+#[cfg(test)]
|
|
|
+mod tests {
|
|
|
+ use super::*;
|
|
|
+ use crate::helpers::test_init;
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn clairs_version() -> anyhow::Result<()> {
|
|
|
+ test_init();
|
|
|
+ let vl = ClairS::version(&Config::default())?;
|
|
|
+ info!("ClairS local version: {vl}");
|
|
|
+ let vs = ClairS::version_slurm(&Config::default())?;
|
|
|
+ info!("ClairS slurm version: {vs}");
|
|
|
+ assert_eq!(vl, vs);
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|