|
|
@@ -1,18 +1,171 @@
|
|
|
+//! # ClairS Somatic Variant Calling Pipeline
|
|
|
+//!
|
|
|
+//! This module provides a pipeline runner for [ClairS](https://github.com/HKU-BAL/ClairS),
|
|
|
+//! a deep learning-based somatic variant caller designed for long-read sequencing data
|
|
|
+//! (ONT and PacBio HiFi).
|
|
|
+//!
|
|
|
+//! ## Overview
|
|
|
+//!
|
|
|
+//! ClairS performs somatic variant calling on paired tumor-normal samples using:
|
|
|
+//!
|
|
|
+//! - Haplotype-aware variant calling with LongPhase integration
|
|
|
+//! - Separate SNV and indel calling pipelines
|
|
|
+//! - Clair3-based germline variant detection on the normal sample
|
|
|
+//!
|
|
|
+//! ## Execution Modes
|
|
|
+//!
|
|
|
+//! The module supports three execution strategies:
|
|
|
+//!
|
|
|
+//! - **Local** ([`Run::run`](crate::runners::Run::run), when the configuration selects local execution) — Single-node execution, useful for debugging
|
|
|
+//! - **Slurm** ([`Run::run`](crate::runners::Run::run), when the configuration selects Slurm) — Single HPC job submission
|
|
|
+//! - **Chunked** ([`run_clairs_chunked_with_merge`]) — Parallel execution across genome regions
|
|
|
+//!
|
|
|
+//! ### Chunked Parallel Execution
|
|
|
+//!
|
|
|
+//! For large genomes, the chunked mode provides significant speedup:
|
|
|
+//!
|
|
|
+//! ```text
|
|
|
+//! ┌─────────────────────────────────────────────────────────────────┐
|
|
|
+//! │ Genome Splitting │
|
|
|
+//! │ chr1:1-50M │ chr1:50M-100M │ chr2:1-50M │ ... │ chrN │
|
|
|
+//! └──────┬───────┴────────┬────────┴───────┬──────┴───────┴────┬────┘
|
|
|
+//! │ │ │ │
|
|
|
+//! ▼ ▼ ▼ ▼
|
|
|
+//! ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐
|
|
|
+//! │ ClairS │ │ ClairS │ │ ClairS │ ... │ ClairS │
|
|
|
+//! │ Part 1 │ │ Part 2 │ │ Part 3 │ │ Part N │
|
|
|
+//! └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ └─────┬──────┘
|
|
|
+//! │ │ │ │
|
|
|
+//! ▼ ▼ ▼ ▼
|
|
|
+//! ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐
|
|
|
+//! │ Postprocess│ │ Postprocess│ │ Postprocess│ ... │ Postprocess│
|
|
|
+//! │ SNV+Indel │ │ SNV+Indel │ │ SNV+Indel │ │ SNV+Indel │
|
|
|
+//! │ → PASS │ │ → PASS │ │ → PASS │ │ → PASS │
|
|
|
+//! └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ └─────┬──────┘
|
|
|
+//! │ │ │ │
|
|
|
+//! └───────────────┴───────┬───────┴───────────────────┘
|
|
|
+//! ▼
|
|
|
+//! ┌─────────────────────┐
|
|
|
+//! │ bcftools concat │
|
|
|
+//! │ (somatic PASS) │
|
|
|
+//! └──────────┬──────────┘
|
|
|
+//! ▼
|
|
|
+//! ┌─────────────────────┐
|
|
|
+//! │ Final VCF Output │
|
|
|
+//! └─────────────────────┘
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## Output Files
|
|
|
+//!
|
|
|
+//! Somatic variants (PASS only):
|
|
|
+//! ```text
|
|
|
+//! {result_dir}/{id}/clairs/{id}_clairs.pass.vcf.gz
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! Germline variants (PASS only):
|
|
|
+//! ```text
|
|
|
+//! {result_dir}/{id}/clairs/clair3_germline.pass.vcf.gz
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## Post-processing Pipeline
|
|
|
+//!
|
|
|
+//! Each ClairS run (or part) undergoes automatic post-processing:
|
|
|
+//!
|
|
|
+//! 1. **Somatic**: Concatenate SNV + indel VCFs → filter PASS variants
|
|
|
+//! 2. **Germline**: Filter PASS variants from Clair3 germline output
|
|
|
+//!
|
|
|
+//! ## Usage
|
|
|
+//!
|
|
|
+//! ### Basic Slurm Execution
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use crate::config::Config;
|
|
|
+//! use crate::pipes::clairs::ClairS;
|
|
|
+//! use crate::pipes::Initialize;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//! let mut clairs = ClairS::initialize("sample_001", &config)?;
|
|
|
+//! clairs.run()?; // `Run::run` (requires `crate::runners::Run` in scope); local vs Slurm is chosen from the config
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ### Chunked Parallel Execution (Recommended for WGS)
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use crate::config::Config;
|
|
|
+//! use crate::pipes::clairs::run_clairs_chunked_with_merge;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//! let outputs = run_clairs_chunked_with_merge("sample_001", &config, 20)?;
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ### Loading Variants for Downstream Analysis
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use crate::annotation::Annotations;
|
|
|
+//! use crate::variant::variant::Variants;
|
|
|
+//!
|
|
|
+//! let annotations = Annotations::new();
|
|
|
+//! let somatic = clairs.variants(&annotations)?;
|
|
|
+//! let germline = clairs.germline(&annotations)?;
|
|
|
+//!
|
|
|
+//! println!("Somatic: {} variants", somatic.variants.len());
|
|
|
+//! println!("Germline: {} variants", germline.variants.len());
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## Configuration Requirements
|
|
|
+//!
|
|
|
+//! The following [`Config`](crate::config::Config) fields must be set:
|
|
|
+//!
|
|
|
+//! - `singularity_bin` — Path to Singularity executable
|
|
|
+//! - `clairs_image` — ClairS container image path
|
|
|
+//! - `reference` — Reference genome FASTA
|
|
|
+//! - `clairs_threads` — CPU threads per job
|
|
|
+//! - `clairs_platform` — Sequencing platform (`ont_r10_dorado_sup_4khz`, `hifi_revio`, etc.)
|
|
|
+//! - `clairs_force` — Force re-run even if outputs exist
|
|
|
+//!
|
|
|
+//! ## Implemented Traits
|
|
|
+//!
|
|
|
+//! - [`Initialize`](crate::pipes::Initialize) — Setup and directory creation
|
|
|
+//! - [`ShouldRun`](crate::pipes::ShouldRun) — Timestamp-based execution gating
|
|
|
+//! - [`JobCommand`](crate::commands::Command) — Command string generation
|
|
|
+//! - [`LocalRunner`](crate::commands::LocalRunner) — Local execution support
|
|
|
+//! - [`SbatchRunner`](crate::commands::SbatchRunner) — Slurm job submission
|
|
|
+//! - [`Run`](crate::runners::Run) — Unified execution interface
|
|
|
+//! - [`Variants`](crate::variant::variant::Variants) — Somatic variant loading
|
|
|
+//! - [`CallerCat`](crate::annotation::CallerCat) — Caller annotation category
|
|
|
+//! - [`Label`](crate::variant::variant::Label) — Human-readable identifier
|
|
|
+//! - [`Version`](crate::pipes::Version) — Tool version extraction
|
|
|
+//!
|
|
|
+//! ## Dependencies
|
|
|
+//!
|
|
|
+//! External tools (containerized or system):
|
|
|
+//!
|
|
|
+//! - **ClairS** — Somatic variant calling
|
|
|
+//! - **bcftools** — VCF concatenation and filtering
|
|
|
+//!
|
|
|
+//! ## References
|
|
|
+//!
|
|
|
+//! - [ClairS GitHub repository](https://github.com/HKU-BAL/ClairS)
|
|
|
+//! - [ClairS publication (Nature Communications, 2024)](https://doi.org/10.1038/s41467-024-52832-2)
|
|
|
use crate::{
|
|
|
annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
|
|
|
collection::vcf::Vcf,
|
|
|
commands::{
|
|
|
bcftools::{BcftoolsConcat, BcftoolsKeepPass},
|
|
|
- run_many_sbatch, CapturedOutput, Command as JobCommand, Runner as LocalRunner,
|
|
|
- SbatchRunner, SlurmParams, SlurmRunner,
|
|
|
+ CapturedOutput, Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner,
|
|
|
+ SlurmParams, SlurmRunner,
|
|
|
},
|
|
|
config::Config,
|
|
|
helpers::{
|
|
|
- get_genome_sizes, is_file_older, remove_dir_if_exists, split_genome_into_n_regions,
|
|
|
+ get_genome_sizes, is_file_older, remove_dir_if_exists, split_genome_into_n_regions_exact,
|
|
|
temp_file_path,
|
|
|
},
|
|
|
io::vcf::read_vcf,
|
|
|
pipes::{Initialize, ShouldRun, Version},
|
|
|
+ run, run_many,
|
|
|
runners::Run,
|
|
|
variant::{
|
|
|
variant::{Label, Variants},
|
|
|
@@ -182,9 +335,10 @@ impl JobCommand for ClairS {
|
|
|
}
|
|
|
|
|
|
impl LocalRunner for ClairS {}
|
|
|
+impl LocalBatchRunner for ClairS {}
|
|
|
|
|
|
-impl SbatchRunner for ClairS {
|
|
|
- fn slurm_params(&self) -> SlurmParams {
|
|
|
+impl SlurmRunner for ClairS {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
SlurmParams {
|
|
|
job_name: Some(format!("clairs_{}", self.id)),
|
|
|
cpus_per_task: Some(self.config.clairs_threads as u32),
|
|
|
@@ -192,16 +346,25 @@ impl SbatchRunner for ClairS {
|
|
|
partition: Some("batch".into()),
|
|
|
gres: None,
|
|
|
}
|
|
|
+ .to_args()
|
|
|
}
|
|
|
+}
|
|
|
|
|
|
- fn sbatch_extra_args(&self) -> Vec<String> {
|
|
|
- Vec::new()
|
|
|
+impl SbatchRunner for ClairS {
|
|
|
+ fn slurm_params(&self) -> SlurmParams {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some(format!("clairs_{}", self.id)),
|
|
|
+ cpus_per_task: Some(self.config.clairs_threads as u32),
|
|
|
+ mem: Some("60G".into()),
|
|
|
+ partition: Some("batch".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
impl Run for ClairS {
|
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
|
- self.run_local()?;
|
|
|
+ run!(&self.config, self)?;
|
|
|
Ok(())
|
|
|
}
|
|
|
}
|
|
|
@@ -221,15 +384,14 @@ impl ClairS {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /// Post-processes ClairS output locally (concat SNV+indel, filter PASS).
|
|
|
- fn postprocess_local(&self) -> anyhow::Result<()> {
|
|
|
- self.process_germline_local()?;
|
|
|
- self.process_somatic_local()?;
|
|
|
+ /// Post-processes ClairS output (concat SNV+indel, filter PASS).
|
|
|
+ fn postprocess(&self) -> anyhow::Result<()> {
|
|
|
+ self.process_germline()?;
|
|
|
+ self.process_somatic()?;
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- /// Processes germline VCF (PASS filter only).
|
|
|
- fn process_germline_local(&self) -> anyhow::Result<()> {
|
|
|
+ fn process_germline(&self) -> anyhow::Result<()> {
|
|
|
let germline_passed = self.germline_passed_vcf_path();
|
|
|
|
|
|
if Path::new(&germline_passed).exists() {
|
|
|
@@ -240,12 +402,11 @@ impl ClairS {
|
|
|
return Ok(());
|
|
|
}
|
|
|
|
|
|
- // Input path depends on whether this is a part run
|
|
|
let germline_input = match self.part_index {
|
|
|
Some(_) => {
|
|
|
let output_dir = self.part_output_dir();
|
|
|
- let b = self.config.clairs_germline_normal_vcf(&self.id);
|
|
|
- let base_name = Path::new(&b)
|
|
|
+ let base = self.config.clairs_germline_normal_vcf(&self.id);
|
|
|
+ let base_name = Path::new(&base)
|
|
|
.file_name()
|
|
|
.expect("Germline VCF should have filename")
|
|
|
.to_string_lossy();
|
|
|
@@ -262,7 +423,7 @@ impl ClairS {
|
|
|
let mut cmd =
|
|
|
BcftoolsKeepPass::from_config(&self.config, germline_input, germline_passed.clone());
|
|
|
|
|
|
- let report = <BcftoolsKeepPass as LocalRunner>::run(&mut cmd)
|
|
|
+ let report = run!(&self.config, &mut cmd)
|
|
|
.with_context(|| format!("Failed to filter germline PASS for {}", self.id))?;
|
|
|
|
|
|
report
|
|
|
@@ -272,8 +433,7 @@ impl ClairS {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- /// Processes somatic VCFs (concat SNV+indel, then PASS filter).
|
|
|
- fn process_somatic_local(&self) -> anyhow::Result<()> {
|
|
|
+ fn process_somatic(&self) -> anyhow::Result<()> {
|
|
|
let passed_vcf = self.somatic_passed_vcf_path();
|
|
|
|
|
|
if Path::new(&passed_vcf).exists() {
|
|
|
@@ -309,7 +469,6 @@ impl ClairS {
|
|
|
self.id, self.part_index
|
|
|
);
|
|
|
|
|
|
- // Create temp file for intermediate concat result
|
|
|
let tmp_file = temp_file_path(".vcf.gz")?;
|
|
|
let tmp_path = tmp_file
|
|
|
.to_str()
|
|
|
@@ -323,7 +482,7 @@ impl ClairS {
|
|
|
&tmp_path,
|
|
|
);
|
|
|
|
|
|
- let report = <BcftoolsConcat as LocalRunner>::run(&mut concat)
|
|
|
+ let report = run!(&self.config, &mut concat)
|
|
|
.with_context(|| format!("Failed to concat {} and {}", snv_vcf, indel_vcf))?;
|
|
|
|
|
|
report
|
|
|
@@ -334,7 +493,7 @@ impl ClairS {
|
|
|
let mut keep_pass =
|
|
|
BcftoolsKeepPass::from_config(&self.config, tmp_path.clone(), passed_vcf.clone());
|
|
|
|
|
|
- let report = <BcftoolsKeepPass as LocalRunner>::run(&mut keep_pass)
|
|
|
+ let report = run!(&self.config, &mut keep_pass)
|
|
|
.with_context(|| format!("Failed to filter PASS for {}", self.id))?;
|
|
|
|
|
|
report
|
|
|
@@ -346,163 +505,6 @@ impl ClairS {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- /// Processes germline VCF via Slurm.
|
|
|
- fn process_germline_sbatch(&self) -> anyhow::Result<()> {
|
|
|
- let germline_passed = self.germline_passed_vcf_path();
|
|
|
-
|
|
|
- if Path::new(&germline_passed).exists() {
|
|
|
- debug!(
|
|
|
- "ClairS germline PASS VCF already exists for {} part {:?}",
|
|
|
- self.id, self.part_index
|
|
|
- );
|
|
|
- return Ok(());
|
|
|
- }
|
|
|
-
|
|
|
- let germline_input = match self.part_index {
|
|
|
- Some(_) => {
|
|
|
- let output_dir = self.part_output_dir();
|
|
|
- let germline_file = self.config.clairs_germline_normal_vcf(&self.id);
|
|
|
- let base_name = Path::new(&germline_file)
|
|
|
- .file_name()
|
|
|
- .expect("Germline VCF should have filename")
|
|
|
- .to_string_lossy();
|
|
|
- format!("{output_dir}/{base_name}")
|
|
|
- }
|
|
|
- None => self.config.clairs_germline_normal_vcf(&self.id),
|
|
|
- };
|
|
|
-
|
|
|
- info!(
|
|
|
- "Filtering germline PASS variants via Slurm for {} part {:?}",
|
|
|
- self.id, self.part_index
|
|
|
- );
|
|
|
-
|
|
|
- let mut cmd =
|
|
|
- BcftoolsKeepPass::from_config(&self.config, germline_input, germline_passed.clone());
|
|
|
-
|
|
|
- let report = SlurmRunner::run(&mut cmd)
|
|
|
- .with_context(|| format!("Failed to filter germline PASS for {}", self.id))?;
|
|
|
-
|
|
|
- report
|
|
|
- .save_to_file(format!("{}/bcftools_germline_pass_", self.log_dir))
|
|
|
- .context("Failed to save germline PASS logs")?;
|
|
|
-
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- /// Processes somatic VCFs via Slurm.
|
|
|
- fn process_somatic_sbatch(&self) -> anyhow::Result<()> {
|
|
|
- let passed_vcf = self.somatic_passed_vcf_path();
|
|
|
-
|
|
|
- if Path::new(&passed_vcf).exists() {
|
|
|
- debug!(
|
|
|
- "ClairS somatic PASS VCF already exists for {} part {:?}",
|
|
|
- self.id, self.part_index
|
|
|
- );
|
|
|
- return Ok(());
|
|
|
- }
|
|
|
-
|
|
|
- let output_dir = self.part_output_dir();
|
|
|
- let (snv_vcf_base, indel_vcf_base) = self.config.clairs_output_vcfs(&self.id);
|
|
|
-
|
|
|
- let snv_vcf = format!(
|
|
|
- "{}/{}",
|
|
|
- output_dir,
|
|
|
- Path::new(&snv_vcf_base)
|
|
|
- .file_name()
|
|
|
- .expect("SNV VCF should have filename")
|
|
|
- .to_string_lossy()
|
|
|
- );
|
|
|
- let indel_vcf = format!(
|
|
|
- "{}/{}",
|
|
|
- output_dir,
|
|
|
- Path::new(&indel_vcf_base)
|
|
|
- .file_name()
|
|
|
- .expect("Indel VCF should have filename")
|
|
|
- .to_string_lossy()
|
|
|
- );
|
|
|
-
|
|
|
- info!(
|
|
|
- "Concatenating and filtering somatic variants via Slurm for {} part {:?}",
|
|
|
- self.id, self.part_index
|
|
|
- );
|
|
|
-
|
|
|
- let tmp_file = temp_file_path(".vcf.gz")?;
|
|
|
- let tmp_path = tmp_file
|
|
|
- .to_str()
|
|
|
- .context("Temp path not valid UTF-8")?
|
|
|
- .to_string();
|
|
|
-
|
|
|
- // Concat SNV + indel
|
|
|
- let mut concat = BcftoolsConcat::from_config(
|
|
|
- &self.config,
|
|
|
- vec![PathBuf::from(&snv_vcf), PathBuf::from(&indel_vcf)],
|
|
|
- &tmp_path,
|
|
|
- );
|
|
|
-
|
|
|
- let report = SlurmRunner::run(&mut concat)
|
|
|
- .with_context(|| format!("Failed to concat {} and {}", snv_vcf, indel_vcf))?;
|
|
|
-
|
|
|
- report
|
|
|
- .save_to_file(format!("{}/bcftools_concat_", self.log_dir))
|
|
|
- .context("Failed to save concat logs")?;
|
|
|
-
|
|
|
- // Filter PASS
|
|
|
- let mut keep_pass =
|
|
|
- BcftoolsKeepPass::from_config(&self.config, tmp_path.clone(), passed_vcf.clone());
|
|
|
-
|
|
|
- let report = SlurmRunner::run(&mut keep_pass)
|
|
|
- .with_context(|| format!("Failed to filter PASS for {}", self.id))?;
|
|
|
-
|
|
|
- report
|
|
|
- .save_to_file(format!("{}/bcftools_pass_", self.log_dir))
|
|
|
- .context("Failed to save PASS filter logs")?;
|
|
|
-
|
|
|
- fs::remove_file(&tmp_path).context("Failed to remove temporary concat VCF")?;
|
|
|
-
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- /// Post-processes ClairS output via Slurm.
|
|
|
- fn postprocess_sbatch(&self) -> anyhow::Result<()> {
|
|
|
- self.process_germline_sbatch()?;
|
|
|
- self.process_somatic_sbatch()?;
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- /// Runs ClairS locally with post-processing.
|
|
|
- pub fn run_local(&mut self) -> anyhow::Result<CapturedOutput> {
|
|
|
- if !self.should_run() {
|
|
|
- debug!(
|
|
|
- "ClairS output already up-to-date for {}, skipping.",
|
|
|
- self.id
|
|
|
- );
|
|
|
- return Ok(CapturedOutput::default());
|
|
|
- }
|
|
|
-
|
|
|
- info!("Running ClairS locally for {}", self.id);
|
|
|
- let out = <Self as LocalRunner>::run(self)?;
|
|
|
-
|
|
|
- self.postprocess_local()?;
|
|
|
- Ok(out)
|
|
|
- }
|
|
|
-
|
|
|
- /// Runs ClairS via Slurm with post-processing.
|
|
|
- pub fn run_sbatch(&mut self) -> anyhow::Result<CapturedOutput> {
|
|
|
- if !self.should_run() {
|
|
|
- debug!(
|
|
|
- "ClairS output already up-to-date for {}, skipping.",
|
|
|
- self.id
|
|
|
- );
|
|
|
- return Ok(CapturedOutput::default());
|
|
|
- }
|
|
|
-
|
|
|
- info!("Submitting ClairS via sbatch for {}", self.id);
|
|
|
- let out = <Self as SbatchRunner>::run(self)?;
|
|
|
-
|
|
|
- self.postprocess_sbatch()?;
|
|
|
- Ok(out)
|
|
|
- }
|
|
|
-
|
|
|
/// Returns the per-part output directory.
|
|
|
fn part_output_dir(&self) -> String {
|
|
|
let base_dir = self.config.clairs_output_dir(&self.id);
|
|
|
@@ -709,7 +711,7 @@ fn merge_clairs_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<()> {
|
|
|
);
|
|
|
|
|
|
let mut concat = BcftoolsConcat::from_config(&base.config, part_pass_paths, &final_tmp);
|
|
|
- SlurmRunner::run(&mut concat).context("Failed to run bcftools concat for ClairS parts")?;
|
|
|
+ run!(&base.config, &mut concat).context("Failed to run bcftools concat for ClairS parts")?;
|
|
|
|
|
|
fs::rename(&final_tmp, &final_passed_vcf).context("Failed to rename merged ClairS PASS VCF")?;
|
|
|
|
|
|
@@ -721,7 +723,6 @@ fn merge_clairs_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<()> {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
-/// Merges N chunked ClairS germline PASS VCFs into the final output.
|
|
|
fn merge_clairs_germline_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<()> {
|
|
|
let mut part_pass_paths: Vec<PathBuf> = Vec::with_capacity(n_parts);
|
|
|
|
|
|
@@ -753,7 +754,7 @@ fn merge_clairs_germline_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<
|
|
|
);
|
|
|
|
|
|
let mut concat = BcftoolsConcat::from_config(&base.config, part_pass_paths, &final_tmp);
|
|
|
- SlurmRunner::run(&mut concat)
|
|
|
+ run!(&base.config, &mut concat)
|
|
|
.context("Failed to run bcftools concat for ClairS germline parts")?;
|
|
|
|
|
|
fs::rename(&final_tmp, &final_passed_vcf)
|
|
|
@@ -767,26 +768,10 @@ fn merge_clairs_germline_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
-/// Runs ClairS in parallel chunks via Slurm, then merges results.
|
|
|
-///
|
|
|
-/// # Steps
|
|
|
-///
|
|
|
-/// 1. Split genome into N regions
|
|
|
-/// 2. Submit parallel Slurm jobs (one per region)
|
|
|
-/// 3. Post-process each part (concat SNV+indel, filter PASS)
|
|
|
-/// 4. Merge all part PASS VCFs into final output
|
|
|
-/// 5. Process germline VCF (from first part or full run)
|
|
|
+/// Runs ClairS in parallel chunks, then merges results.
|
|
|
///
|
|
|
-/// # Arguments
|
|
|
-///
|
|
|
-/// * `id` - Sample identifier
|
|
|
-/// * `config` - Pipeline configuration
|
|
|
-/// * `n_parts` - Target number of parallel jobs
|
|
|
-///
|
|
|
-/// # Returns
|
|
|
-///
|
|
|
-/// Vector of captured outputs from each Slurm job.
|
|
|
-pub fn run_clairs_chunked_sbatch_with_merge(
|
|
|
+/// Execution mode (local vs Slurm) is determined by `config.slurm_runner`.
|
|
|
+pub fn run_clairs_chunked_with_merge(
|
|
|
id: &str,
|
|
|
config: &Config,
|
|
|
n_parts: usize,
|
|
|
@@ -800,15 +785,16 @@ pub fn run_clairs_chunked_sbatch_with_merge(
|
|
|
return Ok(Vec::new());
|
|
|
}
|
|
|
|
|
|
- // Get genome sizes from normal BAM header
|
|
|
let normal_bam = config.normal_bam(id);
|
|
|
let reader = bam::Reader::from_path(&normal_bam)
|
|
|
.with_context(|| format!("Failed to open BAM: {normal_bam}"))?;
|
|
|
let header = bam::Header::from_template(reader.header());
|
|
|
let genome_sizes = get_genome_sizes(&header)?;
|
|
|
+ let regions = split_genome_into_n_regions_exact(&genome_sizes, n_parts)
|
|
|
+ .into_iter()
|
|
|
+ .flatten()
|
|
|
+ .collect::<Vec<String>>();
|
|
|
|
|
|
- // Split genome into regions
|
|
|
- let regions = split_genome_into_n_regions(&genome_sizes, n_parts);
|
|
|
let actual_n_parts = regions.len();
|
|
|
|
|
|
info!(
|
|
|
@@ -816,7 +802,6 @@ pub fn run_clairs_chunked_sbatch_with_merge(
|
|
|
actual_n_parts, id
|
|
|
);
|
|
|
|
|
|
- // Build jobs
|
|
|
let mut jobs = Vec::with_capacity(actual_n_parts);
|
|
|
for (i, region) in regions.into_iter().enumerate() {
|
|
|
let mut job = base.clone();
|
|
|
@@ -827,18 +812,13 @@ pub fn run_clairs_chunked_sbatch_with_merge(
|
|
|
jobs.push(job);
|
|
|
}
|
|
|
|
|
|
- // Run all parts via Slurm
|
|
|
- let outputs = run_many_sbatch(jobs.clone())?;
|
|
|
+ let outputs = run_many!(config, jobs.clone())?;
|
|
|
|
|
|
- // Post-process each part (creates .pass.vcf.gz files for both somatic and germline)
|
|
|
for job in &jobs {
|
|
|
- job.postprocess_sbatch()?;
|
|
|
+ job.postprocess()?;
|
|
|
}
|
|
|
|
|
|
- // Merge somatic PASS VCFs
|
|
|
merge_clairs_parts(&base, actual_n_parts)?;
|
|
|
-
|
|
|
- // Merge germline PASS VCFs
|
|
|
merge_clairs_germline_parts(&base, actual_n_parts)?;
|
|
|
|
|
|
info!(
|
|
|
@@ -869,10 +849,10 @@ mod tests {
|
|
|
fn clairs_run() -> anyhow::Result<()> {
|
|
|
test_init();
|
|
|
let config = Config::default();
|
|
|
- let clairs = ClairS::initialize("34528", &config)?;
|
|
|
- info!("{clairs}");
|
|
|
+ // let clairs = ClairS::initialize("34528", &config)?;
|
|
|
+ // info!("{clairs}");
|
|
|
|
|
|
- let outputs = run_clairs_chunked_sbatch_with_merge("34528", &config, 5)?;
|
|
|
+ let outputs = run_clairs_chunked_with_merge("34528", &config, 20)?;
|
|
|
info!("Completed with {} job outputs", outputs.len());
|
|
|
|
|
|
Ok(())
|