|
|
@@ -1,92 +1,72 @@
|
|
|
use crate::{
|
|
|
- commands::{
|
|
|
- bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
|
|
|
- longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
|
|
|
- },
|
|
|
- runners::{run_wait, DockerRun},
|
|
|
+ collection::{vcf::Vcf, Initialize}, commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig}, config::Config, helpers::{force_or_not, get_temp_file_path}, io::vcf::read_vcf, runners::{run_wait, DockerRun, Run}, variant::{variant::Variants, variant_collection::VariantCollection}
|
|
|
};
|
|
|
-use anyhow::Context;
|
|
|
-use pandora_lib_scan::Config;
|
|
|
+use anyhow::{Context, Ok};
|
|
|
use std::{fs, path::Path};
|
|
|
use tracing::info;
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
-pub struct ClairSConfig {
|
|
|
- pub result_dir: String,
|
|
|
- pub reference: String,
|
|
|
- pub threads: u8,
|
|
|
- pub platform: String,
|
|
|
- pub force: bool,
|
|
|
-}
|
|
|
-
|
|
|
-impl Default for ClairSConfig {
|
|
|
- fn default() -> Self {
|
|
|
- Self {
|
|
|
- result_dir: "/data/longreads_basic_pipe".to_string(),
|
|
|
- reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
|
|
|
- threads: 155,
|
|
|
- platform: "ont_r10_dorado_sup_5khz_ssrs".to_string(),
|
|
|
- force: true,
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-#[derive(Debug)]
|
|
|
pub struct ClairS {
|
|
|
pub id: String,
|
|
|
pub output_dir: String,
|
|
|
pub output_vcf: String,
|
|
|
- pub output_indel: String,
|
|
|
+ pub output_indels_vcf: String, // indel VCF path; second value returned by config.clairs_output_vcfs in initialize
|
|
|
pub vcf_passed: String,
|
|
|
- pub indel_vcf_passed: String,
|
|
|
pub diag_bam: String,
|
|
|
pub mrd_bam: String,
|
|
|
- pub config: ClairSConfig,
|
|
|
- pub log: String,
|
|
|
pub log_dir: String,
|
|
|
+ pub config: Config, // shared Config handed to initialize; run reads reference, clairs_threads, clairs_platform and clairs_force from it
|
|
|
+ pub clair3_germline_normal: String, // path from config.clairs_germline_normal_vcf; input of the germline concat in run
|
|
|
+ pub clair3_germline_tumor: String, // path from config.clairs_germline_tumor_vcf; input of the germline concat in run
|
|
|
+ pub clair3_germline_passed: String, // path from config.clairs_germline_passed_vcf; written by bcftools_keep_pass in run, read by germline()
|
|
|
}
|
|
|
|
|
|
-impl ClairS {
|
|
|
- pub fn new(id: &str, diag_bam: &str, mrd_bam: &str, config: ClairSConfig) -> Self {
|
|
|
- let output_dir = format!("{}/{}/diag/ClairS", config.result_dir, id);
|
|
|
- let output_vcf = format!("{output_dir}/output.vcf.gz");
|
|
|
- let output_indel = format!("{output_dir}/indel.vcf.gz");
|
|
|
- let vcf_passed = format!("{output_dir}/{id}_diag_clairs_PASSED.vcf.gz",);
|
|
|
- let indel_vcf_passed = format!("{output_dir}/{id}_diag_clairs_indel_PASSED.vcf.gz");
|
|
|
-
|
|
|
- let log_dir = format!("{}/{}/log/ClairS", config.result_dir, id);
|
|
|
- Self {
|
|
|
- id: id.to_string(),
|
|
|
+impl Initialize for ClairS { // Builds a ClairS value for one sample id, deriving every path from the shared Config.
|
|
|
+ fn initialize(id: &str, config: Config) -> anyhow::Result<Self> { // Side effect: creates the log and output directories; returns Err if either creation fails.
|
|
|
+ let id = id.to_string();
|
|
|
+ let log_dir = format!("{}/{}/log/clairs", config.result_dir, &id); // <result_dir>/<id>/log/clairs
|
|
|
+
|
|
|
+ if !Path::new(&log_dir).exists() { // creation is only attempted when the log directory is missing
|
|
|
+ fs::create_dir_all(&log_dir)
|
|
|
+ .context(format!("Failed to create {log_dir} directory"))?;
|
|
|
+ }
|
|
|
+
|
|
|
+ let output_dir = config.clairs_output_dir(&id);
|
|
|
+ fs::create_dir_all(&output_dir).context(format!("Can't create dir: {output_dir}"))?; // unlike log_dir, called without a prior exists() check
|
|
|
+
|
|
|
+ let (output_vcf, output_indels_vcf) = config.clairs_output_vcfs(&id); // pair of paths: (output VCF, indels VCF)
|
|
|
+ let vcf_passed = format!("{output_dir}/{id}_diag_clairs_PASSED.vcf.gz"); // target of the keep-PASS step in run
|
|
|
+
|
|
|
+ let diag_bam = config.tumoral_bam(&id); // diag_bam is whatever Config reports as the tumoral BAM
|
|
|
+ let mrd_bam = config.normal_bam(&id); // mrd_bam is whatever Config reports as the normal BAM
|
|
|
+
|
|
|
+ let clair3_germline_normal = config.clairs_germline_normal_vcf(&id); // germline VCF paths are all resolved by Config
|
|
|
+ let clair3_germline_tumor = config.clairs_germline_tumor_vcf(&id);
|
|
|
+ let clair3_germline_passed = config.clairs_germline_passed_vcf(&id);
|
|
|
+
|
|
|
+ Ok(Self {
|
|
|
+ id,
|
|
|
output_dir,
|
|
|
output_vcf,
|
|
|
+ output_indels_vcf,
|
|
|
vcf_passed,
|
|
|
- diag_bam: diag_bam.to_string(),
|
|
|
- mrd_bam: mrd_bam.to_string(),
|
|
|
- config,
|
|
|
- output_indel,
|
|
|
- log: String::default(),
|
|
|
- indel_vcf_passed,
|
|
|
+ diag_bam,
|
|
|
+ mrd_bam,
|
|
|
log_dir,
|
|
|
- }
|
|
|
+ config, // moved in last: the path computations above only borrow it
|
|
|
+ clair3_germline_normal,
|
|
|
+ clair3_germline_tumor,
|
|
|
+ clair3_germline_passed,
|
|
|
+ })
|
|
|
}
|
|
|
+}
|
|
|
|
|
|
- pub fn run(&mut self) -> anyhow::Result<()> {
|
|
|
- if self.config.force && Path::new(&self.output_vcf).exists() {
|
|
|
- fs::remove_dir_all(&self.output_dir)?;
|
|
|
- }
|
|
|
-
|
|
|
- // Create out dir
|
|
|
- if !Path::new(&self.output_dir).exists() {
|
|
|
- fs::create_dir_all(&self.output_dir).expect("Failed to create output directory");
|
|
|
- } else {
|
|
|
- info!("ClairS output directory exists");
|
|
|
- }
|
|
|
- if !Path::new(&self.log_dir).exists() {
|
|
|
- fs::create_dir_all(&self.log_dir).expect("Failed to create output directory");
|
|
|
- }
|
|
|
+impl Run for ClairS { // Runs the ClairS Docker job when its VCFs are missing, then builds the PASS-filtered germline and merged output VCFs with bcftools.
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> { // Each stage below is skipped when its own output file already exists; any failing step returns Err with context.
|
|
|
+ force_or_not(&self.vcf_passed, self.config.clairs_force)?; // NOTE(review): presumably decides whether an existing PASSED VCF is kept or discarded based on clairs_force — confirm in helpers::force_or_not
|
|
|
|
|
|
// Run Docker command if output VCF doesn't exist
|
|
|
- if !Path::new(&self.output_vcf).exists() {
|
|
|
+ if !Path::new(&self.output_vcf).exists() || !Path::new(&self.output_indels_vcf).exists() { // re-run when either the output VCF or the indels VCF is missing
|
|
|
let mut docker_run = DockerRun::new(&[
|
|
|
"run",
|
|
|
"-d",
|
|
|
@@ -103,9 +83,9 @@ impl ClairS {
|
|
|
"-R",
|
|
|
&self.config.reference,
|
|
|
"-t",
|
|
|
- &self.config.threads.to_string(),
|
|
|
+ &self.config.clairs_threads.to_string(),
|
|
|
"-p",
|
|
|
- &self.config.platform,
|
|
|
+ &self.config.clairs_platform,
|
|
|
"--enable_indel_calling",
|
|
|
"--include_all_ctgs",
|
|
|
"--print_germline_calls",
|
|
|
@@ -125,87 +105,102 @@ impl ClairS {
|
|
|
.save_to_file(&log_file)
|
|
|
.context(format!("Error while writing logs into {log_file}"))?;
|
|
|
} else {
|
|
|
- info!("ClairS output vcf already exists");
|
|
|
+ info!("ClairS vcfs already exist");
|
|
|
}
|
|
|
|
|
|
|
|
|
|
- let germline_normal = format!("{}/clair3_normal_germline_output.vcf.gz", self.output_dir);
|
|
|
- let germline_tumor = format!("{}/clair3_tumor_germline_output.vcf.gz", self.output_dir);
|
|
|
- let germline_normal_tumor = format!(
|
|
|
- "{}/clair3_normal_tumoral_germline_output.vcf.gz",
|
|
|
- self.output_dir
|
|
|
- );
|
|
|
- let report = bcftools_concat(
|
|
|
- vec![germline_normal, germline_tumor],
|
|
|
- &germline_normal_tumor,
|
|
|
- BcftoolsConfig::default(),
|
|
|
- )
|
|
|
- .context(format!(
|
|
|
- "Error while running bcftools concat germlines for {}",
|
|
|
- &self.output_vcf
|
|
|
- ))?;
|
|
|
- let log_file = format!("{}/bcftools_concat_germline", self.log_dir);
|
|
|
- report
|
|
|
- .save_to_file(&log_file)
|
|
|
- .context(format!("Error while writing logs into {log_file}"))?;
|
|
|
-
|
|
|
- // Keep PASS
|
|
|
- if !Path::new(&self.vcf_passed).exists() {
|
|
|
+ // Germline
|
|
|
+ if !Path::new(&self.clair3_germline_passed).exists() { // skipped when the PASS-filtered germline VCF is already present
|
|
|
+ let tmp_file = format!("{}.vcf.gz", get_temp_file_path()?.display()); // intermediate concat output; removed at the end of this branch
|
|
|
+ let report = bcftools_concat(
|
|
|
+ vec![
|
|
|
+ self.clair3_germline_tumor.to_string(),
|
|
|
+ self.clair3_germline_normal.to_string(),
|
|
|
+ ],
|
|
|
+ &tmp_file,
|
|
|
+ BcftoolsConfig::default(),
|
|
|
+ )
|
|
|
+ .context(format!(
|
|
|
+ "Error while running bcftools concat for {} and {}",
|
|
|
+ &self.clair3_germline_tumor, &self.clair3_germline_normal // report the germline inputs actually passed to bcftools_concat above
|
|
|
+ ))?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/bcftools_concat_", self.log_dir);
|
|
|
+ report
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .context(format!("Error while writing logs into {log_file}"))?;
|
|
|
+
|
|
|
let report = bcftools_keep_pass(
|
|
|
- &self.output_vcf,
|
|
|
- &self.vcf_passed,
|
|
|
+ &tmp_file,
|
|
|
+ &self.clair3_germline_passed,
|
|
|
BcftoolsConfig::default(),
|
|
|
)
|
|
|
.context(format!(
|
|
|
"Error while running bcftools keep PASS for {}",
|
|
|
- &self.output_vcf
|
|
|
+ &self.clair3_germline_passed
|
|
|
))?;
|
|
|
- let log_file = format!("{}/bcftools_pass", self.log_dir);
|
|
|
+ let log_file = format!("{}/bcftools_pass_", self.log_dir);
|
|
|
report
|
|
|
.save_to_file(&log_file)
|
|
|
.context(format!("Error while writing logs into {log_file}"))?;
|
|
|
+
|
|
|
+ fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?; // reached only when every step above succeeded; an early `?` return leaves the temp file on disk
|
|
|
}
|
|
|
|
|
|
|
|
|
|
- if !Path::new(&self.indel_vcf_passed).exists() {
|
|
|
- let report = bcftools_keep_pass(
|
|
|
- &self.output_indel,
|
|
|
- &self.indel_vcf_passed,
|
|
|
+ if !Path::new(&self.vcf_passed).exists() { // the merged PASSED VCF is rebuilt only when missing
|
|
|
+ // Concat output and indels
|
|
|
+ let tmp_file = format!("{}.vcf.gz", get_temp_file_path()?.display()); // separate temp path for the output + indels concat
|
|
|
+ let report = bcftools_concat(
|
|
|
+ vec![
|
|
|
+ self.output_vcf.to_string(),
|
|
|
+ self.output_indels_vcf.to_string(),
|
|
|
+ ],
|
|
|
+ &tmp_file,
|
|
|
BcftoolsConfig::default(),
|
|
|
)
|
|
|
.context(format!(
|
|
|
- "Error while running bcftools keep PASS for {}",
|
|
|
- &self.output_indel
|
|
|
+ "Error while running bcftools concat for {} and {}",
|
|
|
+ &self.output_vcf, &self.output_indels_vcf
|
|
|
))?;
|
|
|
|
|
|
|
|
|
|
- let log_file = format!("{}/bcftools_pass", self.log_dir);
|
|
|
+ let log_file = format!("{}/bcftools_concat_", self.log_dir);
|
|
|
report
|
|
|
.save_to_file(&log_file)
|
|
|
.context(format!("Error while writing logs into {log_file}"))?;
|
|
|
+
|
|
|
+ let report = bcftools_keep_pass(&tmp_file, &self.vcf_passed, BcftoolsConfig::default()) // filters the concatenated temp file into vcf_passed
|
|
|
+ .context(format!(
|
|
|
+ "Error while running bcftools keep PASS for {}",
|
|
|
+ &self.output_vcf
|
|
|
+ ))?;
|
|
|
+ let log_file = format!("{}/bcftools_pass_", self.log_dir);
|
|
|
+ report
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .context(format!("Error while writing logs into {log_file}"))?;
|
|
|
+
|
|
|
+ fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?; // as in the germline branch, skipped if an earlier step returned early
|
|
|
+ } else {
|
|
|
+ info!("ClairS PASSED vcf already exist");
|
|
|
}
|
|
|
|
|
|
|
|
|
|
- // let bam = Path::new(&self.diag_bam);
|
|
|
- // let new_fn = format!("{}_hp.bam", bam.file_stem().unwrap().to_str().unwrap());
|
|
|
- // let bam_hp = bam.with_file_name(new_fn);
|
|
|
- // LongphasePhase::new(
|
|
|
- // &self.id,
|
|
|
- // bam.to_str().unwrap(),
|
|
|
- // &germline_normal_tumor,
|
|
|
- // LongphaseConfig::default(),
|
|
|
- // )?
|
|
|
- // .run()?;
|
|
|
- // LongphaseHap::new(
|
|
|
- // &self.id,
|
|
|
- // &self.diag_bam,
|
|
|
- // &format!("{}/clair3_normal_tumoral_germline_output_PS.vcf.gz", self.output_dir),
|
|
|
- // LongphaseConfig::default(),
|
|
|
- // )
|
|
|
- // .run()?;
|
|
|
- // LongphaseHap::new(
|
|
|
- // &self.id,
|
|
|
- // &self.mrd_bam,
|
|
|
- // &format!("{}/clair3_normal_tumoral_germline_output_PS.vcf.gz", self.output_dir),
|
|
|
- // LongphaseConfig::default(),
|
|
|
- // )
|
|
|
- // .run()?;
|
|
|
Ok(())
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+impl Variants for ClairS { // Exposes the PASS-filtered ClairS VCF (vcf_passed) as a VariantCollection.
|
|
|
+ fn variants(&self) -> anyhow::Result<VariantCollection> { // Err if read_vcf or Vcf::new fails on vcf_passed.
|
|
|
+ Ok(VariantCollection {
|
|
|
+ variants: read_vcf(&self.vcf_passed)?, // whatever read_vcf yields for the PASSED VCF
|
|
|
+ vcf: Vcf::new(self.vcf_passed.clone().into())?, // same path handed to Vcf::new; cloned so self keeps its own copy
|
|
|
+ })
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl ClairS { // Inherent helpers that are not part of the Initialize / Run / Variants traits.
|
|
|
+ pub fn germline(&self) -> anyhow::Result<VariantCollection> { // Same shape as variants(), but reads clair3_germline_passed instead of vcf_passed.
|
|
|
+ Ok(VariantCollection {
|
|
|
+ variants: read_vcf(&self.clair3_germline_passed)?, // Err propagates if the germline PASSED VCF cannot be read
|
|
|
+ vcf: Vcf::new(self.clair3_germline_passed.clone().into())?, // path cloned so self keeps its own copy
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+}
|