|
|
@@ -1,43 +1,23 @@
|
|
|
+//! Longphase haplotagging, phasing, and modcall runners.
|
|
|
+//!
|
|
|
+//! All steps use the shared runner traits (local/Slurm) driven by the global `Config`.
|
|
|
use crate::{
|
|
|
- pipes::{Initialize, InitializeSolo},
|
|
|
- commands::bcftools::{bcftools_compress, bcftools_index},
|
|
|
+ commands::bcftools::{BcftoolsCompress, BcftoolsIndex, BcftoolsKeepPass},
|
|
|
+ commands::samtools::SamtoolsIndex,
|
|
|
config::Config,
|
|
|
helpers::path_prefix,
|
|
|
- runners::{run_wait, CommandRun, Run},
|
|
|
+ pipes::{Initialize, InitializeSolo},
|
|
|
+ run,
|
|
|
+ runners::Run,
|
|
|
};
|
|
|
use anyhow::Context;
|
|
|
-use duct::cmd;
|
|
|
use std::{
|
|
|
fs,
|
|
|
path::{Path, PathBuf},
|
|
|
};
|
|
|
use tracing::info;
|
|
|
|
|
|
-use super::{
|
|
|
- bcftools::{bcftools_keep_pass, BcftoolsConfig},
|
|
|
- modkit::ModkitSummary,
|
|
|
-};
|
|
|
-
|
|
|
-#[derive(Debug, Clone)]
|
|
|
-pub struct LongphaseConfig {
|
|
|
- pub bin: String,
|
|
|
- pub result_dir: String,
|
|
|
- pub reference: String,
|
|
|
- pub threads: u8,
|
|
|
- pub force: bool,
|
|
|
-}
|
|
|
-
|
|
|
-impl Default for LongphaseConfig {
|
|
|
- fn default() -> Self {
|
|
|
- Self {
|
|
|
- bin: "/data/tools/longphase_linux-x64".to_string(),
|
|
|
- reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
|
|
|
- result_dir: "/data/longreads_basic_pipe".to_string(),
|
|
|
- threads: 150,
|
|
|
- force: true,
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
+use super::modkit::ModkitSummary;
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
pub struct LongphaseHap {
|
|
|
@@ -45,12 +25,13 @@ pub struct LongphaseHap {
|
|
|
pub vcf: String,
|
|
|
pub bam: PathBuf,
|
|
|
pub bam_hp: PathBuf,
|
|
|
- pub config: LongphaseConfig,
|
|
|
+ pub config: Config,
|
|
|
pub log_dir: String,
|
|
|
+ job_args: Vec<String>,
|
|
|
}
|
|
|
|
|
|
impl LongphaseHap {
|
|
|
- pub fn new(id: &str, bam: &str, phased_vcf: &str, config: LongphaseConfig) -> Self {
|
|
|
+ pub fn new(id: &str, bam: &str, phased_vcf: &str, config: Config) -> Self {
|
|
|
let log_dir = format!("{}/{}/log/longphase", config.result_dir, id);
|
|
|
|
|
|
let bam = Path::new(bam);
|
|
|
@@ -65,11 +46,12 @@ impl LongphaseHap {
|
|
|
log_dir,
|
|
|
vcf: phased_vcf.to_string(),
|
|
|
bam_hp: bam_hp.to_path_buf(),
|
|
|
+ job_args: Vec::new(),
|
|
|
}
|
|
|
}
|
|
|
|
|
|
pub fn run(&mut self) -> anyhow::Result<()> {
|
|
|
- if self.config.force && self.bam_hp.exists() {
|
|
|
+ if self.config.longphase_force && self.bam_hp.exists() {
|
|
|
fs::remove_file(&self.bam_hp)?;
|
|
|
}
|
|
|
|
|
|
@@ -77,42 +59,37 @@ impl LongphaseHap {
|
|
|
fs::create_dir_all(&self.log_dir).expect("Failed to create output directory");
|
|
|
}
|
|
|
|
|
|
- // Run command if output VCF doesn't exist
|
|
|
if !self.bam_hp.exists() {
|
|
|
- let args = [
|
|
|
- "haplotag",
|
|
|
- "-s",
|
|
|
- &self.vcf,
|
|
|
- "-b",
|
|
|
- self.bam.to_str().unwrap(),
|
|
|
- "-r",
|
|
|
- &self.config.reference,
|
|
|
- "-t",
|
|
|
- &self.config.threads.to_string(),
|
|
|
- "--tagSupplementary",
|
|
|
- "-o",
|
|
|
- self.bam_hp.to_str().unwrap(),
|
|
|
+ self.job_args = vec![
|
|
|
+ "haplotag".to_string(),
|
|
|
+ "-s".to_string(),
|
|
|
+ self.vcf.clone(),
|
|
|
+ "-b".to_string(),
|
|
|
+ self.bam.to_string_lossy().to_string(),
|
|
|
+ "-r".to_string(),
|
|
|
+ self.config.reference.clone(),
|
|
|
+ "-t".to_string(),
|
|
|
+ self.config.threads.to_string(),
|
|
|
+ "--tagSupplementary".to_string(),
|
|
|
+ "-o".to_string(),
|
|
|
+ self.bam_hp.to_string_lossy().to_string(),
|
|
|
];
|
|
|
- let mut cmd_run = CommandRun::new(&self.config.bin, &args);
|
|
|
- let report = run_wait(&mut cmd_run).context(format!(
|
|
|
- "Error while running `{} {}`",
|
|
|
- self.config.bin,
|
|
|
- args.join(" ")
|
|
|
- ))?;
|
|
|
+ let report = run!(&self.config, self)
|
|
|
+ .context(format!("Error while running `{}`", self.job_args.join(" ")))?;
|
|
|
|
|
|
let log_file = format!("{}/longphase_", self.log_dir);
|
|
|
report
|
|
|
.save_to_file(&log_file)
|
|
|
.context(format!("Error while writing logs into {log_file}"))?;
|
|
|
|
|
|
- let _ = cmd!(
|
|
|
- "samtools",
|
|
|
- "index",
|
|
|
- "-@",
|
|
|
- &self.config.threads.to_string(),
|
|
|
- &format!("{}.bam", self.bam_hp.to_str().unwrap())
|
|
|
- )
|
|
|
- .run()?;
|
|
|
+ let bam_to_index = format!("{}.bam", self.bam_hp.to_string_lossy());
|
|
|
+ let mut sam_index = SamtoolsIndex {
|
|
|
+ bin: self.config.longphase_bin.clone(),
|
|
|
+ threads: self.config.longphase_threads,
|
|
|
+ bam: bam_to_index.clone(),
|
|
|
+ };
|
|
|
+ run!(&self.config, &mut sam_index)
|
|
|
+ .context(format!("samtools index failed for {bam_to_index}"))?;
|
|
|
} else {
|
|
|
info!("Longphase output vcf already exists");
|
|
|
}
|
|
|
@@ -121,6 +98,27 @@ impl LongphaseHap {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+impl crate::commands::Command for LongphaseHap {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl crate::commands::LocalRunner for LongphaseHap {}
|
|
|
+
|
|
|
+impl crate::commands::SlurmRunner for LongphaseHap {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ crate::commands::SlurmParams {
|
|
|
+ job_name: Some(format!("longphase_hap_{}", self.id)),
|
|
|
+ cpus_per_task: Some(self.config.longphase_threads as u32),
|
|
|
+ mem: Some("60G".into()),
|
|
|
+ partition: Some("shortq".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
// /data/tools/longphase_linux-x64 phase -s ClairS/clair3_normal_tumoral_germline_output.vcf.gz -b CUNY_diag_hs1_hp.bam -r /data/ref/hs1/chm13v2.0.fa -t 155 --ont -o ClairS/clair3_normal_tumoral_germline_output_PS
|
|
|
#[derive(Debug)]
|
|
|
pub struct LongphasePhase {
|
|
|
@@ -131,10 +129,11 @@ pub struct LongphasePhase {
|
|
|
pub config: Config,
|
|
|
pub log_dir: String,
|
|
|
pub modcall_vcf: String,
|
|
|
+ job_args: Vec<String>,
|
|
|
}
|
|
|
|
|
|
impl Initialize for LongphasePhase {
|
|
|
- fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
|
|
|
+ fn initialize(id: &str, config: &crate::config::Config) -> anyhow::Result<Self> {
|
|
|
let log_dir = format!("{}/{}/log/longphase_phase", config.result_dir, id);
|
|
|
if !Path::new(&log_dir).exists() {
|
|
|
fs::create_dir_all(&log_dir)
|
|
|
@@ -147,16 +146,38 @@ impl Initialize for LongphasePhase {
|
|
|
|
|
|
Ok(LongphasePhase {
|
|
|
id: id.to_string(),
|
|
|
- config,
|
|
|
+ config: config.clone(),
|
|
|
log_dir,
|
|
|
vcf,
|
|
|
out_prefix,
|
|
|
bam,
|
|
|
modcall_vcf,
|
|
|
+ job_args: Vec::new(),
|
|
|
})
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+impl crate::commands::Command for LongphasePhase {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl crate::commands::LocalRunner for LongphasePhase {}
|
|
|
+
|
|
|
+impl crate::commands::SlurmRunner for LongphasePhase {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ crate::commands::SlurmParams {
|
|
|
+ job_name: Some(format!("longphase_phase_{}", self.id)),
|
|
|
+ cpus_per_task: Some(self.config.longphase_threads as u32),
|
|
|
+ mem: Some("60G".into()),
|
|
|
+ partition: Some("shortq".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
impl Run for LongphasePhase {
|
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
|
info!("Running longphase phase for: {}", self.vcf);
|
|
|
@@ -164,27 +185,26 @@ impl Run for LongphasePhase {
|
|
|
|
|
|
let final_vcf = self.config.constit_phased_vcf(&self.id);
|
|
|
if !Path::new(&final_vcf).exists() {
|
|
|
- let args = [
|
|
|
- "phase",
|
|
|
- "-s",
|
|
|
- &self.vcf,
|
|
|
- "-b",
|
|
|
- &self.bam,
|
|
|
- "-r",
|
|
|
- &self.config.reference,
|
|
|
- "--mod-file",
|
|
|
- &self.modcall_vcf,
|
|
|
- "-t",
|
|
|
- &self.config.longphase_threads.to_string(),
|
|
|
- "--ont",
|
|
|
- "-o",
|
|
|
- &self.out_prefix,
|
|
|
+ self.job_args = vec![
|
|
|
+ "phase".to_string(),
|
|
|
+ "-s".to_string(),
|
|
|
+ self.vcf.clone(),
|
|
|
+ "-b".to_string(),
|
|
|
+ self.bam.clone(),
|
|
|
+ "-r".to_string(),
|
|
|
+ self.config.reference.clone(),
|
|
|
+ "--mod-file".to_string(),
|
|
|
+ self.modcall_vcf.clone(),
|
|
|
+ "-t".to_string(),
|
|
|
+ self.config.longphase_threads.to_string(),
|
|
|
+ "--ont".to_string(),
|
|
|
+ "-o".to_string(),
|
|
|
+ self.out_prefix.clone(),
|
|
|
];
|
|
|
- let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
|
|
|
- let report = run_wait(&mut cmd_run).context(format!(
|
|
|
+ let report = run!(&self.config, self).context(format!(
|
|
|
"Error while running `{} {}`",
|
|
|
self.config.longphase_bin,
|
|
|
- args.join(" ")
|
|
|
+ self.job_args.join(" ")
|
|
|
))?;
|
|
|
|
|
|
let log_file = format!("{}/longphase_phase_", self.log_dir);
|
|
|
@@ -192,12 +212,16 @@ impl Run for LongphasePhase {
|
|
|
.save_to_file(&log_file)
|
|
|
.context(format!("Error while writing logs into {log_file}"))?;
|
|
|
|
|
|
- bcftools_compress(
|
|
|
- &format!("{}.vcf", self.out_prefix),
|
|
|
+ let mut compress = BcftoolsCompress::from_config(
|
|
|
+ &self.config,
|
|
|
+ format!("{}.vcf", self.out_prefix),
|
|
|
&final_vcf,
|
|
|
- &BcftoolsConfig::default(),
|
|
|
- )?;
|
|
|
- bcftools_index(&final_vcf, &BcftoolsConfig::default())?;
|
|
|
+ );
|
|
|
+ run!(&self.config, &mut compress).context("bcftools compress failed")?;
|
|
|
+
|
|
|
+ let mut index = BcftoolsIndex::from_config(&self.config, &final_vcf);
|
|
|
+ run!(&self.config, &mut index).context("bcftools index failed")?;
|
|
|
+
|
|
|
fs::remove_file(format!("{}.vcf", self.out_prefix))?;
|
|
|
}
|
|
|
Ok(())
|
|
|
@@ -215,10 +239,11 @@ pub struct LongphaseModcallSolo {
|
|
|
pub log_dir: String,
|
|
|
pub mod_threshold: f64,
|
|
|
pub config: Config,
|
|
|
+ job_args: Vec<String>,
|
|
|
}
|
|
|
|
|
|
impl InitializeSolo for LongphaseModcallSolo {
|
|
|
- fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
|
|
|
+ fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
|
|
|
let id = id.to_string();
|
|
|
let time = time.to_string();
|
|
|
|
|
|
@@ -233,7 +258,7 @@ impl InitializeSolo for LongphaseModcallSolo {
|
|
|
anyhow::bail!("Bam files doesn't exists: {bam}")
|
|
|
}
|
|
|
|
|
|
- let mut modkit_summary = ModkitSummary::initialize(&id, &time, config.clone())?;
|
|
|
+ let mut modkit_summary = ModkitSummary::initialize(&id, &time, config)?;
|
|
|
modkit_summary.load()?;
|
|
|
let mod_threshold = modkit_summary
|
|
|
.result
|
|
|
@@ -253,56 +278,72 @@ impl InitializeSolo for LongphaseModcallSolo {
|
|
|
bam,
|
|
|
reference: config.reference.to_string(),
|
|
|
threads: config.longphase_modcall_threads,
|
|
|
- config,
|
|
|
+ config: config.clone(),
|
|
|
log_dir,
|
|
|
mod_threshold,
|
|
|
prefix,
|
|
|
+ job_args: Vec::new(),
|
|
|
})
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+impl crate::commands::Command for LongphaseModcallSolo {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ format!("{} {}", self.config.longphase_bin, self.job_args.join(" "))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl crate::commands::LocalRunner for LongphaseModcallSolo {}
|
|
|
+
|
|
|
+impl crate::commands::SlurmRunner for LongphaseModcallSolo {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ crate::commands::SlurmParams {
|
|
|
+ job_name: Some(format!("longphase_modcall_{}_{}", self.id, self.time)),
|
|
|
+ cpus_per_task: Some(self.threads as u32),
|
|
|
+ mem: Some("60G".into()),
|
|
|
+ partition: Some("shortq".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
impl Run for LongphaseModcallSolo {
|
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
|
- let args = [
|
|
|
- "modcall",
|
|
|
- "-b",
|
|
|
- &self.bam,
|
|
|
- "-t",
|
|
|
- &self.threads.to_string(),
|
|
|
- "-r",
|
|
|
- &self.reference,
|
|
|
- "-m",
|
|
|
- &self.mod_threshold.to_string(),
|
|
|
- "-o",
|
|
|
- &self.prefix,
|
|
|
+ self.job_args = vec![
|
|
|
+ "modcall".to_string(),
|
|
|
+ "-b".to_string(),
|
|
|
+ self.bam.clone(),
|
|
|
+ "-t".to_string(),
|
|
|
+ self.threads.to_string(),
|
|
|
+ "-r".to_string(),
|
|
|
+ self.reference.clone(),
|
|
|
+ "-m".to_string(),
|
|
|
+ self.mod_threshold.to_string(),
|
|
|
+ "-o".to_string(),
|
|
|
+ self.prefix.clone(),
|
|
|
];
|
|
|
- let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
|
|
|
- run_wait(&mut cmd_run)
|
|
|
- .context(format!(
|
|
|
- "Error while running `longphase modcall {}`",
|
|
|
- args.join(" ")
|
|
|
- ))?
|
|
|
- .save_to_file(&format!("{}/longphase_modcall_", self.log_dir))
|
|
|
+ let output = run!(&self.config, self).context("Error while running `longphase modcall`")?;
|
|
|
+ output
|
|
|
+ .save_to_file(format!("{}/longphase_modcall_", self.log_dir))
|
|
|
.context(format!(
|
|
|
"Error while writing logs into {}/longphase_modcall",
|
|
|
self.log_dir
|
|
|
))?;
|
|
|
|
|
|
let vcf = format!("{}.vcf", self.prefix);
|
|
|
- bcftools_keep_pass(
|
|
|
- &vcf,
|
|
|
- &format!("{}.vcf.gz", self.prefix),
|
|
|
- BcftoolsConfig::default(),
|
|
|
- )
|
|
|
- .context(format!(
|
|
|
+ let mut keep_pass =
|
|
|
+ BcftoolsKeepPass::from_config(&self.config, &vcf, format!("{}.vcf.gz", self.prefix));
|
|
|
+ let pass_report = run!(&self.config, &mut keep_pass).context(format!(
|
|
|
"Can't run BCFtools PASS for LongphaseModcallSolo: {} {}",
|
|
|
self.id, self.time
|
|
|
- ))?
|
|
|
- .save_to_file(&format!("{}/longphase_modcall_pass_", self.log_dir))
|
|
|
- .context(format!(
|
|
|
- "Error while writing logs into {}/longphase_modcall_pass",
|
|
|
- self.log_dir
|
|
|
))?;
|
|
|
+ pass_report
|
|
|
+ .save_to_file(format!("{}/longphase_modcall_pass_", self.log_dir))
|
|
|
+ .context(format!(
|
|
|
+ "Error while writing logs into {}/longphase_modcall_pass",
|
|
|
+ self.log_dir
|
|
|
+ ))?;
|
|
|
fs::remove_file(&vcf).context(format!("Can't remove file: {vcf}"))?;
|
|
|
Ok(())
|
|
|
}
|