use crate::{ collection::{Initialize, InitializeSolo}, commands::bcftools::{bcftools_compress, bcftools_index}, config::Config, helpers::path_prefix, runners::{run_wait, CommandRun, Run}, }; use anyhow::Context; use duct::cmd; use std::{ fs, path::{Path, PathBuf}, }; use tracing::info; use super::{ bcftools::{bcftools_keep_pass, BcftoolsConfig}, modkit::ModkitSummary, }; #[derive(Debug, Clone)] pub struct LongphaseConfig { pub bin: String, pub result_dir: String, pub reference: String, pub threads: u8, pub force: bool, } impl Default for LongphaseConfig { fn default() -> Self { Self { bin: "/data/tools/longphase_linux-x64".to_string(), reference: "/data/ref/hs1/chm13v2.0.fa".to_string(), result_dir: "/data/longreads_basic_pipe".to_string(), threads: 150, force: true, } } } #[derive(Debug)] pub struct LongphaseHap { pub id: String, pub vcf: String, pub bam: PathBuf, pub bam_hp: PathBuf, pub config: LongphaseConfig, pub log_dir: String, } impl LongphaseHap { pub fn new(id: &str, bam: &str, phased_vcf: &str, config: LongphaseConfig) -> Self { let log_dir = format!("{}/{}/log/longphase", config.result_dir, id); let bam = Path::new(bam); // TODO change that use config.haplotagged_bam_tag_name let new_fn = format!("{}_HP", bam.file_stem().unwrap().to_str().unwrap()); let bam_hp = bam.with_file_name(new_fn); Self { id: id.to_string(), bam: bam.to_path_buf(), config, log_dir, vcf: phased_vcf.to_string(), bam_hp: bam_hp.to_path_buf(), } } pub fn run(&mut self) -> anyhow::Result<()> { if self.config.force && self.bam_hp.exists() { fs::remove_file(&self.bam_hp)?; } if !Path::new(&self.log_dir).exists() { fs::create_dir_all(&self.log_dir).expect("Failed to create output directory"); } // Run command if output VCF doesn't exist if !self.bam_hp.exists() { let args = [ "haplotag", "-s", &self.vcf, "-b", self.bam.to_str().unwrap(), "-r", &self.config.reference, "-t", &self.config.threads.to_string(), "--tagSupplementary", "-o", self.bam_hp.to_str().unwrap(), ]; let mut cmd_run = CommandRun::new(&self.config.bin, &args); let report = run_wait(&mut cmd_run).context(format!( "Error while running `{} {}`", self.config.bin, args.join(" ") ))?; let log_file = format!("{}/longphase_", self.log_dir); report .save_to_file(&log_file) .context(format!("Error while writing logs into {log_file}"))?; let _ = cmd!( "samtools", "index", "-@", &self.config.threads.to_string(), &format!("{}.bam", self.bam_hp.to_str().unwrap()) ) .run()?; } else { info!("Longphase output vcf already exists"); } Ok(()) } } // /data/tools/longphase_linux-x64 phase -s ClairS/clair3_normal_tumoral_germline_output.vcf.gz -b CUNY_diag_hs1_hp.bam -r /data/ref/hs1/chm13v2.0.fa -t 155 --ont -o ClairS/clair3_normal_tumoral_germline_output_PS #[derive(Debug)] pub struct LongphasePhase { pub id: String, pub vcf: String, pub out_prefix: String, pub bam: String, pub config: Config, pub log_dir: String, pub modcall_vcf: String, } impl Initialize for LongphasePhase { fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result { let log_dir = format!("{}/{}/log/longphase_phase", config.result_dir, id); if !Path::new(&log_dir).exists() { fs::create_dir_all(&log_dir) .context(format!("Failed to create {log_dir} directory"))?; } let vcf = config.constit_vcf(id); let bam = config.tumoral_bam(id); let out_prefix = path_prefix(&config.constit_phased_vcf(id))?; let modcall_vcf = config.longphase_modcall_vcf(id, "diag"); Ok(LongphasePhase { id: id.to_string(), config, log_dir, vcf, out_prefix, bam, modcall_vcf, }) } } impl Run for LongphasePhase { fn run(&mut self) -> anyhow::Result<()> { info!("Running longphase phase for: {}", self.vcf); info!("Saving longphase phase results in: {}", self.out_prefix); let final_vcf = self.config.constit_phased_vcf(&self.id); if !Path::new(&final_vcf).exists() { let args = [ "phase", "-s", &self.vcf, "-b", &self.bam, "-r", &self.config.reference, "--mod-file", &self.modcall_vcf, "-t", &self.config.longphase_threads.to_string(), "--ont", "-o", &self.out_prefix, ]; let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args); let report = run_wait(&mut cmd_run).context(format!( "Error while running `{} {}`", self.config.longphase_bin, args.join(" ") ))?; let log_file = format!("{}/longphase_phase_", self.log_dir); report .save_to_file(&log_file) .context(format!("Error while writing logs into {log_file}"))?; bcftools_compress( &format!("{}.vcf", self.out_prefix), &final_vcf, &BcftoolsConfig::default(), )?; bcftools_index(&final_vcf, &BcftoolsConfig::default())?; fs::remove_file(format!("{}.vcf", self.out_prefix))?; } Ok(()) } } #[derive(Debug)] pub struct LongphaseModcallSolo { pub id: String, pub time: String, pub bam: String, pub prefix: String, pub reference: String, pub threads: u8, pub log_dir: String, pub mod_threshold: f64, pub config: Config, } impl InitializeSolo for LongphaseModcallSolo { fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result { let id = id.to_string(); let time = time.to_string(); let log_dir = format!("{}/{}/log/longphase_modcall_solo", config.result_dir, &id); if !Path::new(&log_dir).exists() { fs::create_dir_all(&log_dir) .context(format!("Failed to create {log_dir} directory"))?; } let bam = config.solo_bam(&id, &time); if !Path::new(&bam).exists() { anyhow::bail!("Bam files doesn't exists: {bam}") } let mut modkit_summary = ModkitSummary::initialize(&id, &time, config.clone())?; modkit_summary.load()?; let mod_threshold = modkit_summary .result .ok_or_else(|| anyhow::anyhow!("Error no ModkitSummary for {id} {time}"))? .pass_threshold; let out_vcf = config.longphase_modcall_vcf(&id, &time); let out_dir = Path::new(&out_vcf) .parent() .ok_or_else(|| anyhow::anyhow!("Can't get dir of {out_vcf}"))?; fs::create_dir_all(out_dir)?; let prefix = path_prefix(&out_vcf)?; Ok(Self { id, time, bam, reference: config.reference.to_string(), threads: config.longphase_modcall_threads, config, log_dir, mod_threshold, prefix, }) } } impl Run for LongphaseModcallSolo { fn run(&mut self) -> anyhow::Result<()> { let args = [ "modcall", "-b", &self.bam, "-t", &self.threads.to_string(), "-r", &self.reference, "-m", &self.mod_threshold.to_string(), "-o", &self.prefix, ]; let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args); run_wait(&mut cmd_run) .context(format!( "Error while running `longphase modcall {}`", args.join(" ") ))? .save_to_file(&format!("{}/longphase_modcall_", self.log_dir)) .context(format!( "Error while writing logs into {}/longphase_modcall", self.log_dir ))?; let vcf = format!("{}.vcf", self.prefix); bcftools_keep_pass( &vcf, &format!("{}.vcf.gz", self.prefix), BcftoolsConfig::default(), ) .context(format!( "Can't run BCFtools PASS for LongphaseModcallSolo: {} {}", self.id, self.time ))? .save_to_file(&format!("{}/longphase_modcall_pass_", self.log_dir)) .context(format!( "Error while writing logs into {}/longphase_modcall_pass", self.log_dir ))?; fs::remove_file(&vcf).context(format!("Can't remove file: {vcf}"))?; Ok(()) } }