| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309 |
- use crate::{
- collection::{Initialize, InitializeSolo},
- commands::bcftools::{bcftools_compress, bcftools_index},
- config::Config,
- helpers::path_prefix,
- runners::{run_wait, CommandRun, Run},
- };
- use anyhow::Context;
- use duct::cmd;
- use std::{
- fs,
- path::{Path, PathBuf},
- };
- use tracing::info;
- use super::{
- bcftools::{bcftools_keep_pass, BcftoolsConfig},
- modkit::ModkitSummary,
- };
/// Settings consumed by [`LongphaseHap`] when it shells out to longphase.
#[derive(Clone, Debug)]
pub struct LongphaseConfig {
    /// Program launched for the longphase command.
    pub bin: String,
    /// Root directory under which `<id>/log/longphase` is created.
    pub result_dir: String,
    /// Value handed to longphase as `-r`.
    pub reference: String,
    /// Thread count handed to longphase (`-t`) and to `samtools index` (`-@`).
    pub threads: u8,
    /// When `true`, a pre-existing haplotagged output is deleted before running.
    pub force: bool,
}
- impl Default for LongphaseConfig {
- fn default() -> Self {
- Self {
- bin: "/data/tools/longphase_linux-x64".to_string(),
- reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
- result_dir: "/data/longreads_basic_pipe".to_string(),
- threads: 150,
- force: true,
- }
- }
- }
- #[derive(Debug)]
- pub struct LongphaseHap {
- pub id: String,
- pub vcf: String,
- pub bam: PathBuf,
- pub bam_hp: PathBuf,
- pub config: LongphaseConfig,
- pub log_dir: String,
- }
- impl LongphaseHap {
- pub fn new(id: &str, bam: &str, phased_vcf: &str, config: LongphaseConfig) -> Self {
- let log_dir = format!("{}/{}/log/longphase", config.result_dir, id);
- let bam = Path::new(bam);
- // TODO change that use config.haplotagged_bam_tag_name
- let new_fn = format!("{}_HP", bam.file_stem().unwrap().to_str().unwrap());
- let bam_hp = bam.with_file_name(new_fn);
- Self {
- id: id.to_string(),
- bam: bam.to_path_buf(),
- config,
- log_dir,
- vcf: phased_vcf.to_string(),
- bam_hp: bam_hp.to_path_buf(),
- }
- }
- pub fn run(&mut self) -> anyhow::Result<()> {
- if self.config.force && self.bam_hp.exists() {
- fs::remove_file(&self.bam_hp)?;
- }
- if !Path::new(&self.log_dir).exists() {
- fs::create_dir_all(&self.log_dir).expect("Failed to create output directory");
- }
- // Run command if output VCF doesn't exist
- if !self.bam_hp.exists() {
- let args = [
- "haplotag",
- "-s",
- &self.vcf,
- "-b",
- self.bam.to_str().unwrap(),
- "-r",
- &self.config.reference,
- "-t",
- &self.config.threads.to_string(),
- "--tagSupplementary",
- "-o",
- self.bam_hp.to_str().unwrap(),
- ];
- let mut cmd_run = CommandRun::new(&self.config.bin, &args);
- let report = run_wait(&mut cmd_run).context(format!(
- "Error while running `{} {}`",
- self.config.bin,
- args.join(" ")
- ))?;
- let log_file = format!("{}/longphase_", self.log_dir);
- report
- .save_to_file(&log_file)
- .context(format!("Error while writing logs into {log_file}"))?;
- let _ = cmd!(
- "samtools",
- "index",
- "-@",
- &self.config.threads.to_string(),
- &format!("{}.bam", self.bam_hp.to_str().unwrap())
- )
- .run()?;
- } else {
- info!("Longphase output vcf already exists");
- }
- Ok(())
- }
- }
- // /data/tools/longphase_linux-x64 phase -s ClairS/clair3_normal_tumoral_germline_output.vcf.gz -b CUNY_diag_hs1_hp.bam -r /data/ref/hs1/chm13v2.0.fa -t 155 --ont -o ClairS/clair3_normal_tumoral_germline_output_PS
- #[derive(Debug)]
- pub struct LongphasePhase {
- pub id: String,
- pub vcf: String,
- pub out_prefix: String,
- pub bam: String,
- pub config: Config,
- pub log_dir: String,
- pub modcall_vcf: String,
- }
- impl Initialize for LongphasePhase {
- fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
- let log_dir = format!("{}/{}/log/longphase_phase", config.result_dir, id);
- if !Path::new(&log_dir).exists() {
- fs::create_dir_all(&log_dir)
- .context(format!("Failed to create {log_dir} directory"))?;
- }
- let vcf = config.constit_vcf(id);
- let bam = config.tumoral_bam(id);
- let out_prefix = path_prefix(&config.constit_phased_vcf(id))?;
- let modcall_vcf = config.longphase_modcall_vcf(id, "diag");
- Ok(LongphasePhase {
- id: id.to_string(),
- config,
- log_dir,
- vcf,
- out_prefix,
- bam,
- modcall_vcf,
- })
- }
- }
- impl Run for LongphasePhase {
- fn run(&mut self) -> anyhow::Result<()> {
- info!("Running longphase phase for: {}", self.vcf);
- info!("Saving longphase phase results in: {}", self.out_prefix);
- let final_vcf = self.config.constit_phased_vcf(&self.id);
- if !Path::new(&final_vcf).exists() {
- let args = [
- "phase",
- "-s",
- &self.vcf,
- "-b",
- &self.bam,
- "-r",
- &self.config.reference,
- "--mod-file",
- &self.modcall_vcf,
- "-t",
- &self.config.longphase_threads.to_string(),
- "--ont",
- "-o",
- &self.out_prefix,
- ];
- let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
- let report = run_wait(&mut cmd_run).context(format!(
- "Error while running `{} {}`",
- self.config.longphase_bin,
- args.join(" ")
- ))?;
- let log_file = format!("{}/longphase_phase_", self.log_dir);
- report
- .save_to_file(&log_file)
- .context(format!("Error while writing logs into {log_file}"))?;
- bcftools_compress(
- &format!("{}.vcf", self.out_prefix),
- &final_vcf,
- &BcftoolsConfig::default(),
- )?;
- bcftools_index(&final_vcf, &BcftoolsConfig::default())?;
- fs::remove_file(format!("{}.vcf", self.out_prefix))?;
- }
- Ok(())
- }
- }
- #[derive(Debug)]
- pub struct LongphaseModcallSolo {
- pub id: String,
- pub time: String,
- pub bam: String,
- pub prefix: String,
- pub reference: String,
- pub threads: u8,
- pub log_dir: String,
- pub mod_threshold: f64,
- pub config: Config,
- }
- impl InitializeSolo for LongphaseModcallSolo {
- fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
- let id = id.to_string();
- let time = time.to_string();
- let log_dir = format!("{}/{}/log/longphase_modcall_solo", config.result_dir, &id);
- if !Path::new(&log_dir).exists() {
- fs::create_dir_all(&log_dir)
- .context(format!("Failed to create {log_dir} directory"))?;
- }
- let bam = config.solo_bam(&id, &time);
- if !Path::new(&bam).exists() {
- anyhow::bail!("Bam files doesn't exists: {bam}")
- }
- let mut modkit_summary = ModkitSummary::initialize(&id, &time, config.clone())?;
- modkit_summary.load()?;
- let mod_threshold = modkit_summary
- .result
- .ok_or_else(|| anyhow::anyhow!("Error no ModkitSummary for {id} {time}"))?
- .pass_threshold;
- let out_vcf = config.longphase_modcall_vcf(&id, &time);
- let out_dir = Path::new(&out_vcf)
- .parent()
- .ok_or_else(|| anyhow::anyhow!("Can't get dir of {out_vcf}"))?;
- fs::create_dir_all(out_dir)?;
- let prefix = path_prefix(&out_vcf)?;
- Ok(Self {
- id,
- time,
- bam,
- reference: config.reference.to_string(),
- threads: config.longphase_modcall_threads,
- config,
- log_dir,
- mod_threshold,
- prefix,
- })
- }
- }
- impl Run for LongphaseModcallSolo {
- fn run(&mut self) -> anyhow::Result<()> {
- let args = [
- "modcall",
- "-b",
- &self.bam,
- "-t",
- &self.threads.to_string(),
- "-r",
- &self.reference,
- "-m",
- &self.mod_threshold.to_string(),
- "-o",
- &self.prefix,
- ];
- let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
- run_wait(&mut cmd_run)
- .context(format!(
- "Error while running `longphase modcall {}`",
- args.join(" ")
- ))?
- .save_to_file(&format!("{}/longphase_modcall_", self.log_dir))
- .context(format!(
- "Error while writing logs into {}/longphase_modcall",
- self.log_dir
- ))?;
- let vcf = format!("{}.vcf", self.prefix);
- bcftools_keep_pass(
- &vcf,
- &format!("{}.vcf.gz", self.prefix),
- BcftoolsConfig::default(),
- )
- .context(format!(
- "Can't run BCFtools PASS for LongphaseModcallSolo: {} {}",
- self.id, self.time
- ))?
- .save_to_file(&format!("{}/longphase_modcall_pass_", self.log_dir))
- .context(format!(
- "Error while writing logs into {}/longphase_modcall_pass",
- self.log_dir
- ))?;
- fs::remove_file(&vcf).context(format!("Can't remove file: {vcf}"))?;
- Ok(())
- }
- }
|