| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- use crate::{
- annotation::{Annotation, Annotations, Caller, CallerCat},
- collection::{vcf::Vcf, HasOutputs, Initialize, Version},
- commands::{
- bcftools::{bcftools_keep_pass, BcftoolsConfig},
- longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
- },
- config::Config,
- helpers::force_or_not,
- io::vcf::read_vcf,
- runners::{run_wait, CommandRun, Run},
- variant::{
- variant::{RunnerVariants, Variants},
- variant_collection::VariantCollection,
- },
- };
- use anyhow::Context;
- use log::info;
- use rayon::prelude::*;
- use std::{fs, path::Path};
- #[derive(Debug)]
- pub struct Savana {
- pub id: String,
- pub passed_vcf: String,
- pub phased_germline_vcf: String,
- pub normal_hp_bam: String,
- pub tumoral_hp_bam: String,
- pub config: Config,
- pub log_dir: String,
- }
- impl Initialize for Savana {
- fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
- let log_dir = format!("{}/{}/log/savana", config.result_dir, id);
- if !Path::new(&log_dir).exists() {
- fs::create_dir_all(&log_dir)
- .context(format!("Failed to create {log_dir} directory"))?;
- }
- // Check for phased germline vcf
- let phased_germline_vcf = config.constit_phased_vcf(id);
- if !Path::new(&phased_germline_vcf).exists() {
- LongphasePhase::initialize(id, config.clone())?.run()?;
- }
- let normal_hp_bam = config.normal_haplotagged_bam(id);
- let tumoral_hp_bam = config.tumoral_haplotagged_bam(id);
- let passed_vcf = config.savana_passed_vcf(id);
- fs::create_dir_all(config.savana_output_dir(id))?;
- Ok(Self {
- id: id.to_string(),
- passed_vcf,
- config,
- log_dir,
- phased_germline_vcf,
- normal_hp_bam,
- tumoral_hp_bam,
- })
- }
- }
- impl Run for Savana {
- fn run(&mut self) -> anyhow::Result<()> {
- force_or_not(
- &self.config.savana_output_vcf(&self.id),
- self.config.savana_force,
- )?;
- info!("Running Savana v{}", Savana::version(&self.config)?);
- let id = &self.id;
- let output_vcf = &self.config.savana_output_vcf(id);
- if !Path::new(&output_vcf).exists() {
- // Check for haplotagged bam
- if !Path::new(&self.normal_hp_bam).exists() {
- LongphaseHap::new(
- id,
- &self.config.normal_bam(id),
- &self.phased_germline_vcf,
- LongphaseConfig::default(),
- )
- .run()?;
- }
- if !Path::new(&self.tumoral_hp_bam).exists() {
- LongphaseHap::new(
- id,
- &self.config.tumoral_bam(id),
- &self.phased_germline_vcf,
- LongphaseConfig::default(),
- )
- .run()?;
- }
- let savana_args = [
- // "run",
- "--tumour",
- &self.tumoral_hp_bam,
- "--normal",
- &self.normal_hp_bam,
- "--outdir",
- &self.config.savana_output_dir(id),
- "--ref",
- &self.config.reference,
- "--phased_vcf",
- &self.phased_germline_vcf,
- "--no_blacklist",
- "--threads",
- &self.config.savana_threads.to_string(),
- ];
- let args = [
- "-c",
- &format!(
- "source {} && conda activate savana && {} {}",
- self.config.conda_sh,
- self.config.savana_bin,
- savana_args.join(" ")
- ),
- ];
- let mut cmd_run = CommandRun::new("bash", &args);
- let report = run_wait(&mut cmd_run).context(format!(
- "Error while running `severus.py {}`",
- args.join(" ")
- ))?;
- let log_file = format!("{}/savana_", self.log_dir);
- report
- .save_to_file(&log_file)
- .context(format!("Error while writing logs into {log_file}"))?;
- }
- // Keep PASS
- if !Path::new(&self.passed_vcf).exists() && Path::new(output_vcf).exists() {
- let report =
- bcftools_keep_pass(output_vcf, &self.passed_vcf, BcftoolsConfig::default())
- .context(format!(
- "Error while running bcftools keep PASS for {output_vcf}"
- ))?;
- let log_file = format!("{}/bcftools_pass", self.log_dir);
- report
- .save_to_file(&log_file)
- .context(format!("Error while writing logs into {log_file}"))?;
- }
- Ok(())
- }
- }
- impl HasOutputs for Savana {
- fn has_output(&self, id: &str) -> (bool, bool) {
- let output = &self.config.savana_passed_vcf(id);
- let passed = &self.config.savana_passed_vcf(id);
- (Path::new(output).exists(), Path::new(passed).exists())
- }
- }
- impl Version for Savana {
- fn version(config: &Config) -> anyhow::Result<String> {
- let savana_args = ["--version"];
- let args = [
- "-c",
- &format!(
- "source {} && conda activate savana && {} {}",
- config.conda_sh,
- config.savana_bin,
- savana_args.join(" ")
- ),
- ];
- let mut cmd_run = CommandRun::new("bash", &args);
- let report = run_wait(&mut cmd_run)
- .context(format!("Error while running `savana {}`", args.join(" ")))?;
- let log = report.log;
- let start = log
- .find("Version ")
- .context("Failed to find 'Version ' in the log")?;
- let start_index = start + "Version ".len();
- let end = log[start_index..]
- .find('\n')
- .context("Failed to find newline after 'Version '")?;
- Ok(log[start_index..start_index + end]
- .to_string()
- .trim()
- .to_string())
- }
- }
- impl CallerCat for Savana {
- fn caller_cat(&self) -> (Caller, Annotation) {
- (Caller::Savana, Annotation::Somatic)
- }
- }
- impl Variants for Savana {
- fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
- let (caller, category) = self.caller_cat();
- let add = vec![Annotation::Callers(caller.clone()), category.clone()];
- info!(
- "Loading variants from Savana {} with annotations: {:?}",
- self.id, add
- );
- let variants = read_vcf(&self.passed_vcf)?;
- variants.par_iter().for_each(|v| {
- annotations.insert_update(v.hash(), &add);
- });
- Ok(VariantCollection {
- variants,
- vcf: Vcf::new(self.passed_vcf.clone().into())?,
- caller,
- category,
- })
- }
- }
- impl RunnerVariants for Savana {}
|