savana.rs 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. use crate::{
  2. annotation::{Annotation, Annotations, Caller, CallerCat},
  3. collection::{vcf::Vcf, HasOutputs, Initialize, Version},
  4. commands::{
  5. bcftools::{bcftools_keep_pass, BcftoolsConfig},
  6. longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
  7. },
  8. config::Config,
  9. helpers::force_or_not,
  10. io::vcf::read_vcf,
  11. runners::{run_wait, CommandRun, Run},
  12. variant::{
  13. variant::{RunnerVariants, Variants},
  14. variant_collection::VariantCollection,
  15. },
  16. };
  17. use anyhow::Context;
  18. use log::info;
  19. use rayon::prelude::*;
  20. use std::{fs, path::Path};
  21. #[derive(Debug)]
  22. pub struct Savana {
  23. pub id: String,
  24. pub passed_vcf: String,
  25. pub phased_germline_vcf: String,
  26. pub normal_hp_bam: String,
  27. pub tumoral_hp_bam: String,
  28. pub config: Config,
  29. pub log_dir: String,
  30. }
  31. impl Initialize for Savana {
  32. fn initialize(id: &str, config: Config) -> anyhow::Result<Self> {
  33. let log_dir = format!("{}/{}/log/savana", config.result_dir, id);
  34. if !Path::new(&log_dir).exists() {
  35. fs::create_dir_all(&log_dir)
  36. .context(format!("Failed to create {log_dir} directory"))?;
  37. }
  38. // Check for phased germline vcf
  39. let phased_germline_vcf = config.constit_phased_vcf(id);
  40. if !Path::new(&phased_germline_vcf).exists() {
  41. LongphasePhase::initialize(id, config.clone())?.run()?;
  42. }
  43. let normal_hp_bam = config.normal_haplotagged_bam(id);
  44. let tumoral_hp_bam = config.tumoral_haplotagged_bam(id);
  45. let passed_vcf = config.savana_passed_vcf(id);
  46. fs::create_dir_all(config.savana_output_dir(id))?;
  47. Ok(Self {
  48. id: id.to_string(),
  49. passed_vcf,
  50. config,
  51. log_dir,
  52. phased_germline_vcf,
  53. normal_hp_bam,
  54. tumoral_hp_bam,
  55. })
  56. }
  57. }
  58. impl Run for Savana {
  59. fn run(&mut self) -> anyhow::Result<()> {
  60. force_or_not(
  61. &self.config.savana_output_vcf(&self.id),
  62. self.config.savana_force,
  63. )?;
  64. info!("Running Savana v{}", Savana::version(&self.config)?);
  65. let id = &self.id;
  66. let output_vcf = &self.config.savana_output_vcf(id);
  67. if !Path::new(&output_vcf).exists() {
  68. // Check for haplotagged bam
  69. if !Path::new(&self.normal_hp_bam).exists() {
  70. LongphaseHap::new(
  71. id,
  72. &self.config.normal_bam(id),
  73. &self.phased_germline_vcf,
  74. LongphaseConfig::default(),
  75. )
  76. .run()?;
  77. }
  78. if !Path::new(&self.tumoral_hp_bam).exists() {
  79. LongphaseHap::new(
  80. id,
  81. &self.config.tumoral_bam(id),
  82. &self.phased_germline_vcf,
  83. LongphaseConfig::default(),
  84. )
  85. .run()?;
  86. }
  87. let savana_args = [
  88. // "run",
  89. "--tumour",
  90. &self.tumoral_hp_bam,
  91. "--normal",
  92. &self.normal_hp_bam,
  93. "--outdir",
  94. &self.config.savana_output_dir(id),
  95. "--ref",
  96. &self.config.reference,
  97. "--phased_vcf",
  98. &self.phased_germline_vcf,
  99. "--no_blacklist",
  100. "--threads",
  101. &self.config.savana_threads.to_string(),
  102. ];
  103. let args = [
  104. "-c",
  105. &format!(
  106. "source {} && conda activate savana && {} {}",
  107. self.config.conda_sh,
  108. self.config.savana_bin,
  109. savana_args.join(" ")
  110. ),
  111. ];
  112. let mut cmd_run = CommandRun::new("bash", &args);
  113. let report = run_wait(&mut cmd_run).context(format!(
  114. "Error while running `severus.py {}`",
  115. args.join(" ")
  116. ))?;
  117. let log_file = format!("{}/savana_", self.log_dir);
  118. report
  119. .save_to_file(&log_file)
  120. .context(format!("Error while writing logs into {log_file}"))?;
  121. }
  122. // Keep PASS
  123. if !Path::new(&self.passed_vcf).exists() && Path::new(output_vcf).exists() {
  124. let report =
  125. bcftools_keep_pass(output_vcf, &self.passed_vcf, BcftoolsConfig::default())
  126. .context(format!(
  127. "Error while running bcftools keep PASS for {output_vcf}"
  128. ))?;
  129. let log_file = format!("{}/bcftools_pass", self.log_dir);
  130. report
  131. .save_to_file(&log_file)
  132. .context(format!("Error while writing logs into {log_file}"))?;
  133. }
  134. Ok(())
  135. }
  136. }
  137. impl HasOutputs for Savana {
  138. fn has_output(&self, id: &str) -> (bool, bool) {
  139. let output = &self.config.savana_passed_vcf(id);
  140. let passed = &self.config.savana_passed_vcf(id);
  141. (Path::new(output).exists(), Path::new(passed).exists())
  142. }
  143. }
  144. impl Version for Savana {
  145. fn version(config: &Config) -> anyhow::Result<String> {
  146. let savana_args = ["--version"];
  147. let args = [
  148. "-c",
  149. &format!(
  150. "source {} && conda activate savana && {} {}",
  151. config.conda_sh,
  152. config.savana_bin,
  153. savana_args.join(" ")
  154. ),
  155. ];
  156. let mut cmd_run = CommandRun::new("bash", &args);
  157. let report = run_wait(&mut cmd_run)
  158. .context(format!("Error while running `savana {}`", args.join(" ")))?;
  159. let log = report.log;
  160. let start = log
  161. .find("Version ")
  162. .context("Failed to find 'Version ' in the log")?;
  163. let start_index = start + "Version ".len();
  164. let end = log[start_index..]
  165. .find('\n')
  166. .context("Failed to find newline after 'Version '")?;
  167. Ok(log[start_index..start_index + end]
  168. .to_string()
  169. .trim()
  170. .to_string())
  171. }
  172. }
  173. impl CallerCat for Savana {
  174. fn caller_cat(&self) -> (Caller, Annotation) {
  175. (Caller::Savana, Annotation::Somatic)
  176. }
  177. }
  178. impl Variants for Savana {
  179. fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
  180. let (caller, category) = self.caller_cat();
  181. let add = vec![Annotation::Callers(caller.clone()), category.clone()];
  182. info!(
  183. "Loading variants from Savana {} with annotations: {:?}",
  184. self.id, add
  185. );
  186. let variants = read_vcf(&self.passed_vcf)?;
  187. variants.par_iter().for_each(|v| {
  188. annotations.insert_update(v.hash(), &add);
  189. });
  190. Ok(VariantCollection {
  191. variants,
  192. vcf: Vcf::new(self.passed_vcf.clone().into())?,
  193. caller,
  194. category,
  195. })
  196. }
  197. }
  198. impl RunnerVariants for Savana {}