longphase.rs 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. use crate::{
  2. collection::{Initialize, InitializeSolo},
  3. commands::bcftools::{bcftools_compress, bcftools_index},
  4. config::Config,
  5. helpers::path_prefix,
  6. runners::{run_wait, CommandRun, Run},
  7. };
  8. use anyhow::Context;
  9. use duct::cmd;
  10. use std::{
  11. fs,
  12. path::{Path, PathBuf},
  13. };
  14. use tracing::info;
  15. use super::{
  16. bcftools::{bcftools_keep_pass, BcftoolsConfig},
  17. modkit::ModkitSummary,
  18. };
  19. #[derive(Debug, Clone)]
  20. pub struct LongphaseConfig {
  21. pub bin: String,
  22. pub result_dir: String,
  23. pub reference: String,
  24. pub threads: u8,
  25. pub force: bool,
  26. }
  27. impl Default for LongphaseConfig {
  28. fn default() -> Self {
  29. Self {
  30. bin: "/data/tools/longphase_linux-x64".to_string(),
  31. reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  32. result_dir: "/data/longreads_basic_pipe".to_string(),
  33. threads: 150,
  34. force: true,
  35. }
  36. }
  37. }
  38. #[derive(Debug)]
  39. pub struct LongphaseHap {
  40. pub id: String,
  41. pub vcf: String,
  42. pub bam: PathBuf,
  43. pub bam_hp: PathBuf,
  44. pub config: LongphaseConfig,
  45. pub log_dir: String,
  46. }
  47. impl LongphaseHap {
  48. pub fn new(id: &str, bam: &str, phased_vcf: &str, config: LongphaseConfig) -> Self {
  49. let log_dir = format!("{}/{}/log/longphase", config.result_dir, id);
  50. let bam = Path::new(bam);
  51. // TODO change that use config.haplotagged_bam_tag_name
  52. let new_fn = format!("{}_HP", bam.file_stem().unwrap().to_str().unwrap());
  53. let bam_hp = bam.with_file_name(new_fn);
  54. Self {
  55. id: id.to_string(),
  56. bam: bam.to_path_buf(),
  57. config,
  58. log_dir,
  59. vcf: phased_vcf.to_string(),
  60. bam_hp: bam_hp.to_path_buf(),
  61. }
  62. }
  63. pub fn run(&mut self) -> anyhow::Result<()> {
  64. if self.config.force && self.bam_hp.exists() {
  65. fs::remove_file(&self.bam_hp)?;
  66. }
  67. if !Path::new(&self.log_dir).exists() {
  68. fs::create_dir_all(&self.log_dir).expect("Failed to create output directory");
  69. }
  70. // Run command if output VCF doesn't exist
  71. if !self.bam_hp.exists() {
  72. let args = [
  73. "haplotag",
  74. "-s",
  75. &self.vcf,
  76. "-b",
  77. self.bam.to_str().unwrap(),
  78. "-r",
  79. &self.config.reference,
  80. "-t",
  81. &self.config.threads.to_string(),
  82. "--tagSupplementary",
  83. "-o",
  84. self.bam_hp.to_str().unwrap(),
  85. ];
  86. let mut cmd_run = CommandRun::new(&self.config.bin, &args);
  87. let report = run_wait(&mut cmd_run).context(format!(
  88. "Error while running `{} {}`",
  89. self.config.bin,
  90. args.join(" ")
  91. ))?;
  92. let log_file = format!("{}/longphase_", self.log_dir);
  93. report
  94. .save_to_file(&log_file)
  95. .context(format!("Error while writing logs into {log_file}"))?;
  96. let _ = cmd!(
  97. "samtools",
  98. "index",
  99. "-@",
  100. &self.config.threads.to_string(),
  101. &format!("{}.bam", self.bam_hp.to_str().unwrap())
  102. )
  103. .run()?;
  104. } else {
  105. info!("Longphase output vcf already exists");
  106. }
  107. Ok(())
  108. }
  109. }
  110. // /data/tools/longphase_linux-x64 phase -s ClairS/clair3_normal_tumoral_germline_output.vcf.gz -b CUNY_diag_hs1_hp.bam -r /data/ref/hs1/chm13v2.0.fa -t 155 --ont -o ClairS/clair3_normal_tumoral_germline_output_PS
  111. #[derive(Debug)]
  112. pub struct LongphasePhase {
  113. pub id: String,
  114. pub vcf: String,
  115. pub out_prefix: String,
  116. pub bam: String,
  117. pub config: Config,
  118. pub log_dir: String,
  119. pub modcall_vcf: String,
  120. }
  121. impl Initialize for LongphasePhase {
  122. fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
  123. let log_dir = format!("{}/{}/log/longphase_phase", config.result_dir, id);
  124. if !Path::new(&log_dir).exists() {
  125. fs::create_dir_all(&log_dir)
  126. .context(format!("Failed to create {log_dir} directory"))?;
  127. }
  128. let vcf = config.constit_vcf(id);
  129. let bam = config.tumoral_bam(id);
  130. let out_prefix = path_prefix(&config.constit_phased_vcf(id))?;
  131. let modcall_vcf = config.longphase_modcall_vcf(id, "diag");
  132. Ok(LongphasePhase {
  133. id: id.to_string(),
  134. config,
  135. log_dir,
  136. vcf,
  137. out_prefix,
  138. bam,
  139. modcall_vcf,
  140. })
  141. }
  142. }
  143. impl Run for LongphasePhase {
  144. fn run(&mut self) -> anyhow::Result<()> {
  145. info!("Running longphase phase for: {}", self.vcf);
  146. info!("Saving longphase phase results in: {}", self.out_prefix);
  147. let final_vcf = self.config.constit_phased_vcf(&self.id);
  148. if !Path::new(&final_vcf).exists() {
  149. let args = [
  150. "phase",
  151. "-s",
  152. &self.vcf,
  153. "-b",
  154. &self.bam,
  155. "-r",
  156. &self.config.reference,
  157. "--mod-file",
  158. &self.modcall_vcf,
  159. "-t",
  160. &self.config.longphase_threads.to_string(),
  161. "--ont",
  162. "-o",
  163. &self.out_prefix,
  164. ];
  165. let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
  166. let report = run_wait(&mut cmd_run).context(format!(
  167. "Error while running `{} {}`",
  168. self.config.longphase_bin,
  169. args.join(" ")
  170. ))?;
  171. let log_file = format!("{}/longphase_phase_", self.log_dir);
  172. report
  173. .save_to_file(&log_file)
  174. .context(format!("Error while writing logs into {log_file}"))?;
  175. bcftools_compress(
  176. &format!("{}.vcf", self.out_prefix),
  177. &final_vcf,
  178. &BcftoolsConfig::default(),
  179. )?;
  180. bcftools_index(&final_vcf, &BcftoolsConfig::default())?;
  181. fs::remove_file(format!("{}.vcf", self.out_prefix))?;
  182. }
  183. Ok(())
  184. }
  185. }
  186. #[derive(Debug)]
  187. pub struct LongphaseModcallSolo {
  188. pub id: String,
  189. pub time: String,
  190. pub bam: String,
  191. pub prefix: String,
  192. pub reference: String,
  193. pub threads: u8,
  194. pub log_dir: String,
  195. pub mod_threshold: f64,
  196. pub config: Config,
  197. }
  198. impl InitializeSolo for LongphaseModcallSolo {
  199. fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
  200. let id = id.to_string();
  201. let time = time.to_string();
  202. let log_dir = format!("{}/{}/log/longphase_modcall_solo", config.result_dir, &id);
  203. if !Path::new(&log_dir).exists() {
  204. fs::create_dir_all(&log_dir)
  205. .context(format!("Failed to create {log_dir} directory"))?;
  206. }
  207. let bam = config.solo_bam(&id, &time);
  208. if !Path::new(&bam).exists() {
  209. anyhow::bail!("Bam files doesn't exists: {bam}")
  210. }
  211. let mut modkit_summary = ModkitSummary::initialize(&id, &time, config.clone())?;
  212. modkit_summary.load()?;
  213. let mod_threshold = modkit_summary
  214. .result
  215. .ok_or_else(|| anyhow::anyhow!("Error no ModkitSummary for {id} {time}"))?
  216. .pass_threshold;
  217. let out_vcf = config.longphase_modcall_vcf(&id, &time);
  218. let out_dir = Path::new(&out_vcf)
  219. .parent()
  220. .ok_or_else(|| anyhow::anyhow!("Can't get dir of {out_vcf}"))?;
  221. fs::create_dir_all(out_dir)?;
  222. let prefix = path_prefix(&out_vcf)?;
  223. Ok(Self {
  224. id,
  225. time,
  226. bam,
  227. reference: config.reference.to_string(),
  228. threads: config.longphase_modcall_threads,
  229. config,
  230. log_dir,
  231. mod_threshold,
  232. prefix,
  233. })
  234. }
  235. }
  236. impl Run for LongphaseModcallSolo {
  237. fn run(&mut self) -> anyhow::Result<()> {
  238. let args = [
  239. "modcall",
  240. "-b",
  241. &self.bam,
  242. "-t",
  243. &self.threads.to_string(),
  244. "-r",
  245. &self.reference,
  246. "-m",
  247. &self.mod_threshold.to_string(),
  248. "-o",
  249. &self.prefix,
  250. ];
  251. let mut cmd_run = CommandRun::new(&self.config.longphase_bin, &args);
  252. run_wait(&mut cmd_run)
  253. .context(format!(
  254. "Error while running `longphase modcall {}`",
  255. args.join(" ")
  256. ))?
  257. .save_to_file(&format!("{}/longphase_modcall_", self.log_dir))
  258. .context(format!(
  259. "Error while writing logs into {}/longphase_modcall",
  260. self.log_dir
  261. ))?;
  262. let vcf = format!("{}.vcf", self.prefix);
  263. bcftools_keep_pass(
  264. &vcf,
  265. &format!("{}.vcf.gz", self.prefix),
  266. BcftoolsConfig::default(),
  267. )
  268. .context(format!(
  269. "Can't run BCFtools PASS for LongphaseModcallSolo: {} {}",
  270. self.id, self.time
  271. ))?
  272. .save_to_file(&format!("{}/longphase_modcall_pass_", self.log_dir))
  273. .context(format!(
  274. "Error while writing logs into {}/longphase_modcall_pass",
  275. self.log_dir
  276. ))?;
  277. fs::remove_file(&vcf).context(format!("Can't remove file: {vcf}"))?;
  278. Ok(())
  279. }
  280. }