bcftools.rs 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. use anyhow::Context;
  2. use log::info;
  3. use std::{fs, path::Path};
  4. use uuid::Uuid;
  5. use crate::runners::{run_wait, CommandRun, RunReport};
  6. #[derive(Debug)]
  7. pub struct BcftoolsConfig {
  8. pub bin: String,
  9. pub threads: u8,
  10. }
  11. impl Default for BcftoolsConfig {
  12. fn default() -> Self {
  13. Self {
  14. bin: "/data/tools/bcftools-1.21/bcftools".to_string(),
  15. threads: 20,
  16. }
  17. }
  18. }
  19. pub fn bcftools_keep_pass(
  20. input: &str,
  21. output: &str,
  22. config: BcftoolsConfig,
  23. ) -> anyhow::Result<RunReport> {
  24. if !Path::new(input).exists() {
  25. anyhow::bail!("File doesnt exist {input}")
  26. }
  27. // First sort
  28. let tmp_file = format!("/tmp/{}", Uuid::new_v4());
  29. let mut cmd_run = CommandRun::new(&config.bin, &["sort", input, "-o", &tmp_file]);
  30. let _ = run_wait(&mut cmd_run)?;
  31. // 2. norm
  32. let tmp2_file = format!("/tmp/{}", Uuid::new_v4());
  33. let mut cmd_run = CommandRun::new(
  34. &config.bin,
  35. &[
  36. "norm",
  37. "--threads",
  38. &config.threads.to_string(),
  39. "-a",
  40. "--atom-overlaps",
  41. ".",
  42. &tmp_file,
  43. "-o",
  44. &tmp2_file,
  45. ],
  46. );
  47. let _ = run_wait(&mut cmd_run)?;
  48. fs::remove_file(tmp_file)?;
  49. // Then filter
  50. let mut cmd_run = CommandRun::new(
  51. &config.bin,
  52. &[
  53. "view",
  54. "--write-index",
  55. "--threads",
  56. &config.threads.to_string(),
  57. "-i",
  58. "FILTER='PASS'",
  59. &tmp2_file,
  60. "-o",
  61. output,
  62. ],
  63. );
  64. let res = run_wait(&mut cmd_run)?;
  65. fs::remove_file(tmp2_file)?;
  66. Ok(res)
  67. }
  68. pub fn bcftools_keep_pass_precise(
  69. input: &str,
  70. output: &str,
  71. config: BcftoolsConfig,
  72. ) -> anyhow::Result<RunReport> {
  73. if !Path::new(input).exists() {
  74. anyhow::bail!("File doesnt exist {input}")
  75. }
  76. // First sort
  77. let tmp_file = format!("/tmp/{}", Uuid::new_v4());
  78. let mut cmd_run = CommandRun::new(&config.bin, &["sort", input, "-o", &tmp_file]);
  79. let _ = run_wait(&mut cmd_run)?;
  80. // 2. norm
  81. let tmp2_file = format!("/tmp/{}", Uuid::new_v4());
  82. let mut cmd_run = CommandRun::new(
  83. &config.bin,
  84. &[
  85. "norm",
  86. "--threads",
  87. &config.threads.to_string(),
  88. "-a",
  89. "--atom-overlaps",
  90. ".",
  91. &tmp_file,
  92. "-o",
  93. &tmp2_file,
  94. ],
  95. );
  96. let _ = run_wait(&mut cmd_run)?;
  97. fs::remove_file(tmp_file)?;
  98. // Then filter
  99. let mut cmd_run = CommandRun::new(
  100. &config.bin,
  101. &[
  102. "view",
  103. "--write-index",
  104. "--threads",
  105. &config.threads.to_string(),
  106. "-e",
  107. "INFO/IMPRECISE==1 || FILTER!=\"PASS\"",
  108. &tmp2_file,
  109. "-o",
  110. output,
  111. ],
  112. );
  113. let res = run_wait(&mut cmd_run)?;
  114. fs::remove_file(tmp2_file)?;
  115. Ok(res)
  116. }
  117. pub fn bcftools_concat(
  118. inputs: Vec<String>,
  119. output: &str,
  120. config: BcftoolsConfig,
  121. ) -> anyhow::Result<RunReport> {
  122. info!("Concatening vcf with bcftools: {}", inputs.join(", "));
  123. let tmp_file = format!("/tmp/{}", Uuid::new_v4());
  124. fs::write(&tmp_file, inputs.join("\n"))?;
  125. let args = [
  126. "concat",
  127. "--write-index",
  128. "--threads",
  129. &config.threads.to_string(),
  130. "-a",
  131. "-D",
  132. "-f",
  133. &tmp_file,
  134. "-o",
  135. output,
  136. ];
  137. // Then filter
  138. let mut cmd_run = CommandRun::new(&config.bin, &args);
  139. let res = run_wait(&mut cmd_run)
  140. .context(format!("Error while running `bcftools {}`", args.join(" ")))?;
  141. fs::remove_file(tmp_file)?;
  142. Ok(res)
  143. }
  144. pub fn bcftools_keep_only_in_a(
  145. a: &str,
  146. b: &str,
  147. out: &str,
  148. config: &BcftoolsConfig,
  149. ) -> anyhow::Result<()> {
  150. let args = ["isec", "-C", "-w", "1", a, b, "-o", out];
  151. let mut cmd_run = CommandRun::new(&config.bin, &args);
  152. let _ = run_wait(&mut cmd_run)
  153. .context(format!("Error while running `bcftools {}`", args.join(" ")))?;
  154. Ok(())
  155. }
  156. pub fn bcftools_index(vcf: &str, config: &BcftoolsConfig) -> anyhow::Result<()> {
  157. let args = ["index", "--threads", &config.threads.to_string(), vcf];
  158. let mut cmd_run = CommandRun::new(&config.bin, &args);
  159. let _ = run_wait(&mut cmd_run)
  160. .context(format!("Error while running `bcftools {}`", args.join(" ")))?;
  161. Ok(())
  162. }
  163. pub fn bcftools_compress(
  164. in_vcf: &str,
  165. out_vcf: &str,
  166. config: &BcftoolsConfig,
  167. ) -> anyhow::Result<()> {
  168. let args = [
  169. "view",
  170. "--threads",
  171. &config.threads.to_string(),
  172. in_vcf,
  173. "-Oz",
  174. "-o",
  175. out_vcf,
  176. ];
  177. let mut cmd_run = CommandRun::new(&config.bin, &args);
  178. let _ = run_wait(&mut cmd_run)
  179. .context(format!("Error while running `bcftools {}`", args.join(" ")))?;
  180. Ok(())
  181. }