cramino.rs 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. use std::{
  2. collections::HashMap,
  3. env,
  4. fs::{self, File},
  5. io::{self, BufRead},
  6. path::PathBuf,
  7. };
  8. use anyhow::anyhow;
  9. use log::info;
  10. use uuid::Uuid;
  11. use crate::utils::{exec_bin_to_file, RunBin};
  12. #[derive(Debug)]
  13. pub struct Cramino {
  14. pub prog_path: String,
  15. pub args: Vec<String>,
  16. pub bam_file: PathBuf,
  17. pub result_path: PathBuf,
  18. pub results: Option<CraminoRes>,
  19. pub remove_result_file: bool,
  20. }
  21. impl Default for Cramino {
  22. fn default() -> Self {
  23. let mut result_path = env::temp_dir();
  24. result_path.push(format!("{}_cramino.txt", Uuid::new_v4()));
  25. Self {
  26. prog_path: "cramino".to_string(),
  27. args: vec![
  28. "--hist".to_string(),
  29. "--checksum".to_string(),
  30. "--karyotype".to_string(),
  31. ],
  32. result_path,
  33. results: None,
  34. remove_result_file: false,
  35. bam_file: PathBuf::new(),
  36. }
  37. }
  38. }
  39. impl Cramino {
  40. pub fn with_result_path(mut self, result_path: &str) -> Self {
  41. self.result_path = PathBuf::from(result_path);
  42. self
  43. }
  44. pub fn with_threads(mut self, n_threads: usize) -> Self {
  45. self.args.push("-t".to_string());
  46. self.args.push(n_threads.to_string());
  47. self
  48. }
  49. pub fn with_bam(mut self, bam_file: &str) -> anyhow::Result<Self> {
  50. let bam_file = PathBuf::from(bam_file);
  51. if !bam_file.exists() {
  52. Err(anyhow!("BAM file doesn't exists."))
  53. } else {
  54. self.bam_file = bam_file;
  55. Ok(self)
  56. }
  57. }
  58. }
  59. impl RunBin for Cramino {
  60. fn exec(&self) -> anyhow::Result<()> {
  61. if self.result_path.exists() {
  62. info!("Result file already exists.");
  63. Ok(())
  64. } else {
  65. let mut args = self.args.clone();
  66. args.push(self.bam_file.to_string_lossy().to_string());
  67. exec_bin_to_file(&self.prog_path, &args, &self.result_path)
  68. }
  69. }
  70. fn parse_results(&mut self) -> anyhow::Result<()> {
  71. let file = File::open(&self.result_path)?;
  72. let reader = io::BufReader::new(file);
  73. let mut lines = reader.lines();
  74. let mut cramino_res = CraminoRes {
  75. file_name: String::new(),
  76. number_of_reads: 0,
  77. yield_gb: 0.0,
  78. mean_coverage: 0.0,
  79. n50: 0,
  80. median_length: 0.0,
  81. mean_length: 0.0,
  82. median_identity: 0.0,
  83. mean_identity: 0.0,
  84. path: String::new(),
  85. creation_time: String::new(),
  86. checksum: String::new(),
  87. normalized_read_count_per_chromosome: HashMap::new(),
  88. };
  89. while let Some(Ok(line)) = lines.next() {
  90. if line.starts_with('#') {
  91. continue; // Skip the normalized read count header line
  92. }
  93. let parts: Vec<&str> = line.split_whitespace().collect();
  94. if parts.len() < 2 {
  95. continue; // Skip any malformed lines
  96. }
  97. if !parts[0].starts_with("chr") {
  98. match parts[0] {
  99. "File" => cramino_res.file_name = parts[2].to_string(),
  100. "Number" => cramino_res.number_of_reads = parts[3].parse().unwrap_or(0),
  101. "Yield" => cramino_res.yield_gb = parts[2].parse().unwrap_or(0.0),
  102. "N50" => cramino_res.n50 = parts[1].parse().unwrap_or(0),
  103. "Median" => match parts[1] {
  104. "length" => cramino_res.median_length = parts[2].parse().unwrap_or(0.0),
  105. "identity" => cramino_res.median_identity = parts[2].parse().unwrap_or(0.0),
  106. _ => {}
  107. },
  108. "Mean" => match parts[1] {
  109. "coverage" => cramino_res.mean_coverage = parts[2].parse().unwrap_or(0.0),
  110. "length" => cramino_res.mean_length = parts[2].parse().unwrap_or(0.0),
  111. "identity" => cramino_res.mean_identity = parts[2].parse().unwrap_or(0.0),
  112. _ => {}
  113. },
  114. "Path" => cramino_res.path = parts[1].to_string(),
  115. "Creation" => cramino_res.creation_time = parts[2..].join(" "),
  116. "Checksum" => cramino_res.checksum = parts[1].to_string(),
  117. _ => {}
  118. }
  119. } else {
  120. if parts.len() == 2 {
  121. let chromosome = parts[0].to_string();
  122. let count = parts[1].parse().unwrap_or(0.0);
  123. cramino_res
  124. .normalized_read_count_per_chromosome
  125. .insert(chromosome, count);
  126. }
  127. }
  128. }
  129. self.results = Some(cramino_res);
  130. Ok(())
  131. }
  132. fn clean(&self) -> anyhow::Result<()> {
  133. if self.remove_result_file {
  134. fs::remove_file(&self.result_path)?;
  135. }
  136. Ok(())
  137. }
  138. }
  139. #[derive(Debug)]
  140. pub struct CraminoRes {
  141. pub file_name: String,
  142. pub number_of_reads: u64,
  143. pub yield_gb: f64,
  144. pub mean_coverage: f64,
  145. pub n50: u32,
  146. pub median_length: f64,
  147. pub mean_length: f64,
  148. pub median_identity: f64,
  149. pub mean_identity: f64,
  150. pub path: String,
  151. pub creation_time: String,
  152. pub checksum: String,
  153. pub normalized_read_count_per_chromosome: HashMap<String, f64>,
  154. }
  155. impl CraminoRes {
  156. pub fn is_woman(&self) -> anyhow::Result<bool> {
  157. if let Some(v) = self.normalized_read_count_per_chromosome.get("chrY") {
  158. Ok(*v < 0.1)
  159. } else {
  160. Err(anyhow::anyhow!("Can't get chrY normalized read count."))
  161. }
  162. }
  163. }