: 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. use std::{
  2. fs::{self, File, Metadata}, io::Read, path::PathBuf, str::FromStr
  3. };
  4. use anyhow::{anyhow, Context};
  5. use glob::glob;
  6. use hashbrown::HashMap;
  7. use log::warn;
  8. use pandora_lib_bindings::{
  9. progs::cramino::{Cramino, CraminoRes},
  10. utils::RunBin,
  11. };
  12. use rayon::prelude::*;
  13. use serde::{Deserialize, Serialize};
  14. #[derive(Debug, Clone, Deserialize, Serialize)]
  15. pub struct Bam {
  16. pub id: String,
  17. pub time_point: String,
  18. pub reference_genome: String,
  19. pub bam_type: BamType,
  20. pub path: PathBuf,
  21. #[serde(with = "metadata_serde")]
  22. pub file_metadata: Metadata,
  23. // #[serde(skip)]
  24. // pub file_metadata: Metadata,
  25. pub cramino: Option<CraminoRes>,
  26. pub composition: Vec<(String, f64)>,
  27. }
  28. #[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
  29. pub enum BamType {
  30. WGS,
  31. Panel(String),
  32. ChIP(String),
  33. }
  34. impl Bam {
  35. pub fn new(path: PathBuf) -> anyhow::Result<Self> {
  36. let stem = path
  37. .clone()
  38. .file_stem()
  39. .context("Can't parse stem from {path}")?
  40. .to_string_lossy()
  41. .to_string();
  42. let stem: Vec<&str> = stem.split('_').collect();
  43. if stem.len() > 4 || stem.len() < 3 {
  44. return Err(anyhow!("Error in bam name: {}", path.display()));
  45. }
  46. let id = stem[0].to_string();
  47. let time_point = stem[1].to_string();
  48. let reference_genome = stem
  49. .last()
  50. .context("Can't get last from stem {stem}")?
  51. .to_string();
  52. let bam_type = if stem.len() == 4 {
  53. match stem[2] {
  54. "oncoT" => BamType::Panel("oncoT".to_string()),
  55. "H3K27ac" => BamType::ChIP("H3K27ac".to_string()),
  56. "H3K4me3" => BamType::ChIP("H3K4me3".to_string()),
  57. _ => return Err(anyhow!("Error in bam name: {}", path.display())),
  58. }
  59. } else {
  60. BamType::WGS
  61. };
  62. let tp_dir = path
  63. .parent()
  64. .context("Can't parse parent from: {bam_path}")?;
  65. let cramino_path = format!(
  66. "{}/{id}_{time_point}_hs1_cramino.txt",
  67. tp_dir.to_string_lossy()
  68. );
  69. let file_metadata = fs::metadata(&path)?;
  70. let cramino = if bam_type == BamType::WGS {
  71. if !PathBuf::from_str(&cramino_path)?.exists() {
  72. return Err(anyhow!("Cramino file missing {cramino_path}"));
  73. }
  74. let mut cramino = Cramino::default().with_result_path(&cramino_path);
  75. cramino
  76. .parse_results()
  77. .context(format!("Error while parsing cramino for {cramino_path}"))?;
  78. if let Some(cramino) = cramino.results {
  79. Some(cramino)
  80. } else {
  81. return Err(anyhow!("Cramino results parsing failed"));
  82. }
  83. } else {
  84. None
  85. };
  86. let composition =
  87. pandora_lib_pileup::bam_compo(path.to_string_lossy().as_ref(), 20000).context(
  88. format!("Error while reading BAM composition for {}", path.display()),
  89. )?;
  90. Ok(Self {
  91. path,
  92. // file_metadata,
  93. cramino,
  94. id: id.to_string(),
  95. time_point: time_point.to_string(),
  96. bam_type,
  97. reference_genome,
  98. composition,
  99. })
  100. }
  101. pub fn load_json(path: &str) -> anyhow::Result<Self> {
  102. let f = File::open(path)?;
  103. let s: Self = serde_json::from_reader(f)?;
  104. Ok(s)
  105. }
  106. pub fn save_json(path: &str) -> anyhow::Result<()> {
  107. }
  108. }
  109. #[derive(Debug)]
  110. pub struct BamCollection {
  111. pub bams: Vec<Bam>,
  112. }
  113. impl BamCollection {
  114. pub fn new(result_dir: &str) -> Self {
  115. load_bam_collection(result_dir)
  116. }
  117. pub fn by_acquisition_id(&self) -> HashMap<String, Vec<&Bam>> {
  118. let mut acq: HashMap<String, Vec<&Bam>> = HashMap::new();
  119. for bam in self.bams.iter() {
  120. for (acq_id, _) in bam.composition.iter() {
  121. if let Some(entry) = acq.get_mut(acq_id) {
  122. entry.push(bam);
  123. } else {
  124. acq.insert(acq_id.to_string(), vec![bam]);
  125. }
  126. }
  127. }
  128. acq
  129. }
  130. pub fn get(&self, id: &str, time_point: &str) -> Vec<&Bam> {
  131. self.bams
  132. .iter()
  133. .filter(|b| b.id == id && b.time_point == time_point)
  134. .collect()
  135. }
  136. pub fn by_id_completed(&self, min_diag_cov: f32, min_mrd_cov: f32) -> Vec<Bam> {
  137. self.bams
  138. .iter()
  139. .filter(|b| matches!(b.bam_type, BamType::WGS))
  140. .filter(|b| match &b.cramino {
  141. Some(cramino) => match b.time_point.as_str() {
  142. "diag" => cramino.mean_length >= min_diag_cov as f64,
  143. "mrd" => cramino.mean_length >= min_mrd_cov as f64,
  144. _ => false
  145. },
  146. _ => false,
  147. })
  148. .cloned()
  149. .collect()
  150. }
  151. }
  152. pub fn load_bam_collection(result_dir: &str) -> BamCollection {
  153. let pattern = format!("{}/*/*/*.bam", result_dir);
  154. let bams = glob(&pattern)
  155. .expect("Failed to read glob pattern")
  156. .par_bridge()
  157. .filter_map(|entry| {
  158. match entry {
  159. Ok(path) => match Bam::new(path) {
  160. Ok(bam) => return Some(bam),
  161. Err(e) => warn!("{e}"),
  162. },
  163. Err(e) => warn!("Error: {:?}", e),
  164. }
  165. None
  166. })
  167. .collect();
  168. BamCollection { bams }
  169. }
  170. mod metadata_serde {
  171. use super::*;
  172. use serde::{Serializer, Deserializer};
  173. #[derive(Serialize, Deserialize)]
  174. struct SerializableMetadata {
  175. len: u64,
  176. modified: u64,
  177. created: u64,
  178. }
  179. pub fn serialize<S>(metadata: &Metadata, serializer: S) -> Result<S::Ok, S::Error>
  180. where
  181. S: Serializer,
  182. {
  183. let serializable = SerializableMetadata {
  184. len: metadata.len(),
  185. modified: metadata.modified()
  186. .unwrap_or(UNIX_EPOCH)
  187. .duration_since(UNIX_EPOCH)
  188. .unwrap_or_default()
  189. .as_secs(),
  190. created: metadata.created()
  191. .unwrap_or(UNIX_EPOCH)
  192. .duration_since(UNIX_EPOCH)
  193. .unwrap_or_default()
  194. .as_secs(),
  195. };
  196. serializable.serialize(serializer)
  197. }
  198. pub fn deserialize<'de, D>(deserializer: D) -> Result<Metadata, D::Error>
  199. where
  200. D: Deserializer<'de>,
  201. {
  202. let serializable = SerializableMetadata::deserialize(deserializer)?;
  203. let file = tempfile::tempfile().map_err(serde::de::Error::custom)?;
  204. let metadata = file.metadata().map_err(serde::de::Error::custom)?;
  205. Ok(metadata)
  206. }
  207. }