use std::{ fs::{self, File, Metadata}, io::Read, path::PathBuf, str::FromStr }; use anyhow::{anyhow, Context}; use glob::glob; use hashbrown::HashMap; use log::warn; use pandora_lib_bindings::{ progs::cramino::{Cramino, CraminoRes}, utils::RunBin, }; use rayon::prelude::*; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Deserialize, Serialize)] pub struct Bam { pub id: String, pub time_point: String, pub reference_genome: String, pub bam_type: BamType, pub path: PathBuf, #[serde(with = "metadata_serde")] pub file_metadata: Metadata, // #[serde(skip)] // pub file_metadata: Metadata, pub cramino: Option, pub composition: Vec<(String, f64)>, } #[derive(Debug, PartialEq, Clone, Deserialize, Serialize)] pub enum BamType { WGS, Panel(String), ChIP(String), } impl Bam { pub fn new(path: PathBuf) -> anyhow::Result { let stem = path .clone() .file_stem() .context("Can't parse stem from {path}")? .to_string_lossy() .to_string(); let stem: Vec<&str> = stem.split('_').collect(); if stem.len() > 4 || stem.len() < 3 { return Err(anyhow!("Error in bam name: {}", path.display())); } let id = stem[0].to_string(); let time_point = stem[1].to_string(); let reference_genome = stem .last() .context("Can't get last from stem {stem}")? .to_string(); let bam_type = if stem.len() == 4 { match stem[2] { "oncoT" => BamType::Panel("oncoT".to_string()), "H3K27ac" => BamType::ChIP("H3K27ac".to_string()), "H3K4me3" => BamType::ChIP("H3K4me3".to_string()), _ => return Err(anyhow!("Error in bam name: {}", path.display())), } } else { BamType::WGS }; let tp_dir = path .parent() .context("Can't parse parent from: {bam_path}")?; let cramino_path = format!( "{}/{id}_{time_point}_hs1_cramino.txt", tp_dir.to_string_lossy() ); let file_metadata = fs::metadata(&path)?; let cramino = if bam_type == BamType::WGS { if !PathBuf::from_str(&cramino_path)?.exists() { return Err(anyhow!("Cramino file missing {cramino_path}")); } let mut cramino = Cramino::default().with_result_path(&cramino_path); cramino .parse_results() .context(format!("Error while parsing cramino for {cramino_path}"))?; if let Some(cramino) = cramino.results { Some(cramino) } else { return Err(anyhow!("Cramino results parsing failed")); } } else { None }; let composition = pandora_lib_pileup::bam_compo(path.to_string_lossy().as_ref(), 20000).context( format!("Error while reading BAM composition for {}", path.display()), )?; Ok(Self { path, // file_metadata, cramino, id: id.to_string(), time_point: time_point.to_string(), bam_type, reference_genome, composition, }) } pub fn load_json(path: &str) -> anyhow::Result { let f = File::open(path)?; let s: Self = serde_json::from_reader(f)?; Ok(s) } pub fn save_json(path: &str) -> anyhow::Result<()> { } } #[derive(Debug)] pub struct BamCollection { pub bams: Vec, } impl BamCollection { pub fn new(result_dir: &str) -> Self { load_bam_collection(result_dir) } pub fn by_acquisition_id(&self) -> HashMap> { let mut acq: HashMap> = HashMap::new(); for bam in self.bams.iter() { for (acq_id, _) in bam.composition.iter() { if let Some(entry) = acq.get_mut(acq_id) { entry.push(bam); } else { acq.insert(acq_id.to_string(), vec![bam]); } } } acq } pub fn get(&self, id: &str, time_point: &str) -> Vec<&Bam> { self.bams .iter() .filter(|b| b.id == id && b.time_point == time_point) .collect() } pub fn by_id_completed(&self, min_diag_cov: f32, min_mrd_cov: f32) -> Vec { self.bams .iter() .filter(|b| matches!(b.bam_type, BamType::WGS)) .filter(|b| match &b.cramino { Some(cramino) => match b.time_point.as_str() { "diag" => cramino.mean_length >= min_diag_cov as f64, "mrd" => cramino.mean_length >= min_mrd_cov as f64, _ => false }, _ => false, }) .cloned() .collect() } } pub fn load_bam_collection(result_dir: &str) -> BamCollection { let pattern = format!("{}/*/*/*.bam", result_dir); let bams = glob(&pattern) .expect("Failed to read glob pattern") .par_bridge() .filter_map(|entry| { match entry { Ok(path) => match Bam::new(path) { Ok(bam) => return Some(bam), Err(e) => warn!("{e}"), }, Err(e) => warn!("Error: {:?}", e), } None }) .collect(); BamCollection { bams } } mod metadata_serde { use super::*; use serde::{Serializer, Deserializer}; #[derive(Serialize, Deserialize)] struct SerializableMetadata { len: u64, modified: u64, created: u64, } pub fn serialize(metadata: &Metadata, serializer: S) -> Result where S: Serializer, { let serializable = SerializableMetadata { len: metadata.len(), modified: metadata.modified() .unwrap_or(UNIX_EPOCH) .duration_since(UNIX_EPOCH) .unwrap_or_default() .as_secs(), created: metadata.created() .unwrap_or(UNIX_EPOCH) .duration_since(UNIX_EPOCH) .unwrap_or_default() .as_secs(), }; serializable.serialize(serializer) } pub fn deserialize<'de, D>(deserializer: D) -> Result where D: Deserializer<'de>, { let serializable = SerializableMetadata::deserialize(deserializer)?; let file = tempfile::tempfile().map_err(serde::de::Error::custom)?; let metadata = file.metadata().map_err(serde::de::Error::custom)?; Ok(metadata) } }