|
@@ -1,235 +0,0 @@
|
|
|
-use std::{
|
|
|
|
|
- fs::{self, File, Metadata}, io::Read, path::PathBuf, str::FromStr
|
|
|
|
|
-};
|
|
|
|
|
-
|
|
|
|
|
-use anyhow::{anyhow, Context};
|
|
|
|
|
-use glob::glob;
|
|
|
|
|
-use hashbrown::HashMap;
|
|
|
|
|
-use log::warn;
|
|
|
|
|
-use pandora_lib_bindings::{
|
|
|
|
|
- progs::cramino::{Cramino, CraminoRes},
|
|
|
|
|
- utils::RunBin,
|
|
|
|
|
-};
|
|
|
|
|
-use rayon::prelude::*;
|
|
|
|
|
-use serde::{Deserialize, Serialize};
|
|
|
|
|
-
|
|
|
|
|
-#[derive(Debug, Clone, Deserialize, Serialize)]
|
|
|
|
|
-pub struct Bam {
|
|
|
|
|
- pub id: String,
|
|
|
|
|
- pub time_point: String,
|
|
|
|
|
- pub reference_genome: String,
|
|
|
|
|
- pub bam_type: BamType,
|
|
|
|
|
- pub path: PathBuf,
|
|
|
|
|
- #[serde(with = "metadata_serde")]
|
|
|
|
|
- pub file_metadata: Metadata,
|
|
|
|
|
- // #[serde(skip)]
|
|
|
|
|
-// pub file_metadata: Metadata,
|
|
|
|
|
- pub cramino: Option<CraminoRes>,
|
|
|
|
|
- pub composition: Vec<(String, f64)>,
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-#[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
|
|
|
|
|
-pub enum BamType {
|
|
|
|
|
- WGS,
|
|
|
|
|
- Panel(String),
|
|
|
|
|
- ChIP(String),
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-impl Bam {
|
|
|
|
|
- pub fn new(path: PathBuf) -> anyhow::Result<Self> {
|
|
|
|
|
- let stem = path
|
|
|
|
|
- .clone()
|
|
|
|
|
- .file_stem()
|
|
|
|
|
- .context("Can't parse stem from {path}")?
|
|
|
|
|
- .to_string_lossy()
|
|
|
|
|
- .to_string();
|
|
|
|
|
- let stem: Vec<&str> = stem.split('_').collect();
|
|
|
|
|
-
|
|
|
|
|
- if stem.len() > 4 || stem.len() < 3 {
|
|
|
|
|
- return Err(anyhow!("Error in bam name: {}", path.display()));
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- let id = stem[0].to_string();
|
|
|
|
|
- let time_point = stem[1].to_string();
|
|
|
|
|
- let reference_genome = stem
|
|
|
|
|
- .last()
|
|
|
|
|
- .context("Can't get last from stem {stem}")?
|
|
|
|
|
- .to_string();
|
|
|
|
|
-
|
|
|
|
|
- let bam_type = if stem.len() == 4 {
|
|
|
|
|
- match stem[2] {
|
|
|
|
|
- "oncoT" => BamType::Panel("oncoT".to_string()),
|
|
|
|
|
- "H3K27ac" => BamType::ChIP("H3K27ac".to_string()),
|
|
|
|
|
- "H3K4me3" => BamType::ChIP("H3K4me3".to_string()),
|
|
|
|
|
- _ => return Err(anyhow!("Error in bam name: {}", path.display())),
|
|
|
|
|
- }
|
|
|
|
|
- } else {
|
|
|
|
|
- BamType::WGS
|
|
|
|
|
- };
|
|
|
|
|
-
|
|
|
|
|
- let tp_dir = path
|
|
|
|
|
- .parent()
|
|
|
|
|
- .context("Can't parse parent from: {bam_path}")?;
|
|
|
|
|
- let cramino_path = format!(
|
|
|
|
|
- "{}/{id}_{time_point}_hs1_cramino.txt",
|
|
|
|
|
- tp_dir.to_string_lossy()
|
|
|
|
|
- );
|
|
|
|
|
- let file_metadata = fs::metadata(&path)?;
|
|
|
|
|
-
|
|
|
|
|
- let cramino = if bam_type == BamType::WGS {
|
|
|
|
|
- if !PathBuf::from_str(&cramino_path)?.exists() {
|
|
|
|
|
- return Err(anyhow!("Cramino file missing {cramino_path}"));
|
|
|
|
|
- }
|
|
|
|
|
- let mut cramino = Cramino::default().with_result_path(&cramino_path);
|
|
|
|
|
- cramino
|
|
|
|
|
- .parse_results()
|
|
|
|
|
- .context(format!("Error while parsing cramino for {cramino_path}"))?;
|
|
|
|
|
-
|
|
|
|
|
- if let Some(cramino) = cramino.results {
|
|
|
|
|
- Some(cramino)
|
|
|
|
|
- } else {
|
|
|
|
|
- return Err(anyhow!("Cramino results parsing failed"));
|
|
|
|
|
- }
|
|
|
|
|
- } else {
|
|
|
|
|
- None
|
|
|
|
|
- };
|
|
|
|
|
-
|
|
|
|
|
- let composition =
|
|
|
|
|
- pandora_lib_pileup::bam_compo(path.to_string_lossy().as_ref(), 20000).context(
|
|
|
|
|
- format!("Error while reading BAM composition for {}", path.display()),
|
|
|
|
|
- )?;
|
|
|
|
|
-
|
|
|
|
|
- Ok(Self {
|
|
|
|
|
- path,
|
|
|
|
|
- // file_metadata,
|
|
|
|
|
- cramino,
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- time_point: time_point.to_string(),
|
|
|
|
|
- bam_type,
|
|
|
|
|
- reference_genome,
|
|
|
|
|
- composition,
|
|
|
|
|
- })
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn load_json(path: &str) -> anyhow::Result<Self> {
|
|
|
|
|
- let f = File::open(path)?;
|
|
|
|
|
- let s: Self = serde_json::from_reader(f)?;
|
|
|
|
|
- Ok(s)
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn save_json(path: &str) -> anyhow::Result<()> {
|
|
|
|
|
-
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-#[derive(Debug)]
|
|
|
|
|
-pub struct BamCollection {
|
|
|
|
|
- pub bams: Vec<Bam>,
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-impl BamCollection {
|
|
|
|
|
- pub fn new(result_dir: &str) -> Self {
|
|
|
|
|
- load_bam_collection(result_dir)
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn by_acquisition_id(&self) -> HashMap<String, Vec<&Bam>> {
|
|
|
|
|
- let mut acq: HashMap<String, Vec<&Bam>> = HashMap::new();
|
|
|
|
|
- for bam in self.bams.iter() {
|
|
|
|
|
- for (acq_id, _) in bam.composition.iter() {
|
|
|
|
|
- if let Some(entry) = acq.get_mut(acq_id) {
|
|
|
|
|
- entry.push(bam);
|
|
|
|
|
- } else {
|
|
|
|
|
- acq.insert(acq_id.to_string(), vec![bam]);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- acq
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn get(&self, id: &str, time_point: &str) -> Vec<&Bam> {
|
|
|
|
|
- self.bams
|
|
|
|
|
- .iter()
|
|
|
|
|
- .filter(|b| b.id == id && b.time_point == time_point)
|
|
|
|
|
- .collect()
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn by_id_completed(&self, min_diag_cov: f32, min_mrd_cov: f32) -> Vec<Bam> {
|
|
|
|
|
- self.bams
|
|
|
|
|
- .iter()
|
|
|
|
|
- .filter(|b| matches!(b.bam_type, BamType::WGS))
|
|
|
|
|
- .filter(|b| match &b.cramino {
|
|
|
|
|
- Some(cramino) => match b.time_point.as_str() {
|
|
|
|
|
- "diag" => cramino.mean_length >= min_diag_cov as f64,
|
|
|
|
|
- "mrd" => cramino.mean_length >= min_mrd_cov as f64,
|
|
|
|
|
- _ => false
|
|
|
|
|
- },
|
|
|
|
|
- _ => false,
|
|
|
|
|
- })
|
|
|
|
|
- .cloned()
|
|
|
|
|
- .collect()
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-pub fn load_bam_collection(result_dir: &str) -> BamCollection {
|
|
|
|
|
- let pattern = format!("{}/*/*/*.bam", result_dir);
|
|
|
|
|
- let bams = glob(&pattern)
|
|
|
|
|
- .expect("Failed to read glob pattern")
|
|
|
|
|
- .par_bridge()
|
|
|
|
|
- .filter_map(|entry| {
|
|
|
|
|
- match entry {
|
|
|
|
|
- Ok(path) => match Bam::new(path) {
|
|
|
|
|
- Ok(bam) => return Some(bam),
|
|
|
|
|
- Err(e) => warn!("{e}"),
|
|
|
|
|
- },
|
|
|
|
|
- Err(e) => warn!("Error: {:?}", e),
|
|
|
|
|
- }
|
|
|
|
|
- None
|
|
|
|
|
- })
|
|
|
|
|
- .collect();
|
|
|
|
|
-
|
|
|
|
|
- BamCollection { bams }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-mod metadata_serde {
|
|
|
|
|
- use super::*;
|
|
|
|
|
- use serde::{Serializer, Deserializer};
|
|
|
|
|
-
|
|
|
|
|
- #[derive(Serialize, Deserialize)]
|
|
|
|
|
- struct SerializableMetadata {
|
|
|
|
|
- len: u64,
|
|
|
|
|
- modified: u64,
|
|
|
|
|
- created: u64,
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn serialize<S>(metadata: &Metadata, serializer: S) -> Result<S::Ok, S::Error>
|
|
|
|
|
- where
|
|
|
|
|
- S: Serializer,
|
|
|
|
|
- {
|
|
|
|
|
- let serializable = SerializableMetadata {
|
|
|
|
|
- len: metadata.len(),
|
|
|
|
|
- modified: metadata.modified()
|
|
|
|
|
- .unwrap_or(UNIX_EPOCH)
|
|
|
|
|
- .duration_since(UNIX_EPOCH)
|
|
|
|
|
- .unwrap_or_default()
|
|
|
|
|
- .as_secs(),
|
|
|
|
|
- created: metadata.created()
|
|
|
|
|
- .unwrap_or(UNIX_EPOCH)
|
|
|
|
|
- .duration_since(UNIX_EPOCH)
|
|
|
|
|
- .unwrap_or_default()
|
|
|
|
|
- .as_secs(),
|
|
|
|
|
- };
|
|
|
|
|
- serializable.serialize(serializer)
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn deserialize<'de, D>(deserializer: D) -> Result<Metadata, D::Error>
|
|
|
|
|
- where
|
|
|
|
|
- D: Deserializer<'de>,
|
|
|
|
|
- {
|
|
|
|
|
- let serializable = SerializableMetadata::deserialize(deserializer)?;
|
|
|
|
|
- let file = tempfile::tempfile().map_err(serde::de::Error::custom)?;
|
|
|
|
|
- let metadata = file.metadata().map_err(serde::de::Error::custom)?;
|
|
|
|
|
- Ok(metadata)
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|