use anyhow::{anyhow, Context}; use chrono::{DateTime, Utc}; use csi::binning_index::ReferenceSequence; use glob::glob; use log::warn; use std::{fs::Metadata, os::unix::fs::MetadataExt, path::PathBuf}; use noodles_csi as csi; use num_format::{Locale, ToFormattedString}; #[derive(Debug)] pub struct Vcf { pub id: String, pub caller: String, pub time_point: String, pub path: PathBuf, pub file_metadata: Metadata, pub n_variants: u64, } impl Vcf { pub fn new(path: PathBuf) -> anyhow::Result { let stem = path .file_stem() .context("Can't parse stem")? .to_string_lossy() .to_string(); let stem_splt: Vec<&str> = stem.split('_').collect(); let id = stem_splt[0].to_string(); let time_point = stem_splt[1].to_string(); let caller = stem_splt[2..stem_splt.len() - 1].join("_"); if !PathBuf::from(format!("{}.csi", path.display())).exists() { return Err(anyhow!("No csi for {}", path.display())); } let n_variants = n_variants(path.to_str().context("Can't convert path to str")?)?; let file_metadata = path.metadata()?; Ok(Self { id, caller, time_point, path, file_metadata, n_variants, }) } pub fn modified(&self) -> anyhow::Result> { Ok(self.file_metadata.modified().unwrap().into()) } pub fn size(&self) -> u64 { self.file_metadata.size() } pub fn tsv(&self) -> anyhow::Result { Ok([ self.id.clone(), self.time_point.clone(), self.caller.clone(), self.n_variants.to_string(), self.modified()?.to_string(), self.size().to_string(), self.path.display().to_string(), ] .join("\t")) } pub fn println(&self) -> anyhow::Result<()> { let formated_n_variants = self.n_variants.to_formatted_string(&Locale::en); let formated_modified = self.modified()?.naive_local().to_string(); let formated_size = format!("{:#}", byte_unit::Byte::from_u64(self.size())); println!( "{}", [ self.id.to_string(), self.time_point.to_string(), self.caller.to_string(), formated_n_variants, formated_modified, formated_size, self.path.display().to_string() ] .join("\t") ); Ok(()) } } #[derive(Debug)] pub struct VcfCollection { pub vcfs: Vec, } impl VcfCollection { // pub fn print_tsv(&self) { // for vcf in self.vcfs.iter() {} // } pub fn sort_by_id(&mut self) { self.vcfs.sort_by_key(|v| v.id.clone()); } } pub fn load_vcf_collection(result_dir: &str) -> VcfCollection { let mut vcfs = Vec::new(); let pattern = format!("{}/*/*/*/*_PASSED.vcf.gz", result_dir); for entry in glob(&pattern).expect("Failed to read glob pattern") { match entry { Ok(path) => match Vcf::new(path) { Ok(vcf) => vcfs.push(vcf), Err(e) => warn!("{e}"), }, Err(e) => warn!("Error: {:?}", e), } } VcfCollection { vcfs } } pub fn n_variants(path: &str) -> anyhow::Result { let csi_src = format!("{path}.csi"); let index = csi::read(csi_src)?; let mut n = 0; for reference_sequence in index.reference_sequences() { if let Some(metadata) = reference_sequence.metadata() { n += metadata.mapped_record_count() } } Ok(n) }