| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- use anyhow::{anyhow, Context};
- use chrono::{DateTime, Utc};
- use csi::binning_index::ReferenceSequence;
- use glob::glob;
- use log::warn;
- use std::{fs::Metadata, os::unix::fs::MetadataExt, path::PathBuf};
- use noodles_csi as csi;
- use num_format::{Locale, ToFormattedString};
/// A single VCF file together with the information derived from its file name,
/// its file-system metadata and its CSI index.
#[derive(Debug)]
pub struct Vcf {
    /// First `_`-separated field of the file stem.
    pub id: String,
    /// Fields of the file stem between the time point and the last field,
    /// re-joined with `_`.
    pub caller: String,
    /// Second `_`-separated field of the file stem.
    pub time_point: String,
    /// Location of the VCF file.
    pub path: PathBuf,
    /// File-system metadata captured when the value was built.
    pub file_metadata: Metadata,
    /// Number of records, as summed from the CSI index metadata.
    pub n_variants: u64,
}
- impl Vcf {
- pub fn new(path: PathBuf) -> anyhow::Result<Self> {
- let stem = path
- .file_stem()
- .context("Can't parse stem")?
- .to_string_lossy()
- .to_string();
- let stem_splt: Vec<&str> = stem.split('_').collect();
- let id = stem_splt[0].to_string();
- let time_point = stem_splt[1].to_string();
- let caller = stem_splt[2..stem_splt.len() - 1].join("_");
- if !PathBuf::from(format!("{}.csi", path.display())).exists() {
- return Err(anyhow!("No csi for {}", path.display()));
- }
- let n_variants = n_variants(path.to_str().context("Can't convert path to str")?)?;
- let file_metadata = path.metadata()?;
- Ok(Self {
- id,
- caller,
- time_point,
- path,
- file_metadata,
- n_variants,
- })
- }
- pub fn modified(&self) -> anyhow::Result<DateTime<Utc>> {
- Ok(self.file_metadata.modified().unwrap().into())
- }
- pub fn size(&self) -> u64 {
- self.file_metadata.size()
- }
- pub fn tsv(&self) -> anyhow::Result<String> {
- Ok([
- self.id.clone(),
- self.time_point.clone(),
- self.caller.clone(),
- self.n_variants.to_string(),
- self.modified()?.to_string(),
- self.size().to_string(),
- self.path.display().to_string(),
- ]
- .join("\t"))
- }
- pub fn println(&self) -> anyhow::Result<()> {
- let formated_n_variants = self.n_variants.to_formatted_string(&Locale::en);
- let formated_modified = self.modified()?.naive_local().to_string();
- let formated_size = format!("{:#}", byte_unit::Byte::from_u64(self.size()));
- println!(
- "{}",
- [
- self.id.to_string(),
- self.time_point.to_string(),
- self.caller.to_string(),
- formated_n_variants,
- formated_modified,
- formated_size,
- self.path.display().to_string()
- ]
- .join("\t")
- );
- Ok(())
- }
- }
- #[derive(Debug)]
- pub struct VcfCollection {
- pub vcfs: Vec<Vcf>,
- }
- impl VcfCollection {
- pub fn new(result_dir: &str) -> Self {
- let mut vcfs = Vec::new();
- let pattern = format!("{}/*/*/*/*_PASSED.vcf.gz", result_dir);
- for entry in glob(&pattern).expect("Failed to read glob pattern") {
- match entry {
- Ok(path) => match Vcf::new(path) {
- Ok(vcf) => vcfs.push(vcf),
- Err(e) => warn!("{e}"),
- },
- Err(e) => warn!("Error: {:?}", e),
- }
- }
- VcfCollection { vcfs }
- }
- pub fn sort_by_id(&mut self) {
- self.vcfs.sort_by_key(|v| v.id.clone());
- }
- }
- pub fn n_variants(path: &str) -> anyhow::Result<u64> {
- let csi_src = format!("{path}.csi");
- let index = csi::read(csi_src)?;
- let mut n = 0;
- for reference_sequence in index.reference_sequences() {
- if let Some(metadata) = reference_sequence.metadata() {
- n += metadata.mapped_record_count()
- }
- }
- Ok(n)
- }
|