use std::{ collections::HashMap, env, fs::{self, File}, io::{self, BufRead}, path::PathBuf, }; use anyhow::anyhow; use log::info; use uuid::Uuid; use crate::utils::{exec_bin_to_file, RunBin}; #[derive(Debug)] pub struct Cramino { pub prog_path: String, pub args: Vec, pub bam_file: PathBuf, pub result_path: PathBuf, pub results: Option, pub remove_result_file: bool, } impl Default for Cramino { fn default() -> Self { let mut result_path = env::temp_dir(); result_path.push(format!("{}_cramino.txt", Uuid::new_v4())); Self { prog_path: "cramino".to_string(), args: vec![ "--hist".to_string(), "--checksum".to_string(), "--karyotype".to_string(), ], result_path, results: None, remove_result_file: false, bam_file: PathBuf::new(), } } } impl Cramino { pub fn with_result_path(mut self, result_path: &str) -> Self { self.result_path = PathBuf::from(result_path); self } pub fn with_threads(mut self, n_threads: usize) -> Self { self.args.push("-t".to_string()); self.args.push(n_threads.to_string()); self } pub fn with_bam(mut self, bam_file: &str) -> anyhow::Result { let bam_file = PathBuf::from(bam_file); if !bam_file.exists() { Err(anyhow!("BAM file doesn't exists.")) } else { self.bam_file = bam_file; Ok(self) } } } impl RunBin for Cramino { fn exec(&self) -> anyhow::Result<()> { if self.result_path.exists() { info!("Result file already exists."); Ok(()) } else { let mut args = self.args.clone(); args.push(self.bam_file.to_string_lossy().to_string()); exec_bin_to_file(&self.prog_path, &args, &self.result_path) } } fn parse_results(&mut self) -> anyhow::Result<()> { let file = File::open(&self.result_path)?; let reader = io::BufReader::new(file); let mut lines = reader.lines(); let mut cramino_res = CraminoRes { file_name: String::new(), number_of_reads: 0, yield_gb: 0.0, mean_coverage: 0.0, n50: 0, median_length: 0.0, mean_length: 0.0, median_identity: 0.0, mean_identity: 0.0, path: String::new(), creation_time: String::new(), checksum: String::new(), normalized_read_count_per_chromosome: HashMap::new(), }; while let Some(Ok(line)) = lines.next() { if line.starts_with('#') { continue; // Skip the normalized read count header line } let parts: Vec<&str> = line.split_whitespace().collect(); if parts.len() < 2 { continue; // Skip any malformed lines } if !parts[0].starts_with("chr") { match parts[0] { "File" => cramino_res.file_name = parts[2].to_string(), "Number" => cramino_res.number_of_reads = parts[3].parse().unwrap_or(0), "Yield" => cramino_res.yield_gb = parts[2].parse().unwrap_or(0.0), "N50" => cramino_res.n50 = parts[1].parse().unwrap_or(0), "Median" => match parts[1] { "length" => cramino_res.median_length = parts[2].parse().unwrap_or(0.0), "identity" => cramino_res.median_identity = parts[2].parse().unwrap_or(0.0), _ => {} }, "Mean" => match parts[1] { "coverage" => cramino_res.mean_coverage = parts[2].parse().unwrap_or(0.0), "length" => cramino_res.mean_length = parts[2].parse().unwrap_or(0.0), "identity" => cramino_res.mean_identity = parts[2].parse().unwrap_or(0.0), _ => {} }, "Path" => cramino_res.path = parts[1].to_string(), "Creation" => cramino_res.creation_time = parts[2..].join(" "), "Checksum" => cramino_res.checksum = parts[1].to_string(), _ => {} } } else { if parts.len() == 2 { let chromosome = parts[0].to_string(); let count = parts[1].parse().unwrap_or(0.0); cramino_res .normalized_read_count_per_chromosome .insert(chromosome, count); } } } self.results = Some(cramino_res); Ok(()) } fn clean(&self) -> anyhow::Result<()> { if self.remove_result_file { fs::remove_file(&self.result_path)?; } Ok(()) } } #[derive(Debug)] pub struct CraminoRes { pub file_name: String, pub number_of_reads: u64, pub yield_gb: f64, pub mean_coverage: f64, pub n50: u32, pub median_length: f64, pub mean_length: f64, pub median_identity: f64, pub mean_identity: f64, pub path: String, pub creation_time: String, pub checksum: String, pub normalized_read_count_per_chromosome: HashMap, } impl CraminoRes { pub fn is_woman(&self) -> anyhow::Result { if let Some(v) = self.normalized_read_count_per_chromosome.get("chrY") { Ok(*v < 0.1) } else { Err(anyhow::anyhow!("Can't get chrY normalized read count.")) } } }