| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- use std::{
- collections::HashMap,
- env,
- fs::{self, File},
- io::{self, BufRead},
- path::PathBuf,
- };
- use anyhow::anyhow;
- use log::info;
- use uuid::Uuid;
- use crate::utils::{exec_bin_to_file, RunBin};
- #[derive(Debug)]
- pub struct Cramino {
- pub prog_path: String,
- pub args: Vec<String>,
- pub bam_file: PathBuf,
- pub result_path: PathBuf,
- pub results: Option<CraminoRes>,
- pub remove_result_file: bool,
- }
- impl Default for Cramino {
- fn default() -> Self {
- let mut result_path = env::temp_dir();
- result_path.push(format!("{}_cramino.txt", Uuid::new_v4()));
- Self {
- prog_path: "cramino".to_string(),
- args: vec![
- "--hist".to_string(),
- "--checksum".to_string(),
- "--karyotype".to_string(),
- ],
- result_path,
- results: None,
- remove_result_file: false,
- bam_file: PathBuf::new(),
- }
- }
- }
- impl Cramino {
- pub fn with_result_path(mut self, result_path: &str) -> Self {
- self.result_path = PathBuf::from(result_path);
- self
- }
- pub fn with_threads(mut self, n_threads: usize) -> Self {
- self.args.push("-t".to_string());
- self.args.push(n_threads.to_string());
- self
- }
- pub fn with_bam(mut self, bam_file: &str) -> anyhow::Result<Self> {
- let bam_file = PathBuf::from(bam_file);
- if !bam_file.exists() {
- Err(anyhow!("BAM file doesn't exists."))
- } else {
- self.bam_file = bam_file;
- Ok(self)
- }
- }
- }
- impl RunBin for Cramino {
- fn exec(&self) -> anyhow::Result<()> {
- if self.result_path.exists() {
- info!("Result file already exists.");
- Ok(())
- } else {
- let mut args = self.args.clone();
- args.push(self.bam_file.to_string_lossy().to_string());
- exec_bin_to_file(&self.prog_path, &args, &self.result_path)
- }
- }
- fn parse_results(&mut self) -> anyhow::Result<()> {
- let file = File::open(&self.result_path)?;
- let reader = io::BufReader::new(file);
- let mut lines = reader.lines();
- let mut cramino_res = CraminoRes {
- file_name: String::new(),
- number_of_reads: 0,
- yield_gb: 0.0,
- mean_coverage: 0.0,
- n50: 0,
- median_length: 0.0,
- mean_length: 0.0,
- median_identity: 0.0,
- mean_identity: 0.0,
- path: String::new(),
- creation_time: String::new(),
- checksum: String::new(),
- normalized_read_count_per_chromosome: HashMap::new(),
- };
- while let Some(Ok(line)) = lines.next() {
- if line.starts_with('#') {
- continue; // Skip the normalized read count header line
- }
- let parts: Vec<&str> = line.split_whitespace().collect();
- if parts.len() < 2 {
- continue; // Skip any malformed lines
- }
- if !parts[0].starts_with("chr") {
- match parts[0] {
- "File" => cramino_res.file_name = parts[2].to_string(),
- "Number" => cramino_res.number_of_reads = parts[3].parse().unwrap_or(0),
- "Yield" => cramino_res.yield_gb = parts[2].parse().unwrap_or(0.0),
- "N50" => cramino_res.n50 = parts[1].parse().unwrap_or(0),
- "Median" => match parts[1] {
- "length" => cramino_res.median_length = parts[2].parse().unwrap_or(0.0),
- "identity" => cramino_res.median_identity = parts[2].parse().unwrap_or(0.0),
- _ => {}
- },
- "Mean" => match parts[1] {
- "coverage" => cramino_res.mean_coverage = parts[2].parse().unwrap_or(0.0),
- "length" => cramino_res.mean_length = parts[2].parse().unwrap_or(0.0),
- "identity" => cramino_res.mean_identity = parts[2].parse().unwrap_or(0.0),
- _ => {}
- },
- "Path" => cramino_res.path = parts[1].to_string(),
- "Creation" => cramino_res.creation_time = parts[2..].join(" "),
- "Checksum" => cramino_res.checksum = parts[1].to_string(),
- _ => {}
- }
- } else {
- if parts.len() == 2 {
- let chromosome = parts[0].to_string();
- let count = parts[1].parse().unwrap_or(0.0);
- cramino_res
- .normalized_read_count_per_chromosome
- .insert(chromosome, count);
- }
- }
- }
- self.results = Some(cramino_res);
- Ok(())
- }
- fn clean(&self) -> anyhow::Result<()> {
- if self.remove_result_file {
- fs::remove_file(&self.result_path)?;
- }
- Ok(())
- }
- }
/// Values extracted from a `cramino` report by `Cramino::parse_results`.
///
/// `Default` yields the same all-zero / empty record that parsing starts from.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CraminoRes {
    /// Third token of the line starting with `File`.
    pub file_name: String,
    /// Fourth token of the line starting with `Number`.
    pub number_of_reads: u64,
    /// Third token of the line starting with `Yield`.
    pub yield_gb: f64,
    /// Value of the `Mean coverage` line.
    pub mean_coverage: f64,
    /// Value of the `N50` line.
    pub n50: u32,
    /// Value of the `Median length` line.
    pub median_length: f64,
    /// Value of the `Mean length` line.
    pub mean_length: f64,
    /// Value of the `Median identity` line.
    pub median_identity: f64,
    /// Value of the `Mean identity` line.
    pub mean_identity: f64,
    /// Second token of the line starting with `Path`.
    pub path: String,
    /// Tokens after `Creation time`, re-joined with single spaces.
    pub creation_time: String,
    /// Second token of the line starting with `Checksum`.
    pub checksum: String,
    /// Second token of every two-token `chr…` line, keyed by the first token.
    pub normalized_read_count_per_chromosome: HashMap<String, f64>,
}
- impl CraminoRes {
- pub fn is_woman(&self) -> anyhow::Result<bool> {
- if let Some(v) = self.normalized_read_count_per_chromosome.get("chrY") {
- Ok(*v < 0.1)
- } else {
- Err(anyhow::anyhow!("Can't get chrY normalized read count."))
- }
- }
- }
|