|
|
@@ -2,17 +2,13 @@ use std::{
|
|
|
collections::HashMap,
|
|
|
fs::{self, File},
|
|
|
path::PathBuf,
|
|
|
- str::FromStr,
|
|
|
};
|
|
|
|
|
|
use anyhow::{anyhow, Context};
|
|
|
use chrono::{DateTime, Utc};
|
|
|
+use dashmap::DashMap;
|
|
|
use glob::glob;
|
|
|
use log::{debug, info, warn};
|
|
|
-use pandora_lib_bindings::{
|
|
|
- progs::cramino::{Cramino, CraminoRes},
|
|
|
- utils::RunBin,
|
|
|
-};
|
|
|
use rand::{thread_rng, Rng};
|
|
|
use rayon::prelude::*;
|
|
|
use rust_htslib::bam::{ext::BamRecordExtensions, record::Cigar, Read};
|
|
|
@@ -86,7 +82,12 @@ impl WGSBam {
|
|
|
let json_file = PathBuf::from(&json_path);
|
|
|
|
|
|
if json_file.exists() && json_file.metadata()?.modified()? > modified {
|
|
|
- return Self::load_json(&json_path);
|
|
|
+ match Self::load_json(&json_path) {
|
|
|
+ Ok(s) => return Ok(s),
|
|
|
+ Err(e) => {
|
|
|
+ warn!("Error while loading {}.\n{e}", json_path);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
// let cramino_path = format!(
|
|
|
@@ -546,6 +547,8 @@ pub struct WGSBamStats {
|
|
|
pub mapped_yield: u128,
|
|
|
pub global_coverage: f64,
|
|
|
pub karyotype: Vec<(i32, u64, String, u128)>,
|
|
|
+ pub n50: u128,
|
|
|
+ pub by_lengths: Vec<(u128, u32)>,
|
|
|
}
|
|
|
|
|
|
impl WGSBamStats {
|
|
|
@@ -561,7 +564,7 @@ impl WGSBamStats {
|
|
|
let header = rust_htslib::bam::Header::from_template(&h);
|
|
|
bam.set_threads(bam_n_threads as usize)?;
|
|
|
|
|
|
- let mut mapped_lens = Vec::new();
|
|
|
+ let mut mapped_lengths = Vec::new();
|
|
|
let mut all_records = 0;
|
|
|
let mut n_reads = 0;
|
|
|
let mut lengths_by_tid: HashMap<i32, Vec<u128>> = HashMap::new();
|
|
|
@@ -582,7 +585,7 @@ impl WGSBamStats {
|
|
|
{
|
|
|
n_reads += 1;
|
|
|
let mapped_len = (read.reference_end() - read.pos()) as u128;
|
|
|
- mapped_lens.push(mapped_len);
|
|
|
+ mapped_lengths.push(mapped_len);
|
|
|
lengths_by_tid
|
|
|
.entry(read.tid())
|
|
|
.or_default()
|
|
|
@@ -592,11 +595,21 @@ impl WGSBamStats {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- let mapped_yield = mapped_lens.par_iter().sum::<u128>();
|
|
|
+ let mapped_yield = mapped_lengths.par_iter().sum::<u128>();
|
|
|
let genome = get_genome_sizes(&header)?;
|
|
|
let genome_size = genome.values().map(|v| *v as u128).sum::<u128>();
|
|
|
let global_coverage = mapped_yield as f64 / genome_size as f64;
|
|
|
|
|
|
+ let by_lengths: DashMap<u128, u32> = DashMap::new();
|
|
|
+ mapped_lengths
|
|
|
+ .par_iter()
|
|
|
+ .for_each(|l| *by_lengths.entry(*l).or_default() += 1);
|
|
|
+
|
|
|
+ let mut by_lengths: Vec<(u128, u32)> =
|
|
|
+ by_lengths.iter().map(|e| (*e.key(), *e.value())).collect();
|
|
|
+ by_lengths.par_sort_by(|a, b| a.0.cmp(&b.0));
|
|
|
+ let n50 = by_lengths[by_lengths.len() / 2].0;
|
|
|
+
|
|
|
let mut karyotype: Vec<(i32, u64, String, u128)> = lengths_by_tid
|
|
|
.iter()
|
|
|
.map(|(tid, lengths)| {
|
|
|
@@ -618,6 +631,8 @@ impl WGSBamStats {
|
|
|
mapped_yield,
|
|
|
global_coverage,
|
|
|
karyotype,
|
|
|
+ n50,
|
|
|
+ by_lengths,
|
|
|
})
|
|
|
}
|
|
|
|