|
|
@@ -306,7 +306,9 @@ pub fn par_whole_scan(dict_file: &str, bam_path: &str, out_dir: &str) -> anyhow:
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
|
|
|
- use counts::Counts;
|
|
|
+ use std::collections::HashMap;
|
|
|
+
|
|
|
+ use counts::{merge_ranges, CountAnnotation, CountRange, Counts};
|
|
|
use rust_htslib::bam::Reader;
|
|
|
|
|
|
use super::*;
|
|
|
@@ -406,8 +408,8 @@ mod tests {
|
|
|
#[test]
|
|
|
fn save_stats() -> anyhow::Result<()> {
|
|
|
init();
|
|
|
- let id = "DAHAN";
|
|
|
- let breaks = vec![1, 6, 15];
|
|
|
+ let id = "LEVASSEUR";
|
|
|
+ let breaks = vec![1, 4, 15];
|
|
|
|
|
|
let result_dir = "/data/longreads_basic_pipe";
|
|
|
let save_dir = format!("{result_dir}/{id}/diag/report/data/scan");
|
|
|
@@ -435,4 +437,143 @@ mod tests {
|
|
|
counts.save_contigs(&contigs, &format!("{save_dir}/{id}"), breaks)?;
|
|
|
Ok(())
|
|
|
}
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn save_json() -> anyhow::Result<()> {
|
|
|
+ init();
|
|
|
+ let id = "MORIN";
|
|
|
+ let contig = "chr1";
|
|
|
+
|
|
|
+ let result_dir = "/data/longreads_basic_pipe";
|
|
|
+ let save_dir = format!("{result_dir}/{id}/diag/report/data/scan");
|
|
|
+ info!("Files will be saved into {save_dir}");
|
|
|
+ fs::create_dir_all(&save_dir)?;
|
|
|
+
|
|
|
+ let mut contigs: Vec<String> = (1..=22).map(|c| format!("chr{c}")).collect();
|
|
|
+ contigs.push("chrX".to_string());
|
|
|
+ contigs.push("chrY".to_string());
|
|
|
+ let mut counts = Counts::from_files(
|
|
|
+ contigs
|
|
|
+ .clone()
|
|
|
+ .iter()
|
|
|
+ .map(|c| format!("{result_dir}/{id}/diag/scan/{c}_counts.tsv"))
|
|
|
+ .collect(),
|
|
|
+ );
|
|
|
+ counts.mrd_from_files(
|
|
|
+ contigs
|
|
|
+ .clone()
|
|
|
+ .iter()
|
|
|
+ .map(|c| format!("{result_dir}/{id}/mrd/scan/{c}_counts.tsv"))
|
|
|
+ .collect(),
|
|
|
+ );
|
|
|
+
|
|
|
+ // counts.mask_low_mrd(contig, 6)?;
|
|
|
+ counts.mask_low_quality(contig, 0.1)?;
|
|
|
+
|
|
|
+ let mask_annotations: HashSet<CountAnnotation> = vec![
|
|
|
+ CountAnnotation::MaskedLowMRD,
|
|
|
+ CountAnnotation::MaskedQuality,
|
|
|
+ ]
|
|
|
+ .into_iter()
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ let contig_counts = counts.data.get(contig).unwrap();
|
|
|
+
|
|
|
+ let mut n_masked = 0;
|
|
|
+ let mut n = 0;
|
|
|
+ let mut n_by_depth = HashMap::new();
|
|
|
+ for count in contig_counts {
|
|
|
+ if count
|
|
|
+ .annotation
|
|
|
+ .iter()
|
|
|
+ .any(|a| mask_annotations.contains(a))
|
|
|
+ {
|
|
|
+ n_masked += 1;
|
|
|
+ } else {
|
|
|
+ *n_by_depth.entry(count.n_reads).or_insert(0) += 1;
|
|
|
+ }
|
|
|
+ n += 1;
|
|
|
+ }
|
|
|
+ let mut d_n: Vec<_> = n_by_depth.iter().collect();
|
|
|
+ d_n.sort_unstable_by_key(|(d, _)| **d);
|
|
|
+ println!("{:#?}", d_n);
|
|
|
+ let n_unmasked = d_n.iter().map(|(_, v)| **v).sum::<i32>() as f64;
|
|
|
+
|
|
|
+ // let proportions: Vec<(u32, f64)> = d_n.iter().map(|(d,n)| (**d, **n as f64 / n_unmasked)).collect();
|
|
|
+ // let u = serde_json::from_value(proportions)?;
|
|
|
+ // println!("{:?}", proportions);
|
|
|
+
|
|
|
+ // let n = contig_counts.len();
|
|
|
+ // let n_masked = contig_counts
|
|
|
+ // .iter()
|
|
|
+ // .filter(|c| c.annotation.iter().any(|a| mask_annotations.contains(a)))
|
|
|
+ // .count();
|
|
|
+
|
|
|
+ info!(
|
|
|
+ "{contig} masked: {n_masked}/{n} ({:.0}%)",
|
|
|
+ n_masked as f64 * 100.0 / n as f64
|
|
|
+ );
|
|
|
+
|
|
|
+ // let n_unmasked = (n - n_masked) as f64;
|
|
|
+ // let mut frequencies = HashMap::new();
|
|
|
+ // let contig_values = counts.get(&contig);
|
|
|
+ // contig_value.iter()
|
|
|
+ // .map(|n| {
|
|
|
+ //
|
|
|
+ // })
|
|
|
+ // .for_each(|n| {
|
|
|
+ //
|
|
|
+ // });
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn save_mask() -> anyhow::Result<()> {
|
|
|
+ init();
|
|
|
+
|
|
|
+ let id = "ADJAGBA";
|
|
|
+ let result_dir = "/data/longreads_basic_pipe";
|
|
|
+ let save_path = format!("{result_dir}/{id}/diag/mask.bed");
|
|
|
+
|
|
|
+ let mut contigs: Vec<String> = (1..=22).map(|c| format!("chr{c}")).collect();
|
|
|
+ contigs.push("chrX".to_string());
|
|
|
+ contigs.push("chrY".to_string());
|
|
|
+ let mut counts = Counts::from_files(
|
|
|
+ contigs
|
|
|
+ .clone()
|
|
|
+ .iter()
|
|
|
+ .map(|c| format!("{result_dir}/{id}/diag/scan/{c}_counts.tsv"))
|
|
|
+ .collect(),
|
|
|
+ );
|
|
|
+ counts.mrd_from_files(
|
|
|
+ contigs
|
|
|
+ .clone()
|
|
|
+ .iter()
|
|
|
+ .map(|c| format!("{result_dir}/{id}/mrd/scan/{c}_counts.tsv"))
|
|
|
+ .collect(),
|
|
|
+ );
|
|
|
+
|
|
|
+ let mut writer = BufWriter::new(File::create(save_path)?) ;
|
|
|
+ for c in contigs.iter() {
|
|
|
+ println!("Contig {c}");
|
|
|
+ counts.mask_low_mrd(c, 6)?;
|
|
|
+ counts.mask_low_quality(c, 0.3)?;
|
|
|
+ let count = counts.data.get(c).unwrap();
|
|
|
+ let ranges: Vec<CountRange> = count
|
|
|
+ .iter()
|
|
|
+ .filter(|a| {
|
|
|
+ a.annotation
|
|
|
+ .iter()
|
|
|
+ .any(|e| {
|
|
|
+ matches!(e, CountAnnotation::MaskedLowMRD)
|
|
|
+ | matches!(e, CountAnnotation::MaskedQuality)
|
|
|
+ })
|
|
|
+ })
|
|
|
+ .map(|e| e.position.clone()).collect();
|
|
|
+ let bed: Vec<String> = merge_ranges(ranges).iter().map(|e| format!("{}\t{}\t{}", e.contig, e.start, e.end)).collect();
|
|
|
+ writer.write_fmt(format_args!("{}\n", bed.join("\n")))?;
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
}
|