|
|
@@ -3,6 +3,7 @@ use crate::{
|
|
|
bin::{scan_outliers, Bin},
|
|
|
};
|
|
|
use anyhow::Context;
|
|
|
+use dashmap::DashMap;
|
|
|
use dict_reader::read_dict;
|
|
|
use log::info;
|
|
|
use rayon::prelude::*;
|
|
|
@@ -10,7 +11,7 @@ use rust_htslib::bam::{Format, Header, Read, Record, Writer};
|
|
|
use std::{
|
|
|
collections::{HashMap, HashSet},
|
|
|
fs::{self, File},
|
|
|
- io::{self, BufReader, BufWriter, Write},
|
|
|
+ io::{self, BufReader, BufWriter, Write}, sync::Arc,
|
|
|
};
|
|
|
|
|
|
pub mod bam;
|
|
|
@@ -92,44 +93,88 @@ pub fn scan_save(
|
|
|
})
|
|
|
.collect();
|
|
|
|
|
|
- let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
|
|
|
+ info!("🆗");
|
|
|
+ let bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
|
|
|
let header = bam.header().to_owned();
|
|
|
let mut grouped_records = Vec::new();
|
|
|
+ // for outlier_record in outliers_records {
|
|
|
+ // let mut hm_positions: DashMap<String, Vec<String>> = DashMap::new();
|
|
|
+ // outlier_record.par_iter().for_each(|r| {
|
|
|
+ // let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
|
|
|
+ // for pos in get_all_positions(r, &header, &mut bam).unwrap() {
|
|
|
+ // let qname = String::from_utf8(r.qname().to_vec()).unwrap();
|
|
|
+ // hm_positions
|
|
|
+ // .entry(format!("{}-{}", pos.0, pos.1))
|
|
|
+ // .or_default()
|
|
|
+ // .push(qname.clone());
|
|
|
+ // hm_positions
|
|
|
+ // .entry(format!("{}-{}", pos.0, pos.2))
|
|
|
+ // .or_default()
|
|
|
+ // .push(qname);
|
|
|
+ // }
|
|
|
+ // });
|
|
|
+ // for v in hm_positions.iter_mut() {
|
|
|
+ // let mut v = v.value_mut();
|
|
|
+ // v.sort(); // Ensure dedup works correctly
|
|
|
+ // v.dedup();
|
|
|
+ //
|
|
|
+ // if v.len() >= min_reads {
|
|
|
+ // let rec: Vec<_> = outlier_record
|
|
|
+ // .clone()
|
|
|
+ // .into_iter()
|
|
|
+ // .filter(|r| {
|
|
|
+ // let qname = String::from_utf8(r.qname().to_vec()).unwrap();
|
|
|
+ // v.contains(&qname)
|
|
|
+ // })
|
|
|
+ // .collect();
|
|
|
+ // grouped_records.push(rec);
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+
|
|
|
for outlier_record in outliers_records {
|
|
|
- let mut hm_positions: HashMap<String, Vec<String>> = HashMap::new();
|
|
|
- outlier_record.iter().for_each(|r| {
|
|
|
- for pos in get_all_positions(r, &header, &mut bam).unwrap() {
|
|
|
- let qname = String::from_utf8(r.qname().to_vec()).unwrap();
|
|
|
- hm_positions
|
|
|
- .entry(format!("{}-{}", pos.0, pos.1))
|
|
|
- .or_default()
|
|
|
- .push(qname.clone());
|
|
|
- hm_positions
|
|
|
- .entry(format!("{}-{}", pos.0, pos.2))
|
|
|
- .or_default()
|
|
|
- .push(qname);
|
|
|
+ let hm_positions: DashMap<String, Vec<String>> = DashMap::new();
|
|
|
+ // let bam_clone = Arc::clone(&mut bam);
|
|
|
+
|
|
|
+ outlier_record.par_iter().for_each(|r| {
|
|
|
+ let r = r.clone();
|
|
|
+ if let Ok(positions) = get_all_positions(&r, bam_path) {
|
|
|
+ // if let Ok(positions) = get_all_positions(&r, &header, &mut bam) {
|
|
|
+ let qname = String::from_utf8_lossy(r.qname()).to_string();
|
|
|
+ for pos in positions {
|
|
|
+ hm_positions
|
|
|
+ .entry(format!("{}-{}", pos.0, pos.1))
|
|
|
+ .or_default()
|
|
|
+ .push(qname.clone());
|
|
|
+ hm_positions
|
|
|
+ .entry(format!("{}-{}", pos.0, pos.2))
|
|
|
+ .or_default()
|
|
|
+ .push(qname.clone());
|
|
|
+ }
|
|
|
}
|
|
|
});
|
|
|
- for v in hm_positions.values_mut() {
|
|
|
- v.sort(); // Ensure dedup works correctly
|
|
|
+
|
|
|
+ for mut entry in hm_positions.iter_mut() {
|
|
|
+ let v = entry.value_mut();
|
|
|
+ v.sort();
|
|
|
v.dedup();
|
|
|
|
|
|
if v.len() >= min_reads {
|
|
|
let rec: Vec<_> = outlier_record
|
|
|
- .clone()
|
|
|
- .into_iter()
|
|
|
+ .iter()
|
|
|
.filter(|r| {
|
|
|
- let qname = String::from_utf8(r.qname().to_vec()).unwrap();
|
|
|
+ let qname = String::from_utf8_lossy(r.qname()).to_string();
|
|
|
v.contains(&qname)
|
|
|
})
|
|
|
+ .cloned()
|
|
|
.collect();
|
|
|
grouped_records.push(rec);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ info!("n groups {}", grouped_records.len());
|
|
|
let mut dedup = HashSet::new();
|
|
|
-
|
|
|
let mut n_records = 0;
|
|
|
let grouped_records: Vec<Vec<Record>> = grouped_records
|
|
|
.iter()
|
|
|
@@ -340,7 +385,7 @@ mod tests {
|
|
|
#[test]
|
|
|
fn locus() {
|
|
|
init();
|
|
|
- let len = 1000;
|
|
|
+ // let len = 1000;
|
|
|
let id = "LEVASSEUR";
|
|
|
let bam_path = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
|
|
|
let out_scan = "/tmp/test.txt";
|
|
|
@@ -353,7 +398,7 @@ mod tests {
|
|
|
&bam_path,
|
|
|
"chr10",
|
|
|
/* 102020492 - (len * 1000) */ 0,
|
|
|
- 120000000,
|
|
|
+ 120_000,
|
|
|
&mut writer,
|
|
|
&out_dir_bam,
|
|
|
&config,
|