|
|
@@ -591,12 +591,20 @@ impl Variants {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- ///
|
|
|
+ /// Merge identical variants (same contig, position, reference and alternative).
|
|
|
pub fn merge(&mut self) {
|
|
|
- let pg = self.mp.add(new_pg_speed(self.len() as u64));
|
|
|
- pg.set_message("Merging Variants by contig, positions, ref, alt");
|
|
|
+ let reporter_msg =
|
|
|
+ "Merging identical variants (same position, reference and alternative)".to_string();
|
|
|
+ info!("{reporter_msg}");
|
|
|
+ if self.reporter.with_multi_progress {
|
|
|
+ self.reporter
|
|
|
+ .create_counter(&reporter_msg, Some(self.len() as u64));
|
|
|
+ } else {
|
|
|
+ self.reporter.create_counter(&reporter_msg, None);
|
|
|
+ }
|
|
|
+
|
|
|
let hm: DashMap<String, Variant> = DashMap::new();
|
|
|
- self.data.par_iter().progress_with(pg).for_each(|e| {
|
|
|
+ self.data.par_iter().for_each(|e| {
|
|
|
let k = format!(
|
|
|
"{}:{}|{}>{}",
|
|
|
e.contig, e.position, e.reference, e.alternative
|
|
|
@@ -613,6 +621,7 @@ impl Variants {
|
|
|
} else {
|
|
|
hm.insert(k, e.clone());
|
|
|
}
|
|
|
+ self.reporter.inc(&reporter_msg);
|
|
|
});
|
|
|
self.data = hm.iter().map(|e| e.value().clone()).collect();
|
|
|
}
|
|
|
@@ -620,31 +629,57 @@ impl Variants {
+    /// Annotate each variant with the GFF records found at its position.
+    ///
+    /// The file at `gff_path` is read through a BGZF reader and queried with the
+    /// CSI index expected at `<gff_path>.csi`. Every record returned for a
+    /// variant's contig and position is appended to its annotations as
+    /// `AnnotationType::NCBIGFF`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the GFF file or its index cannot be read, if a variant position
+    /// cannot be converted to a `Position`, or if a returned row cannot be read
+    /// or converted to `NCBIGFF`.
     pub fn annotate_gff_feature(&mut self, gff_path: &str) -> Result<()> {
         let gff_path = gff_path.to_string();
         let len = self.data.len();
-        let pg = self.mp.add(new_pg_speed(self.len() as u64));
-        pg.set_message("GFF Annotate");
+        // `par_chunks_mut` panics when given a chunk size of 0, which `len / 33`
+        // produces for fewer than 33 variants, so clamp the size to at least 1.
+        let gff_chunk_size = (len / 33).max(1);
 
-        self.data
-            .par_chunks_mut(len / 33)
-            .progress_with(pg)
-            .for_each(|chunk| {
-                let mut reader = File::open(gff_path.to_string())
-                    .map(noodles_bgzf::Reader::new)
-                    .map(gff::Reader::new)
-                    .unwrap();
-
-                let index = noodles_csi::read(format!("{}.csi", gff_path)).unwrap();
-
-                for v in chunk.iter_mut() {
-                    let start = Position::try_from(v.position as usize).unwrap();
-                    let r = Region::new(v.contig.to_string(), start..=start);
-                    if let std::result::Result::Ok(rows) = reader.query(&index, &r.clone()) {
-                        for row in rows {
-                            let ncbi = NCBIGFF::try_from(row.unwrap()).unwrap();
-                            v.annotations.push(AnnotationType::NCBIGFF(ncbi));
-                        }
+        let reporter_msg = "GFF annotation".to_string();
+        info!("{reporter_msg}");
+        if self.reporter.with_multi_progress {
+            // The counter is incremented once per chunk, so its total is the
+            // number of chunks: ceil(len / gff_chunk_size).
+            self.reporter.create_counter(
+                &reporter_msg,
+                Some(((len + gff_chunk_size - 1) / gff_chunk_size) as u64),
+            );
+        } else {
+            self.reporter.create_counter(&reporter_msg, None);
+        }
+
+        self.data.par_chunks_mut(gff_chunk_size).for_each(|chunk| {
+            // Each chunk opens its own reader and loads the `.csi` index.
+            let mut reader = File::open(&gff_path)
+                .map(noodles_bgzf::Reader::new)
+                .map(gff::Reader::new)
+                .unwrap();
+
+            let index = noodles_csi::read(format!("{}.csi", gff_path)).unwrap();
+
+            for v in chunk.iter_mut() {
+                let start = Position::try_from(v.position as usize).unwrap();
+                let r = Region::new(v.contig.to_string(), start..=start);
+                // Variants whose region query fails are left without GFF annotations.
+                if let std::result::Result::Ok(rows) = reader.query(&index, &r) {
+                    for row in rows {
+                        let ncbi = NCBIGFF::try_from(row.unwrap()).unwrap();
+                        v.annotations.push(AnnotationType::NCBIGFF(ncbi));
                     }
                 }
-            });
+            }
+            self.reporter.inc(&reporter_msg);
+        });
         Ok(())
     }
|
|
|
|