|
|
@@ -1285,21 +1285,6 @@ mod tests {
|
|
|
let chr = "chr10";
|
|
|
|
|
|
// Diag
|
|
|
- let dir = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}",);
|
|
|
- let mut genome = Genome::new();
|
|
|
- genome.add_dir(&dir).unwrap();
|
|
|
- let mut bp_representations: Vec<String> = genome
|
|
|
- .iter()
|
|
|
- .flat_map(|(_, c)| {
|
|
|
- c.contigs
|
|
|
- .iter()
|
|
|
- .filter_map(|contig| contig.breakpoints_repr().and_then(|r| r.first().cloned()))
|
|
|
- })
|
|
|
- .collect();
|
|
|
-
|
|
|
- bp_representations.sort();
|
|
|
- bp_representations.dedup();
|
|
|
- info!("Diag bp {}", bp_representations.len());
|
|
|
|
|
|
// MRD
|
|
|
let dir = format!("/data/longreads_basic_pipe/{id}/mrd/assemblies/{chr}",);
|
|
|
@@ -1323,21 +1308,47 @@ mod tests {
|
|
|
|
|
|
info!("Mrd bp {}", bp_representations_mrd.len());
|
|
|
|
|
|
- let bp: Vec<String> = bp_representations
|
|
|
- .into_iter()
|
|
|
- .filter(|bp| !bp_representations_mrd.contains(bp))
|
|
|
+ let dir = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}",);
|
|
|
+ let mut genome = Genome::new();
|
|
|
+ genome.add_dir(&dir).unwrap();
|
|
|
+ let mut bp_representations: Vec<(String, String)> = genome
|
|
|
+ .iter()
|
|
|
+ .flat_map(|(_, c)| {
|
|
|
+ c.contigs.iter().filter_map(|contig| {
|
|
|
+ contig
|
|
|
+ .breakpoints_repr()
|
|
|
+ .and_then(|r| r.first().cloned())
|
|
|
+ .map(|b| (contig.id.clone(), b))
|
|
|
+ })
|
|
|
+ })
|
|
|
+ .filter(|(_, bp)| {
|
|
|
+ !bp.split("||")
|
|
|
+ .map(|s| s.replace("►", "").replace("◄", ""))
|
|
|
+ .any(|bp| bp_representations_mrd.contains(&bp))
|
|
|
+ })
|
|
|
.collect();
|
|
|
|
|
|
- info!("Final BP {}", bp.len());
|
|
|
+ let mut grouped: HashMap<String, Vec<String>> = HashMap::new();
|
|
|
|
|
|
- bp.iter()
|
|
|
- .filter(|bp| {
|
|
|
- bp.split("||")
|
|
|
- .map(|s| s.replace("►", "").replace("◄", ""))
|
|
|
- .filter(|bp| bp_representations_mrd.contains(bp))
|
|
|
- .count()
|
|
|
- == 0
|
|
|
+ // Group contig IDs by breakpoint
|
|
|
+ for (id, bp) in bp_representations {
|
|
|
+ grouped.entry(bp).or_default().push(id);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Convert to a Vec, sort the IDs within each group, then sort the final result
|
|
|
+ let mut result: Vec<(Vec<String>, String)> = grouped
|
|
|
+ .into_iter()
|
|
|
+ .map(|(bp, mut ids)| {
|
|
|
+ ids.sort(); // Tri des IDs dans chaque groupe
|
|
|
+ (ids, bp)
|
|
|
})
|
|
|
- .for_each(|bp| println!("{bp}"));
|
|
|
+ .collect();
|
|
|
+ result.sort_by_key(|(_, bp)| bp.clone());
|
|
|
+
|
|
|
+ info!("Diag bp {}", result.len());
|
|
|
+
|
|
|
+ result
|
|
|
+ .iter()
|
|
|
+ .for_each(|(ids, bp)| { println!("{bp}\t{}", ids.join("|")) });
|
|
|
}
|
|
|
}
|