Thomas 1 жил өмнө
parent
commit
5a172d627c
1 өөрчлөгдсөн 38 нэмэгдсэн , 27 устгасан
  1. 38 27
      src/lib.rs

+ 38 - 27
src/lib.rs

@@ -1285,21 +1285,6 @@ mod tests {
         let chr = "chr10";
 
         // Diag
-        let dir = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}",);
-        let mut genome = Genome::new();
-        genome.add_dir(&dir).unwrap();
-        let mut bp_representations: Vec<String> = genome
-            .iter()
-            .flat_map(|(_, c)| {
-                c.contigs
-                    .iter()
-                    .filter_map(|contig| contig.breakpoints_repr().and_then(|r| r.first().cloned()))
-            })
-            .collect();
-
-        bp_representations.sort();
-        bp_representations.dedup();
-        info!("Diag bp {}", bp_representations.len());
 
         // MRD
         let dir = format!("/data/longreads_basic_pipe/{id}/mrd/assemblies/{chr}",);
@@ -1323,21 +1308,47 @@ mod tests {
 
         info!("Mrd bp {}", bp_representations_mrd.len());
 
-        let bp: Vec<String> = bp_representations
-            .into_iter()
-            .filter(|bp| !bp_representations_mrd.contains(bp))
+        let dir = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}",);
+        let mut genome = Genome::new();
+        genome.add_dir(&dir).unwrap();
+        let mut bp_representations: Vec<(String, String)> = genome
+            .iter()
+            .flat_map(|(_, c)| {
+                c.contigs.iter().filter_map(|contig| {
+                    contig
+                        .breakpoints_repr()
+                        .and_then(|r| r.first().cloned())
+                        .map(|b| (contig.id.clone(), b))
+                })
+            })
+            .filter(|(_, bp)| {
+                !bp.split("||")
+                    .map(|s| s.replace("►", "").replace("◄", ""))
+                    .any(|bp| bp_representations_mrd.contains(&bp))
+            })
             .collect();
 
-        info!("Final BP {}", bp.len());
+        let mut grouped: HashMap<String, Vec<String>> = HashMap::new();
 
-        bp.iter()
-            .filter(|bp| {
-                bp.split("||")
-                    .map(|s| s.replace("►", "").replace("◄", ""))
-                    .filter(|bp| bp_representations_mrd.contains(bp))
-                    .count()
-                    == 0
+        // Group contig IDs by breakpoint
+        for (id, bp) in bp_representations {
+            grouped.entry(bp).or_default().push(id);
+        }
+
+        // Convert to a Vec, sort the IDs within each group, then sort the groups
+        let mut result: Vec<(Vec<String>, String)> = grouped
+            .into_iter()
+            .map(|(bp, mut ids)| {
+                ids.sort(); // Tri des IDs dans chaque groupe
+                (ids, bp)
             })
-            .for_each(|bp| println!("{bp}"));
+            .collect();
+        result.sort_by_key(|(_, bp)| bp.clone());
+
+        info!("Diag bp {}", result.len());
+
+        result
+            .iter()
+            .for_each(|(ids, bp)| { println!("{bp}\t{}", ids.join("|")) });
     }
 }