Browse Source

filter mrd better repr of bp

Thomas 1 year ago
parent
commit
2f4fde4403
2 changed files with 96 additions and 16 deletions
  1. 4 4
      src/genomic_graph.rs
  2. 92 12
      src/lib.rs

+ 4 - 4
src/genomic_graph.rs

@@ -8,10 +8,10 @@ use petgraph::{
 use crate::{breakpoint::BreakPoint, Genome};
 
 #[derive(Debug, Clone)]
-struct GenomicGraphParams {
-    sens_symb: String,
-    rc_symb: String,
-    thousand_sep: String,
+pub struct GenomicGraphParams {
+    pub sens_symb: String,
+    pub rc_symb: String,
+    pub thousand_sep: String,
 }
 
 impl Default for GenomicGraphParams {

+ 92 - 12
src/lib.rs

@@ -4,6 +4,7 @@ pub mod genomic_graph;
 
 use anyhow::{anyhow, Ok, Result};
 use fasta::record::Sequence;
+use genomic_graph::GenomicGraphParams;
 use log::info;
 use minimap2::{Aligner, Mapping};
 use noodles_fasta as fasta;
@@ -102,10 +103,13 @@ impl ContigRef {
         let right = "◄";
         let bp_right = "▐";
         let bp_left = "▌";
+
+        let bp_right = "|";
+        let bp_left = "|";
         let format = CustomFormat::builder()
             .grouping(Grouping::Standard)
             .minus_sign("-")
-            .separator("_")
+            .separator(",")
             .build()
             .unwrap();
 
@@ -120,19 +124,26 @@ impl ContigRef {
         let mut res = Vec::new();
         if let Some(breakpoints) = self.breakpoints() {
             for v in breakpoints.windows(2) {
+                assert!(v[0].target_start < v[0].target_end);
+                assert!(v[1].target_start < v[1].target_end);
                 let mut bp_string = format!("{}:", v[0].target_name.clone().unwrap_or(uk.clone()));
-                let _ = bp_string
-                    .write_formatted(&v[0].target_end, &format)
-                    .unwrap();
+                let bp_pos = match v[1].strand {
+                    minimap2::Strand::Forward => v[0].target_end,
+                    minimap2::Strand::Reverse => v[1].target_start,
+                };
+
+                let _ = bp_string.write_formatted(&bp_pos, &format).unwrap();
                 bp_string = format!(
                     "{bp_string}{}{bp_left}{bp_right}{}{}:",
                     get_sign(&v[0]),
                     get_sign(&v[1]),
                     v[1].target_name.clone().unwrap_or(uk.clone())
                 );
-                let _ = bp_string
-                    .write_formatted(&v[1].target_start, &format)
-                    .unwrap();
+                let bp_pos = match v[1].strand {
+                    minimap2::Strand::Forward => v[1].target_start,
+                    minimap2::Strand::Reverse => v[1].target_end,
+                };
+                let _ = bp_string.write_formatted(&bp_pos, &format).unwrap();
                 res.push(bp_string);
             }
         }
@@ -271,15 +282,23 @@ impl ContigRef {
 }
 
 pub fn mapping_to_string(mapping: &Mapping) -> String {
+    let config = GenomicGraphParams::default();
+
+    let sens = match mapping.strand {
+        minimap2::Strand::Forward => config.sens_symb.clone(),
+        minimap2::Strand::Reverse => config.rc_symb.clone(),
+    };
+
     let uk = "UNKNOWN".to_string();
     format!(
-        "{}:{}-{}({}:{}-{})",
+        "{}:{}-{}({}-{}){}",
         mapping.target_name.clone().unwrap_or(uk.clone()),
         mapping.target_start,
         mapping.target_end,
-        mapping.query_name.clone().unwrap_or(uk),
+        // mapping.query_name.clone().unwrap_or(uk),
         mapping.query_start,
-        mapping.query_end
+        mapping.query_end,
+        sens
     )
 }
 
@@ -1210,8 +1229,7 @@ mod tests {
     #[test]
     fn dir() {
         init();
-
-        let id = "RIVOALEN";
+        let id = "CHAMPION";
 
         let mut chr: Vec<String> = (1..=22).map(|p| format!("chr{p}")).collect();
         chr.extend(["chrX", "chrY", "chrM"].iter().map(|&s| s.to_string()));
@@ -1260,4 +1278,66 @@ mod tests {
 
         // println!("{genome:#?}");
     }
+    #[test]
+    fn contig() {
+        init();
+        let id = "SALICETTO";
+        let chr = "chr10";
+
+        // Diag
+        let dir = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}",);
+        let mut genome = Genome::new();
+        genome.add_dir(&dir).unwrap();
+        let mut bp_representations: Vec<String> = genome
+            .iter()
+            .flat_map(|(_, c)| {
+                c.contigs
+                    .iter()
+                    .filter_map(|contig| contig.breakpoints_repr().and_then(|r| r.first().cloned()))
+            })
+            .collect();
+
+        bp_representations.sort();
+        bp_representations.dedup();
+        info!("Diag bp {}", bp_representations.len());
+
+        // MRD
+        let dir = format!("/data/longreads_basic_pipe/{id}/mrd/assemblies/{chr}",);
+        let mut genome = Genome::new();
+        genome.add_dir(&dir).unwrap();
+        let mut bp_representations_mrd: Vec<String> = genome
+            .iter()
+            .flat_map(|(_, c)| {
+                c.contigs
+                    .iter()
+                    .filter_map(|contig| contig.breakpoints_repr().and_then(|r| r.first().cloned()))
+            })
+            .flat_map(|bp| {
+                bp.split("||")
+                    .map(|s| s.replace("►", "").replace("◄", ""))
+                    .collect::<Vec<String>>()
+            })
+            .collect();
+        bp_representations_mrd.sort();
+        bp_representations_mrd.dedup();
+
+        info!("Mrd bp {}", bp_representations_mrd.len());
+
+        let bp: Vec<String> = bp_representations
+            .into_iter()
+            .filter(|bp| !bp_representations_mrd.contains(bp))
+            .collect();
+
+        info!("Final BP {}", bp.len());
+
+        bp.iter()
+            .filter(|bp| {
+                bp.split("||")
+                    .map(|s| s.replace("►", "").replace("◄", ""))
+                    .filter(|bp| bp_representations_mrd.contains(bp))
+                    .count()
+                    == 0
+            })
+            .for_each(|bp| println!("{bp}"));
+    }
 }