Thomas 1 ano atrás
pai
commit
137db21975
3 arquivos alterados com 444 adições e 164 exclusões
  1. 164 0
      src/breakpoint.rs
  2. 246 0
      src/genomic_graph.rs
  3. 34 164
      src/lib.rs

+ 164 - 0
src/breakpoint.rs

@@ -0,0 +1,164 @@
+use minimap2::Mapping;
+use num_format::{CustomFormat, Grouping, ToFormattedString};
+use uuid::Uuid;
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct BreakPoint {
+    pub id: String,
+    pub mappings: Vec<Mapping>,
+    // sens_symb: &str,
+    // rc_symb: &str
+}
+
+impl BreakPoint {
+    pub fn new(mappings: Vec<Mapping>) -> Self {
+        Self {
+            id: Uuid::new_v4().to_string(),
+            mappings,
+        }
+    }
+
+    pub fn from(sens: bool, chr: &str, pos: i32, id: &str) -> Self {
+        let strand = match sens {
+            true => minimap2::Strand::Forward,
+            false => minimap2::Strand::Reverse,
+        };
+        let target_name = Some(chr.to_string());
+        let mut left_mapping = Mapping::default();
+        left_mapping.strand = strand;
+        left_mapping.target_name = target_name.clone();
+        left_mapping.target_start = pos;
+        left_mapping.target_end = pos;
+
+        let mut right_mapping = Mapping::default();
+        right_mapping.strand = strand;
+        right_mapping.target_name = target_name;
+        right_mapping.target_start = pos;
+        right_mapping.target_end = pos;
+
+        Self {
+            id: id.to_string(),
+            mappings: vec![left_mapping, right_mapping],
+        }
+    }
+
+    pub fn sens(&self) -> Self {
+        self.clone()
+    }
+
+    pub fn rc(&self) -> Self {
+        let mut mappings = self.mappings.clone();
+        mappings.reverse();
+        mappings.iter_mut().for_each(|m| m.strand = match m.strand {
+            minimap2::Strand::Forward => minimap2::Strand::Reverse,
+            minimap2::Strand::Reverse => minimap2::Strand::Forward,
+        });
+        
+        Self {
+            id: self.id.clone(),
+            mappings,
+        }
+    }
+
+    pub fn repr_sens(&self) -> ((bool, String, i32), (bool, String, i32)) {
+        let (first, last) = (
+            self.mappings.first().unwrap(),
+            self.mappings.last().unwrap(),
+        );
+
+        match (first.strand, last.strand) {
+            (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
+                (true, first.target_name.clone().unwrap(), first.target_start),
+                (true, last.target_name.clone().unwrap(), last.target_end),
+            ),
+            (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
+                (true, first.target_name.clone().unwrap(), first.target_start),
+                (false, last.target_name.clone().unwrap(), last.target_start),
+            ),
+            (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
+                (false, first.target_name.clone().unwrap(), first.target_end),
+                (true, last.target_name.clone().unwrap(), last.target_end),
+            ),
+            (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
+                (false, first.target_name.clone().unwrap(), first.target_end),
+                (false, last.target_name.clone().unwrap(), last.target_start),
+            ),
+        }
+    }
+
+    pub fn repr_rc(&self) -> ((bool, String, i32), (bool, String, i32)) {
+        let (mut a, mut b) = self.repr_sens();
+        a.0 = !a.0;
+        b.0 = !b.0;
+        // let (aa, bb) = self.rc().repr_sens();
+        // assert_eq!(aa, b);
+        (b, a)
+    }
+
+    pub fn str_sens(&self, sens_symb: &str, rc_symb: &str) -> String {
+        let format = CustomFormat::builder()
+            .grouping(Grouping::Standard)
+            .minus_sign("-")
+            .separator("_")
+            .build()
+            .unwrap();
+        let (a, b) = self.repr_sens();
+        let symb = |s| -> &str {
+            if s {
+                sens_symb
+            } else {
+                rc_symb
+            }
+        };
+        format!(
+            "{}{}:{}|{}:{}{}",
+            symb(a.0),
+            a.1,
+            a.2.to_formatted_string(&format),
+            b.1,
+            b.2.to_formatted_string(&format),
+            symb(b.0)
+        )
+    }
+
+    pub fn str_rc(&self, sens_symb: &str, rc_symb: &str) -> String {
+        let format = CustomFormat::builder()
+            .grouping(Grouping::Standard)
+            .minus_sign("-")
+            .separator("_")
+            .build()
+            .unwrap();
+        let (a, b) = self.repr_rc();
+        let symb = |s| -> &str {
+            if s {
+                sens_symb
+            } else {
+                rc_symb
+            }
+        };
+        format!(
+            "{}{}:{}|{}:{}{}",
+            symb(a.0),
+            a.1,
+            a.2.to_formatted_string(&format),
+            b.1,
+            b.2.to_formatted_string(&format),
+            symb(b.0)
+        )
+    }
+
+    pub fn is_next(&self, next: &BreakPoint) -> bool {
+        let (_, (last_sens, last_chr, last_pos)) = self.repr_sens();
+        let ((next_sens, next_chr, next_pos), _) = next.repr_sens();
+        let is_same_sens = last_sens == next_sens;
+        let is_on_same_chr = last_chr == next_chr;
+        let is_reachable = if last_sens {
+            last_pos < next_pos
+        } else {
+            last_pos > next_pos
+        };
+        is_same_sens && is_on_same_chr && is_reachable
+    }
+}
+
+

+ 246 - 0
src/genomic_graph.rs

@@ -0,0 +1,246 @@
+use std::collections::{HashMap, HashSet};
+
+use petgraph::{
+    dot::Dot,
+    stable_graph::{NodeIndex, StableGraph},
+};
+
+use crate::{breakpoint::BreakPoint, Genome};
+
+#[derive(Debug, Clone)]
+struct GenomicGraphParams {
+    sens_symb: String,
+    rc_symb: String,
+    thousand_sep: String,
+}
+
+impl Default for GenomicGraphParams {
+    fn default() -> Self {
+        Self {
+            sens_symb: "►".to_string(),
+            rc_symb: "◄".to_string(),
+            thousand_sep: ",".to_string(),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct GenomicGraph {
+    g: StableGraph<BreakPoint, String>,
+    params: GenomicGraphParams,
+}
+
+impl Default for GenomicGraph {
+    fn default() -> Self {
+        Self {
+            g: StableGraph::new(),
+            params: GenomicGraphParams::default(),
+        }
+    }
+}
+
+impl GenomicGraph {
+    pub fn from_genome(genome: &Genome) -> Self {
+        let mut graph = GenomicGraph::default();
+
+        // Load unique breakpoints as graph nodes.
+        let mut breakpoints_id = HashMap::new();
+        let mut dedup_hash = HashSet::new();
+        for contig in genome.contigs() {
+            if let Some(mappings) = contig.contig_ref.breakpoints() {
+                let bp = BreakPoint::new(mappings);
+                if !dedup_hash
+                    .contains(&bp.str_sens(&graph.params.sens_symb, &graph.params.rc_symb))
+                {
+                    let id = graph.g.add_node(bp.clone());
+                    breakpoints_id.insert(bp.id.clone(), id);
+                    dedup_hash.insert(bp.str_sens(&graph.params.sens_symb, &graph.params.rc_symb));
+                    dedup_hash.insert(bp.str_rc(&graph.params.sens_symb, &graph.params.rc_symb));
+                }
+            }
+        }
+        graph.draw_edges();
+        graph
+    }
+
+    pub fn draw_edges(&mut self) {
+        // Draw edges between nodes.
+        let g_iter = self.g.clone();
+        for (prev_bp, a) in g_iter.node_weights().zip(g_iter.node_indices()) {
+            for (next, b) in g_iter.node_weights().zip(g_iter.node_indices()) {
+                if prev_bp == next {
+                    continue;
+                }
+
+                // Don't add edge between start and end nodes.
+                // if a == start_id && b == end_id || a == end_id && b == start_id {
+                //     continue;
+                // }
+
+                // A->B.
+                if prev_bp.is_next(next) || prev_bp.is_next(&next.rc()) {
+                    if self.g.find_edge(a, b).is_none() {
+                        self.g.add_edge(a, b, "".to_string());
+                    }
+                }
+
+                // If the rc of A can follow B, B->A.
+                if prev_bp.rc().is_next(&next) || prev_bp.rc().is_next(&next.rc()) {
+                    if self.g.find_edge(b, a).is_none() {
+                        self.g.add_edge(b, a, "".to_string());
+                    }
+                }
+            }
+        }
+    }
+
+    pub fn dot_graph(&self) -> String {
+        let g = self.g.map(
+            |ni, nv| {
+                format!(
+                    "{} ({})",
+                    nv.str_sens(&self.params.sens_symb, &self.params.rc_symb)
+                        .to_string(),
+                    ni.index() + 1
+                )
+            },
+            |_, v| v.to_string(),
+        );
+        Dot::new(&g).to_string().replace("\\\"", "")
+    }
+    pub fn ways(
+        &mut self,
+        start: (bool, &str, i32),
+        end: (bool, &str, i32),
+    ) -> Vec<Vec<(NodeIndex, bool, BreakPoint, String)>> {
+        // Create a new working graph.
+        let mut gg = self.clone();
+        // Add start and end nodes.
+        // start node
+        let start_bp = BreakPoint::from(start.0, start.1, start.2, "start");
+        let start_id = gg.g.add_node(start_bp.clone());
+
+        // end node
+        let end_bp = BreakPoint::from(end.0, end.1, end.2, "end");
+        let end_id = gg.g.add_node(end_bp.clone());
+
+        // Redraw edges and remove direct edge from start to end.
+        gg.draw_edges();
+        if let Some(e) = gg.g.find_edge(start_id, end_id) {
+            gg.g.remove_edge(e);
+        }
+        if let Some(e) = gg.g.find_edge(end_id, start_id) {
+            gg.g.remove_edge(e);
+        }
+
+        // Find ways between start and end nodes.
+        let ways = find_ways(&gg.g, start_id, start.0, end_id);
+
+        // Add corresponding Breakpoints and their representating String to ways steps.
+        let mut ways: Vec<Vec<(NodeIndex, bool, BreakPoint, String)>> = ways
+            .into_iter()
+            .map(|way| {
+                way.iter()
+                    .map(|(node_id, sens)| {
+                        let bp = gg.g.node_weight(*node_id).unwrap().to_owned();
+                        (*node_id, *sens, bp)
+                    })
+                    .map(|(id, sens, bp)| {
+                        let s = if sens {
+                            bp.str_sens(&gg.params.sens_symb, &gg.params.rc_symb)
+                        } else {
+                            bp.str_rc(&gg.params.sens_symb, &gg.params.rc_symb)
+                        };
+                        (id, sens, bp, s)
+                    })
+                    .collect()
+            })
+            .collect();
+
+        // Sort ways.
+        ways.sort_by_key(|way| {
+            let s: Vec<String> = way.iter().map(|(_, _, _, s)| s.to_string()).collect();
+            s.join("")
+        });
+
+        // Dedup ways.
+        ways.dedup_by_key(|way| {
+            let s: Vec<String> = way.iter().map(|(_, _, _, s)| s.to_string()).collect();
+            s.join("")
+        });
+
+        ways
+    }
+}
+
+fn get_next(
+    graph: &StableGraph<BreakPoint, String>,
+    ways: Vec<Vec<(NodeIndex, bool)>>,
+) -> Vec<Vec<(NodeIndex, bool)>> {
+    let mut visited = Vec::new();
+    for way in ways.into_iter() {
+        let mut possible_next: Vec<(NodeIndex, bool)> = Vec::new();
+        let (last_id, last_sens) = way.get(way.len() - 1).unwrap().to_owned();
+        let a = graph.node_weight(last_id).unwrap().to_owned();
+        let last = if last_sens { a.clone() } else { a.rc() };
+
+        // Check for every neighbors if they can be visited
+        for neighbor_id in graph.neighbors(last_id) {
+            let b = graph.node_weight(neighbor_id).unwrap().to_owned();
+            if last.is_next(&b) {
+                possible_next.push((neighbor_id, true));
+            }
+            if last.is_next(&b.rc()) {
+                possible_next.push((neighbor_id, false));
+            }
+        }
+
+        // If no visitable neighbors continue to the next way
+        if possible_next.len() == 0 {
+            continue;
+        }
+        for pn in possible_next {
+            // If the possible next node in the way isn't already visited add it as visited
+            let mut new_way = way.clone();
+            if !new_way
+                .iter()
+                .map(|(id, _)| id.clone())
+                .collect::<Vec<NodeIndex>>()
+                .contains(&pn.0)
+            {
+                new_way.push(pn);
+                visited.push(new_way);
+            } else {
+                // println!("dropping way: {way:?} next : {:?}", pn);
+            }
+        }
+    }
+    visited
+}
+
+pub fn find_ways(
+    graph: &StableGraph<BreakPoint, String>,
+    start_id: NodeIndex,
+    start_sens: bool,
+    end_id: NodeIndex,
+) -> Vec<Vec<(NodeIndex, bool)>> {
+    let mut reached_end = Vec::new();
+    let mut ways = vec![vec![(start_id, start_sens)]];
+    loop {
+        let new_ways = get_next(graph, ways.clone());
+        if new_ways.len() == 0 {
+            break;
+        }
+
+        ways.clear();
+        for new_way in new_ways.iter() {
+            let last_id = new_way.last().unwrap().to_owned().0;
+            if last_id == end_id {
+                reached_end.push(new_way.clone());
+            } else {
+                ways.push(new_way.clone());
+            }
+        }
+    }
+    reached_end
+}

+ 34 - 164
src/lib.rs

@@ -1,20 +1,22 @@
+pub mod breakpoint;
+pub mod genomic_graph;
+
 use anyhow::{anyhow, Ok, Result};
+use breakpoint::BreakPoint;
 use fasta::record::Sequence;
 use log::info;
 use minimap2::{Aligner, Mapping};
 use noodles_fasta as fasta;
-use num_format::{CustomFormat, Grouping, Locale, ToFormattedString, WriteFormatted};
-use petgraph::{algo, dot::Dot, prelude::*, Graph};
+use num_format::{CustomFormat, Grouping, ToFormattedString, WriteFormatted};
+use petgraph::{dot::Dot, prelude::*};
 use rust_htslib::bam::{self, Record};
-use std::hash::{Hash, Hasher};
 use std::{
-    collections::{HashMap, VecDeque},
+    collections::HashMap,
     fmt,
     fs::{self, File},
     io::{BufReader, BufWriter, Write},
     path::PathBuf,
     process::{Command, Stdio},
-    thread,
 };
 use uuid::Uuid;
 
@@ -985,165 +987,6 @@ pub fn dot_graph_biall(
     dot
 }
 
-#[derive(Clone, PartialEq, Eq, Debug)]
-pub struct BreakPoint {
-    pub id: String,
-    pub mappings: Vec<Mapping>,
-    // sens_symb: &str,
-    // rc_symb: &str
-}
-
-impl BreakPoint {
-    pub fn new(mappings: Vec<Mapping>) -> Self {
-        Self {
-            id: Uuid::new_v4().to_string(),
-            mappings,
-        }
-    }
-
-    pub fn from(sens: bool, chr: &str, pos: i32, id: &str) -> Self {
-        let strand = match sens {
-            true => minimap2::Strand::Forward,
-            false => minimap2::Strand::Reverse,
-        };
-        let target_name = Some(chr.to_string());
-        let mut left_mapping = Mapping::default();
-        left_mapping.strand = strand;
-        left_mapping.target_name = target_name.clone();
-        left_mapping.target_start = pos;
-        left_mapping.target_end = pos;
-
-        let mut right_mapping = Mapping::default();
-        right_mapping.strand = strand;
-        right_mapping.target_name = target_name;
-        right_mapping.target_start = pos;
-        right_mapping.target_end = pos;
-
-        Self {
-            id: id.to_string(),
-            mappings: vec![left_mapping, right_mapping],
-        }
-    }
-
-    pub fn sens(&self) -> Self {
-        self.clone()
-    }
-
-    pub fn rc(&self) -> Self {
-        let mut mappings = self.mappings.clone();
-        mappings.reverse();
-        mappings.iter_mut().for_each(|m| m.strand = match m.strand {
-            minimap2::Strand::Forward => minimap2::Strand::Reverse,
-            minimap2::Strand::Reverse => minimap2::Strand::Forward,
-        });
-        
-        Self {
-            id: self.id.clone(),
-            mappings,
-        }
-    }
-
-    pub fn repr_sens(&self) -> ((bool, String, i32), (bool, String, i32)) {
-        let (first, last) = (
-            self.mappings.first().unwrap(),
-            self.mappings.last().unwrap(),
-        );
-
-        match (first.strand, last.strand) {
-            (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
-                (true, first.target_name.clone().unwrap(), first.target_start),
-                (true, last.target_name.clone().unwrap(), last.target_end),
-            ),
-            (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
-                (true, first.target_name.clone().unwrap(), first.target_start),
-                (false, last.target_name.clone().unwrap(), last.target_start),
-            ),
-            (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
-                (false, first.target_name.clone().unwrap(), first.target_end),
-                (true, last.target_name.clone().unwrap(), last.target_end),
-            ),
-            (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
-                (false, first.target_name.clone().unwrap(), first.target_end),
-                (false, last.target_name.clone().unwrap(), last.target_start),
-            ),
-        }
-    }
-
-    pub fn repr_rc(&self) -> ((bool, String, i32), (bool, String, i32)) {
-        let (mut a, mut b) = self.repr_sens();
-        a.0 = !a.0;
-        b.0 = !b.0;
-        // let (aa, bb) = self.rc().repr_sens();
-        // assert_eq!(aa, b);
-        (b, a)
-    }
-
-    pub fn str_sens(&self, sens_symb: &str, rc_symb: &str) -> String {
-        let format = CustomFormat::builder()
-            .grouping(Grouping::Standard)
-            .minus_sign("-")
-            .separator("_")
-            .build()
-            .unwrap();
-        let (a, b) = self.repr_sens();
-        let symb = |s| -> &str {
-            if s {
-                sens_symb
-            } else {
-                rc_symb
-            }
-        };
-        format!(
-            "{}{}:{}|{}:{}{}",
-            symb(a.0),
-            a.1,
-            a.2.to_formatted_string(&format),
-            b.1,
-            b.2.to_formatted_string(&format),
-            symb(b.0)
-        )
-    }
-
-    pub fn str_rc(&self, sens_symb: &str, rc_symb: &str) -> String {
-        let format = CustomFormat::builder()
-            .grouping(Grouping::Standard)
-            .minus_sign("-")
-            .separator("_")
-            .build()
-            .unwrap();
-        let (a, b) = self.repr_rc();
-        let symb = |s| -> &str {
-            if s {
-                sens_symb
-            } else {
-                rc_symb
-            }
-        };
-        format!(
-            "{}{}:{}|{}:{}{}",
-            symb(a.0),
-            a.1,
-            a.2.to_formatted_string(&format),
-            b.1,
-            b.2.to_formatted_string(&format),
-            symb(b.0)
-        )
-    }
-
-    pub fn is_next(&self, next: &BreakPoint) -> bool {
-        let (_, (last_sens, last_chr, last_pos)) = self.repr_sens();
-        let ((next_sens, next_chr, next_pos), _) = next.repr_sens();
-        let is_same_sens = last_sens == next_sens;
-        let is_on_same_chr = last_chr == next_chr;
-        let is_reachable = if last_sens {
-            last_pos < next_pos
-        } else {
-            last_pos > next_pos
-        };
-        is_same_sens && is_on_same_chr && is_reachable
-    }
-}
-
 fn get_next(
     graph: &StableGraph<BreakPoint, String>,
     ways: Vec<Vec<(NodeIndex, bool)>>,
@@ -1222,6 +1065,8 @@ mod tests {
 
     use petgraph::{stable_graph::StableGraph, visit::EdgeRef, Directed};
 
+    use crate::genomic_graph::GenomicGraph;
+
     use super::*;
 
     #[test]
@@ -1410,8 +1255,33 @@ mod tests {
         let all_bp: Vec<BreakPoint> = graph.node_weights().map(|n| n.clone()).collect();
         println!("{} unique breakpoints considered.", all_bp.len());
 
+        let nodeids_bp: Vec<(NodeIndex, BreakPoint)> =
+            graph.node_indices().zip(all_bp.into_iter()).collect();
+
         // monallellic way ?
 
         Ok(())
     }
+
+    #[test]
+    fn test_graph() -> Result<()> {
+        let dir = "./data_test";
+
+        // Load from fasta in dir.
+        let genome = Genome::from_contigs_sequences(dir)?;
+        let mut genomic_graph = GenomicGraph::from_genome(&genome);
+        let dot = genomic_graph.dot_graph();
+        println!("{dot}");
+
+        let ways = genomic_graph.ways((true, "chr7", 0), (true, "chr7", i32::MAX));
+        for (i, way) in ways.iter().enumerate() {
+            let s = way
+                .iter()
+                .map(|(_, _, _, s)| s.to_string())
+                .collect::<Vec<String>>()
+                .join("");
+            println!("{}\t{s}", i + 1);
+        }
+        Ok(())
+    }
 }