Thomas před 1 rokem
rodič
revize
63d6863220
1 změnil soubory, kde provedl 101 přidání a 244 odebrání
  1. 101 244
      src/lib.rs

+ 101 - 244
src/lib.rs

@@ -891,70 +891,6 @@ fn get_ext_paths(dir: &str, ext: &str) -> Result<Vec<PathBuf>> {
     Ok(paths)
 }
 
-#[derive(Clone, PartialEq)]
-struct BreakPoint<'a> {
-    d: &'a Vec<Mapping>,
-}
-
-impl<'a> BreakPoint<'a> {
-    pub fn new(d: &'a Vec<Mapping>) -> Self {
-        Self { d }
-    }
-    pub fn sens(&self) -> ((bool, String, i32), (bool, String, i32)) {
-        let (first, last) = (self.d.first().unwrap(), self.d.last().unwrap());
-
-        match (first.strand, last.strand) {
-            (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
-                (true, first.target_name.clone().unwrap(), first.target_start),
-                (true, last.target_name.clone().unwrap(), last.target_end),
-            ),
-            (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
-                (true, first.target_name.clone().unwrap(), first.target_start),
-                (false, last.target_name.clone().unwrap(), last.target_start),
-            ),
-            (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
-                (false, first.target_name.clone().unwrap(), first.target_end),
-                (true, last.target_name.clone().unwrap(), last.target_end),
-            ),
-            (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
-                (false, first.target_name.clone().unwrap(), first.target_end),
-                (false, last.target_name.clone().unwrap(), last.target_start),
-            ),
-        }
-    }
-
-    pub fn rev_comp(&self) -> ((bool, String, i32), (bool, String, i32)) {
-        let (mut a, mut b) = self.sens();
-        a.0 = !a.0;
-        b.0 = !b.0;
-        (b, a)
-
-        // let (first, last) = (self.d.first().unwrap(), self.d.last().unwrap());
-        // match (first.strand, last.strand) {
-        //     (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
-        //         (false, last.target_name.clone().unwrap(), last.target_end),
-        //         (false, first.target_name.clone().unwrap(), first.target_start),
-        //     ),
-        //     (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
-        //         (true, last.target_name.clone().unwrap(), last.target_start),
-        //         (false, first.target_name.clone().unwrap(), first.target_start),
-        //     ),
-        //     (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
-        //         (false, last.target_name.clone().unwrap(), last.target_end),
-        //         (true, first.target_name.clone().unwrap(), first.target_end),
-        //     ),
-        //     (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
-        //         (true, last.target_name.clone().unwrap(), last.target_end),
-        //         (true, first.target_name.clone().unwrap(), first.target_end),
-        //     ),
-        // }
-    }
-}
-
-pub fn erase_edges_labels_inplace(graph: &mut StableGraph<String, String>) {
-    graph.edge_weights_mut().for_each(|e| *e = "".to_string());
-}
-
 pub fn dot_graph(
     graph: &StableGraph<String, String>,
     way: &Vec<NodeIndex>,
@@ -1049,74 +985,15 @@ pub fn dot_graph_biall(
     dot
 }
 
-pub fn str_to_bp(s: &str) -> ((bool, String, i32), (String, i32, bool)) {
-    let symb = |s| -> &str {
-        if s {
-            "►"
-        } else {
-            "◄"
-        }
-    };
-
-    let (a, b) = s.split_once("|").unwrap();
-    let (sens_a, rest_a) = a.split_at(3);
-    let sens_a = if sens_a == "►" { true } else { false };
-    let (chr_a, pos_a) = rest_a.split_once(":").unwrap();
-
-    let (rest_b, sens_b) = b.split_at(b.len() - 3);
-    let sens_b = if sens_b == "►" { true } else { false };
-    let (chr_b, pos_b) = rest_b.split_once(":").unwrap();
-
-    (
-        (
-            sens_a,
-            chr_a.to_string(),
-            pos_a.replace("_", "").parse().unwrap(),
-        ),
-        (
-            chr_b.to_string(),
-            pos_b.replace("_", "").parse().unwrap(),
-            sens_b,
-        ),
-    )
-}
-
-pub fn rev_comp_bp_str(s: &str) -> String {
-    let symb = |s| -> &str {
-        if s {
-            "►"
-        } else {
-            "◄"
-        }
-    };
-
-    let (a, b) = s.split_once("|").unwrap();
-    let (sens_a, rest_a) = a.split_at(3);
-    let sens_a = if sens_a == "►" { true } else { false };
-    let (chr_a, pos_a) = rest_a.split_once(":").unwrap();
-
-    let (rest_b, sens_b) = b.split_at(b.len() - 3);
-    let sens_b = if sens_b == "►" { true } else { false };
-    let (chr_b, pos_b) = rest_b.split_once(":").unwrap();
-
-    format!(
-        "{}{}:{}|{}:{}{}",
-        symb(!sens_b),
-        chr_b,
-        pos_b,
-        chr_a,
-        pos_a,
-        symb(!sens_a)
-    )
-}
-
 #[derive(Clone, PartialEq, Eq, Debug)]
-pub struct MyBreakPoint {
+pub struct BreakPoint {
     pub id: String,
     pub mappings: Vec<Mapping>,
+    // sens_symb: &str,
+    // rc_symb: &str
 }
 
-impl MyBreakPoint {
+impl BreakPoint {
     pub fn new(mappings: Vec<Mapping>) -> Self {
         Self {
             id: Uuid::new_v4().to_string(),
@@ -1155,18 +1032,11 @@ impl MyBreakPoint {
     pub fn rc(&self) -> Self {
         let mut mappings = self.mappings.clone();
         mappings.reverse();
-        let left = mappings.first_mut().unwrap();
-        left.strand = match left.strand {
-            minimap2::Strand::Forward => minimap2::Strand::Reverse,
-            minimap2::Strand::Reverse => minimap2::Strand::Forward,
-        };
-
-        let right = mappings.last_mut().unwrap();
-        right.strand = match right.strand {
+        mappings.iter_mut().for_each(|m| m.strand = match m.strand {
             minimap2::Strand::Forward => minimap2::Strand::Reverse,
             minimap2::Strand::Reverse => minimap2::Strand::Forward,
-        };
-
+        });
+        
         Self {
             id: self.id.clone(),
             mappings,
@@ -1260,7 +1130,7 @@ impl MyBreakPoint {
         )
     }
 
-    pub fn is_next(&self, next: &MyBreakPoint) -> bool {
+    pub fn is_next(&self, next: &BreakPoint) -> bool {
         let (_, (last_sens, last_chr, last_pos)) = self.repr_sens();
         let ((next_sens, next_chr, next_pos), _) = next.repr_sens();
         let is_same_sens = last_sens == next_sens;
@@ -1275,16 +1145,17 @@ impl MyBreakPoint {
 }
 
 fn get_next(
-    graph: &StableGraph<MyBreakPoint, String>,
+    graph: &StableGraph<BreakPoint, String>,
     ways: Vec<Vec<(NodeIndex, bool)>>,
 ) -> Vec<Vec<(NodeIndex, bool)>> {
-    let mut new_ways = Vec::new();
+    let mut visited = Vec::new();
     for way in ways.into_iter() {
         let mut possible_next: Vec<(NodeIndex, bool)> = Vec::new();
         let (last_id, last_sens) = way.get(way.len() - 1).unwrap().to_owned();
         let a = graph.node_weight(last_id).unwrap().to_owned();
         let last = if last_sens { a.clone() } else { a.rc() };
 
+        // Check for every neighbors if they can be visited
         for neighbor_id in graph.neighbors(last_id) {
             let b = graph.node_weight(neighbor_id).unwrap().to_owned();
             if last.is_next(&b) {
@@ -1294,10 +1165,13 @@ fn get_next(
                 possible_next.push((neighbor_id, false));
             }
         }
+
+        // If no visitable neighbors continue to the next way
         if possible_next.len() == 0 {
-            println!("droppping way no compatible neighbors {:?}", way);
+            continue;
         }
         for pn in possible_next {
+            // If the possible next node in the way isn't already visited add it as visited
             let mut new_way = way.clone();
             if !new_way
                 .iter()
@@ -1306,31 +1180,26 @@ fn get_next(
                 .contains(&pn.0)
             {
                 new_way.push(pn);
-                new_ways.push(new_way);
+                visited.push(new_way);
             } else {
-                println!("dropping way: {way:?} next : {:?}", pn);
+                // println!("dropping way: {way:?} next : {:?}", pn);
             }
         }
     }
-    new_ways
+    visited
 }
 
-fn find_ways(
-    graph: &StableGraph<MyBreakPoint, String>,
+pub fn find_ways(
+    graph: &StableGraph<BreakPoint, String>,
     start_id: NodeIndex,
     start_sens: bool,
     end_id: NodeIndex,
 ) -> Vec<Vec<(NodeIndex, bool)>> {
-    let sens_symb = "►";
-    let rc_symb = "◄";
-
     let mut reached_end = Vec::new();
     let mut ways = vec![vec![(start_id, start_sens)]];
     loop {
         let new_ways = get_next(graph, ways.clone());
-        println!("-------\n{new_ways:?}\n-----");
         if new_ways.len() == 0 {
-            println!("no more");
             break;
         }
 
@@ -1349,10 +1218,7 @@ fn find_ways(
 
 #[cfg(test)]
 mod tests {
-    use std::{
-        collections::HashSet,
-        ops::{Range, RangeInclusive},
-    };
+    use std::collections::HashSet;
 
     use petgraph::{stable_graph::StableGraph, visit::EdgeRef, Directed};
 
@@ -1390,30 +1256,22 @@ mod tests {
         let sens_symb = "►";
         let rc_symb = "◄";
 
-        // load from dir
+        // Load from fasta in dir.
         let genome = Genome::from_contigs_sequences(dir)?;
 
         // start node
-        let start_bp = MyBreakPoint::from(true, "chr7", 0, "start");
+        let start_bp = BreakPoint::from(true, "chr7", 0, "start");
         // end node
-        let end_bp = MyBreakPoint::from(true, "chr7", i32::MAX, "end");
+        let end_bp = BreakPoint::from(true, "chr7", i32::MAX, "end");
 
-        // load breakpoints
+        // Load breakpoints as graph nodes.
         let mut breakpoints_id = HashMap::new();
-        let mut graph = StableGraph::<MyBreakPoint, String, Directed>::new();
-
-        // add bp to graph
+        let mut graph = StableGraph::<BreakPoint, String, Directed>::new();
         let mut dedup_hash = HashSet::new();
         for contig in genome.contigs() {
             if let Some(mappings) = contig.contig_ref.breakpoints() {
-                let bp = MyBreakPoint::new(mappings);
+                let bp = BreakPoint::new(mappings);
                 if !dedup_hash.contains(&bp.str_sens(sens_symb, rc_symb)) {
-                    println!(
-                        "adding: {} rc: {}\n\n{:?}\n",
-                        bp.str_sens(sens_symb, rc_symb),
-                        bp.str_rc(sens_symb, rc_symb),
-                        bp
-                    );
                     let id = graph.add_node(bp.clone());
                     breakpoints_id.insert(bp.id.clone(), id);
                     dedup_hash.insert(bp.str_sens(sens_symb, rc_symb));
@@ -1422,81 +1280,45 @@ mod tests {
             }
         }
 
+        // Add start and end nodes.
         let start_id = graph.add_node(start_bp.clone());
         breakpoints_id.insert(start_bp.id, start_id);
 
         let end_id = graph.add_node(end_bp.clone());
         breakpoints_id.insert(end_bp.id, end_id);
 
+        // Draw edges between nodes.
         let g_iter = graph.clone();
         for last in g_iter.node_weights() {
+            let a = *breakpoints_id.get(&last.id).unwrap();
             for next in g_iter.node_weights() {
                 if last == next {
                     continue;
                 }
-                let a = *breakpoints_id.get(&last.id).unwrap();
                 let b = *breakpoints_id.get(&next.id).unwrap();
 
+                // Don't add edge between start and end nodes.
                 if a == start_id && b == end_id || a == end_id && b == start_id {
                     continue;
                 }
 
-                let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_sens();
-                let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_sens();
-
-                if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
-                    let in_order = if last_tail_sens {
-                        last_tail_pos <= next_head_pos
-                    } else {
-                        last_tail_pos > next_head_pos
-                    };
-                    if in_order && graph.find_edge(a, b).is_none() {
-                        graph.add_edge(a, b, "".to_string());
-                    }
-                }
-
-                let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_rc();
-
-                if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
-                    let in_order = if last_tail_sens {
-                        last_tail_pos <= next_head_pos
-                    } else {
-                        last_tail_pos > next_head_pos
-                    };
-                    if in_order && graph.find_edge(a, b).is_none() {
+                // A->B.
+                if last.is_next(next) || last.is_next(&next.rc()) {
+                    if graph.find_edge(a, b).is_none() {
                         graph.add_edge(a, b, "".to_string());
                     }
                 }
 
-                let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_rc();
-                let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_sens();
-                if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
-                    let in_order = if last_tail_sens {
-                        last_tail_pos <= next_head_pos
-                    } else {
-                        last_tail_pos > next_head_pos
-                    };
-                    if in_order && graph.find_edge(b, a).is_none() {
-                        graph.add_edge(b, a, "".to_string());
-                    }
-                }
-
-                let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_rc();
-                let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_rc();
-                if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
-                    let in_order = if last_tail_sens {
-                        last_tail_pos <= next_head_pos
-                    } else {
-                        last_tail_pos > next_head_pos
-                    };
-                    if in_order && graph.find_edge(b, a).is_none() {
+                // If the rc of A can follow B, B->A.
+                if last.rc().is_next(&next) || last.rc().is_next(&next.rc()) {
+                    if graph.find_edge(b, a).is_none() {
                         graph.add_edge(b, a, "".to_string());
                     }
                 }
             }
         }
 
-        // remove empty nodes
+        // Remove nodes without edge.
         let del: Vec<_> = graph
             .node_indices()
             .filter(|id| graph.neighbors_undirected(*id).count() == 0)
@@ -1506,6 +1328,19 @@ mod tests {
         });
 
         let mut ways = find_ways(&graph, start_id, true, end_id);
+        ways.sort_by_key(|a| {
+            let astr: Vec<String> = a
+                .iter()
+                .map(|(id, sens)| {
+                    if *sens {
+                        graph.node_weight(*id).unwrap().str_sens(sens_symb, rc_symb)
+                    } else {
+                        graph.node_weight(*id).unwrap().str_rc(sens_symb, rc_symb)
+                    }
+                })
+                .collect();
+            astr.join(";")
+        });
         ways.dedup_by_key(|a| {
             let astr: Vec<String> = a
                 .iter()
@@ -1520,35 +1355,57 @@ mod tests {
             astr.join(";")
         });
 
-        for new_way in ways.iter() {
-            let g = graph.map(
-                |ni, nv| {
-                    let mut in_way: Vec<_> = new_way.iter().filter(|(id, _)| *id == ni).collect();
-                    let val = if in_way.len() == 1 {
-                        let (_, n_sens) = in_way.pop().unwrap();
-                        if *n_sens {
-                            nv.str_sens(sens_symb, rc_symb).to_string()
-                        } else {
-                            nv.str_rc(sens_symb, rc_symb).to_string()
+        // for new_way in ways.iter() {
+        //     let g = graph.map(
+        //         |ni, nv| {
+        //             let mut in_way: Vec<_> = new_way.iter().filter(|(id, _)| *id == ni).collect();
+        //             let val = if in_way.len() == 1 {
+        //                 let (_, n_sens) = in_way.pop().unwrap();
+        //                 if *n_sens {
+        //                     nv.str_sens(sens_symb, rc_symb).to_string()
+        //                 } else {
+        //                     nv.str_rc(sens_symb, rc_symb).to_string()
+        //                 }
+        //             } else {
+        //                 nv.str_sens(sens_symb, rc_symb).to_string()
+        //             };
+        //             format!("{val} ({})", ni.index())
+        //         },
+        //         |_, v| v.to_string(),
+        //     );
+        //     println!(
+        //         "{}",
+        //         dot_graph(
+        //             &g,
+        //             &new_way.into_iter().map(|(id, _)| id.clone()).collect(),
+        //             "A",
+        //             "red",
+        //             true,
+        //         )
+        //     );
+        // }
+
+        let res_str: Vec<String> = ways
+            .iter()
+            .map(|way| {
+                let s: Vec<String> = way
+                    .iter()
+                    .map(|(node_id, sens)| {
+                        let bp = graph.node_weight(*node_id).unwrap();
+                        match sens {
+                            true => bp.str_sens(sens_symb, rc_symb),
+                            false => bp.str_rc(sens_symb, rc_symb),
                         }
-                    } else {
-                        nv.str_sens(sens_symb, rc_symb).to_string()
-                    };
-                    format!("{val} ({})", ni.index())
-                },
-                |_, v| v.to_string(),
-            );
-            println!(
-                "{}",
-                dot_graph(
-                    &g,
-                    &new_way.into_iter().map(|(id, _)| id.clone()).collect(),
-                    "A",
-                    "red",
-                    true,
-                )
-            );
-        }
+                    })
+                    .collect();
+                s.join("")
+            })
+            .collect();
+
+        res_str
+            .iter()
+            .enumerate()
+            .for_each(|(i, s)| println!("{}\t{s}", i + 1));
 
         // monallellic way ?