|
|
@@ -891,70 +891,6 @@ fn get_ext_paths(dir: &str, ext: &str) -> Result<Vec<PathBuf>> {
|
|
|
Ok(paths)
|
|
|
}
|
|
|
|
|
|
-#[derive(Clone, PartialEq)]
|
|
|
-struct BreakPoint<'a> {
|
|
|
- d: &'a Vec<Mapping>,
|
|
|
-}
|
|
|
-
|
|
|
-impl<'a> BreakPoint<'a> {
|
|
|
- pub fn new(d: &'a Vec<Mapping>) -> Self {
|
|
|
- Self { d }
|
|
|
- }
|
|
|
- pub fn sens(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
- let (first, last) = (self.d.first().unwrap(), self.d.last().unwrap());
|
|
|
-
|
|
|
- match (first.strand, last.strand) {
|
|
|
- (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
|
|
|
- (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
- (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
- ),
|
|
|
- (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
|
|
|
- (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
- (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
- ),
|
|
|
- (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
|
|
|
- (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
- (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
- ),
|
|
|
- (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
|
|
|
- (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
- (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
- ),
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn rev_comp(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
- let (mut a, mut b) = self.sens();
|
|
|
- a.0 = !a.0;
|
|
|
- b.0 = !b.0;
|
|
|
- (b, a)
|
|
|
-
|
|
|
- // let (first, last) = (self.d.first().unwrap(), self.d.last().unwrap());
|
|
|
- // match (first.strand, last.strand) {
|
|
|
- // (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
|
|
|
- // (false, last.target_name.clone().unwrap(), last.target_end),
|
|
|
- // (false, first.target_name.clone().unwrap(), first.target_start),
|
|
|
- // ),
|
|
|
- // (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
|
|
|
- // (true, last.target_name.clone().unwrap(), last.target_start),
|
|
|
- // (false, first.target_name.clone().unwrap(), first.target_start),
|
|
|
- // ),
|
|
|
- // (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
|
|
|
- // (false, last.target_name.clone().unwrap(), last.target_end),
|
|
|
- // (true, first.target_name.clone().unwrap(), first.target_end),
|
|
|
- // ),
|
|
|
- // (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
|
|
|
- // (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
- // (true, first.target_name.clone().unwrap(), first.target_end),
|
|
|
- // ),
|
|
|
- // }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-pub fn erase_edges_labels_inplace(graph: &mut StableGraph<String, String>) {
|
|
|
- graph.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
-}
|
|
|
-
|
|
|
pub fn dot_graph(
|
|
|
graph: &StableGraph<String, String>,
|
|
|
way: &Vec<NodeIndex>,
|
|
|
@@ -1049,74 +985,15 @@ pub fn dot_graph_biall(
|
|
|
dot
|
|
|
}
|
|
|
|
|
|
-pub fn str_to_bp(s: &str) -> ((bool, String, i32), (String, i32, bool)) {
|
|
|
- let symb = |s| -> &str {
|
|
|
- if s {
|
|
|
- "►"
|
|
|
- } else {
|
|
|
- "◄"
|
|
|
- }
|
|
|
- };
|
|
|
-
|
|
|
- let (a, b) = s.split_once("|").unwrap();
|
|
|
- let (sens_a, rest_a) = a.split_at(3);
|
|
|
- let sens_a = if sens_a == "►" { true } else { false };
|
|
|
- let (chr_a, pos_a) = rest_a.split_once(":").unwrap();
|
|
|
-
|
|
|
- let (rest_b, sens_b) = b.split_at(b.len() - 3);
|
|
|
- let sens_b = if sens_b == "►" { true } else { false };
|
|
|
- let (chr_b, pos_b) = rest_b.split_once(":").unwrap();
|
|
|
-
|
|
|
- (
|
|
|
- (
|
|
|
- sens_a,
|
|
|
- chr_a.to_string(),
|
|
|
- pos_a.replace("_", "").parse().unwrap(),
|
|
|
- ),
|
|
|
- (
|
|
|
- chr_b.to_string(),
|
|
|
- pos_b.replace("_", "").parse().unwrap(),
|
|
|
- sens_b,
|
|
|
- ),
|
|
|
- )
|
|
|
-}
|
|
|
-
|
|
|
-pub fn rev_comp_bp_str(s: &str) -> String {
|
|
|
- let symb = |s| -> &str {
|
|
|
- if s {
|
|
|
- "►"
|
|
|
- } else {
|
|
|
- "◄"
|
|
|
- }
|
|
|
- };
|
|
|
-
|
|
|
- let (a, b) = s.split_once("|").unwrap();
|
|
|
- let (sens_a, rest_a) = a.split_at(3);
|
|
|
- let sens_a = if sens_a == "►" { true } else { false };
|
|
|
- let (chr_a, pos_a) = rest_a.split_once(":").unwrap();
|
|
|
-
|
|
|
- let (rest_b, sens_b) = b.split_at(b.len() - 3);
|
|
|
- let sens_b = if sens_b == "►" { true } else { false };
|
|
|
- let (chr_b, pos_b) = rest_b.split_once(":").unwrap();
|
|
|
-
|
|
|
- format!(
|
|
|
- "{}{}:{}|{}:{}{}",
|
|
|
- symb(!sens_b),
|
|
|
- chr_b,
|
|
|
- pos_b,
|
|
|
- chr_a,
|
|
|
- pos_a,
|
|
|
- symb(!sens_a)
|
|
|
- )
|
|
|
-}
|
|
|
-
|
|
|
#[derive(Clone, PartialEq, Eq, Debug)]
|
|
|
-pub struct MyBreakPoint {
|
|
|
+pub struct BreakPoint {
|
|
|
pub id: String,
|
|
|
pub mappings: Vec<Mapping>,
|
|
|
+ // sens_symb: &str,
|
|
|
+ // rc_symb: &str
|
|
|
}
|
|
|
|
|
|
-impl MyBreakPoint {
|
|
|
+impl BreakPoint {
|
|
|
pub fn new(mappings: Vec<Mapping>) -> Self {
|
|
|
Self {
|
|
|
id: Uuid::new_v4().to_string(),
|
|
|
@@ -1155,18 +1032,11 @@ impl MyBreakPoint {
|
|
|
pub fn rc(&self) -> Self {
|
|
|
let mut mappings = self.mappings.clone();
|
|
|
mappings.reverse();
|
|
|
- let left = mappings.first_mut().unwrap();
|
|
|
- left.strand = match left.strand {
|
|
|
- minimap2::Strand::Forward => minimap2::Strand::Reverse,
|
|
|
- minimap2::Strand::Reverse => minimap2::Strand::Forward,
|
|
|
- };
|
|
|
-
|
|
|
- let right = mappings.last_mut().unwrap();
|
|
|
- right.strand = match right.strand {
|
|
|
+ mappings.iter_mut().for_each(|m| m.strand = match m.strand {
|
|
|
minimap2::Strand::Forward => minimap2::Strand::Reverse,
|
|
|
minimap2::Strand::Reverse => minimap2::Strand::Forward,
|
|
|
- };
|
|
|
-
|
|
|
+ });
|
|
|
+
|
|
|
Self {
|
|
|
id: self.id.clone(),
|
|
|
mappings,
|
|
|
@@ -1260,7 +1130,7 @@ impl MyBreakPoint {
|
|
|
)
|
|
|
}
|
|
|
|
|
|
- pub fn is_next(&self, next: &MyBreakPoint) -> bool {
|
|
|
+ pub fn is_next(&self, next: &BreakPoint) -> bool {
|
|
|
let (_, (last_sens, last_chr, last_pos)) = self.repr_sens();
|
|
|
let ((next_sens, next_chr, next_pos), _) = next.repr_sens();
|
|
|
let is_same_sens = last_sens == next_sens;
|
|
|
@@ -1275,16 +1145,17 @@ impl MyBreakPoint {
|
|
|
}
|
|
|
|
|
|
fn get_next(
|
|
|
- graph: &StableGraph<MyBreakPoint, String>,
|
|
|
+ graph: &StableGraph<BreakPoint, String>,
|
|
|
ways: Vec<Vec<(NodeIndex, bool)>>,
|
|
|
) -> Vec<Vec<(NodeIndex, bool)>> {
|
|
|
- let mut new_ways = Vec::new();
|
|
|
+ let mut visited = Vec::new();
|
|
|
for way in ways.into_iter() {
|
|
|
let mut possible_next: Vec<(NodeIndex, bool)> = Vec::new();
|
|
|
let (last_id, last_sens) = way.get(way.len() - 1).unwrap().to_owned();
|
|
|
let a = graph.node_weight(last_id).unwrap().to_owned();
|
|
|
let last = if last_sens { a.clone() } else { a.rc() };
|
|
|
|
|
|
+ // Check for every neighbors if they can be visited
|
|
|
for neighbor_id in graph.neighbors(last_id) {
|
|
|
let b = graph.node_weight(neighbor_id).unwrap().to_owned();
|
|
|
if last.is_next(&b) {
|
|
|
@@ -1294,10 +1165,13 @@ fn get_next(
|
|
|
possible_next.push((neighbor_id, false));
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ // If no visitable neighbors continue to the next way
|
|
|
if possible_next.len() == 0 {
|
|
|
- println!("droppping way no compatible neighbors {:?}", way);
|
|
|
+ continue;
|
|
|
}
|
|
|
for pn in possible_next {
|
|
|
+ // If the possible next node in the way isn't already visited add it as visited
|
|
|
let mut new_way = way.clone();
|
|
|
if !new_way
|
|
|
.iter()
|
|
|
@@ -1306,31 +1180,26 @@ fn get_next(
|
|
|
.contains(&pn.0)
|
|
|
{
|
|
|
new_way.push(pn);
|
|
|
- new_ways.push(new_way);
|
|
|
+ visited.push(new_way);
|
|
|
} else {
|
|
|
- println!("dropping way: {way:?} next : {:?}", pn);
|
|
|
+ // println!("dropping way: {way:?} next : {:?}", pn);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- new_ways
|
|
|
+ visited
|
|
|
}
|
|
|
|
|
|
-fn find_ways(
|
|
|
- graph: &StableGraph<MyBreakPoint, String>,
|
|
|
+pub fn find_ways(
|
|
|
+ graph: &StableGraph<BreakPoint, String>,
|
|
|
start_id: NodeIndex,
|
|
|
start_sens: bool,
|
|
|
end_id: NodeIndex,
|
|
|
) -> Vec<Vec<(NodeIndex, bool)>> {
|
|
|
- let sens_symb = "►";
|
|
|
- let rc_symb = "◄";
|
|
|
-
|
|
|
let mut reached_end = Vec::new();
|
|
|
let mut ways = vec![vec![(start_id, start_sens)]];
|
|
|
loop {
|
|
|
let new_ways = get_next(graph, ways.clone());
|
|
|
- println!("-------\n{new_ways:?}\n-----");
|
|
|
if new_ways.len() == 0 {
|
|
|
- println!("no more");
|
|
|
break;
|
|
|
}
|
|
|
|
|
|
@@ -1349,10 +1218,7 @@ fn find_ways(
|
|
|
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
- use std::{
|
|
|
- collections::HashSet,
|
|
|
- ops::{Range, RangeInclusive},
|
|
|
- };
|
|
|
+ use std::collections::HashSet;
|
|
|
|
|
|
use petgraph::{stable_graph::StableGraph, visit::EdgeRef, Directed};
|
|
|
|
|
|
@@ -1390,30 +1256,22 @@ mod tests {
|
|
|
let sens_symb = "►";
|
|
|
let rc_symb = "◄";
|
|
|
|
|
|
- // load from dir
|
|
|
+ // Load from fasta in dir.
|
|
|
let genome = Genome::from_contigs_sequences(dir)?;
|
|
|
|
|
|
// start node
|
|
|
- let start_bp = MyBreakPoint::from(true, "chr7", 0, "start");
|
|
|
+ let start_bp = BreakPoint::from(true, "chr7", 0, "start");
|
|
|
// end node
|
|
|
- let end_bp = MyBreakPoint::from(true, "chr7", i32::MAX, "end");
|
|
|
+ let end_bp = BreakPoint::from(true, "chr7", i32::MAX, "end");
|
|
|
|
|
|
- // load breakpoints
|
|
|
+ // Load breakpoints as graph nodes.
|
|
|
let mut breakpoints_id = HashMap::new();
|
|
|
- let mut graph = StableGraph::<MyBreakPoint, String, Directed>::new();
|
|
|
-
|
|
|
- // add bp to graph
|
|
|
+ let mut graph = StableGraph::<BreakPoint, String, Directed>::new();
|
|
|
let mut dedup_hash = HashSet::new();
|
|
|
for contig in genome.contigs() {
|
|
|
if let Some(mappings) = contig.contig_ref.breakpoints() {
|
|
|
- let bp = MyBreakPoint::new(mappings);
|
|
|
+ let bp = BreakPoint::new(mappings);
|
|
|
if !dedup_hash.contains(&bp.str_sens(sens_symb, rc_symb)) {
|
|
|
- println!(
|
|
|
- "adding: {} rc: {}\n\n{:?}\n",
|
|
|
- bp.str_sens(sens_symb, rc_symb),
|
|
|
- bp.str_rc(sens_symb, rc_symb),
|
|
|
- bp
|
|
|
- );
|
|
|
let id = graph.add_node(bp.clone());
|
|
|
breakpoints_id.insert(bp.id.clone(), id);
|
|
|
dedup_hash.insert(bp.str_sens(sens_symb, rc_symb));
|
|
|
@@ -1422,81 +1280,45 @@ mod tests {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ // Add start and end nodes.
|
|
|
let start_id = graph.add_node(start_bp.clone());
|
|
|
breakpoints_id.insert(start_bp.id, start_id);
|
|
|
|
|
|
let end_id = graph.add_node(end_bp.clone());
|
|
|
breakpoints_id.insert(end_bp.id, end_id);
|
|
|
|
|
|
+ // Draw edges between nodes.
|
|
|
let g_iter = graph.clone();
|
|
|
for last in g_iter.node_weights() {
|
|
|
+ let a = *breakpoints_id.get(&last.id).unwrap();
|
|
|
for next in g_iter.node_weights() {
|
|
|
if last == next {
|
|
|
continue;
|
|
|
}
|
|
|
- let a = *breakpoints_id.get(&last.id).unwrap();
|
|
|
let b = *breakpoints_id.get(&next.id).unwrap();
|
|
|
|
|
|
+ // Don't add edge between start and end nodes.
|
|
|
if a == start_id && b == end_id || a == end_id && b == start_id {
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_sens();
|
|
|
- let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_sens();
|
|
|
-
|
|
|
- if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
|
|
|
- let in_order = if last_tail_sens {
|
|
|
- last_tail_pos <= next_head_pos
|
|
|
- } else {
|
|
|
- last_tail_pos > next_head_pos
|
|
|
- };
|
|
|
- if in_order && graph.find_edge(a, b).is_none() {
|
|
|
- graph.add_edge(a, b, "".to_string());
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_rc();
|
|
|
-
|
|
|
- if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
|
|
|
- let in_order = if last_tail_sens {
|
|
|
- last_tail_pos <= next_head_pos
|
|
|
- } else {
|
|
|
- last_tail_pos > next_head_pos
|
|
|
- };
|
|
|
- if in_order && graph.find_edge(a, b).is_none() {
|
|
|
+ // A->B.
|
|
|
+ if last.is_next(next) || last.is_next(&next.rc()) {
|
|
|
+ if graph.find_edge(a, b).is_none() {
|
|
|
graph.add_edge(a, b, "".to_string());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_rc();
|
|
|
- let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_sens();
|
|
|
- if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
|
|
|
- let in_order = if last_tail_sens {
|
|
|
- last_tail_pos <= next_head_pos
|
|
|
- } else {
|
|
|
- last_tail_pos > next_head_pos
|
|
|
- };
|
|
|
- if in_order && graph.find_edge(b, a).is_none() {
|
|
|
- graph.add_edge(b, a, "".to_string());
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_rc();
|
|
|
- let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_rc();
|
|
|
- if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
|
|
|
- let in_order = if last_tail_sens {
|
|
|
- last_tail_pos <= next_head_pos
|
|
|
- } else {
|
|
|
- last_tail_pos > next_head_pos
|
|
|
- };
|
|
|
- if in_order && graph.find_edge(b, a).is_none() {
|
|
|
+ // If the rc of A can follow B, B->A.
|
|
|
+ if last.rc().is_next(&next) || last.rc().is_next(&next.rc()) {
|
|
|
+ if graph.find_edge(b, a).is_none() {
|
|
|
graph.add_edge(b, a, "".to_string());
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // remove empty nodes
|
|
|
+ // Remove nodes without edge.
|
|
|
let del: Vec<_> = graph
|
|
|
.node_indices()
|
|
|
.filter(|id| graph.neighbors_undirected(*id).count() == 0)
|
|
|
@@ -1506,6 +1328,19 @@ mod tests {
|
|
|
});
|
|
|
|
|
|
let mut ways = find_ways(&graph, start_id, true, end_id);
|
|
|
+ ways.sort_by_key(|a| {
|
|
|
+ let astr: Vec<String> = a
|
|
|
+ .iter()
|
|
|
+ .map(|(id, sens)| {
|
|
|
+ if *sens {
|
|
|
+ graph.node_weight(*id).unwrap().str_sens(sens_symb, rc_symb)
|
|
|
+ } else {
|
|
|
+ graph.node_weight(*id).unwrap().str_rc(sens_symb, rc_symb)
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ astr.join(";")
|
|
|
+ });
|
|
|
ways.dedup_by_key(|a| {
|
|
|
let astr: Vec<String> = a
|
|
|
.iter()
|
|
|
@@ -1520,35 +1355,57 @@ mod tests {
|
|
|
astr.join(";")
|
|
|
});
|
|
|
|
|
|
- for new_way in ways.iter() {
|
|
|
- let g = graph.map(
|
|
|
- |ni, nv| {
|
|
|
- let mut in_way: Vec<_> = new_way.iter().filter(|(id, _)| *id == ni).collect();
|
|
|
- let val = if in_way.len() == 1 {
|
|
|
- let (_, n_sens) = in_way.pop().unwrap();
|
|
|
- if *n_sens {
|
|
|
- nv.str_sens(sens_symb, rc_symb).to_string()
|
|
|
- } else {
|
|
|
- nv.str_rc(sens_symb, rc_symb).to_string()
|
|
|
+ // for new_way in ways.iter() {
|
|
|
+ // let g = graph.map(
|
|
|
+ // |ni, nv| {
|
|
|
+ // let mut in_way: Vec<_> = new_way.iter().filter(|(id, _)| *id == ni).collect();
|
|
|
+ // let val = if in_way.len() == 1 {
|
|
|
+ // let (_, n_sens) = in_way.pop().unwrap();
|
|
|
+ // if *n_sens {
|
|
|
+ // nv.str_sens(sens_symb, rc_symb).to_string()
|
|
|
+ // } else {
|
|
|
+ // nv.str_rc(sens_symb, rc_symb).to_string()
|
|
|
+ // }
|
|
|
+ // } else {
|
|
|
+ // nv.str_sens(sens_symb, rc_symb).to_string()
|
|
|
+ // };
|
|
|
+ // format!("{val} ({})", ni.index())
|
|
|
+ // },
|
|
|
+ // |_, v| v.to_string(),
|
|
|
+ // );
|
|
|
+ // println!(
|
|
|
+ // "{}",
|
|
|
+ // dot_graph(
|
|
|
+ // &g,
|
|
|
+ // &new_way.into_iter().map(|(id, _)| id.clone()).collect(),
|
|
|
+ // "A",
|
|
|
+ // "red",
|
|
|
+ // true,
|
|
|
+ // )
|
|
|
+ // );
|
|
|
+ // }
|
|
|
+
|
|
|
+ let res_str: Vec<String> = ways
|
|
|
+ .iter()
|
|
|
+ .map(|way| {
|
|
|
+ let s: Vec<String> = way
|
|
|
+ .iter()
|
|
|
+ .map(|(node_id, sens)| {
|
|
|
+ let bp = graph.node_weight(*node_id).unwrap();
|
|
|
+ match sens {
|
|
|
+ true => bp.str_sens(sens_symb, rc_symb),
|
|
|
+ false => bp.str_rc(sens_symb, rc_symb),
|
|
|
}
|
|
|
- } else {
|
|
|
- nv.str_sens(sens_symb, rc_symb).to_string()
|
|
|
- };
|
|
|
- format!("{val} ({})", ni.index())
|
|
|
- },
|
|
|
- |_, v| v.to_string(),
|
|
|
- );
|
|
|
- println!(
|
|
|
- "{}",
|
|
|
- dot_graph(
|
|
|
- &g,
|
|
|
- &new_way.into_iter().map(|(id, _)| id.clone()).collect(),
|
|
|
- "A",
|
|
|
- "red",
|
|
|
- true,
|
|
|
- )
|
|
|
- );
|
|
|
- }
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ s.join("")
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ res_str
|
|
|
+ .iter()
|
|
|
+ .enumerate()
|
|
|
+ .for_each(|(i, s)| println!("{}\t{s}", i + 1));
|
|
|
|
|
|
// monallellic way ?
|
|
|
|