|
|
@@ -6,11 +6,13 @@ use noodles_fasta as fasta;
|
|
|
use num_format::{CustomFormat, Grouping, Locale, ToFormattedString, WriteFormatted};
|
|
|
use petgraph::{algo, dot::Dot, prelude::*, Graph};
|
|
|
use rust_htslib::bam::{self, Record};
|
|
|
+use std::hash::{Hash, Hasher};
|
|
|
use std::{
|
|
|
collections::{HashMap, VecDeque},
|
|
|
fmt,
|
|
|
fs::{self, File},
|
|
|
io::{BufReader, BufWriter, Write},
|
|
|
+ ops::RangeInclusive,
|
|
|
path::PathBuf,
|
|
|
process::{Command, Stdio},
|
|
|
thread,
|
|
|
@@ -908,7 +910,7 @@ impl<'a> BreakPoint<'a> {
|
|
|
),
|
|
|
(minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
|
|
|
(false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
- (false, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
),
|
|
|
}
|
|
|
}
|
|
|
@@ -941,106 +943,6 @@ impl<'a> BreakPoint<'a> {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-struct BreakPoints<'a>(Vec<&'a BreakPoint<'a>>);
|
|
|
-
|
|
|
-// impl<'a> BreakPoints<'a> {
|
|
|
-// pub fn new() -> Self {
|
|
|
-// Self(Vec::new())
|
|
|
-// }
|
|
|
-// pub fn push(&'a mut self, bp: &'a BreakPoint<'a>) {
|
|
|
-// self.0.push(bp);
|
|
|
-// }
|
|
|
-// pub fn paths(&'a self, start: &'a BreakPoint<'a>) -> std::prelude::v1::Result<Vec<Vec<Option<(&'a BreakPoint<'a>, bool)>>>, anyhow::Error> {
|
|
|
-// let start_sens = true;
|
|
|
-// let mut paths = vec![vec![Some((start, start_sens))]];
|
|
|
-// loop {
|
|
|
-// let p = paths.clone();
|
|
|
-// let curr_paths: Vec<_> = p
|
|
|
-// .iter()
|
|
|
-// .filter(|path| path.last().unwrap().is_some())
|
|
|
-// .collect();
|
|
|
-//
|
|
|
-// if curr_paths.len() == 0 {
|
|
|
-// break;
|
|
|
-// }
|
|
|
-// let mut new_paths = Vec::new();
|
|
|
-//
|
|
|
-// for curr_path in curr_paths.iter() {
|
|
|
-//
|
|
|
-// let visited: Vec<_> = curr_path.iter().map(|e| e.as_ref().unwrap()).collect();
|
|
|
-// let (curr_bp, curr_sens) = visited.last().unwrap();
|
|
|
-// let visited_bps: Vec<_> = visited.iter().map(|(bp, _)| bp).collect();
|
|
|
-//
|
|
|
-// let (_, curr_pos) = if *curr_sens {
|
|
|
-// curr_bp.sens()
|
|
|
-// } else {
|
|
|
-// curr_bp.rev_comp()
|
|
|
-// };
|
|
|
-//
|
|
|
-// let next: Vec<(&BreakPoint<'a>, bool)> = self
|
|
|
-// .0
|
|
|
-// .clone()
|
|
|
-// .into_iter()
|
|
|
-// .filter(|bp| !visited_bps.contains(&bp))
|
|
|
-// .flat_map(|bp| {
|
|
|
-// let (bp_s, _) = bp.sens();
|
|
|
-// let same_sens = bp_s.0 == curr_pos.0;
|
|
|
-// let same_chr = bp_s.1 == curr_pos.1;
|
|
|
-// let next = if curr_pos.0 {
|
|
|
-// bp_s.2 >= curr_pos.2
|
|
|
-// } else {
|
|
|
-// bp_s.2 <= curr_pos.2
|
|
|
-// };
|
|
|
-// if same_chr && same_sens && next {
|
|
|
-// return vec![(bp, *curr_sens == bp_s.0)];
|
|
|
-// }
|
|
|
-// let (bp_s, _) = bp.rev_comp();
|
|
|
-// let same_sens = bp_s.0 == curr_pos.0;
|
|
|
-// let same_chr = bp_s.1 == curr_pos.1;
|
|
|
-// let next = if curr_pos.0 {
|
|
|
-// bp_s.2 >= curr_pos.2
|
|
|
-// } else {
|
|
|
-// bp_s.2 <= curr_pos.2
|
|
|
-// };
|
|
|
-// if same_chr && same_sens && next {
|
|
|
-// return vec![(bp, *curr_sens == bp_s.0)];
|
|
|
-// }
|
|
|
-// vec![]
|
|
|
-// })
|
|
|
-// .collect();
|
|
|
-//
|
|
|
-// if next.len() == 0 {
|
|
|
-// let mut curr_p = curr_path.clone().to_owned();
|
|
|
-// curr_p.push(None);
|
|
|
-// new_paths.push(curr_p);
|
|
|
-// } else {
|
|
|
-// next.into_iter().for_each(|bp| {
|
|
|
-// let mut curr_p = *curr_path.clone().to_owned();
|
|
|
-// curr_p.push(Some(bp));
|
|
|
-// new_paths.push(curr_p);
|
|
|
-// })
|
|
|
-// }
|
|
|
-// paths = new_paths.clone();
|
|
|
-// }
|
|
|
-// }
|
|
|
-// Ok(paths)
|
|
|
-// }
|
|
|
-// }
|
|
|
-
|
|
|
-pub fn rename_edges(
|
|
|
- graph: &mut StableGraph<String, String>,
|
|
|
- way: &Vec<NodeIndex>,
|
|
|
- value: &str,
|
|
|
-) -> &StableGraph<String, String> {
|
|
|
- graph.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
- for window in way.windows(2) {
|
|
|
- let edge_id = graph.find_edge(window[0], window[1]).unwrap();
|
|
|
- let edge = graph.edge_weight_mut(edge_id).unwrap();
|
|
|
- *edge = value.to_string();
|
|
|
- }
|
|
|
- graph
|
|
|
-}
|
|
|
-
|
|
|
pub fn erase_edges_labels_inplace(graph: &mut StableGraph<String, String>) {
|
|
|
graph.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
}
|
|
|
@@ -1060,8 +962,8 @@ pub fn dot_graph(
|
|
|
}
|
|
|
|
|
|
let mut labels = Vec::new();
|
|
|
- for [a, b] in way.windows(2) {
|
|
|
- let edge_id = g.find_edge(*a, *b).unwrap();
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let edge_id = g.find_edge(window[0], window[1]).unwrap();
|
|
|
let edge = g.edge_weight_mut(edge_id).unwrap();
|
|
|
let v = if edge != "" {
|
|
|
let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect();
|
|
|
@@ -1074,24 +976,380 @@ pub fn dot_graph(
|
|
|
*edge = v;
|
|
|
}
|
|
|
|
|
|
- let dot = Dot::new(&g)
|
|
|
- .to_string()
|
|
|
- .replace("\\\"", "");
|
|
|
+ g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
+ g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
+
|
|
|
+ let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
|
|
|
labels.sort_by(|a, b| b.len().cmp(&a.len()));
|
|
|
for label in labels {
|
|
|
let label_str = format!("label = \"{label}\"");
|
|
|
- dot.replace(&label_str, &format!("{label_str}, color = \"{color}\""));
|
|
|
+ dot = dot.replace(
|
|
|
+ &format!("{label_str} ]\n"),
|
|
|
+ &format!("{label_str}, color = \"{color}\" ]\n"),
|
|
|
+ );
|
|
|
}
|
|
|
dot
|
|
|
}
|
|
|
|
|
|
+/// for non overlapping ways
|
|
|
+pub fn dot_graph_biall(
|
|
|
+ graph: &StableGraph<String, String>,
|
|
|
+ ways: &Vec<Vec<NodeIndex>>,
|
|
|
+ values: Vec<&str>,
|
|
|
+ colors: Vec<&str>,
|
|
|
+ erase: bool,
|
|
|
+) -> String {
|
|
|
+ let mut g = graph.clone();
|
|
|
+
|
|
|
+ // erase labels
|
|
|
+ if erase {
|
|
|
+ g.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut labels = Vec::new();
|
|
|
+ for ((way, value), color) in ways.iter().zip(values.into_iter()).zip(colors.into_iter()) {
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let edge_id = g.find_edge(window[0], window[1]).unwrap();
|
|
|
+ let edge = g.edge_weight_mut(edge_id).unwrap();
|
|
|
+ let v = if edge != "" {
|
|
|
+ let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect();
|
|
|
+ v.push(value);
|
|
|
+ v.join(", ")
|
|
|
+ } else {
|
|
|
+ value.to_string()
|
|
|
+ };
|
|
|
+ labels.push((v.clone(), color));
|
|
|
+ *edge = v;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
+ g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
+
|
|
|
+ let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
+
|
|
|
+ labels.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
|
|
|
+ for (label, color) in labels {
|
|
|
+ let label_str = format!("label = \"{label}\"");
|
|
|
+ dot = dot.replace(
|
|
|
+ &format!("{label_str} ]\n"),
|
|
|
+ &format!("{label_str}, color = \"{color}\" ]\n"),
|
|
|
+ );
|
|
|
+ }
|
|
|
+ dot
|
|
|
+}
|
|
|
+
|
|
|
+pub fn str_to_bp(s: &str) -> ((bool, String, i32), (String, i32, bool)) {
|
|
|
+ let symb = |s| -> &str {
|
|
|
+ if s {
|
|
|
+ "►"
|
|
|
+ } else {
|
|
|
+ "◄"
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ let (a, b) = s.split_once("|").unwrap();
|
|
|
+ let (sens_a, rest_a) = a.split_at(3);
|
|
|
+ let sens_a = if sens_a == "►" { true } else { false };
|
|
|
+ let (chr_a, pos_a) = rest_a.split_once(":").unwrap();
|
|
|
+
|
|
|
+ let (rest_b, sens_b) = b.split_at(b.len() - 3);
|
|
|
+ let sens_b = if sens_b == "►" { true } else { false };
|
|
|
+ let (chr_b, pos_b) = rest_b.split_once(":").unwrap();
|
|
|
+
|
|
|
+ (
|
|
|
+ (
|
|
|
+ sens_a,
|
|
|
+ chr_a.to_string(),
|
|
|
+ pos_a.replace("_", "").parse().unwrap(),
|
|
|
+ ),
|
|
|
+ (
|
|
|
+ chr_b.to_string(),
|
|
|
+ pos_b.replace("_", "").parse().unwrap(),
|
|
|
+ sens_b,
|
|
|
+ ),
|
|
|
+ )
|
|
|
+}
|
|
|
+
|
|
|
+pub fn rev_comp_bp_str(s: &str) -> String {
|
|
|
+ let symb = |s| -> &str {
|
|
|
+ if s {
|
|
|
+ "►"
|
|
|
+ } else {
|
|
|
+ "◄"
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ let (a, b) = s.split_once("|").unwrap();
|
|
|
+ let (sens_a, rest_a) = a.split_at(3);
|
|
|
+ let sens_a = if sens_a == "►" { true } else { false };
|
|
|
+ let (chr_a, pos_a) = rest_a.split_once(":").unwrap();
|
|
|
+
|
|
|
+ let (rest_b, sens_b) = b.split_at(b.len() - 3);
|
|
|
+ let sens_b = if sens_b == "►" { true } else { false };
|
|
|
+ let (chr_b, pos_b) = rest_b.split_once(":").unwrap();
|
|
|
+
|
|
|
+ format!(
|
|
|
+ "{}{}:{}|{}:{}{}",
|
|
|
+ symb(!sens_b),
|
|
|
+ chr_b,
|
|
|
+ pos_b,
|
|
|
+ chr_a,
|
|
|
+ pos_a,
|
|
|
+ symb(!sens_a)
|
|
|
+ )
|
|
|
+}
|
|
|
+
|
|
|
+// assuming way
|
|
|
+pub fn has_way_overlapping(
|
|
|
+ way: Vec<Vec<Mapping>>,
|
|
|
+ // g: &StableGraph<String, String>,
|
|
|
+ // start: (String, i32),
|
|
|
+ // end: (String, i32),
|
|
|
+) -> bool {
|
|
|
+ let mut chr_ranges: HashMap<String, Vec<RangeInclusive<i32>>> = HashMap::new();
|
|
|
+
|
|
|
+ // add inner ranges
|
|
|
+ way.iter().for_each(|m| {
|
|
|
+ vec![m.first().unwrap(), m.last().unwrap()]
|
|
|
+ .iter()
|
|
|
+ .for_each(|part| {
|
|
|
+ let target_name = part.target_name.clone().unwrap();
|
|
|
+ if let Some(v) = chr_ranges.get_mut(&target_name) {
|
|
|
+ v.push(part.target_start..=part.target_end);
|
|
|
+ } else {
|
|
|
+ chr_ranges.insert(target_name, vec![part.target_start..=part.target_end]);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ });
|
|
|
+
|
|
|
+ // Add outer ranges.
|
|
|
+ // let first = way.first().unwrap().first().unwrap();
|
|
|
+ // assert_eq!(start.0, first.target_name.clone().unwrap());
|
|
|
+ // let r = if start.1 <= first.target_end {
|
|
|
+ // start.1..=first.target_end
|
|
|
+ // } else {
|
|
|
+ // first.target_start..=start.1
|
|
|
+ // };
|
|
|
+ // chr_ranges.insert(start.0, vec![r]);
|
|
|
+
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let a = window[0].last().unwrap();
|
|
|
+ let b = window[1].first().unwrap();
|
|
|
+
|
|
|
+ let a_target_name = a.target_name.clone().unwrap();
|
|
|
+ let b_target_name = b.target_name.clone().unwrap();
|
|
|
+ assert_eq!(a.strand, b.strand);
|
|
|
+ assert_eq!(a_target_name, b_target_name);
|
|
|
+
|
|
|
+ let r = if a.target_end <= b.target_start {
|
|
|
+ a.target_end..=b.target_start
|
|
|
+ } else {
|
|
|
+ b.target_end..=a.target_end
|
|
|
+ };
|
|
|
+ if let Some(v) = chr_ranges.get_mut(&a_target_name) {
|
|
|
+ v.push(r);
|
|
|
+ } else {
|
|
|
+ chr_ranges.insert(a_target_name, vec![r]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // let last = way.last().unwrap().last().unwrap();
|
|
|
+ // assert_eq!(end.0, last.target_name.clone().unwrap());
|
|
|
+ // if let Some(v) = chr_ranges.get_mut(&end.0) {
|
|
|
+ // let r = if last.target_end <= end.1 {
|
|
|
+ // last.target_end..=end.1
|
|
|
+ // } else {
|
|
|
+ // end.1..=last.target_start
|
|
|
+ // };
|
|
|
+ // v.push(r);
|
|
|
+ // }
|
|
|
+
|
|
|
+ println!("{chr_ranges:?}");
|
|
|
+
|
|
|
+ // check overlaps
|
|
|
+ let mut res = false;
|
|
|
+ 'outer: for (_chr, ranges) in chr_ranges {
|
|
|
+ for ra in ranges.iter() {
|
|
|
+ for rb in ranges.iter() {
|
|
|
+ if ra != rb {
|
|
|
+ let a = vec![*ra.start(), *rb.start()]
|
|
|
+ .iter()
|
|
|
+ .max()
|
|
|
+ .unwrap()
|
|
|
+ .to_owned();
|
|
|
+ let b = vec![*ra.end(), *rb.end()].iter().min().unwrap().to_owned();
|
|
|
+ if a <= b {
|
|
|
+ res = true;
|
|
|
+ break 'outer;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ res
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Clone, PartialEq, Eq)]
|
|
|
+pub struct MyBreakPoint {
|
|
|
+ pub id: String,
|
|
|
+ pub mappings: Vec<Mapping>,
|
|
|
+}
|
|
|
+
|
|
|
+impl Hash for MyBreakPoint {
|
|
|
+ fn hash<H: Hasher>(&self, state: &mut H) {
|
|
|
+ self.repr_sens().hash(state);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl MyBreakPoint {
|
|
|
+ pub fn new(mappings: Vec<Mapping>) -> Self {
|
|
|
+ Self {
|
|
|
+ id: Uuid::new_v4().to_string(),
|
|
|
+ mappings,
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn from(sens: bool, chr: &str, pos: i32, id: &str) -> Self {
|
|
|
+ let strand = match sens {
|
|
|
+ true => minimap2::Strand::Forward,
|
|
|
+ false => minimap2::Strand::Reverse,
|
|
|
+ };
|
|
|
+ let target_name = Some(chr.to_string());
|
|
|
+ let mut left_mapping = Mapping::default();
|
|
|
+ left_mapping.strand = strand;
|
|
|
+ left_mapping.target_name = target_name.clone();
|
|
|
+ left_mapping.target_start = pos;
|
|
|
+ left_mapping.target_end = pos;
|
|
|
+
|
|
|
+ let mut right_mapping = Mapping::default();
|
|
|
+ right_mapping.strand = strand;
|
|
|
+ right_mapping.target_name = target_name;
|
|
|
+ right_mapping.target_start = pos;
|
|
|
+ right_mapping.target_end = pos;
|
|
|
+
|
|
|
+ Self { id: id.to_string(), mappings: vec![left_mapping, right_mapping] }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn sens(&self) -> Self {
|
|
|
+ self.clone()
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn rc(&self) -> Self {
|
|
|
+ let mut mappings = self.mappings.clone();
|
|
|
+ mappings.reverse();
|
|
|
+ Self {
|
|
|
+ id: self.id.clone(),
|
|
|
+ mappings,
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn repr_sens(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
+ let (first, last) = (
|
|
|
+ self.mappings.first().unwrap(),
|
|
|
+ self.mappings.last().unwrap(),
|
|
|
+ );
|
|
|
+
|
|
|
+ match (first.strand, last.strand) {
|
|
|
+ (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
|
|
|
+ (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
+ (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ ),
|
|
|
+ (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
|
|
|
+ (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
+ (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
+ ),
|
|
|
+ (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
|
|
|
+ (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
+ (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ ),
|
|
|
+ (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
|
|
|
+ (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
+ (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
+ ),
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn repr_rc(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
+ let (mut a, mut b) = self.repr_sens();
|
|
|
+ a.0 = !a.0;
|
|
|
+ b.0 = !b.0;
|
|
|
+ (b, a)
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn str_sens(&self, sens_symb: &str, rc_symb: &str) -> String {
|
|
|
+ let format = CustomFormat::builder()
|
|
|
+ .grouping(Grouping::Standard)
|
|
|
+ .minus_sign("-")
|
|
|
+ .separator("_")
|
|
|
+ .build()
|
|
|
+ .unwrap();
|
|
|
+ let (a, b) = self.repr_sens();
|
|
|
+ let symb = |s| -> &str {
|
|
|
+ if s {
|
|
|
+ sens_symb
|
|
|
+ } else {
|
|
|
+ rc_symb
|
|
|
+ }
|
|
|
+ };
|
|
|
+ format!(
|
|
|
+ "{}{}:{}|{}:{}{}",
|
|
|
+ symb(a.0),
|
|
|
+ a.1,
|
|
|
+ a.2.to_formatted_string(&format),
|
|
|
+ b.1,
|
|
|
+ b.2.to_formatted_string(&format),
|
|
|
+ symb(b.0)
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn str_rc(&self, sens_symb: &str, rc_symb: &str) -> String {
|
|
|
+ let format = CustomFormat::builder()
|
|
|
+ .grouping(Grouping::Standard)
|
|
|
+ .minus_sign("-")
|
|
|
+ .separator("_")
|
|
|
+ .build()
|
|
|
+ .unwrap();
|
|
|
+ let (a, b) = self.repr_rc();
|
|
|
+ let symb = |s| -> &str {
|
|
|
+ if s {
|
|
|
+ sens_symb
|
|
|
+ } else {
|
|
|
+ rc_symb
|
|
|
+ }
|
|
|
+ };
|
|
|
+ format!(
|
|
|
+ "{}{}:{}|{}:{}{}",
|
|
|
+ symb(a.0),
|
|
|
+ a.1,
|
|
|
+ a.2.to_formatted_string(&format),
|
|
|
+ b.1,
|
|
|
+ b.2.to_formatted_string(&format),
|
|
|
+ symb(b.0)
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn is_next(&self, next: &MyBreakPoint) -> bool {
|
|
|
+ let (_, (last_sens, last_chr, last_pos)) = self.repr_sens();
|
|
|
+ let ((next_sens, next_chr, next_pos), _) = next.repr_sens();
|
|
|
+ let is_same_sens = last_sens == next_sens;
|
|
|
+ let is_on_same_chr = last_chr == next_chr;
|
|
|
+ let is_reachable = if last_sens {
|
|
|
+ last_pos < next_pos
|
|
|
+ } else {
|
|
|
+ last_pos > next_pos
|
|
|
+ };
|
|
|
+ is_same_sens && is_on_same_chr && is_reachable
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
- use std::ops::Deref;
|
|
|
+ use std::ops::{Range, RangeInclusive};
|
|
|
|
|
|
- use minimap2::Strand;
|
|
|
- use petgraph::{dot::Dot, stable_graph::StableGraph, visit::EdgeRef, Directed};
|
|
|
+ use petgraph::{stable_graph::StableGraph, visit::EdgeRef, Directed};
|
|
|
|
|
|
use super::*;
|
|
|
|
|
|
@@ -1121,6 +1379,118 @@ mod tests {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+ #[test]
|
|
|
+ fn linear() -> Result<()> {
|
|
|
+ let dir = "./data_test";
|
|
|
+ let sens_symb = "►";
|
|
|
+ let rc_symb = "◄";
|
|
|
+
|
|
|
+ // load from dir
|
|
|
+ let genome = Genome::from_contigs_sequences(dir)?;
|
|
|
+
|
|
|
+ // start node
|
|
|
+ let start_bp = MyBreakPoint::from(true, "chr7", 0, "start");
|
|
|
+ // end node
|
|
|
+ let end_bp = MyBreakPoint::from(true, "chr7", i32::MAX, "end");
|
|
|
+
|
|
|
+ // load breakpoints
|
|
|
+ let mut breakpoints_id = HashMap::new();
|
|
|
+ let mut graph = StableGraph::<MyBreakPoint, String, Directed>::new();
|
|
|
+
|
|
|
+ // add bp to graph
|
|
|
+ for contig in genome.contigs() {
|
|
|
+ if let Some(mappings) = contig.contig_ref.breakpoints() {
|
|
|
+ let bp = MyBreakPoint::new(mappings);
|
|
|
+ let id = graph.add_node(bp.clone());
|
|
|
+ breakpoints_id.insert(bp.id, id);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ let start_id = graph.add_node(start_bp.clone());
|
|
|
+ breakpoints_id.insert(start_bp.id, start_id);
|
|
|
+
|
|
|
+ let end_id = graph.add_node(end_bp.clone());
|
|
|
+ breakpoints_id.insert(end_bp.id, end_id);
|
|
|
+
|
|
|
+ let g_iter = graph.clone();
|
|
|
+ for last in g_iter.node_weights() {
|
|
|
+ for next in g_iter.node_weights() {
|
|
|
+ let a = breakpoints_id.get(&last.id).unwrap();
|
|
|
+ let b = breakpoints_id.get(&next.id).unwrap();
|
|
|
+
|
|
|
+ let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_sens();
|
|
|
+ let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_sens();
|
|
|
+
|
|
|
+ if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
|
|
|
+ let in_order = if last_tail_sens {
|
|
|
+ last_tail_pos <= next_head_pos
|
|
|
+ } else {
|
|
|
+ last_tail_pos > next_head_pos
|
|
|
+ };
|
|
|
+ if in_order {
|
|
|
+ graph.add_edge(*a, *b, "".to_string());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = next.repr_rc();
|
|
|
+ let ((next_head_sens, next_head_chr, next_head_pos), _) = last.repr_rc();
|
|
|
+
|
|
|
+ if last_tail_chr == next_head_chr && last_tail_sens == next_head_sens {
|
|
|
+ let in_order = if last_tail_sens {
|
|
|
+ last_tail_pos <= next_head_pos
|
|
|
+ } else {
|
|
|
+ last_tail_pos > next_head_pos
|
|
|
+ };
|
|
|
+ if in_order {
|
|
|
+ graph.add_edge(*b, *a, "".to_string());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Remove edges in feedback arc set.
|
|
|
+ let fas: Vec<_> = petgraph::algo::feedback_arc_set::greedy_feedback_arc_set(&graph)
|
|
|
+ .map(|e| e.id())
|
|
|
+ .collect();
|
|
|
+ for edge_id in fas {
|
|
|
+ graph.remove_edge(edge_id);
|
|
|
+ }
|
|
|
+
|
|
|
+ // remove empty nodes
|
|
|
+ let del: Vec<_> = graph.node_indices().filter(|id| graph.neighbors_undirected(*id).count() == 0).collect();
|
|
|
+ del.iter().for_each(|id| { graph.remove_node(*id); });
|
|
|
+
|
|
|
+
|
|
|
+ // Find all the ways from start node to end node.
|
|
|
+ let ways =
|
|
|
+ algo::all_simple_paths::<Vec<_>, _>(&graph, start_id, end_id, 0, None)
|
|
|
+ .collect::<Vec<_>>();
|
|
|
+
|
|
|
+
|
|
|
+ // Add way number to edges.
|
|
|
+ for (n_way, way) in ways.iter().enumerate() {
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let edge_id = graph.find_edge(window[0], window[1]).unwrap();
|
|
|
+ let edge = graph.edge_weight_mut(edge_id).unwrap();
|
|
|
+ *edge = vec![edge.to_string(), (n_way + 1).to_string()]
|
|
|
+ .into_iter()
|
|
|
+ .filter(|s| s != "")
|
|
|
+ .collect::<Vec<_>>()
|
|
|
+ .join(", ")
|
|
|
+ .to_string();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // show graph
|
|
|
+ let g = graph.map(|_, v| v.str_sens(sens_symb, rc_symb), |_, v| v.to_string());
|
|
|
+ let dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
+ println!("{dot}");
|
|
|
+
|
|
|
+ // for last in
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
#[test]
|
|
|
fn load_dir() -> Result<()> {
|
|
|
let dir = "./data_test";
|
|
|
@@ -1130,22 +1500,35 @@ mod tests {
|
|
|
.separator("_")
|
|
|
.build()
|
|
|
.unwrap();
|
|
|
+
|
|
|
+ // load from dir
|
|
|
let genome = Genome::from_contigs_sequences(dir)?;
|
|
|
+
|
|
|
genome.stats();
|
|
|
- // println!("{genome:?}");
|
|
|
|
|
|
- for contig in genome.contigs() {
|
|
|
- if let Some(breakpoints) = contig.breakpoints_repr() {
|
|
|
- println!("[{}]", breakpoints.join("---"));
|
|
|
- }
|
|
|
- }
|
|
|
+ // for contig in genome.contigs() {
|
|
|
+ // if let Some(breakpoints) = contig.breakpoints_repr() {
|
|
|
+ // println!("[{}]", breakpoints.join("---"));
|
|
|
+ // }
|
|
|
+ // }
|
|
|
|
|
|
+ // Get all unique sorted breakpoints in sens and revcomp
|
|
|
let mut bp = Vec::new();
|
|
|
+ let mut bp_num_store: HashMap<((bool, String, i32), (bool, String, i32)), i32> =
|
|
|
+ HashMap::new();
|
|
|
+ let mut mapping_num_store = HashMap::new();
|
|
|
+ let mut num_contig = 0;
|
|
|
for contig in genome.contigs() {
|
|
|
if let Some(breakpoints) = contig.contig_ref.breakpoints() {
|
|
|
- bp.push(BreakPoint::new(&breakpoints).sens());
|
|
|
- bp.push(BreakPoint::new(&breakpoints).rev_comp());
|
|
|
+ let s = BreakPoint::new(&breakpoints).sens();
|
|
|
+ let revcomp = BreakPoint::new(&breakpoints).rev_comp();
|
|
|
+ bp_num_store.insert(s.clone(), num_contig);
|
|
|
+ bp_num_store.insert(revcomp.clone(), num_contig);
|
|
|
+ mapping_num_store.insert(num_contig, breakpoints);
|
|
|
+ bp.push(s);
|
|
|
+ bp.push(revcomp);
|
|
|
}
|
|
|
+ num_contig += 1;
|
|
|
}
|
|
|
bp.sort_by(|(a, _), (b, _)| a.2.cmp(&b.2));
|
|
|
bp.sort_by(|(a, _), (b, _)| a.1.cmp(&b.1));
|
|
|
@@ -1158,21 +1541,22 @@ mod tests {
|
|
|
}
|
|
|
};
|
|
|
|
|
|
+ // start node
|
|
|
let start_node = (true, "chr7".to_string(), 0);
|
|
|
let start_node = (start_node.clone(), start_node);
|
|
|
bp.push(start_node.clone());
|
|
|
let mut start_key = "".to_string();
|
|
|
|
|
|
+ // end node
|
|
|
let end_node = (true, "chr7".to_string(), i32::MAX);
|
|
|
let end_node = (end_node.clone(), end_node);
|
|
|
bp.push(end_node.clone());
|
|
|
let mut end_key = "".to_string();
|
|
|
|
|
|
- let start_chr = "chr7".to_string();
|
|
|
- let start_sens = true;
|
|
|
-
|
|
|
+ // add all breakpoints into a graph nodes
|
|
|
let mut graph = StableGraph::<_, _, Directed>::new();
|
|
|
let mut nodes_idx = HashMap::new();
|
|
|
+ let mut key_num_store: HashMap<String, i32> = HashMap::new();
|
|
|
bp.iter().for_each(|(a, b)| {
|
|
|
let key = format!(
|
|
|
"{}{}:{}|{}:{}{}",
|
|
|
@@ -1183,12 +1567,12 @@ mod tests {
|
|
|
b.2.to_formatted_string(&format),
|
|
|
symb(b.0)
|
|
|
);
|
|
|
+ if let Some(bp_num) = bp_num_store.get(&(a.clone(), b.clone())) {
|
|
|
+ key_num_store.insert(key.to_string(), *bp_num);
|
|
|
+ }
|
|
|
let node_id = graph.add_node(key.clone());
|
|
|
nodes_idx.insert(key.clone(), node_id);
|
|
|
- println!("{}", key);
|
|
|
- // if start_key == "".to_string() && start_chr == a.1.to_string() && start_sens == a.0 {
|
|
|
- // start_key = key;
|
|
|
- // }
|
|
|
+ // println!("{}", key);
|
|
|
if (a.to_owned(), b.to_owned()) == start_node {
|
|
|
start_key = key;
|
|
|
} else if (a.to_owned(), b.to_owned()) == end_node {
|
|
|
@@ -1196,6 +1580,7 @@ mod tests {
|
|
|
}
|
|
|
});
|
|
|
|
|
|
+ // Comparing two by two breakpoints, if same sens, chromosome and order: draw edge.
|
|
|
bp.iter().for_each(|(a, (last_sens, last_chr, last_pos))| {
|
|
|
let last_key = format!(
|
|
|
"{}{}:{}|{}:{}{}",
|
|
|
@@ -1233,7 +1618,7 @@ mod tests {
|
|
|
});
|
|
|
});
|
|
|
|
|
|
- // Remove edges in feedback arc set
|
|
|
+ // Remove edges in feedback arc set.
|
|
|
let fas: Vec<_> = petgraph::algo::feedback_arc_set::greedy_feedback_arc_set(&graph)
|
|
|
.map(|e| e.id())
|
|
|
.collect();
|
|
|
@@ -1248,12 +1633,33 @@ mod tests {
|
|
|
let se = graph.find_edge(*start_node_id, *end_node_id).unwrap();
|
|
|
graph.remove_edge(se);
|
|
|
|
|
|
- // Find all the ways from start node to end node
|
|
|
+ // Find all the ways from start node to end node.
|
|
|
let ways =
|
|
|
algo::all_simple_paths::<Vec<_>, _>(&graph, *start_node_id, *end_node_id, 0, None)
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
|
- // add way number to edges
|
|
|
+ // Remove ways that pass by the same bp either sens or revcomp.
|
|
|
+ let mut erase_way = Vec::new();
|
|
|
+ for (i, way) in ways.iter().enumerate() {
|
|
|
+ let mut w = Vec::new();
|
|
|
+ for node in way.iter() {
|
|
|
+ let s = graph.node_weight(*node).unwrap();
|
|
|
+ if w.contains(s) {
|
|
|
+ erase_way.push(i);
|
|
|
+ }
|
|
|
+ let rcs = rev_comp_bp_str(s);
|
|
|
+ w.push(s.to_string());
|
|
|
+ w.push(rcs);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ let ways: Vec<Vec<NodeIndex>> = ways
|
|
|
+ .iter()
|
|
|
+ .enumerate()
|
|
|
+ .filter(|(i, _)| !erase_way.contains(i))
|
|
|
+ .map(|(_, v)| v.clone())
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ // Add way number to edges.
|
|
|
for (n_way, way) in ways.iter().enumerate() {
|
|
|
for window in way.windows(2) {
|
|
|
let edge_id = graph.find_edge(window[0], window[1]).unwrap();
|
|
|
@@ -1267,52 +1673,111 @@ mod tests {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- println!("{ways:?}");
|
|
|
-
|
|
|
// Retain edges and nodes in a way
|
|
|
let mut g = graph.clone();
|
|
|
g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
|
|
|
- // Try to find one way containing all nodes
|
|
|
- let n_nodes = g.node_weights().count();
|
|
|
- let max_way = ways.iter().map(|v| v.len()).max().unwrap();
|
|
|
- let mut monoallelic_graphs = Vec::new();
|
|
|
- let mut empty_edges = g.clone();
|
|
|
- erase_edges_labels_inplace(&mut empty_edges);
|
|
|
- if n_nodes == max_way {
|
|
|
- let biggest = ways
|
|
|
+ // Get keeped bp_num
|
|
|
+ let mut bp_nums = Vec::new();
|
|
|
+ g.node_weights().for_each(|s| {
|
|
|
+ if let Some(n) = key_num_store.get(s) {
|
|
|
+ bp_nums.push(*n)
|
|
|
+ }
|
|
|
+ });
|
|
|
+ bp_nums.sort();
|
|
|
+ bp_nums.dedup();
|
|
|
+ println!("{} bp considered", bp_nums.len());
|
|
|
+
|
|
|
+ // Try to find one way containing all bp
|
|
|
+ let mut monoallelic_ways = Vec::new();
|
|
|
+ for way in ways.iter() {
|
|
|
+ // check if passing by all bp
|
|
|
+ let mut n = way
|
|
|
.iter()
|
|
|
- .filter(|v| v.len() == max_way)
|
|
|
- .collect::<Vec<_>>();
|
|
|
- for b in biggest {
|
|
|
- println!(
|
|
|
- "Likely monoallelic phase [{}]",
|
|
|
- b.iter()
|
|
|
- .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
- .collect::<Vec<_>>()
|
|
|
- .join(";")
|
|
|
- );
|
|
|
- let new_g = rename_edges(&mut empty_edges, b, "A");
|
|
|
- // let dot = Dot::new(&new_g);
|
|
|
- // let u = dot.to_string().replace("\\\"", "");
|
|
|
- // let u = u.replace("label = \"A\"", "label = \"A\", color = \"red\"");
|
|
|
-
|
|
|
- println!("{}", dot_graph(&g, b, "A", "red", true));
|
|
|
- monoallelic_graphs.push(new_g);
|
|
|
+ .flat_map(|node| {
|
|
|
+ if let Some(n) = key_num_store.get(g.node_weight(*node).unwrap()) {
|
|
|
+ vec![*n]
|
|
|
+ } else {
|
|
|
+ vec![]
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect::<Vec<i32>>();
|
|
|
+ n.sort();
|
|
|
+ n.dedup();
|
|
|
+ if n == bp_nums {
|
|
|
+ // check if repassing by same coords
|
|
|
+ let bp_maps = n
|
|
|
+ .iter()
|
|
|
+ .map(|num| mapping_num_store.get(num).unwrap().to_owned())
|
|
|
+ .collect();
|
|
|
+ if !has_way_overlapping(bp_maps) {
|
|
|
+ monoallelic_ways.push(way.clone());
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Try to find two ways containig all nodes
|
|
|
+ // print monoallelic graphs
|
|
|
+ for b in monoallelic_ways.iter() {
|
|
|
+ println!(
|
|
|
+ "Likely monoallelic phase [{}]",
|
|
|
+ b.iter()
|
|
|
+ .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
+ .collect::<Vec<_>>()
|
|
|
+ .join(";")
|
|
|
+ );
|
|
|
+ println!("{}", dot_graph(&g, b, "A", "red", true));
|
|
|
+ }
|
|
|
+
|
|
|
+ // Try to find two different ways containig all bp and not sharing same bp
|
|
|
let mut bial_ways = Vec::new();
|
|
|
for a_way in ways.iter() {
|
|
|
let mut a_nodes = a_way.clone();
|
|
|
a_nodes.remove(0);
|
|
|
a_nodes.remove(a_nodes.len() - 1);
|
|
|
|
|
|
+ let num_a: Vec<i32> = a_nodes
|
|
|
+ .iter()
|
|
|
+ .map(|node| {
|
|
|
+ key_num_store
|
|
|
+ .get(g.node_weight(*node).unwrap())
|
|
|
+ .unwrap()
|
|
|
+ .to_owned()
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ let maps_a: Vec<Vec<Mapping>> = num_a
|
|
|
+ .iter()
|
|
|
+ .map(|num| mapping_num_store.get(num).unwrap().to_owned())
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ if has_way_overlapping(maps_a) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Try to find another way that hasnt shared node
|
|
|
let mut diff_way = None;
|
|
|
for b_way in ways.iter() {
|
|
|
let mut share_node = false;
|
|
|
+
|
|
|
+ let mut b_nodes = b_way.clone();
|
|
|
+ b_nodes.remove(0);
|
|
|
+ b_nodes.remove(a_nodes.len() - 1);
|
|
|
+ let num_b: Vec<i32> = b_nodes
|
|
|
+ .iter()
|
|
|
+ .map(|node| {
|
|
|
+ key_num_store
|
|
|
+ .get(g.node_weight(*node).unwrap())
|
|
|
+ .unwrap()
|
|
|
+ .to_owned()
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ let maps_b: Vec<Vec<Mapping>> = num_b
|
|
|
+ .iter()
|
|
|
+ .map(|num| mapping_num_store.get(num).unwrap().to_owned())
|
|
|
+ .collect();
|
|
|
+ if has_way_overlapping(maps_b) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
for b_node in b_way.iter() {
|
|
|
if a_nodes.contains(b_node) {
|
|
|
share_node = true;
|
|
|
@@ -1330,7 +1795,19 @@ mod tests {
|
|
|
all.extend(b_way.clone().iter());
|
|
|
all.sort();
|
|
|
all.dedup();
|
|
|
- if all.len() == max_way {
|
|
|
+ let mut n = all
|
|
|
+ .iter()
|
|
|
+ .flat_map(|node| {
|
|
|
+ if let Some(n) = key_num_store.get(g.node_weight(*node).unwrap()) {
|
|
|
+ vec![*n]
|
|
|
+ } else {
|
|
|
+ vec![]
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect::<Vec<i32>>();
|
|
|
+ n.sort();
|
|
|
+ n.dedup();
|
|
|
+ if n == bp_nums {
|
|
|
bial_ways.push((a_way.clone(), b_way))
|
|
|
}
|
|
|
}
|
|
|
@@ -1347,10 +1824,18 @@ mod tests {
|
|
|
.map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
.collect::<Vec<_>>()
|
|
|
.join(";")
|
|
|
- )
|
|
|
+ );
|
|
|
+ let s = dot_graph_biall(
|
|
|
+ &g,
|
|
|
+ &vec![a.to_vec(), b.to_vec()],
|
|
|
+ vec!["A", "A'"],
|
|
|
+ vec!["red", "blue"],
|
|
|
+ true,
|
|
|
+ );
|
|
|
+ println!("{s}");
|
|
|
}
|
|
|
-
|
|
|
- println!("{:?}", Dot::new(&g));
|
|
|
+ //
|
|
|
+ // println!("{:?}", Dot::new(&g));
|
|
|
|
|
|
Ok(())
|
|
|
}
|