|
|
@@ -12,7 +12,6 @@ use std::{
|
|
|
fmt,
|
|
|
fs::{self, File},
|
|
|
io::{BufReader, BufWriter, Write},
|
|
|
- ops::RangeInclusive,
|
|
|
path::PathBuf,
|
|
|
process::{Command, Stdio},
|
|
|
thread,
|
|
|
@@ -209,7 +208,6 @@ impl ContigRef {
|
|
|
ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
let mut v = vec![a, b, c];
|
|
|
v.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
-
|
|
|
let (a, b, c) = (
|
|
|
*v.get(0).clone().unwrap(),
|
|
|
*v.get(1).clone().unwrap(),
|
|
|
@@ -292,17 +290,25 @@ fn mappings_to_string(mappings: &Vec<Mapping>) -> String {
|
|
|
|
|
|
pub fn get_ref_pos(mappings: Vec<Mapping>) -> Result<ContigRef> {
|
|
|
let mut mappings = mappings;
|
|
|
+ mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+
|
|
|
if mappings.len() == 1 {
|
|
|
return Ok(ContigRef::Unique(mappings.get(0).unwrap().clone()));
|
|
|
} else {
|
|
|
- let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut mappings)?.into();
|
|
|
+ let mut grouped: Vec<Vec<Mapping>> = group_mappings(&mut mappings)?;
|
|
|
+ // let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut mappings)?.into();
|
|
|
|
|
|
if grouped.len() == 1 {
|
|
|
let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
return Ok(ContigRef::Ambigous(r));
|
|
|
} else if grouped.len() >= 2 {
|
|
|
- let first = grouped.pop_back().unwrap();
|
|
|
- let last = grouped.pop_front().unwrap();
|
|
|
+ // let first = grouped.pop_back().unwrap();
|
|
|
+ // let last = grouped.pop_front().unwrap();
|
|
|
+ let first = grouped.first().unwrap().to_vec();
|
|
|
+ let last = grouped.last().unwrap().to_vec();
|
|
|
+ grouped.remove(0);
|
|
|
+ grouped.remove(grouped.len() - 1);
|
|
|
+ assert!(first[0].query_start < last[0].query_start);
|
|
|
|
|
|
if grouped.len() == 0 {
|
|
|
if first.len() == 1 && last.len() == 1 {
|
|
|
@@ -392,6 +398,8 @@ impl Genome {
|
|
|
supporting_records: Option<Vec<Record>>,
|
|
|
sequence: String,
|
|
|
) -> Result<()> {
|
|
|
+ let mut mappings = mappings;
|
|
|
+ mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
let new_contig = Contig {
|
|
|
id,
|
|
|
mappings: mappings.clone(),
|
|
|
@@ -976,8 +984,10 @@ pub fn dot_graph(
|
|
|
*edge = v;
|
|
|
}
|
|
|
|
|
|
- g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
- g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
+ if erase {
|
|
|
+ g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
+ g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
+ }
|
|
|
|
|
|
let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
|
|
|
@@ -1001,7 +1011,6 @@ pub fn dot_graph_biall(
|
|
|
erase: bool,
|
|
|
) -> String {
|
|
|
let mut g = graph.clone();
|
|
|
-
|
|
|
// erase labels
|
|
|
if erase {
|
|
|
g.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
@@ -1101,108 +1110,12 @@ pub fn rev_comp_bp_str(s: &str) -> String {
|
|
|
)
|
|
|
}
|
|
|
|
|
|
-// assuming way
|
|
|
-pub fn has_way_overlapping(
|
|
|
- way: Vec<Vec<Mapping>>,
|
|
|
- // g: &StableGraph<String, String>,
|
|
|
- // start: (String, i32),
|
|
|
- // end: (String, i32),
|
|
|
-) -> bool {
|
|
|
- let mut chr_ranges: HashMap<String, Vec<RangeInclusive<i32>>> = HashMap::new();
|
|
|
-
|
|
|
- // add inner ranges
|
|
|
- way.iter().for_each(|m| {
|
|
|
- vec![m.first().unwrap(), m.last().unwrap()]
|
|
|
- .iter()
|
|
|
- .for_each(|part| {
|
|
|
- let target_name = part.target_name.clone().unwrap();
|
|
|
- if let Some(v) = chr_ranges.get_mut(&target_name) {
|
|
|
- v.push(part.target_start..=part.target_end);
|
|
|
- } else {
|
|
|
- chr_ranges.insert(target_name, vec![part.target_start..=part.target_end]);
|
|
|
- }
|
|
|
- });
|
|
|
- });
|
|
|
-
|
|
|
- // Add outer ranges.
|
|
|
- // let first = way.first().unwrap().first().unwrap();
|
|
|
- // assert_eq!(start.0, first.target_name.clone().unwrap());
|
|
|
- // let r = if start.1 <= first.target_end {
|
|
|
- // start.1..=first.target_end
|
|
|
- // } else {
|
|
|
- // first.target_start..=start.1
|
|
|
- // };
|
|
|
- // chr_ranges.insert(start.0, vec![r]);
|
|
|
-
|
|
|
- for window in way.windows(2) {
|
|
|
- let a = window[0].last().unwrap();
|
|
|
- let b = window[1].first().unwrap();
|
|
|
-
|
|
|
- let a_target_name = a.target_name.clone().unwrap();
|
|
|
- let b_target_name = b.target_name.clone().unwrap();
|
|
|
- assert_eq!(a.strand, b.strand);
|
|
|
- assert_eq!(a_target_name, b_target_name);
|
|
|
-
|
|
|
- let r = if a.target_end <= b.target_start {
|
|
|
- a.target_end..=b.target_start
|
|
|
- } else {
|
|
|
- b.target_end..=a.target_end
|
|
|
- };
|
|
|
- if let Some(v) = chr_ranges.get_mut(&a_target_name) {
|
|
|
- v.push(r);
|
|
|
- } else {
|
|
|
- chr_ranges.insert(a_target_name, vec![r]);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // let last = way.last().unwrap().last().unwrap();
|
|
|
- // assert_eq!(end.0, last.target_name.clone().unwrap());
|
|
|
- // if let Some(v) = chr_ranges.get_mut(&end.0) {
|
|
|
- // let r = if last.target_end <= end.1 {
|
|
|
- // last.target_end..=end.1
|
|
|
- // } else {
|
|
|
- // end.1..=last.target_start
|
|
|
- // };
|
|
|
- // v.push(r);
|
|
|
- // }
|
|
|
-
|
|
|
- println!("{chr_ranges:?}");
|
|
|
-
|
|
|
- // check overlaps
|
|
|
- let mut res = false;
|
|
|
- 'outer: for (_chr, ranges) in chr_ranges {
|
|
|
- for ra in ranges.iter() {
|
|
|
- for rb in ranges.iter() {
|
|
|
- if ra != rb {
|
|
|
- let a = vec![*ra.start(), *rb.start()]
|
|
|
- .iter()
|
|
|
- .max()
|
|
|
- .unwrap()
|
|
|
- .to_owned();
|
|
|
- let b = vec![*ra.end(), *rb.end()].iter().min().unwrap().to_owned();
|
|
|
- if a <= b {
|
|
|
- res = true;
|
|
|
- break 'outer;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- res
|
|
|
-}
|
|
|
-
|
|
|
-#[derive(Clone, PartialEq, Eq)]
|
|
|
+#[derive(Clone, PartialEq, Eq, Debug)]
|
|
|
pub struct MyBreakPoint {
|
|
|
pub id: String,
|
|
|
pub mappings: Vec<Mapping>,
|
|
|
}
|
|
|
|
|
|
-impl Hash for MyBreakPoint {
|
|
|
- fn hash<H: Hasher>(&self, state: &mut H) {
|
|
|
- self.repr_sens().hash(state);
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
impl MyBreakPoint {
|
|
|
pub fn new(mappings: Vec<Mapping>) -> Self {
|
|
|
Self {
|
|
|
@@ -1242,6 +1155,18 @@ impl MyBreakPoint {
|
|
|
pub fn rc(&self) -> Self {
|
|
|
let mut mappings = self.mappings.clone();
|
|
|
mappings.reverse();
|
|
|
+ let left = mappings.first_mut().unwrap();
|
|
|
+ left.strand = match left.strand {
|
|
|
+ minimap2::Strand::Forward => minimap2::Strand::Reverse,
|
|
|
+ minimap2::Strand::Reverse => minimap2::Strand::Forward,
|
|
|
+ };
|
|
|
+
|
|
|
+ let right = mappings.last_mut().unwrap();
|
|
|
+ right.strand = match right.strand {
|
|
|
+ minimap2::Strand::Forward => minimap2::Strand::Reverse,
|
|
|
+ minimap2::Strand::Reverse => minimap2::Strand::Forward,
|
|
|
+ };
|
|
|
+
|
|
|
Self {
|
|
|
id: self.id.clone(),
|
|
|
mappings,
|
|
|
@@ -1278,6 +1203,8 @@ impl MyBreakPoint {
|
|
|
let (mut a, mut b) = self.repr_sens();
|
|
|
a.0 = !a.0;
|
|
|
b.0 = !b.0;
|
|
|
+ // let (aa, bb) = self.rc().repr_sens();
|
|
|
+ // assert_eq!(aa, b);
|
|
|
(b, a)
|
|
|
}
|
|
|
|
|
|
@@ -1347,6 +1274,79 @@ impl MyBreakPoint {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+fn get_next(
|
|
|
+ graph: &StableGraph<MyBreakPoint, String>,
|
|
|
+ ways: Vec<Vec<(NodeIndex, bool)>>,
|
|
|
+) -> Vec<Vec<(NodeIndex, bool)>> {
|
|
|
+ let mut new_ways = Vec::new();
|
|
|
+ for way in ways.into_iter() {
|
|
|
+ let mut possible_next: Vec<(NodeIndex, bool)> = Vec::new();
|
|
|
+ let (last_id, last_sens) = way.get(way.len() - 1).unwrap().to_owned();
|
|
|
+ let a = graph.node_weight(last_id).unwrap().to_owned();
|
|
|
+ let last = if last_sens { a.clone() } else { a.rc() };
|
|
|
+
|
|
|
+ for neighbor_id in graph.neighbors(last_id) {
|
|
|
+ let b = graph.node_weight(neighbor_id).unwrap().to_owned();
|
|
|
+ if last.is_next(&b) {
|
|
|
+ possible_next.push((neighbor_id, true));
|
|
|
+ }
|
|
|
+ if last.is_next(&b.rc()) {
|
|
|
+ possible_next.push((neighbor_id, false));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if possible_next.len() == 0 {
|
|
|
+ println!("droppping way no compatible neighbors {:?}", way);
|
|
|
+ }
|
|
|
+ for pn in possible_next {
|
|
|
+ let mut new_way = way.clone();
|
|
|
+ if !new_way
|
|
|
+ .iter()
|
|
|
+ .map(|(id, _)| id.clone())
|
|
|
+ .collect::<Vec<NodeIndex>>()
|
|
|
+ .contains(&pn.0)
|
|
|
+ {
|
|
|
+ new_way.push(pn);
|
|
|
+ new_ways.push(new_way);
|
|
|
+ } else {
|
|
|
+ println!("dropping way: {way:?} next : {:?}", pn);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ new_ways
|
|
|
+}
|
|
|
+
|
|
|
+fn find_ways(
|
|
|
+ graph: &StableGraph<MyBreakPoint, String>,
|
|
|
+ start_id: NodeIndex,
|
|
|
+ start_sens: bool,
|
|
|
+ end_id: NodeIndex,
|
|
|
+) -> Vec<Vec<(NodeIndex, bool)>> {
|
|
|
+ let sens_symb = "►";
|
|
|
+ let rc_symb = "◄";
|
|
|
+
|
|
|
+ let mut reached_end = Vec::new();
|
|
|
+ let mut ways = vec![vec![(start_id, start_sens)]];
|
|
|
+ loop {
|
|
|
+ let new_ways = get_next(graph, ways.clone());
|
|
|
+ println!("-------\n{new_ways:?}\n-----");
|
|
|
+ if new_ways.len() == 0 {
|
|
|
+ println!("no more");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ ways.clear();
|
|
|
+ for new_way in new_ways.iter() {
|
|
|
+ let last_id = new_way.last().unwrap().to_owned().0;
|
|
|
+ if last_id == end_id {
|
|
|
+ reached_end.push(new_way.clone());
|
|
|
+ } else {
|
|
|
+ ways.push(new_way.clone());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ reached_end
|
|
|
+}
|
|
|
+
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
use std::{
|
|
|
@@ -1408,9 +1408,16 @@ mod tests {
|
|
|
if let Some(mappings) = contig.contig_ref.breakpoints() {
|
|
|
let bp = MyBreakPoint::new(mappings);
|
|
|
if !dedup_hash.contains(&bp.str_sens(sens_symb, rc_symb)) {
|
|
|
+ println!(
|
|
|
+ "adding: {} rc: {}\n\n{:?}\n",
|
|
|
+ bp.str_sens(sens_symb, rc_symb),
|
|
|
+ bp.str_rc(sens_symb, rc_symb),
|
|
|
+ bp
|
|
|
+ );
|
|
|
let id = graph.add_node(bp.clone());
|
|
|
breakpoints_id.insert(bp.id.clone(), id);
|
|
|
dedup_hash.insert(bp.str_sens(sens_symb, rc_symb));
|
|
|
+ dedup_hash.insert(bp.str_rc(sens_symb, rc_symb));
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -1434,7 +1441,6 @@ mod tests {
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
let (_, (last_tail_sens, last_tail_chr, last_tail_pos)) = last.repr_sens();
|
|
|
let ((next_head_sens, next_head_chr, next_head_pos), _) = next.repr_sens();
|
|
|
|
|
|
@@ -1445,11 +1451,6 @@ mod tests {
|
|
|
last_tail_pos > next_head_pos
|
|
|
};
|
|
|
if in_order && graph.find_edge(a, b).is_none() {
|
|
|
- println!(
|
|
|
- "{}<-->{}",
|
|
|
- last.str_sens(sens_symb, rc_symb),
|
|
|
- next.str_sens(sens_symb, rc_symb)
|
|
|
- );
|
|
|
graph.add_edge(a, b, "".to_string());
|
|
|
}
|
|
|
}
|
|
|
@@ -1463,11 +1464,6 @@ mod tests {
|
|
|
last_tail_pos > next_head_pos
|
|
|
};
|
|
|
if in_order && graph.find_edge(a, b).is_none() {
|
|
|
- println!(
|
|
|
- "{}<-->{}",
|
|
|
- last.str_sens(sens_symb, rc_symb),
|
|
|
- next.str_rc(sens_symb, rc_symb)
|
|
|
- );
|
|
|
graph.add_edge(a, b, "".to_string());
|
|
|
}
|
|
|
}
|
|
|
@@ -1481,11 +1477,6 @@ mod tests {
|
|
|
last_tail_pos > next_head_pos
|
|
|
};
|
|
|
if in_order && graph.find_edge(b, a).is_none() {
|
|
|
- println!(
|
|
|
- "{}<-->{}",
|
|
|
- last.str_rc(sens_symb, rc_symb),
|
|
|
- next.str_sens(sens_symb, rc_symb)
|
|
|
- );
|
|
|
graph.add_edge(b, a, "".to_string());
|
|
|
}
|
|
|
}
|
|
|
@@ -1498,28 +1489,13 @@ mod tests {
|
|
|
} else {
|
|
|
last_tail_pos > next_head_pos
|
|
|
};
|
|
|
- if in_order && graph.find_edge(b, a).is_none() {
|
|
|
- println!(
|
|
|
- "{}<-->{}",
|
|
|
- last.str_rc(sens_symb, rc_symb),
|
|
|
- next.str_rc(sens_symb, rc_symb)
|
|
|
- );
|
|
|
+ if in_order && graph.find_edge(b, a).is_none() {
|
|
|
graph.add_edge(b, a, "".to_string());
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
-
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Remove edges in feedback arc set.
|
|
|
- // let fas: Vec<_> = petgraph::algo::feedback_arc_set::greedy_feedback_arc_set(&graph)
|
|
|
- // .map(|e| e.id())
|
|
|
- // .collect();
|
|
|
- // for edge_id in fas {
|
|
|
- // graph.remove_edge(edge_id);
|
|
|
- // }
|
|
|
- //
|
|
|
// remove empty nodes
|
|
|
let del: Vec<_> = graph
|
|
|
.node_indices()
|
|
|
@@ -1529,405 +1505,52 @@ mod tests {
|
|
|
graph.remove_node(*id);
|
|
|
});
|
|
|
|
|
|
- // Find all the ways from start node to end node.
|
|
|
- let ways = algo::all_simple_paths::<Vec<_>, _>(&graph, start_id, end_id, 0, None)
|
|
|
- .collect::<Vec<_>>();
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
- let mut confirmed_way = Vec::new();
|
|
|
- for way in ways.iter().enumerate() {
|
|
|
- let mut last_sens = true;
|
|
|
- let mut confirmed = true;
|
|
|
- // let mut last_sens = s.1.0.clone();
|
|
|
- for window in way.windows(2) {
|
|
|
- let a = graph.node_weight(window[0]).unwrap();
|
|
|
- let b = graph.node_weight(window[1]).unwrap();
|
|
|
-
|
|
|
- let last = if last_sens { a.clone() } else { a.rc() };
|
|
|
- if last.is_next(&b) {
|
|
|
- last_sens = true;
|
|
|
- } else if last.is_next(&b.rc()) {
|
|
|
- last_sens = false;
|
|
|
- } else {
|
|
|
- confirmed = false;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- if confirmed {
|
|
|
- confirmed_way.push(way.clone());
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Add way number to edges.
|
|
|
- for (n_way, way) in confirmed_way.iter().enumerate() {
|
|
|
- for window in way.windows(2) {
|
|
|
- let edge_id = graph.find_edge(window[0], window[1]).unwrap();
|
|
|
- let edge = graph.edge_weight_mut(edge_id).unwrap();
|
|
|
- *edge = vec![edge.to_string(), (n_way + 1).to_string()]
|
|
|
- .into_iter()
|
|
|
- .filter(|s| s != "")
|
|
|
- .collect::<Vec<_>>()
|
|
|
- .join(", ")
|
|
|
- .to_string();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // show graph
|
|
|
- let g = graph.map(|_, v| v.str_sens(sens_symb, rc_symb), |_, v| v.to_string());
|
|
|
- let dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
- println!("{dot}");
|
|
|
-
|
|
|
- // for last in
|
|
|
-
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- #[test]
|
|
|
- fn load_dir() -> Result<()> {
|
|
|
- let dir = "./data_test";
|
|
|
- let format = CustomFormat::builder()
|
|
|
- .grouping(Grouping::Standard)
|
|
|
- .minus_sign("-")
|
|
|
- .separator("_")
|
|
|
- .build()
|
|
|
- .unwrap();
|
|
|
-
|
|
|
- // load from dir
|
|
|
- let genome = Genome::from_contigs_sequences(dir)?;
|
|
|
-
|
|
|
- genome.stats();
|
|
|
-
|
|
|
- // for contig in genome.contigs() {
|
|
|
- // if let Some(breakpoints) = contig.breakpoints_repr() {
|
|
|
- // println!("[{}]", breakpoints.join("---"));
|
|
|
- // }
|
|
|
- // }
|
|
|
-
|
|
|
- // Get all unique sorted breakpoints in sens and revcomp
|
|
|
- let mut bp = Vec::new();
|
|
|
- let mut bp_num_store: HashMap<((bool, String, i32), (bool, String, i32)), i32> =
|
|
|
- HashMap::new();
|
|
|
- let mut mapping_num_store = HashMap::new();
|
|
|
- let mut num_contig = 0;
|
|
|
- for contig in genome.contigs() {
|
|
|
- if let Some(breakpoints) = contig.contig_ref.breakpoints() {
|
|
|
- let s = BreakPoint::new(&breakpoints).sens();
|
|
|
- let revcomp = BreakPoint::new(&breakpoints).rev_comp();
|
|
|
- bp_num_store.insert(s.clone(), num_contig);
|
|
|
- bp_num_store.insert(revcomp.clone(), num_contig);
|
|
|
- mapping_num_store.insert(num_contig, breakpoints);
|
|
|
- bp.push(s);
|
|
|
- bp.push(revcomp);
|
|
|
- }
|
|
|
- num_contig += 1;
|
|
|
- }
|
|
|
- bp.sort_by(|(a, _), (b, _)| a.2.cmp(&b.2));
|
|
|
- bp.sort_by(|(a, _), (b, _)| a.1.cmp(&b.1));
|
|
|
- bp.dedup_by(|(a, _), (b, _)| a.2 == b.2 && a.1 == b.1);
|
|
|
- let symb = |s| -> &str {
|
|
|
- if s {
|
|
|
- "►"
|
|
|
- } else {
|
|
|
- "◄"
|
|
|
- }
|
|
|
- };
|
|
|
-
|
|
|
- // start node
|
|
|
- let start_node = (true, "chr7".to_string(), 0);
|
|
|
- let start_node = (start_node.clone(), start_node);
|
|
|
- bp.push(start_node.clone());
|
|
|
- let mut start_key = "".to_string();
|
|
|
-
|
|
|
- // end node
|
|
|
- let end_node = (true, "chr7".to_string(), i32::MAX);
|
|
|
- let end_node = (end_node.clone(), end_node);
|
|
|
- bp.push(end_node.clone());
|
|
|
- let mut end_key = "".to_string();
|
|
|
-
|
|
|
- // add all breakpoints into a graph nodes
|
|
|
- let mut graph = StableGraph::<_, _, Directed>::new();
|
|
|
- let mut nodes_idx = HashMap::new();
|
|
|
- let mut key_num_store: HashMap<String, i32> = HashMap::new();
|
|
|
- bp.iter().for_each(|(a, b)| {
|
|
|
- let key = format!(
|
|
|
- "{}{}:{}|{}:{}{}",
|
|
|
- symb(a.0),
|
|
|
- a.1,
|
|
|
- a.2.to_formatted_string(&format),
|
|
|
- b.1,
|
|
|
- b.2.to_formatted_string(&format),
|
|
|
- symb(b.0)
|
|
|
- );
|
|
|
- if let Some(bp_num) = bp_num_store.get(&(a.clone(), b.clone())) {
|
|
|
- key_num_store.insert(key.to_string(), *bp_num);
|
|
|
- }
|
|
|
- let node_id = graph.add_node(key.clone());
|
|
|
- nodes_idx.insert(key.clone(), node_id);
|
|
|
- // println!("{}", key);
|
|
|
- if (a.to_owned(), b.to_owned()) == start_node {
|
|
|
- start_key = key;
|
|
|
- } else if (a.to_owned(), b.to_owned()) == end_node {
|
|
|
- end_key = key;
|
|
|
- }
|
|
|
- });
|
|
|
-
|
|
|
- // Comparing two by two breakpoints, if same sens, chromosome and order: draw edge.
|
|
|
- bp.iter().for_each(|(a, (last_sens, last_chr, last_pos))| {
|
|
|
- let last_key = format!(
|
|
|
- "{}{}:{}|{}:{}{}",
|
|
|
- symb(a.0),
|
|
|
- a.1,
|
|
|
- a.2.to_formatted_string(&format),
|
|
|
- last_chr,
|
|
|
- last_pos.to_formatted_string(&format),
|
|
|
- symb(*last_sens)
|
|
|
- );
|
|
|
- bp.iter().for_each(|((next_sens, next_chr, next_pos), b)| {
|
|
|
- let is_same_sens = last_sens == next_sens;
|
|
|
- let is_on_same_chr = last_chr == next_chr;
|
|
|
- let is_reachable = if *last_sens {
|
|
|
- last_pos < next_pos
|
|
|
- } else {
|
|
|
- last_pos > next_pos
|
|
|
- };
|
|
|
- if is_same_sens && is_on_same_chr && is_reachable {
|
|
|
- let next_key = format!(
|
|
|
- "{}{}:{}|{}:{}{}",
|
|
|
- symb(*next_sens),
|
|
|
- next_chr,
|
|
|
- next_pos.to_formatted_string(&format),
|
|
|
- b.1,
|
|
|
- b.2.to_formatted_string(&format),
|
|
|
- symb(b.0)
|
|
|
- );
|
|
|
- graph.add_edge(
|
|
|
- *nodes_idx.get(&last_key).unwrap(),
|
|
|
- *nodes_idx.get(&next_key).unwrap(),
|
|
|
- "".to_string(),
|
|
|
- );
|
|
|
- }
|
|
|
- });
|
|
|
- });
|
|
|
-
|
|
|
- // Remove edges in feedback arc set.
|
|
|
- let fas: Vec<_> = petgraph::algo::feedback_arc_set::greedy_feedback_arc_set(&graph)
|
|
|
- .map(|e| e.id())
|
|
|
- .collect();
|
|
|
- for edge_id in fas {
|
|
|
- graph.remove_edge(edge_id);
|
|
|
- }
|
|
|
-
|
|
|
- let start_node_id = nodes_idx.get(&start_key).unwrap();
|
|
|
- let end_node_id = nodes_idx.get(&end_key).unwrap();
|
|
|
-
|
|
|
- // Remove edge direct start -> end
|
|
|
- let se = graph.find_edge(*start_node_id, *end_node_id).unwrap();
|
|
|
- graph.remove_edge(se);
|
|
|
-
|
|
|
- // Find all the ways from start node to end node.
|
|
|
- let ways =
|
|
|
- algo::all_simple_paths::<Vec<_>, _>(&graph, *start_node_id, *end_node_id, 0, None)
|
|
|
- .collect::<Vec<_>>();
|
|
|
-
|
|
|
- // Remove ways that pass by the same bp either sens or revcomp.
|
|
|
- let mut erase_way = Vec::new();
|
|
|
- for (i, way) in ways.iter().enumerate() {
|
|
|
- let mut w = Vec::new();
|
|
|
- for node in way.iter() {
|
|
|
- let s = graph.node_weight(*node).unwrap();
|
|
|
- if w.contains(s) {
|
|
|
- erase_way.push(i);
|
|
|
- }
|
|
|
- let rcs = rev_comp_bp_str(s);
|
|
|
- w.push(s.to_string());
|
|
|
- w.push(rcs);
|
|
|
- }
|
|
|
- }
|
|
|
- let ways: Vec<Vec<NodeIndex>> = ways
|
|
|
- .iter()
|
|
|
- .enumerate()
|
|
|
- .filter(|(i, _)| !erase_way.contains(i))
|
|
|
- .map(|(_, v)| v.clone())
|
|
|
- .collect();
|
|
|
-
|
|
|
- // Add way number to edges.
|
|
|
- for (n_way, way) in ways.iter().enumerate() {
|
|
|
- for window in way.windows(2) {
|
|
|
- let edge_id = graph.find_edge(window[0], window[1]).unwrap();
|
|
|
- let edge = graph.edge_weight_mut(edge_id).unwrap();
|
|
|
- *edge = vec![edge.to_string(), (n_way + 1).to_string()]
|
|
|
- .into_iter()
|
|
|
- .filter(|s| s != "")
|
|
|
- .collect::<Vec<_>>()
|
|
|
- .join(", ")
|
|
|
- .to_string();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Retain edges and nodes in a way
|
|
|
- let mut g = graph.clone();
|
|
|
- g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
- g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
-
|
|
|
- // Get keeped bp_num
|
|
|
- let mut bp_nums = Vec::new();
|
|
|
- g.node_weights().for_each(|s| {
|
|
|
- if let Some(n) = key_num_store.get(s) {
|
|
|
- bp_nums.push(*n)
|
|
|
- }
|
|
|
- });
|
|
|
- bp_nums.sort();
|
|
|
- bp_nums.dedup();
|
|
|
- println!("{} bp considered", bp_nums.len());
|
|
|
-
|
|
|
- // Try to find one way containing all bp
|
|
|
- let mut monoallelic_ways = Vec::new();
|
|
|
- for way in ways.iter() {
|
|
|
- // check if passing by all bp
|
|
|
- let mut n = way
|
|
|
+ let mut ways = find_ways(&graph, start_id, true, end_id);
|
|
|
+ ways.dedup_by_key(|a| {
|
|
|
+ let astr: Vec<String> = a
|
|
|
.iter()
|
|
|
- .flat_map(|node| {
|
|
|
- if let Some(n) = key_num_store.get(g.node_weight(*node).unwrap()) {
|
|
|
- vec![*n]
|
|
|
+ .map(|(id, sens)| {
|
|
|
+ if *sens {
|
|
|
+ graph.node_weight(*id).unwrap().str_sens(sens_symb, rc_symb)
|
|
|
} else {
|
|
|
- vec![]
|
|
|
+ graph.node_weight(*id).unwrap().str_rc(sens_symb, rc_symb)
|
|
|
}
|
|
|
})
|
|
|
- .collect::<Vec<i32>>();
|
|
|
- n.sort();
|
|
|
- n.dedup();
|
|
|
- if n == bp_nums {
|
|
|
- // check if repassing by same coords
|
|
|
- let bp_maps = n
|
|
|
- .iter()
|
|
|
- .map(|num| mapping_num_store.get(num).unwrap().to_owned())
|
|
|
- .collect();
|
|
|
- if !has_way_overlapping(bp_maps) {
|
|
|
- monoallelic_ways.push(way.clone());
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // print monoallelic graphs
|
|
|
- for b in monoallelic_ways.iter() {
|
|
|
- println!(
|
|
|
- "Likely monoallelic phase [{}]",
|
|
|
- b.iter()
|
|
|
- .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
- .collect::<Vec<_>>()
|
|
|
- .join(";")
|
|
|
- );
|
|
|
- println!("{}", dot_graph(&g, b, "A", "red", true));
|
|
|
- }
|
|
|
-
|
|
|
- // Try to find two different ways containig all bp and not sharing same bp
|
|
|
- let mut bial_ways = Vec::new();
|
|
|
- for a_way in ways.iter() {
|
|
|
- let mut a_nodes = a_way.clone();
|
|
|
- a_nodes.remove(0);
|
|
|
- a_nodes.remove(a_nodes.len() - 1);
|
|
|
-
|
|
|
- let num_a: Vec<i32> = a_nodes
|
|
|
- .iter()
|
|
|
- .map(|node| {
|
|
|
- key_num_store
|
|
|
- .get(g.node_weight(*node).unwrap())
|
|
|
- .unwrap()
|
|
|
- .to_owned()
|
|
|
- })
|
|
|
- .collect();
|
|
|
- let maps_a: Vec<Vec<Mapping>> = num_a
|
|
|
- .iter()
|
|
|
- .map(|num| mapping_num_store.get(num).unwrap().to_owned())
|
|
|
.collect();
|
|
|
+ astr.join(";")
|
|
|
+ });
|
|
|
|
|
|
- if has_way_overlapping(maps_a) {
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // Try to find another way that hasnt shared node
|
|
|
- let mut diff_way = None;
|
|
|
- for b_way in ways.iter() {
|
|
|
- let mut share_node = false;
|
|
|
-
|
|
|
- let mut b_nodes = b_way.clone();
|
|
|
- b_nodes.remove(0);
|
|
|
- b_nodes.remove(a_nodes.len() - 1);
|
|
|
- let num_b: Vec<i32> = b_nodes
|
|
|
- .iter()
|
|
|
- .map(|node| {
|
|
|
- key_num_store
|
|
|
- .get(g.node_weight(*node).unwrap())
|
|
|
- .unwrap()
|
|
|
- .to_owned()
|
|
|
- })
|
|
|
- .collect();
|
|
|
- let maps_b: Vec<Vec<Mapping>> = num_b
|
|
|
- .iter()
|
|
|
- .map(|num| mapping_num_store.get(num).unwrap().to_owned())
|
|
|
- .collect();
|
|
|
- if has_way_overlapping(maps_b) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- for b_node in b_way.iter() {
|
|
|
- if a_nodes.contains(b_node) {
|
|
|
- share_node = true;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- if !share_node {
|
|
|
- diff_way = Some(b_way.clone())
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if let Some(b_way) = diff_way {
|
|
|
- // dedup for double check
|
|
|
- let mut all = a_way.clone();
|
|
|
- all.extend(b_way.clone().iter());
|
|
|
- all.sort();
|
|
|
- all.dedup();
|
|
|
- let mut n = all
|
|
|
- .iter()
|
|
|
- .flat_map(|node| {
|
|
|
- if let Some(n) = key_num_store.get(g.node_weight(*node).unwrap()) {
|
|
|
- vec![*n]
|
|
|
+ for new_way in ways.iter() {
|
|
|
+ let g = graph.map(
|
|
|
+ |ni, nv| {
|
|
|
+ let mut in_way: Vec<_> = new_way.iter().filter(|(id, _)| *id == ni).collect();
|
|
|
+ let val = if in_way.len() == 1 {
|
|
|
+ let (_, n_sens) = in_way.pop().unwrap();
|
|
|
+ if *n_sens {
|
|
|
+ nv.str_sens(sens_symb, rc_symb).to_string()
|
|
|
} else {
|
|
|
- vec![]
|
|
|
+ nv.str_rc(sens_symb, rc_symb).to_string()
|
|
|
}
|
|
|
- })
|
|
|
- .collect::<Vec<i32>>();
|
|
|
- n.sort();
|
|
|
- n.dedup();
|
|
|
- if n == bp_nums {
|
|
|
- bial_ways.push((a_way.clone(), b_way))
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- for (a, b) in bial_ways.iter() {
|
|
|
- println!(
|
|
|
- "Likely bi-allelic phase:\n\t[{}]\n\t[{}]",
|
|
|
- a.iter()
|
|
|
- .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
- .collect::<Vec<_>>()
|
|
|
- .join(";"),
|
|
|
- b.iter()
|
|
|
- .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
- .collect::<Vec<_>>()
|
|
|
- .join(";")
|
|
|
+ } else {
|
|
|
+ nv.str_sens(sens_symb, rc_symb).to_string()
|
|
|
+ };
|
|
|
+ format!("{val} ({})", ni.index())
|
|
|
+ },
|
|
|
+ |_, v| v.to_string(),
|
|
|
);
|
|
|
- let s = dot_graph_biall(
|
|
|
- &g,
|
|
|
- &vec![a.to_vec(), b.to_vec()],
|
|
|
- vec!["A", "A'"],
|
|
|
- vec!["red", "blue"],
|
|
|
- true,
|
|
|
+ println!(
|
|
|
+ "{}",
|
|
|
+ dot_graph(
|
|
|
+ &g,
|
|
|
+ &new_way.into_iter().map(|(id, _)| id.clone()).collect(),
|
|
|
+ "A",
|
|
|
+ "red",
|
|
|
+ true,
|
|
|
+ )
|
|
|
);
|
|
|
- println!("{s}");
|
|
|
}
|
|
|
- //
|
|
|
- // println!("{:?}", Dot::new(&g));
|
|
|
+
|
|
|
+ // monallellic way ?
|
|
|
|
|
|
Ok(())
|
|
|
}
|