|
|
@@ -3,12 +3,15 @@ use fasta::record::Sequence;
|
|
|
use log::info;
|
|
|
use minimap2::{Aligner, Mapping};
|
|
|
use noodles_fasta as fasta;
|
|
|
+use num_format::{CustomFormat, Grouping, Locale, ToFormattedString, WriteFormatted};
|
|
|
+use petgraph::{algo, prelude::*, Graph};
|
|
|
use rust_htslib::bam::{self, Record};
|
|
|
use std::{
|
|
|
collections::{HashMap, VecDeque},
|
|
|
fmt,
|
|
|
fs::{self, File},
|
|
|
io::{BufReader, BufWriter, Write},
|
|
|
+ path::PathBuf,
|
|
|
process::{Command, Stdio},
|
|
|
thread,
|
|
|
};
|
|
|
@@ -80,6 +83,63 @@ impl fmt::Display for ContigRef {
|
|
|
}
|
|
|
|
|
|
impl ContigRef {
|
|
|
+ pub fn breakpoints(&self) -> Option<Vec<Mapping>> {
|
|
|
+ match self {
|
|
|
+ ContigRef::Unique(_) => None,
|
|
|
+ ContigRef::Chimeric((a, b)) => Some(vec![a.clone(), b.clone()]),
|
|
|
+ ContigRef::ChimericTriple((a, b, c)) => Some(vec![a.clone(), b.clone(), c.clone()]),
|
|
|
+ ContigRef::ChimericMultiple(_) => None,
|
|
|
+ ContigRef::LeftAmbiguity(_) => None,
|
|
|
+ ContigRef::RightAmbiguity(_) => None,
|
|
|
+ ContigRef::Ambigous(_) => None,
|
|
|
+ }
|
|
|
+ }
|
|
|
+ pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
|
|
|
+ let left = "►";
|
|
|
+ let right = "◄";
|
|
|
+ let bp_right = "▐";
|
|
|
+ let bp_left = "▌";
|
|
|
+ let format = CustomFormat::builder()
|
|
|
+ .grouping(Grouping::Standard)
|
|
|
+ .minus_sign("-")
|
|
|
+ .separator("_")
|
|
|
+ .build()
|
|
|
+ .unwrap();
|
|
|
+
|
|
|
+ let get_sign = |m: &Mapping| -> &str {
|
|
|
+ match m.strand {
|
|
|
+ minimap2::Strand::Forward => left,
|
|
|
+ minimap2::Strand::Reverse => right,
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ let uk = "UNKNOWN".to_string();
|
|
|
+ let mut res = Vec::new();
|
|
|
+ if let Some(breakpoints) = self.breakpoints() {
|
|
|
+ for v in breakpoints.windows(2) {
|
|
|
+ let mut bp_string = format!("{}:", v[0].target_name.clone().unwrap_or(uk.clone()));
|
|
|
+ let _ = bp_string
|
|
|
+ .write_formatted(&v[0].target_end, &format)
|
|
|
+ .unwrap();
|
|
|
+ bp_string = format!(
|
|
|
+ "{bp_string}{}{bp_left}{bp_right}{}{}:",
|
|
|
+ get_sign(&v[0]),
|
|
|
+ get_sign(&v[1]),
|
|
|
+ v[1].target_name.clone().unwrap_or(uk.clone())
|
|
|
+ );
|
|
|
+ let _ = bp_string
|
|
|
+ .write_formatted(&v[1].target_start, &format)
|
|
|
+ .unwrap();
|
|
|
+ res.push(bp_string);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if res.len() > 0 {
|
|
|
+ Some(res)
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ }
|
|
|
pub fn desc(&self) -> Option<String> {
|
|
|
let uk = "UNKNOWN".to_string();
|
|
|
let to_desc = |v: &mut Vec<Mapping>| -> String {
|
|
|
@@ -114,12 +174,10 @@ impl ContigRef {
|
|
|
ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
Some(to_desc(&mut vec![a.to_owned(), b.to_owned(), c.to_owned()]))
|
|
|
}
|
|
|
- ContigRef::ChimericMultiple(_) => todo!(),
|
|
|
- ContigRef::LeftAmbiguity(_) => todo!(),
|
|
|
- ContigRef::RightAmbiguity(_) => todo!(),
|
|
|
- ContigRef::Ambigous(a) => {
|
|
|
- Some(to_desc(&mut a.to_owned()))
|
|
|
- },
|
|
|
+ ContigRef::ChimericMultiple(_) => None,
|
|
|
+ ContigRef::LeftAmbiguity(_) => None,
|
|
|
+ ContigRef::RightAmbiguity(_) => None,
|
|
|
+ ContigRef::Ambigous(a) => Some(to_desc(&mut a.to_owned())),
|
|
|
}
|
|
|
}
|
|
|
pub fn hgvs(&self) -> Option<String> {
|
|
|
@@ -322,6 +380,9 @@ impl Genome {
|
|
|
pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> {
|
|
|
self.chromosomes.iter()
|
|
|
}
|
|
|
+ pub fn contigs(&self) -> impl Iterator<Item = &Contig> {
|
|
|
+ self.chromosomes.iter().flat_map(|(_, e)| e.iter())
|
|
|
+ }
|
|
|
pub fn add_contig(
|
|
|
&mut self,
|
|
|
id: String,
|
|
|
@@ -434,11 +495,43 @@ impl Genome {
|
|
|
}
|
|
|
|
|
|
pub fn write_contigs_sequences(&self, dir: &str) {
|
|
|
- self.iter().for_each(|(_, chr)| {
|
|
|
- chr.iter().for_each(|c| c.write_fasta(&format!("{dir}/{}.fasta", c.id)))
|
|
|
- });
|
|
|
+ for contig in self.contigs() {
|
|
|
+ contig.write_fasta(&format!("{dir}/{}.fasta", contig.id))
|
|
|
+ }
|
|
|
+ // self.iter().for_each(|(_, chr)| {
|
|
|
+ // chr.iter().for_each(|c| c.write_fasta(&format!("{dir}/{}.fasta", c.id)))
|
|
|
+ // });
|
|
|
}
|
|
|
|
|
|
+ pub fn from_contigs_sequences(dir: &str) -> Result<Self> {
|
|
|
+ let aligner_url = "http://localhost:4444/align";
|
|
|
+ let aligner = aligner_client::dist_align(aligner_url.to_string());
|
|
|
+ let mut genome = Self::new();
|
|
|
+ let paths = get_ext_paths(dir, "fasta")?;
|
|
|
+ for path in paths {
|
|
|
+ let fa = read_fasta(path.to_str().unwrap())?;
|
|
|
+ for (name, sequence) in fa {
|
|
|
+ genome.add_contig_from_seq(name, &sequence.as_ref().to_vec(), &aligner)?;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Ok(genome)
|
|
|
+ }
|
|
|
+
|
|
|
+ // pub fn write_records(&self, file: &str) {
|
|
|
+ // let mut records = Vec::new();
|
|
|
+ // for (name, chromosome) in self.chromosomes.iter() {
|
|
|
+ // for contig in chromosome.iter() {
|
|
|
+ // if let Some(rec) = &contig.supporting_records {
|
|
|
+ // records.extend(rec.iter());
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // // header
|
|
|
+ //
|
|
|
+ // // writer
|
|
|
+ // rust_htslib::bam::Writer::from_path(path, header, format)
|
|
|
+ // }
|
|
|
+
|
|
|
pub fn stats(&self) {
|
|
|
for (k, v) in self.chromosomes.iter() {
|
|
|
info!("{}:{}", k, v.contigs.len());
|
|
|
@@ -606,6 +699,10 @@ impl Contig {
|
|
|
self.contig_ref.desc()
|
|
|
}
|
|
|
|
|
|
+ pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
|
|
|
+ self.contig_ref.breakpoints_repr()
|
|
|
+ }
|
|
|
+
|
|
|
pub fn write_fasta(&self, fasta_path: &str) {
|
|
|
write_fasta(fasta_path, &vec![(self.id.clone(), self.sequence.clone())]);
|
|
|
}
|
|
|
@@ -766,10 +863,196 @@ pub fn read_fasta(path: &str) -> Result<Vec<(String, Sequence)>> {
|
|
|
|
|
|
Ok(res)
|
|
|
}
|
|
|
+fn get_ext_paths(dir: &str, ext: &str) -> Result<Vec<PathBuf>> {
|
|
|
+ let paths = std::fs::read_dir(dir)?
|
|
|
+ // Filter out all those directory entries which couldn't be read
|
|
|
+ .filter_map(|res| res.ok())
|
|
|
+ // Map the directory entries to paths
|
|
|
+ .map(|dir_entry| dir_entry.path())
|
|
|
+ // Filter out all paths with extensions other than `csv`
|
|
|
+ .filter_map(|path| {
|
|
|
+ if path.extension().map_or(false, |xt| xt == ext) {
|
|
|
+ Some(path)
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect::<Vec<_>>();
|
|
|
+ Ok(paths)
|
|
|
+}
|
|
|
|
|
|
+#[derive(Clone, PartialEq)]
|
|
|
+struct BreakPoint<'a> {
|
|
|
+ d: &'a Vec<Mapping>,
|
|
|
+}
|
|
|
+
|
|
|
+impl<'a> BreakPoint<'a> {
|
|
|
+ pub fn new(d: &'a Vec<Mapping>) -> Self {
|
|
|
+ Self { d }
|
|
|
+ }
|
|
|
+ pub fn sens(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
+ let (first, last) = (self.d.first().unwrap(), self.d.last().unwrap());
|
|
|
+
|
|
|
+ match (first.strand, last.strand) {
|
|
|
+ (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
|
|
|
+ (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
+ (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ ),
|
|
|
+ (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
|
|
|
+ (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
+ (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
+ ),
|
|
|
+ (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
|
|
|
+ (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
+ (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ ),
|
|
|
+ (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
|
|
|
+ (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
+ (false, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ ),
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn rev_comp(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
+ let (mut a, mut b) = self.sens();
|
|
|
+ a.0 = !a.0;
|
|
|
+ b.0 = !b.0;
|
|
|
+ (b, a)
|
|
|
+
|
|
|
+ // let (first, last) = (self.d.first().unwrap(), self.d.last().unwrap());
|
|
|
+ // match (first.strand, last.strand) {
|
|
|
+ // (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
|
|
|
+ // (false, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ // (false, first.target_name.clone().unwrap(), first.target_start),
|
|
|
+ // ),
|
|
|
+ // (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
|
|
|
+ // (true, last.target_name.clone().unwrap(), last.target_start),
|
|
|
+ // (false, first.target_name.clone().unwrap(), first.target_start),
|
|
|
+ // ),
|
|
|
+ // (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
|
|
|
+ // (false, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ // (true, first.target_name.clone().unwrap(), first.target_end),
|
|
|
+ // ),
|
|
|
+ // (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
|
|
|
+ // (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
+ // (true, first.target_name.clone().unwrap(), first.target_end),
|
|
|
+ // ),
|
|
|
+ // }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+struct BreakPoints<'a>(Vec<&'a BreakPoint<'a>>);
|
|
|
+
|
|
|
+// impl<'a> BreakPoints<'a> {
|
|
|
+// pub fn new() -> Self {
|
|
|
+// Self(Vec::new())
|
|
|
+// }
|
|
|
+// pub fn push(&'a mut self, bp: &'a BreakPoint<'a>) {
|
|
|
+// self.0.push(bp);
|
|
|
+// }
|
|
|
+// pub fn paths(&'a self, start: &'a BreakPoint<'a>) -> std::prelude::v1::Result<Vec<Vec<Option<(&'a BreakPoint<'a>, bool)>>>, anyhow::Error> {
|
|
|
+// let start_sens = true;
|
|
|
+// let mut paths = vec![vec![Some((start, start_sens))]];
|
|
|
+// loop {
|
|
|
+// let p = paths.clone();
|
|
|
+// let curr_paths: Vec<_> = p
|
|
|
+// .iter()
|
|
|
+// .filter(|path| path.last().unwrap().is_some())
|
|
|
+// .collect();
|
|
|
+//
|
|
|
+// if curr_paths.len() == 0 {
|
|
|
+// break;
|
|
|
+// }
|
|
|
+// let mut new_paths = Vec::new();
|
|
|
+//
|
|
|
+// for curr_path in curr_paths.iter() {
|
|
|
+//
|
|
|
+// let visited: Vec<_> = curr_path.iter().map(|e| e.as_ref().unwrap()).collect();
|
|
|
+// let (curr_bp, curr_sens) = visited.last().unwrap();
|
|
|
+// let visited_bps: Vec<_> = visited.iter().map(|(bp, _)| bp).collect();
|
|
|
+//
|
|
|
+// let (_, curr_pos) = if *curr_sens {
|
|
|
+// curr_bp.sens()
|
|
|
+// } else {
|
|
|
+// curr_bp.rev_comp()
|
|
|
+// };
|
|
|
+//
|
|
|
+// let next: Vec<(&BreakPoint<'a>, bool)> = self
|
|
|
+// .0
|
|
|
+// .clone()
|
|
|
+// .into_iter()
|
|
|
+// .filter(|bp| !visited_bps.contains(&bp))
|
|
|
+// .flat_map(|bp| {
|
|
|
+// let (bp_s, _) = bp.sens();
|
|
|
+// let same_sens = bp_s.0 == curr_pos.0;
|
|
|
+// let same_chr = bp_s.1 == curr_pos.1;
|
|
|
+// let next = if curr_pos.0 {
|
|
|
+// bp_s.2 >= curr_pos.2
|
|
|
+// } else {
|
|
|
+// bp_s.2 <= curr_pos.2
|
|
|
+// };
|
|
|
+// if same_chr && same_sens && next {
|
|
|
+// return vec![(bp, *curr_sens == bp_s.0)];
|
|
|
+// }
|
|
|
+// let (bp_s, _) = bp.rev_comp();
|
|
|
+// let same_sens = bp_s.0 == curr_pos.0;
|
|
|
+// let same_chr = bp_s.1 == curr_pos.1;
|
|
|
+// let next = if curr_pos.0 {
|
|
|
+// bp_s.2 >= curr_pos.2
|
|
|
+// } else {
|
|
|
+// bp_s.2 <= curr_pos.2
|
|
|
+// };
|
|
|
+// if same_chr && same_sens && next {
|
|
|
+// return vec![(bp, *curr_sens == bp_s.0)];
|
|
|
+// }
|
|
|
+// vec![]
|
|
|
+// })
|
|
|
+// .collect();
|
|
|
+//
|
|
|
+// if next.len() == 0 {
|
|
|
+// let mut curr_p = curr_path.clone().to_owned();
|
|
|
+// curr_p.push(None);
|
|
|
+// new_paths.push(curr_p);
|
|
|
+// } else {
|
|
|
+// next.into_iter().for_each(|bp| {
|
|
|
+// let mut curr_p = *curr_path.clone().to_owned();
|
|
|
+// curr_p.push(Some(bp));
|
|
|
+// new_paths.push(curr_p);
|
|
|
+// })
|
|
|
+// }
|
|
|
+// paths = new_paths.clone();
|
|
|
+// }
|
|
|
+// }
|
|
|
+// Ok(paths)
|
|
|
+// }
|
|
|
+// }
|
|
|
+
|
|
|
+pub fn rename_edges(
|
|
|
+ graph: &mut StableGraph<String, String>,
|
|
|
+ way: &Vec<NodeIndex>,
|
|
|
+ value: &str,
|
|
|
+) -> StableGraph<String, String> {
|
|
|
+ let mut graph = graph.clone();
|
|
|
+ graph.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let edge_id = graph.find_edge(window[0], window[1]).unwrap();
|
|
|
+ let edge = graph.edge_weight_mut(edge_id).unwrap();
|
|
|
+ *edge = value.to_string();
|
|
|
+ }
|
|
|
+ graph
|
|
|
+}
|
|
|
+
|
|
|
+pub fn erase_edges_labels_inplace(graph: &mut StableGraph<String, String>) {
|
|
|
+ graph.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
+}
|
|
|
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
+ use std::ops::Deref;
|
|
|
+
|
|
|
+ use minimap2::Strand;
|
|
|
+ use petgraph::{dot::Dot, stable_graph::StableGraph, visit::EdgeRef, Directed};
|
|
|
+
|
|
|
use super::*;
|
|
|
|
|
|
#[test]
|
|
|
@@ -797,4 +1080,240 @@ mod tests {
|
|
|
|
|
|
Ok(())
|
|
|
}
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn load_dir() -> Result<()> {
|
|
|
+ let dir = "./data_test";
|
|
|
+ let format = CustomFormat::builder()
|
|
|
+ .grouping(Grouping::Standard)
|
|
|
+ .minus_sign("-")
|
|
|
+ .separator("_")
|
|
|
+ .build()
|
|
|
+ .unwrap();
|
|
|
+ let genome = Genome::from_contigs_sequences(dir)?;
|
|
|
+ genome.stats();
|
|
|
+ // println!("{genome:?}");
|
|
|
+
|
|
|
+ for contig in genome.contigs() {
|
|
|
+ if let Some(breakpoints) = contig.breakpoints_repr() {
|
|
|
+ println!("[{}]", breakpoints.join("---"));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut bp = Vec::new();
|
|
|
+ for contig in genome.contigs() {
|
|
|
+ if let Some(breakpoints) = contig.contig_ref.breakpoints() {
|
|
|
+ bp.push(BreakPoint::new(&breakpoints).sens());
|
|
|
+ bp.push(BreakPoint::new(&breakpoints).rev_comp());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ bp.sort_by(|(a, _), (b, _)| a.2.cmp(&b.2));
|
|
|
+ bp.sort_by(|(a, _), (b, _)| a.1.cmp(&b.1));
|
|
|
+ bp.dedup_by(|(a, _), (b, _)| a.2 == b.2 && a.1 == b.1);
|
|
|
+ let symb = |s| -> &str {
|
|
|
+ if s {
|
|
|
+ "►"
|
|
|
+ } else {
|
|
|
+ "◄"
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ let start_node = (true, "chr7".to_string(), 0);
|
|
|
+ let start_node = (start_node.clone(), start_node);
|
|
|
+ bp.push(start_node.clone());
|
|
|
+ let mut start_key = "".to_string();
|
|
|
+
|
|
|
+ let end_node = (true, "chr7".to_string(), i32::MAX);
|
|
|
+ let end_node = (end_node.clone(), end_node);
|
|
|
+ bp.push(end_node.clone());
|
|
|
+ let mut end_key = "".to_string();
|
|
|
+
|
|
|
+ let start_chr = "chr7".to_string();
|
|
|
+ let start_sens = true;
|
|
|
+
|
|
|
+ let mut graph = StableGraph::<_, _, Directed>::new();
|
|
|
+ let mut nodes_idx = HashMap::new();
|
|
|
+ bp.iter().for_each(|(a, b)| {
|
|
|
+ let key = format!(
|
|
|
+ "{}{}:{}|{}:{}{}",
|
|
|
+ symb(a.0),
|
|
|
+ a.1,
|
|
|
+ a.2.to_formatted_string(&format),
|
|
|
+ b.1,
|
|
|
+ b.2.to_formatted_string(&format),
|
|
|
+ symb(b.0)
|
|
|
+ );
|
|
|
+ let node_id = graph.add_node(key.clone());
|
|
|
+ nodes_idx.insert(key.clone(), node_id);
|
|
|
+ println!("{}", key);
|
|
|
+ // if start_key == "".to_string() && start_chr == a.1.to_string() && start_sens == a.0 {
|
|
|
+ // start_key = key;
|
|
|
+ // }
|
|
|
+ if (a.to_owned(), b.to_owned()) == start_node {
|
|
|
+ start_key = key;
|
|
|
+ } else if (a.to_owned(), b.to_owned()) == end_node {
|
|
|
+ end_key = key;
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ bp.iter().for_each(|(a, (last_sens, last_chr, last_pos))| {
|
|
|
+ let last_key = format!(
|
|
|
+ "{}{}:{}|{}:{}{}",
|
|
|
+ symb(a.0),
|
|
|
+ a.1,
|
|
|
+ a.2.to_formatted_string(&format),
|
|
|
+ last_chr,
|
|
|
+ last_pos.to_formatted_string(&format),
|
|
|
+ symb(*last_sens)
|
|
|
+ );
|
|
|
+ bp.iter().for_each(|((next_sens, next_chr, next_pos), b)| {
|
|
|
+ let is_same_sens = last_sens == next_sens;
|
|
|
+ let is_on_same_chr = last_chr == next_chr;
|
|
|
+ let is_reachable = if *last_sens {
|
|
|
+ last_pos < next_pos
|
|
|
+ } else {
|
|
|
+ last_pos > next_pos
|
|
|
+ };
|
|
|
+ if is_same_sens && is_on_same_chr && is_reachable {
|
|
|
+ let next_key = format!(
|
|
|
+ "{}{}:{}|{}:{}{}",
|
|
|
+ symb(*next_sens),
|
|
|
+ next_chr,
|
|
|
+ next_pos.to_formatted_string(&format),
|
|
|
+ b.1,
|
|
|
+ b.2.to_formatted_string(&format),
|
|
|
+ symb(b.0)
|
|
|
+ );
|
|
|
+ graph.add_edge(
|
|
|
+ *nodes_idx.get(&last_key).unwrap(),
|
|
|
+ *nodes_idx.get(&next_key).unwrap(),
|
|
|
+ "".to_string(),
|
|
|
+ );
|
|
|
+ }
|
|
|
+ });
|
|
|
+ });
|
|
|
+
|
|
|
+ // Remove edges in feedback arc set
|
|
|
+ let fas: Vec<_> = petgraph::algo::feedback_arc_set::greedy_feedback_arc_set(&graph)
|
|
|
+ .map(|e| e.id())
|
|
|
+ .collect();
|
|
|
+ for edge_id in fas {
|
|
|
+ graph.remove_edge(edge_id);
|
|
|
+ }
|
|
|
+
|
|
|
+ let start_node_id = nodes_idx.get(&start_key).unwrap();
|
|
|
+ let end_node_id = nodes_idx.get(&end_key).unwrap();
|
|
|
+
|
|
|
+ // Remove edge direct start -> end
|
|
|
+ let se = graph.find_edge(*start_node_id, *end_node_id).unwrap();
|
|
|
+ graph.remove_edge(se);
|
|
|
+
|
|
|
+ // Find all the ways from start node to end node
|
|
|
+ let ways =
|
|
|
+ algo::all_simple_paths::<Vec<_>, _>(&graph, *start_node_id, *end_node_id, 0, None)
|
|
|
+ .collect::<Vec<_>>();
|
|
|
+
|
|
|
+ // add way number to edges
|
|
|
+ for (n_way, way) in ways.iter().enumerate() {
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let edge_id = graph.find_edge(window[0], window[1]).unwrap();
|
|
|
+ let edge = graph.edge_weight_mut(edge_id).unwrap();
|
|
|
+ *edge = vec![edge.to_string(), (n_way + 1).to_string()]
|
|
|
+ .into_iter()
|
|
|
+ .filter(|s| s != "")
|
|
|
+ .collect::<Vec<_>>()
|
|
|
+ .join(", ")
|
|
|
+ .to_string();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ println!("{ways:?}");
|
|
|
+
|
|
|
+ // Retain edges and nodes in a way
|
|
|
+ let mut g = graph.clone();
|
|
|
+ g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
+ g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
+
|
|
|
+ // Try to find one way containing all nodes
|
|
|
+ let n_nodes = g.node_weights().count();
|
|
|
+ let max_way = ways.iter().map(|v| v.len()).max().unwrap();
|
|
|
+ let mut monoallelic_graphs = Vec::new();
|
|
|
+ let mut empty_edges = g.clone();
|
|
|
+ erase_edges_labels_inplace(&mut empty_edges);
|
|
|
+ if n_nodes == max_way {
|
|
|
+ let biggest = ways
|
|
|
+ .iter()
|
|
|
+ .filter(|v| v.len() == max_way)
|
|
|
+ .collect::<Vec<_>>();
|
|
|
+ for b in biggest {
|
|
|
+ println!(
|
|
|
+ "Likely monoallelic phase [{}]",
|
|
|
+ b.iter()
|
|
|
+ .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
+ .collect::<Vec<_>>()
|
|
|
+ .join(";")
|
|
|
+ );
|
|
|
+ let new_g = rename_edges(&mut empty_edges, b, "A");
|
|
|
+ let dot = Dot::new(&new_g);
|
|
|
+ let u = dot.to_string().replace("\\\"", "");
|
|
|
+ let u = u.replace("label = \"A\"", "label = \"A\", color = \"red\"");
|
|
|
+
|
|
|
+ println!("{}", u);
|
|
|
+ monoallelic_graphs.push(new_g);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ // Try to find two ways containig all nodes
|
|
|
+ let mut bial_ways = Vec::new();
|
|
|
+ for a_way in ways.iter() {
|
|
|
+ let mut a_nodes = a_way.clone();
|
|
|
+ a_nodes.remove(0);
|
|
|
+ a_nodes.remove(a_nodes.len() - 1);
|
|
|
+
|
|
|
+ let mut diff_way = None;
|
|
|
+ for b_way in ways.iter() {
|
|
|
+ let mut share_node = false;
|
|
|
+ for b_node in b_way.iter() {
|
|
|
+ if a_nodes.contains(b_node) {
|
|
|
+ share_node = true;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !share_node {
|
|
|
+ diff_way = Some(b_way.clone())
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if let Some(b_way) = diff_way {
|
|
|
+ // dedup for double check
|
|
|
+ let mut all = a_way.clone();
|
|
|
+ all.extend(b_way.clone().iter());
|
|
|
+ all.sort();
|
|
|
+ all.dedup();
|
|
|
+ if all.len() == max_way {
|
|
|
+ bial_ways.push((a_way.clone(), b_way))
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for (a, b) in bial_ways.iter() {
|
|
|
+ println!(
|
|
|
+ "Likely bi-allelic phase:\n\t[{}]\n\t[{}]",
|
|
|
+ a.iter()
|
|
|
+ .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
+ .collect::<Vec<_>>()
|
|
|
+ .join(";"),
|
|
|
+ b.iter()
|
|
|
+ .map(|nid| g.node_weight(*nid).unwrap().to_string())
|
|
|
+ .collect::<Vec<_>>()
|
|
|
+ .join(";")
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ println!("{:?}", Dot::new(&g));
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
}
|