|
|
@@ -1,20 +1,22 @@
|
|
|
+pub mod breakpoint;
|
|
|
+pub mod genomic_graph;
|
|
|
+
|
|
|
use anyhow::{anyhow, Ok, Result};
|
|
|
+use breakpoint::BreakPoint;
|
|
|
use fasta::record::Sequence;
|
|
|
use log::info;
|
|
|
use minimap2::{Aligner, Mapping};
|
|
|
use noodles_fasta as fasta;
|
|
|
-use num_format::{CustomFormat, Grouping, Locale, ToFormattedString, WriteFormatted};
|
|
|
-use petgraph::{algo, dot::Dot, prelude::*, Graph};
|
|
|
+use num_format::{CustomFormat, Grouping, ToFormattedString, WriteFormatted};
|
|
|
+use petgraph::{dot::Dot, prelude::*};
|
|
|
use rust_htslib::bam::{self, Record};
|
|
|
-use std::hash::{Hash, Hasher};
|
|
|
use std::{
|
|
|
- collections::{HashMap, VecDeque},
|
|
|
+ collections::HashMap,
|
|
|
fmt,
|
|
|
fs::{self, File},
|
|
|
io::{BufReader, BufWriter, Write},
|
|
|
path::PathBuf,
|
|
|
process::{Command, Stdio},
|
|
|
- thread,
|
|
|
};
|
|
|
use uuid::Uuid;
|
|
|
|
|
|
@@ -985,165 +987,6 @@ pub fn dot_graph_biall(
|
|
|
dot
|
|
|
}
|
|
|
|
|
|
-#[derive(Clone, PartialEq, Eq, Debug)]
|
|
|
-pub struct BreakPoint {
|
|
|
- pub id: String,
|
|
|
- pub mappings: Vec<Mapping>,
|
|
|
- // sens_symb: &str,
|
|
|
- // rc_symb: &str
|
|
|
-}
|
|
|
-
|
|
|
-impl BreakPoint {
|
|
|
- pub fn new(mappings: Vec<Mapping>) -> Self {
|
|
|
- Self {
|
|
|
- id: Uuid::new_v4().to_string(),
|
|
|
- mappings,
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn from(sens: bool, chr: &str, pos: i32, id: &str) -> Self {
|
|
|
- let strand = match sens {
|
|
|
- true => minimap2::Strand::Forward,
|
|
|
- false => minimap2::Strand::Reverse,
|
|
|
- };
|
|
|
- let target_name = Some(chr.to_string());
|
|
|
- let mut left_mapping = Mapping::default();
|
|
|
- left_mapping.strand = strand;
|
|
|
- left_mapping.target_name = target_name.clone();
|
|
|
- left_mapping.target_start = pos;
|
|
|
- left_mapping.target_end = pos;
|
|
|
-
|
|
|
- let mut right_mapping = Mapping::default();
|
|
|
- right_mapping.strand = strand;
|
|
|
- right_mapping.target_name = target_name;
|
|
|
- right_mapping.target_start = pos;
|
|
|
- right_mapping.target_end = pos;
|
|
|
-
|
|
|
- Self {
|
|
|
- id: id.to_string(),
|
|
|
- mappings: vec![left_mapping, right_mapping],
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn sens(&self) -> Self {
|
|
|
- self.clone()
|
|
|
- }
|
|
|
-
|
|
|
- pub fn rc(&self) -> Self {
|
|
|
- let mut mappings = self.mappings.clone();
|
|
|
- mappings.reverse();
|
|
|
- mappings.iter_mut().for_each(|m| m.strand = match m.strand {
|
|
|
- minimap2::Strand::Forward => minimap2::Strand::Reverse,
|
|
|
- minimap2::Strand::Reverse => minimap2::Strand::Forward,
|
|
|
- });
|
|
|
-
|
|
|
- Self {
|
|
|
- id: self.id.clone(),
|
|
|
- mappings,
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn repr_sens(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
- let (first, last) = (
|
|
|
- self.mappings.first().unwrap(),
|
|
|
- self.mappings.last().unwrap(),
|
|
|
- );
|
|
|
-
|
|
|
- match (first.strand, last.strand) {
|
|
|
- (minimap2::Strand::Forward, minimap2::Strand::Forward) => (
|
|
|
- (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
- (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
- ),
|
|
|
- (minimap2::Strand::Forward, minimap2::Strand::Reverse) => (
|
|
|
- (true, first.target_name.clone().unwrap(), first.target_start),
|
|
|
- (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
- ),
|
|
|
- (minimap2::Strand::Reverse, minimap2::Strand::Forward) => (
|
|
|
- (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
- (true, last.target_name.clone().unwrap(), last.target_end),
|
|
|
- ),
|
|
|
- (minimap2::Strand::Reverse, minimap2::Strand::Reverse) => (
|
|
|
- (false, first.target_name.clone().unwrap(), first.target_end),
|
|
|
- (false, last.target_name.clone().unwrap(), last.target_start),
|
|
|
- ),
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn repr_rc(&self) -> ((bool, String, i32), (bool, String, i32)) {
|
|
|
- let (mut a, mut b) = self.repr_sens();
|
|
|
- a.0 = !a.0;
|
|
|
- b.0 = !b.0;
|
|
|
- // let (aa, bb) = self.rc().repr_sens();
|
|
|
- // assert_eq!(aa, b);
|
|
|
- (b, a)
|
|
|
- }
|
|
|
-
|
|
|
- pub fn str_sens(&self, sens_symb: &str, rc_symb: &str) -> String {
|
|
|
- let format = CustomFormat::builder()
|
|
|
- .grouping(Grouping::Standard)
|
|
|
- .minus_sign("-")
|
|
|
- .separator("_")
|
|
|
- .build()
|
|
|
- .unwrap();
|
|
|
- let (a, b) = self.repr_sens();
|
|
|
- let symb = |s| -> &str {
|
|
|
- if s {
|
|
|
- sens_symb
|
|
|
- } else {
|
|
|
- rc_symb
|
|
|
- }
|
|
|
- };
|
|
|
- format!(
|
|
|
- "{}{}:{}|{}:{}{}",
|
|
|
- symb(a.0),
|
|
|
- a.1,
|
|
|
- a.2.to_formatted_string(&format),
|
|
|
- b.1,
|
|
|
- b.2.to_formatted_string(&format),
|
|
|
- symb(b.0)
|
|
|
- )
|
|
|
- }
|
|
|
-
|
|
|
- pub fn str_rc(&self, sens_symb: &str, rc_symb: &str) -> String {
|
|
|
- let format = CustomFormat::builder()
|
|
|
- .grouping(Grouping::Standard)
|
|
|
- .minus_sign("-")
|
|
|
- .separator("_")
|
|
|
- .build()
|
|
|
- .unwrap();
|
|
|
- let (a, b) = self.repr_rc();
|
|
|
- let symb = |s| -> &str {
|
|
|
- if s {
|
|
|
- sens_symb
|
|
|
- } else {
|
|
|
- rc_symb
|
|
|
- }
|
|
|
- };
|
|
|
- format!(
|
|
|
- "{}{}:{}|{}:{}{}",
|
|
|
- symb(a.0),
|
|
|
- a.1,
|
|
|
- a.2.to_formatted_string(&format),
|
|
|
- b.1,
|
|
|
- b.2.to_formatted_string(&format),
|
|
|
- symb(b.0)
|
|
|
- )
|
|
|
- }
|
|
|
-
|
|
|
- pub fn is_next(&self, next: &BreakPoint) -> bool {
|
|
|
- let (_, (last_sens, last_chr, last_pos)) = self.repr_sens();
|
|
|
- let ((next_sens, next_chr, next_pos), _) = next.repr_sens();
|
|
|
- let is_same_sens = last_sens == next_sens;
|
|
|
- let is_on_same_chr = last_chr == next_chr;
|
|
|
- let is_reachable = if last_sens {
|
|
|
- last_pos < next_pos
|
|
|
- } else {
|
|
|
- last_pos > next_pos
|
|
|
- };
|
|
|
- is_same_sens && is_on_same_chr && is_reachable
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
fn get_next(
|
|
|
graph: &StableGraph<BreakPoint, String>,
|
|
|
ways: Vec<Vec<(NodeIndex, bool)>>,
|
|
|
@@ -1222,6 +1065,8 @@ mod tests {
|
|
|
|
|
|
use petgraph::{stable_graph::StableGraph, visit::EdgeRef, Directed};
|
|
|
|
|
|
+ use crate::genomic_graph::GenomicGraph;
|
|
|
+
|
|
|
use super::*;
|
|
|
|
|
|
#[test]
|
|
|
@@ -1410,8 +1255,33 @@ mod tests {
|
|
|
let all_bp: Vec<BreakPoint> = graph.node_weights().map(|n| n.clone()).collect();
|
|
|
println!("{} unique breakpoints considered.", all_bp.len());
|
|
|
|
|
|
+ let nodeids_bp: Vec<(NodeIndex, BreakPoint)> =
|
|
|
+ graph.node_indices().zip(all_bp.into_iter()).collect();
|
|
|
+
|
|
|
// monallellic way ?
|
|
|
|
|
|
Ok(())
|
|
|
}
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn test_graph() -> Result<()> {
|
|
|
+ let dir = "./data_test";
|
|
|
+
|
|
|
+ // Load from fasta in dir.
|
|
|
+ let genome = Genome::from_contigs_sequences(dir)?;
|
|
|
+ let mut genomic_graph = GenomicGraph::from_genome(&genome);
|
|
|
+ let dot = genomic_graph.dot_graph();
|
|
|
+ println!("{dot}");
|
|
|
+
|
|
|
+ let ways = genomic_graph.ways((true, "chr7", 0), (true, "chr7", i32::MAX));
|
|
|
+ for (i, way) in ways.iter().enumerate() {
|
|
|
+ let s = way
|
|
|
+ .iter()
|
|
|
+ .map(|(_, _, _, s)| s.to_string())
|
|
|
+ .collect::<Vec<String>>()
|
|
|
+ .join("");
|
|
|
+ println!("{}\t{s}", i + 1);
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
}
|