|
@@ -2,7 +2,7 @@ use std::{ops::Range, collections::{HashMap, HashSet}};
|
|
|
use petgraph::{graph::{Graph, NodeIndex, EdgeIndex}, Undirected};
|
|
use petgraph::{graph::{Graph, NodeIndex, EdgeIndex}, Undirected};
|
|
|
use kseq::parse_path;
|
|
use kseq::parse_path;
|
|
|
use faimm::IndexedFasta;
|
|
use faimm::IndexedFasta;
|
|
|
-
|
|
|
|
|
|
|
+use duct::cmd;
|
|
|
|
|
|
|
|
pub struct SequencesGraph {
|
|
pub struct SequencesGraph {
|
|
|
sequences: HashMap<NodeIndex, String>,
|
|
sequences: HashMap<NodeIndex, String>,
|
|
@@ -58,6 +58,29 @@ impl SequencesGraph {
|
|
|
self.find_leftmost_node();
|
|
self.find_leftmost_node();
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ pub fn add_node (&mut self, sequence: String, name: String) {
|
|
|
|
|
+ let id = self.graph.add_node(sequence.clone());
|
|
|
|
|
+ self.sequences.insert(id, sequence);
|
|
|
|
|
+ self.names.insert(id, name);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn add_edge (&mut self, id_a: &NodeIndex, id_b: &NodeIndex) {
|
|
|
|
|
+ let a = self.sequences.get(id_a).unwrap().as_bytes().to_vec();
|
|
|
|
|
+ let b = self.sequences.get(id_b).unwrap().as_bytes().to_vec();
|
|
|
|
|
+
|
|
|
|
|
+ if let Some(r) = get_overlaps(&a, &b, self.min_overlapping, self.max_consecutive, self.max_diffs) { // normaly always like that
|
|
|
|
|
+ self.overlaps.insert(
|
|
|
|
|
+ self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()),
|
|
|
|
|
+ Overlap { id_a: *id_a, range_a: r.0, id_b: *id_b, range_b: r.1}
|
|
|
|
|
+ );
|
|
|
|
|
+ } else if let Some(r) = get_overlaps(&b, &a, self.min_overlapping, self.max_consecutive, self.max_diffs) {
|
|
|
|
|
+ self.overlaps.insert(
|
|
|
|
|
+ self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()),
|
|
|
|
|
+ Overlap { id_a: *id_a, range_a: r.1, id_b: *id_b, range_b: r.0}
|
|
|
|
|
+ );
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
pub fn add_from_pe (&mut self, a: Vec<u8>, b: Vec<u8>, a_name: String, b_name: String, min_overlapping_len_pe: usize) {
|
|
pub fn add_from_pe (&mut self, a: Vec<u8>, b: Vec<u8>, a_name: String, b_name: String, min_overlapping_len_pe: usize) {
|
|
|
let a_rc = revcomp(std::str::from_utf8(&a).unwrap()).as_bytes().to_vec();
|
|
let a_rc = revcomp(std::str::from_utf8(&a).unwrap()).as_bytes().to_vec();
|
|
|
let b_rc = revcomp(std::str::from_utf8(&b).unwrap()).as_bytes().to_vec();
|
|
let b_rc = revcomp(std::str::from_utf8(&b).unwrap()).as_bytes().to_vec();
|
|
@@ -103,29 +126,6 @@ impl SequencesGraph {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- pub fn add_node (&mut self, sequence: String, name: String) {
|
|
|
|
|
- let id = self.graph.add_node(sequence.clone());
|
|
|
|
|
- self.sequences.insert(id, sequence);
|
|
|
|
|
- self.names.insert(id, name);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- pub fn add_edge (&mut self, id_a: &NodeIndex, id_b: &NodeIndex) {
|
|
|
|
|
- let a = self.sequences.get(id_a).unwrap().as_bytes().to_vec();
|
|
|
|
|
- let b = self.sequences.get(id_b).unwrap().as_bytes().to_vec();
|
|
|
|
|
-
|
|
|
|
|
- if let Some(r) = get_overlaps(&a, &b, self.min_overlapping, self.max_consecutive, self.max_diffs) { // normaly always like that
|
|
|
|
|
- self.overlaps.insert(
|
|
|
|
|
- self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()),
|
|
|
|
|
- Overlap { id_a: *id_a, range_a: r.0, id_b: *id_b, range_b: r.1}
|
|
|
|
|
- );
|
|
|
|
|
- } else if let Some(r) = get_overlaps(&b, &a, self.min_overlapping, self.max_consecutive, self.max_diffs) {
|
|
|
|
|
- self.overlaps.insert(
|
|
|
|
|
- self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()),
|
|
|
|
|
- Overlap { id_a: *id_a, range_a: r.1, id_b: *id_b, range_b: r.0}
|
|
|
|
|
- );
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
pub fn draw_edges (&mut self) {
|
|
pub fn draw_edges (&mut self) {
|
|
|
let idx: Vec<NodeIndex> = self.graph.node_indices().collect();
|
|
let idx: Vec<NodeIndex> = self.graph.node_indices().collect();
|
|
|
let n_nodes = idx.len();
|
|
let n_nodes = idx.len();
|
|
@@ -218,7 +218,7 @@ impl SequencesGraph {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- pub fn process(&mut self) {
|
|
|
|
|
|
|
+ pub fn process (&mut self) {
|
|
|
self.draw_edges();
|
|
self.draw_edges();
|
|
|
self.remove_single_nodes();
|
|
self.remove_single_nodes();
|
|
|
self.find_leftmost_node();
|
|
self.find_leftmost_node();
|
|
@@ -395,7 +395,53 @@ pub struct NeoContig {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl NeoContig {
|
|
impl NeoContig {
|
|
|
|
|
+ pub fn bwa_aln (&mut self, fasta_ref_path: &str, fa: &Fasta) {
|
|
|
|
|
+ let sam_keys = vec!["QNAME", "FLAG", "RNAME",
|
|
|
|
|
+ "POS", "MAPQ", "CIGAR", "RNEXT", "PNEXT", "TLEN", "SEQ",
|
|
|
|
|
+ "QUAL", "ALN"];
|
|
|
|
|
+
|
|
|
|
|
+ let stdout = cmd!("echo", "-e", format!("\"{}\"", self.contig))
|
|
|
|
|
+ .pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
|
|
|
|
|
+ .stdout_capture()
|
|
|
|
|
+ .stderr_capture()
|
|
|
|
|
+ .read().unwrap();
|
|
|
|
|
+
|
|
|
|
|
+ let mut alignments: Vec<Sam> = Vec::new();
|
|
|
|
|
+ stdout.split("\n").for_each(|line|{
|
|
|
|
|
+ if line.len() > 9 {
|
|
|
|
|
+ if &line[0..9] == "sequence_" {
|
|
|
|
|
+ // println!("{:?}", line);
|
|
|
|
|
+ let mut qname: Option<String> = None;
|
|
|
|
|
+ let mut flag: Option<i32> = None;
|
|
|
|
|
+ let mut rname: Option<String> = None;
|
|
|
|
|
+ let mut pos: Option<i64> = None;
|
|
|
|
|
+ let mut cigar: Option<String> = None;
|
|
|
|
|
+ let mut sequence: Option<String> = None;
|
|
|
|
|
+
|
|
|
|
|
+ for (i, value) in line.split("\t").enumerate() {
|
|
|
|
|
+ if sam_keys.len() <= i { break; }
|
|
|
|
|
+ match sam_keys[i] {
|
|
|
|
|
+ "QNAME" => qname = Some(value.to_string()),
|
|
|
|
|
+ "FLAG" => flag = value.parse::<i32>().ok(),
|
|
|
|
|
+ "RNAME" => rname = Some(value.to_string()),
|
|
|
|
|
+ "POS" => pos = value.parse::<i64>().ok(),
|
|
|
|
|
+ "CIGAR" => cigar = Some(value.to_string()),
|
|
|
|
|
+ "SEQ" => sequence = Some(value.to_string()),
|
|
|
|
|
+ _ => ()
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if let (Some(qname), Some(flag), Some(rname), Some(pos), Some(cigar), Some(sequence)) = (qname, flag, rname, pos, cigar, sequence) {
|
|
|
|
|
+ alignments.push(Sam::new(qname, flag, rname, pos, cigar, sequence, &fa));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+ self.alignments = alignments;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
pub fn make_aln (&self) -> Vec<Vec<u8>> {
|
|
pub fn make_aln (&self) -> Vec<Vec<u8>> {
|
|
|
|
|
+ println!("== {} ============", self.name);
|
|
|
let contig_len = self.contig.len();
|
|
let contig_len = self.contig.len();
|
|
|
let mut lines: Vec<Vec<u8>> = Vec::new();
|
|
let mut lines: Vec<Vec<u8>> = Vec::new();
|
|
|
|
|
|