|
|
@@ -3,6 +3,7 @@ use petgraph::{graph::{Graph, NodeIndex, EdgeIndex}, Undirected};
|
|
|
use kseq::parse_path;
|
|
|
use faimm::IndexedFasta;
|
|
|
|
|
|
+
|
|
|
pub struct SequencesGraph {
|
|
|
sequences: HashMap<NodeIndex, String>,
|
|
|
seq_hash : HashSet<String>,
|
|
|
@@ -217,6 +218,12 @@ impl SequencesGraph {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ pub fn process(&mut self) {
|
|
|
+ self.draw_edges();
|
|
|
+ self.remove_single_nodes();
|
|
|
+ self.find_leftmost_node();
|
|
|
+ }
|
|
|
+
|
|
|
pub fn get_new_stretch_id (&self) -> usize {
|
|
|
let mut v = self.offsets.iter()
|
|
|
.map(|(_, (_, id, _))| *id).collect::<Vec<usize>>();
|
|
|
@@ -387,6 +394,114 @@ pub struct NeoContig {
|
|
|
stretch : Vec<(NodeIndex, i32)>,
|
|
|
}
|
|
|
|
|
|
+impl NeoContig {
|
|
|
+ pub fn make_aln (&self) -> Vec<Vec<u8>> {
|
|
|
+ let contig_len = self.contig.len();
|
|
|
+ let mut lines: Vec<Vec<u8>> = Vec::new();
|
|
|
+
|
|
|
+ // ref 1
|
|
|
+ if self.alignments.len() > 0 {
|
|
|
+ let mut ref_spaces = " ".repeat(self.alignments[0].query_range.start).as_bytes().to_vec();
|
|
|
+
|
|
|
+ // ref sequence
|
|
|
+ let mut l = ref_spaces.clone();
|
|
|
+ let ref_cigar = self.alignments[0].ref_cigar.as_bytes();
|
|
|
+
|
|
|
+ let mut filtered_ref: Vec<u8> = self.alignments[0].ref_sequence
|
|
|
+ .as_bytes()
|
|
|
+ .iter().enumerate()
|
|
|
+ .filter(|(i, c)| ref_cigar[*i] == b"M"[0])
|
|
|
+ .map(|(i, c)| *c)
|
|
|
+ .collect();
|
|
|
+ l.append(&mut filtered_ref);
|
|
|
+ lines.push(l);
|
|
|
+
|
|
|
+ // match pipes
|
|
|
+ let mut match_pipes = self.alignments[0].ref_cigar.as_bytes().iter().filter(|c| **c == b"M"[0]).map(|c| b"|"[0])
|
|
|
+ .collect::<Vec<u8>>();
|
|
|
+
|
|
|
+ let mut l = ref_spaces.clone();
|
|
|
+ l.append(&mut match_pipes);
|
|
|
+
|
|
|
+ let mut ll = format!("{} - {}:{}-{}{}",
|
|
|
+ " ".repeat(contig_len - l.len()).to_string(),
|
|
|
+ self.alignments[0].ref_name,
|
|
|
+ self.alignments[0].ref_range.start,
|
|
|
+ self.alignments[0].ref_range.end,
|
|
|
+ if self.alignments[0].ref_range.start > self.alignments[0].ref_range.end { " (rc)" } else { "" }
|
|
|
+ ).as_bytes().to_vec();
|
|
|
+ l.append(&mut ll);
|
|
|
+ lines.push(l);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Contig
|
|
|
+ let mut contig = format!("{} - denovo assembled contig (cov = {})",
|
|
|
+ self.contig,
|
|
|
+ self.sequences.len()
|
|
|
+ ).as_bytes().to_vec();
|
|
|
+ lines.push(contig.clone());
|
|
|
+
|
|
|
+ // Sequences
|
|
|
+ if self.stretch.len() > 0 {
|
|
|
+ let decal = -self.stretch[0].1; // first have to be leftmost
|
|
|
+ for (node_index, offset) in self.stretch.iter() {
|
|
|
+ let seq = self.sequences.get(&node_index).unwrap();
|
|
|
+ let name = self.sequences_names.get(&node_index).unwrap();
|
|
|
+ let mut l = format!("{}{}", " ".repeat((offset + decal) as usize).to_string(), seq).as_bytes().to_vec();
|
|
|
+ let mut ll = format!("{} - {}", " ".repeat(contig_len - l.len()).to_string(), name).as_bytes().to_vec();
|
|
|
+ l.append(&mut ll);
|
|
|
+ lines.push(l);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Contig
|
|
|
+ lines.push(contig);
|
|
|
+
|
|
|
+ // other ref
|
|
|
+ if self.alignments.len() > 1 {
|
|
|
+
|
|
|
+ let mut ref_spaces = " ".repeat(self.alignments[1].query_range.start).as_bytes().to_vec();
|
|
|
+
|
|
|
+ // match pipes
|
|
|
+ let mut match_pipes = self.alignments[1].ref_cigar.as_bytes().iter().filter(|c| **c == b"M"[0]).map(|c| b"|"[0])
|
|
|
+ .collect::<Vec<u8>>();
|
|
|
+ let mut l = ref_spaces.clone();
|
|
|
+ l.append(&mut match_pipes);
|
|
|
+
|
|
|
+ let mut ll = format!("{} - {}:{}-{}{}",
|
|
|
+ " ".repeat(contig_len - l.len()).to_string(),
|
|
|
+ self.alignments[1].ref_name,
|
|
|
+ self.alignments[1].ref_range.start,
|
|
|
+ self.alignments[1].ref_range.end,
|
|
|
+ if self.alignments[1].ref_range.start > self.alignments[1].ref_range.end { " (rc)" } else { "" }
|
|
|
+ ).as_bytes().to_vec();
|
|
|
+ l.append(&mut ll);
|
|
|
+ lines.push(l);
|
|
|
+
|
|
|
+ // ref sequence
|
|
|
+ let mut l = ref_spaces.clone();
|
|
|
+ let ref_cigar = self.alignments[1].ref_cigar.as_bytes();
|
|
|
+
|
|
|
+ let mut filtered_ref: Vec<u8> = self.alignments[1].ref_sequence
|
|
|
+ .as_bytes()
|
|
|
+ .iter().enumerate()
|
|
|
+ .filter(|(i, c)| ref_cigar[*i] == b"M"[0])
|
|
|
+ .map(|(i, c)| *c)
|
|
|
+ .collect();
|
|
|
+ l.append(&mut filtered_ref);
|
|
|
+ lines.push(l);
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ lines
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn print_stretch(&self) {
|
|
|
+ for line in self.make_aln() {
|
|
|
+ println!("{}", String::from_utf8_lossy(&line));
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
pub struct Fasta {
|
|
|
fa: IndexedFasta
|
|
|
}
|
|
|
@@ -494,12 +609,6 @@ fn matched_range (cigar: &str, flag: &i32, matched_seq: &mut str) -> (Range<usiz
|
|
|
range_query.end = range_query.start + query_len;
|
|
|
}
|
|
|
|
|
|
- // println!("{}", matched_seq);
|
|
|
- // println!("{:?}", range_ref);
|
|
|
- // println!("{:?}", range_query);
|
|
|
- // println!("{:?}", ref_cigar_string.len());
|
|
|
- // println!("{:?}", range_ref.len());
|
|
|
- // println!("{}", ref_cigar_string);
|
|
|
assert_eq!(range_ref.len(), ref_cigar_string.len());
|
|
|
|
|
|
(range_ref, range_query, ref_cigar_string)
|