Thomas 3 years ago
parent
commit
b13427cce5
1 changed files with 115 additions and 6 deletions
  1. 115 6
      src/lib.rs

+ 115 - 6
src/lib.rs

@@ -3,6 +3,7 @@ use petgraph::{graph::{Graph, NodeIndex, EdgeIndex}, Undirected};
 use kseq::parse_path;
 use faimm::IndexedFasta;
 
+
 pub struct SequencesGraph {
     sequences: HashMap<NodeIndex, String>,
     seq_hash : HashSet<String>,
@@ -217,6 +218,12 @@ impl SequencesGraph {
         }
     }
 
+    pub fn process(&mut self) {
+        self.draw_edges();
+        self.remove_single_nodes();
+        self.find_leftmost_node();
+    }
+
     pub fn get_new_stretch_id (&self) -> usize {
         let mut v = self.offsets.iter()
         .map(|(_, (_, id, _))| *id).collect::<Vec<usize>>();
@@ -387,6 +394,114 @@ pub struct NeoContig {
     stretch    : Vec<(NodeIndex, i32)>,
 }
 
+impl NeoContig {
+    pub fn make_aln (&self) -> Vec<Vec<u8>> {
+        let contig_len = self.contig.len();
+        let mut lines: Vec<Vec<u8>> = Vec::new();
+
+        // ref 1
+        if self.alignments.len() > 0 {
+            let mut ref_spaces = " ".repeat(self.alignments[0].query_range.start).as_bytes().to_vec();
+
+            // ref sequence
+            let mut l = ref_spaces.clone();
+            let ref_cigar = self.alignments[0].ref_cigar.as_bytes();
+            
+            let mut filtered_ref: Vec<u8> = self.alignments[0].ref_sequence
+            .as_bytes()
+            .iter().enumerate()
+            .filter(|(i, c)| ref_cigar[*i] == b"M"[0])
+            .map(|(i, c)| *c)
+            .collect();
+            l.append(&mut filtered_ref);
+            lines.push(l);
+    
+            // match pipes
+            let mut match_pipes = self.alignments[0].ref_cigar.as_bytes().iter().filter(|c| **c == b"M"[0]).map(|c| b"|"[0])
+            .collect::<Vec<u8>>();
+            
+            let mut l = ref_spaces.clone();
+            l.append(&mut match_pipes);
+            
+            let mut ll = format!("{} - {}:{}-{}{}", 
+                " ".repeat(contig_len - l.len()).to_string(),
+                self.alignments[0].ref_name, 
+                self.alignments[0].ref_range.start, 
+                self.alignments[0].ref_range.end,
+                if self.alignments[0].ref_range.start > self.alignments[0].ref_range.end { " (rc)" } else { "" }
+            ).as_bytes().to_vec();
+            l.append(&mut ll);
+            lines.push(l);
+        }
+    
+        // Contig
+        let mut contig = format!("{} - denovo assembled contig (cov = {})", 
+            self.contig, 
+            self.sequences.len()
+        ).as_bytes().to_vec();
+        lines.push(contig.clone());
+
+        // Sequences
+        if self.stretch.len() > 0 {
+            let decal = -self.stretch[0].1; // first have to be leftmost
+            for (node_index, offset) in self.stretch.iter() {
+                let seq = self.sequences.get(&node_index).unwrap();
+                let name = self.sequences_names.get(&node_index).unwrap();
+                let mut l = format!("{}{}", " ".repeat((offset + decal) as usize).to_string(), seq).as_bytes().to_vec();
+                let mut ll = format!("{} - {}", " ".repeat(contig_len - l.len()).to_string(), name).as_bytes().to_vec();
+                l.append(&mut ll);
+                lines.push(l);
+            }
+        }
+
+        // Contig
+        lines.push(contig);
+
+        // other ref
+        if self.alignments.len() > 1 {
+            
+            let mut ref_spaces = " ".repeat(self.alignments[1].query_range.start).as_bytes().to_vec();
+
+            // match pipes
+            let mut match_pipes = self.alignments[1].ref_cigar.as_bytes().iter().filter(|c| **c == b"M"[0]).map(|c| b"|"[0])
+            .collect::<Vec<u8>>();
+            let mut l = ref_spaces.clone();
+            l.append(&mut match_pipes);
+            
+            let mut ll = format!("{} - {}:{}-{}{}", 
+                " ".repeat(contig_len - l.len()).to_string(), 
+                self.alignments[1].ref_name, 
+                self.alignments[1].ref_range.start, 
+                self.alignments[1].ref_range.end,
+                if self.alignments[1].ref_range.start > self.alignments[1].ref_range.end { " (rc)" } else { "" }
+            ).as_bytes().to_vec();
+            l.append(&mut ll);
+            lines.push(l);
+
+            // ref sequence
+            let mut l = ref_spaces.clone();
+            let ref_cigar = self.alignments[1].ref_cigar.as_bytes();
+            
+            let mut filtered_ref: Vec<u8> = self.alignments[1].ref_sequence
+            .as_bytes()
+            .iter().enumerate()
+            .filter(|(i, c)| ref_cigar[*i] == b"M"[0])
+            .map(|(i, c)| *c)
+            .collect();
+            l.append(&mut filtered_ref);
+            lines.push(l);
+
+        }
+
+        lines
+    }
+
+    pub fn print_stretch(&self) {
+        for line in self.make_aln() {
+            println!("{}", String::from_utf8_lossy(&line));
+        }
+    }
+}
 pub struct Fasta {
     fa: IndexedFasta
 }
@@ -494,12 +609,6 @@ fn matched_range (cigar: &str, flag: &i32, matched_seq: &mut str) -> (Range<usiz
         range_query.end   = range_query.start + query_len;
     }
     
-    // println!("{}", matched_seq);
-    // println!("{:?}", range_ref);
-    // println!("{:?}", range_query);
-    // println!("{:?}", ref_cigar_string.len());
-    // println!("{:?}", range_ref.len());
-    // println!("{}", ref_cigar_string);
     assert_eq!(range_ref.len(), ref_cigar_string.len());
     
     (range_ref, range_query, ref_cigar_string)