Thomas 3 yıl önce
ebeveyn
işleme
9584e0ff90
2 değiştirilmiş dosya ile 73 ekleme ve 26 silme
  1. 2 1
      Cargo.toml
  2. 71 25
      src/lib.rs

+ 2 - 1
Cargo.toml

@@ -8,4 +8,5 @@ edition = "2021"
 [dependencies]
 kseq = "0.3.2"
 petgraph = "0.6.2"
-faimm = "0.3.0"
+faimm = "0.3.0"
+duct = "0.13.5"

+ 71 - 25
src/lib.rs

@@ -2,7 +2,7 @@ use std::{ops::Range, collections::{HashMap, HashSet}};
 use petgraph::{graph::{Graph, NodeIndex, EdgeIndex}, Undirected};
 use kseq::parse_path;
 use faimm::IndexedFasta;
-
+use duct::cmd;
 
 pub struct SequencesGraph {
     sequences: HashMap<NodeIndex, String>,
@@ -58,6 +58,29 @@ impl SequencesGraph {
         self.find_leftmost_node();
     }
 
+    pub fn add_node (&mut self, sequence: String, name: String) {
+        let id = self.graph.add_node(sequence.clone());
+        self.sequences.insert(id, sequence);
+        self.names.insert(id, name);
+    }
+
+    pub fn add_edge (&mut self, id_a: &NodeIndex, id_b: &NodeIndex) {
+        let a = self.sequences.get(id_a).unwrap().as_bytes().to_vec();
+        let b = self.sequences.get(id_b).unwrap().as_bytes().to_vec();
+
+        if let Some(r) = get_overlaps(&a, &b, self.min_overlapping, self.max_consecutive, self.max_diffs) { // normaly always like that
+            self.overlaps.insert(
+                self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()), 
+                Overlap { id_a: *id_a, range_a: r.0, id_b: *id_b, range_b: r.1}
+            );
+        } else if let Some(r) = get_overlaps(&b, &a, self.min_overlapping, self.max_consecutive, self.max_diffs) {
+            self.overlaps.insert(
+                self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()), 
+                Overlap { id_a: *id_a, range_a: r.1, id_b: *id_b, range_b: r.0}
+            );
+        }
+    }
+
     pub fn add_from_pe (&mut self, a: Vec<u8>, b: Vec<u8>, a_name: String, b_name: String, min_overlapping_len_pe: usize) {
         let a_rc = revcomp(std::str::from_utf8(&a).unwrap()).as_bytes().to_vec();
         let b_rc = revcomp(std::str::from_utf8(&b).unwrap()).as_bytes().to_vec();
@@ -103,29 +126,6 @@ impl SequencesGraph {
         }
     }
 
-    pub fn add_node (&mut self, sequence: String, name: String) {
-        let id = self.graph.add_node(sequence.clone());
-        self.sequences.insert(id, sequence);
-        self.names.insert(id, name);
-    }
-
-    pub fn add_edge (&mut self, id_a: &NodeIndex, id_b: &NodeIndex) {
-        let a = self.sequences.get(id_a).unwrap().as_bytes().to_vec();
-        let b = self.sequences.get(id_b).unwrap().as_bytes().to_vec();
-
-        if let Some(r) = get_overlaps(&a, &b, self.min_overlapping, self.max_consecutive, self.max_diffs) { // normaly always like that
-            self.overlaps.insert(
-                self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()), 
-                Overlap { id_a: *id_a, range_a: r.0, id_b: *id_b, range_b: r.1}
-            );
-        } else if let Some(r) = get_overlaps(&b, &a, self.min_overlapping, self.max_consecutive, self.max_diffs) {
-            self.overlaps.insert(
-                self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()), 
-                Overlap { id_a: *id_a, range_a: r.1, id_b: *id_b, range_b: r.0}
-            );
-        }
-    }
-
     pub fn draw_edges (&mut self) {
         let idx: Vec<NodeIndex> = self.graph.node_indices().collect();
         let n_nodes = idx.len();
@@ -218,7 +218,7 @@ impl SequencesGraph {
         }
     }
 
-    pub fn process(&mut self) {
+    pub fn process (&mut self) {
         self.draw_edges();
         self.remove_single_nodes();
         self.find_leftmost_node();
@@ -395,7 +395,53 @@ pub struct NeoContig {
 }
 
 impl NeoContig {
+    pub fn bwa_aln (&mut self, fasta_ref_path: &str, fa: &Fasta) {
+        let sam_keys = vec!["QNAME", "FLAG", "RNAME", 
+        "POS", "MAPQ", "CIGAR", "RNEXT", "PNEXT", "TLEN", "SEQ", 
+        "QUAL", "ALN"];
+
+        let stdout = cmd!("echo", "-e", format!("\"{}\"", self.contig))
+        .pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
+        .stdout_capture()
+        .stderr_capture()
+        .read().unwrap();
+
+        let mut alignments: Vec<Sam> = Vec::new();
+        stdout.split("\n").for_each(|line|{
+            if line.len() > 9 {
+                if &line[0..9] == "sequence_" {
+                    // println!("{:?}", line);
+                    let mut qname: Option<String> = None;
+                    let mut flag: Option<i32> = None;
+                    let mut rname: Option<String> = None;
+                    let mut pos: Option<i64> = None;
+                    let mut cigar: Option<String> = None;
+                    let mut sequence: Option<String> = None;
+
+                    for (i, value) in line.split("\t").enumerate() {
+                        if sam_keys.len() <= i { break; }
+                        match sam_keys[i] {
+                            "QNAME" => qname    = Some(value.to_string()),
+                            "FLAG"  => flag     = value.parse::<i32>().ok(),
+                            "RNAME" => rname    = Some(value.to_string()),
+                            "POS"   => pos      = value.parse::<i64>().ok(),
+                            "CIGAR" => cigar    = Some(value.to_string()),
+                            "SEQ"   => sequence = Some(value.to_string()),
+                            _       => ()
+                        }
+                    }
+                    
+                    if let (Some(qname), Some(flag), Some(rname), Some(pos), Some(cigar), Some(sequence)) = (qname, flag, rname, pos, cigar, sequence) {
+                        alignments.push(Sam::new(qname, flag, rname, pos, cigar, sequence, &fa));
+                    }
+                }
+            }
+        });
+        self.alignments = alignments;
+    }
+
     pub fn make_aln (&self) -> Vec<Vec<u8>> {
+        println!("== {} ============", self.name);
         let contig_len = self.contig.len();
         let mut lines: Vec<Vec<u8>> = Vec::new();