|
|
@@ -26,17 +26,18 @@ fn main() {
|
|
|
println!("cigar len: {:?}", e.cigar.len());
|
|
|
println!("pos: {}:{}", e.ref_name, e.pos);
|
|
|
if is_reverse(e.flag) {
|
|
|
- println!("{:?}", revcomp(&e.seq));
|
|
|
- println!("{:?}", e.cigar.expand());
|
|
|
+ // println!("{:?}", &e.seq);
|
|
|
+ println!("rc {:?}", revcomp(&e.seq));
|
|
|
+ println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
|
|
|
+
|
|
|
+ // println!("{:?}", e.cigar.expand());
|
|
|
|
|
|
- // println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
|
|
|
} else {
|
|
|
println!("{:?}", e.seq);
|
|
|
println!("{:?}", e.cigar.expand());
|
|
|
}
|
|
|
|
|
|
println!("-------------------------------")
|
|
|
- // println!("{:?}", e.seq.len() == e.cigar.len() as usize);
|
|
|
});
|
|
|
}
|
|
|
|
|
|
@@ -45,14 +46,27 @@ pub fn bwa_aln(fasta_ref_path: &str, sequence: &str) -> Vec<Sam> {
|
|
|
.pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
|
|
|
.stdout_capture()
|
|
|
.stderr_capture()
|
|
|
- .read().unwrap();
|
|
|
-
|
|
|
- let alignements: Vec<Sam> = stdout.split("\n")
|
|
|
- .filter(|e| e.contains("sequence_to_query"))
|
|
|
- .map(|e| parse_sam_output(e))
|
|
|
- .collect();
|
|
|
+ .read()
|
|
|
+ .unwrap();
|
|
|
|
|
|
- alignements
|
|
|
+ stdout.split("\n")
|
|
|
+ .filter(|e| e.contains("sequence_to_query"))
|
|
|
+ .map(|e| Sam::new_from_bwa_output(e))
|
|
|
+ .into_iter()
|
|
|
+ .map(|mut e| {
|
|
|
+ // if the sequence is returned by bwa with an N at first position remove it
|
|
|
+ if e.seq.len() != sequence.len() {
|
|
|
+ if e.seq.chars().nth(0).unwrap() == 'N' {
|
|
|
+ let (_, seq) = e.seq.split_at(1);
|
|
|
+ e.seq = seq.to_string();
|
|
|
+ let mut cig = e.cigar.0.get_mut(0).unwrap();
|
|
|
+ cig.1 -= 1;
|
|
|
+ // e.pos += 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ e
|
|
|
+ })
|
|
|
+ .collect()
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
@@ -69,19 +83,25 @@ pub struct Sam {
|
|
|
pub seq : String
|
|
|
}
|
|
|
|
|
|
-fn parse_sam_output(sam_string: &str) -> Sam {
|
|
|
- let mut split = sam_string.split("\t");
|
|
|
- Sam {
|
|
|
- qname : split.next().unwrap().to_string(),
|
|
|
- flag : split.next().unwrap().parse().unwrap(),
|
|
|
- ref_name: split.next().unwrap().to_string(),
|
|
|
- pos : split.next().unwrap().parse().unwrap(),
|
|
|
- mapq : split.next().unwrap().parse().unwrap(),
|
|
|
- cigar : Cigar::new_from_str(&split.next().unwrap().to_string()),
|
|
|
- rnext : split.next().unwrap().to_string(),
|
|
|
- pnext : split.next().unwrap().to_string(),
|
|
|
- tlen : split.next().unwrap().to_string(),
|
|
|
- seq : split.next().unwrap().to_string()
|
|
|
+impl Sam {
|
|
|
+ pub fn new_from_bwa_output(sam_string: &str) -> Sam {
|
|
|
+ let mut split = sam_string.split("\t");
|
|
|
+ Sam {
|
|
|
+ qname : split.next().unwrap().to_string(),
|
|
|
+ flag : split.next().unwrap().parse().unwrap(),
|
|
|
+ ref_name: split.next().unwrap().to_string(),
|
|
|
+ pos : split.next().unwrap().parse().unwrap(),
|
|
|
+ mapq : split.next().unwrap().parse().unwrap(),
|
|
|
+ cigar : Cigar::new_from_str(&split.next().unwrap().to_string()),
|
|
|
+ rnext : split.next().unwrap().to_string(),
|
|
|
+ pnext : split.next().unwrap().to_string(),
|
|
|
+ tlen : split.next().unwrap().to_string(),
|
|
|
+ seq : split.next().unwrap().to_string()
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn is_reverse(&self) -> bool {
|
|
|
+ self.flag & 0x10 == 0x10
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -99,7 +119,6 @@ impl Cigar {
|
|
|
let current_add = num_acc.parse::<u32>().unwrap();
|
|
|
num_acc = "".to_string();
|
|
|
|
|
|
- // if begin by N...
|
|
|
res.push((c.to_string(), current_add));
|
|
|
}
|
|
|
}}
|
|
|
@@ -156,5 +175,15 @@ fn switch_base(c:char) -> char {
|
|
|
|
|
|
fn make_aln(seq_query: &str, alignements: &Vec<Sam>) {
|
|
|
let consumes_query = vec!["M", "I", "S", "=", "X", "H"];
|
|
|
- let consumes_ref = vec!["M", "D", "N", "=", "X"];
|
|
|
+ let consumes_ref = vec!["M", "D", "N", "=", "X"];
|
|
|
+
|
|
|
+ let mut stack: Vec<Vec<u8>> = Vec::new();
|
|
|
+ stack.push(seq_query.as_bytes().to_vec());
|
|
|
+
|
|
|
+ for alignement in alignements {
|
|
|
+ // for pos in 0..seq_query.len() {
|
|
|
+
|
|
|
+ // }
|
|
|
+
|
|
|
+ }
|
|
|
}
|