| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- use duct::cmd;
- fn main() {
- let fasta_ref_path = "/home/thomas/NGS/ref/hg19.fa";
- let sequence = "
- CATTATATTCTTTTTTACAGTGCCACGGGTATATTTAGTAAACATTGAAATGTGTGGTTG
- GAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTGTA
- GACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATATTA
- TTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACGGC
- TGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTAGGTCCAAGAACTACAACGGCT
- GCTGCCGGCCGGGCCCGCTCCCTGGCCTACCTTGCACAAGAAGTCGACATCGTAGAAGGC
- GGACAGAAGTGTGGTCGACATGTTTCTGGAA"
- .replace("\n", "")
- .replace(" ", "");
- println!("sequence len: {}", sequence.len());
- println!(">sequence\n{}", sequence);
- let aln = bwa_aln(fasta_ref_path, &sequence);
- aln.iter()
- .for_each(|e| {
- println!("seq len: {:?}", e.seq.len());
- println!("cigar: {:?}", e.cigar.0);
- println!("reverse: {:?}", is_reverse(e.flag));
- println!("cigar len: {:?}", e.cigar.len());
- println!("pos: {}:{}", e.ref_name, e.pos);
- if is_reverse(e.flag) {
- println!("{:?}", revcomp(&e.seq));
- println!("{:?}", e.cigar.expand());
- // println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
- } else {
- println!("{:?}", e.seq);
- println!("{:?}", e.cigar.expand());
- }
-
- println!("-------------------------------")
- // println!("{:?}", e.seq.len() == e.cigar.len() as usize);
- });
- }
- pub fn bwa_aln(fasta_ref_path: &str, sequence: &str) -> Vec<Sam> {
- let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", sequence))
- .pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
- .stdout_capture()
- .stderr_capture()
- .read().unwrap();
- let alignements: Vec<Sam> = stdout.split("\n")
- .filter(|e| e.contains("sequence_to_query"))
- .map(|e| parse_sam_output(e))
- .collect();
-
- alignements
- }
- #[derive(Debug, Clone)]
- pub struct Sam {
- pub qname : String,
- pub flag : u16,
- pub ref_name: String,
- pub pos : i32,
- pub mapq : i32,
- pub cigar : Cigar,
- pub rnext : String,
- pub pnext : String,
- pub tlen : String,
- pub seq : String
- }
- fn parse_sam_output(sam_string: &str) -> Sam {
- let mut split = sam_string.split("\t");
- Sam {
- qname : split.next().unwrap().to_string(),
- flag : split.next().unwrap().parse().unwrap(),
- ref_name: split.next().unwrap().to_string(),
- pos : split.next().unwrap().parse().unwrap(),
- mapq : split.next().unwrap().parse().unwrap(),
- cigar : Cigar::new_from_str(&split.next().unwrap().to_string()),
- rnext : split.next().unwrap().to_string(),
- pnext : split.next().unwrap().to_string(),
- tlen : split.next().unwrap().to_string(),
- seq : split.next().unwrap().to_string()
- }
- }
- #[derive(Debug, Clone)]
- pub struct Cigar(Vec<(String, u32)>);
- impl Cigar {
- pub fn new_from_str(cigar_str: &str) -> Cigar {
- let mut res: Vec<(String, u32)> = Vec::new();
- let mut num_acc = String::new();
- for c in cigar_str.split("") {
- if c != "" { match c.parse::<usize>() {
- Ok(_) => num_acc.push_str(c), // if a number added to the accumulator
- Err(_) => {
- let current_add = num_acc.parse::<u32>().unwrap();
- num_acc = "".to_string();
- // if begin by N...
- res.push((c.to_string(), current_add));
- }
- }}
- }
- Cigar(res)
- }
- pub fn len(&self) -> u32 {
- self.0.iter()
- .map(|e| e.1)
- .reduce(|acc, e| acc + e)
- .unwrap()
- }
- pub fn expand(&self) -> String {
- let mut res = String::new();
- for (op, n) in self.0.iter() {
- let c = op.chars().next().unwrap();
- for _ in 0..(*n) {
- res.push(c);
- }
- }
- res
- }
- }
- fn is_reverse (flag: u16) -> bool {
- flag & 0x10 == 0x10
- }
- pub fn revcomp(dna: &str) -> String{
- let mut rdna: String = String::with_capacity(dna.len());
- for c in dna.chars().rev() {
- rdna.push(switch_base(c));
- }
- rdna
- }
- fn switch_base(c:char) -> char {
- match c {
- 'a' => 't',
- 'c' => 'g',
- 't' => 'a',
- 'g' => 'c',
- 'u' => 'a',
- 'A' => 'T',
- 'C' => 'G',
- 'T' => 'A',
- 'G' => 'C',
- 'U' => 'A',
- _ => 'N'
- }
- }
- fn make_aln(seq_query: &str, alignements: &Vec<Sam>) {
- let consumes_query = vec!["M", "I", "S", "=", "X", "H"];
- let consumes_ref = vec!["M", "D", "N", "=", "X"];
- }
|