main.rs 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. use duct::cmd;
  2. fn main() {
  3. let fasta_ref_path = "/home/thomas/NGS/ref/hg19.fa";
  4. let sequence = "
  5. CATTATATTCTTTTTTACAGTGCCACGGGTATATTTAGTAAACATTGAAATGTGTGGTTG
  6. GAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTGTA
  7. GACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATATTA
  8. TTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACGGC
  9. TGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTAGGTCCAAGAACTACAACGGCT
  10. GCTGCCGGCCGGGCCCGCTCCCTGGCCTACCTTGCACAAGAAGTCGACATCGTAGAAGGC
  11. GGACAGAAGTGTGGTCGACATGTTTCTGGAA"
  12. .replace("\n", "")
  13. .replace(" ", "");
  14. println!("sequence len: {}", sequence.len());
  15. println!(">sequence\n{}", sequence);
  16. let aln = bwa_aln(fasta_ref_path, &sequence);
  17. aln.iter()
  18. .for_each(|e| {
  19. println!("seq len: {:?}", e.seq.len());
  20. println!("cigar: {:?}", e.cigar.0);
  21. println!("reverse: {:?}", is_reverse(e.flag));
  22. println!("cigar len: {:?}", e.cigar.len());
  23. println!("pos: {}:{}", e.ref_name, e.pos);
  24. if is_reverse(e.flag) {
  25. println!("{:?}", revcomp(&e.seq));
  26. println!("{:?}", e.cigar.expand());
  27. // println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
  28. } else {
  29. println!("{:?}", e.seq);
  30. println!("{:?}", e.cigar.expand());
  31. }
  32. println!("-------------------------------")
  33. // println!("{:?}", e.seq.len() == e.cigar.len() as usize);
  34. });
  35. }
  36. pub fn bwa_aln(fasta_ref_path: &str, sequence: &str) -> Vec<Sam> {
  37. let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", sequence))
  38. .pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
  39. .stdout_capture()
  40. .stderr_capture()
  41. .read().unwrap();
  42. let alignements: Vec<Sam> = stdout.split("\n")
  43. .filter(|e| e.contains("sequence_to_query"))
  44. .map(|e| parse_sam_output(e))
  45. .collect();
  46. alignements
  47. }
  48. #[derive(Debug, Clone)]
  49. pub struct Sam {
  50. pub qname : String,
  51. pub flag : u16,
  52. pub ref_name: String,
  53. pub pos : i32,
  54. pub mapq : i32,
  55. pub cigar : Cigar,
  56. pub rnext : String,
  57. pub pnext : String,
  58. pub tlen : String,
  59. pub seq : String
  60. }
  61. fn parse_sam_output(sam_string: &str) -> Sam {
  62. let mut split = sam_string.split("\t");
  63. Sam {
  64. qname : split.next().unwrap().to_string(),
  65. flag : split.next().unwrap().parse().unwrap(),
  66. ref_name: split.next().unwrap().to_string(),
  67. pos : split.next().unwrap().parse().unwrap(),
  68. mapq : split.next().unwrap().parse().unwrap(),
  69. cigar : Cigar::new_from_str(&split.next().unwrap().to_string()),
  70. rnext : split.next().unwrap().to_string(),
  71. pnext : split.next().unwrap().to_string(),
  72. tlen : split.next().unwrap().to_string(),
  73. seq : split.next().unwrap().to_string()
  74. }
  75. }
  76. #[derive(Debug, Clone)]
  77. pub struct Cigar(Vec<(String, u32)>);
  78. impl Cigar {
  79. pub fn new_from_str(cigar_str: &str) -> Cigar {
  80. let mut res: Vec<(String, u32)> = Vec::new();
  81. let mut num_acc = String::new();
  82. for c in cigar_str.split("") {
  83. if c != "" { match c.parse::<usize>() {
  84. Ok(_) => num_acc.push_str(c), // if a number added to the accumulator
  85. Err(_) => {
  86. let current_add = num_acc.parse::<u32>().unwrap();
  87. num_acc = "".to_string();
  88. // if begin by N...
  89. res.push((c.to_string(), current_add));
  90. }
  91. }}
  92. }
  93. Cigar(res)
  94. }
  95. pub fn len(&self) -> u32 {
  96. self.0.iter()
  97. .map(|e| e.1)
  98. .reduce(|acc, e| acc + e)
  99. .unwrap()
  100. }
  101. pub fn expand(&self) -> String {
  102. let mut res = String::new();
  103. for (op, n) in self.0.iter() {
  104. let c = op.chars().next().unwrap();
  105. for _ in 0..(*n) {
  106. res.push(c);
  107. }
  108. }
  109. res
  110. }
  111. }
  112. fn is_reverse (flag: u16) -> bool {
  113. flag & 0x10 == 0x10
  114. }
  115. pub fn revcomp(dna: &str) -> String{
  116. let mut rdna: String = String::with_capacity(dna.len());
  117. for c in dna.chars().rev() {
  118. rdna.push(switch_base(c));
  119. }
  120. rdna
  121. }
  122. fn switch_base(c:char) -> char {
  123. match c {
  124. 'a' => 't',
  125. 'c' => 'g',
  126. 't' => 'a',
  127. 'g' => 'c',
  128. 'u' => 'a',
  129. 'A' => 'T',
  130. 'C' => 'G',
  131. 'T' => 'A',
  132. 'G' => 'C',
  133. 'U' => 'A',
  134. _ => 'N'
  135. }
  136. }
  137. fn make_aln(seq_query: &str, alignements: &Vec<Sam>) {
  138. let consumes_query = vec!["M", "I", "S", "=", "X", "H"];
  139. let consumes_ref = vec!["M", "D", "N", "=", "X"];
  140. }