lib.rs 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. use anyhow::{Context, Ok, Result};
  2. use rust_htslib::{
  3. bam,
  4. bam::{ext::BamRecordExtensions, record, Read},
  5. };
  6. use average::Mean;
  7. pub fn get_hts_nt_pileup(
  8. bam: &mut rust_htslib::bam::IndexedReader,
  9. chr: &str,
  10. start: i32,
  11. with_next_ins: bool,
  12. ) -> Result<Vec<u8>> {
  13. let stop = start + 1;
  14. let mut bases = Vec::new();
  15. bam.fetch((chr, start, stop))?;
  16. let mut bam_pileup = Vec::new();
  17. for p in bam.pileup() {
  18. let pileup = p.context(format!(
  19. "Can't pilup bam at position {}:{}-{}",
  20. chr, start, stop
  21. ))?;
  22. let position = pileup.pos() as i32;
  23. if position == start {
  24. for alignment in pileup.alignments() {
  25. match alignment.indel() {
  26. bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
  27. bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
  28. _ => {
  29. let record = alignment.record();
  30. if record.seq_len() > 0 {
  31. if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
  32. bases.push(b);
  33. }
  34. } else {
  35. if alignment.is_del() {
  36. bases.push(b'D');
  37. }
  38. }
  39. }
  40. }
  41. }
  42. }
  43. }
  44. Ok(bases)
  45. }
  46. pub fn hts_base_at(
  47. record: &rust_htslib::bam::record::Record,
  48. at_pos: u32,
  49. with_next_ins: bool,
  50. ) -> Result<Option<u8>> {
  51. use rust_htslib::bam::record::Cigar;
  52. let cigar = record.cigar();
  53. let seq = record.seq();
  54. let pos = cigar.pos() as u32;
  55. let mut read_i = 0u32;
  56. let at_pos = at_pos - 1;
  57. let mut ref_pos = pos;
  58. if ref_pos > at_pos {
  59. return Ok(None);
  60. }
  61. for (id, op) in cigar.iter().enumerate() {
  62. let (add_read, add_ref) = match *op {
  63. Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
  64. Cigar::Ins(len) => (len, 0),
  65. Cigar::Del(len) => (0, len),
  66. Cigar::RefSkip(len) => (0, len),
  67. Cigar::SoftClip(len) => (len, 0),
  68. Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
  69. };
  70. // If at the end of the op len and next op is Ins return I
  71. if with_next_ins && ref_pos + add_read == at_pos + 1 {
  72. if let Some(Cigar::Ins(_)) = cigar.get(id + 1) {
  73. return Ok(Some(b'I'));
  74. }
  75. }
  76. if ref_pos + add_ref > at_pos {
  77. // Handle deletions directly
  78. if let Cigar::Del(_) = *op {
  79. return Ok(Some(b'D'));
  80. } else if let Cigar::RefSkip(_) = op {
  81. return Ok(None);
  82. } else {
  83. let diff = at_pos - ref_pos;
  84. let p = read_i + diff;
  85. return Ok(Some(seq[p as usize]));
  86. }
  87. }
  88. read_i += add_read;
  89. ref_pos += add_ref;
  90. }
  91. Ok(None)
  92. }
  93. pub fn get_n_start(
  94. bam: &mut rust_htslib::bam::IndexedReader,
  95. chr: &str,
  96. start: i32,
  97. stop: i32,
  98. ) -> Result<usize> {
  99. bam.fetch((chr, start, stop))?;
  100. // let mut start_positions = Vec::new();
  101. let mut n_start = 0;
  102. for read in bam.records() {
  103. let record = read.context(format!("eRR"))?;
  104. let rstart = record.pos() as i32;
  105. if rstart >= start && rstart < stop {
  106. if record.mapq() > 40 {
  107. n_start += 1;
  108. }
  109. // start_positions.push(rstart);
  110. }
  111. }
  112. Ok(n_start)
  113. // Ok(start_positions.len())
  114. }
  115. pub fn get_n_start_end_qual(
  116. bam: &mut rust_htslib::bam::IndexedReader,
  117. chr: &str,
  118. start: i32,
  119. stop: i32,
  120. mapq: u8,
  121. ) -> Result<usize> {
  122. bam.fetch((chr, start, stop))?;
  123. // let mut start_positions = Vec::new();
  124. let mut n_start = 0;
  125. for read in bam.records() {
  126. let record = read.context(format!("eRR"))?;
  127. let rstart = record.pos() as i32;
  128. let rend = record.reference_end() as i32;
  129. if rstart >= start && rstart < stop || rend >= start && rend < stop {
  130. if record.mapq() >= mapq {
  131. n_start += 1;
  132. }
  133. }
  134. }
  135. Ok(n_start)
  136. }
  137. pub fn get_start_end_qual(
  138. bam: &mut rust_htslib::bam::IndexedReader,
  139. chr: &str,
  140. start: i32,
  141. stop: i32,
  142. mapq: u8,
  143. ) -> Result<Vec<i32>> {
  144. bam.fetch((chr, start, stop))?;
  145. let length = stop - start;
  146. let mut results = Vec::new();
  147. results.resize(length as usize, 0);
  148. for read in bam.records() {
  149. let record = read.context(format!("Error while parsing record"))?;
  150. let rstart = record.pos() as i32;
  151. let rend = record.reference_end() as i32;
  152. if rstart >= start && rstart < stop {
  153. if record.mapq() >= mapq {
  154. let index = rstart - start;
  155. *results.get_mut(index as usize).unwrap() += 1;
  156. }
  157. }
  158. if rend >= start && rend < stop {
  159. if record.mapq() >= mapq {
  160. let index = rend - start;
  161. *results.get_mut(index as usize).unwrap() += 1;
  162. }
  163. }
  164. }
  165. Ok(results)
  166. }
  167. pub fn range_depths(
  168. bam: &mut rust_htslib::bam::IndexedReader,
  169. chr: &str,
  170. start: i32,
  171. stop: i32,
  172. ) -> Result<Vec<u32>> {
  173. bam.fetch((chr, start, stop))?;
  174. let mut depths = vec![0];
  175. depths.resize((stop - start) as usize, 0);
  176. for p in bam.pileup() {
  177. let pileup = p.context(format!("eRR"))?;
  178. let rstart = pileup.pos() as i32;
  179. if rstart >= start && rstart < stop {
  180. // depths.push(pileup.depth());
  181. let v = depths
  182. .get_mut((rstart - start) as usize)
  183. .context(format!("Errrr {}", rstart - start))?;
  184. *v = pileup.depth();
  185. } else if rstart >= stop {
  186. break;
  187. }
  188. }
  189. Ok(depths)
  190. }
  191. pub fn range_depths_qual(
  192. bam: &mut rust_htslib::bam::IndexedReader,
  193. chr: &str,
  194. start: i32,
  195. stop: i32,
  196. mapq: u8,
  197. ) -> Result<Vec<usize>> {
  198. bam.fetch((chr, start, stop))?;
  199. let mut depths = vec![0];
  200. depths.resize((stop - start) as usize, 0);
  201. for p in bam.pileup() {
  202. let pileup = p.context(format!("eRR"))?;
  203. let rstart = pileup.pos() as i32;
  204. if rstart >= start && rstart < stop {
  205. let v = depths
  206. .get_mut((rstart - start) as usize)
  207. .context(format!("Errrr {}", rstart - start))?;
  208. *v = pileup
  209. .alignments()
  210. .filter(|e| e.record().mapq() >= mapq)
  211. .count();
  212. } else if rstart >= stop {
  213. break;
  214. }
  215. }
  216. Ok(depths)
  217. }
  218. pub fn range_len_qual(
  219. bam: &mut rust_htslib::bam::IndexedReader,
  220. chr: &str,
  221. start: i32,
  222. stop: i32,
  223. mapq: u8,
  224. ) -> Result<Vec<usize>> {
  225. bam.fetch((chr, start, stop))?;
  226. let mut depths = vec![0];
  227. depths.resize((stop - start) as usize, 0);
  228. for p in bam.pileup() {
  229. let pileup = p.context(format!("eRR"))?;
  230. let rstart = pileup.pos() as i32;
  231. if rstart >= start && rstart < stop {
  232. let v = depths
  233. .get_mut((rstart - start) as usize)
  234. .context(format!("Errrr {}", rstart - start))?;
  235. *v = pileup
  236. .alignments()
  237. .filter(|e| e.record().mapq() >= mapq)
  238. .map(|e| e.record().seq_len())
  239. .sum();
  240. } else if rstart >= stop {
  241. break;
  242. }
  243. }
  244. Ok(depths)
  245. }