lib.rs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. use anyhow::{Context, Ok, Result};
  2. use rust_htslib::{
  3. bam,
  4. bam::{ext::BamRecordExtensions, record, Read, Record},
  5. };
  6. pub fn get_hts_nt_pileup(
  7. bam: &mut rust_htslib::bam::IndexedReader,
  8. chr: &str,
  9. start: i32,
  10. with_next_ins: bool,
  11. ) -> Result<Vec<u8>> {
  12. let stop = start + 1;
  13. let mut bases = Vec::new();
  14. bam.fetch((chr, start, stop))?;
  15. let mut bam_pileup = Vec::new();
  16. for p in bam.pileup() {
  17. let pileup = p.context(format!(
  18. "Can't pilup bam at position {}:{}-{}",
  19. chr, start, stop
  20. ))?;
  21. let position = pileup.pos() as i32;
  22. if position == start {
  23. for alignment in pileup.alignments() {
  24. match alignment.indel() {
  25. bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
  26. bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
  27. _ => {
  28. let record = alignment.record();
  29. if record.seq_len() > 0 {
  30. if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
  31. bases.push(b);
  32. }
  33. } else {
  34. if alignment.is_del() {
  35. bases.push(b'D');
  36. }
  37. }
  38. }
  39. }
  40. }
  41. }
  42. }
  43. Ok(bases)
  44. }
  45. pub fn hts_base_at(
  46. record: &rust_htslib::bam::record::Record,
  47. at_pos: u32,
  48. with_next_ins: bool,
  49. ) -> Result<Option<u8>> {
  50. use rust_htslib::bam::record::Cigar;
  51. let cigar = record.cigar();
  52. let seq = record.seq();
  53. let pos = cigar.pos() as u32;
  54. let mut read_i = 0u32;
  55. let at_pos = at_pos - 1;
  56. let mut ref_pos = pos;
  57. if ref_pos > at_pos {
  58. return Ok(None);
  59. }
  60. for (id, op) in cigar.iter().enumerate() {
  61. let (add_read, add_ref) = match *op {
  62. Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
  63. Cigar::Ins(len) => (len, 0),
  64. Cigar::Del(len) => (0, len),
  65. Cigar::RefSkip(len) => (0, len),
  66. Cigar::SoftClip(len) => (len, 0),
  67. Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
  68. };
  69. // If at the end of the op len and next op is Ins return I
  70. if with_next_ins && ref_pos + add_read == at_pos + 1 {
  71. if let Some(Cigar::Ins(_)) = cigar.get(id + 1) {
  72. return Ok(Some(b'I'));
  73. }
  74. }
  75. if ref_pos + add_ref > at_pos {
  76. // Handle deletions directly
  77. if let Cigar::Del(_) = *op {
  78. return Ok(Some(b'D'));
  79. } else if let Cigar::RefSkip(_) = op {
  80. return Ok(None);
  81. } else {
  82. let diff = at_pos - ref_pos;
  83. let p = read_i + diff;
  84. return Ok(Some(seq[p as usize]));
  85. }
  86. }
  87. read_i += add_read;
  88. ref_pos += add_ref;
  89. }
  90. Ok(None)
  91. }
  92. pub fn get_n_start(
  93. bam: &mut rust_htslib::bam::IndexedReader,
  94. chr: &str,
  95. start: i32,
  96. stop: i32,
  97. ) -> Result<usize> {
  98. bam.fetch((chr, start, stop))?;
  99. // let mut start_positions = Vec::new();
  100. let mut n_start = 0;
  101. for read in bam.records() {
  102. let record = read.context(format!("eRR"))?;
  103. let rstart = record.pos() as i32;
  104. if rstart >= start && rstart < stop {
  105. if record.mapq() > 40 {
  106. n_start += 1;
  107. }
  108. // start_positions.push(rstart);
  109. }
  110. }
  111. Ok(n_start)
  112. // Ok(start_positions.len())
  113. }
  114. pub fn get_n_start_end_qual(
  115. bam: &mut rust_htslib::bam::IndexedReader,
  116. chr: &str,
  117. start: i32,
  118. stop: i32,
  119. mapq: u8,
  120. ) -> Result<usize> {
  121. bam.fetch((chr, start, stop))?;
  122. // let mut start_positions = Vec::new();
  123. let mut n_start = 0;
  124. for read in bam.records() {
  125. let record = read.context(format!("eRR"))?;
  126. let rstart = record.pos() as i32;
  127. let rend = record.reference_end() as i32;
  128. if rstart >= start && rstart < stop || rend >= start && rend < stop {
  129. if record.mapq() >= mapq {
  130. n_start += 1;
  131. }
  132. }
  133. }
  134. Ok(n_start)
  135. }
  136. pub fn get_start_end_qual(
  137. bam: &mut rust_htslib::bam::IndexedReader,
  138. chr: &str,
  139. start: i32,
  140. stop: i32,
  141. mapq: u8,
  142. ) -> Result<Vec<i32>> {
  143. bam.fetch((chr, start - 1, stop))?;
  144. let length = stop - start;
  145. let mut results = Vec::new();
  146. results.resize(length as usize, 0);
  147. for read in bam.records() {
  148. let record = read.context(format!("Error while parsing record"))?;
  149. let rstart = record.pos() as i32;
  150. // let rstart = record.reference_start() as i32;
  151. let rend = record.reference_end() as i32;
  152. if rstart >= start && rstart < stop {
  153. // println!("start {rstart}");
  154. if record.mapq() >= mapq {
  155. let index = rstart - start;
  156. *results.get_mut(index as usize).unwrap() += 1;
  157. }
  158. }
  159. if rend >= start && rend < stop {
  160. // println!("{rend}");
  161. if record.mapq() >= mapq {
  162. let index = rend - start;
  163. *results.get_mut(index as usize).unwrap() += 1;
  164. }
  165. }
  166. }
  167. Ok(results)
  168. }
  169. pub fn get_start_end_qual_rec(
  170. bam: &mut rust_htslib::bam::IndexedReader,
  171. chr: &str,
  172. start: i32,
  173. stop: i32,
  174. mapq: u8,
  175. ) -> Result<(Vec<Vec<Record>>, Vec<Vec<Record>>)> {
  176. bam.fetch((chr, start - 1, stop))?;
  177. let length = stop - start;
  178. let mut results_start: Vec<Vec<Record>> = Vec::new();
  179. results_start.resize(length as usize, vec![]);
  180. let mut results_end: Vec<Vec<Record>> = Vec::new();
  181. results_end.resize(length as usize, vec![]);
  182. for read in bam.records() {
  183. let record = read.context(format!("Error while parsing record"))?;
  184. let rstart = record.pos() as i32;
  185. let rend = record.reference_end() as i32;
  186. if rstart >= start && rstart < stop {
  187. if record.mapq() >= mapq {
  188. let index = rstart - start;
  189. let u = results_start.get_mut(index as usize).unwrap();
  190. u.push(record.clone());
  191. }
  192. }
  193. if rend >= start && rend < stop {
  194. if record.mapq() >= mapq {
  195. let index = rend - start;
  196. let u = results_end.get_mut(index as usize).unwrap();
  197. u.push(record.clone());
  198. }
  199. }
  200. }
  201. Ok((results_start, results_end))
  202. }
  203. pub fn primary_record(bam_path: &str, record: &Record) -> Option<Record> {
  204. let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
  205. if record.is_supplementary() {
  206. let qname = record.qname();
  207. let mut positions: Vec<(&str, i32)> = Vec::new();
  208. match record.aux(b"SA") {
  209. std::result::Result::Ok(v) => match v {
  210. record::Aux::String(v) => {
  211. positions = v
  212. .split(";")
  213. .filter(|s| s.len() != 0)
  214. .map(|s| {
  215. let s = s.split(",").take(2).collect::<Vec<&str>>();
  216. let chr = *s.get(0).unwrap();
  217. let position: i32 = s.get(1).unwrap().parse().unwrap();
  218. (chr, position)
  219. })
  220. .collect();
  221. }
  222. _ => (),
  223. },
  224. Err(_) => (),
  225. };
  226. for (chr, start) in positions {
  227. bam.fetch((chr, start, start + 1)).unwrap();
  228. let records = bam.records();
  229. for record in records {
  230. let record = record.unwrap();
  231. // String::from_utf8(record.qname().to_vec()).unwrap().as_str()
  232. if qname == record.qname() {
  233. if !record.is_supplementary() {
  234. return Some(record.clone());
  235. }
  236. }
  237. }
  238. }
  239. }
  240. None
  241. }
  242. pub fn range_depths(
  243. bam: &mut rust_htslib::bam::IndexedReader,
  244. chr: &str,
  245. start: i32,
  246. stop: i32,
  247. ) -> Result<Vec<u32>> {
  248. bam.fetch((chr, start, stop))?;
  249. let mut depths = vec![0];
  250. depths.resize((stop - start) as usize, 0);
  251. for p in bam.pileup() {
  252. let pileup = p.context(format!("eRR"))?;
  253. let rstart = pileup.pos() as i32;
  254. if rstart >= start && rstart < stop {
  255. // depths.push(pileup.depth());
  256. let v = depths
  257. .get_mut((rstart - start) as usize)
  258. .context(format!("Errrr {}", rstart - start))?;
  259. *v = pileup.depth();
  260. } else if rstart >= stop {
  261. break;
  262. }
  263. }
  264. Ok(depths)
  265. }
  266. pub fn range_depths_qual(
  267. bam: &mut rust_htslib::bam::IndexedReader,
  268. chr: &str,
  269. start: i32,
  270. stop: i32,
  271. mapq: u8,
  272. ) -> Result<Vec<usize>> {
  273. bam.fetch((chr, start, stop))?;
  274. let mut depths = vec![0];
  275. depths.resize((stop - start) as usize, 0);
  276. for p in bam.pileup() {
  277. let pileup = p.context(format!("eRR"))?;
  278. let rstart = pileup.pos() as i32;
  279. if rstart >= start && rstart < stop {
  280. let v = depths
  281. .get_mut((rstart - start) as usize)
  282. .context(format!("Errrr {}", rstart - start))?;
  283. *v = pileup
  284. .alignments()
  285. .filter(|e| e.record().mapq() >= mapq)
  286. .count();
  287. } else if rstart >= stop {
  288. break;
  289. }
  290. }
  291. Ok(depths)
  292. }
  293. pub fn range_len_qual(
  294. bam: &mut rust_htslib::bam::IndexedReader,
  295. chr: &str,
  296. start: i32,
  297. stop: i32,
  298. mapq: u8,
  299. ) -> Result<Vec<usize>> {
  300. bam.fetch((chr, start, stop))?;
  301. let mut depths = vec![0];
  302. depths.resize((stop - start) as usize, 0);
  303. for p in bam.pileup() {
  304. let pileup = p.context(format!("eRR"))?;
  305. let rstart = pileup.pos() as i32;
  306. if rstart >= start && rstart < stop {
  307. let v = depths
  308. .get_mut((rstart - start) as usize)
  309. .context(format!("Errrr {}", rstart - start))?;
  310. *v = pileup
  311. .alignments()
  312. .filter(|e| e.record().mapq() >= mapq)
  313. .map(|e| e.record().seq_len())
  314. .sum();
  315. } else if rstart >= stop {
  316. break;
  317. }
  318. }
  319. Ok(depths)
  320. }
  321. pub fn swap_by_primary(bam_path: &str, records: Vec<Record>) -> Vec<Record> {
  322. let mut res_records = Vec::new();
  323. for record in records {
  324. if let Some(record) = primary_record(bam_path, &record) {
  325. res_records.push(record);
  326. } else {
  327. res_records.push(record);
  328. }
  329. }
  330. res_records
  331. }
  332. pub fn scan_sa(
  333. bam: &mut rust_htslib::bam::IndexedReader,
  334. chr: &str,
  335. start: i32,
  336. stop: i32,
  337. mapq: u8,
  338. ) -> Result<(Vec<Vec<Record>>, Vec<Vec<Record>>)> {
  339. bam.fetch((chr, start - 1, stop))?;
  340. let length = stop - start;
  341. let mut results_start: Vec<Vec<Record>> = Vec::new();
  342. results_start.resize(length as usize, vec![]);
  343. let mut results_end: Vec<Vec<Record>> = Vec::new();
  344. results_end.resize(length as usize, vec![]);
  345. for read in bam.records() {
  346. let record = read.context(format!("Error while parsing record"))?;
  347. let rstart = record.pos() as i32;
  348. let rend = record.reference_end() as i32;
  349. if rstart >= start && rstart < stop {
  350. if record.mapq() >= mapq && record.aux(b"SA").is_ok() {
  351. let index = rstart - start;
  352. let u = results_start.get_mut(index as usize).unwrap();
  353. u.push(record.clone());
  354. }
  355. }
  356. if rend >= start && rend < stop && record.aux(b"SA").is_ok() {
  357. if record.mapq() >= mapq {
  358. let index = rend - start;
  359. let u = results_end.get_mut(index as usize).unwrap();
  360. u.push(record.clone());
  361. }
  362. }
  363. }
  364. Ok((results_start, results_end))
  365. }
  366. pub fn records_at_base(
  367. bam: &mut rust_htslib::bam::IndexedReader,
  368. chr: &str,
  369. start: i32,
  370. with_next_ins: bool,
  371. ) -> Result<Vec<(Record, u8)>> {
  372. let stop = start + 1;
  373. let mut bases = Vec::new();
  374. bam.fetch((chr, start, stop))?;
  375. let mut bam_pileup = Vec::new();
  376. for p in bam.pileup() {
  377. let pileup = p.context(format!(
  378. "Can't pilup bam at position {}:{}-{}",
  379. chr, start, stop
  380. ))?;
  381. let position = pileup.pos() as i32;
  382. if position == start {
  383. for alignment in pileup.alignments() {
  384. match alignment.indel() {
  385. bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
  386. bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
  387. _ => {
  388. let record = alignment.record();
  389. if record.seq_len() > 0 {
  390. if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
  391. bases.push((record, b));
  392. }
  393. } else {
  394. if alignment.is_del() {
  395. bases.push((record, b'D'));
  396. }
  397. }
  398. }
  399. }
  400. }
  401. }
  402. }
  403. Ok(bases)
  404. }
  405. pub fn qnames_at_base(
  406. bam: &mut rust_htslib::bam::IndexedReader,
  407. chr: &str,
  408. start: i32,
  409. with_next_ins: bool,
  410. ) -> Result<Vec<(String, u8)>> {
  411. let stop = start + 1;
  412. let mut bases = Vec::new();
  413. bam.fetch((chr, start, stop))?;
  414. let mut bam_pileup = Vec::new();
  415. for p in bam.pileup() {
  416. let pileup = p.context(format!(
  417. "Can't pilup bam at position {}:{}-{}",
  418. chr, start, stop
  419. ))?;
  420. let position = pileup.pos() as i32;
  421. if position == start {
  422. for alignment in pileup.alignments() {
  423. match alignment.indel() {
  424. bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
  425. bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
  426. _ => {
  427. let record = alignment.record();
  428. let qname = String::from_utf8(record.qname().to_vec())?;
  429. if record.seq_len() > 0 {
  430. if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
  431. bases.push((qname, b));
  432. }
  433. } else {
  434. if alignment.is_del() {
  435. bases.push((qname, b'D'));
  436. }
  437. }
  438. }
  439. }
  440. }
  441. }
  442. }
  443. Ok(bases)
  444. }
  445. #[cfg(test)]
  446. mod tests {
  447. // Note this useful idiom: importing names from outer (for mod tests) scope.
  448. use super::*;
  449. #[test]
  450. fn test_se() {
  451. let bam_path = "/data/longreads_basic_pipe/CAMARA/diag/CAMARA_diag_hs1.bam";
  452. let chr = "chr9";
  453. let start = 21839796;
  454. let mapq = 50;
  455. let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
  456. // let a = get_start_end_qual(&mut bam, chr, start, start + 1, mapq).unwrap();
  457. // println!("{a:?}");
  458. let res = get_start_end_qual_rec(&mut bam, chr, start, start + 1, mapq).unwrap();
  459. let (_, e) = res;
  460. let res_records = swap_by_primary(bam_path, e.get(0).unwrap().clone());
  461. println!("{res_records:?}");
  462. println!("{}", res_records.len());
  463. assert!(res_records.len() == 12);
  464. }
  465. #[test]
  466. fn test_sa() {
  467. let bam_path = "/data/longreads_basic_pipe/CAMARA/diag/CAMARA_diag_hs1.bam";
  468. let chr = "chr9";
  469. let start = 21839796;
  470. let mapq = 50;
  471. let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
  472. let a = get_start_end_qual(&mut bam, chr, start, start + 1, mapq).unwrap().pop().unwrap();
  473. let (_, e) = get_start_end_qual_rec(&mut bam, chr, start, start + 1, mapq).unwrap();
  474. let res_records = swap_by_primary(bam_path, e.get(0).unwrap().clone());
  475. let (_, e) = scan_sa(&mut bam, chr, start, start + 1, mapq).unwrap();
  476. let res_records_sa = swap_by_primary(bam_path, e.get(0).unwrap().clone());
  477. assert_eq!(a as usize, res_records.len());
  478. assert_eq!(a as usize, res_records_sa.len());
  479. }
  480. }