bam.rs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. use std::{
  2. collections::HashMap,
  3. fs::{self, File},
  4. path::PathBuf,
  5. };
  6. use anyhow::{anyhow, Context};
  7. use chrono::{DateTime, Utc};
  8. use dashmap::DashMap;
  9. use glob::glob;
  10. use log::{debug, info, warn};
  11. use rand::{rng, Rng};
  12. use rayon::prelude::*;
  13. use rust_htslib::bam::{ext::BamRecordExtensions, record::Cigar, Read};
  14. use serde::{Deserialize, Serialize};
  15. use crate::config::Config;
  16. #[derive(Debug, Clone, Deserialize, Serialize)]
  17. pub struct WGSBam {
  18. pub id: String,
  19. pub time_point: String,
  20. pub reference_genome: String,
  21. // pub bam_type: BamType,
  22. pub path: PathBuf,
  23. pub modified: DateTime<Utc>,
  24. pub bam_stats: WGSBamStats,
  25. // pub cramino: Option<CraminoRes>,
  26. pub composition: Vec<(String, f64)>, // acquisition id
  27. }
  28. // #[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
  29. // pub enum BamType {
  30. // WGS,
  31. // Panel(String),
  32. // ChIP(String),
  33. // }
  34. //
  35. impl WGSBam {
  36. pub fn new(path: PathBuf) -> anyhow::Result<Self> {
  37. let stem = path
  38. .clone()
  39. .file_stem()
  40. .context(format!("Can't parse stem from {}", path.display()))?
  41. .to_string_lossy()
  42. .to_string();
  43. let stem: Vec<&str> = stem.split('_').collect();
  44. if stem.len() != 3 {
  45. return Err(anyhow!("Error in bam name: {}", path.display()));
  46. }
  47. let id = stem[0].to_string();
  48. let time_point = stem[1].to_string();
  49. let reference_genome = stem
  50. .last()
  51. .context("Can't get last from stem {stem}")?
  52. .to_string();
  53. // let bam_type = if stem.len() == 4 {
  54. // match stem[2] {
  55. // "oncoT" => BamType::Panel("oncoT".to_string()),
  56. // "H3K27ac" => BamType::ChIP("H3K27ac".to_string()),
  57. // "H3K4me3" => BamType::ChIP("H3K4me3".to_string()),
  58. // _ => return Err(anyhow!("Error in bam name: {}", path.display())),
  59. // }
  60. // } else {
  61. // BamType::WGS
  62. // };
  63. let file_metadata = fs::metadata(&path)?;
  64. let modified = file_metadata.modified()?;
  65. let tp_dir = path
  66. .parent()
  67. .context("Can't parse parent from: {bam_path}")?;
  68. let json_path = format!(
  69. "{}/{id}_{time_point}_{reference_genome}_info.json",
  70. tp_dir.to_string_lossy()
  71. );
  72. let json_file = PathBuf::from(&json_path);
  73. if json_file.exists() && json_file.metadata()?.modified()? > modified {
  74. match Self::load_json(&json_path) {
  75. Ok(s) => return Ok(s),
  76. Err(e) => {
  77. warn!("Error while loading {}.\n{e}", json_path);
  78. }
  79. }
  80. }
  81. // let cramino_path = format!(
  82. // "{}/{id}_{time_point}_{reference_genome}_cramino.txt",
  83. // tp_dir.to_string_lossy()
  84. // );
  85. // let cramino = if bam_type == BamType::WGS {
  86. // if !PathBuf::from_str(&cramino_path)?.exists() {
  87. // // return Err(anyhow!("Cramino file missing {cramino_path}"));
  88. // Cramino::default().with_bam(path.to_str().unwrap())?;
  89. // }
  90. // let mut cramino = Cramino::default().with_result_path(&cramino_path);
  91. // cramino
  92. // .parse_results()
  93. // .context(format!("Error while parsing cramino for {cramino_path}"))?;
  94. //
  95. // if let Some(cramino) = cramino.results {
  96. // Some(cramino)
  97. // } else {
  98. // return Err(anyhow!("Cramino results parsing failed"));
  99. // }
  100. // } else {
  101. // None
  102. // };
  103. let composition = bam_compo(path.to_string_lossy().as_ref(), 20_000).context(format!(
  104. "Error while reading BAM composition for {}",
  105. path.display()
  106. ))?;
  107. let s = Self {
  108. path,
  109. bam_stats: WGSBamStats::new(&id, &time_point, Config::default())?,
  110. // cramino,
  111. id: id.to_string(),
  112. time_point: time_point.to_string(),
  113. // bam_type,
  114. reference_genome,
  115. composition,
  116. modified: modified.into(),
  117. };
  118. s.save_json(&json_path)?;
  119. Ok(s)
  120. }
  121. pub fn load_json(path: &str) -> anyhow::Result<Self> {
  122. let f = File::open(path)?;
  123. let s: Self = serde_json::from_reader(f)?;
  124. Ok(s)
  125. }
  126. pub fn save_json(&self, path: &str) -> anyhow::Result<()> {
  127. let f = File::create(path)?;
  128. serde_json::to_writer(f, self)?;
  129. Ok(())
  130. }
  131. }
  132. #[derive(Debug, Default)]
  133. pub struct BamCollection {
  134. pub bams: Vec<WGSBam>,
  135. }
  136. impl BamCollection {
  137. pub fn new(result_dir: &str) -> Self {
  138. load_bam_collection(result_dir)
  139. }
  140. pub fn by_acquisition_id(&self) -> HashMap<String, Vec<&WGSBam>> {
  141. let mut acq: HashMap<String, Vec<&WGSBam>> = HashMap::new();
  142. for bam in self.bams.iter() {
  143. for (acq_id, _) in bam.composition.iter() {
  144. if let Some(entry) = acq.get_mut(acq_id) {
  145. entry.push(bam);
  146. } else {
  147. acq.insert(acq_id.to_string(), vec![bam]);
  148. }
  149. }
  150. }
  151. acq
  152. }
  153. pub fn get(&self, id: &str, time_point: &str) -> Vec<&WGSBam> {
  154. self.bams
  155. .iter()
  156. .filter(|b| b.id == id && b.time_point == time_point)
  157. .collect()
  158. }
  159. pub fn by_id_completed(&self, min_diag_cov: f32, min_mrd_cov: f32) -> Vec<WGSBam> {
  160. self.bams
  161. .iter()
  162. // .filter(|b| matches!(b.bam_type, BamType::WGS))
  163. .filter(|b| match b.time_point.as_str() {
  164. "diag" => b.bam_stats.global_coverage >= min_diag_cov as f64,
  165. "mrd" => b.bam_stats.global_coverage >= min_mrd_cov as f64,
  166. _ => false,
  167. })
  168. // .filter(|b| match &b.cramino {
  169. // Some(cramino) => match b.time_point.as_str() {
  170. // "diag" => cramino.mean_length >= min_diag_cov as f64,
  171. // "mrd" => cramino.mean_length >= min_mrd_cov as f64,
  172. // _ => false,
  173. // },
  174. // _ => false,
  175. // })
  176. .cloned()
  177. .collect()
  178. }
  179. pub fn ids(&self) -> Vec<String> {
  180. let mut ids: Vec<String> = self.bams.iter().map(|b| b.id.clone()).collect();
  181. ids.sort();
  182. ids.dedup();
  183. ids
  184. }
  185. }
  186. pub fn load_bam_collection(result_dir: &str) -> BamCollection {
  187. let pattern = format!("{}/*/*/*.bam", result_dir);
  188. let bams = glob(&pattern)
  189. .expect("Failed to read glob pattern")
  190. // .par_bridge()
  191. .filter_map(|entry| {
  192. match entry {
  193. Ok(path) => match WGSBam::new(path) {
  194. Ok(bam) => return Some(bam),
  195. Err(e) => warn!("{e}"),
  196. },
  197. Err(e) => warn!("Error: {:?}", e),
  198. }
  199. None
  200. })
  201. .collect();
  202. BamCollection { bams }
  203. }
  204. pub fn bam_compo(file_path: &str, sample_size: usize) -> anyhow::Result<Vec<(String, f64)>> {
  205. let mut bam = rust_htslib::bam::Reader::from_path(file_path)?;
  206. let mut rgs: HashMap<String, u64> = HashMap::new();
  207. for result in bam.records().filter_map(Result::ok).take(sample_size) {
  208. if let rust_htslib::bam::record::Aux::String(s) = result.aux(b"RG")? {
  209. *rgs.entry(s.to_string()).or_default() += 1;
  210. }
  211. }
  212. Ok(rgs
  213. .into_iter()
  214. .map(|(rg, n)| (rg.to_string(), n as f64 * 100.0 / sample_size as f64))
  215. .map(|(rg, p)| {
  216. let (a, _) = rg.split_once('_').unwrap();
  217. (a.to_string(), p)
  218. })
  219. .collect())
  220. }
  221. pub struct BamReadsSampler {
  222. pub positions: Vec<(String, u64)>,
  223. pub reader: rust_htslib::bam::IndexedReader,
  224. current_index: usize,
  225. }
  226. impl BamReadsSampler {
  227. pub fn new(path: &str, n: usize) -> anyhow::Result<Self> {
  228. debug!("Reading BAM file: {path}");
  229. let reader = rust_htslib::bam::IndexedReader::from_path(path)?;
  230. let header = reader.header();
  231. let contig_len: Vec<(String, u64)> = header
  232. .target_names()
  233. .into_iter()
  234. .map(|target_name| {
  235. let tid = header.tid(target_name).unwrap();
  236. (
  237. String::from_utf8(target_name.to_vec()).unwrap(),
  238. header.target_len(tid).unwrap(),
  239. )
  240. })
  241. .collect();
  242. let positions = sample_random_positions(&contig_len, n);
  243. Ok(Self {
  244. positions,
  245. reader,
  246. current_index: 0,
  247. })
  248. }
  249. }
  250. impl Iterator for BamReadsSampler {
  251. type Item = Result<rust_htslib::bam::Record, rust_htslib::errors::Error>;
  252. fn next(&mut self) -> Option<Self::Item> {
  253. loop {
  254. if self.current_index < self.positions.len() {
  255. let (contig, pos) = &self.positions[self.current_index];
  256. match self.reader.fetch((contig, *pos, pos + 1)) {
  257. Ok(_) => (),
  258. Err(e) => warn!("Error while reading BAM {e}"),
  259. }
  260. let result = self.reader.records().next();
  261. self.current_index += 1;
  262. if result.is_some() {
  263. return result;
  264. }
  265. } else {
  266. return None;
  267. }
  268. }
  269. }
  270. }
  271. pub fn n_mapped_reads(bam_path: &str) -> anyhow::Result<u64> {
  272. let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path)?;
  273. let mapped = bam
  274. .index_stats()?
  275. .into_iter()
  276. .filter(|(tid, _, _, _)| *tid < 24)
  277. .map(|(_, _, m, _)| m)
  278. .sum();
  279. Ok(mapped)
  280. }
  281. // 0-based inclusif
  282. pub fn nt_pileup(
  283. bam: &mut rust_htslib::bam::IndexedReader,
  284. chr: &str,
  285. position: u32,
  286. with_next_ins: bool,
  287. ) -> anyhow::Result<Vec<u8>> {
  288. use rust_htslib::{bam, bam::Read};
  289. let mut bases = Vec::new();
  290. bam.fetch((chr, position, position + 1))?;
  291. let mut bam_pileup = Vec::new();
  292. for p in bam.pileup() {
  293. let pileup = p.context(format!(
  294. "Can't pileup bam at position {}:{} (0-based)",
  295. chr, position
  296. ))?;
  297. let cur_position = pileup.pos();
  298. if cur_position == position {
  299. for alignment in pileup.alignments() {
  300. match alignment.indel() {
  301. bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
  302. bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
  303. _ => {
  304. let record = alignment.record();
  305. if record.seq_len() > 0 {
  306. if let Some(b) = base_at(&record, position, with_next_ins)? {
  307. bases.push(b);
  308. }
  309. } else if alignment.is_del() {
  310. bases.push(b'D');
  311. }
  312. }
  313. }
  314. }
  315. }
  316. }
  317. Ok(bases)
  318. }
  319. pub fn base_at(
  320. record: &rust_htslib::bam::record::Record,
  321. at_pos: u32,
  322. with_next_ins: bool,
  323. ) -> anyhow::Result<Option<u8>> {
  324. let cigar = record.cigar();
  325. let seq = record.seq();
  326. let pos = cigar.pos() as u32;
  327. let mut read_i = 0u32;
  328. // let at_pos = at_pos - 1;
  329. let mut ref_pos = pos;
  330. if ref_pos > at_pos {
  331. return Ok(None);
  332. }
  333. for (id, op) in cigar.iter().enumerate() {
  334. let (add_read, add_ref) = match *op {
  335. Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
  336. Cigar::Ins(len) => (len, 0),
  337. Cigar::Del(len) => (0, len),
  338. Cigar::RefSkip(len) => (0, len),
  339. Cigar::SoftClip(len) => (len, 0),
  340. Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
  341. };
  342. // If at the end of the op len and next op is Ins return I
  343. if with_next_ins && ref_pos + add_read == at_pos + 1 {
  344. if let Some(Cigar::Ins(_)) = cigar.get(id + 1) {
  345. return Ok(Some(b'I'));
  346. }
  347. }
  348. if ref_pos + add_ref > at_pos {
  349. // Handle deletions directly
  350. if let Cigar::Del(_) = *op {
  351. return Ok(Some(b'D'));
  352. } else if let Cigar::RefSkip(_) = op {
  353. return Ok(None);
  354. } else {
  355. let diff = at_pos - ref_pos;
  356. let p = read_i + diff;
  357. return Ok(Some(seq[p as usize]));
  358. }
  359. }
  360. read_i += add_read;
  361. ref_pos += add_ref;
  362. }
  363. Ok(None)
  364. }
  365. pub fn counts_at(
  366. bam: &mut rust_htslib::bam::IndexedReader,
  367. chr: &str,
  368. position: u32,
  369. ) -> anyhow::Result<HashMap<String, i32>> {
  370. let p = nt_pileup(bam, chr, position, false)?
  371. .iter()
  372. .map(|e| String::from_utf8(vec![*e]).unwrap())
  373. .collect::<Vec<_>>();
  374. let mut counts = HashMap::new();
  375. for item in p.iter() {
  376. *counts.entry(item.to_string()).or_insert(0) += 1;
  377. }
  378. Ok(counts)
  379. }
  380. pub fn ins_pileup(
  381. bam: &mut rust_htslib::bam::IndexedReader,
  382. chr: &str,
  383. position: u32,
  384. ) -> anyhow::Result<Vec<String>> {
  385. let mut bases = Vec::new();
  386. bam.fetch((chr, position, position + 10))?;
  387. for p in bam.pileup() {
  388. let pileup = p.context(format!(
  389. "Can't pileup bam at position {}:{} (0-based)",
  390. chr, position
  391. ))?;
  392. let cur_position = pileup.pos();
  393. // Ins in the next position
  394. if cur_position == position + 1 {
  395. // debug!("{cur_position}");
  396. for alignment in pileup.alignments() {
  397. let record = alignment.record();
  398. if record.seq_len() > 0 {
  399. if let Some(b) = ins_at(&record, position)? {
  400. bases.push(b);
  401. }
  402. }
  403. }
  404. }
  405. }
  406. Ok(bases)
  407. }
  408. pub fn ins_at(
  409. record: &rust_htslib::bam::record::Record,
  410. at_pos: u32,
  411. ) -> anyhow::Result<Option<String>> {
  412. use rust_htslib::bam::record::Cigar;
  413. let cigar = record.cigar();
  414. let seq = record.seq();
  415. let pos = cigar.pos() as u32;
  416. let mut read_i = 0u32;
  417. let mut ref_pos = pos;
  418. if ref_pos > at_pos {
  419. return Ok(None);
  420. }
  421. // debug!(
  422. // "read: {}",
  423. // String::from_utf8(record.qname().to_vec()).unwrap()
  424. // );
  425. for op in cigar.iter() {
  426. let (add_read, add_ref) = match *op {
  427. Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
  428. Cigar::Ins(len) => (len, 0),
  429. Cigar::Del(len) => (0, len),
  430. Cigar::RefSkip(len) => (0, len),
  431. Cigar::SoftClip(len) => (len, 0),
  432. Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
  433. };
  434. if ref_pos + add_read > at_pos && ref_pos + add_read < at_pos + 10 {
  435. if let Cigar::Ins(v) = *op {
  436. // debug!(
  437. // "ins size {v} @ {} (corrected {})",
  438. // ref_pos + add_read,
  439. // (ref_pos + add_read) - v - 1
  440. // );
  441. if (ref_pos + add_read) - v - 1 == at_pos {
  442. let inserted_seq =
  443. seq.as_bytes()[read_i as usize..(read_i + v) as usize].to_vec();
  444. return Ok(Some(String::from_utf8(inserted_seq)?));
  445. }
  446. }
  447. }
  448. read_i += add_read;
  449. ref_pos += add_ref;
  450. }
  451. Ok(None)
  452. }
  453. pub fn counts_ins_at(
  454. bam: &mut rust_htslib::bam::IndexedReader,
  455. chr: &str,
  456. position: u32,
  457. ) -> anyhow::Result<HashMap<String, i32>> {
  458. let p = ins_pileup(bam, chr, position)?;
  459. let mut counts = HashMap::new();
  460. for item in p.iter() {
  461. *counts.entry(item.to_string()).or_insert(0) += 1;
  462. }
  463. Ok(counts)
  464. }
  465. pub fn sample_random_positions(chromosomes: &Vec<(String, u64)>, n: usize) -> Vec<(String, u64)> {
  466. let mut rng = rng();
  467. let total_length: u64 = chromosomes.iter().map(|(_, length)| length).sum();
  468. (0..n)
  469. .map(|_| {
  470. let random_position = rng.random_range(0..total_length);
  471. let mut cumulative_length = 0;
  472. for (chr, length) in chromosomes {
  473. cumulative_length += length;
  474. if random_position < cumulative_length {
  475. let position_within_chr = random_position - (cumulative_length - length);
  476. return (chr.clone(), position_within_chr);
  477. }
  478. }
  479. unreachable!("Should always find a chromosome")
  480. })
  481. .collect()
  482. }
  483. #[derive(Debug, Clone, Deserialize, Serialize)]
  484. pub struct WGSBamStats {
  485. pub all_records: u128,
  486. pub n_reads: u128,
  487. pub mapped_yield: u128,
  488. pub global_coverage: f64,
  489. pub karyotype: Vec<(i32, u64, String, u128)>,
  490. pub n50: u128,
  491. pub by_lengths: Vec<(u128, u32)>,
  492. }
  493. impl WGSBamStats {
  494. pub fn new(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
  495. let bam_path = config.solo_bam(id, time);
  496. let Config {
  497. bam_min_mapq,
  498. bam_n_threads,
  499. ..
  500. } = config;
  501. let mut bam = rust_htslib::bam::Reader::from_path(bam_path)?;
  502. let h = bam.header().clone();
  503. let header = rust_htslib::bam::Header::from_template(&h);
  504. bam.set_threads(bam_n_threads as usize)?;
  505. let mut mapped_lengths = Vec::new();
  506. let mut all_records = 0;
  507. let mut n_reads = 0;
  508. let mut lengths_by_tid: HashMap<i32, Vec<u128>> = HashMap::new();
  509. for read in bam
  510. .rc_records()
  511. .map(|x| x.expect("Failure parsing Bam file"))
  512. .inspect(|_| all_records += 1)
  513. .filter(|read| {
  514. read.flags()
  515. & (rust_htslib::htslib::BAM_FUNMAP
  516. | rust_htslib::htslib::BAM_FSECONDARY
  517. | rust_htslib::htslib::BAM_FQCFAIL
  518. | rust_htslib::htslib::BAM_FDUP) as u16
  519. == 0
  520. })
  521. .filter(|r| r.mapq() >= bam_min_mapq)
  522. {
  523. n_reads += 1;
  524. let mapped_len = (read.reference_end() - read.pos()) as u128;
  525. mapped_lengths.push(mapped_len);
  526. lengths_by_tid
  527. .entry(read.tid())
  528. .or_default()
  529. .push(mapped_len);
  530. if n_reads % 500_000 == 0 {
  531. info!("{n_reads} reads parsed");
  532. }
  533. }
  534. let mapped_yield = mapped_lengths.par_iter().sum::<u128>();
  535. let genome = get_genome_sizes(&header)?;
  536. let genome_size = genome.values().map(|v| *v as u128).sum::<u128>();
  537. let global_coverage = mapped_yield as f64 / genome_size as f64;
  538. let by_lengths: DashMap<u128, u32> = DashMap::new();
  539. mapped_lengths
  540. .par_iter()
  541. .for_each(|l| *by_lengths.entry(*l).or_default() += 1);
  542. let mut by_lengths: Vec<(u128, u32)> =
  543. by_lengths.iter().map(|e| (*e.key(), *e.value())).collect();
  544. by_lengths.par_sort_by(|a, b| a.0.cmp(&b.0));
  545. // This is not n50
  546. let n50 = by_lengths[by_lengths.len() / 2].0;
  547. let mut karyotype: Vec<(i32, u64, String, u128)> = lengths_by_tid
  548. .iter()
  549. .map(|(tid, lengths)| {
  550. let contig = String::from_utf8(h.tid2name(*tid as u32).to_vec()).unwrap();
  551. (
  552. *tid,
  553. *genome.get(&contig).unwrap(),
  554. contig,
  555. lengths.par_iter().sum::<u128>(),
  556. )
  557. })
  558. .collect();
  559. karyotype.sort_by(|(ac, _, _, _), (bc, _, _, _)| ac.cmp(bc));
  560. Ok(Self {
  561. all_records,
  562. n_reads,
  563. mapped_yield,
  564. global_coverage,
  565. karyotype,
  566. n50,
  567. by_lengths,
  568. })
  569. }
  570. pub fn print(&self) {
  571. println!("N records\t{}", self.all_records);
  572. println!("N counted reads\t{}", self.n_reads);
  573. println!("Mapped yield [Gb]\t{:.2}", self.mapped_yield as f64 / 1e9);
  574. println!("Mean coverage\t{:.2}", self.global_coverage);
  575. self.karyotype
  576. .iter()
  577. .for_each(|(_, contig_len, contig, contig_yield)| {
  578. println!(
  579. "{contig}\t{:.2}",
  580. (*contig_yield as f64 / *contig_len as f64) / self.global_coverage
  581. );
  582. });
  583. }
  584. }
  585. pub fn get_genome_sizes(header: &rust_htslib::bam::Header) -> anyhow::Result<HashMap<String, u64>> {
  586. let mut genome = HashMap::new();
  587. for (key, records) in header.to_hashmap() {
  588. for record in records {
  589. if key == "SQ" {
  590. genome.insert(
  591. record["SN"].to_string(),
  592. record["LN"]
  593. .parse::<u64>()
  594. .expect("Failed parsing length of chromosomes"),
  595. );
  596. }
  597. }
  598. }
  599. Ok(genome)
  600. }
  601. // fn softclipped_bases(read: &rust_htslib::bam::Record) -> u128 {
  602. // (read.cigar().leading_softclips() + read.cigar().trailing_softclips()) as u128
  603. // }