use std::{fs::File, io::BufReader}; use anyhow::Context; use log::debug; use crate::io::tsv::TsvLine; /// Read a sequence dictionary (`.dict`) file and return `(name, length)` pairs. /// /// Only `@SQ` lines are processed; other lines are skipped. Each `@SQ` line /// must contain `SN:` and `LN:` tab-separated fields. /// /// # Errors /// /// Returns an error if the file cannot be opened, an `@SQ` line is missing /// `SN:` or `LN:`, or `LN:` cannot be parsed as `u32`. pub fn read_dict(path: &str) -> anyhow::Result> { debug!("Parsing {path}."); let mut reader = BufReader::new(File::open(path).with_context(|| format!("cannot open dict: {path}"))?); let mut line = TsvLine::new(); let mut line_no = 0usize; let mut res = Vec::new(); while line .read(&mut reader) .with_context(|| format!("failed reading dict line after {path}:{line_no}"))? { line_no += 1; let fields = line.split_fields(); if fields.first().copied() != Some("@SQ") { continue; } let sn = fields .iter() .find_map(|f| f.strip_prefix("SN:")) .with_context(|| format!("Missing SN: in @SQ line at {path}:{line_no}"))? .to_string(); let ln: u32 = fields .iter() .find_map(|f| f.strip_prefix("LN:")) .with_context(|| format!("Missing LN: in @SQ line at {path}:{line_no}"))? .parse() .with_context(|| format!("Invalid LN: value at {path}:{line_no}"))?; res.push((sn, ln)); } Ok(res) }