| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- use std::{fs::File, io::BufReader};
- use anyhow::Context;
- use log::debug;
- use crate::io::tsv::TsvLine;
- /// Read a sequence dictionary (`.dict`) file and return `(name, length)` pairs.
- ///
- /// Only `@SQ` lines are processed; other lines are skipped. Each `@SQ` line
- /// must contain `SN:<name>` and `LN:<length>` tab-separated fields.
- ///
- /// # Errors
- ///
- /// Returns an error if the file cannot be opened, an `@SQ` line is missing
- /// `SN:` or `LN:`, or `LN:` cannot be parsed as `u32`.
- pub fn read_dict(path: &str) -> anyhow::Result<Vec<(String, u32)>> {
- debug!("Parsing {path}.");
- let mut reader =
- BufReader::new(File::open(path).with_context(|| format!("cannot open dict: {path}"))?);
- let mut line = TsvLine::new();
- let mut line_no = 0usize;
- let mut res = Vec::new();
- while line
- .read(&mut reader)
- .with_context(|| format!("failed reading dict line after {path}:{line_no}"))?
- {
- line_no += 1;
- let fields = line.split_fields();
- if fields.first().copied() != Some("@SQ") {
- continue;
- }
- let sn = fields
- .iter()
- .find_map(|f| f.strip_prefix("SN:"))
- .with_context(|| format!("Missing SN: in @SQ line at {path}:{line_no}"))?
- .to_string();
- let ln: u32 = fields
- .iter()
- .find_map(|f| f.strip_prefix("LN:"))
- .with_context(|| format!("Missing LN: in @SQ line at {path}:{line_no}"))?
- .parse()
- .with_context(|| format!("Invalid LN: value at {path}:{line_no}"))?;
- res.push((sn, ln));
- }
- Ok(res)
- }
|