1 месяц назад · 4dde55f778
--- a/src/io/bed.rs
+++ b/src/io/bed.rs
@@ -1,10 +1,20 @@
 
															-//! BED file parsing, indexing, and overlap-based variant annotation utilities.
														
 
															+//! BED file I/O, overlap queries, and genomic region annotation.
														
 
															 //!
														
 
															-//! This module provides:
														
 
															-//! - Parsing of BED rows into typed structures
														
 
															-//! - Efficient overlap queries between BED regions and genome ranges
														
 
															-//! - Parallel annotation of variants using BED-defined regions
														
 
															-//! - A pre-indexed BED structure for fast gene queries
														
 
															+//! All coordinates follow **BED convention: 0-based, half-open `[start, end)`**.
														
 
															+//! This matches [`GenomeRange`]'s internal `Range<u32>` representation, so BED
														
 
															+//! values are stored as-is without conversion.
														
 
															+//!
														
 
															+//! # Main types and functions
														
 
															+//!
														
 
															+//! | Item | Purpose |
														
 
															+//! |------|---------|
														
 
															+//! | [`BedRow`] | One parsed BED line (up to BED6) |
														
 
															+//! | [`read_bed`] | Load a BED file into memory, skipping headers and blank lines |
														
 
															+//! | [`annotate_with_bed`] | Annotate a [`Variants`] collection from a BED region set |
														
 
															+//! | [`bedrow_overlaps_par`] | Parallel overlap query: which BED rows hit a set of query ranges |
														
 
															+//! | [`GenesBedIndex`] | Per-contig indexed structure for fast gene-name lookup |
														
 
															+//! | [`convert_bgz_with_tabix`] | Compress a BED file to BGZF and build a Tabix index in one pass |
														
 
															+//! | [`parse_centromere_intervals`] | Extract centromeric intervals from a UCSC cytoband file |
														
 
															 use std::{
														
 
															     io::{BufRead, BufReader}, path::Path, str::FromStr, sync::Arc
														
@@ -24,22 +34,33 @@ use noodles_csi::binning_index::index::Header;
 
															 use noodles_tabix as tabix;
														
 
															 use std::io::Write;
														
 
															-/// One row of a BED file.
														
 
															+/// One parsed row from a BED file (up to BED6).
														
 
															 ///
														
 
															-/// Represents a genomic interval with optional name, score, and strand
														
 
															-/// information.
														
 
															+/// Coordinates in `range` are **0-based, half-open** as stored in the BED file.
														
 
															+/// Optional fields (`name`, `score`, `strand`) are `None` when the column is absent
														
 
															+/// or unparseable.
														
 
															 #[derive(Debug, Clone)]
														
 
															 pub struct BedRow {
														
 
															+    /// Genomic interval — 0-based half-open `[start, end)`
														
 
															     pub range: GenomeRange,
														
 
															+    /// BED column 4: feature name
														
 
															     pub name: Option<String>,
														
 
															+    /// BED column 5: score (0–1000 per spec, stored as `u16`)
														
 
															     pub score: Option<u16>,
														
 
															+    /// BED column 6: strand — `true` = `+`, `false` = `-`
														
 
															     pub strand: Option<bool>,
														
 
															 }
														
 
															-/// Parses a BED row from a tab-separated string.
														
 
															+/// Parse a tab-separated BED line into a [`BedRow`].
														
 
															+///
														
 
															+/// Expects at least 3 tab-separated columns: `chrom`, `start`, `end`.
														
 
															+/// Columns 4–6 (`name`, `score`, `strand`) are optional; missing or unparseable
														
 
															+/// values are stored as `None`. Coordinates are stored as-is (0-based half-open).
														
 
															+///
														
 
															+/// # Errors
														
 
															 ///
														
 
															-/// Expected format (BED6-compatible):
														
 
															-/// `contig  start  end  name?  score?  strand?`
														
 
															+/// Returns an error if fewer than 3 columns are present or if `start`/`end`
														
 
															+/// cannot be parsed as `u32`.
														
 
															 impl FromStr for BedRow {
														
 
															     type Err = anyhow::Error;
														
@@ -58,7 +79,7 @@ impl FromStr for BedRow {
 
															         Ok(Self {
														
 
															             range,
														
 
															-            name: v.get(3).map(|v| Some(v.to_string())).unwrap_or(None),
														
 
															+            name: v.get(3).map(|v| v.to_string()),
														
 
															             score: v.get(4).and_then(|v| v.parse().ok()),
														
 
															             strand: v.get(5).and_then(|&v| match v {
														
 
															                 "+" => Some(true),
														
@@ -69,19 +90,30 @@ impl FromStr for BedRow {
 
															     }
														
 
															 }
														
 
															-/// Exposes the genomic range of a BED row.
														
 
															 impl GetGenomeRange for BedRow {
														
 
															     fn range(&self) -> &GenomeRange {
														
 
															         &self.range
														
 
															     }
														
 
															 }
														
 
															-/// Reads a BED file into memory.
														
 
															+/// Load a BED file into memory as a vector of [`BedRow`]s.
														
 
															 ///
														
 
															-/// Lines starting with `#` are ignored.
														
 
															+/// Skips blank lines and lines starting with `#`, `track`, or `browser`
														
 
															+/// (standard UCSC header conventions). All other lines are parsed via
														
 
															+/// [`BedRow::from_str`].
														
 
															+///
														
 
															+/// # Arguments
														
 
															+///
														
 
															+/// * `path` - Path to a BED file (plain text or gzip-compressed)
														
 
															+///
														
 
															+/// # Returns
														
 
															+///
														
 
															+/// A vector of parsed rows in file order.
														
 
															 ///
														
 
															 /// # Errors
														
 
															-/// Returns an error if the file cannot be opened or if any row fails to parse.
														
 
															+///
														
 
															+/// Returns an error if the file cannot be opened or if any data line fails to parse.
														
 
															+/// I/O errors on individual lines are logged as warnings and skipped.
														
 
															 pub fn read_bed(path: &str) -> anyhow::Result<Vec<BedRow>> {
														
 
															     let reader = BufReader::new(get_reader(path)?);
														
@@ -89,7 +121,11 @@ pub fn read_bed(path: &str) -> anyhow::Result<Vec<BedRow>> {
 
															     for (i, line) in reader.lines().enumerate() {
														
 
															         match line {
														
 
															             Ok(line) => {
														
 
															-                if line.starts_with("#") {
														
 
															+                if line.is_empty()
														
 
															+                    || line.starts_with('#')
														
 
															+                    || line.starts_with("track")
														
 
															+                    || line.starts_with("browser")
														
 
															+                {
														
 
															                     continue;
														
 
															                 }
														
 
															                 res.push(line.parse().context(format!("Can't parse {line}"))?);
														
@@ -101,18 +137,26 @@ pub fn read_bed(path: &str) -> anyhow::Result<Vec<BedRow>> {
 
															     Ok(res)
														
 
															 }
														
 
															-/// Annotates variants with a given annotation based on overlap with a BED file,
														
 
															-/// and returns total base count and number of overlapping mutations.
														
 
															+/// Annotate variants that overlap a BED region set and return coverage statistics.
														
 
															+///
														
 
															+/// Loads the BED file, finds all variants whose position overlaps any BED region
														
 
															+/// (via [`overlaps_par`]), pushes `annotation` onto each matching variant, and
														
 
															+/// returns the total covered base-pair count alongside the overlap count.
														
 
															 ///
														
 
															 /// # Arguments
														
 
															-/// * `variants` - Mutable reference to a `Variants` collection to annotate.
														
 
															-/// * `bed_path` - Path to the BED file defining the region class.
														
 
															-/// * `annotation` - The `Annotation` to assign to overlapping variants.
														
 
															+///
														
 
															+/// * `variants` - Variant collection to annotate in place
														
 
															+/// * `bed_path` - Path to the BED file defining the region class
														
 
															+/// * `annotation` - Annotation tag to attach to each overlapping variant
														
 
															 ///
														
 
															 /// # Returns
														
 
															-/// `Ok((total_bp, overlap_count))` — where:
														
 
															-///   - `total_bp`: number of base pairs in the BED regions
														
 
															-///   - `overlap_count`: number of variants overlapping those regions
														
 
															+///
														
 
															+/// `(total_bp, overlap_count)` where `total_bp` is the sum of all BED interval
														
 
															+/// lengths and `overlap_count` is the number of variants that received the annotation.
														
 
															+///
														
 
															+/// # Errors
														
 
															+///
														
 
															+/// Returns an error if the BED file cannot be read.
														
 
															 pub fn annotate_with_bed(
														
 
															     variants: &mut Variants,
														
 
															     bed_path: &str,
														
@@ -121,16 +165,10 @@ pub fn annotate_with_bed(
 
															     let bed_rows = read_bed(bed_path)?;
														
 
															     let ranges: Vec<&GenomeRange> = bed_rows.iter().map(|b| &b.range).collect();
														
 
															-    // Total number of base pairs in the BED regions
														
 
															     let total_bp: usize = ranges.iter().map(|r| r.length() as usize).sum();
														
 
															-
														
 
															-    // Extract positions of all variants
														
 
															     let positions: Vec<&GenomePosition> = variants.data.iter().map(|v| &v.position).collect();
														
 
															-
														
 
															-    // Find indices of variants overlapping the BED ranges
														
 
															     let overlaps = overlaps_par(&positions, &ranges);
														
 
															-    // Annotate overlapping variants
														
 
															     for &idx in &overlaps {
														
 
															         variants.data[idx].annotations.push(annotation.clone());
														
 
															     }
														
@@ -138,9 +176,25 @@ pub fn annotate_with_bed(
 
															     Ok((total_bp, overlaps.len()))
														
 
															 }
														
 
															-/// Returns all BED rows overlapping a set of query ranges (parallel).
														
 
															+/// Return every BED row that overlaps at least one query range, parallelised per contig.
														
 
															+///
														
 
															+/// Both inputs must be sorted by `(contig, start)`. Each matching row appears exactly
														
 
															+/// once in the output regardless of how many queries it overlaps.
														
 
															+///
														
 
															+/// Uses an anchored-scan algorithm (O(n + m)) that is correct even when rows or
														
 
															+/// queries overlap each other (e.g. nested genes). The anchor `j0` advances only
														
 
															+/// when a query ends at or before the current row's start (half-open), so no hit can be
														
 
															+/// missed.
														
 
															+///
														
 
															+/// # Arguments
														
 
															 ///
														
 
															-/// Input rows and queries must be sorted by contig and position.
														
 
															+/// * `rows` - BED rows sorted by `(contig, start)`
														
 
															+/// * `queries` - Query ranges sorted by `(contig, start)`
														
 
															+///
														
 
															+/// # Returns
														
 
															+///
														
 
															+/// Matching BED rows in per-contig order. Cross-contig order is unspecified (parallel
														
 
															+/// execution).
														
 
															 pub fn bedrow_overlaps_par(
														
 
															     rows: &[BedRow],
														
 
															     queries: &[&GenomeRange],
														
@@ -148,86 +202,57 @@ pub fn bedrow_overlaps_par(
 
															 where
														
 
															     BedRow: Clone + Send + Sync,
														
 
															 {
														
 
															-    // Pre-compute [start, end) indices per contig for both inputs.
														
 
															+    let row_ranges: Vec<&GenomeRange> = rows.iter().map(|r| &r.range).collect();
														
 
															     let (row_contigs, query_contigs) = rayon::join(
														
 
															-        || extract_contig_indices_bed(rows),
														
 
															+        || extract_contig_indices(&row_ranges),
														
 
															         || extract_contig_indices(queries),
														
 
															     );
														
 
															     row_contigs
														
 
															-        .into_par_iter()                             // one task per contig
														
 
															+        .into_par_iter()
														
 
															         .filter_map(|(contig, r_start, r_end)| {
														
 
															-            // No queries on this contig → skip the task.
														
 
															             let (q_start, q_end) = find_contig_indices(&query_contigs, contig)?;
														
 
															             let r_slice = &rows[r_start..r_end];
														
 
															             let q_slice = &queries[q_start..q_end];
														
 
															             let mut hits = Vec::new();
														
 
															-            let (mut i, mut j) = (0usize, 0usize);
														
 
															-
														
 
															-            // Classic two-finger sweep.
														
 
															-            while i < r_slice.len() && j < q_slice.len() {
														
 
															-                let r_range = r_slice[i].range();           // BedRow → GenomeRange
														
 
															-                let q_range = &q_slice[j].range;
														
 
															-
														
 
															-                match (r_range.range.end <= q_range.start, q_range.end <= r_range.range.start) {
														
 
															-                    (true, _) => i += 1,                    // row finishes before query starts
														
 
															-                    (_, true) => j += 1,                    // query finishes before row starts
														
 
															-                    _ => {
														
 
															-                        hits.push(r_slice[i].clone());      // overlap detected
														
 
															-                        if r_range.range.end < q_range.end {
														
 
															-                            i += 1;
														
 
															-                        } else {
														
 
															-                            j += 1;
														
 
															-                        }
														
 
															-                    }
														
 
															+            let mut j0 = 0usize;
														
 
															+
														
 
															+            for row in r_slice {
														
 
															+                let r = row.range();
														
 
															+
														
 
															+                while j0 < q_slice.len() && q_slice[j0].range.end <= r.range.start {
														
 
															+                    j0 += 1;
														
 
															+                }
														
 
															+
														
 
															+                if j0 < q_slice.len() && q_slice[j0].range.start < r.range.end {
														
 
															+                    hits.push(row.clone());
														
 
															                 }
														
 
															             }
														
 
															+
														
 
															             Some(hits)
														
 
															         })
														
 
															         .flatten()
														
 
															         .collect()
														
 
															 }
														
 
															-/// Computes contiguous slice indices for BED rows grouped by contig.
														
 
															-///
														
 
															-/// Assumes input is sorted by contig.
														
 
															-fn extract_contig_indices_bed(rows: &[BedRow]) -> Vec<(u8, usize, usize)> {
														
 
															-    let mut out = Vec::new();
														
 
															-    if rows.is_empty() {
														
 
															-        return out;
														
 
															-    }
														
 
															-
														
 
															-    let mut current = rows[0].range.contig;
														
 
															-    let mut start = 0;
														
 
															-    for (idx, row) in rows.iter().enumerate() {
														
 
															-        if row.range.contig != current {
														
 
															-            out.push((current, start, idx));
														
 
															-            current = row.range.contig;
														
 
															-            start = idx;
														
 
															-        }
														
 
															-    }
														
 
															-    out.push((current, start, rows.len()));
														
 
															-    out
														
 
															-}
														
 
															-
														
 
															-/// Pre-indexed BED structure for fast gene lookup by genomic interval.
														
 
															+/// Per-contig indexed BED structure for fast gene-name lookup.
														
 
															 ///
														
 
															-/// BED rows are grouped by contig and stored in sorted order.
														
 
															+/// Rows are sorted by `(contig, start, end)` at construction time and stored in
														
 
															+/// 256 contig slots (one per possible `u8` value). Lookups scan linearly from the contig's
														
 
															+/// first row and stop once a row starts at or after `end`, so very long host genes are not missed.
														
 
															 #[derive(Clone)]
														
 
															 pub struct GenesBedIndex {
														
 
															-    // contig (u8) -> BedRows on that contig, sorted by range.start
														
 
															     by_contig: Vec<Arc<[BedRow]>>,
														
 
															 }
														
 
															 impl GenesBedIndex {
														
 
															-    /// Builds a contig-indexed BED structure.
														
 
															+    /// Build a [`GenesBedIndex`] from an unsorted vector of [`BedRow`]s.
														
 
															     ///
														
 
															-    /// Input rows are sorted and grouped internally.
														
 
															+    /// Rows are sorted by `(contig, start, end)` internally.
														
 
															     pub fn new(mut rows: Vec<BedRow>) -> Self {
														
 
															-        // Ensure deterministic grouping & fast queries
														
 
															         rows.sort_unstable_by(|a, b| {
														
 
															             a.range.contig
														
 
															                 .cmp(&b.range.contig)
														
@@ -235,63 +260,63 @@ impl GenesBedIndex {
 
															                 .then_with(|| a.range.range.end.cmp(&b.range.range.end))
														
 
															         });
														
 
															-        // 256 is fine if you encode contigs into u8
														
 
															         let mut tmp: Vec<Vec<BedRow>> = vec![Vec::new(); 256];
														
 
															         for r in rows {
														
 
															             tmp[r.range.contig as usize].push(r);
														
 
															         }
														
 
															-        let by_contig = tmp
														
 
															-            .into_iter()
														
 
															-            .map(Arc::<[BedRow]>::from)
														
 
															-            .collect();
														
 
															-
														
 
															-        Self { by_contig }
														
 
															+        Self {
														
 
															+            by_contig: tmp.into_iter().map(Arc::<[BedRow]>::from).collect(),
														
 
															+        }
														
 
															     }
														
 
															-    /// Returns gene names overlapping the given interval.
														
 
															-    #[inline]
														
 
															+    /// Return the names of all genes overlapping `[start, end)` on `contig`.
														
 
															+    ///
														
 
															+    /// Coordinates are 0-based half-open. If `start > end` the arguments are
														
 
															+    /// swapped defensively. Genes without a name field are silently skipped.
														
 
															+    ///
														
 
															+    /// # Arguments
														
 
															+    ///
														
 
															+    /// * `contig` - Encoded contig index (see `contig_to_num`)
														
 
															+    /// * `start` - Query start, 0-based inclusive
														
 
															+    /// * `end` - Query end, 0-based exclusive
														
 
															+    ///
														
 
															+    /// # Returns
														
 
															+    ///
														
 
															+    /// Names of overlapping genes in start-sorted order.
														
 
															     pub fn query_genes(&self, contig: u8, start: u32, end: u32) -> Vec<String> {
														
 
															         let (s, e) = if start <= end { (start, end) } else { (end, start) };
														
 
															         let rows = &self.by_contig[contig as usize];
														
 
															-        if rows.is_empty() {
														
 
															-            return Vec::new();
														
 
															-        }
														
 
															-
														
 
															-        // lower_bound on start
														
 
															-        let mut i = match rows.binary_search_by_key(&s, |r| r.range.range.start) {
														
 
															-            Ok(i) | Err(i) => i,
														
 
															-        };
														
 
															-        i = i.saturating_sub(1);
														
 
															-
														
 
															-        let mut out: Vec<String> = Vec::new();
														
 
															-        while i < rows.len() {
														
 
															-            let r = &rows[i];
														
 
															-            let rs = r.range.range.start;
														
 
															-            let re = r.range.range.end;
														
 
															-            if rs > e {
														
 
															-                break;
														
 
															-            }
														
 
															-            if re >= s && rs <= e {
														
 
															-                if let Some(name) = &r.name {
														
 
															-                    out.push(name.clone());
														
 
															-                }
														
 
															-            }
														
 
															-            i += 1;
														
 
															-        }
														
 
															-
														
 
															-        out
														
 
															+        rows.iter()
														
 
															+            .take_while(|r| r.range.range.start < e)
														
 
															+            .filter(|r| r.range.range.end > s)
														
 
															+            .filter_map(|r| r.name.clone())
														
 
															+            .collect()
														
 
															     }
														
 
															 }
														
 
															-/// Convert BED -> BGZF (.gz) and create a Tabix index (.tbi) in the same pass.
														
 
															+/// Compress a BED file to BGZF and build a Tabix index (`.tbi`) in a single pass.
														
 
															+///
														
 
															+/// The output files are `<input>.gz` and `<input>.gz.tbi`. The input must be
														
 
															+/// tab-delimited, contig-grouped, and coordinate-sorted (Tabix requirement).
														
 
															 ///
														
 
															-/// Notes:
														
 
															-/// - Tabix expects the file to be tab-delimited, grouped by contig, and coordinate-sorted.
														
 
															-/// - BED is 0-based, half-open; Tabix uses 1-based positions. We convert as:
														
 
															-///   start_pos = start0 + 1
														
 
															-///   end_pos   = end0
														
 
															+/// # Coordinate conversion
														
 
															+///
														
 
															+/// BED uses 0-based half-open `[start, end)`. Tabix uses 1-based positions.
														
 
															+/// The conversion is:
														
 
															+/// - `tabix_start = bed_start + 1`  (0-based → 1-based)
														
 
															+/// - `tabix_end   = bed_end`        (a 0-based exclusive end equals the 1-based inclusive end)
														
 
															+///
														
 
															+/// # Arguments
														
 
															+///
														
 
															+/// * `input` - Path to the input BED file
														
 
															+/// * `force` - Overwrite the output `.gz` file if it already exists
														
 
															+///
														
 
															+/// # Errors
														
 
															+///
														
 
															+/// Returns an error if the input cannot be read, the output cannot be written,
														
 
															+/// or any data line is malformed (missing columns, non-integer coordinates).
														
 
															 pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::Result<()> {
														
 
															     let input = input.as_ref();
														
 
															     let out_bgz = format!("{}.gz", input.display());
														
@@ -304,7 +329,7 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
															     // Build a tabix index while we write.
														
 
															     let mut indexer = tabix::index::Indexer::default();
														
 
															-    indexer.set_header(Header::default()); // minimal header; usable for most tools :contentReference[oaicite:1]{index=1}
														
 
															+    indexer.set_header(Header::default());
														
 
															     let mut line = String::new();
														
 
															     loop {
														
@@ -314,17 +339,10 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
															             break;
														
 
															         }
														
 
															-        // Record start virtual offset for this line in the *output* bgzf stream.
														
 
															-        // let chunk_start = writer.bgzf_pos(); // BGZF virtual offset :contentReference[oaicite:2]{index=2}
														
 
															-
														
 
															         let chunk_start = writer.virtual_position();
														
 
															-        // Write line as-is.
														
 
															         writer.write_all(line.as_bytes())?;
														
 
															+        let chunk_end = writer.virtual_position();
														
 
															-        // Record end virtual offset after writing the line.
														
 
															-        let chunk_end = writer.virtual_position(); // :contentReference[oaicite:3]{index=3}
														
 
															-
														
 
															-        // Add to tabix index if it's a data line (skip meta/header lines).
														
 
															         if !line.starts_with('#') && !line.trim().is_empty() {
														
 
															             let mut it = line.split('\t');
														
 
															             let rname = it
														
@@ -340,10 +358,10 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
															             let end0: u32 = it
														
 
															                 .next()
														
 
															                 .context("BED: missing end")?
														
 
															+                .trim()
														
 
															                 .parse()
														
 
															                 .context("BED: invalid end")?;
														
 
															-            // BED 0-based half-open -> 1-based inclusive-ish interval for tabix
														
 
															             let start1 = start0
														
 
															                 .checked_add(1)
														
 
															                 .context("BED: start overflow")?;
														
@@ -353,10 +371,7 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
															             let end = Position::try_from(end0 as usize)
														
 
															                 .context("BED: end must be >= 1 for tabix indexing")?;
														
 
															-            let chunk = Chunk::new(
														
 
															-                chunk_start,
														
 
															-                chunk_end,
														
 
															-            ); // chunk is [start, end) in virtual offsets :contentReference[oaicite:4]{index=4}
														
 
															+            let chunk = Chunk::new(chunk_start, chunk_end);
														
 
															             indexer
														
 
															                 .add_record(rname, start, end, chunk)
														
@@ -365,20 +380,33 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
															     }
														
 
															     finalize_bgzf_file(writer, &out_bgz)?;
														
 
															-    // writer.close()?; // writes EOF marker :contentReference[oaicite:5]{index=5}
														
 
															-    let index = indexer.build(); // :contentReference[oaicite:6]{index=6}
														
 
															-    tabix::fs::write(&out_tbi, &index)?; // :contentReference[oaicite:7]{index=7}
														
 
															+    let index = indexer.build();
														
 
															+    tabix::fs::write(&out_tbi, &index)?;
														
 
															     Ok(())
														
 
															 }
														
 
															-/// Returns a set of (chrom, start, end) intervals for centromeric regions
														
 
															-/// parsed from a UCSC-format cytoband BED.
														
 
															+/// Parse centromeric intervals from a UCSC cytoband BED file.
														
 
															 ///
														
 
															-/// Centromeres are identified by the `acen` stain field (column 4).
														
 
															+/// Identifies rows whose stain field (column 5) equals `acen` and returns their
														
 
															+/// coordinates as `(chrom, start, end)`. Coordinates are 0-based half-open as
														
 
															+/// stored in the file.
														
 
															+///
														
 
															+/// Expected column layout: `chrom\tstart\tend\tband_name\tstain`
														
 
															+///
														
 
															+/// # Arguments
														
 
															+///
														
 
															+/// * `path` - Path to the cytoband BED file
														
 
															+///
														
 
															+/// # Returns
														
 
															+///
														
 
															+/// A vector of `(chrom, start, end)` tuples for all `acen` bands, in file order.
														
 
															+///
														
 
															+/// # Errors
														
 
															 ///
														
 
															-/// Format: `chrom\tstart\tend\tband_name\tstain`
														
 
															+/// Returns an error if the file cannot be opened, a line has fewer than 5 columns,
														
 
															+/// or a coordinate field cannot be parsed as `u64`.
														
 
															 pub fn parse_centromere_intervals(path: &str) -> anyhow::Result<Vec<(String, u64, u64)>> {
														
 
															     let reader = BufReader::new(get_reader(path)
														
 
															         .with_context(|| format!("Cannot open cytobands BED: {path}"))?);