1 місяць тому · 4dde55f778
--- a/src/io/bed.rs
+++ b/src/io/bed.rs
@@ -1,10 +1,20 @@
 
				-//! BED file parsing, indexing, and overlap-based variant annotation utilities.
			
 
				+//! BED file I/O, overlap queries, and genomic region annotation.
			
 
				 //!
			
 
				-//! This module provides:
			
 
				-//! - Parsing of BED rows into typed structures
			
 
				-//! - Efficient overlap queries between BED regions and genome ranges
			
 
				-//! - Parallel annotation of variants using BED-defined regions
			
 
				-//! - A pre-indexed BED structure for fast gene queries
			
 
				+//! All coordinates follow **BED convention: 0-based, half-open `[start, end)`**.
			
 
				+//! This matches [`GenomeRange`]'s internal `Range<u32>` representation, so BED
			
 
				+//! values are stored as-is without conversion.
			
 
				+//!
			
 
				+//! # Main types and functions
			
 
				+//!
			
 
				+//! | Item | Purpose |
			
 
				+//! |------|---------|
			
 
				+//! | [`BedRow`] | One parsed BED line (up to BED6) |
			
 
				+//! | [`read_bed`] | Load a BED file into memory, skipping headers and blank lines |
			
 
				+//! | [`annotate_with_bed`] | Annotate a [`Variants`] collection from a BED region set |
			
 
				+//! | [`bedrow_overlaps_par`] | Parallel overlap query: which BED rows hit a set of query ranges |
			
 
				+//! | [`GenesBedIndex`] | Per-contig indexed structure for fast gene-name lookup |
			
 
				+//! | [`convert_bgz_with_tabix`] | Compress a BED file to BGZF and build a Tabix index in one pass |
			
 
				+//! | [`parse_centromere_intervals`] | Extract centromeric intervals from a UCSC cytoband file |
			
 
				 
			
 
				 use std::{
			
 
				     io::{BufRead, BufReader}, path::Path, str::FromStr, sync::Arc
			
@@ -24,22 +34,33 @@ use noodles_csi::binning_index::index::Header;
 
				 use noodles_tabix as tabix;
			
 
				 use std::io::Write;
			
 
				 
			
 
				-/// One row of a BED file.
			
 
				+/// One parsed row from a BED file (up to BED6).
			
 
				 ///
			
 
				-/// Represents a genomic interval with optional name, score, and strand
			
 
				-/// information.
			
 
				+/// Coordinates in `range` are **0-based, half-open** as stored in the BED file.
			
 
				+/// Optional fields (`name`, `score`, `strand`) are `None` when the column is absent
			
 
				+/// or unparseable.
			
 
				 #[derive(Debug, Clone)]
			
 
				 pub struct BedRow {
			
 
				+    /// Genomic interval — 0-based half-open `[start, end)`
			
 
				     pub range: GenomeRange,
			
 
				+    /// BED column 4: feature name
			
 
				     pub name: Option<String>,
			
 
				+    /// BED column 5: score (0–1000 per spec, stored as `u16`)
			
 
				     pub score: Option<u16>,
			
 
				+    /// BED column 6: strand — `true` = `+`, `false` = `-`
			
 
				     pub strand: Option<bool>,
			
 
				 }
			
 
				 
			
 
				-/// Parses a BED row from a tab-separated string.
			
 
				+/// Parse a tab-separated BED line into a [`BedRow`].
			
 
				+///
			
 
				+/// Expects at least 3 tab-separated columns: `chrom`, `start`, `end`.
			
 
				+/// Columns 4–6 (`name`, `score`, `strand`) are optional; missing or unparseable
			
 
				+/// values are stored as `None`. Coordinates are stored as-is (0-based half-open).
			
 
				+///
			
 
				+/// # Errors
			
 
				 ///
			
 
				-/// Expected format (BED6-compatible):
			
 
				-/// `contig  start  end  name?  score?  strand?`
			
 
				+/// Returns an error if fewer than 3 columns are present or if `start`/`end`
			
 
				+/// cannot be parsed as `u32`.
			
 
				 impl FromStr for BedRow {
			
 
				     type Err = anyhow::Error;
			
 
				 
			
@@ -58,7 +79,7 @@ impl FromStr for BedRow {
 
				 
			
 
				         Ok(Self {
			
 
				             range,
			
 
				-            name: v.get(3).map(|v| Some(v.to_string())).unwrap_or(None),
			
 
				+            name: v.get(3).map(|v| v.to_string()),
			
 
				             score: v.get(4).and_then(|v| v.parse().ok()),
			
 
				             strand: v.get(5).and_then(|&v| match v {
			
 
				                 "+" => Some(true),
			
@@ -69,19 +90,30 @@ impl FromStr for BedRow {
 
				     }
			
 
				 }
			
 
				 
			
 
				-/// Exposes the genomic range of a BED row.
			
 
				 impl GetGenomeRange for BedRow {
			
 
				     fn range(&self) -> &GenomeRange {
			
 
				         &self.range
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-/// Reads a BED file into memory.
			
 
				+/// Load a BED file into memory as a vector of [`BedRow`]s.
			
 
				 ///
			
 
				-/// Lines starting with `#` are ignored.
			
 
				+/// Skips blank lines and lines starting with `#`, `track`, or `browser`
			
 
				+/// (standard UCSC header conventions). All other lines are parsed via
			
 
				+/// [`BedRow::from_str`].
			
 
				+///
			
 
				+/// # Arguments
			
 
				+///
			
 
				+/// * `path` - Path to a BED file (plain text or gzip-compressed)
			
 
				+///
			
 
				+/// # Returns
			
 
				+///
			
 
				+/// A vector of parsed rows in file order.
			
 
				 ///
			
 
				 /// # Errors
			
 
				-/// Returns an error if the file cannot be opened or if any row fails to parse.
			
 
				+///
			
 
				+/// Returns an error if the file cannot be opened or if any data line fails to parse.
			
 
				+/// I/O errors on individual lines are logged as warnings and skipped.
			
 
				 pub fn read_bed(path: &str) -> anyhow::Result<Vec<BedRow>> {
			
 
				     let reader = BufReader::new(get_reader(path)?);
			
 
				 
			
@@ -89,7 +121,11 @@ pub fn read_bed(path: &str) -> anyhow::Result<Vec<BedRow>> {
 
				     for (i, line) in reader.lines().enumerate() {
			
 
				         match line {
			
 
				             Ok(line) => {
			
 
				-                if line.starts_with("#") {
			
 
				+                if line.is_empty()
			
 
				+                    || line.starts_with('#')
			
 
				+                    || line.starts_with("track")
			
 
				+                    || line.starts_with("browser")
			
 
				+                {
			
 
				                     continue;
			
 
				                 }
			
 
				                 res.push(line.parse().context(format!("Can't parse {line}"))?);
			
@@ -101,18 +137,26 @@ pub fn read_bed(path: &str) -> anyhow::Result<Vec<BedRow>> {
 
				     Ok(res)
			
 
				 }
			
 
				 
			
 
				-/// Annotates variants with a given annotation based on overlap with a BED file,
			
 
				-/// and returns total base count and number of overlapping mutations.
			
 
				+/// Annotate variants that overlap a BED region set and return coverage statistics.
			
 
				+///
			
 
				+/// Loads the BED file, finds all variants whose position overlaps any BED region
			
 
				+/// (via [`overlaps_par`]), pushes `annotation` onto each matching variant, and
			
 
				+/// returns the total covered base-pair count alongside the overlap count.
			
 
				 ///
			
 
				 /// # Arguments
			
 
				-/// * `variants` - Mutable reference to a `Variants` collection to annotate.
			
 
				-/// * `bed_path` - Path to the BED file defining the region class.
			
 
				-/// * `annotation` - The `Annotation` to assign to overlapping variants.
			
 
				+///
			
 
				+/// * `variants` - Variant collection to annotate in place
			
 
				+/// * `bed_path` - Path to the BED file defining the region class
			
 
				+/// * `annotation` - Annotation tag to attach to each overlapping variant
			
 
				 ///
			
 
				 /// # Returns
			
 
				-/// `Ok((total_bp, overlap_count))` — where:
			
 
				-///   - `total_bp`: number of base pairs in the BED regions
			
 
				-///   - `overlap_count`: number of variants overlapping those regions
			
 
				+///
			
 
				+/// `(total_bp, overlap_count)` where `total_bp` is the sum of all BED interval
			
 
				+/// lengths and `overlap_count` is the number of variants that received the annotation.
			
 
				+///
			
 
				+/// # Errors
			
 
				+///
			
 
				+/// Returns an error if the BED file cannot be read.
			
 
				 pub fn annotate_with_bed(
			
 
				     variants: &mut Variants,
			
 
				     bed_path: &str,
			
@@ -121,16 +165,10 @@ pub fn annotate_with_bed(
 
				     let bed_rows = read_bed(bed_path)?;
			
 
				     let ranges: Vec<&GenomeRange> = bed_rows.iter().map(|b| &b.range).collect();
			
 
				 
			
 
				-    // Total number of base pairs in the BED regions
			
 
				     let total_bp: usize = ranges.iter().map(|r| r.length() as usize).sum();
			
 
				-
			
 
				-    // Extract positions of all variants
			
 
				     let positions: Vec<&GenomePosition> = variants.data.iter().map(|v| &v.position).collect();
			
 
				-
			
 
				-    // Find indices of variants overlapping the BED ranges
			
 
				     let overlaps = overlaps_par(&positions, &ranges);
			
 
				 
			
 
				-    // Annotate overlapping variants
			
 
				     for &idx in &overlaps {
			
 
				         variants.data[idx].annotations.push(annotation.clone());
			
 
				     }
			
@@ -138,9 +176,25 @@ pub fn annotate_with_bed(
 
				     Ok((total_bp, overlaps.len()))
			
 
				 }
			
 
				 
			
 
				-/// Returns all BED rows overlapping a set of query ranges (parallel).
			
 
				+/// Return every BED row that overlaps at least one query range, parallelised per contig.
			
 
				+///
			
 
				+/// Both inputs must be sorted by `(contig, start)`. Each matching row appears exactly
			
 
				+/// once in the output regardless of how many queries it overlaps.
			
 
				+///
			
 
				+/// Uses an anchored-scan algorithm (O(n + m)) that is correct even when rows or
			
 
				+/// queries overlap each other (e.g. nested genes). The anchor `j0` advances only
			
 
				+/// when a query definitively ends before the current row starts, so no hit can be
			
 
				+/// missed.
			
 
				+///
			
 
				+/// # Arguments
			
 
				 ///
			
 
				-/// Input rows and queries must be sorted by contig and position.
			
 
				+/// * `rows` - BED rows sorted by `(contig, start)`
			
 
				+/// * `queries` - Query ranges sorted by `(contig, start)`
			
 
				+///
			
 
				+/// # Returns
			
 
				+///
			
 
				+/// Matching BED rows in per-contig order. Cross-contig order is unspecified (parallel
			
 
				+/// execution).
			
 
				 pub fn bedrow_overlaps_par(
			
 
				     rows: &[BedRow],
			
 
				     queries: &[&GenomeRange],
			
@@ -148,86 +202,57 @@ pub fn bedrow_overlaps_par(
 
				 where
			
 
				     BedRow: Clone + Send + Sync,
			
 
				 {
			
 
				-    // Pre-compute [start, end) indices per contig for both inputs.
			
 
				+    let row_ranges: Vec<&GenomeRange> = rows.iter().map(|r| &r.range).collect();
			
 
				     let (row_contigs, query_contigs) = rayon::join(
			
 
				-        || extract_contig_indices_bed(rows),
			
 
				+        || extract_contig_indices(&row_ranges),
			
 
				         || extract_contig_indices(queries),
			
 
				     );
			
 
				 
			
 
				     row_contigs
			
 
				-        .into_par_iter()                             // one task per contig
			
 
				+        .into_par_iter()
			
 
				         .filter_map(|(contig, r_start, r_end)| {
			
 
				-            // No queries on this contig → skip the task.
			
 
				             let (q_start, q_end) = find_contig_indices(&query_contigs, contig)?;
			
 
				 
			
 
				             let r_slice = &rows[r_start..r_end];
			
 
				             let q_slice = &queries[q_start..q_end];
			
 
				 
			
 
				             let mut hits = Vec::new();
			
 
				-            let (mut i, mut j) = (0usize, 0usize);
			
 
				-
			
 
				-            // Classic two-finger sweep.
			
 
				-            while i < r_slice.len() && j < q_slice.len() {
			
 
				-                let r_range = r_slice[i].range();           // BedRow → GenomeRange
			
 
				-                let q_range = &q_slice[j].range;
			
 
				-
			
 
				-                match (r_range.range.end <= q_range.start, q_range.end <= r_range.range.start) {
			
 
				-                    (true, _) => i += 1,                    // row finishes before query starts
			
 
				-                    (_, true) => j += 1,                    // query finishes before row starts
			
 
				-                    _ => {
			
 
				-                        hits.push(r_slice[i].clone());      // overlap detected
			
 
				-                        if r_range.range.end < q_range.end {
			
 
				-                            i += 1;
			
 
				-                        } else {
			
 
				-                            j += 1;
			
 
				-                        }
			
 
				-                    }
			
 
				+            let mut j0 = 0usize;
			
 
				+
			
 
				+            for row in r_slice {
			
 
				+                let r = row.range();
			
 
				+
			
 
				+                while j0 < q_slice.len() && q_slice[j0].range.end <= r.range.start {
			
 
				+                    j0 += 1;
			
 
				+                }
			
 
				+
			
 
				+                if j0 < q_slice.len() && q_slice[j0].range.start < r.range.end {
			
 
				+                    hits.push(row.clone());
			
 
				                 }
			
 
				             }
			
 
				+
			
 
				             Some(hits)
			
 
				         })
			
 
				         .flatten()
			
 
				         .collect()
			
 
				 }
			
 
				 
			
 
				-/// Computes contiguous slice indices for BED rows grouped by contig.
			
 
				-///
			
 
				-/// Assumes input is sorted by contig.
			
 
				-fn extract_contig_indices_bed(rows: &[BedRow]) -> Vec<(u8, usize, usize)> {
			
 
				-    let mut out = Vec::new();
			
 
				-    if rows.is_empty() {
			
 
				-        return out;
			
 
				-    }
			
 
				-
			
 
				-    let mut current = rows[0].range.contig;
			
 
				-    let mut start = 0;
			
 
				 
			
 
				-    for (idx, row) in rows.iter().enumerate() {
			
 
				-        if row.range.contig != current {
			
 
				-            out.push((current, start, idx));
			
 
				-            current = row.range.contig;
			
 
				-            start = idx;
			
 
				-        }
			
 
				-    }
			
 
				-    out.push((current, start, rows.len()));
			
 
				-    out
			
 
				-}
			
 
				-
			
 
				-/// Pre-indexed BED structure for fast gene lookup by genomic interval.
			
 
				+/// Per-contig indexed BED structure for fast gene-name lookup.
			
 
				 ///
			
 
				-/// BED rows are grouped by contig and stored in sorted order.
			
 
				+/// Rows are sorted by `(contig, start, end)` at construction time and stored in
			
 
				+/// 256 contig slots (one per encoded `u8` contig). Lookups are O(rows before `end`)
			
 
				+/// with early termination — correct even for very long host genes.
			
 
				 #[derive(Clone)]
			
 
				 pub struct GenesBedIndex {
			
 
				-    // contig (u8) -> BedRows on that contig, sorted by range.start
			
 
				     by_contig: Vec<Arc<[BedRow]>>,
			
 
				 }
			
 
				 
			
 
				 impl GenesBedIndex {
			
 
				-    /// Builds a contig-indexed BED structure.
			
 
				+    /// Build a [`GenesBedIndex`] from an unsorted vector of [`BedRow`]s.
			
 
				     ///
			
 
				-    /// Input rows are sorted and grouped internally.
			
 
				+    /// Rows are sorted by `(contig, start, end)` internally.
			
 
				     pub fn new(mut rows: Vec<BedRow>) -> Self {
			
 
				-        // Ensure deterministic grouping & fast queries
			
 
				         rows.sort_unstable_by(|a, b| {
			
 
				             a.range.contig
			
 
				                 .cmp(&b.range.contig)
			
@@ -235,63 +260,63 @@ impl GenesBedIndex {
 
				                 .then_with(|| a.range.range.end.cmp(&b.range.range.end))
			
 
				         });
			
 
				 
			
 
				-        // 256 is fine if you encode contigs into u8
			
 
				         let mut tmp: Vec<Vec<BedRow>> = vec![Vec::new(); 256];
			
 
				         for r in rows {
			
 
				             tmp[r.range.contig as usize].push(r);
			
 
				         }
			
 
				 
			
 
				-        let by_contig = tmp
			
 
				-            .into_iter()
			
 
				-            .map(Arc::<[BedRow]>::from)
			
 
				-            .collect();
			
 
				-
			
 
				-        Self { by_contig }
			
 
				+        Self {
			
 
				+            by_contig: tmp.into_iter().map(Arc::<[BedRow]>::from).collect(),
			
 
				+        }
			
 
				     }
			
 
				 
			
 
				-    /// Returns gene names overlapping the given interval.
			
 
				-    #[inline]
			
 
				+    /// Return the names of all genes overlapping `[start, end)` on `contig`.
			
 
				+    ///
			
 
				+    /// Coordinates are 0-based half-open. If `start > end` the arguments are
			
 
				+    /// swapped defensively. Genes without a name field are silently skipped.
			
 
				+    ///
			
 
				+    /// # Arguments
			
 
				+    ///
			
 
				+    /// * `contig` - Encoded contig index (see `contig_to_num`)
			
 
				+    /// * `start` - Query start, 0-based inclusive
			
 
				+    /// * `end` - Query end, 0-based exclusive
			
 
				+    ///
			
 
				+    /// # Returns
			
 
				+    ///
			
 
				+    /// Names of overlapping genes in start-sorted order.
			
 
				     pub fn query_genes(&self, contig: u8, start: u32, end: u32) -> Vec<String> {
			
 
				         let (s, e) = if start <= end { (start, end) } else { (end, start) };
			
 
				         let rows = &self.by_contig[contig as usize];
			
 
				-        if rows.is_empty() {
			
 
				-            return Vec::new();
			
 
				-        }
			
 
				-
			
 
				-        // lower_bound on start
			
 
				-        let mut i = match rows.binary_search_by_key(&s, |r| r.range.range.start) {
			
 
				-            Ok(i) | Err(i) => i,
			
 
				-        };
			
 
				-        i = i.saturating_sub(1);
			
 
				-
			
 
				-        let mut out: Vec<String> = Vec::new();
			
 
				-        while i < rows.len() {
			
 
				-            let r = &rows[i];
			
 
				-            let rs = r.range.range.start;
			
 
				-            let re = r.range.range.end;
			
 
				 
			
 
				-            if rs > e {
			
 
				-                break;
			
 
				-            }
			
 
				-            if re >= s && rs <= e {
			
 
				-                if let Some(name) = &r.name {
			
 
				-                    out.push(name.clone());
			
 
				-                }
			
 
				-            }
			
 
				-            i += 1;
			
 
				-        }
			
 
				-
			
 
				-        out
			
 
				+        rows.iter()
			
 
				+            .take_while(|r| r.range.range.start < e)
			
 
				+            .filter(|r| r.range.range.end > s)
			
 
				+            .filter_map(|r| r.name.clone())
			
 
				+            .collect()
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-/// Convert BED -> BGZF (.gz) and create a Tabix index (.tbi) in the same pass.
			
 
				+/// Compress a BED file to BGZF and build a Tabix index (`.tbi`) in a single pass.
			
 
				+///
			
 
				+/// The output files are `<input>.gz` and `<input>.gz.tbi`. The input must be
			
 
				+/// tab-delimited, contig-grouped, and coordinate-sorted (Tabix requirement).
			
 
				 ///
			
 
				-/// Notes:
			
 
				-/// - Tabix expects the file to be tab-delimited, grouped by contig, and coordinate-sorted.
			
 
				-/// - BED is 0-based, half-open; Tabix uses 1-based positions. We convert as:
			
 
				-///   start_pos = start0 + 1
			
 
				-///   end_pos   = end0
			
 
				+/// # Coordinate conversion
			
 
				+///
			
 
				+/// BED uses 0-based half-open `[start, end)`. Tabix uses 1-based positions.
			
 
				+/// The conversion is:
			
 
				+/// - `tabix_start = bed_start + 1`  (0-based → 1-based)
			
 
				+/// - `tabix_end   = bed_end`        (0-based exclusive = 1-based inclusive numerically)
			
 
				+///
			
 
				+/// # Arguments
			
 
				+///
			
 
				+/// * `input` - Path to the input BED file
			
 
				+/// * `force` - Overwrite the output `.gz` file if it already exists
			
 
				+///
			
 
				+/// # Errors
			
 
				+///
			
 
				+/// Returns an error if the input cannot be read, the output cannot be written,
			
 
				+/// or any data line is malformed (missing columns, non-integer coordinates).
			
 
				 pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::Result<()> {
			
 
				     let input = input.as_ref();
			
 
				     let out_bgz = format!("{}.gz", input.display());
			
@@ -304,7 +329,7 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
				 
			
 
				     // Build a tabix index while we write.
			
 
				     let mut indexer = tabix::index::Indexer::default();
			
 
				-    indexer.set_header(Header::default()); // minimal header; usable for most tools :contentReference[oaicite:1]{index=1}
			
 
				+    indexer.set_header(Header::default());
			
 
				 
			
 
				     let mut line = String::new();
			
 
				     loop {
			
@@ -314,17 +339,10 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
				             break;
			
 
				         }
			
 
				 
			
 
				-        // Record start virtual offset for this line in the *output* bgzf stream.
			
 
				-        // let chunk_start = writer.bgzf_pos(); // BGZF virtual offset :contentReference[oaicite:2]{index=2}
			
 
				-
			
 
				         let chunk_start = writer.virtual_position();
			
 
				-        // Write line as-is.
			
 
				         writer.write_all(line.as_bytes())?;
			
 
				+        let chunk_end = writer.virtual_position();
			
 
				 
			
 
				-        // Record end virtual offset after writing the line.
			
 
				-        let chunk_end = writer.virtual_position(); // :contentReference[oaicite:3]{index=3}
			
 
				-
			
 
				-        // Add to tabix index if it's a data line (skip meta/header lines).
			
 
				         if !line.starts_with('#') && !line.trim().is_empty() {
			
 
				             let mut it = line.split('\t');
			
 
				             let rname = it
			
@@ -340,10 +358,10 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
				             let end0: u32 = it
			
 
				                 .next()
			
 
				                 .context("BED: missing end")?
			
 
				+                .trim()
			
 
				                 .parse()
			
 
				                 .context("BED: invalid end")?;
			
 
				 
			
 
				-            // BED 0-based half-open -> 1-based inclusive-ish interval for tabix
			
 
				             let start1 = start0
			
 
				                 .checked_add(1)
			
 
				                 .context("BED: start overflow")?;
			
@@ -353,10 +371,7 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
				             let end = Position::try_from(end0 as usize)
			
 
				                 .context("BED: end must be >= 1 for tabix indexing")?;
			
 
				 
			
 
				-            let chunk = Chunk::new(
			
 
				-                chunk_start,
			
 
				-                chunk_end,
			
 
				-            ); // chunk is [start, end) in virtual offsets :contentReference[oaicite:4]{index=4}
			
 
				+            let chunk = Chunk::new(chunk_start, chunk_end);
			
 
				 
			
 
				             indexer
			
 
				                 .add_record(rname, start, end, chunk)
			
@@ -365,20 +380,33 @@ pub fn convert_bgz_with_tabix(input: impl AsRef<Path>, force: bool) -> anyhow::R
 
				     }
			
 
				 
			
 
				     finalize_bgzf_file(writer, &out_bgz)?;
			
 
				-    // writer.close()?; // writes EOF marker :contentReference[oaicite:5]{index=5}
			
 
				 
			
 
				-    let index = indexer.build(); // :contentReference[oaicite:6]{index=6}
			
 
				-    tabix::fs::write(&out_tbi, &index)?; // :contentReference[oaicite:7]{index=7}
			
 
				+    let index = indexer.build();
			
 
				+    tabix::fs::write(&out_tbi, &index)?;
			
 
				 
			
 
				     Ok(())
			
 
				 }
			
 
				 
			
 
				-/// Returns a set of (chrom, start, end) intervals for centromeric regions
			
 
				-/// parsed from a UCSC-format cytoband BED.
			
 
				+/// Parse centromeric intervals from a UCSC cytoband BED file.
			
 
				 ///
			
 
				-/// Centromeres are identified by the `acen` stain field (column 4).
			
 
				+/// Identifies rows whose stain field (column 5) equals `acen` and returns their
			
 
				+/// coordinates as `(chrom, start, end)`. Coordinates are 0-based half-open as
			
 
				+/// stored in the file.
			
 
				+///
			
 
				+/// Expected column layout: `chrom\tstart\tend\tband_name\tstain`
			
 
				+///
			
 
				+/// # Arguments
			
 
				+///
			
 
				+/// * `path` - Path to the cytoband BED file
			
 
				+///
			
 
				+/// # Returns
			
 
				+///
			
 
				+/// A vector of `(chrom, start, end)` tuples for all `acen` bands, in file order.
			
 
				+///
			
 
				+/// # Errors
			
 
				 ///
			
 
				-/// Format: `chrom\tstart\tend\tband_name\tstain`
			
 
				+/// Returns an error if the file cannot be opened, a line has fewer than 5 columns,
			
 
				+/// or a coordinate field cannot be parsed as `u64`.
			
 
				 pub fn parse_centromere_intervals(path: &str) -> anyhow::Result<Vec<(String, u64, u64)>> {
			
 
				     let reader = BufReader::new(get_reader(path)
			
 
				         .with_context(|| format!("Cannot open cytobands BED: {path}"))?);