Jelajahi Sumber

refactor modkit.rs: drop full-file load, use tabix per-region fetch

Memory: the old read_pileup_index loaded the entire genome-wide pileup into
a HashMap (hundreds of millions of CpG sites, several hundred MB) to then
binary-search a few thousand gene regions. Replaced with per-region tabix
fetches via fetch_tabix_lines_with — only the relevant BGZF chunks are read.

Function renames for consistency:
  read_pileup_index             removed (no longer needed)
  compute_gene_activity_from_pileup -> compute_gene_activity (takes pileup path)
  write_gene_activity_bed9_itemrgb  -> write_gene_activity_bed9 (shorter)
  pileup_regions_to_activity_bed9_itemrgb -> pileup_to_activity_bed9

New function: region_fraction_from_tabix(pileup_bgz, region)
  - fetches col 10 (Nvalid_cov) and col 12 (Nmod) for the region
  - returns (fraction, sum_cov, sum_mod) or None if no coverage

Kept: region_fraction_modified (in-memory variant, useful for unit tests)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Thomas 1 bulan lalu
induk
melakukan
f1212b39b4
1 mengubah file dengan 177 tambahan dan 200 penghapusan
  1. 177 200
      src/io/modkit.rs

+ 177 - 200
src/io/modkit.rs

@@ -1,26 +1,28 @@
 //! Modkit pileup parsing and gene-level epigenetic activity computation.
 //!
-//! Reads modkit bedMethyl pileup files (BGZF-compressed) and BED4 region
+//! Reads modkit bedMethyl pileup files (BGZF + Tabix indexed) and BED4 region
 //! files to compute a per-gene **epigenetic activity proxy** based on the
 //! contrast between promoter and gene-body methylation.
 //!
+//! Region methylation is fetched **per region** via the Tabix index using
+//! [`fetch_tabix_lines_with`], avoiding loading the full genome-wide pileup
+//! (hundreds of millions of CpG sites) into memory.
+//!
 //! # Pipeline
 //!
 //! ```text
-//! pileup.bed.gz  ──► read_pileup_index  ──► HashMap<chrom, [PileupSite]>
-//!                                                        │
-//! regions.bed    ──► read_bed4          ──► [BedRegion] ─┤
-//!                                                        │
-//!                         compute_gene_activity_from_pileup
-//!                                                        │
-//!                    write_gene_activity_bed9_itemrgb  ◄─┘
+//! pileup.bed.gz + .tbi ──► fetch per region (fetch_tabix_lines_with)
+//! regions.bed          ──► read_bed4
+//!                               │
+//!                    compute_gene_activity
+//!                               │
+//!              write_gene_activity_bed9_itemrgb
 //! ```
 //!
 //! # Coordinate system
 //!
-//! All coordinates are **0-based half-open `[start, end)`** throughout, matching
-//! the BED convention. `PileupSite.start` is a single-base position (the interval
-//! is `[start, start+1)`).
+//! All coordinates are **0-based half-open `[start, end)`** throughout,
+//! matching the BED convention. `PileupSite.start` is a single-base position.
 
 use anyhow::{anyhow, Context, Result};
 use std::{
@@ -28,19 +30,23 @@ use std::{
     collections::HashMap,
     fs,
     fs::File,
-    io::{BufRead, BufWriter, Write},
+    io::{BufWriter, Write},
     path::Path,
 };
 
-use crate::{helpers::TempFileGuard, io::{bed::read_bed, readers::get_gz_reader}};
+use crate::{
+    helpers::TempFileGuard,
+    io::{bed::read_bed, readers::fetch_tabix_lines_with},
+    positions::GenomeRange,
+};
+
+// ─── Types ───────────────────────────────────────────────────────────────────
 
 /// One methylation call row from a modkit pileup (bedMethyl) file.
 ///
 /// Pileup rows are single-base intervals `[start, start+1)`.
-/// Only the columns needed for region aggregation are kept:
-/// - `start`: col 2, 0-based
-/// - `nvalid_cov`: col 10 (Nvalid_cov)
-/// - `nmod`: col 12 (Nmod)
+/// Only the columns needed for region aggregation are stored:
+/// col 2 (`start`), col 10 (`nvalid_cov`), col 12 (`nmod`).
 #[derive(Debug, Clone)]
 pub struct PileupSite {
     /// 0-based position (half-open interval `[start, start+1)`)
@@ -79,6 +85,27 @@ pub struct ParsedName {
     pub kind: RegionKind,
 }
 
+/// Gene-level epigenetic activity proxy computed from a single sample's pileup.
+///
+/// Score: `log2(body_frac / prom_frac)` — positive means more active-like,
+/// negative means more repressed-like. Output coordinates use the **promoter**
+/// interval (anchors the signal near the TSS in genome browsers).
+#[derive(Debug, Clone)]
+pub struct GeneActivity {
+    pub gene_key: String,
+    pub chrom: String,
+    /// Promoter start, 0-based inclusive
+    pub start: u64,
+    /// Promoter end, 0-based exclusive
+    pub end: u64,
+    pub prom_frac: f64,
+    pub body_frac: f64,
+    /// `log2(body_frac / prom_frac)`
+    pub score: f64,
+}
+
+// ─── Parsing helpers ─────────────────────────────────────────────────────────
+
 /// Parse a region name for `_prom` or `_body` suffix.
 ///
 /// - `"FOO_prom"` → gene key `"FOO"`, kind [`RegionKind::Promoter`]
@@ -94,38 +121,11 @@ pub fn parse_region_name(name: &str) -> ParsedName {
     }
 }
 
-/// Diverging blue→white→red palette for IGV `itemRgb`.
-///
-/// Maps `v` clipped to `[-clip, +clip]` onto the range:
-/// - `v = -clip` → `(0, 0, 255)` pure blue
-/// - `v = 0`     → `(255, 255, 255)` white
-/// - `v = +clip` → `(255, 0, 0)` pure red
-pub fn diverging_rgb(v: f64, clip: f64) -> (u8, u8, u8) {
-    debug_assert!(clip > 0.0, "clip must be > 0");
-    let x = v.clamp(-clip, clip);
-    let t = (x + clip) / (2.0 * clip);
-    if t < 0.5 {
-        let u = t / 0.5;
-        let r = (255.0 * u).round() as u8;
-        let g = (255.0 * u).round() as u8;
-        (r, g, 255)
-    } else {
-        let u = (t - 0.5) / 0.5;
-        let g = (255.0 * (1.0 - u)).round() as u8;
-        let b = (255.0 * (1.0 - u)).round() as u8;
-        (255, g, b)
-    }
-}
-
 /// Read a BED4 regions file into memory.
 ///
-/// Wraps [`read_bed`] from the `bed` module, inheriting its header skipping
-/// (`#`, `track`, `browser`) and gzip support. Coordinates are widened from
-/// `u32` to `u64`; contig names are recovered via `num_to_contig`.
-///
-/// # Arguments
-///
-/// * `path` - Path to the BED4 file (plain or BGZF-compressed)
+/// Wraps [`read_bed`], inheriting its header skipping (`#`, `track`, `browser`)
+/// and gzip support. Coordinates are widened from `u32` to `u64`;
+/// contig names are recovered via `GenomeRange::contig()`.
 ///
 /// # Errors
 ///
@@ -142,136 +142,107 @@ pub fn read_bed4(path: &str) -> Result<Vec<BedRegion>> {
         .collect())
 }
 
-/// Read a modkit pileup file and index sites per chromosome.
-///
-/// Parses columns 1 (chrom), 2 (start), 10 (Nvalid_cov), 12 (Nmod).
-/// Sites with `Nvalid_cov = 0` are dropped. The per-chromosome vectors are
-/// sorted by position to enable binary search in [`region_fraction_modified`].
-///
-/// # Arguments
-///
-/// * `path` - Path to the modkit bedMethyl pileup file (BGZF-compressed)
-///
-/// # Errors
-///
-/// Returns an error if the file cannot be opened or any data line is malformed.
-pub fn read_pileup_index(path: &str) -> Result<HashMap<String, Vec<PileupSite>>> {
-    let mut map: HashMap<String, Vec<PileupSite>> = HashMap::new();
-
-    for (i, line) in get_gz_reader(path)?.lines().enumerate() {
-        let line = line.with_context(|| format!("I/O error at line {}", i + 1))?;
-        if line.starts_with('#') || line.is_empty() {
-            continue;
-        }
-
-        let f: Vec<&str> = line.split('\t').collect();
-
-        let chrom = f.get(0)
-            .ok_or_else(|| anyhow!("Missing chrom at line {}", i + 1))?.to_string();
-        let start: u64 = f.get(1)
-            .ok_or_else(|| anyhow!("Missing start at line {}", i + 1))?
-            .parse().with_context(|| format!("Bad start at line {}", i + 1))?;
-        let nvalid_cov: u64 = f.get(9)
-            .ok_or_else(|| anyhow!("Missing Nvalid_cov (col 10) at line {}", i + 1))?
-            .parse().with_context(|| format!("Bad Nvalid_cov at line {}", i + 1))?;
-        let nmod: u64 = f.get(11)
-            .ok_or_else(|| anyhow!("Missing Nmod (col 12) at line {}", i + 1))?
-            .parse().with_context(|| format!("Bad Nmod at line {}", i + 1))?;
-
-        if nvalid_cov == 0 {
-            continue;
-        }
-
-        map.entry(chrom).or_default().push(PileupSite { start, nvalid_cov, nmod });
-    }
-
-    for v in map.values_mut() {
-        v.sort_by_key(|s| s.start);
-    }
-
-    Ok(map)
-}
+// ─── Region methylation ───────────────────────────────────────────────────────
 
-/// Aggregate methylation fraction over `[start, end)` from a sorted pileup slice.
+/// Aggregate methylation fraction over `[start, end)` from a **pre-loaded**
+/// sorted pileup slice.
 ///
 /// Returns `(fraction, sum_nvalid_cov, sum_nmod)` where
-/// `fraction = sum(Nmod) / sum(Nvalid_cov)`. Returns `None` if no sites overlap
-/// or if total coverage is zero.
+/// `fraction = Σ Nmod / Σ Nvalid_cov`. Returns `None` if no sites overlap or
+/// total coverage is zero.
 ///
 /// Uses [`slice::partition_point`] for O(log n) start lookup.
+/// Prefer [`region_fraction_from_tabix`] for production use — this variant is
+/// useful when the pileup is already loaded in memory (e.g. unit tests).
 pub fn region_fraction_modified(
     chrom_sites: &[PileupSite],
     start: u64,
     end: u64,
 ) -> Option<(f64, u64, u64)> {
     let i = chrom_sites.partition_point(|s| s.start < start);
-
-    let mut sum_cov: u64 = 0;
-    let mut sum_mod: u64 = 0;
+    let mut sum_cov = 0u64;
+    let mut sum_mod = 0u64;
 
     for s in &chrom_sites[i..] {
-        if s.start >= end {
-            break;
-        }
+        if s.start >= end { break; }
         sum_cov = sum_cov.saturating_add(s.nvalid_cov);
         sum_mod = sum_mod.saturating_add(s.nmod);
     }
 
-    if sum_cov == 0 {
-        None
-    } else {
-        Some((sum_mod as f64 / sum_cov as f64, sum_cov, sum_mod))
-    }
+    if sum_cov == 0 { None } else { Some((sum_mod as f64 / sum_cov as f64, sum_cov, sum_mod)) }
 }
 
-/// Gene-level epigenetic activity proxy computed from a single sample's pileup.
+/// Fetch methylation fraction for `region` directly from a Tabix-indexed pileup.
 ///
-/// Score: `log2(body_frac / prom_frac)` where both fractions are
-/// coverage-weighted methylation ratios over the respective regions.
+/// Uses [`fetch_tabix_lines_with`] to seek only to the relevant BGZF chunks —
+/// no full-file load. Parses col 10 (`Nvalid_cov`) and col 12 (`Nmod`) from
+/// each overlapping pileup line.
 ///
-/// - Score > 0: gene-body methylation exceeds promoter → "active-like"
-/// - Score < 0: promoter methylation exceeds gene-body → "repressed-like"
+/// Returns `(fraction, sum_nvalid_cov, sum_nmod)`, or `None` if no sites
+/// overlap the region or total coverage is zero.
 ///
-/// Output coordinates use the **promoter** interval (anchors to TSS in genome browsers).
-#[derive(Debug, Clone)]
-pub struct GeneActivity {
-    pub gene_key: String,
-    pub chrom: String,
-    /// Promoter start, 0-based
-    pub start: u64,
-    /// Promoter end, 0-based exclusive
-    pub end: u64,
-    pub prom_frac: f64,
-    pub body_frac: f64,
-    /// `log2(body_frac / prom_frac)`
-    pub score: f64,
+/// # Arguments
+///
+/// * `pileup_bgz` - Path to the modkit pileup `.bed.gz`; index expected at
+///   `<pileup_bgz>.tbi`
+/// * `region` - Genomic region to query (0-based half-open)
+///
+/// # Errors
+///
+/// Returns an error if the Tabix index cannot be read or a pileup line is
+/// malformed.
+pub fn region_fraction_from_tabix(
+    pileup_bgz: &str,
+    region: &GenomeRange,
+) -> Result<Option<(f64, u64, u64)>> {
+    let mut sum_cov = 0u64;
+    let mut sum_mod = 0u64;
+
+    fetch_tabix_lines_with(pileup_bgz, region, |line| {
+        let f: Vec<&str> = line.split('\t').collect();
+        let nvalid_cov: u64 = f.get(9)
+            .ok_or_else(|| anyhow!("Missing Nvalid_cov (col 10)"))?
+            .parse().context("Bad Nvalid_cov")?;
+        let nmod: u64 = f.get(11)
+            .ok_or_else(|| anyhow!("Missing Nmod (col 12)"))?
+            .parse().context("Bad Nmod")?;
+        sum_cov = sum_cov.saturating_add(nvalid_cov);
+        sum_mod = sum_mod.saturating_add(nmod);
+        Ok(())
+    })?;
+
+    if sum_cov == 0 { Ok(None) } else { Ok(Some((sum_mod as f64 / sum_cov as f64, sum_cov, sum_mod))) }
 }
 
-/// Compute per-gene epigenetic activity from a modkit pileup and BED4 regions.
+// ─── Activity computation ─────────────────────────────────────────────────────
+
+/// Compute per-gene epigenetic activity from a Tabix-indexed modkit pileup.
 ///
-/// For each gene with both a `_prom` and a `_body` region:
-/// 1. Aggregate methylation fraction `f(R) = Σ Nmod / Σ Nvalid_cov` per region
-/// 2. Skip genes where either fraction is 0 or coverage is below `min_sum_cov`
-/// 3. Compute `score = log2(body_frac / prom_frac)`
+/// For each gene with both a `_prom` and a `_body` region, fetches methylation
+/// fractions via [`region_fraction_from_tabix`] (one tabix seek per region),
+/// then computes `score = log2(body_frac / prom_frac)`.
+///
+/// Genes are skipped if either fraction is 0, total coverage is below
+/// `min_sum_cov`, or the contig is absent from the Tabix index.
 ///
 /// Results are sorted by `(chrom, start)` for IGV compatibility.
 ///
 /// # Arguments
 ///
-/// * `pileup` - Per-chromosome pileup sites from [`read_pileup_index`]
+/// * `pileup_bgz` - Path to the modkit pileup `.bed.gz` (Tabix-indexed)
 /// * `regions` - BED4 regions with `_prom` / `_body` name suffixes
 /// * `min_sum_cov` - Minimum total `Nvalid_cov` required per region
 ///
 /// # Errors
 ///
-/// Returns an error if any region has `end <= start`.
-pub fn compute_gene_activity_from_pileup(
-    pileup: &HashMap<String, Vec<PileupSite>>,
+/// Returns an error if any region has `end <= start` or a Tabix fetch fails.
+pub fn compute_gene_activity(
+    pileup_bgz: &str,
     regions: &[BedRegion],
     min_sum_cov: u64,
 ) -> Result<Vec<GeneActivity>> {
     #[derive(Clone)]
-    struct Part { chrom: String, start: u64, end: u64, frac: f64, sum_cov: u64 }
+    struct Part { chrom: String, start: u64, end: u64, frac: f64 }
     #[derive(Default)]
     struct Pair { prom: Option<Part>, body: Option<Part> }
 
@@ -288,40 +259,39 @@ pub fn compute_gene_activity_from_pileup(
         let ParsedName { gene_key, kind } = parse_region_name(&r.name);
         if kind == RegionKind::Other { continue; }
 
-        let chrom_sites = match pileup.get(&r.chrom) { Some(v) => v, None => continue };
+        let region = GenomeRange::new(&r.chrom, r.start as u32, r.end as u32);
 
-        let (frac, sum_cov, _) = match region_fraction_modified(chrom_sites, r.start, r.end) {
+        let (frac, sum_cov, _) = match region_fraction_from_tabix(pileup_bgz, &region)? {
             Some(x) => x,
             None => continue,
         };
 
         if sum_cov < min_sum_cov || frac <= 0.0 { continue; }
 
-        let part = Part { chrom: r.chrom.clone(), start: r.start, end: r.end, frac, sum_cov };
+        let part = Part { chrom: r.chrom.clone(), start: r.start, end: r.end, frac };
         let entry = by_gene.entry(gene_key).or_default();
         match kind {
-            RegionKind::Promoter  => entry.prom = Some(part),
-            RegionKind::GeneBody  => entry.body = Some(part),
-            RegionKind::Other     => {}
+            RegionKind::Promoter => entry.prom = Some(part),
+            RegionKind::GeneBody => entry.body = Some(part),
+            RegionKind::Other    => {}
         }
     }
 
-    let mut out = Vec::new();
-    for (gene, pair) in by_gene {
-        let (prom, body) = match (pair.prom, pair.body) {
-            (Some(p), Some(b)) => (p, b),
-            _ => continue,
-        };
-        out.push(GeneActivity {
-            gene_key: gene,
-            chrom: prom.chrom,
-            start: prom.start,
-            end: prom.end,
-            prom_frac: prom.frac,
-            body_frac: body.frac,
-            score: (body.frac / prom.frac).log2(),
-        });
-    }
+    let mut out: Vec<GeneActivity> = by_gene
+        .into_iter()
+        .filter_map(|(gene, pair)| {
+            let (prom, body) = (pair.prom?, pair.body?);
+            Some(GeneActivity {
+                gene_key: gene,
+                chrom: prom.chrom,
+                start: prom.start,
+                end: prom.end,
+                prom_frac: prom.frac,
+                body_frac: body.frac,
+                score: (body.frac / prom.frac).log2(),
+            })
+        })
+        .collect();
 
     out.sort_by(|a, b| match a.chrom.cmp(&b.chrom) {
         Ordering::Equal => a.start.cmp(&b.start),
@@ -331,9 +301,28 @@ pub fn compute_gene_activity_from_pileup(
     Ok(out)
 }
 
-/// Convert a log2-ratio to a BED `score` field (0–1000).
+// ─── Colour / score helpers ───────────────────────────────────────────────────
+
+/// Diverging blue→white→red palette for IGV `itemRgb`.
 ///
-/// `v` is clipped to `[-clip, +clip]` then linearly mapped:
+/// `v` is clipped to `[-clip, +clip]` and mapped to:
+/// `-clip` → `(0, 0, 255)`, `0` → `(255, 255, 255)`, `+clip` → `(255, 0, 0)`.
+pub fn diverging_rgb(v: f64, clip: f64) -> (u8, u8, u8) {
+    debug_assert!(clip > 0.0, "clip must be > 0");
+    let t = (v.clamp(-clip, clip) + clip) / (2.0 * clip);
+    if t < 0.5 {
+        let u = t / 0.5;
+        ((255.0 * u).round() as u8, (255.0 * u).round() as u8, 255)
+    } else {
+        let u = (t - 0.5) / 0.5;
+        let c = (255.0 * (1.0 - u)).round() as u8;
+        (255, c, c)
+    }
+}
+
+/// Map a log2-ratio to a BED `score` field (0–1000).
+///
+/// `v` is clipped to `[-clip, +clip]` then linearly scaled:
 /// `-clip` → 0, `0` → 500, `+clip` → 1000.
 pub fn bed_score_from_log2_ratio(v: f64, clip: f64) -> u16 {
     debug_assert!(clip > 0.0, "clip must be > 0");
@@ -341,29 +330,21 @@ pub fn bed_score_from_log2_ratio(v: f64, clip: f64) -> u16 {
     (t * 1000.0).round().clamp(0.0, 1000.0) as u16
 }
 
+// ─── Output ───────────────────────────────────────────────────────────────────
+
 /// Write gene activity as an IGV BED9 track with `itemRgb=On`.
 ///
-/// Color encodes `GeneActivity::score` (`log2(body/prom)`) via
-/// [`diverging_rgb`] clipped to `[-clip, +clip]`. The BED `score` column
-/// (col 5) encodes the same quantity scaled to `[0, 1000]` via
-/// [`bed_score_from_log2_ratio`].
-///
-/// Output is written atomically: data goes to a UUID temp file in the same
-/// directory as `path`, then renamed on success. A [`TempFileGuard`] ensures
-/// the temp file is removed automatically on failure.
+/// Colour encodes `GeneActivity::score` via [`diverging_rgb`] clipped to
+/// `[-clip, +clip]`. BED col 5 (`score`) holds the same quantity scaled to
+/// `[0, 1000]` via [`bed_score_from_log2_ratio`].
 ///
-/// # Arguments
-///
-/// * `path` - Destination BED9 file path
-/// * `track_name` - IGV track name shown in the track label
-/// * `activity` - Gene activity records (produced by [`compute_gene_activity_from_pileup`])
-/// * `clip` - Symmetric clip value for color and score mapping; must be > 0
+/// Written atomically: data goes to a UUID temp file, renamed on success.
+/// A [`TempFileGuard`] removes the temp file automatically on failure.
 ///
 /// # Errors
 ///
-/// Returns an error if `clip <= 0`, the file cannot be written, or the atomic
-/// rename fails.
-pub fn write_gene_activity_bed9_itemrgb(
+/// Returns an error if `clip <= 0`, the file cannot be written, or the rename fails.
+pub fn write_gene_activity_bed9(
     path: &str,
     track_name: &str,
     activity: &[GeneActivity],
@@ -377,21 +358,20 @@ pub fn write_gene_activity_bed9_itemrgb(
     let mut guard = TempFileGuard::new();
     let tmp_path = guard.tmp_path("", tmp_dir);
 
-    let out = File::create(&tmp_path)
-        .with_context(|| format!("failed to create temp file: {}", tmp_path.display()))?;
-    let mut w = BufWriter::new(out);
+    let mut w = BufWriter::new(
+        File::create(&tmp_path)
+            .with_context(|| format!("failed to create temp file: {}", tmp_path.display()))?,
+    );
 
     writeln!(w, r#"track name="{}" itemRgb="On""#, track_name)?;
-
     for a in activity {
         let (rr, gg, bb) = diverging_rgb(a.score, clip);
-        let bed_score = bed_score_from_log2_ratio(a.score, clip);
         writeln!(
             w,
             "{}\t{}\t{}\t{}={:.4}\t{}\t.\t{}\t{}\t{},{},{}",
             a.chrom, a.start, a.end,
             a.gene_key, a.score,
-            bed_score,
+            bed_score_from_log2_ratio(a.score, clip),
             a.start, a.end,
             rr, gg, bb,
         )?;
@@ -407,13 +387,14 @@ pub fn write_gene_activity_bed9_itemrgb(
     Ok(())
 }
 
-/// End-to-end helper: pileup + BED4 regions → IGV BED9 activity track.
+// ─── Pipeline ─────────────────────────────────────────────────────────────────
+
+/// End-to-end pipeline: Tabix pileup + BED4 regions → IGV BED9 activity track.
 ///
-/// Runs the full pipeline in order:
-/// 1. [`read_pileup_index`] — parse and index the modkit pileup
-/// 2. [`read_bed4`] — parse the gene promoter/body regions
-/// 3. [`compute_gene_activity_from_pileup`] — compute log2 activity scores
-/// 4. [`write_gene_activity_bed9_itemrgb`] — write the BED9 track atomically
+/// 1. [`read_bed4`] — parse the gene promoter/body regions
+/// 2. [`compute_gene_activity`] — fetch per-region methylation fractions via
+///    Tabix and compute log2 activity scores
+/// 3. [`write_gene_activity_bed9`] — write the BED9 track atomically
 ///
 /// # Returns
 ///
@@ -422,33 +403,30 @@ pub fn write_gene_activity_bed9_itemrgb(
 /// # Errors
 ///
 /// Returns an error if any pipeline step fails.
-pub fn pileup_regions_to_activity_bed9_itemrgb(
-    pileup_path: &str,
+pub fn pileup_to_activity_bed9(
+    pileup_bgz: &str,
     regions_bed4_path: &str,
     min_sum_cov: u64,
     out_bed9_path: &str,
     track_name: &str,
     clip: f64,
 ) -> Result<usize> {
-    let pile = read_pileup_index(pileup_path)?;
     let regions = read_bed4(regions_bed4_path)?;
-    let activity = compute_gene_activity_from_pileup(&pile, &regions, min_sum_cov)?;
+    let activity = compute_gene_activity(pileup_bgz, &regions, min_sum_cov)?;
     let n = activity.len();
-    write_gene_activity_bed9_itemrgb(out_bed9_path, track_name, &activity, clip)?;
+    write_gene_activity_bed9(out_bed9_path, track_name, &activity, clip)?;
     Ok(n)
 }
 
 #[cfg(test)]
 mod tests {
     use log::info;
-
     use super::*;
     use crate::{config::Config, helpers::test_init};
 
     #[test]
     fn modkit_activity_igv() -> anyhow::Result<()> {
         test_init();
-
         let config = Config::default();
         let regions_bed4_path =
             "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_genes_prom_body.bed";
@@ -462,13 +440,12 @@ mod tests {
                 "{}/{}_{}_modkit_activity.bed",
                 config.tumoral_dir(id), id, config.tumoral_name
             );
-            let n = pileup_regions_to_activity_bed9_itemrgb(
+            let n = pileup_to_activity_bed9(
                 &path, regions_bed4_path, 100, &out,
                 &format!("{id} Gene Activity"), 2.0,
             )?;
             info!("{id}: {n} gene activities written");
         }
-
         Ok(())
     }
 }