Explorar o código

audit and refactor src/io/modkit.rs

- Replace read_bed4 with a wrapper around read_bed from bed.rs: inherits
  header skipping (#/track/browser), gzip support, and blank-line handling
  for free; removes the csv::ReaderBuilder dependency from this path
- Replace read_pileup_index csv::ReaderBuilder with plain BufRead line
  splitting: simpler, explicit column access, consistent with other io files;
  adds header/blank-line skipping that was missing before
- Fix write_gene_activity_bed9_itemrgb: atomic write via TempFileGuard UUID
  temp file + fs::rename; previously wrote directly to the output path leaving
  a corrupt file on partial write failure
- Replace hand-rolled binary search in region_fraction_modified with
  slice::partition_point (same semantics, standard Rust idiom)
- assert! -> debug_assert! in diverging_rgb and bed_score_from_log2_ratio;
  their only caller already validates clip > 0.0 and returns Err
- Add //! module header with pipeline diagram and coordinate system note
- Rustdoc pass on all public items
- DRY test: two identical case loops collapsed into a for loop over ids

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Thomas hai 1 mes
pai
achega
7ce17964e9
Modificáronse 1 ficheiros con 221 adicións e 273 borrados
  1. 221 273
      src/io/modkit.rs

+ 221 - 273
src/io/modkit.rs

@@ -1,34 +1,70 @@
+//! Modkit pileup parsing and gene-level epigenetic activity computation.
+//!
+//! Reads modkit bedMethyl pileup files (BGZF-compressed) and BED4 region
+//! files to compute a per-gene **epigenetic activity proxy** based on the
+//! contrast between promoter and gene-body methylation.
+//!
+//! # Pipeline
+//!
+//! ```text
+//! pileup.bed.gz  ──► read_pileup_index  ──► HashMap<chrom, [PileupSite]>
+//!                                                        │
+//! regions.bed    ──► read_bed4          ──► [BedRegion] ─┤
+//!                                                        │
+//!                         compute_gene_activity_from_pileup
+//!                                                        │
+//!                    write_gene_activity_bed9_itemrgb  ◄─┘
+//! ```
+//!
+//! # Coordinate system
+//!
+//! All coordinates are **0-based half-open `[start, end)`** throughout, matching
+//! the BED convention. `PileupSite.start` is a single-base position (the interval
+//! is `[start, start+1)`).
+
 use anyhow::{anyhow, Context, Result};
-use csv::ReaderBuilder;
 use std::{
-    cmp::Ordering, collections::HashMap, fs::File, io::{BufWriter, Write}
+    cmp::Ordering,
+    collections::HashMap,
+    fs,
+    fs::File,
+    io::{BufRead, BufWriter, Write},
+    path::Path,
 };
 
-use crate::io::readers::get_gz_reader;
+use crate::{helpers::TempFileGuard, io::{bed::read_bed, readers::get_gz_reader}};
 
-/// One methylation call row from a modkit pileup (bedMethyl-like) file.
+/// One methylation call row from a modkit pileup (bedMethyl) file.
 ///
-/// We only keep what we need to aggregate region methylation:
-/// - `start` (0-based position; pileup rows are single-base intervals: [start, start+1))
-/// - `nvalid_cov` (col 10)
-/// - `nmod` (col 12)
+/// Pileup rows are single-base intervals `[start, start+1)`.
+/// Only the columns needed for region aggregation are kept:
+/// - `start`: col 2, 0-based
+/// - `nvalid_cov`: col 10 (Nvalid_cov)
+/// - `nmod`: col 12 (Nmod)
 #[derive(Debug, Clone)]
 pub struct PileupSite {
+    /// 0-based position (half-open interval `[start, start+1)`)
     pub start: u64,
+    /// Number of valid modification calls at this site (col 10)
     pub nvalid_cov: u64,
+    /// Number of modified calls at this site (col 12)
     pub nmod: u64,
 }
 
 /// One region from a BED4 file: chrom, start, end, name.
+///
+/// Coordinates are **0-based half-open** `[start, end)`.
 #[derive(Debug, Clone)]
 pub struct BedRegion {
     pub chrom: String,
+    /// 0-based inclusive start
     pub start: u64,
+    /// 0-based exclusive end
     pub end: u64,
     pub name: String,
 }
 
-/// Promoter/body classification inferred from region name.
+/// Promoter/body classification inferred from a region name suffix.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum RegionKind {
     Promoter,
@@ -36,17 +72,18 @@ pub enum RegionKind {
     Other,
 }
 
-/// Parsed region name into gene key + kind.
+/// A region name split into gene key and [`RegionKind`].
 #[derive(Debug, Clone)]
 pub struct ParsedName {
     pub gene_key: String,
     pub kind: RegionKind,
 }
 
-/// Parse region `name` for `_prom` or `_body` suffix.
+/// Parse a region name for `_prom` or `_body` suffix.
 ///
-/// - `FOO_prom` => gene_key `FOO`, kind `Promoter`
-/// - `FOO_body` => gene_key `FOO`, kind `GeneBody`
+/// - `"FOO_prom"` → gene key `"FOO"`, kind [`RegionKind::Promoter`]
+/// - `"FOO_body"` → gene key `"FOO"`, kind [`RegionKind::GeneBody`]
+/// - anything else → gene key = full name, kind [`RegionKind::Other`]
 pub fn parse_region_name(name: &str) -> ParsedName {
     if let Some(g) = name.strip_suffix("_prom") {
         ParsedName { gene_key: g.to_string(), kind: RegionKind::Promoter }
@@ -57,12 +94,14 @@ pub fn parse_region_name(name: &str) -> ParsedName {
     }
 }
 
-/// Diverging blue→white→red palette for IGV itemRgb.
+/// Diverging blue→white→red palette for IGV `itemRgb`.
 ///
-/// Negative values → blue, zero → white, positive → red.
-/// Values are clipped to `[-clip, +clip]`.
+/// Maps `v` clipped to `[-clip, +clip]` onto the range:
+/// - `v = -clip` → `(0, 0, 255)` pure blue
+/// - `v = 0`     → `(255, 255, 255)` white
+/// - `v = +clip` → `(255, 0, 0)` pure red
 pub fn diverging_rgb(v: f64, clip: f64) -> (u8, u8, u8) {
-    assert!(clip > 0.0, "clip must be > 0");
+    debug_assert!(clip > 0.0, "clip must be > 0");
     let x = v.clamp(-clip, clip);
     let t = (x + clip) / (2.0 * clip);
     if t < 0.5 {
@@ -78,71 +117,67 @@ pub fn diverging_rgb(v: f64, clip: f64) -> (u8, u8, u8) {
     }
 }
 
-/// Read a BED4 regions file.
+/// Read a BED4 regions file into memory.
+///
+/// Wraps [`read_bed`] from the `bed` module, inheriting its header skipping
+/// (`#`, `track`, `browser`) and gzip support. Coordinates are widened from
+/// `u32` to `u64`; contig names are recovered via `num_to_contig`.
+///
+/// # Arguments
+///
+/// * `path` - Path to the BED4 file (plain or BGZF-compressed)
 ///
 /// # Errors
-/// Fails on malformed lines or invalid coordinates.
+///
+/// Returns an error if the file cannot be opened or any line is malformed.
 pub fn read_bed4(path: &str) -> Result<Vec<BedRegion>> {
-    let mut rdr = ReaderBuilder::new()
-        .delimiter(b'\t')
-        .has_headers(false)
-        .flexible(true)
-        .from_path(path)
-        .with_context(|| format!("Failed to open BED: {path}"))?;
-
-    let mut out = Vec::new();
-    for (i, row) in rdr.records().enumerate() {
-        let row = row.with_context(|| format!("BED parse error at line {}", i + 1))?;
-        let chrom = row.get(0).ok_or_else(|| anyhow!("Missing chrom at line {}", i + 1))?.to_string();
-        let start: u64 = row.get(1).ok_or_else(|| anyhow!("Missing start at line {}", i + 1))?
-            .parse().with_context(|| format!("Bad start at line {}", i + 1))?;
-        let end: u64 = row.get(2).ok_or_else(|| anyhow!("Missing end at line {}", i + 1))?
-            .parse().with_context(|| format!("Bad end at line {}", i + 1))?;
-        if end <= start {
-            return Err(anyhow!("Invalid BED interval end<=start at line {}", i + 1));
-        }
-        let name = row.get(3).unwrap_or(".").to_string();
-        out.push(BedRegion { chrom, start, end, name });
-    }
-    Ok(out)
+    Ok(read_bed(path)?
+        .into_iter()
+        .map(|row| BedRegion {
+            chrom: row.range.contig(),
+            start: row.range.range.start as u64,
+            end:   row.range.range.end   as u64,
+            name:  row.name.unwrap_or_else(|| ".".to_string()),
+        })
+        .collect())
 }
 
-/// Read a modkit pileup file and index it per chromosome.
+/// Read a modkit pileup file and index sites per chromosome.
 ///
-/// Expected columns (1-based):
-/// - chrom (1)
-/// - start (2)
-/// - Nvalid_cov (10)
-/// - Nmod (12)
+/// Parses columns 1 (chrom), 2 (start), 10 (Nvalid_cov), 12 (Nmod).
+/// Sites with `Nvalid_cov = 0` are dropped. The per-chromosome vectors are
+/// sorted by position to enable binary search in [`region_fraction_modified`].
 ///
-/// # Notes
-/// The file is typically sorted; we sort per chromosome just in case.
+/// # Arguments
+///
+/// * `path` - Path to the modkit bedMethyl pileup file (BGZF-compressed)
 ///
 /// # Errors
-/// Fails on malformed lines or invalid integers.
+///
+/// Returns an error if the file cannot be opened or any data line is malformed.
 pub fn read_pileup_index(path: &str) -> Result<HashMap<String, Vec<PileupSite>>> {
-    let mut rdr = ReaderBuilder::new()
-        .delimiter(b'\t')
-        .has_headers(false)
-        .flexible(true)
-        .from_reader(get_gz_reader(path)?);
-
     let mut map: HashMap<String, Vec<PileupSite>> = HashMap::new();
 
-    for (i, row) in rdr.records().enumerate() {
-        let row = row.with_context(|| format!("Pileup parse error at line {}", i + 1))?;
+    for (i, line) in get_gz_reader(path)?.lines().enumerate() {
+        let line = line.with_context(|| format!("I/O error at line {}", i + 1))?;
+        if line.starts_with('#') || line.is_empty() {
+            continue;
+        }
 
-        let chrom = row.get(0).ok_or_else(|| anyhow!("Missing chrom at line {}", i + 1))?.to_string();
-        let start: u64 = row.get(1).ok_or_else(|| anyhow!("Missing start at line {}", i + 1))?
-            .parse().with_context(|| format!("Bad start at line {}", i + 1))?;
+        let f: Vec<&str> = line.split('\t').collect();
 
-        // col10 = Nvalid_cov (index 9), col12 = Nmod (index 11)
-        let nvalid_cov: u64 = row.get(9).ok_or_else(|| anyhow!("Missing Nvalid_cov (col10) at line {}", i + 1))?
+        let chrom = f.get(0)
+            .ok_or_else(|| anyhow!("Missing chrom at line {}", i + 1))?.to_string();
+        let start: u64 = f.get(1)
+            .ok_or_else(|| anyhow!("Missing start at line {}", i + 1))?
+            .parse().with_context(|| format!("Bad start at line {}", i + 1))?;
+        let nvalid_cov: u64 = f.get(9)
+            .ok_or_else(|| anyhow!("Missing Nvalid_cov (col 10) at line {}", i + 1))?
             .parse().with_context(|| format!("Bad Nvalid_cov at line {}", i + 1))?;
-        let nmod: u64 = row.get(11).ok_or_else(|| anyhow!("Missing Nmod (col12) at line {}", i + 1))?
+        let nmod: u64 = f.get(11)
+            .ok_or_else(|| anyhow!("Missing Nmod (col 12) at line {}", i + 1))?
             .parse().with_context(|| format!("Bad Nmod at line {}", i + 1))?;
 
-        // Skip sites with zero valid coverage to avoid useless entries.
         if nvalid_cov == 0 {
             continue;
         }
@@ -150,45 +185,36 @@ pub fn read_pileup_index(path: &str) -> Result<HashMap<String, Vec<PileupSite>>>
         map.entry(chrom).or_default().push(PileupSite { start, nvalid_cov, nmod });
     }
 
-    // Ensure sorted by position for binary searching.
     for v in map.values_mut() {
-        v.sort_by(|a, b| a.start.cmp(&b.start));
+        v.sort_by_key(|s| s.start);
     }
 
     Ok(map)
 }
 
-/// Aggregate methylation fraction over `[start, end)` from a per-chrom pileup index.
+/// Aggregate methylation fraction over `[start, end)` from a sorted pileup slice.
 ///
-/// Fraction is computed as: `sum(Nmod) / sum(Nvalid_cov)`.
+/// Returns `(fraction, sum_nvalid_cov, sum_nmod)` where
+/// `fraction = sum(Nmod) / sum(Nvalid_cov)`. Returns `None` if no sites overlap
+/// or if total coverage is zero.
 ///
-/// # Returns
-/// `(fraction, sum_nvalid_cov, sum_nmod)`; returns `None` if no sites overlap.
+/// Uses [`slice::partition_point`] for O(log n) start lookup.
 pub fn region_fraction_modified(
     chrom_sites: &[PileupSite],
     start: u64,
     end: u64,
 ) -> Option<(f64, u64, u64)> {
-    // lower_bound for first site with pos >= start
-    let mut lo = 0usize;
-    let mut hi = chrom_sites.len();
-    while lo < hi {
-        let mid = (lo + hi) / 2;
-        if chrom_sites[mid].start < start { lo = mid + 1 } else { hi = mid }
-    }
-    let mut i = lo;
+    let i = chrom_sites.partition_point(|s| s.start < start);
 
     let mut sum_cov: u64 = 0;
     let mut sum_mod: u64 = 0;
 
-    while i < chrom_sites.len() {
-        let s = &chrom_sites[i];
+    for s in &chrom_sites[i..] {
         if s.start >= end {
             break;
         }
         sum_cov = sum_cov.saturating_add(s.nvalid_cov);
         sum_mod = sum_mod.saturating_add(s.nmod);
-        i += 1;
     }
 
     if sum_cov == 0 {
@@ -198,108 +224,56 @@ pub fn region_fraction_modified(
     }
 }
 
-/// Gene-level activity proxy computed from one sample’s pileup.
+/// Gene-level epigenetic activity proxy computed from a single sample's pileup.
 ///
-/// Activity score:
-/// `log2((body_frac + eps) / (prom_frac + eps))`
+/// Score: `log2(body_frac / prom_frac)` where both fractions are
+/// coverage-weighted methylation ratios over the respective regions.
 ///
-/// This is written as one BED feature per gene (using promoter coordinates).
+/// - Score > 0: gene-body methylation exceeds promoter → "active-like"
+/// - Score < 0: promoter methylation exceeds gene-body → "repressed-like"
+///
+/// Output coordinates use the **promoter** interval (anchors to TSS in genome browsers).
 #[derive(Debug, Clone)]
 pub struct GeneActivity {
     pub gene_key: String,
     pub chrom: String,
+    /// Promoter start, 0-based
     pub start: u64,
+    /// Promoter end, 0-based exclusive
     pub end: u64,
     pub prom_frac: f64,
     pub body_frac: f64,
+    /// `log2(body_frac / prom_frac)`
     pub score: f64,
 }
 
-/// Compute a per-gene “epigenetic activity” proxy from a single sample’s modkit pileup.
-///
-/// ## Overview
-/// This function derives a gene-level activity-like score by contrasting
-/// **promoter methylation** and **gene-body methylation** within the *same sample*.
-///
-/// It is intended to capture the commonly observed epigenetic pattern where
-/// actively transcribed genes tend to have:
-/// - relatively **low methylation at the promoter**, and
-/// - relatively **higher methylation across the gene body**.
-///
-/// ## Region methylation estimation
-/// For each genomic region \(R = [start, end)\), methylation is estimated by
-/// pooling per-site counts from the pileup:
+/// Compute per-gene epigenetic activity from a modkit pileup and BED4 regions.
 ///
-/// \[
-/// f(R) = \frac{\sum_{i \in R} Nmod_i}{\sum_{i \in R} Nvalid\_cov_i}
-/// \]
+/// For each gene with both a `_prom` and a `_body` region:
+/// 1. Aggregate methylation fraction `f(R) = Σ Nmod / Σ Nvalid_cov` per region
+/// 2. Skip genes where either fraction is 0 or coverage is below `min_sum_cov`
+/// 3. Compute `score = log2(body_frac / prom_frac)`
 ///
-/// where:
-/// - \(Nmod_i\) is the number of modified calls at site \(i\)
-/// - \(Nvalid\_cov_i\) is the number of valid modification calls at site \(i\)
+/// Results are sorted by `(chrom, start)` for IGV compatibility.
 ///
-/// This is a **coverage-weighted fraction**, which avoids biases that arise
-/// from simply averaging per-site percentages.
-///
-/// ## Activity score
-/// For each gene \(g\) with both a promoter region \(P_g\) and a gene-body region \(B_g\),
-/// the activity proxy is defined as:
-///
-/// \[
-/// A_g = \log_2\left(\frac{f(B_g)}{f(P_g)}\right)
-/// \]
-///
-/// ## Skipping rules
-/// To avoid undefined or numerically unstable values, a gene is **skipped** if:
-/// - either the promoter or gene body has **no overlapping pileup sites**
-/// - \(\sum Nvalid\_cov = 0\) in either region
-/// - \(f(P_g) = 0\) or \(f(B_g) = 0\)
-///
-/// In other words, the log-ratio is computed **only when both regions have
-/// strictly positive methylation fractions**.
-///
-/// ## Interpretation (heuristic)
-/// - \(A_g > 0\): gene-body methylation exceeds promoter methylation  
-///   → “more active-like” epigenetic state
-/// - \(A_g < 0\): promoter methylation exceeds gene-body methylation  
-///   → “more repressed-like” epigenetic state
-/// - \(A_g \approx 0\): similar promoter/body methylation  
-///   → ambiguous or context-dependent
-///
-/// **Important:** this score is a proxy and does not directly measure transcription.
-/// When RNA-seq data are available, expression-based measures should be preferred.
+/// # Arguments
 ///
-/// ## Output coordinates
-/// Each output [`GeneActivity`] entry uses the **promoter coordinates** for display
-/// (anchoring the signal near the transcription start site in genome browsers),
-/// while the score itself reflects the promoter/body contrast.
+/// * `pileup` - Per-chromosome pileup sites from [`read_pileup_index`]
+/// * `regions` - BED4 regions with `_prom` / `_body` name suffixes
+/// * `min_sum_cov` - Minimum total `Nvalid_cov` required per region
 ///
-/// ## Errors
-/// Returns an error if:
-/// - any region has invalid coordinates (`end <= start`)
+/// # Errors
 ///
-/// # Arguments
-/// * `pileup` – per-chromosome pileup sites indexed by chromosome
-/// * `regions` – BED4 regions with names ending in `_prom` and `_body`
+/// Returns an error if any region has `end <= start`.
 pub fn compute_gene_activity_from_pileup(
     pileup: &HashMap<String, Vec<PileupSite>>,
     regions: &[BedRegion],
     min_sum_cov: u64,
 ) -> Result<Vec<GeneActivity>> {
     #[derive(Clone)]
-    struct Part {
-        chrom: String,
-        start: u64,
-        end: u64,
-        frac: f64,
-        sum_cov: u64,
-    }
-
+    struct Part { chrom: String, start: u64, end: u64, frac: f64, sum_cov: u64 }
     #[derive(Default)]
-    struct Pair {
-        prom: Option<Part>,
-        body: Option<Part>,
-    }
+    struct Pair { prom: Option<Part>, body: Option<Part> }
 
     let mut by_gene: HashMap<String, Pair> = HashMap::new();
 
@@ -312,52 +286,32 @@ pub fn compute_gene_activity_from_pileup(
         }
 
         let ParsedName { gene_key, kind } = parse_region_name(&r.name);
-        if kind == RegionKind::Other {
-            continue;
-        }
+        if kind == RegionKind::Other { continue; }
+
+        let chrom_sites = match pileup.get(&r.chrom) { Some(v) => v, None => continue };
 
-        let chrom_sites = match pileup.get(&r.chrom) {
-            Some(v) => v,
+        let (frac, sum_cov, _) = match region_fraction_modified(chrom_sites, r.start, r.end) {
+            Some(x) => x,
             None => continue,
         };
 
-        let (frac, sum_cov, _) =
-            match region_fraction_modified(chrom_sites, r.start, r.end) {
-                Some(x) => x,
-                None => continue,
-            };
-
-        // Enforce minimum coverage and positive fraction
-        if sum_cov < min_sum_cov || frac <= 0.0 {
-            continue;
-        }
-
-        let part = Part {
-            chrom: r.chrom.clone(),
-            start: r.start,
-            end: r.end,
-            frac,
-            sum_cov,
-        };
+        if sum_cov < min_sum_cov || frac <= 0.0 { continue; }
 
+        let part = Part { chrom: r.chrom.clone(), start: r.start, end: r.end, frac, sum_cov };
         let entry = by_gene.entry(gene_key).or_default();
         match kind {
-            RegionKind::Promoter => entry.prom = Some(part),
-            RegionKind::GeneBody => entry.body = Some(part),
-            RegionKind::Other => {}
+            RegionKind::Promoter  => entry.prom = Some(part),
+            RegionKind::GeneBody  => entry.body = Some(part),
+            RegionKind::Other     => {}
         }
     }
 
     let mut out = Vec::new();
-
     for (gene, pair) in by_gene {
         let (prom, body) = match (pair.prom, pair.body) {
             (Some(p), Some(b)) => (p, b),
             _ => continue,
         };
-
-        let score = (body.frac / prom.frac).log2();
-
         out.push(GeneActivity {
             gene_key: gene,
             chrom: prom.chrom,
@@ -365,11 +319,10 @@ pub fn compute_gene_activity_from_pileup(
             end: prom.end,
             prom_frac: prom.frac,
             body_frac: body.frac,
-            score,
+            score: (body.frac / prom.frac).log2(),
         });
     }
 
-    // Sort for IGV
     out.sort_by(|a, b| match a.chrom.cmp(&b.chrom) {
         Ordering::Equal => a.start.cmp(&b.start),
         o => o,
@@ -378,39 +331,54 @@ pub fn compute_gene_activity_from_pileup(
     Ok(out)
 }
 
-/// Convert a log2-ratio value to a BED `score` (0..=1000) using symmetric clipping.
-///
-/// Mapping:
-/// - clip to [-clip, +clip]
-/// - scale to [0, 1000]
+/// Convert a log2-ratio to a BED `score` field (0–1000).
 ///
-/// If `v = -clip` → 0  
-/// If `v = 0`     → 500  
-/// If `v = +clip` → 1000
+/// `v` is clipped to `[-clip, +clip]` then linearly mapped:
+/// `-clip` → 0, `0` → 500, `+clip` → 1000.
 pub fn bed_score_from_log2_ratio(v: f64, clip: f64) -> u16 {
-    assert!(clip > 0.0, "clip must be > 0");
-    let x = v.clamp(-clip, clip);
-    let t = (x + clip) / (2.0 * clip); // 0..1
-    let s = (t * 1000.0).round();
-    s.clamp(0.0, 1000.0) as u16
+    debug_assert!(clip > 0.0, "clip must be > 0");
+    let t = (v.clamp(-clip, clip) + clip) / (2.0 * clip);
+    (t * 1000.0).round().clamp(0.0, 1000.0) as u16
 }
 
-/// Write gene activity as an IGV BED9 track with `itemRgb=On` (diverging colors),
-/// and store the (clipped+scaled) log2-ratio in BED column 5 (`score`).
+/// Write gene activity as an IGV BED9 track with `itemRgb=On`.
+///
+/// Color encodes `GeneActivity::score` (`log2(body/prom)`) via
+/// [`diverging_rgb`] clipped to `[-clip, +clip]`. The BED `score` column
+/// (col 5) encodes the same quantity scaled to `[0, 1000]` via
+/// [`bed_score_from_log2_ratio`].
 ///
-/// - Color encodes `GeneActivity::score` (log2(body/prom)) clipped to `[-clip, +clip]`.
-/// - BED `score` encodes the same quantity scaled to [0,1000].
+/// Output is written atomically: data goes to a UUID temp file in the same
+/// directory as `path`, then renamed on success. A [`TempFileGuard`] ensures
+/// the temp file is removed automatically on failure.
+///
+/// # Arguments
+///
+/// * `path` - Destination BED9 file path
+/// * `track_name` - IGV track name shown in the track label
+/// * `activity` - Gene activity records (produced by [`compute_gene_activity_from_pileup`])
+/// * `clip` - Symmetric clip value for color and score mapping; must be > 0
+///
+/// # Errors
+///
+/// Returns an error if `clip <= 0`, the file cannot be written, or the atomic
+/// rename fails.
 pub fn write_gene_activity_bed9_itemrgb(
     path: &str,
     track_name: &str,
     activity: &[GeneActivity],
     clip: f64,
 ) -> Result<()> {
-    if !(clip > 0.0) {
-        return Err(anyhow!("clip must be > 0"));
+    if clip <= 0.0 {
+        return Err(anyhow!("clip must be > 0, got {clip}"));
     }
 
-    let out = File::create(path).with_context(|| format!("Failed to create BED: {path}"))?;
+    let tmp_dir = Path::new(path).parent().unwrap_or(Path::new("."));
+    let mut guard = TempFileGuard::new();
+    let tmp_path = guard.tmp_path("", tmp_dir);
+
+    let out = File::create(&tmp_path)
+        .with_context(|| format!("failed to create temp file: {}", tmp_path.display()))?;
     let mut w = BufWriter::new(out);
 
     writeln!(w, r#"track name="{}" itemRgb="On""#, track_name)?;
@@ -418,36 +386,42 @@ pub fn write_gene_activity_bed9_itemrgb(
     for a in activity {
         let (rr, gg, bb) = diverging_rgb(a.score, clip);
         let bed_score = bed_score_from_log2_ratio(a.score, clip);
-
         writeln!(
             w,
-            "{}\t{}\t{}\t{}\t{}\t.\t{}\t{}\t{},{},{}",
-            a.chrom,
-            a.start,
-            a.end,
-            format!("{}={}", a.gene_key, a.score),
+            "{}\t{}\t{}\t{}={:.4}\t{}\t.\t{}\t{}\t{},{},{}",
+            a.chrom, a.start, a.end,
+            a.gene_key, a.score,
             bed_score,
-            a.start,
-            a.end,
-            rr,
-            gg,
-            bb
+            a.start, a.end,
+            rr, gg, bb,
         )?;
     }
 
+    w.flush().with_context(|| format!("failed to flush: {}", tmp_path.display()))?;
+    drop(w);
+
+    fs::rename(&tmp_path, path)
+        .with_context(|| format!("failed to rename {} → {path}", tmp_path.display()))?;
+
+    guard.disarm();
     Ok(())
 }
 
-/// High-level helper: pileup + regions BED4 -> IGV BED9 activity track (no pseudocounts).
+/// End-to-end helper: pileup + BED4 regions → IGV BED9 activity track.
+///
+/// Runs the full pipeline in order:
+/// 1. [`read_pileup_index`] — parse and index the modkit pileup
+/// 2. [`read_bed4`] — parse the gene promoter/body regions
+/// 3. [`compute_gene_activity_from_pileup`] — compute log2 activity scores
+/// 4. [`write_gene_activity_bed9_itemrgb`] — write the BED9 track atomically
+///
+/// # Returns
 ///
-/// Pipeline:
-/// 1) Read pileup and build a per-chromosome index (`read_pileup_index`)
-/// 2) Read regions BED4 (`read_bed4`)
-/// 3) Compute gene activity (`compute_gene_activity_from_pileup`) using
-///    `score = log2(body_frac / prom_frac)` and **skipping** genes where either fraction is 0
-/// 4) Write BED9 with `itemRgb=On` (`write_gene_activity_bed9_itemrgb`)
+/// The number of gene features written.
 ///
-/// Returns the number of written gene features.
+/// # Errors
+///
+/// Returns an error if any pipeline step fails.
 pub fn pileup_regions_to_activity_bed9_itemrgb(
     pileup_path: &str,
     regions_bed4_path: &str,
@@ -476,50 +450,24 @@ mod tests {
         test_init();
 
         let config = Config::default();
-
-        let id = "DUMCO";
-        let path = format!(
-            "{}/{}_{}_modkit_pileup.bed.gz",
-            config.tumoral_dir(id),
-            id,
-            config.tumoral_name
-        );
-
-        let out = format!(
-            "{}/{}_{}_modkit_activity.bed",
-            config.tumoral_dir(id),
-            id,
-            config.tumoral_name
-        );
-
-        let track_name = format!("{id} Gene Activity");
-
-        let regions_bed4_path = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_genes_prom_body.bed";
-
-        let n = pileup_regions_to_activity_bed9_itemrgb(&path, regions_bed4_path, 100, &out, &track_name, 2.0)?;
-        info!("{n} genes activities written");
-
-        let id = "CHAHA";
-        let path = format!(
-            "{}/{}_{}_modkit_pileup.bed.gz",
-            config.tumoral_dir(id),
-            id,
-            config.tumoral_name
-        );
-
-        let out = format!(
-            "{}/{}_{}_modkit_activity.bed",
-            config.tumoral_dir(id),
-            id,
-            config.tumoral_name
-        );
-
-        let track_name = format!("{id} Gene Activity");
-
-        let regions_bed4_path = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_genes_prom_body.bed";
-
-        let n = pileup_regions_to_activity_bed9_itemrgb(&path, regions_bed4_path, 100, &out, &track_name, 2.0)?;
-        info!("{n} genes activities written");
+        let regions_bed4_path =
+            "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_genes_prom_body.bed";
+
+        for id in ["DUMCO", "CHAHA"] {
+            let path = format!(
+                "{}/{}_{}_modkit_pileup.bed.gz",
+                config.tumoral_dir(id), id, config.tumoral_name
+            );
+            let out = format!(
+                "{}/{}_{}_modkit_activity.bed",
+                config.tumoral_dir(id), id, config.tumoral_name
+            );
+            let n = pileup_regions_to_activity_bed9_itemrgb(
+                &path, regions_bed4_path, 100, &out,
+                &format!("{id} Gene Activity"), 2.0,
+            )?;
+            info!("{id}: {n} gene activities written");
+        }
 
         Ok(())
     }