|
|
@@ -0,0 +1,526 @@
|
|
|
+use anyhow::{anyhow, Context, Result};
|
|
|
+use csv::ReaderBuilder;
|
|
|
+use std::{
|
|
|
+ cmp::Ordering, collections::HashMap, fs::File, io::{BufWriter, Write}
|
|
|
+};
|
|
|
+
|
|
|
+use crate::io::readers::get_gz_reader;
|
|
|
+
|
|
|
/// One methylation call row from a modkit pileup (bedMethyl-like) file.
///
/// Only the fields needed to aggregate region-level methylation are kept.
/// Pileup rows are single-base intervals `[start, start + 1)`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PileupSite {
    /// 0-based position of the site (pileup column 2).
    pub start: u64,
    /// `Nvalid_cov` (pileup column 10).
    pub nvalid_cov: u64,
    /// `Nmod` (pileup column 12).
    pub nmod: u64,
}
|
|
|
+
|
|
|
/// One region from a BED4 file: chrom, start, end, name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BedRegion {
    /// Chromosome / contig name (BED column 1).
    pub chrom: String,
    /// Start coordinate (BED column 2).
    pub start: u64,
    /// End coordinate (BED column 3).
    pub end: u64,
    /// Region name (BED column 4).
    pub name: String,
}
|
|
|
+
|
|
|
/// Promoter/body classification inferred from a region name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RegionKind {
    /// Region whose name ends in `_prom`.
    Promoter,
    /// Region whose name ends in `_body`.
    GeneBody,
    /// Region whose name carries neither suffix.
    Other,
}
|
|
|
+
|
|
|
+/// Parsed region name into gene key + kind.
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct ParsedName {
|
|
|
+ pub gene_key: String,
|
|
|
+ pub kind: RegionKind,
|
|
|
+}
|
|
|
+
|
|
|
+/// Parse region `name` for `_prom` or `_body` suffix.
|
|
|
+///
|
|
|
+/// - `FOO_prom` => gene_key `FOO`, kind `Promoter`
|
|
|
+/// - `FOO_body` => gene_key `FOO`, kind `GeneBody`
|
|
|
+pub fn parse_region_name(name: &str) -> ParsedName {
|
|
|
+ if let Some(g) = name.strip_suffix("_prom") {
|
|
|
+ ParsedName { gene_key: g.to_string(), kind: RegionKind::Promoter }
|
|
|
+ } else if let Some(g) = name.strip_suffix("_body") {
|
|
|
+ ParsedName { gene_key: g.to_string(), kind: RegionKind::GeneBody }
|
|
|
+ } else {
|
|
|
+ ParsedName { gene_key: name.to_string(), kind: RegionKind::Other }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/// Map a signed value onto a blue→white→red diverging palette (for IGV `itemRgb`).
///
/// `v` is first clipped to `[-clip, +clip]`. `-clip` yields pure blue,
/// zero yields white and `+clip` yields pure red; values in between fade
/// linearly towards white.
///
/// # Panics
/// Panics if `clip` is not strictly positive.
pub fn diverging_rgb(v: f64, clip: f64) -> (u8, u8, u8) {
    assert!(clip > 0.0, "clip must be > 0");

    // Position of the clipped value on the unit interval:
    // 0 = -clip, 0.5 = zero, 1 = +clip.
    let unit = (v.clamp(-clip, clip) + clip) / (2.0 * clip);

    // Convert an intensity in [0, 1] to an 8-bit channel value.
    let channel = |level: f64| (255.0 * level).round() as u8;

    if unit < 0.5 {
        // Negative half: blue stays saturated, red/green fade in towards white.
        let shade = channel(unit / 0.5);
        (shade, shade, 255)
    } else {
        // Non-negative half: red stays saturated, green/blue fade out from white.
        let shade = channel(1.0 - (unit - 0.5) / 0.5);
        (255, shade, shade)
    }
}
|
|
|
+
|
|
|
+/// Read a BED4 regions file.
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+/// Fails on malformed lines or invalid coordinates.
|
|
|
+pub fn read_bed4(path: &str) -> Result<Vec<BedRegion>> {
|
|
|
+ let mut rdr = ReaderBuilder::new()
|
|
|
+ .delimiter(b'\t')
|
|
|
+ .has_headers(false)
|
|
|
+ .flexible(true)
|
|
|
+ .from_path(path)
|
|
|
+ .with_context(|| format!("Failed to open BED: {path}"))?;
|
|
|
+
|
|
|
+ let mut out = Vec::new();
|
|
|
+ for (i, row) in rdr.records().enumerate() {
|
|
|
+ let row = row.with_context(|| format!("BED parse error at line {}", i + 1))?;
|
|
|
+ let chrom = row.get(0).ok_or_else(|| anyhow!("Missing chrom at line {}", i + 1))?.to_string();
|
|
|
+ let start: u64 = row.get(1).ok_or_else(|| anyhow!("Missing start at line {}", i + 1))?
|
|
|
+ .parse().with_context(|| format!("Bad start at line {}", i + 1))?;
|
|
|
+ let end: u64 = row.get(2).ok_or_else(|| anyhow!("Missing end at line {}", i + 1))?
|
|
|
+ .parse().with_context(|| format!("Bad end at line {}", i + 1))?;
|
|
|
+ if end <= start {
|
|
|
+ return Err(anyhow!("Invalid BED interval end<=start at line {}", i + 1));
|
|
|
+ }
|
|
|
+ let name = row.get(3).unwrap_or(".").to_string();
|
|
|
+ out.push(BedRegion { chrom, start, end, name });
|
|
|
+ }
|
|
|
+ Ok(out)
|
|
|
+}
|
|
|
+
|
|
|
+/// Read a modkit pileup file and index it per chromosome.
|
|
|
+///
|
|
|
+/// Expected columns (1-based):
|
|
|
+/// - chrom (1)
|
|
|
+/// - start (2)
|
|
|
+/// - Nvalid_cov (10)
|
|
|
+/// - Nmod (12)
|
|
|
+///
|
|
|
+/// # Notes
|
|
|
+/// The file is typically sorted; we sort per chromosome just in case.
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+/// Fails on malformed lines or invalid integers.
|
|
|
+pub fn read_pileup_index(path: &str) -> Result<HashMap<String, Vec<PileupSite>>> {
|
|
|
+ let mut rdr = ReaderBuilder::new()
|
|
|
+ .delimiter(b'\t')
|
|
|
+ .has_headers(false)
|
|
|
+ .flexible(true)
|
|
|
+ .from_reader(get_gz_reader(path)?);
|
|
|
+
|
|
|
+ let mut map: HashMap<String, Vec<PileupSite>> = HashMap::new();
|
|
|
+
|
|
|
+ for (i, row) in rdr.records().enumerate() {
|
|
|
+ let row = row.with_context(|| format!("Pileup parse error at line {}", i + 1))?;
|
|
|
+
|
|
|
+ let chrom = row.get(0).ok_or_else(|| anyhow!("Missing chrom at line {}", i + 1))?.to_string();
|
|
|
+ let start: u64 = row.get(1).ok_or_else(|| anyhow!("Missing start at line {}", i + 1))?
|
|
|
+ .parse().with_context(|| format!("Bad start at line {}", i + 1))?;
|
|
|
+
|
|
|
+ // col10 = Nvalid_cov (index 9), col12 = Nmod (index 11)
|
|
|
+ let nvalid_cov: u64 = row.get(9).ok_or_else(|| anyhow!("Missing Nvalid_cov (col10) at line {}", i + 1))?
|
|
|
+ .parse().with_context(|| format!("Bad Nvalid_cov at line {}", i + 1))?;
|
|
|
+ let nmod: u64 = row.get(11).ok_or_else(|| anyhow!("Missing Nmod (col12) at line {}", i + 1))?
|
|
|
+ .parse().with_context(|| format!("Bad Nmod at line {}", i + 1))?;
|
|
|
+
|
|
|
+ // Skip sites with zero valid coverage to avoid useless entries.
|
|
|
+ if nvalid_cov == 0 {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ map.entry(chrom).or_default().push(PileupSite { start, nvalid_cov, nmod });
|
|
|
+ }
|
|
|
+
|
|
|
+ // Ensure sorted by position for binary searching.
|
|
|
+ for v in map.values_mut() {
|
|
|
+ v.sort_by(|a, b| a.start.cmp(&b.start));
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(map)
|
|
|
+}
|
|
|
+
|
|
|
+/// Aggregate methylation fraction over `[start, end)` from a per-chrom pileup index.
|
|
|
+///
|
|
|
+/// Fraction is computed as: `sum(Nmod) / sum(Nvalid_cov)`.
|
|
|
+///
|
|
|
+/// # Returns
|
|
|
+/// `(fraction, sum_nvalid_cov, sum_nmod)`; returns `None` if no sites overlap.
|
|
|
+pub fn region_fraction_modified(
|
|
|
+ chrom_sites: &[PileupSite],
|
|
|
+ start: u64,
|
|
|
+ end: u64,
|
|
|
+) -> Option<(f64, u64, u64)> {
|
|
|
+ // lower_bound for first site with pos >= start
|
|
|
+ let mut lo = 0usize;
|
|
|
+ let mut hi = chrom_sites.len();
|
|
|
+ while lo < hi {
|
|
|
+ let mid = (lo + hi) / 2;
|
|
|
+ if chrom_sites[mid].start < start { lo = mid + 1 } else { hi = mid }
|
|
|
+ }
|
|
|
+ let mut i = lo;
|
|
|
+
|
|
|
+ let mut sum_cov: u64 = 0;
|
|
|
+ let mut sum_mod: u64 = 0;
|
|
|
+
|
|
|
+ while i < chrom_sites.len() {
|
|
|
+ let s = &chrom_sites[i];
|
|
|
+ if s.start >= end {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ sum_cov = sum_cov.saturating_add(s.nvalid_cov);
|
|
|
+ sum_mod = sum_mod.saturating_add(s.nmod);
|
|
|
+ i += 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ if sum_cov == 0 {
|
|
|
+ None
|
|
|
+ } else {
|
|
|
+ Some((sum_mod as f64 / sum_cov as f64, sum_cov, sum_mod))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/// Gene-level activity proxy computed from one sample’s pileup.
///
/// Activity score:
/// `log2(body_frac / prom_frac)` (no pseudocount is added).
///
/// This is written as one BED feature per gene (using promoter coordinates).
#[derive(Debug, Clone, PartialEq)]
pub struct GeneActivity {
    /// Gene identifier: the region name without its `_prom` / `_body` suffix.
    pub gene_key: String,
    /// Chromosome of the promoter region.
    pub chrom: String,
    /// Start of the promoter region.
    pub start: u64,
    /// End of the promoter region.
    pub end: u64,
    /// Pooled methylation fraction over the promoter region.
    pub prom_frac: f64,
    /// Pooled methylation fraction over the gene-body region.
    pub body_frac: f64,
    /// `log2(body_frac / prom_frac)`.
    pub score: f64,
}
|
|
|
+
|
|
|
+/// Compute a per-gene “epigenetic activity” proxy from a single sample’s modkit pileup.
|
|
|
+///
|
|
|
+/// ## Overview
|
|
|
+/// This function derives a gene-level activity-like score by contrasting
|
|
|
+/// **promoter methylation** and **gene-body methylation** within the *same sample*.
|
|
|
+///
|
|
|
+/// It is intended to capture the commonly observed epigenetic pattern where
|
|
|
+/// actively transcribed genes tend to have:
|
|
|
+/// - relatively **low methylation at the promoter**, and
|
|
|
+/// - relatively **higher methylation across the gene body**.
|
|
|
+///
|
|
|
+/// ## Region methylation estimation
|
|
|
+/// For each genomic region \(R = [start, end)\), methylation is estimated by
|
|
|
+/// pooling per-site counts from the pileup:
|
|
|
+///
|
|
|
+/// \[
|
|
|
+/// f(R) = \frac{\sum_{i \in R} Nmod_i}{\sum_{i \in R} Nvalid\_cov_i}
|
|
|
+/// \]
|
|
|
+///
|
|
|
+/// where:
|
|
|
+/// - \(Nmod_i\) is the number of modified calls at site \(i\)
|
|
|
+/// - \(Nvalid\_cov_i\) is the number of valid modification calls at site \(i\)
|
|
|
+///
|
|
|
+/// This is a **coverage-weighted fraction**, which avoids biases that arise
|
|
|
+/// from simply averaging per-site percentages.
|
|
|
+///
|
|
|
+/// ## Activity score
|
|
|
+/// For each gene \(g\) with both a promoter region \(P_g\) and a gene-body region \(B_g\),
|
|
|
+/// the activity proxy is defined as:
|
|
|
+///
|
|
|
+/// \[
|
|
|
+/// A_g = \log_2\left(\frac{f(B_g)}{f(P_g)}\right)
|
|
|
+/// \]
|
|
|
+///
|
|
|
+/// ## Skipping rules
|
|
|
+/// To avoid undefined or numerically unstable values, a gene is **skipped** if:
|
|
|
+/// - either the promoter or gene body has **no overlapping pileup sites**
|
|
|
+/// - \(\sum Nvalid\_cov = 0\) in either region
|
|
|
+/// - \(f(P_g) = 0\) or \(f(B_g) = 0\)
|
|
|
+///
|
|
|
+/// In other words, the log-ratio is computed **only when both regions have
|
|
|
+/// strictly positive methylation fractions**.
|
|
|
+///
|
|
|
+/// ## Interpretation (heuristic)
|
|
|
+/// - \(A_g > 0\): gene-body methylation exceeds promoter methylation
|
|
|
+/// → “more active-like” epigenetic state
|
|
|
+/// - \(A_g < 0\): promoter methylation exceeds gene-body methylation
|
|
|
+/// → “more repressed-like” epigenetic state
|
|
|
+/// - \(A_g \approx 0\): similar promoter/body methylation
|
|
|
+/// → ambiguous or context-dependent
|
|
|
+///
|
|
|
+/// **Important:** this score is a proxy and does not directly measure transcription.
|
|
|
+/// When RNA-seq data are available, expression-based measures should be preferred.
|
|
|
+///
|
|
|
+/// ## Output coordinates
|
|
|
+/// Each output [`GeneActivity`] entry uses the **promoter coordinates** for display
|
|
|
+/// (anchoring the signal near the transcription start site in genome browsers),
|
|
|
+/// while the score itself reflects the promoter/body contrast.
|
|
|
+///
|
|
|
+/// ## Errors
|
|
|
+/// Returns an error if:
|
|
|
+/// - any region has invalid coordinates (`end <= start`)
|
|
|
+///
|
|
|
+/// # Arguments
|
|
|
+/// * `pileup` – per-chromosome pileup sites indexed by chromosome
|
|
|
+/// * `regions` – BED4 regions with names ending in `_prom` and `_body`
|
|
|
+pub fn compute_gene_activity_from_pileup(
|
|
|
+ pileup: &HashMap<String, Vec<PileupSite>>,
|
|
|
+ regions: &[BedRegion],
|
|
|
+ min_sum_cov: u64,
|
|
|
+) -> Result<Vec<GeneActivity>> {
|
|
|
+ #[derive(Clone)]
|
|
|
+ struct Part {
|
|
|
+ chrom: String,
|
|
|
+ start: u64,
|
|
|
+ end: u64,
|
|
|
+ frac: f64,
|
|
|
+ sum_cov: u64,
|
|
|
+ }
|
|
|
+
|
|
|
+ #[derive(Default)]
|
|
|
+ struct Pair {
|
|
|
+ prom: Option<Part>,
|
|
|
+ body: Option<Part>,
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut by_gene: HashMap<String, Pair> = HashMap::new();
|
|
|
+
|
|
|
+ for r in regions {
|
|
|
+ if r.end <= r.start {
|
|
|
+ return Err(anyhow!(
|
|
|
+ "Invalid region end<=start {}:{}-{} ({})",
|
|
|
+ r.chrom, r.start, r.end, r.name
|
|
|
+ ));
|
|
|
+ }
|
|
|
+
|
|
|
+ let ParsedName { gene_key, kind } = parse_region_name(&r.name);
|
|
|
+ if kind == RegionKind::Other {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ let chrom_sites = match pileup.get(&r.chrom) {
|
|
|
+ Some(v) => v,
|
|
|
+ None => continue,
|
|
|
+ };
|
|
|
+
|
|
|
+ let (frac, sum_cov, _) =
|
|
|
+ match region_fraction_modified(chrom_sites, r.start, r.end) {
|
|
|
+ Some(x) => x,
|
|
|
+ None => continue,
|
|
|
+ };
|
|
|
+
|
|
|
+ // Enforce minimum coverage and positive fraction
|
|
|
+ if sum_cov < min_sum_cov || frac <= 0.0 {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ let part = Part {
|
|
|
+ chrom: r.chrom.clone(),
|
|
|
+ start: r.start,
|
|
|
+ end: r.end,
|
|
|
+ frac,
|
|
|
+ sum_cov,
|
|
|
+ };
|
|
|
+
|
|
|
+ let entry = by_gene.entry(gene_key).or_default();
|
|
|
+ match kind {
|
|
|
+ RegionKind::Promoter => entry.prom = Some(part),
|
|
|
+ RegionKind::GeneBody => entry.body = Some(part),
|
|
|
+ RegionKind::Other => {}
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut out = Vec::new();
|
|
|
+
|
|
|
+ for (gene, pair) in by_gene {
|
|
|
+ let (prom, body) = match (pair.prom, pair.body) {
|
|
|
+ (Some(p), Some(b)) => (p, b),
|
|
|
+ _ => continue,
|
|
|
+ };
|
|
|
+
|
|
|
+ let score = (body.frac / prom.frac).log2();
|
|
|
+
|
|
|
+ out.push(GeneActivity {
|
|
|
+ gene_key: gene,
|
|
|
+ chrom: prom.chrom,
|
|
|
+ start: prom.start,
|
|
|
+ end: prom.end,
|
|
|
+ prom_frac: prom.frac,
|
|
|
+ body_frac: body.frac,
|
|
|
+ score,
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ // Sort for IGV
|
|
|
+ out.sort_by(|a, b| match a.chrom.cmp(&b.chrom) {
|
|
|
+ Ordering::Equal => a.start.cmp(&b.start),
|
|
|
+ o => o,
|
|
|
+ });
|
|
|
+
|
|
|
+ Ok(out)
|
|
|
+}
|
|
|
+
|
|
|
/// Turn a log2-ratio into a BED `score` in `0..=1000` using symmetric clipping.
///
/// The value is clipped to `[-clip, +clip]` and then rescaled linearly:
///
/// - `v = -clip` → 0
/// - `v = 0`     → 500
/// - `v = +clip` → 1000
///
/// # Panics
/// Panics if `clip` is not strictly positive.
pub fn bed_score_from_log2_ratio(v: f64, clip: f64) -> u16 {
    assert!(clip > 0.0, "clip must be > 0");

    // Fraction of the way from -clip (0.0) to +clip (1.0).
    let unit = (v.clamp(-clip, clip) + clip) / (2.0 * clip);

    (unit * 1000.0).round().clamp(0.0, 1000.0) as u16
}
|
|
|
+
|
|
|
+/// Write gene activity as an IGV BED9 track with `itemRgb=On` (diverging colors),
|
|
|
+/// and store the (clipped+scaled) log2-ratio in BED column 5 (`score`).
|
|
|
+///
|
|
|
+/// - Color encodes `GeneActivity::score` (log2(body/prom)) clipped to `[-clip, +clip]`.
|
|
|
+/// - BED `score` encodes the same quantity scaled to [0,1000].
|
|
|
+pub fn write_gene_activity_bed9_itemrgb(
|
|
|
+ path: &str,
|
|
|
+ track_name: &str,
|
|
|
+ activity: &[GeneActivity],
|
|
|
+ clip: f64,
|
|
|
+) -> Result<()> {
|
|
|
+ if !(clip > 0.0) {
|
|
|
+ return Err(anyhow!("clip must be > 0"));
|
|
|
+ }
|
|
|
+
|
|
|
+ let out = File::create(path).with_context(|| format!("Failed to create BED: {path}"))?;
|
|
|
+ let mut w = BufWriter::new(out);
|
|
|
+
|
|
|
+ writeln!(w, r#"track name="{}" itemRgb="On""#, track_name)?;
|
|
|
+
|
|
|
+ for a in activity {
|
|
|
+ let (rr, gg, bb) = diverging_rgb(a.score, clip);
|
|
|
+ let bed_score = bed_score_from_log2_ratio(a.score, clip);
|
|
|
+
|
|
|
+ writeln!(
|
|
|
+ w,
|
|
|
+ "{}\t{}\t{}\t{}\t{}\t.\t{}\t{}\t{},{},{}",
|
|
|
+ a.chrom,
|
|
|
+ a.start,
|
|
|
+ a.end,
|
|
|
+ format!("{}={}", a.gene_key, a.score),
|
|
|
+ bed_score,
|
|
|
+ a.start,
|
|
|
+ a.end,
|
|
|
+ rr,
|
|
|
+ gg,
|
|
|
+ bb
|
|
|
+ )?;
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+}
|
|
|
+
|
|
|
+/// High-level helper: pileup + regions BED4 -> IGV BED9 activity track (no pseudocounts).
|
|
|
+///
|
|
|
+/// Pipeline:
|
|
|
+/// 1) Read pileup and build a per-chromosome index (`read_pileup_index`)
|
|
|
+/// 2) Read regions BED4 (`read_bed4`)
|
|
|
+/// 3) Compute gene activity (`compute_gene_activity_from_pileup`) using
|
|
|
+/// `score = log2(body_frac / prom_frac)` and **skipping** genes where either fraction is 0
|
|
|
+/// 4) Write BED9 with `itemRgb=On` (`write_gene_activity_bed9_itemrgb`)
|
|
|
+///
|
|
|
+/// Returns the number of written gene features.
|
|
|
+pub fn pileup_regions_to_activity_bed9_itemrgb(
|
|
|
+ pileup_path: &str,
|
|
|
+ regions_bed4_path: &str,
|
|
|
+ min_sum_cov: u64,
|
|
|
+ out_bed9_path: &str,
|
|
|
+ track_name: &str,
|
|
|
+ clip: f64,
|
|
|
+) -> Result<usize> {
|
|
|
+ let pile = read_pileup_index(pileup_path)?;
|
|
|
+ let regions = read_bed4(regions_bed4_path)?;
|
|
|
+ let activity = compute_gene_activity_from_pileup(&pile, ®ions, min_sum_cov)?;
|
|
|
+ let n = activity.len();
|
|
|
+ write_gene_activity_bed9_itemrgb(out_bed9_path, track_name, &activity, clip)?;
|
|
|
+ Ok(n)
|
|
|
+}
|
|
|
+
|
|
|
#[cfg(test)]
mod tests {
    use log::info;

    use super::*;
    use crate::{config::Config, helpers::test_init};

    /// Runs the full pileup -> activity BED9 pipeline for each sample id in turn.
    ///
    /// NOTE(review): relies on pileup files under the configured tumoral
    /// directories and on an absolute path to the regions BED; presumably only
    /// runnable on a machine that has this data — confirm.
    #[test]
    fn modkit_activity_igv() -> anyhow::Result<()> {
        test_init();

        let config = Config::default();

        for id in ["DUMCO", "CHAHA"] {
            let path = format!(
                "{}/{}_{}_modkit_pileup.bed.gz",
                config.tumoral_dir(id),
                id,
                config.tumoral_name
            );

            let out = format!(
                "{}/{}_{}_modkit_activity.bed",
                config.tumoral_dir(id),
                id,
                config.tumoral_name
            );

            let track_name = format!("{id} Gene Activity");

            let regions_bed4_path =
                "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_genes_prom_body.bed";

            let n = pileup_regions_to_activity_bed9_itemrgb(
                &path,
                regions_bed4_path,
                100,
                &out,
                &track_name,
                2.0,
            )?;
            info!("{n} genes activities written");
        }

        Ok(())
    }
}
|