| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- //! IGV session URL builder (igv.js compatible).
- //!
- //! This module builds an **igv.js session JSON**, compresses it with the same
- //! deflate + base64 + URL-safe substitutions used by igvteam utilities, and
- //! returns a link of the form:
- //!
- //! `"{base_url}?sessionURL=blob:{blob}"`
- //!
- //! ## Coordinate conventions
- //!
- //! The `locus` field in igv.js session JSON is a **string** in the human-facing
- //! form `chr:start-end`. In igv.js, the internal reference frame uses a 0-based
- //! `start`, but `getLocusString()` returns a `chr:start-end` representation
//! where `start` is **1-based**.
- //!
- //! Therefore, the APIs in this module treat `position`, `start`, and `end` as
- //! **1-based coordinates** intended for locus strings.
- //!
- //! ## Example
- //!
- //! ```no_run
//! # use pandora_lib_igv::{ReferenceValues, Session};
//! # use pandora_lib_igv::tracks::{bam::BamTrack, Track};
//! let url = Session::default()
//!     .with_reference(ReferenceValues::default())
//!     .with_locus_at("chr1", 1_000_000, 1000).unwrap()
//!     .add_track(Track::Bam(BamTrack::new("tumor", "/data/tumor.bam"))).unwrap()
//!     .link("igv/").unwrap();
- //! println!("{url}");
- //! ```
- pub mod tracks;
- use anyhow::{Context, Ok};
- use base64::engine::general_purpose::STANDARD;
- use base64::Engine;
- use flate2::write::DeflateEncoder;
- use flate2::Compression;
- use serde::Serialize;
- use serde_json::{json, Value};
- use std::io::prelude::*;
- use crate::tracks::Track;
- /// Compress a session string using the igvteam "compressString" algorithm.
- ///
- /// This matches igv-utils' behavior (deflate, base64, then URL-safe character
- /// substitutions) so that the resulting blob can be used with
- /// `?sessionURL=blob:{blob}`.
- ///
- /// Reference implementation:
- /// <https://github.com/igvteam/igv-utils/blob/master/src/bgzf.js#L125>
- fn compress_string(input: &str) -> anyhow::Result<String> {
- let bytes = input.as_bytes();
- // Compress bytes
- let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
- encoder
- .write_all(bytes)
- .context("Failed to write data to encoder")?;
- let compressed_bytes = encoder.finish().context("Failed to finish encoding")?;
- // Base64 encode
- let encoded = STANDARD.encode(compressed_bytes);
- // URL-safe replacements
- Ok(encoded
- .replace('+', ".")
- .replace('/', "_")
- .replace('=', "-"))
- }
- /// Reference genome definition used by igv.js sessions.
- ///
- /// This is serialized into the session JSON under `"reference"`.
- ///
- /// Notes:
- /// - `fastaURL` and `indexURL` must point to the reference FASTA and its `.fai`.
- /// - `cytobandURL` and `aliasURL` are optional but improve display/compatibility.
- ///
- /// To add more track/fields, see:
- /// - <https://github.com/igvteam/igv.js/wiki/Tracks-2.0>
- /// - <https://igv.org/doc/igvjs/#tracks/Tracks/>
- #[derive(Debug, Serialize)]
- pub struct ReferenceValues {
- /// Reference identifier (igv.js `"id"`).
- pub id: String,
- /// Human readable name (igv.js `"name"`).
- pub name: String,
- /// FASTA URL (igv.js `"fastaURL"`).
- #[serde(rename = "fastaURL")]
- pub fasta_url: String,
- /// FASTA index URL (igv.js `"indexURL"`, typically `.fai`).
- #[serde(rename = "indexURL")]
- pub index_url: String,
- /// Cytoband BED URL (igv.js `"cytobandURL"`).
- #[serde(rename = "cytobandURL")]
- pub cytoband_url: String,
- /// Chromosome alias mapping URL (igv.js `"aliasURL"`).
- #[serde(rename = "aliasURL")]
- pub alias_url: String,
- }
- impl Default for ReferenceValues {
- fn default() -> Self {
- Self {
- id: "chm13v2.0".to_string(),
- name: "Human (T2T/hs1 CHM13-v2.0)".to_string(),
- fasta_url: "/data/ref/hs1/chm13v2.0.fa".to_string(),
- index_url: "/data/ref/hs1/chm13v2.0.fa.fai".to_string(),
- cytoband_url: "/data/ref/hs1/chm13v2.0_cytobands_allchrs.bed".to_string(),
- alias_url: "/data/ref/hs1/GCA_009914755.4.chromAlias.txt".to_string(),
- }
- }
- }
- /// igv.js session builder.
- ///
- /// Internally holds a JSON `Value` mirroring igv.js session schema plus a list
- /// of tracks used to assign track order deterministically.
- #[derive(Debug)]
- pub struct Session {
- value: Value,
- tracks: Vec<Track>,
- }
- impl Default for Session {
- fn default() -> Self {
- let value = json!({
- "version": "2.16.0",
- "showSampleNames": false,
- "reference": {},
- "locus": [],
- "tracks": [],
- });
- Self {
- value,
- tracks: Vec::new(),
- }
- }
- }
- impl Session {
- /// Set the session reference genome definition.
- ///
- /// This overwrites the `"reference"` object in the underlying session JSON.
- pub fn with_reference(mut self, reference_values: ReferenceValues) -> Self {
- if let Some(reference) = self.value.get_mut("reference") {
- *reference = json!(reference_values);
- }
- self
- }
- /// Set a single locus using a pre-formatted IGV locus string: `chr:start-end`.
- pub fn with_locus_str(mut self, locus: &str) -> anyhow::Result<Self> {
- *self
- .value
- .get_mut("locus")
- .context("Can't access locus value")? = json!(locus);
- Ok(self)
- }
- /// Set multiple loci (e.g. split view): `["chr1:1-1000", "chr2:200-400"]`.
- pub fn with_loci<I, S>(mut self, loci: I) -> anyhow::Result<Self>
- where
- I: IntoIterator<Item = S>,
- S: AsRef<str>,
- {
- let arr: Vec<Value> = loci
- .into_iter()
- .map(|s| Value::String(s.as_ref().to_string()))
- .collect();
- *self
- .value
- .get_mut("locus")
- .context("Can't access locus value")? = Value::Array(arr);
- Ok(self)
- }
- /// Set locus from 1-based inclusive coordinates.
- pub fn with_region_1based(
- mut self,
- contig: &str,
- start: u32,
- end: u32,
- ) -> anyhow::Result<Self> {
- let start = start.max(1);
- let end = end.max(start);
- *self
- .value
- .get_mut("locus")
- .context("Can't access locus value")? = json!(format!("{contig}:{start}-{end}"));
- Ok(self)
- }
- /// Set locus from 0-based half-open coordinates (start inclusive, end exclusive),
- /// converting to IGV locus string (1-based inclusive).
- pub fn with_region_0based(
- mut self,
- contig: &str,
- start0: u32,
- end0: u32,
- ) -> anyhow::Result<Self> {
- let start1 = start0.saturating_add(1);
- let end1 = end0.max(start0).saturating_add(0); // end0 exclusive -> inclusive end is end0
- // For half-open [start0, end0), inclusive end is end0 (in 1-based)
- *self
- .value
- .get_mut("locus")
- .context("Can't access locus value")? = json!(format!("{contig}:{start1}-{end1}"));
- Ok(self)
- }
- pub fn to_json_value(&self) -> &Value {
- &self.value
- }
- pub fn to_json_string(&self) -> String {
- self.value.to_string()
- }
- pub fn to_json_string_pretty(&self) -> anyhow::Result<String> {
- Ok(serde_json::to_string_pretty(&self.value)?)
- }
- /// Set the locus for the session as a `chr:start-end` string.
- ///
- /// ## Coordinates
- /// This API expects **1-based** coordinates to be used in the locus string.
- /// See module-level documentation for details on igv.js conventions. :contentReference[oaicite:2]{index=2}
- pub fn with_locus(mut self, from: (String, i32), to: i32) -> anyhow::Result<Self> {
- *self
- .value
- .get_mut("locus")
- .context("Can't access locus value")? = json!(format!("{}:{}-{}", from.0, from.1, to));
- Ok(self)
- }
- /// Center the view on a 1-based position with a symmetric window.
- ///
- /// Builds a locus string: `"{contig}:{start}-{end}"` where:
- /// - `position` is 1-based
- /// - `start = max(1, position - plus_minus)`
- /// - `end = position + plus_minus`
- pub fn with_locus_at(
- mut self,
- contig: &str,
- position: u32,
- plus_minus: u32,
- ) -> anyhow::Result<Self> {
- let start = position.saturating_sub(plus_minus).max(1);
- let end = position.saturating_add(plus_minus);
- *self
- .value
- .get_mut("locus")
- .context("Can't access locus value")? = json!(format!("{contig}:{start}-{end}"));
- Ok(self)
- }
- /// Append a track to the session.
- ///
- /// Tracks are ordered in insertion order. Each call recomputes the `"tracks"`
- /// JSON array from the stored track list.
- pub fn add_track(mut self, track: Track) -> anyhow::Result<Self> {
- let mut track = track;
- let pos = self.tracks.len() + 1;
- track.order(pos as i16);
- self.tracks.push(track);
- let tv: Vec<Value> = self.tracks.iter().map(|t| t.to_json()).collect();
- *self
- .value
- .get_mut("tracks")
- .context("Can't access locus value")? = Value::Array(tv);
- Ok(self)
- }
- /// Add many tracks at once (preserves order).
- pub fn add_tracks<I>(mut self, tracks: I) -> anyhow::Result<Self>
- where
- I: IntoIterator<Item = Track>,
- {
- for t in tracks {
- self = self.add_track(t)?;
- }
- Ok(self)
- }
- /// Explicitly re-assign order by current vector order (useful after filtering).
- pub fn renumber_tracks(mut self) -> anyhow::Result<Self> {
- for (i, t) in self.tracks.iter_mut().enumerate() {
- t.order((i + 1) as i16);
- }
- let tv: Vec<Value> = self.tracks.iter().map(|t| t.to_json()).collect();
- *self
- .value
- .get_mut("tracks")
- .context("Can't access tracks value")? = Value::Array(tv);
- Ok(self)
- }
- /// Render the session as an IGV link using a compressed `blob:` sessionURL.
- ///
- /// `base_url` should be the route serving your igv.js page, e.g. `"igv/"`.
- pub fn link(&self, base_url: &str) -> anyhow::Result<String> {
- let blob = compress_string(&self.to_json_string())?;
- Ok(format!("{base_url}?sessionURL=blob:{blob}"))
- }
- }
#[cfg(test)]
mod tests {
    use crate::tracks::{bam::BamTrack, genes::GenesTrack};

    use super::*;

    /// Base URL shared by the link tests.
    const BASE_URL: &str = "http://store-desktop.local/igv/";

    /// Assert that `link` has the form `{BASE_URL}?sessionURL=blob:{blob}`
    /// and that the blob is non-empty and free of the characters that
    /// `compress_string` is supposed to substitute away.
    fn assert_blob_link(link: &str) {
        let blob = link
            .strip_prefix(BASE_URL)
            .and_then(|rest| rest.strip_prefix("?sessionURL=blob:"))
            .expect("link must start with the base URL followed by ?sessionURL=blob:");
        assert!(!blob.is_empty(), "blob must not be empty");
        assert!(
            !blob.contains(|c: char| matches!(c, '+' | '/' | '=')),
            "blob must be URL-safe, got: {blob}"
        );
    }

    #[test]
    fn it_works() -> anyhow::Result<()> {
        let sess = Session::default()
            .with_reference(ReferenceValues::default())
            .with_locus_at("chr1", 47_098_189, 33)?
            .add_track(Track::Bam(BamTrack::new(
                "VIEL diag",
                "/data/longreads_basic_pipe/VIEL/diag/VIEL_diag_hs1.bam",
            )))?
            .add_track(Track::Bam(BamTrack::new(
                "VIEL mrd",
                "/data/longreads_basic_pipe/VIEL/mrd/VIEL_mrd_hs1.bam",
            )))?
            // .add_track(Track::Variants(VariantsTrack::new(
            //     "/data/longreads_basic_pipe/VIEL/diag/VIEL_loh.vcf.gz",
            //     "LOH",
            // )))?
            .add_track(Track::Genes(GenesTrack::new(
                "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz",
            )))?;

        // Window is position +/- 33.
        assert_eq!(sess.to_json_value()["locus"], "chr1:47098156-47098222");
        // Three tracks were added, so three must be serialized.
        assert_eq!(
            sess.to_json_value()["tracks"].as_array().map(Vec::len),
            Some(3)
        );

        let compressed_str = sess.link(BASE_URL)?;
        println!("{compressed_str}");
        assert_blob_link(&compressed_str);
        Ok(())
    }

    #[test]
    fn denovo_link() -> anyhow::Result<()> {
        let id = "ACHITE";
        let chr = "chr1";
        let alt_id = "47108362-81682505_8090_wtdbg2";
        let alt_prefix = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}/{alt_id}");
        let reference = ReferenceValues {
            id: "contig".to_string(),
            name: "de novo".to_owned(),
            fasta_url: format!("{alt_prefix}.fa"),
            index_url: format!("{alt_prefix}.fa.fai"),
            cytoband_url: "".to_string(),
            alias_url: "".to_owned(),
        };
        let sess = Session::default()
            .with_reference(reference)
            .with_locus_at(alt_id, 100, 19)?
            .add_track(Track::Bam(BamTrack::new(
                "On contig",
                &format!("{alt_prefix}.bam"),
            )))?;

        // Window is position +/- 19 on the de novo contig.
        assert_eq!(
            sess.to_json_value()["locus"],
            "47108362-81682505_8090_wtdbg2:81-119"
        );
        assert_eq!(sess.to_json_value()["reference"]["id"], "contig");
        assert_eq!(
            sess.to_json_value()["tracks"].as_array().map(Vec::len),
            Some(1)
        );

        let compressed_str = sess.link(BASE_URL)?;
        println!("{compressed_str}");
        assert_blob_link(&compressed_str);
        Ok(())
    }
}
|