lib.rs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. //! IGV session URL builder (igv.js compatible).
  2. //!
  3. //! This module builds an **igv.js session JSON**, compresses it with the same
  4. //! deflate + base64 + URL-safe substitutions used by igvteam utilities, and
  5. //! returns a link of the form:
  6. //!
  7. //! `"{base_url}?sessionURL=blob:{blob}"`
  8. //!
  9. //! ## Coordinate conventions
  10. //!
//! The `locus` field in igv.js session JSON is a **string** in the human-facing
//! form `chr:start-end`. In igv.js, the internal reference frame uses a 0-based
//! `start`, but `getLocusString()` returns a `chr:start-end` representation
//! where `start` is **1-based**.
  15. //!
  16. //! Therefore, the APIs in this module treat `position`, `start`, and `end` as
  17. //! **1-based coordinates** intended for locus strings.
  18. //!
  19. //! ## Example
  20. //!
//! ```no_run
//! # use pandora_lib_igv::{ReferenceValues, Session};
//! # use pandora_lib_igv::tracks::{bam::BamTrack, Track};
//! let url = Session::default()
//!     .with_reference(ReferenceValues::default())
//!     .with_locus_at("chr1", 1_000_000, 1000).unwrap()
//!     .add_track(Track::Bam(BamTrack::new("tumor", "/data/tumor.bam"))).unwrap()
//!     .link("igv/").unwrap();
//! println!("{url}");
//! ```
  30. pub mod tracks;
  31. use anyhow::{Context, Ok};
  32. use base64::engine::general_purpose::STANDARD;
  33. use base64::Engine;
  34. use flate2::write::DeflateEncoder;
  35. use flate2::Compression;
  36. use serde::Serialize;
  37. use serde_json::{json, Value};
  38. use std::io::prelude::*;
  39. use crate::tracks::Track;
  40. /// Compress a session string using the igvteam "compressString" algorithm.
  41. ///
  42. /// This matches igv-utils' behavior (deflate, base64, then URL-safe character
  43. /// substitutions) so that the resulting blob can be used with
  44. /// `?sessionURL=blob:{blob}`.
  45. ///
  46. /// Reference implementation:
  47. /// <https://github.com/igvteam/igv-utils/blob/master/src/bgzf.js#L125>
  48. fn compress_string(input: &str) -> anyhow::Result<String> {
  49. let bytes = input.as_bytes();
  50. // Compress bytes
  51. let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
  52. encoder
  53. .write_all(bytes)
  54. .context("Failed to write data to encoder")?;
  55. let compressed_bytes = encoder.finish().context("Failed to finish encoding")?;
  56. // Base64 encode
  57. let encoded = STANDARD.encode(compressed_bytes);
  58. // URL-safe replacements
  59. Ok(encoded
  60. .replace('+', ".")
  61. .replace('/', "_")
  62. .replace('=', "-"))
  63. }
  64. /// Reference genome definition used by igv.js sessions.
  65. ///
  66. /// This is serialized into the session JSON under `"reference"`.
  67. ///
  68. /// Notes:
  69. /// - `fastaURL` and `indexURL` must point to the reference FASTA and its `.fai`.
  70. /// - `cytobandURL` and `aliasURL` are optional but improve display/compatibility.
  71. ///
  72. /// To add more track/fields, see:
  73. /// - <https://github.com/igvteam/igv.js/wiki/Tracks-2.0>
  74. /// - <https://igv.org/doc/igvjs/#tracks/Tracks/>
  75. #[derive(Debug, Serialize)]
  76. pub struct ReferenceValues {
  77. /// Reference identifier (igv.js `"id"`).
  78. pub id: String,
  79. /// Human readable name (igv.js `"name"`).
  80. pub name: String,
  81. /// FASTA URL (igv.js `"fastaURL"`).
  82. #[serde(rename = "fastaURL")]
  83. pub fasta_url: String,
  84. /// FASTA index URL (igv.js `"indexURL"`, typically `.fai`).
  85. #[serde(rename = "indexURL")]
  86. pub index_url: String,
  87. /// Cytoband BED URL (igv.js `"cytobandURL"`).
  88. #[serde(rename = "cytobandURL")]
  89. pub cytoband_url: String,
  90. /// Chromosome alias mapping URL (igv.js `"aliasURL"`).
  91. #[serde(rename = "aliasURL")]
  92. pub alias_url: String,
  93. }
  94. impl Default for ReferenceValues {
  95. fn default() -> Self {
  96. Self {
  97. id: "chm13v2.0".to_string(),
  98. name: "Human (T2T/hs1 CHM13-v2.0)".to_string(),
  99. fasta_url: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  100. index_url: "/data/ref/hs1/chm13v2.0.fa.fai".to_string(),
  101. cytoband_url: "/data/ref/hs1/chm13v2.0_cytobands_allchrs.bed".to_string(),
  102. alias_url: "/data/ref/hs1/GCA_009914755.4.chromAlias.txt".to_string(),
  103. }
  104. }
  105. }
  106. /// igv.js session builder.
  107. ///
  108. /// Internally holds a JSON `Value` mirroring igv.js session schema plus a list
  109. /// of tracks used to assign track order deterministically.
  110. #[derive(Debug)]
  111. pub struct Session {
  112. value: Value,
  113. tracks: Vec<Track>,
  114. }
  115. impl Default for Session {
  116. fn default() -> Self {
  117. let value = json!({
  118. "version": "2.16.0",
  119. "showSampleNames": false,
  120. "reference": {},
  121. "locus": [],
  122. "tracks": [],
  123. });
  124. Self {
  125. value,
  126. tracks: Vec::new(),
  127. }
  128. }
  129. }
  130. impl Session {
  131. /// Set the session reference genome definition.
  132. ///
  133. /// This overwrites the `"reference"` object in the underlying session JSON.
  134. pub fn with_reference(mut self, reference_values: ReferenceValues) -> Self {
  135. if let Some(reference) = self.value.get_mut("reference") {
  136. *reference = json!(reference_values);
  137. }
  138. self
  139. }
  140. /// Set a single locus using a pre-formatted IGV locus string: `chr:start-end`.
  141. pub fn with_locus_str(mut self, locus: &str) -> anyhow::Result<Self> {
  142. *self
  143. .value
  144. .get_mut("locus")
  145. .context("Can't access locus value")? = json!(locus);
  146. Ok(self)
  147. }
  148. /// Set multiple loci (e.g. split view): `["chr1:1-1000", "chr2:200-400"]`.
  149. pub fn with_loci<I, S>(mut self, loci: I) -> anyhow::Result<Self>
  150. where
  151. I: IntoIterator<Item = S>,
  152. S: AsRef<str>,
  153. {
  154. let arr: Vec<Value> = loci
  155. .into_iter()
  156. .map(|s| Value::String(s.as_ref().to_string()))
  157. .collect();
  158. *self
  159. .value
  160. .get_mut("locus")
  161. .context("Can't access locus value")? = Value::Array(arr);
  162. Ok(self)
  163. }
  164. /// Set locus from 1-based inclusive coordinates.
  165. pub fn with_region_1based(
  166. mut self,
  167. contig: &str,
  168. start: u32,
  169. end: u32,
  170. ) -> anyhow::Result<Self> {
  171. let start = start.max(1);
  172. let end = end.max(start);
  173. *self
  174. .value
  175. .get_mut("locus")
  176. .context("Can't access locus value")? = json!(format!("{contig}:{start}-{end}"));
  177. Ok(self)
  178. }
  179. /// Set locus from 0-based half-open coordinates (start inclusive, end exclusive),
  180. /// converting to IGV locus string (1-based inclusive).
  181. pub fn with_region_0based(
  182. mut self,
  183. contig: &str,
  184. start0: u32,
  185. end0: u32,
  186. ) -> anyhow::Result<Self> {
  187. let start1 = start0.saturating_add(1);
  188. let end1 = end0.max(start0).saturating_add(0); // end0 exclusive -> inclusive end is end0
  189. // For half-open [start0, end0), inclusive end is end0 (in 1-based)
  190. *self
  191. .value
  192. .get_mut("locus")
  193. .context("Can't access locus value")? = json!(format!("{contig}:{start1}-{end1}"));
  194. Ok(self)
  195. }
  196. pub fn to_json_value(&self) -> &Value {
  197. &self.value
  198. }
  199. pub fn to_json_string(&self) -> String {
  200. self.value.to_string()
  201. }
  202. pub fn to_json_string_pretty(&self) -> anyhow::Result<String> {
  203. Ok(serde_json::to_string_pretty(&self.value)?)
  204. }
  205. /// Set the locus for the session as a `chr:start-end` string.
  206. ///
  207. /// ## Coordinates
  208. /// This API expects **1-based** coordinates to be used in the locus string.
  209. /// See module-level documentation for details on igv.js conventions. :contentReference[oaicite:2]{index=2}
  210. pub fn with_locus(mut self, from: (String, i32), to: i32) -> anyhow::Result<Self> {
  211. *self
  212. .value
  213. .get_mut("locus")
  214. .context("Can't access locus value")? = json!(format!("{}:{}-{}", from.0, from.1, to));
  215. Ok(self)
  216. }
  217. /// Center the view on a 1-based position with a symmetric window.
  218. ///
  219. /// Builds a locus string: `"{contig}:{start}-{end}"` where:
  220. /// - `position` is 1-based
  221. /// - `start = max(1, position - plus_minus)`
  222. /// - `end = position + plus_minus`
  223. pub fn with_locus_at(
  224. mut self,
  225. contig: &str,
  226. position: u32,
  227. plus_minus: u32,
  228. ) -> anyhow::Result<Self> {
  229. let start = position.saturating_sub(plus_minus).max(1);
  230. let end = position.saturating_add(plus_minus);
  231. *self
  232. .value
  233. .get_mut("locus")
  234. .context("Can't access locus value")? = json!(format!("{contig}:{start}-{end}"));
  235. Ok(self)
  236. }
  237. /// Append a track to the session.
  238. ///
  239. /// Tracks are ordered in insertion order. Each call recomputes the `"tracks"`
  240. /// JSON array from the stored track list.
  241. pub fn add_track(mut self, track: Track) -> anyhow::Result<Self> {
  242. let mut track = track;
  243. let pos = self.tracks.len() + 1;
  244. track.order(pos as i16);
  245. self.tracks.push(track);
  246. let tv: Vec<Value> = self.tracks.iter().map(|t| t.to_json()).collect();
  247. *self
  248. .value
  249. .get_mut("tracks")
  250. .context("Can't access locus value")? = Value::Array(tv);
  251. Ok(self)
  252. }
  253. /// Add many tracks at once (preserves order).
  254. pub fn add_tracks<I>(mut self, tracks: I) -> anyhow::Result<Self>
  255. where
  256. I: IntoIterator<Item = Track>,
  257. {
  258. for t in tracks {
  259. self = self.add_track(t)?;
  260. }
  261. Ok(self)
  262. }
  263. /// Explicitly re-assign order by current vector order (useful after filtering).
  264. pub fn renumber_tracks(mut self) -> anyhow::Result<Self> {
  265. for (i, t) in self.tracks.iter_mut().enumerate() {
  266. t.order((i + 1) as i16);
  267. }
  268. let tv: Vec<Value> = self.tracks.iter().map(|t| t.to_json()).collect();
  269. *self
  270. .value
  271. .get_mut("tracks")
  272. .context("Can't access tracks value")? = Value::Array(tv);
  273. Ok(self)
  274. }
  275. /// Render the session as an IGV link using a compressed `blob:` sessionURL.
  276. ///
  277. /// `base_url` should be the route serving your igv.js page, e.g. `"igv/"`.
  278. pub fn link(&self, base_url: &str) -> anyhow::Result<String> {
  279. let blob = compress_string(&self.to_json_string())?;
  280. Ok(format!("{base_url}?sessionURL=blob:{blob}"))
  281. }
  282. }
  283. #[cfg(test)]
  284. mod tests {
  285. use crate::tracks::{bam::BamTrack, genes::GenesTrack};
  286. use super::*;
  287. #[test]
  288. fn it_works() -> anyhow::Result<()> {
  289. let sess = Session::default()
  290. .with_reference(ReferenceValues::default())
  291. .with_locus_at("chr1", 47_098_189, 33)?
  292. .add_track(Track::Bam(BamTrack::new(
  293. "VIEL diag",
  294. "/data/longreads_basic_pipe/VIEL/diag/VIEL_diag_hs1.bam",
  295. )))?
  296. .add_track(Track::Bam(BamTrack::new(
  297. "VIEL mrd",
  298. "/data/longreads_basic_pipe/VIEL/mrd/VIEL_mrd_hs1.bam",
  299. )))?
  300. // .add_track(Track::Variants(VariantsTrack::new(
  301. // "/data/longreads_basic_pipe/VIEL/diag/VIEL_loh.vcf.gz",
  302. // "LOH",
  303. // )))?
  304. .add_track(Track::Genes(GenesTrack::new(
  305. "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz",
  306. )))?;
  307. let compressed_str = sess.link("http://store-desktop.local/igv/")?;
  308. println!("{compressed_str}");
  309. Ok(())
  310. }
  311. #[test]
  312. fn denovo_link() -> anyhow::Result<()> {
  313. let id = "ACHITE";
  314. let chr = "chr1";
  315. let alt_id = "47108362-81682505_8090_wtdbg2";
  316. let alt_prefix = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}/{alt_id}");
  317. let reference = ReferenceValues {
  318. id: "contig".to_string(),
  319. name: "de novo".to_owned(),
  320. fasta_url: format!("{alt_prefix}.fa"),
  321. index_url: format!("{alt_prefix}.fa.fai"),
  322. cytoband_url: "".to_string(),
  323. alias_url: "".to_owned(),
  324. };
  325. let sess = Session::default()
  326. .with_reference(reference)
  327. .with_locus_at(alt_id, 100, 19)?
  328. .add_track(Track::Bam(BamTrack::new(
  329. "On contig",
  330. &format!("{alt_prefix}.bam"),
  331. )))?;
  332. let compressed_str = sess.link("http://store-desktop.local/igv/")?;
  333. println!("{compressed_str}");
  334. Ok(())
  335. }
  336. }