浏览代码

lollipops

Thomas 2 月之前
父节点
当前提交
ab22b90448
共有 5 个文件被更改,包括 416 次插入29 次删除
  1. 205 5
      src/circ.rs
  2. 146 11
      src/lib.rs
  3. 8 4
      src/loliplot.rs
  4. 50 0
      src/loliplot_parser.rs
  5. 7 9
      src/mutation.rs

+ 205 - 5
src/circ.rs

@@ -8,7 +8,50 @@ use std::{
 use csv::ReaderBuilder;
 use log::info;
 
-use crate::cytoband::Range;
+use crate::cytoband::{read_ranges, Range};
+
+#[derive(Debug, Clone)]
+pub struct CircosConfig {
+    pub cytobands_bed: String,
+    pub r: f64,
+    pub cx: f64,
+    pub cy: f64,
+    pub angle_start: f64,
+    pub angle_end: f64,
+    pub gap: f64,
+}
+
+impl Default for CircosConfig {
+    fn default() -> Self {
+        let r = 1050.0;
+        let cx = 0.0;
+        let cy = 0.0;
+        let angle_start = -std::f64::consts::FRAC_PI_2;
+        let angle_end = angle_start + 2.0 * std::f64::consts::PI;
+        let gap = 0.012 * std::f64::consts::PI;
+
+        Self {
+            cytobands_bed: "/data/ref/hs1/cytoBandMapped.bed".to_string(),
+            r,
+            cx,
+            cy,
+            angle_start,
+            angle_end,
+            gap,
+        }
+    }
+}
+
+impl CircosConfig {
+    /// Rotate the whole configuration by a given angle in degrees.
+    /// Positive values = clockwise, negative = counter-clockwise.
+    pub fn rotate(&mut self, degrees: f64) {
+        let radians = degrees.to_radians();
+        self.angle_start -= radians;
+        self.angle_end -= radians;
+    }
+}
+
 
 const CAP_MINOR_RATIO: f64 = 0.3; // 1/3 of thickness
 
@@ -296,14 +339,17 @@ pub fn classic_caps(i: usize, len: usize) -> CapStyle {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct Circos {
     pub tracks: Vec<Track>,
+    pub ref_track: Option<usize>,
     pub svg_elements: Vec<String>,
     pub min_x: f64,
     pub min_y: f64,
     pub max_x: f64,
     pub max_y: f64,
+    pub chroms_ids: HashMap<String, usize>,
+    pub config: CircosConfig,
 }
 
 struct RenderState {
@@ -318,14 +364,165 @@ impl Circos {
     pub fn new() -> Self {
         Self {
             tracks: Vec::new(),
+            ref_track: None,
             svg_elements: Vec::new(),
             min_x: f64::INFINITY,
             min_y: f64::INFINITY,
             max_x: f64::NEG_INFINITY,
             max_y: f64::NEG_INFINITY,
+            chroms_ids: HashMap::new(),
+            config: CircosConfig::default(),
         }
     }
 
+    pub fn new_with_chrom(chroms: &[String], cfg: CircosConfig) -> anyhow::Result<Self> {
+        let mut circos = Circos::default();
+        circos.config = cfg.clone();
+
+        let mut track = Track::new(cfg.cx, cfg.cy, cfg.angle_start, cfg.angle_end, cfg.gap);
+
+        for (set_idx, name) in chroms.iter().enumerate() {
+            let ranges = read_ranges(&cfg.cytobands_bed, name)?;
+
+            track.add_range_set((*name).to_string(), ranges.clone());
+            track.add_arcs_for_set(set_idx, cfg.r * 0.955555, cfg.r, classic_caps);
+        }
+
+        let chrom_name_radius = cfg.r + ( cfg.r * 0.6 );
+
+        // Add chromosome labels
+        for (set_idx, name) in chroms.iter().enumerate() {
+            circos.chroms_ids.insert(name.clone(), set_idx);
+            let ranges = read_ranges(&cfg.cytobands_bed, name)?;
+
+            let p = ranges
+                .iter()
+                .find_map(|r| {
+                    if r.category.as_str() == "acen" {
+                        Some(r.start + r.end.saturating_sub(r.start))
+                    } else {
+                        None
+                    }
+                })
+                .unwrap_or(0);
+
+            track.add_label(
+                Attach::Data {
+                    set_idx,
+                    start: p,
+                    end: None,
+                },
+                chrom_name_radius,
+                LabelMode::Perimeter,
+                28.0,
+                Some("white".to_string()),
+                0.85,
+                (*name).to_string(),
+            );
+            track.add_arc(
+                Attach::Data {
+                    set_idx,
+                    start: p,
+                    end: Some(p),
+                },
+                cfg.r + 10.0,
+                chrom_name_radius - 35.0,
+                "gpos50".to_string(),
+                CapStyle::None,
+            );
+        }
+
+        circos.add_track(track);
+        circos.ref_track = Some(0);
+
+        Ok(circos)
+    }
+
+    /// Add perimeter labels to the reference track with automatic bumping to avoid overlaps.
+    ///
+    /// - `labels`: slice of `(label, chrom_id, pos_bp)`, expected sorted by chromosome and position.
+    /// - Labels on the same chromosome closer than 10 Mbp are bumped outward, with a connector drawn
+    ///   from the true position to the bumped anchor.
+    /// - Labels are rendered on the perimeter at `self.config.r + 200.0`.
+    ///
+    /// No-op if no reference track is configured.
+    pub fn add_labels(&mut self, labels: &[(String, usize, u32)]) {
+        // Genomic distance (in bp) under which two consecutive labels on the same chromosome
+        // are considered “too close” and the latter is bumped.
+        const DIST_THRESHOLD_BP: u32 = 10_000_000;
+
+        // Radius at which labels are rendered on the perimeter.
+        let label_radius = self.config.r + ( self.config.r * 0.33);
+
+        // If there is no reference track configured, there is nothing to draw.
+        let Some(ref_track_idx) = self.ref_track else {
+            return;
+        };
+        let ref_track = &mut self.tracks[ref_track_idx];
+
+        // Build a sequence of (label, chrom_id, pos_bp, bumped_anchor_pos?)
+        // where bumped_anchor_pos is Some(synthetic_pos) if we decided to bump.
+        let mut planned: Vec<(&str, usize, u32, Option<u32>)> = Vec::with_capacity(labels.len());
+
+        for (label, chrom_id, pos_bp) in labels {
+            if let Some((_prev_label, prev_id, prev_pos, prev_bump)) = planned.last().copied() {
+                let last_ref_pos = prev_bump.unwrap_or(prev_pos);
+                // If same chromosome and too close to the previous label’s *reference* anchor,
+                // bump this one by shifting its anchor forward by the threshold distance.
+                if prev_id == *chrom_id && pos_bp.saturating_sub(last_ref_pos) < DIST_THRESHOLD_BP {
+                    planned.push((
+                        label.as_str(),
+                        *chrom_id,
+                        *pos_bp,
+                        Some(last_ref_pos + DIST_THRESHOLD_BP),
+                    ));
+                } else {
+                    planned.push((label.as_str(), *chrom_id, *pos_bp, None));
+                }
+            } else {
+                // First label: never bumped.
+                planned.push((label.as_str(), *chrom_id, *pos_bp, None));
+            }
+        }
+
+        // Emit labels and connector paths.
+        for (label, set_idx, pos_bp, bump_anchor) in planned {
+            // 1) Text label on the perimeter, attached at either the real or bumped anchor.
+            ref_track.add_label(
+                Attach::Data {
+                    set_idx,
+                    start: bump_anchor.unwrap_or(pos_bp),
+                    end: None,
+                },
+                label_radius,               // radial placement of the label
+                LabelMode::Perimeter,       // mode
+                18.0,                       // font size
+                Some("yellow".to_string()), // fill/background color for label
+                0.95,                       // opacity
+                label.to_string(),          // label text
+            );
+
+            // 2) Connector from true genomic pos to the (possibly bumped) anchor.
+            ref_track.add_path(
+                Attach::Data {
+                    set_idx,
+                    start: pos_bp,
+                    end: None,
+                }, // from true position
+                Attach::Data {
+                    set_idx,
+                    start: bump_anchor.unwrap_or(pos_bp),
+                    end: None,
+                }, // to anchor
+                self.config.r + 10.0, // inner radius for path
+                label_radius - 70.0,  // outer radius for path
+                "black".to_string(),  // stroke color
+                1,                    // stroke width
+            );
+        }
+    }
+
+
     pub fn new_wg(
         chromosomes_ri: f64,
         cytobands: &str,
@@ -953,9 +1150,10 @@ pub fn read_arcs(path: &str) -> anyhow::Result<Vec<ArcData>> {
 
 /// Half-open intervals [start, end). For CLOSED intervals, change the `<=` test to `<`.
 
-
 #[inline]
-fn len(a: &ArcData) -> u32 { a.end - a.start } // half-open; for closed use +1
+fn len(a: &ArcData) -> u32 {
+    a.end - a.start
+} // half-open; for closed use +1
 
 /// Minimal tracks per chromosome, first-fit (left-justified),
 /// and for intervals with the same `start`, place longer ones first.
@@ -975,7 +1173,9 @@ pub fn pack_tracks(mut arcs: Vec<ArcData>) -> Vec<Vec<ArcData>> {
     while i < arcs.len() {
         let chr = arcs[i].chr.clone();
         let start_i = i;
-        while i < arcs.len() && arcs[i].chr == chr { i += 1; }
+        while i < arcs.len() && arcs[i].chr == chr {
+            i += 1;
+        }
         let slice = &arcs[start_i..i];
 
         let local = pack_one_chr_first_fit(slice);

+ 146 - 11
src/lib.rs

@@ -11,11 +11,14 @@ pub mod theme;
 #[cfg(test)]
 mod tests {
     use crate::{
-        gene_model::load_gene_model_from_gff3, loliplot::{Lollipop, SvgConfig}, loliplot_parser::{parse_domains_by_gene_name, Rgb}, mutation::{dels_to_ranges, mutations_to_lollipops, parse_kind_colors, parse_mutations}
+        circ::TranslocData,
+        gene_model::load_gene_model_from_gff3,
+        loliplot_parser::{collapse_domain_names, parse_domains_by_gene_name},
+        mutation::{dels_to_ranges, mutations_to_lollipops, parse_kind_colors, parse_mutations},
     };
     use cytoband::{read_ranges, svg_chromosome, AdditionalRect, RectPosition};
     use report::compile_typst_report;
-    use std::{collections::HashMap, fs::File, path::Path};
+    use std::{collections::HashMap, path::Path};
 
     use crate::{
         circ::{pack_tracks, read_arcs, read_gene_labels, read_translocs},
@@ -70,7 +73,7 @@ mod tests {
             cytoband::Lollipop {
                 position: 5000000,
                 above: true,
-                letter: 'A',
+                letter: ' ',
                 color: String::from("red"),
             },
             cytoband::Lollipop {
@@ -360,6 +363,92 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn gene_translocations() -> anyhow::Result<()> {
+        let gene_name = "NOTCH1";
+        let translocs = read_translocs("/data/tmp_trl.tsv")?;
+
+        // Select translocations having one partner with gene_name
+        let translocs: Vec<TranslocData> = translocs
+            .into_iter()
+            .filter(|t| t.left_gene == gene_name || t.right_gene == gene_name)
+            .collect();
+
+        // Chromosomes to render
+        let mut chroms: Vec<String> = Vec::new();
+        for chr_id in 0..24 {
+            let chr_name = if chr_id <= 21 {
+                format!("chr{}", chr_id + 1)
+            } else if chr_id == 22 {
+                "chrX".to_string()
+            } else {
+                "chrY".to_string()
+            };
+
+            let present = translocs
+                .iter()
+                .any(|t| t.left_chr == chr_name || t.right_chr == chr_name);
+            if present && !chroms.contains(&chr_name) {
+                chroms.push(chr_name);
+            }
+        }
+
+        let cfg = circ::CircosConfig {
+            r: 250.0,
+            ..Default::default()
+        };
+
+        let mut circos = circ::Circos::new_with_chrom(&chroms, cfg)?;
+
+        // LABELS
+        let mut hm: HashMap<String, (usize, u32, usize)> = HashMap::new();
+
+        translocs.iter().for_each(|t| {
+            let left_chrom_id = *circos.chroms_ids.get(&t.left_chr).unwrap();
+            hm.entry(t.left_gene.to_string())
+                .and_modify(|e| e.2 += 1)
+                .or_insert((left_chrom_id, t.left_pos, 1));
+
+            let right_chrom_id = *circos.chroms_ids.get(&t.right_chr).unwrap();
+            hm.entry(t.right_gene.to_string())
+                .and_modify(|e| e.2 += 1)
+                .or_insert((right_chrom_id, t.right_pos, 1));
+        });
+        circos.add_labels(
+            &hm.iter()
+                .map(|(gene, (id, pos, n))| (format!("{gene} (n={n})"), *id, *pos))
+                .collect::<Vec<(String, usize, u32)>>(),
+        );
+
+        // ARCS
+        translocs.iter().for_each(|t| {
+            let left_chrom_id = *circos.chroms_ids.get(&t.left_chr).unwrap();
+            let right_chrom_id = *circos.chroms_ids.get(&t.right_chr).unwrap();
+            circos.tracks[circos.ref_track.unwrap()].add_cord(
+                circ::Attach::Data {
+                    set_idx: left_chrom_id,
+                    start: t.left_pos,
+                    end: None,
+                },
+                circ::Attach::Data {
+                    set_idx: right_chrom_id,
+                    start: t.right_pos,
+                    end: None,
+                },
+                circos.config.r - (circos.config.r * 0.1),
+                0.3,
+                "red".to_string(),
+                2,
+            );
+        });
+
+        circos.save_to_file(
+            &format!("/data/genes_results/{gene_name}_translocations.svg"),
+            100.0,
+        )?;
+        Ok(())
+    }
+
     #[test]
     fn deletions() -> anyhow::Result<()> {
         let mut circos = circ::Circos::new();
@@ -756,18 +845,22 @@ mod tests {
                                 // let id = "NM_000314.8"; // PTEN
                                 // let id = "NM_001015877.2"; // PHF6
                                 // let id = "NM_001005361.3"; // DNM2
-        let mut pten_cfg = SvgConfig::default();
-        pten_cfg.range_max_lanes = 20;
+                                // let id = "NM_002185.5"; // IL7R
+        let pten_cfg = loliplot::SvgConfig {
+            range_max_lanes: 20,
+            ..Default::default()
+        };
 
         // let gff3 = Path::new("./notch_gff.gff3");
         let gff3 = Path::new("/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1.gff3");
         let r = load_gene_model_from_gff3(gff3, id)?;
         let gene_name = r.gene_id.clone();
 
-        let domains = parse_domains_by_gene_name(
+        let mut domains = parse_domains_by_gene_name(
             "/data/ref/hs1/my_protein_domains_hs1_lifted_v2.4.0_sorted.gff3",
             &gene_name,
         )?;
+        collapse_domain_names(&mut domains);
         let colors = parse_kind_colors("/data/colors.tsv")?;
         let muts = parse_mutations(format!("/data/genes_results/{gene_name}_muts.tsv"))?;
         println!("{}", muts.len());
@@ -779,11 +872,6 @@ mod tests {
 
         let lols = mutations_to_lollipops(&muts, &colors);
 
-        println!("{:?}", lols);
-        println!("{}", lols.len());
-        lols.iter()
-            .filter(|l| l.kind == "SNV_MISS")
-            .for_each(|l| println!("{l:?}"));
         let svg = gene_model_to_svg(&r, &domains, &lols, &dels, &pten_cfg); // write svg to file
         std::fs::write(
             format!("/data/genes_results/{gene_name}_struct_muts.svg"),
@@ -798,4 +886,51 @@ mod tests {
         compile_typst_report("LEVASSEUR").unwrap();
         // write_report("/data/test.pdf");
     }
+
+    #[test]
+    fn cdkn2a_del() -> anyhow::Result<()> {
+        let gene_name = "CDKN2A";
+        let colors = parse_kind_colors("/data/colors.tsv")?;
+
+        let mut dels = dels_to_ranges(
+            format!("/data/genes_results/{gene_name}_dels.tsv"),
+            &colors,
+            loliplot::LollipopSide::Bottom, // or Bottom — your choice for this track
+        )?;
+        dels.sort_by_key(|d| std::cmp::Reverse(d.end.saturating_sub(d.start)));
+
+
+        println!("{}", dels.len());
+        let additional_rects: Vec<AdditionalRect> = dels
+            .into_iter()
+            .filter(|d| d.end.saturating_sub(d.start) > 1_000_000)
+            .enumerate()
+            .map(|(i, d)| AdditionalRect {
+                start: d.start as u32,
+                end: d.end as u32,
+                color: d.color.to_string(),
+                position: RectPosition::Below(i as u32),
+            })
+            .collect();
+
+
+        let lollipops = vec![cytoband::Lollipop {
+            position: 21989169,
+            above: true,
+            letter: ' ',
+            color: String::from("red"),
+        }];
+
+        svg_chromosome(
+            "chr9",
+            2000,
+            100,
+            "/data/ref/hs1/cytoBandMapped.bed",
+            "/data/chr1.svg",
+            &additional_rects,
+            &lollipops,
+        )?;
+
+        Ok(())
+    }
 }

+ 8 - 4
src/loliplot.rs

@@ -174,7 +174,7 @@ impl Default for SvgConfig {
     fn default() -> Self {
         Self {
             svg_width: 1500,
-            svg_height: 500,
+            svg_height: 600,
             exon_height: 150,
             intron_width: 10,
             left_right_margin: 20,
@@ -192,7 +192,7 @@ impl Default for SvgConfig {
             pin_hgap: 1.0,
             pin_max_dx: 60.0,
             pin_lane_gap: 33.0,
-            pin_max_lanes: 4,
+            pin_max_lanes: 7,
 
             range_height: 10,
             range_alpha: 0.95,
@@ -264,6 +264,11 @@ pub fn gene_model_to_svg(
     let n = exons.len();
     assert!(n > 0, "GeneModel has no exons");
 
+    let lollipops: Vec<&Lollipop> = lollipops
+        .iter()
+        .filter(|l| l.pos >= gm.start && l.pos <= gm.end)
+        .collect();
+
     // Layout scale (exons to scale, introns fixed)
     let total_exon_bp: u64 = exons.iter().map(|iv| iv.end - iv.start + 1).sum();
     let intron_total = cfg.intron_width.saturating_mul(n.saturating_sub(1) as u32) as u64;
@@ -568,8 +573,7 @@ pub fn gene_model_to_svg(
 
         let _ = writeln!(
             heads,
-            r#"  <circle cx="{:.2}" cy="{:.2}" r="{:.2}" fill="none" stroke="white" stroke-width="{:.2}"/>"#,
-            x_fin, circle_cy, r_white, ring_width
+            r#"  <circle cx="{x_fin:.2}" cy="{circle_cy:.2}" r="{r_white:.2}" fill="none" stroke="white" stroke-width="{ring_width:.2}"/>"#
         );
     };
 

+ 50 - 0
src/loliplot_parser.rs

@@ -1,5 +1,6 @@
 use anyhow::{anyhow, Context, Result};
 use std::collections::HashMap;
+use std::fmt;
 use std::fs::File;
 use std::io::{BufRead, BufReader, Read};
 use std::path::Path;
@@ -11,6 +12,12 @@ pub struct Rgb {
     pub b: u8,
 }
 
+impl fmt::Display for Rgb {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "#{:02X}{:02X}{:02X}", self.r, self.g, self.b)
+    }
+}
+
 #[derive(Debug, Clone)]
 pub struct ProteinDomain {
     pub start: u64,                 // 1-based inclusive
@@ -109,3 +116,46 @@ fn open_maybe_gzip(path: &Path) -> Result<Box<dyn Read>> {
     Ok(Box::new(file))
 }
 
+pub fn collapse_domain_names(domains: &mut Vec<ProteinDomain>) {
+    if domains.is_empty() {
+        return;
+    }
+
+    let mut i = 0;
+    while i < domains.len() {
+        let name = domains[i].name.clone();
+        if name.is_empty() {
+            i += 1;
+            continue;
+        }
+
+        // Collect the consecutive run with the same name
+        let mut j = i + 1;
+        while j < domains.len() && domains[j].name == name {
+            j += 1;
+        }
+
+        if j - i > 1 {
+            // Determine the longest domain in the run
+            let mut longest_idx = i;
+            let mut longest_len = domains[i].end - domains[i].start;
+            for k in i + 1..j {
+                let len = domains[k].end - domains[k].start;
+                if len > longest_len {
+                    longest_len = len;
+                    longest_idx = k;
+                }
+            }
+
+            // Clear names of all except the longest
+            for k in i..j {
+                if k != longest_idx {
+                    domains[k].name.clear();
+                }
+            }
+        }
+
+        i = j;
+    }
+}
+

+ 7 - 9
src/mutation.rs

@@ -190,7 +190,7 @@ fn open_maybe_gzip(path: &Path) -> Result<Box<dyn Read>> {
     let is_gz = path
         .extension()
         .and_then(|e| e.to_str())
-        .map_or(false, |e| e.eq_ignore_ascii_case("gz"));
+        .map_or_else(|| false, |e| e.eq_ignore_ascii_case("gz"));
     if is_gz {
         Ok(Box::new(MultiGzDecoder::new(file)))
     } else {
@@ -294,7 +294,7 @@ pub fn mutations_to_lollipops(
         // Deterministic order of kinds at a site
         vec_kc.sort_by(|a, b| a.0.cmp(&b.0));
 
-        for (idx, (kind_key, n)) in vec_kc.into_iter().enumerate() {
+        for (kind_key, n) in vec_kc.into_iter() {
             let color = kind_colors.get(&kind_key).copied().unwrap_or(Rgb {
                 r: 160,
                 g: 160,
@@ -317,7 +317,6 @@ pub fn mutations_to_lollipops(
     out
 }
 
-
 #[derive(Debug, Clone)]
 pub struct Range {
     pub chrom: String,
@@ -328,7 +327,6 @@ pub struct Range {
     pub side: LollipopSide, // reuse Top/Bottom
 }
 
-
 pub fn dels_to_ranges<P: AsRef<Path>>(
     path: P,
     kind_colors: &HashMap<String, Rgb>,
@@ -397,10 +395,11 @@ fn parse_dels_to_ranges_from_reader<R: Read>(
 
         // normalize key (match your color table keys)
         let kind_key = kind_raw.to_ascii_uppercase();
-        let color = kind_colors
-            .get(&kind_key)
-            .copied()
-            .unwrap_or(Rgb { r: 160, g: 160, b: 160 });
+        let color = kind_colors.get(&kind_key).copied().unwrap_or(Rgb {
+            r: 160,
+            g: 160,
+            b: 160,
+        });
 
         out.push(Range {
             chrom: chrom.to_string(),
@@ -414,4 +413,3 @@ fn parse_dels_to_ranges_from_reader<R: Read>(
 
     Ok(out)
 }
-