Browse Source

ok all contigs graphs and stats

Your Name 1 year ago
parent
commit
9289932830
4 changed files with 958 additions and 127 deletions
  1. 296 19
      Cargo.lock
  2. 2 1
      Cargo.toml
  3. 593 65
      src/counts.rs
  4. 67 42
      src/lib.rs

+ 296 - 19
Cargo.lock

@@ -2,6 +2,15 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "addr2line"
+version = "0.24.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375"
+dependencies = [
+ "gimli",
+]
+
 [[package]]
 name = "adler2"
 version = "2.0.0"
@@ -26,7 +35,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
 dependencies = [
  "cfg-if",
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "version_check",
  "zerocopy",
@@ -161,6 +170,21 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "backtrace"
+version = "0.3.74"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
+dependencies = [
+ "addr2line",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "base64"
 version = "0.22.1"
@@ -185,7 +209,7 @@ dependencies = [
  "bitflags",
  "cexpr",
  "clang-sys",
- "itertools",
+ "itertools 0.12.1",
  "lazy_static",
  "lazycell",
  "proc-macro2",
@@ -736,6 +760,28 @@ dependencies = [
  "typeid",
 ]
 
+[[package]]
+name = "failure"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86"
+dependencies = [
+ "backtrace",
+ "failure_derive",
+]
+
+[[package]]
+name = "failure_derive"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "synstructure",
+]
+
 [[package]]
 name = "flate2"
 version = "1.0.33"
@@ -786,6 +832,17 @@ dependencies = [
  "version_check",
 ]
 
+[[package]]
+name = "getrandom"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.9.0+wasi-snapshot-preview1",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.2.15"
@@ -794,9 +851,15 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
 dependencies = [
  "cfg-if",
  "libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
 ]
 
+[[package]]
+name = "gimli"
+version = "0.31.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64"
+
 [[package]]
 name = "glob"
 version = "0.3.1"
@@ -1005,6 +1068,15 @@ version = "1.70.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
 
+[[package]]
+name = "itertools"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itertools"
 version = "0.12.1"
@@ -1144,6 +1216,15 @@ dependencies = [
  "pkg-config",
 ]
 
+[[package]]
+name = "matrixmultiply"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "916806ba0031cd542105d916a97c8572e1fa6dd79c9c51e7eb43a09ec2dd84c1"
+dependencies = [
+ "rawpointer",
+]
+
 [[package]]
 name = "matrixmultiply"
 version = "0.3.9"
@@ -1198,13 +1279,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7b5c17de023a86f59ed79891b2e5d5a94c705dbe904a5b5c9c952ea6221b03e4"
 dependencies = [
  "approx",
- "matrixmultiply",
+ "matrixmultiply 0.3.9",
  "nalgebra-macros",
- "num-complex",
- "num-rational",
+ "num-complex 0.4.6",
+ "num-rational 0.4.2",
  "num-traits",
- "rand",
- "rand_distr",
+ "rand 0.8.5",
+ "rand_distr 0.4.3",
  "simba",
  "typenum",
 ]
@@ -1220,6 +1301,19 @@ dependencies = [
  "syn 2.0.77",
 ]
 
+[[package]]
+name = "ndarray"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac06db03ec2f46ee0ecdca1a1c34a99c0d188a0d83439b84bf0cb4b386e4ab09"
+dependencies = [
+ "matrixmultiply 0.2.4",
+ "num-complex 0.2.4",
+ "num-integer",
+ "num-traits",
+ "rawpointer",
+]
+
 [[package]]
 name = "newtype_derive"
 version = "0.1.6"
@@ -1273,6 +1367,41 @@ dependencies = [
  "noodles-core",
 ]
 
+[[package]]
+name = "num"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8536030f9fea7127f841b45bb6243b27255787fb4eb83958aa1ef9d2fdc0c36"
+dependencies = [
+ "num-bigint",
+ "num-complex 0.2.4",
+ "num-integer",
+ "num-iter",
+ "num-rational 0.2.4",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95"
+dependencies = [
+ "autocfg",
+ "num-traits",
+]
+
 [[package]]
 name = "num-complex"
 version = "0.4.6"
@@ -1307,6 +1436,29 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "num-iter"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef"
+dependencies = [
+ "autocfg",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "num-rational"
 version = "0.4.2"
@@ -1333,6 +1485,15 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
 
+[[package]]
+name = "object"
+version = "0.36.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.19.0"
@@ -1430,7 +1591,9 @@ dependencies = [
  "pandora_lib_pileup",
  "plotly",
  "postcard",
+ "rand 0.8.5",
  "rayon",
+ "rstat",
  "rust-htslib",
  "serde",
  "serde_json",
@@ -1527,7 +1690,7 @@ dependencies = [
  "once_cell",
  "plotly_derive",
  "plotly_kaleido",
- "rand",
+ "rand 0.8.5",
  "rinja",
  "serde",
  "serde_json",
@@ -1619,6 +1782,19 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "rand"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
+dependencies = [
+ "getrandom 0.1.16",
+ "libc",
+ "rand_chacha 0.2.2",
+ "rand_core 0.5.1",
+ "rand_hc",
+]
+
 [[package]]
 name = "rand"
 version = "0.8.5"
@@ -1626,8 +1802,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
  "libc",
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.5.1",
 ]
 
 [[package]]
@@ -1637,7 +1823,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
 dependencies = [
  "ppv-lite86",
- "rand_core",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+dependencies = [
+ "getrandom 0.1.16",
 ]
 
 [[package]]
@@ -1646,7 +1841,16 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
+]
+
+[[package]]
+name = "rand_distr"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96977acbdd3a6576fb1d27391900035bf3863d4a16422973a409b488cf29ffb2"
+dependencies = [
+ "rand 0.7.3",
 ]
 
 [[package]]
@@ -1656,7 +1860,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
 dependencies = [
  "num-traits",
- "rand",
+ "rand 0.8.5",
+]
+
+[[package]]
+name = "rand_hc"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
+dependencies = [
+ "rand_core 0.5.1",
 ]
 
 [[package]]
@@ -1700,7 +1913,7 @@ version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
  "libredox",
  "thiserror",
 ]
@@ -1779,6 +1992,21 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "rstat"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "966f03a9f07b5a3d22dd5d77b2d94082b34b692ca30d39a98ecc8ffd0373b807"
+dependencies = [
+ "failure",
+ "ndarray",
+ "num",
+ "rand 0.7.3",
+ "rand_distr 0.2.2",
+ "spaces",
+ "special-fun",
+]
+
 [[package]]
 name = "rust-htslib"
 version = "0.47.0"
@@ -1801,6 +2029,12 @@ dependencies = [
  "url",
 ]
 
+[[package]]
+name = "rustc-demangle"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+
 [[package]]
 name = "rustc-hash"
 version = "1.1.0"
@@ -1967,7 +2201,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae"
 dependencies = [
  "approx",
- "num-complex",
+ "num-complex 0.4.6",
  "num-traits",
  "paste",
  "wide",
@@ -1985,6 +2219,25 @@ version = "1.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
 
+[[package]]
+name = "spaces"
+version = "5.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5897ef48e6efcee1ca844267cd2036bc67e056612c48e445e240dbe25e193ceb"
+dependencies = [
+ "itertools 0.8.2",
+ "num-traits",
+]
+
+[[package]]
+name = "special-fun"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1e94eec0d1b647005b0c09f67f5ee4ed594a0dc115aacf56d5afc4511816b8e"
+dependencies = [
+ "cc",
+]
+
 [[package]]
 name = "spin"
 version = "0.9.8"
@@ -2009,7 +2262,7 @@ dependencies = [
  "approx",
  "nalgebra",
  "num-traits",
- "rand",
+ "rand 0.8.5",
 ]
 
 [[package]]
@@ -2059,6 +2312,18 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "synstructure"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+ "unicode-xid",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.63"
@@ -2173,6 +2438,12 @@ version = "0.1.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
 
+[[package]]
+name = "unicode-xid"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "229730647fbc343e3a80e463c1db7f78f3855d3f3739bee0dda773c9a037c90a"
+
 [[package]]
 name = "url"
 version = "2.5.2"
@@ -2196,7 +2467,7 @@ version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
 ]
 
 [[package]]
@@ -2211,6 +2482,12 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "wasi"
+version = "0.9.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
+
 [[package]]
 name = "wasi"
 version = "0.11.0+wasi-snapshot-preview1"
@@ -2491,7 +2768,7 @@ dependencies = [
  "lzma-rs",
  "memchr",
  "pbkdf2",
- "rand",
+ "rand 0.8.5",
  "sha1",
  "thiserror",
  "time",

+ 2 - 1
Cargo.toml

@@ -13,7 +13,6 @@ num-format = "0.4.4"
 rayon = "1.10.0"
 rust-htslib = "0.47.0"
 uuid = { version = "1.10.0", features = ["v4"] }
-# pandora_lib_variants = { git = "https://git.t0m4.fr/Thomas/pandora_lib_variants.git" }
 pandora_lib_pileup = { git = "https://git.t0m4.fr/Thomas/pandora_lib_pileup.git" }
 pandora_lib_graph = { git = "https://git.t0m4.fr/Thomas/pandora_lib_graph.git" }
 indicatif-log-bridge = "0.2.2"
@@ -26,4 +25,6 @@ serde_json = "1.0.128"
 ordered-float = "4.2.2"
 statrs = "0.17.1"
 plotly = { version = "0.9.1", features = ["kaleido"] }
+rstat = "0.6.0"
+rand = "0.8.5"
 

+ 593 - 65
src/counts.rs

@@ -1,14 +1,23 @@
+use anyhow::Context;
 use log::{info, warn};
+use ordered_float::Float;
+use pandora_lib_graph::cytoband::{svg_chromosome, AdditionalRect, RectPosition};
+use plotly::{common::Marker, layout::BarMode, Bar, Layout, Plot, Scatter};
+use rand::{thread_rng, Rng};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use serde::{
     de::{self, Visitor},
-    Deserialize, Deserializer,
+    Deserialize, Deserializer, Serialize,
+};
+use statrs::{
+    distribution::{Continuous, Discrete},
+    statistics::Statistics,
 };
 use std::{
-    collections::{BTreeMap, HashMap},
-    fmt,
+    collections::{BTreeMap, HashMap, HashSet},
+    f64, fmt,
     fs::File,
-    io::{BufRead, BufReader},
+    io::{BufRead, BufReader, Write},
     str::FromStr,
 };
 
@@ -21,6 +30,7 @@ pub struct Count {
     pub sa_outlier: bool,
     pub frac_se: f32,
     pub se_outlier: bool,
+    pub annotation: Vec<CountAnnotation>,
 }
 
 impl fmt::Display for Count {
@@ -52,6 +62,12 @@ impl fmt::Display for CountRange {
     }
 }
 
+/// Reason attached to a count bin; `Counts::get` leaves out every bin that
+/// carries one of these annotations.
+#[derive(Debug, Clone, Hash, Eq, PartialEq)]
+pub enum CountAnnotation {
+    /// Pushed by `Counts::mask_low_mrd` when the paired MRD bin has fewer reads
+    /// than the requested minimum.
+    MaskedLowMRD,
+    /// Pushed by `Counts::mask_low_quality` when the share of low-MAPQ reads
+    /// exceeds the requested maximum ratio.
+    MaskedQuality,
+}
+
 impl<'de> Deserialize<'de> for Count {
     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     where
@@ -92,6 +108,7 @@ impl<'de> Deserialize<'de> for Count {
                     sa_outlier: bool::from_str(parts[4]).map_err(E::custom)?,
                     frac_se: f32::from_str(parts[5]).map_err(E::custom)?,
                     se_outlier: bool::from_str(parts[6]).map_err(E::custom)?,
+                    annotation: Vec::new(),
                 })
             }
         }
@@ -128,10 +145,35 @@ fn escape_control_chars(s: &str) -> String {
 #[derive(Debug)]
 pub struct Counts {
     pub data: HashMap<String, Vec<Count>>,
+    pub mrd: HashMap<String, Vec<Count>>,
 }
 
 impl Counts {
-    pub fn from_files(paths: Vec<&str>) -> Self {
+    /// Loads per-contig counts from `paths`, reading the files in parallel.
+    ///
+    /// A file that cannot be read is reported with a warning and skipped, and
+    /// so is a file that yields no count. Every remaining file is stored under
+    /// the contig of its first count; when several files share a contig, the
+    /// one listed last in `paths` wins. `mrd` starts out empty.
+    pub fn from_files(paths: Vec<String>) -> Self {
+        let loaded: Vec<Vec<Count>> = paths
+            .par_iter()
+            .filter_map(|path| match read_counts_from_file(path) {
+                Ok(c) if c.is_empty() => None,
+                Ok(c) => Some(c),
+                Err(e) => {
+                    warn!("Couldnt load {path}: {e}");
+                    None
+                }
+            })
+            .collect();
+
+        let data: HashMap<String, Vec<Count>> = loaded
+            .into_iter()
+            .map(|counts| {
+                // `loaded` only holds non-empty vectors, so index 0 always exists.
+                let contig = counts[0].position.contig.clone();
+                (contig, counts)
+            })
+            .collect();
+
+        Self {
+            data,
+            mrd: HashMap::new(),
+        }
+    }
+
+    pub fn mrd_from_files(&mut self, paths: Vec<String>) {
         let counts: Vec<Vec<Count>> = paths
             .par_iter()
             .map(|path| match read_counts_from_file(path) {
@@ -149,11 +191,325 @@ impl Counts {
             let contig = count.first().unwrap().position.contig.clone();
             data.insert(contig, count);
         }
-        Counts { data }
+        self.mrd = data;
+    }
+
+    /// Annotates the diag bins of `contig` whose paired MRD bin has fewer than
+    /// `min_reads` reads with [`CountAnnotation::MaskedLowMRD`].
+    ///
+    /// MRD and diag bins are paired by index. When the two vectors differ in
+    /// length a warning is logged and only the common prefix is compared.
+    /// A bin is annotated at most once, so calling this method again does not
+    /// inflate the totals returned by `counts_annotations`.
+    ///
+    /// # Errors
+    /// Fails when `contig` is missing from `mrd` or from `data`.
+    pub fn mask_low_mrd(&mut self, contig: &str, min_reads: u32) -> anyhow::Result<()> {
+        if let (Some(mrd), Some(diag)) = (self.mrd.get(contig), self.data.get_mut(contig)) {
+            if mrd.len() != diag.len() {
+                warn!(
+                    "{contig}: mrd has {} bins but diag has {}, only the common prefix is compared",
+                    mrd.len(),
+                    diag.len()
+                );
+            }
+            for (m, d) in mrd.iter().zip(diag.iter_mut()) {
+                let already_masked = d.annotation.contains(&CountAnnotation::MaskedLowMRD);
+                if m.n_reads < min_reads && !already_masked {
+                    d.annotation.push(CountAnnotation::MaskedLowMRD);
+                }
+            }
+            Ok(())
+        } else {
+            anyhow::bail!("No {contig} in both mrd and diag.")
+        }
+    }
+
+    /// Annotates the bins of `contig` whose share of low-MAPQ reads,
+    /// `n_low_mapq / (n_reads + n_low_mapq)`, is strictly above `max_ratio`
+    /// with [`CountAnnotation::MaskedQuality`].
+    ///
+    /// The ratio is computed in floating point so the sum cannot overflow. A
+    /// bin without any read gives `0 / 0 = NaN`, which never compares above
+    /// `max_ratio`, so such a bin is left unannotated. A bin is annotated at
+    /// most once, whatever the number of calls.
+    ///
+    /// # Errors
+    /// Fails when `contig` is missing from `data` (the MRD counts are not
+    /// consulted here).
+    pub fn mask_low_quality(&mut self, contig: &str, max_ratio: f64) -> anyhow::Result<()> {
+        if let Some(diag) = self.data.get_mut(contig) {
+            for d in diag.iter_mut() {
+                let low = d.n_low_mapq as f64;
+                let total = d.n_reads as f64 + low;
+                let already_masked = d.annotation.contains(&CountAnnotation::MaskedQuality);
+                if low / total > max_ratio && !already_masked {
+                    d.annotation.push(CountAnnotation::MaskedQuality);
+                }
+            }
+            Ok(())
+        } else {
+            anyhow::bail!("No {contig} in counts.")
+        }
+    }
+
+    /// Histogram of the unmasked read counts of `contig`.
+    ///
+    /// Returns `(read count, number of bins with that count)` pairs, both as
+    /// `f64`, ordered by increasing read count.
+    ///
+    /// # Errors
+    /// Fails when `contig` is not loaded.
+    pub fn frequencies(&self, contig: &str) -> anyhow::Result<Vec<(f64, f64)>> {
+        // A BTreeMap iterates in key order, which gives the sorted output directly.
+        let mut tally: BTreeMap<u32, u64> = BTreeMap::new();
+        for depth in self.get(contig)? {
+            *tally.entry(depth).or_insert(0) += 1;
+        }
+
+        let pairs = tally
+            .into_iter()
+            .map(|(depth, n_bins)| (depth as f64, n_bins as f64))
+            .collect();
+        Ok(pairs)
+    }
+
+    /// Read count found at the given `percentile` (expected in `0.0..=100.0`)
+    /// of the unmasked bins of `contig`, using the nearest rank in the sorted
+    /// values.
+    ///
+    /// # Errors
+    /// Fails when `contig` is not loaded, when it has no unmasked bin (the
+    /// rank computation would otherwise underflow on `len - 1`), or when the
+    /// computed rank falls outside the data (`percentile` above 100).
+    pub fn percentile(&self, contig: &str, percentile: f64) -> anyhow::Result<u32> {
+        let mut data = self.get(contig)?;
+        if data.is_empty() {
+            anyhow::bail!("No unmasked counts for {contig}, cannot compute a percentile.");
+        }
+        data.sort_unstable();
+
+        let index = (percentile / 100.0 * (data.len() - 1) as f64).round() as usize;
+        data.get(index).copied().context("Error in percentile")
+    }
+
+    /// Masks `contig`, then writes its coverage graphs and statistics.
+    ///
+    /// Steps:
+    /// 1. `mask_low_mrd(contig, 6)` and `mask_low_quality(contig, 0.1)` annotate
+    ///    the bins to leave out.
+    /// 2. `{prefix}_{contig}_distrib.svg`: relative frequency of every unmasked
+    ///    read count up to the 99th percentile, with a Normal density and a
+    ///    Poisson mass function parameterised from the same values.
+    /// 3. `{prefix}_{contig}_chromosome.svg`: chromosome drawing with grey
+    ///    (masked), red (`ranges_under(.., 5, ..)`) and green
+    ///    (`ranges_between(.., 6, 9999, ..)`) rectangles.
+    /// 4. `{prefix}_{contig}_stats.json`: sum, mean, standard deviation, the
+    ///    fraction of unmasked bins in each class delimited by `breaks`, and
+    ///    the fraction of bins per annotation.
+    ///
+    /// `breaks` lists the class boundaries; the classes are `< b0`,
+    /// `[b0 - b1[`, ... and `>= last`, so the values are expected in ascending
+    /// order.
+    ///
+    /// # Errors
+    /// Fails when `breaks` is empty (checked before anything is written), when
+    /// `contig` is missing from the diag or MRD counts, when a distribution
+    /// cannot be built from the data, or when the JSON file cannot be written.
+    ///
+    /// # Panics
+    /// Panics when `svg_chromosome` reports a failure.
+    pub fn save_contig(&mut self, contig: &str, prefix: &str, breaks: Vec<u32>) -> anyhow::Result<()> {
+        // Checked first so that an empty `breaks` does not leave partial output behind.
+        let last_break = *breaks
+            .last()
+            .context("save_contig needs at least one break")?;
+
+        self.mask_low_mrd(contig, 6)?;
+        self.mask_low_quality(contig, 0.1)?;
+
+        let data: Vec<f64> = self.get(contig)?.iter().map(|v| *v as f64).collect();
+        let n_final = data.len();
+
+        let frequencies = self.frequencies(contig)?;
+        let percentile_99 = self.percentile(contig, 99.0)?;
+
+        let mut data_x = Vec::new();
+        let mut data_y = Vec::new();
+        frequencies.iter().for_each(|(x, y)| {
+            if *x <= percentile_99 as f64 {
+                data_x.push(*x);
+                data_y.push(*y / n_final as f64);
+            }
+        });
+
+        // Distribution plot
+        let distribution_path = format!("{prefix}_{contig}_distrib.svg");
+        info!("Saving graph: {distribution_path}");
+        let mut plot = Plot::new();
+        let colors: Vec<plotly::color::Rgb> = data_x
+            .iter()
+            .map(|&x| {
+                if x < 2.0 {
+                    plotly::color::Rgb::new(193, 18, 31)
+                } else if x >= 15.0 {
+                    plotly::color::Rgb::new(138, 201, 38)
+                } else if x < 6.0 {
+                    plotly::color::Rgb::new(243, 114, 44)
+                } else {
+                    plotly::color::Rgb::new(255, 202, 58)
+                }
+            })
+            .collect();
+
+        let bars = Bar::new(data_x.clone(), data_y.clone())
+            .show_legend(false)
+            .marker(Marker::new().color_array(colors));
+
+        plot.add_trace(bars);
+
+        let sum: f64 = data.iter().sum();
+        let mean = (&data).mean();
+        let count = data.len() as f64;
+        let std_dev = (&data).std_dev();
+
+        // Normal
+        let normal = statrs::distribution::Normal::new(mean, std_dev)?;
+        let data_y: Vec<f64> = data_x.iter().map(|x| normal.pdf(*x)).collect();
+        let trace = Scatter::new(data_x.clone(), data_y).name("Normal");
+        plot.add_trace(trace);
+
+        // Poisson
+        let lambda = sum / count;
+        let poisson = statrs::distribution::Poisson::new(lambda)?;
+        let data_y = data_x.iter().map(|x| poisson.pmf(*x as u64)).collect();
+        let trace = Scatter::new(data_x.clone(), data_y).name("Poisson");
+        plot.add_trace(trace);
+
+        plot.use_local_plotly();
+        plot.write_image(distribution_path, plotly::ImageFormat::SVG, 800, 600, 1.0);
+
+        // Fractions of unmasked bins per class
+        let mut breaks_values = Vec::new();
+        for (i, b) in breaks.iter().enumerate() {
+            if i == 0 {
+                let total: f64 = frequencies
+                    .iter()
+                    .filter(|(x, _)| *x < *b as f64)
+                    .map(|(_, y)| *y / count)
+                    .sum();
+                breaks_values.push((format!("< {b}"), total));
+            } else {
+                let last = breaks[i - 1];
+                let total: f64 = frequencies
+                    .iter()
+                    .filter(|(x, _)| *x < *b as f64 && *x >= last as f64)
+                    .map(|(_, y)| *y / count)
+                    .sum();
+
+                breaks_values.push((format!("[{last} - {b}["), total));
+            }
+        }
+
+        let last = last_break;
+        let total: f64 = frequencies
+            .iter()
+            .filter(|(x, _)| *x >= last as f64)
+            .map(|(_, y)| *y / count)
+            .sum();
+        breaks_values.push((format!(">= {last}"), total));
+
+        // Chromosome
+        let tol = 25;
+        let chromosome_path = format!("{prefix}_{contig}_chromosome.svg");
+        info!("Saving graph: {chromosome_path}");
+
+        let target_annotations: HashSet<CountAnnotation> = vec![
+            CountAnnotation::MaskedLowMRD,
+            CountAnnotation::MaskedQuality,
+        ]
+        .into_iter()
+        .collect();
+
+        // Masked bins are replaced by the sentinel 10_000 so that `ranges_over`
+        // below can pick them up as one class.
+        // NOTE(review): a real bin with 10_000 reads or more is drawn as masked too.
+        let d: Vec<u32> = self
+            .data
+            .get(contig)
+            .with_context(|| format!("No {contig} in counts."))?
+            .iter()
+            .map(|c| {
+                if c.annotation
+                    .iter()
+                    .any(|ann| target_annotations.contains(ann))
+                {
+                    10_000u32
+                } else {
+                    c.n_reads
+                }
+            })
+            .collect();
+
+        let hm = self.counts_annotations(contig)?;
+
+        let len = d.len();
+        let mut masked: Vec<(String, f64)> = hm
+            .iter()
+            .map(|(k, v)| (format!("{:?}", k), *v as f64 / len as f64))
+            .collect();
+        masked.push(("Un masked".to_string(), n_final as f64 / len as f64));
+
+        // NOTE(review): indices are scaled by 1000, presumably because each bin
+        // spans 1 kb; confirm against the producer of the count files.
+        let under_6_rects: Vec<AdditionalRect> = ranges_under(&d, 5, tol)
+            .iter()
+            .filter(|(s, e)| e > s)
+            .map(|(start, end)| AdditionalRect {
+                start: *start as u32 * 1000,
+                end: *end as u32 * 1000,
+                color: String::from("red"),
+                position: RectPosition::Below(1),
+            })
+            .collect();
+
+        let over_6_rects: Vec<AdditionalRect> = ranges_between(&d, 6, 9999, tol)
+            .iter()
+            .filter(|(s, e)| e > s)
+            .map(|(start, end)| AdditionalRect {
+                start: *start as u32 * 1000,
+                end: *end as u32 * 1000,
+                color: String::from("green"),
+                position: RectPosition::Below(2),
+            })
+            .collect();
+
+        let masked_rec: Vec<AdditionalRect> = ranges_over(&d, 10000, tol)
+            .iter()
+            .filter(|(s, e)| e > s)
+            .map(|(start, end)| AdditionalRect {
+                start: *start as u32 * 1000,
+                end: *end as u32 * 1000,
+                color: String::from("grey"),
+                position: RectPosition::Below(0),
+            })
+            .collect();
+
+        let mut all = Vec::new();
+        all.extend(under_6_rects);
+        all.extend(over_6_rects);
+        all.extend(masked_rec);
+
+        svg_chromosome(
+            contig,
+            1000,
+            50,
+            "/data/ref/hs1/cytoBandMapped.bed",
+            &chromosome_path,
+            &all,
+            &Vec::new(),
+        )
+        .unwrap();
+
+        let stats = CountsStats {
+            sum,
+            mean,
+            std_dev,
+            breaks_values,
+            masked,
+        };
+
+        // Save stats
+        let json_path = format!("{prefix}_{contig}_stats.json");
+        info!("Saving stats into: {json_path}");
+        let json = serde_json::to_string_pretty(&stats)?;
+        let mut file = File::create(json_path)?;
+        file.write_all(json.as_bytes())?;
+
+        Ok(())
+    }
+
+    /// Runs `save_contig` on every entry of `contigs`, in order, with the same
+    /// `prefix` and a copy of `breaks`, stopping at the first error.
+    pub fn save_contigs(
+        &mut self,
+        contigs: &Vec<String>,
+        prefix: &str,
+        breaks: Vec<u32>,
+    ) -> anyhow::Result<()> {
+        contigs
+            .iter()
+            .try_for_each(|contig| self.save_contig(contig, prefix, breaks.clone()))
+    }
+
+    /// Counts how many times each annotation appears over the bins of `contig`.
+    ///
+    /// # Errors
+    /// Fails when `contig` is not loaded.
+    pub fn counts_annotations(
+        &self,
+        contig: &str,
+    ) -> anyhow::Result<HashMap<CountAnnotation, u64>> {
+        let bins = match self.data.get(contig) {
+            Some(bins) => bins,
+            None => anyhow::bail!("No {contig} in counts."),
+        };
+
+        let mut tally = HashMap::new();
+        for annotation in bins.iter().flat_map(|bin| bin.annotation.iter()) {
+            *tally.entry(annotation.clone()).or_insert(0) += 1;
+        }
+        Ok(tally)
     }
 
     pub fn get(&self, contig: &str) -> anyhow::Result<Vec<u32>> {
         if let Some(ccounts) = self.data.get(contig) {
+            let target_annotations: HashSet<CountAnnotation> = vec![
+                CountAnnotation::MaskedLowMRD,
+                CountAnnotation::MaskedQuality,
+            ]
+            .into_iter()
+            .collect();
+
+            Ok(ccounts
+                .iter()
+                .filter(|count| {
+                    !count
+                        .annotation
+                        .iter()
+                        .any(|ann| target_annotations.contains(ann))
+                })
+                .map(|c| c.n_reads)
+                .collect())
+        } else {
+            anyhow::bail!("No {contig} in counts.")
+        }
+    }
+
+    pub fn mrd(&self, contig: &str) -> anyhow::Result<Vec<u32>> {
+        if let Some(ccounts) = self.mrd.get(contig) {
             Ok(ccounts.iter().map(|c| c.n_reads).collect())
         } else {
             anyhow::bail!("No {contig} in counts.")
@@ -172,10 +528,10 @@ impl Counts {
 
             let cdf = ND::new(n_reads.clone());
 
-            println!("CDF at 13: {:?}", cdf.cdf(13));
+            // println!("CDF at 13: {:?}", cdf.cdf(13));
             println!("Percentile at 99: {:?}", cdf.percentile(99.0));
             println!("above 15X: {:?}", cdf.proportion_above(15));
-            println!("above 15.1X: {:?}", cdf.fitted_proportion_above(15.1));
+            // println!("above 15.1X: {:?}", cdf.fitted_proportion_above(&15.1));
             println!("under 6X: {:?}", cdf.proportion_under(6));
 
             Ok(percentiles
@@ -197,29 +553,146 @@ impl Counts {
             anyhow::bail!("No {contig} in counts")
         }
     }
-}
 
-use statrs::{
-    distribution::{ContinuousCDF, Normal},
-    statistics::Distribution,
-};
+    /// Builds an [`ND`] from the unmasked read counts of `contig`; fails when
+    /// the contig is not loaded.
+    pub fn distribution(&self, contig: &str) -> anyhow::Result<ND> {
+        self.get(contig).map(ND::new)
+    }
+
+    /// Placeholder: does nothing and always returns `Ok(())`.
+    pub fn save_stats(&self) -> anyhow::Result<()> {
+        Ok(())
+    }
+
+    /// Writes to `path` an 800x600 SVG stacked bar chart with one bar per entry
+    /// of `contigs` and one trace per class delimited by `breaks` (`< b0`,
+    /// `[b0 - b1[`, ..., `>= last`), using the values of `ND::frequencies`.
+    /// The contig list is also printed on standard output.
+    ///
+    /// # Panics
+    /// Panics when `breaks` is empty or when a contig is not loaded.
+    pub fn save_global_proportions_graph(
+        &self,
+        path: &str,
+        contigs: &Vec<String>,
+        breaks: Vec<u32>,
+    ) {
+        // Legend labels: one per break, then the open-ended last class.
+        let mut breaks_str: Vec<String> = breaks
+            .iter()
+            .enumerate()
+            .map(|(i, b)| match i {
+                0 => format!("< {b}"),
+                _ => {
+                    let last = breaks[i - 1];
+                    format!("[{last} - {b}[")
+                }
+            })
+            .collect();
+        breaks_str.push(format!(">= {}", breaks.last().unwrap()));
+
+        // One row of class proportions per contig.
+        let proportions: Vec<Vec<f64>> = contigs
+            .iter()
+            .map(|contig| ND::new(self.get(contig).unwrap()).frequencies(&breaks))
+            .collect();
+
+        let mut plot = Plot::new();
+        for (i, v) in transpose(proportions).iter().enumerate() {
+            plot.add_trace(Bar::new(contigs.clone(), v.to_vec()).name(&breaks_str[i]));
+        }
+        println!("{:?}", contigs);
+        plot.set_layout(Layout::new().bar_mode(BarMode::Stack));
+        plot.use_local_plotly();
+        plot.write_image(path, plotly::ImageFormat::SVG, 800, 600, 1.0);
+    }
+
+    /// Writes to `path` an 800x600 SVG of the read-count distribution pooled
+    /// over `contigs`.
+    ///
+    /// The bars give, for every count from 1 to the 99th percentile, its share
+    /// among the unmasked bins with at least one read. Three curves are drawn
+    /// on top: the normal law fitted by `ND`, a Gamma law parameterised from
+    /// the mean and variance of all pooled values, and a Poisson law whose
+    /// rate is their mean. The proportions for the breaks 1, 6 and 15 are
+    /// printed on standard output.
+    ///
+    /// # Panics
+    /// Panics when a contig is not loaded, when no percentile can be computed,
+    /// or when the Gamma or Poisson parameters are rejected by `statrs`.
+    pub fn save_global_distribution_graph(&self, path: &str, contigs: &Vec<String>) {
+        let d: Vec<u32> = contigs.iter().flat_map(|c| self.get(c).unwrap()).collect();
+
+        let nd = ND::new(d.clone());
+        // Upper bound of the plotted range, computed once.
+        let p99 = nd.percentile(99.0).unwrap();
+        let mut plot = Plot::new();
+
+        let bar_x: Vec<u32> = (1..=p99).collect();
+        let colors: Vec<plotly::color::Rgb> = bar_x
+            .iter()
+            .map(|&x| {
+                if x <= 2 {
+                    plotly::color::Rgb::new(193, 18, 31)
+                } else if x >= 15 {
+                    plotly::color::Rgb::new(138, 201, 38)
+                } else if x <= 6 {
+                    plotly::color::Rgb::new(243, 114, 44)
+                } else {
+                    plotly::color::Rgb::new(255, 202, 58)
+                }
+            })
+            .collect();
+
+        // Bins without any read are left out of the bar heights.
+        let data: Vec<u32> = d.iter().filter(|x| **x >= 1).copied().collect();
+
+        // frequencies
+        let mut frequencies = HashMap::new();
+        for &value in &data {
+            *frequencies.entry(value).or_insert(0) += 1;
+        }
+
+        let bars = Bar::new(
+            bar_x.clone(),
+            bar_x
+                .iter()
+                .map(|x| *frequencies.get(x).unwrap_or(&0) as f64 / data.len() as f64)
+                .collect(),
+        )
+        .show_legend(false)
+        .marker(Marker::new().color_array(colors));
+
+        plot.add_trace(bars);
+
+        let data_x = generate_range(0.0, p99.into(), 100);
+        let data_y: Vec<f64> = data_x.iter().map(|x| nd.fitted_normal.pdf(x)).collect();
+        let trace = Scatter::new(data_x.clone(), data_y).name("Gaussian");
+        plot.add_trace(trace);
+
+        // Gamma, parameterised by the method of moments on all pooled values
+        let data: Vec<f64> = d.iter().map(|x| *x as f64).collect();
+
+        let sum: f64 = data.iter().sum();
+        let mean = (&data).mean();
+        let variance = (&data).variance();
+        let count = d.len() as f64;
+        let shape = mean * mean / variance;
+        let rate = mean / variance;
+
+        let gamma = statrs::distribution::Gamma::new(shape, rate).unwrap();
+        let data_y: Vec<f64> = data_x.iter().map(|x| gamma.pdf(*x)).collect();
+        let trace = Scatter::new(data_x.clone(), data_y).name("Gamma");
+        plot.add_trace(trace);
+
+        // Poisson
+        let lambda = sum / count;
+        let poisson = statrs::distribution::Poisson::new(lambda).unwrap();
+        let data_y = data_x.iter().map(|x| poisson.pmf(*x as u64)).collect();
+        let trace = Scatter::new(data_x.clone(), data_y).name("Poisson");
+        plot.add_trace(trace);
+
+        plot.use_local_plotly();
+        plot.write_image(path, plotly::ImageFormat::SVG, 800, 600, 1.0);
+        println!("> 15x: {:?}", nd.frequencies(&vec![1, 6, 15]));
+    }
+}
 
+/// Read-count values with their frequency table and a fitted normal law.
 pub struct ND {
+    /// Input values, sorted in ascending order by `ND::new`.
     pub data: Vec<u32>,
+    /// NOTE(review): filled in a part of `ND::new` not shown here; presumably
+    /// maps each value to a cumulative proportion. Confirm.
     pub distribution: BTreeMap<u32, f64>,
+    /// NOTE(review): presumably the number of values in `data`. Confirm.
     pub total_count: usize,
+    /// Number of occurrences of each value.
     pub frequency: HashMap<u32, usize>,
-    pub fitted_normal: Normal,
+    /// Normal law fitted by maximum likelihood on the values `>= 1`.
+    pub fitted_normal: UvNormal,
 }
 
+use rstat::{fitting::MLE, normal::UvNormal, ContinuousDistribution};
+
 impl ND {
     fn new(mut data: Vec<u32>) -> Self {
         data.sort_unstable();
         let n = data.len();
         info!("n values {n}");
-        let mut frequency = HashMap::new();
         let mut distribution = BTreeMap::new();
 
+        let mut frequency = HashMap::new();
         for &value in &data {
             *frequency.entry(value).or_insert(0) += 1;
         }
@@ -232,7 +705,14 @@ impl ND {
         }
 
         // Fit normal distribution
-        let fitted_normal = Self::fit_normal(&data);
+        let fitted_normal = rstat::univariate::normal::Normal::fit_mle(
+            &data
+                .iter()
+                .filter(|x| *x >= &1u32)
+                .map(|x| *x as f64)
+                .collect::<Vec<f64>>(),
+        )
+        .unwrap();
 
         Self {
             data,
@@ -243,38 +723,23 @@ impl ND {
         }
     }
 
-    fn fit_normal(data: &[u32]) -> Normal {
-        let n = data.len() as f64;
-
-        // Calculate mean and variance in a single pass
-        let (sum, sum_sq) = data
-            .iter()
-            .filter(|v| **v > 1)
-            .fold((0.0, 0.0), |(sum, sum_sq), x| {
-                let x = *x as f64;
-                (sum + x, sum_sq + x * x)
-            });
-
-        let mean = sum / n;
-        let variance = (sum_sq / n) - (mean * mean);
-        let std_dev = variance.sqrt();
-
-        Normal::new(mean, std_dev).unwrap()
-    }
-
-    pub fn pdf(&self, x: f64) -> f64 {
-        let epsilon = 1e-6; // Small value for numerical differentiation
-        let cdf_x = self.fitted_normal.cdf(x);
-        let cdf_x_plus_epsilon = self.fitted_normal.cdf(x + epsilon);
-
-        // Approximate the derivative
-        (cdf_x_plus_epsilon - cdf_x) / epsilon
-    }
-
     /// Returns how many times `x` was observed, or `0` when it never occurs.
     pub fn frequency(&self, x: u32) -> usize {
         self.frequency.get(&x).copied().unwrap_or(0)
     }
 
+    /// Splits the observed proportions into consecutive buckets delimited by `breaks`.
+    ///
+    /// Element `i` is `proportion_under(breaks[i])` minus the proportion already
+    /// attributed to the earlier breaks. One extra trailing element covers the span
+    /// from the last break up to the 99th-percentile value, so the result always has
+    /// `breaks.len() + 1` entries.
+    ///
+    /// `breaks` is expected in ascending order; nothing here sorts or validates it.
+    /// Whether each boundary is inclusive is decided by `proportion_under`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `percentile(99.0)` returns `None`.
+    pub fn frequencies(&self, breaks: &[u32]) -> Vec<f64> {
+        let mut res = Vec::with_capacity(breaks.len() + 1);
+        let mut last_prop_under = 0.0;
+        for &brk in breaks {
+            let v = self.proportion_under(brk) - last_prop_under;
+            res.push(v);
+            // Accumulate the bucket widths so the running total tracks what has
+            // already been handed out.
+            last_prop_under += v;
+        }
+        let per99 = self
+            .percentile(99.0)
+            .expect("frequencies: percentile(99.0) returned None");
+        res.push(self.proportion_under(per99) - last_prop_under);
+        res
+    }
+
     pub fn percentile(&self, percentile: f64) -> Option<u32> {
         if !(0.0..=100.0).contains(&percentile) {
             return None;
@@ -284,12 +749,6 @@ impl ND {
         self.data.get(index).cloned()
     }
 
-    pub fn cdf(&self, x: u32) -> f64 {
-        self.distribution
-            .range(..=x)
-            .next_back()
-            .map_or(0.0, |(_, &prob)| prob)
-    }
     pub fn proportion_under(&self, x: u32) -> f64 {
         let count = self
             .frequency
@@ -309,21 +768,6 @@ impl ND {
             .sum::<usize>();
         count as f64 / self.total_count as f64
     }
-
-    pub fn fitted_proportion_under(&self, x: f64) -> f64 {
-        self.fitted_normal.cdf(x)
-    }
-
-    pub fn fitted_proportion_above(&self, x: f64) -> f64 {
-        1.0 - self.fitted_normal.cdf(x)
-    }
-
-    pub fn get_fitted_parameters(&self) -> (f64, f64) {
-        (
-            self.fitted_normal.mean().unwrap(),
-            self.fitted_normal.std_dev().unwrap(),
-        )
-    }
 }
 
 pub fn generate_range(start: f64, end: f64, steps: usize) -> Vec<f64> {
@@ -412,3 +856,87 @@ where
         })
         .collect()
 }
+
+/// Transposes a row-major matrix of `f64`.
+///
+/// For rectangular input, element `j` of input row `i` becomes element `i` of
+/// output row `j`. The number of output rows is the length of the first input row.
+/// An empty input yields an empty result.
+///
+/// Rows shorter than the first are accepted: their values are appended to the
+/// leading output rows only, so the result is a true transpose only when every
+/// row has the same length.
+///
+/// # Panics
+///
+/// Panics if any row is longer than the first row.
+pub fn transpose(v: Vec<Vec<f64>>) -> Vec<Vec<f64>> {
+    // An empty matrix has nothing to transpose; return it instead of aborting.
+    let len = match v.first() {
+        Some(first) => first.len(),
+        None => return Vec::new(),
+    };
+    let n_rows = v.len();
+    // Build each column separately: `vec![Vec::with_capacity(n); len]` clones an
+    // empty vector, and a clone does not keep the reserved capacity.
+    let mut result: Vec<Vec<f64>> = (0..len).map(|_| Vec::with_capacity(n_rows)).collect();
+
+    for row in v {
+        assert!(
+            row.len() <= len,
+            "transpose: row of length {} exceeds the first row's length {}",
+            row.len(),
+            len
+        );
+        for (column, val) in result.iter_mut().zip(row) {
+            column.push(val);
+        }
+    }
+    result
+}
+
+/// Serializable record of summary statistics for a set of counts.
+///
+/// NOTE(review): nothing in this excerpt constructs this struct, so the field
+/// notes below are inferred from names and types only — confirm against the producer.
+#[derive(Debug, Serialize)]
+pub struct CountsStats {
+    /// Presumably the sum of all values.
+    pub sum: f64,
+    /// Presumably the arithmetic mean of the values.
+    pub mean: f64,
+    /// Presumably the standard deviation of the values.
+    pub std_dev: f64,
+    /// `(label, value)` pairs; presumably one per coverage break — TODO confirm.
+    pub breaks_values: Vec<(String, f64)>,
+    /// `(label, value)` pairs; presumably describing masked positions — TODO confirm.
+    pub masked: Vec<(String, f64)>,
+}
+
+// pub fn save_barplot(
+//     data: Vec<f64>,
+//     data_x: Vec<f64>,
+//     data_y: Vec<f64>,
+//     path: &str,
+// ) -> anyhow::Result<CountsStats> {
+//     let mut plot = Plot::new();
+//
+//     let colors: Vec<plotly::color::Rgb> = data_x
+//         .iter()
+//         .map(|&x| {
+//             if x <= 2.0 {
+//                 plotly::color::Rgb::new(193, 18, 31)
+//             } else if x >= 15.0 {
+//                 plotly::color::Rgb::new(138, 201, 38)
+//             } else if x <= 6.0 {
+//                 plotly::color::Rgb::new(243, 114, 44)
+//             } else {
+//                 plotly::color::Rgb::new(255, 202, 58)
+//             }
+//         })
+//         .collect();
+//
+//     let bars = Bar::new(data_x.clone(), data_y.clone())
+//         .show_legend(false)
+//         .marker(Marker::new().color_array(colors));
+//
+//     plot.add_trace(bars);
+//
+//     let sum: f64 = data.iter().sum();
+//     let mean = (&data).mean();
+//     let count = data.len() as f64;
+//     let std_dev = (&data).std_dev();
+//     println!("mean {mean}");
+//
+//     // Normal
+//     let normal = statrs::distribution::Normal::new(mean, std_dev)?;
+//     let data_y: Vec<f64> = data_x.iter().map(|x| normal.pdf(*x)).collect();
+//     let trace = Scatter::new(data_x.clone(), data_y).name("Normal");
+//     plot.add_trace(trace);
+//
+//     // // Gamma
+//     // let shape = mean * mean / variance;
+//     // let rate = mean / variance;
+//     //
+//     // let gamma = statrs::distribution::Gamma::new(shape, rate).unwrap();
+//     // let data_y: Vec<f64> = data_x.iter().map(|x| gamma.pdf(*x)).collect();
+//     // let trace = Scatter::new(data_x.clone(), data_y).name("Gamma");
+//     // plot.add_trace(trace);
+//
+//     // Poisson
+//     let lambda = sum / count;
+//     let poisson = statrs::distribution::Poisson::new(lambda)?;
+//     let data_y = data_x.iter().map(|x| poisson.pmf(*x as u64)).collect();
+//     let trace = Scatter::new(data_x.clone(), data_y).name("Poisson");
+//     plot.add_trace(trace);
+//
+//     plot.use_local_plotly();
+//     plot.write_image(path, plotly::ImageFormat::SVG, 800, 600, 1.0);
+//     Ok(CountsStats { sum, mean, std_dev })
+// }

+ 67 - 42
src/lib.rs

@@ -306,9 +306,10 @@ pub fn par_whole_scan(dict_file: &str, bam_path: &str, out_dir: &str) -> anyhow:
 #[cfg(test)]
 mod tests {
 
-    use counts::{ranges_between, ranges_over, ranges_under, Counts};
+    use counts::{generate_range, ranges_between, ranges_over, ranges_under,  Counts};
     use pandora_lib_graph::cytoband::{svg_chromosome, AdditionalRect, RectPosition};
-    use plotly::{common::Marker, Bar, Plot};
+    use plotly::{common::Marker, Bar, Plot, Scatter};
+    use rstat::{statistics::Quantiles, ContinuousDistribution, Probability};
     use rust_htslib::bam::Reader;
 
     use super::*;
@@ -405,13 +406,70 @@ mod tests {
         assert!(records_ids.contains(&"56bbfd4a-0d1d-4d97-b307-7626be446ce8".to_string()));
     }
 
+    /// Loads the diag and MRD count files of every contig for one sample, calls
+    /// `mask_low_mrd` and `mask_low_quality` on `contig`, then calls `save_contigs`
+    /// for the whole contig list.
+    ///
+    /// Reads from and writes under `/data/longreads_basic_pipe`, so it can only
+    /// succeed on a machine where those scan files exist.
+    #[test]
+    fn diff() -> anyhow::Result<()> {
+        init();
+        let id = "BECERRA";
+        let contig = "chr1";
+        let breaks = vec![1, 6, 15];
+
+        let result_dir = "/data/longreads_basic_pipe";
+        let save_dir = format!("{result_dir}/{id}/diag/report/data/scan");
+        info!("Files will be saved into {save_dir}");
+        fs::create_dir_all(&save_dir)?;
+
+        let mut contigs: Vec<String> = (1..=22).map(|c| format!("chr{c}")).collect();
+        contigs.push("chrX".to_string());
+        contigs.push("chrY".to_string());
+        // Borrowing `contigs` is enough: every path is a freshly formatted `String`,
+        // so the list does not need to be cloned before iterating.
+        let mut counts = Counts::from_files(
+            contigs
+                .iter()
+                .map(|c| format!("{result_dir}/{id}/diag/scan/{c}_counts.tsv"))
+                .collect(),
+        );
+        counts.mrd_from_files(
+            contigs
+                .iter()
+                .map(|c| format!("{result_dir}/{id}/mrd/scan/{c}_counts.tsv"))
+                .collect(),
+        );
+        counts.mask_low_mrd(contig, 6)?;
+        counts.mask_low_quality(contig, 0.1)?;
+        // let hm = counts.counts_annotations(contig)?;
+
+        // let len = counts.data.get(contig).unwrap().len();
+        // hm.iter().for_each(|(k, v)| {
+        //     println!("{:?} {:0.2}", k, *v as f64 / len as f64);
+        // });
+        // let n_final = counts.get(contig).unwrap().len();
+        // println!("n ok: {:0.2}", n_final as f64 / len as f64);
+
+        counts.save_contigs(&contigs, &format!("{save_dir}/{id}"), breaks)?;
+        Ok(())
+    }
+
     #[test]
     fn load() -> anyhow::Result<()> {
         init();
         info!("loading");
-        let contig = "chr22";
-        let count_file = &format!("/data/longreads_basic_pipe/ROBIN/diag/scan/{contig}_counts.tsv");
-        let counts = Counts::from_files(vec![count_file]);
+        let id = "BECERRA";
+        let contig = "chr9";
+        let breaks = vec![1, 6, 15];
+
+        let mut contigs: Vec<String> = (1..=22).map(|c| format!("chr{c}")).collect();
+        contigs.push("chrX".to_string());
+        contigs.push("chrY".to_string());
+        let counts = Counts::from_files(
+            contigs
+                .clone()
+                .iter()
+                .map(|c| format!("/data/longreads_basic_pipe/{id}/diag/scan/{c}_counts.tsv"))
+                .collect(),
+        );
+        counts.save_global_proportions_graph("/data/proportions.svg", &contigs, breaks);
+        counts.save_global_distribution_graph("/data/global_distribution.svg", &contigs);
 
         let chr1_nd_reads = counts.nd_reads(contig)?;
         println!(
@@ -422,11 +480,12 @@ mod tests {
         );
         println!("< 6x: {:.2}%", chr1_nd_reads.proportion_under(6) * 100.0);
         println!("> 15x: {:.2}%", chr1_nd_reads.proportion_above(15) * 100.0);
+        println!("> 15x: {:?}", chr1_nd_reads.frequencies(&vec![1, 6, 15]));
         let d = counts.get(contig)?;
 
-        let tol = 20;
+        let tol = 25;
 
-        let under_6_rects: Vec<AdditionalRect> = ranges_under(&d, 6, tol)
+        let under_6_rects: Vec<AdditionalRect> = ranges_under(&d, 6, 0)
             .iter()
             .filter(|(s, e)| e > s)
             // .filter(|(s, e)| e - s > tol)
@@ -450,10 +509,9 @@ mod tests {
             })
             .collect();
 
-        let over15: Vec<AdditionalRect> = ranges_over(&d, 15, tol)
+        let over15: Vec<AdditionalRect> = ranges_over(&d, 15, 10)
             .iter()
             .filter(|(s, e)| e > s)
-            // .filter(|(s, e)| e - s > tol)
             .map(|(start, end)| AdditionalRect {
                 start: *start as u32 * 1000,
                 end: *end as u32 * 1000,
@@ -478,39 +536,6 @@ mod tests {
         )
         .unwrap();
 
-        let mut plot = Plot::new();
-
-        let bar_x: Vec<u32> = (0..=chr1_nd_reads.percentile(99.0).unwrap()).collect();
-        let colors: Vec<plotly::color::Rgb> = bar_x
-            .iter()
-            .map(|&x| {
-                if x <= 2 {
-                    plotly::color::Rgb::new(193, 18, 31)
-                } else if x >= 15 {
-                    plotly::color::Rgb::new(138, 201, 38)
-                } else if x <= 6 {
-                    plotly::color::Rgb::new(243, 114, 44)
-                } else {
-                    plotly::color::Rgb::new(255, 202, 58)
-                }
-            })
-            .collect();
-
-        let bars = Bar::new(
-            bar_x.clone(),
-            bar_x
-                .iter()
-                .map(|x| chr1_nd_reads.frequency(*x) as f64 / chr1_nd_reads.data.len() as f64)
-                .collect(),
-        )
-        .show_legend(false)
-        .marker(Marker::new().color_array(colors));
-
-        plot.add_trace(bars);
-
-        plot.use_local_plotly();
-        plot.write_image("/data/test2.svg", plotly::ImageFormat::SVG, 800, 600, 1.0);
-
         Ok(())
     }
 }