| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- use std::time::Duration;
- use anyhow::{Context, Ok, Result};
- use hashbrown::HashMap;
- use indicatif::{ProgressBar, ProgressStyle};
- use statrs::distribution::{ChiSquared, ContinuousCDF};
- pub fn chi_square_test_impl(observed: &[f64], expected: &[f64]) -> anyhow::Result<f64> {
- if observed.len() != expected.len() {
- return Err(anyhow::anyhow!("Input vectors must have the same length"));
- }
- // Calculate the chi-squared statistic
- let chi_squared_statistic: f64 = observed
- .iter()
- .zip(expected.iter())
- .map(|(obs, exp)| ((obs - exp).abs() - 0.5).powi(2) / exp)
- .sum();
- // Degrees of freedom is the number of categories minus 1
- // let degrees_of_freedom = (observed.len() - 1) as f64;
- let degrees_of_freedom = 1.0;
- // Calculate p-value using chi-squared distribution
- let chi_squared_distribution = ChiSquared::new(degrees_of_freedom).unwrap();
- let p_value = 1.0 - chi_squared_distribution.cdf(chi_squared_statistic);
- // You can use the p-value to make decisions based on your significance level
- // For example, with a significance level of 0.05, if p_value < 0.05, reject the null hypothesis
- Ok(p_value)
- }
- /// 2-sample test for equality of proportions with continuity correction
- /// remerciements to chatGPT
- pub fn chi_square_test_for_proportions(
- success_a: f64,
- total_a: f64,
- success_b: f64,
- total_b: f64,
- ) -> anyhow::Result<f64> {
- let observed_counts = vec![
- success_a,
- total_a - success_a,
- success_b,
- total_b - success_b,
- ];
- let expected_counts = vec![
- total_a * (success_a + success_b) / (total_a + total_b),
- total_a * (total_a - success_a + total_b - success_b) / (total_a + total_b),
- total_b * (success_a + success_b) / (total_a + total_b),
- total_b * (total_a - success_a + total_b - success_b) / (total_a + total_b),
- ];
- chi_square_test_impl(&observed_counts, &expected_counts)
- }
- pub fn get_hts_nt_pileup(
- bam: &mut rust_htslib::bam::IndexedReader,
- chr: &str,
- start: i32,
- with_next_ins: bool,
- ) -> Result<Vec<u8>> {
- use rust_htslib::{bam, bam::Read};
- let stop = start + 1;
- let mut bases = Vec::new();
- bam.fetch((chr, start, stop))?;
- let mut bam_pileup = Vec::new();
- for p in bam.pileup() {
- let pileup = p.context(format!(
- "Can't pilup bam at position {}:{}-{}",
- chr, start, stop
- ))?;
- let position = pileup.pos() as i32;
- if position == start {
- for alignment in pileup.alignments() {
- match alignment.indel() {
- bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
- bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
- _ => {
- let record = alignment.record();
- if record.seq_len() > 0 {
- if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
- bases.push(b);
- }
- } else {
- if alignment.is_del() {
- bases.push(b'D');
- }
- }
- }
- }
- }
- }
- }
- Ok(bases)
- }
- pub fn hts_base_at(
- record: &rust_htslib::bam::record::Record,
- at_pos: u32,
- with_next_ins: bool,
- ) -> Result<Option<u8>> {
- use rust_htslib::bam::record::Cigar;
- let cigar = record.cigar();
- let seq = record.seq();
- let pos = cigar.pos() as u32;
- let mut read_i = 0u32;
- let at_pos = at_pos - 1;
- let mut ref_pos = pos;
- if ref_pos > at_pos {
- return Ok(None);
- }
- for (id, op) in cigar.iter().enumerate() {
- let (add_read, add_ref) = match *op {
- Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
- Cigar::Ins(len) => (len, 0),
- Cigar::Del(len) => (0, len),
- Cigar::RefSkip(len) => (0, len),
- Cigar::SoftClip(len) => (len, 0),
- Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
- };
- // If at the end of the op len and next op is Ins return I
- if with_next_ins && ref_pos + add_read == at_pos + 1 {
- if let Some(Cigar::Ins(_)) = cigar.get(id + 1) {
- return Ok(Some(b'I'));
- }
- }
- if ref_pos + add_ref > at_pos {
- // Handle deletions directly
- if let Cigar::Del(_) = *op {
- return Ok(Some(b'D'));
- } else if let Cigar::RefSkip(_) = op {
- return Ok(None);
- } else {
- let diff = at_pos - ref_pos;
- let p = read_i + diff;
- return Ok(Some(seq[p as usize]));
- }
- }
- read_i += add_read;
- ref_pos += add_ref;
- }
- Ok(None)
- }
- // thanks to chatGPT (the best)
- pub fn estimate_shannon_entropy(dna_sequence: &str) -> f64 {
- let m = dna_sequence.len() as f64;
- // Count occurrences of each base
- let mut bases = HashMap::<char, usize>::new();
- for base in dna_sequence.chars() {
- *bases.entry(base).or_insert(0) += 1;
- }
- // Calculate Shannon entropy
- let mut shannon_entropy_value = 0.0;
- for &n_i in bases.values() {
- let p_i = n_i as f64 / m;
- shannon_entropy_value -= p_i * p_i.log2();
- }
- shannon_entropy_value
- }
- pub fn print_stat_cat(s: &HashMap<String, u32>, denum: u32) {
- let denum = denum as f32;
- let mut v: Vec<(&String, &u32)> = s.iter().map(|e| e).collect();
- v.sort_by(|(_, a), (_, b)| b.cmp(a));
- let mut table = prettytable::table!(["category", "n", "%"]);
- v.iter().for_each(|(k, v)| {
- let p = (**v as f32) * 100 as f32 / denum;
- let p = format!("{:.2}", p);
- table.add_row([*k, &v.to_string(), &p].into());
- });
- table.printstd();
- }
- pub fn new_pg(len: u64) -> ProgressBar {
- let sty = ProgressStyle::with_template(
- " {spinner} {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {human_pos:>7}/{human_len:7}",
- )
- .unwrap()
- .progress_chars("=>-");
- let pg = ProgressBar::new(len);
- pg.set_style(sty);
- pg.enable_steady_tick(Duration::from_millis(200));
- pg
- }
- pub fn new_pg_speed(len: u64) -> ProgressBar {
- let sty = ProgressStyle::with_template(
- " {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {human_pos:>7}/{human_len:7} {per_sec}",
- )
- .unwrap()
- .progress_chars("=>-");
- let pg = ProgressBar::new(len);
- pg.set_style(sty);
- pg.enable_steady_tick(Duration::from_millis(200));
- pg
- }
- pub fn new_pg_bytes(len: u64) -> ProgressBar {
- let sty = ProgressStyle::with_template(
- " {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {decimal_bytes:>7}/{decimal_total_bytes:7} {decimal_bytes_per_sec}",
- )
- .unwrap()
- .progress_chars("=>-");
- let pg = ProgressBar::new(len);
- pg.set_style(sty);
- pg.enable_steady_tick(Duration::from_millis(200));
- pg
- }
|