use std::time::Duration; use anyhow::{Context, Ok, Result}; use hashbrown::HashMap; use indicatif::{ProgressBar, ProgressStyle}; use statrs::distribution::{ChiSquared, ContinuousCDF}; pub fn chi_square_test_impl(observed: &[f64], expected: &[f64]) -> anyhow::Result { if observed.len() != expected.len() { return Err(anyhow::anyhow!("Input vectors must have the same length")); } // Calculate the chi-squared statistic let chi_squared_statistic: f64 = observed .iter() .zip(expected.iter()) .map(|(obs, exp)| ((obs - exp).abs() - 0.5).powi(2) / exp) .sum(); // Degrees of freedom is the number of categories minus 1 // let degrees_of_freedom = (observed.len() - 1) as f64; let degrees_of_freedom = 1.0; // Calculate p-value using chi-squared distribution let chi_squared_distribution = ChiSquared::new(degrees_of_freedom).unwrap(); let p_value = 1.0 - chi_squared_distribution.cdf(chi_squared_statistic); // You can use the p-value to make decisions based on your significance level // For example, with a significance level of 0.05, if p_value < 0.05, reject the null hypothesis Ok(p_value) } /// 2-sample test for equality of proportions with continuity correction /// remerciements to chatGPT pub fn chi_square_test_for_proportions( success_a: f64, total_a: f64, success_b: f64, total_b: f64, ) -> anyhow::Result { let observed_counts = vec![ success_a, total_a - success_a, success_b, total_b - success_b, ]; let expected_counts = vec![ total_a * (success_a + success_b) / (total_a + total_b), total_a * (total_a - success_a + total_b - success_b) / (total_a + total_b), total_b * (success_a + success_b) / (total_a + total_b), total_b * (total_a - success_a + total_b - success_b) / (total_a + total_b), ]; chi_square_test_impl(&observed_counts, &expected_counts) } pub fn get_hts_nt_pileup( bam: &mut rust_htslib::bam::IndexedReader, chr: &str, start: i32, with_next_ins: bool, ) -> Result> { use rust_htslib::{bam, bam::Read}; let stop = start + 1; let mut bases = Vec::new(); bam.fetch((chr, start, stop))?; let mut bam_pileup = Vec::new(); for p in bam.pileup() { let pileup = p.context(format!( "Can't pilup bam at position {}:{}-{}", chr, start, stop ))?; let position = pileup.pos() as i32; if position == start { for alignment in pileup.alignments() { match alignment.indel() { bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'), bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'), _ => { let record = alignment.record(); if record.seq_len() > 0 { if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? { bases.push(b); } } else { if alignment.is_del() { bases.push(b'D'); } } } } } } } Ok(bases) } pub fn hts_base_at( record: &rust_htslib::bam::record::Record, at_pos: u32, with_next_ins: bool, ) -> Result> { use rust_htslib::bam::record::Cigar; let cigar = record.cigar(); let seq = record.seq(); let pos = cigar.pos() as u32; let mut read_i = 0u32; let at_pos = at_pos - 1; let mut ref_pos = pos; if ref_pos > at_pos { return Ok(None); } for (id, op) in cigar.iter().enumerate() { let (add_read, add_ref) = match *op { Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len), Cigar::Ins(len) => (len, 0), Cigar::Del(len) => (0, len), Cigar::RefSkip(len) => (0, len), Cigar::SoftClip(len) => (len, 0), Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0), }; // If at the end of the op len and next op is Ins return I if with_next_ins && ref_pos + add_read == at_pos + 1 { if let Some(Cigar::Ins(_)) = cigar.get(id + 1) { return Ok(Some(b'I')); } } if ref_pos + add_ref > at_pos { // Handle deletions directly if let Cigar::Del(_) = *op { return Ok(Some(b'D')); } else if let Cigar::RefSkip(_) = op { return Ok(None); } else { let diff = at_pos - ref_pos; let p = read_i + diff; return Ok(Some(seq[p as usize])); } } read_i += add_read; ref_pos += add_ref; } Ok(None) } // thanks to chatGPT (the best) pub fn estimate_shannon_entropy(dna_sequence: &str) -> f64 { let m = dna_sequence.len() as f64; // Count occurrences of each base let mut bases = HashMap::::new(); for base in dna_sequence.chars() { *bases.entry(base).or_insert(0) += 1; } // Calculate Shannon entropy let mut shannon_entropy_value = 0.0; for &n_i in bases.values() { let p_i = n_i as f64 / m; shannon_entropy_value -= p_i * p_i.log2(); } shannon_entropy_value } pub fn print_stat_cat(s: &HashMap, denum: u32) { let denum = denum as f32; let mut v: Vec<(&String, &u32)> = s.iter().map(|e| e).collect(); v.sort_by(|(_, a), (_, b)| b.cmp(a)); let mut table = prettytable::table!(["category", "n", "%"]); v.iter().for_each(|(k, v)| { let p = (**v as f32) * 100 as f32 / denum; let p = format!("{:.2}", p); table.add_row([*k, &v.to_string(), &p].into()); }); table.printstd(); } pub fn new_pg(len: u64) -> ProgressBar { let sty = ProgressStyle::with_template( " {spinner} {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {human_pos:>7}/{human_len:7}", ) .unwrap() .progress_chars("=>-"); let pg = ProgressBar::new(len); pg.set_style(sty); pg.enable_steady_tick(Duration::from_millis(200)); pg } pub fn new_pg_speed(len: u64) -> ProgressBar { let sty = ProgressStyle::with_template( " {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {human_pos:>7}/{human_len:7} {per_sec}", ) .unwrap() .progress_chars("=>-"); let pg = ProgressBar::new(len); pg.set_style(sty); pg.enable_steady_tick(Duration::from_millis(200)); pg } pub fn new_pg_bytes(len: u64) -> ProgressBar { let sty = ProgressStyle::with_template( " {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {decimal_bytes:>7}/{decimal_total_bytes:7} {decimal_bytes_per_sec}", ) .unwrap() .progress_chars("=>-"); let pg = ProgressBar::new(len); pg.set_style(sty); pg.enable_steady_tick(Duration::from_millis(200)); pg }