Thomas 1 year ago
parent commit df63d1b17a

+ 0 - 1764

@@ -1,1764 +0,0 @@
-use crate::{
-    annotations::{
-        cosmic::Cosmic, echtvar::{parse_echtvar_val, run_echtvar}, gnomad::GnomAD, ncbi_gff::NCBIGFF, pangolin::{pangolin_parse_results, pangolin_save_variants, run_pangolin, Pangolin}, vep::{get_best_vep, vep_chunk, VEP}
-    },
-    callers::{
-        clairs::{ClairSFormat, ClairSInfo},
-        deepvariant::{DeepVariantFormat, DeepVariantInfo},
-        nanomonsv::{NanomonsvFormat, NanomonsvInfo},
-        sniffles::{SnifflesFormat, SnifflesInfo},
-    },
-    config::{self, Config},
-    in_out::{
-        self,
-        dict_reader::read_dict,
-        get_reader,
-        vcf_reader::{read_vcf, VCFRow},
-        vcf_writer::{vcf_header_from, VariantWritter},
-    },
-    sql::{stats_sql::insert_stats, variants_sql::insert_variants},
-    utils::{
-        chi_square_test_for_proportions, count_repetitions, estimate_shannon_entropy,
-        get_hts_nt_pileup, new_pg, new_pg_speed, print_stat_cat,
-    },
-};
-use anyhow::{anyhow, Context, Ok, Result};
-use csv::ReaderBuilder;
-use dashmap::DashMap;
-use hashbrown::HashMap;
-use indicatif::{MultiProgress, ParallelProgressIterator};
-use log::{info, warn};
-use noodles_core::{region::Region, Position};
-use noodles_fasta::indexed_reader::Builder as FastaBuilder;
-use noodles_gff as gff;
-
-use rayon::prelude::*;
-use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
-use std::{fs, io::Write};
-use std::{
-    env::temp_dir,
-    fmt,
-    fs::File,
-    str::FromStr,
-    sync::{
-        atomic::{AtomicI32, Ordering},
-        Arc,
-    },
-};
-use utoipa::{openapi::schema, ToSchema};
-
-// Variant key format: "contig:pos|ref>alt", e.g. chr12:25116542|G>T (KRAS)
-#[derive(Debug, Clone)]
-pub struct Variants {
-    pub name: String,
-    pub data: Vec<Variant>,
-    pub constit: DashMap<String, Variant>,
-    pub stats_vcf: StatsVCF,
-    pub stats_bam: StatsBAM,
-    pub cfg: Config,
-    pub mp: MultiProgress,
-}
-
-impl Serialize for Variants {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        // 5 fields are serialized; `cfg` and `mp` are skipped.
-        let mut state = serializer.serialize_struct("Variants", 5)?;
-        state.serialize_field("name", &self.name)?;
-        state.serialize_field("data", &self.data)?;
-        state.serialize_field("constit", &self.constit)?;
-        state.serialize_field("stats_vcf", &self.stats_vcf)?;
-        state.serialize_field("stats_bam", &self.stats_bam)?;
-        state.end()
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-pub struct StatsVCF {
-    n_tumoral_init: usize,
-    n_constit_init: usize,
-    n_constit: i32,
-    n_loh: i32,
-    n_low_mrd_depth: i32,
-}
-
-impl fmt::Display for StatsVCF {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let k = 100.0 / self.n_tumoral_init as f64;
-        let string = format!(
-            "VCF filters found {} ({:.1}%) constit, {} ({:.1}%) LOH, {} ({:.1}%) Low depth for constit variants",
-            self.n_constit, self.n_constit as f64 * k,
-            self.n_loh, self.n_loh as f64 * k,
-            self.n_low_mrd_depth, self.n_low_mrd_depth as f64 * k
-        );
-        write!(f, "{}", string)
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-pub struct StatsBAM {
-    n_lasting: i32,
-    n_constit: i32,
-    n_low_mrd_depth: i32,
-    n_low_diversity: i32,
-    n_somatic: i32,
-}
-
-impl fmt::Display for StatsBAM {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let k = 100.0 / self.n_lasting as f64;
-        let string = format!(
-            "BAM filters found {} ({:.1}%) constit, {} ({:.1}%) low depth for constit variants, {} ({:.1}%) low diversity of sequence at the variant position, {} ({:.1}%) somatic variants",
-            self.n_constit, self.n_constit as f64 * k,
-            self.n_low_mrd_depth, self.n_low_mrd_depth as f64 * k,
-            self.n_low_diversity, self.n_low_diversity as f64 * k,
-            self.n_somatic, self.n_somatic as f64 * k
-        );
-        write!(f, "{}", string)
-    }
-}
-
-impl Variants {
-    pub fn from_vec(name: String, mp: &MultiProgress, data: Vec<Variant>) -> Self {
-        Self {
-            name,
-            data,
-            constit: DashMap::new(),
-            stats_vcf: StatsVCF::default(),
-            stats_bam: StatsBAM::default(),
-            cfg: Config::get().unwrap(),
-            mp: mp.clone(),
-        }
-    }
-
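-    /// Read somatic and constitutional VCFs in parallel: somatic variants are
-    /// collected into `data`, constitutional variants are indexed in `constit`
-    /// under a "contig:pos|ref>alt" key.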
-    pub fn from_vcfs(
-        name: String,
-        v: Vec<(&str, &VCFSource, &VariantType)>,
-        cfg: &Config,
-        mp: MultiProgress,
-    ) -> Result<Self> {
-        let pg = mp.add(new_pg(v.len() as u64));
-        pg.set_message("Reading VCF");
-
-        let constit: Arc<DashMap<String, Variant>> = Arc::new(DashMap::new());
-        let n_constit = AtomicI32::new(0);
-        let data: Vec<Variant> = v
-            .par_iter()
-            // .progress_count(v.len() as u64)
-            .flat_map(|(path, source, variant_type)| {
-                let r = match variant_type {
-                    VariantType::Somatic => read_vcf(path, source, variant_type).unwrap(),
-                    VariantType::Constitutionnal => {
-                        read_vcf(path, source, variant_type)
-                            .unwrap()
-                            .par_iter()
-                            .for_each(|e| {
-                                n_constit.fetch_add(1, Ordering::SeqCst);
-                                constit.insert(
-                                    format!(
-                                        "{}:{}|{}>{}",
-                                        e.contig, e.position, e.reference, e.alternative
-                                    ),
-                                    e.clone(),
-                                );
-                            });
-                        vec![]
-                    }
-                };
-                pg.inc(1);
-                r
-            })
-            .collect();
-
-        let stats_vcf = StatsVCF::default();
-        let stats_bam = StatsBAM::default();
-
-        let constit = Arc::try_unwrap(constit).unwrap();
-        let elapsed = pg.elapsed();
-        pg.finish();
-        info!("{} variants parsed from somatic VCFs and {} variant positions parsed from constitutional VCFs. Executed in {}s", data.len(), constit.len(), elapsed.as_secs());
-        let cfg = cfg.clone();
-
-        return Ok(Self {
-            name,
-            data,
-            constit,
-            stats_vcf,
-            stats_bam,
-            cfg,
-            mp: mp.clone(),
-        });
-    }
-
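-    /// Classify likely somatic variants from VCF data alone: LowMRDDepth when the constit
-    /// depth is below `min_mrd_depth`, LOH when the chi-square test on tumoral vs constit
-    /// VAFs is significant, Constit otherwise. ClairS calls absent from the constit
-    /// registry fall back to the constit counts in their own FORMAT fields.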
-    pub fn vcf_filters(&mut self) {
-        let cfg = &self.cfg;
-        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
-        pg.set_message("VCF filtering");
-
-        let n_tumoral_init = self.len();
-        let n_constit_init = self.constit_len();
-        let min_loh_diff = cfg.deepvariant_loh_pval as f64;
-        let min_mrd_depth = cfg.min_mrd_depth;
-
-        info!("Filtering Constitutional (reported variant in constit), LOH (VAF proportion test < {}), LowMRDDepth (< {} in constit) variants by VCF annotations of {} likely somatic variants", min_loh_diff, min_mrd_depth, n_tumoral_init);
-        let n_constit = AtomicI32::new(0);
-        let n_loh = AtomicI32::new(0);
-        let n_low_mrd_depth = AtomicI32::new(0);
-        self.data = self
-            .data
-            .par_iter()
-            .map(|e| {
-                let mut tumoral = e.clone();
-                let k = format!(
-                    "{}:{}|{}>{}",
-                    tumoral.contig, tumoral.position, tumoral.reference, tumoral.alternative
-                );
-
-                if let Some(mut constit) = self.constit.get_mut(&k) {
-                    if constit.get_depth() < min_mrd_depth {
-                        n_low_mrd_depth.fetch_add(1, Ordering::SeqCst);
-                        tumoral.annotations.push(AnnotationType::VariantCategory(
-                            VariantCategory::LowMRDDepth,
-                        ));
-                    } else if constit.get_n_alt() == constit.get_depth()
-                        && tumoral.get_n_alt() == tumoral.get_depth()
-                    {
-                        n_constit.fetch_add(1, Ordering::SeqCst);
-                        tumoral
-                            .annotations
-                            .push(AnnotationType::VariantCategory(VariantCategory::Constit));
-                    } else {
-                        let pval = chi_square_test_for_proportions(
-                            tumoral.get_n_alt() as f64,
-                            tumoral.get_depth() as f64,
-                            constit.get_n_alt() as f64,
-                            constit.get_depth() as f64,
-                        )
-                        .unwrap();
-                        if pval != 0.0 && pval <= min_loh_diff {
-                            n_loh.fetch_add(1, Ordering::SeqCst);
-                            tumoral
-                                .annotations
-                                .push(AnnotationType::VariantCategory(VariantCategory::LOH));
-                        } else {
-                            n_constit.fetch_add(1, Ordering::SeqCst);
-                            tumoral
-                                .annotations
-                                .push(AnnotationType::VariantCategory(VariantCategory::Constit));
-                        }
-                    }
-                // If not in the constit registry, ClairS calls fall back to the constit depth and alt counts in their own FORMAT fields
-                } else if let Format::ClairS(format) = &tumoral.callers_data.get(0).unwrap().format
-                {
-                    if format.ndp < min_mrd_depth {
-                        n_low_mrd_depth.fetch_add(1, Ordering::SeqCst);
-                        tumoral.annotations.push(AnnotationType::VariantCategory(
-                            VariantCategory::LowMRDDepth,
-                        ));
-                    } else if let ReferenceAlternative::Nucleotide(alt_base) = &tumoral.alternative
-                    {
-                        let mrd_n_alt = match alt_base {
-                            Base::A => format.nau,
-                            Base::T => format.ntu,
-                            Base::C => format.ncu,
-                            Base::G => format.ngu,
-                            _ => 0,
-                        };
-                        if mrd_n_alt != 0 {
-                            n_constit.fetch_add(1, Ordering::SeqCst);
-                            tumoral
-                                .annotations
-                                .push(AnnotationType::VariantCategory(VariantCategory::Constit));
-                        }
-                    }
-                }
-                pg.inc(1);
-                tumoral
-            })
-            .collect();
-
-        let n_constit = n_constit.load(Ordering::SeqCst);
-        let n_loh = n_loh.load(Ordering::SeqCst);
-        let n_low_mrd_depth = n_low_mrd_depth.load(Ordering::SeqCst);
-
-        self.stats_vcf = StatsVCF {
-            n_tumoral_init,
-            n_constit_init,
-            n_constit,
-            n_loh,
-            n_low_mrd_depth,
-        };
-        // let elapsed = start.elapsed();
-        let elapsed = pg.elapsed();
-        pg.finish();
-        info!("{}. Executed in {}s", self.stats_vcf, elapsed.as_secs());
-    }
-
-    /// Filter variants by reading information from the constit (MRD) BAM pileup.
-    pub fn bam_filters(&mut self, mrd_bam: &str) {
-        let cfg = &self.cfg;
-        // let start = Instant::now();
-        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
-        pg.set_message("BAM filtering");
-
-        let min_mrd_depth = cfg.min_mrd_depth;
-        info!("Filtering Constitutional (Alt base found in BAM pileup), LowDiversity (sequence +/- 20nt around variant with entropy < {}), LowMRDDepth (BAM pileup depth < {}) variants by BAM pileup fetching of {} likely somatic variants", cfg.min_diversity, min_mrd_depth, self.stats_vcf.n_tumoral_init - (self.stats_vcf.n_constit + self.stats_vcf.n_loh + self.stats_vcf.n_low_mrd_depth) as usize);
-
-        let n_already = AtomicI32::new(0);
-        let n_constit = AtomicI32::new(0);
-        let n_low_mrd_depth = AtomicI32::new(0);
-        let n_low_diversity = AtomicI32::new(0);
-        let n_somatic = AtomicI32::new(0);
-        self.data.par_chunks_mut(10_000).for_each(|chunk| {
-            let mut bam = rust_htslib::bam::IndexedReader::from_path(mrd_bam)
-                .context(anyhow!("Reading {}", mrd_bam))
-                .unwrap();
-            let mut genome_reader = FastaBuilder::default()
-                .build_from_path(&cfg.reference_fa)
-                .unwrap();
-
-            for tumoral in chunk.iter_mut() {
-                pg.inc(1);
-
-                if tumoral.annotations.len() > 0 {
-                    n_already.fetch_add(1, Ordering::SeqCst);
-                    continue;
-                }
-                let (pos, is_ins) = match tumoral.alt_cat() {
-                    AlterationCategory::INS => (tumoral.position, true),
-                    AlterationCategory::DEL => (tumoral.position, false),
-                    _ => (tumoral.position, false),
-                };
-                match get_hts_nt_pileup(
-                    &mut bam,
-                    &tumoral.contig,
-                    pos as i32,
-                    is_ins, // tumoral.position as i32,
-                ) {
-                    std::result::Result::Ok(bases) => {
-                        let depth = bases.len() as u32;
-
-                        if depth < min_mrd_depth {
-                            n_low_mrd_depth.fetch_add(1, Ordering::SeqCst);
-                            tumoral.annotations.push(AnnotationType::VariantCategory(
-                                VariantCategory::LowMRDDepth,
-                            ));
-                        } else {
-                            // Check local diversity
-                            let start =
-                                Position::try_from((tumoral.position - 20) as usize).unwrap();
-                            let end = Position::try_from((tumoral.position + 19) as usize).unwrap();
-                            let r = Region::new(tumoral.contig.to_string(), start..=end);
-                            if let std::result::Result::Ok(reg) = genome_reader.query(&r) {
-                                let s = reg.sequence();
-                                let u = s.as_ref();
-                                let s = String::from_utf8(u.to_vec()).unwrap();
-                                let ent = estimate_shannon_entropy(&s.to_lowercase());
-
-                                if ent < cfg.min_diversity {
-                                    n_low_diversity.fetch_add(1, Ordering::SeqCst);
-                                    tumoral.annotations.push(AnnotationType::VariantCategory(
-                                        VariantCategory::LowDiversity,
-                                    ));
-                                    continue;
-                                }
-
-                                // Check triplets or doublets if DeepVariant
-                                let callers = tumoral.callers();
-
-                                if callers.len() == 1 {
-                                    if callers[0] == "DeepVariant".to_string() {
-                                        let seq_left = &s[0..20];
-                                        let seq_right = &s[21..s.len() - 1];
-
-                                        // Triplet right
-                                        if count_repetitions(seq_right, 3) >= 3 {
-                                            n_low_diversity.fetch_add(1, Ordering::SeqCst);
-                                            tumoral.annotations.push(
-                                                AnnotationType::VariantCategory(
-                                                    VariantCategory::LowDiversity,
-                                                ),
-                                            );
-                                            continue;
-                                        }
-
-                                        // Doublet right
-                                        if count_repetitions(seq_right, 2) >= 4 {
-                                            n_low_diversity.fetch_add(1, Ordering::SeqCst);
-                                            tumoral.annotations.push(
-                                                AnnotationType::VariantCategory(
-                                                    VariantCategory::LowDiversity,
-                                                ),
-                                            );
-                                            continue;
-                                        }
-
-                                        // Triplet left
-                                        if count_repetitions(seq_left, 3) >= 3 {
-                                            n_low_diversity.fetch_add(1, Ordering::SeqCst);
-                                            tumoral.annotations.push(
-                                                AnnotationType::VariantCategory(
-                                                    VariantCategory::LowDiversity,
-                                                ),
-                                            );
-                                            continue;
-                                        }
-
-                                        // Doublet left
-                                        if count_repetitions(seq_left, 2) >= 4 {
-                                            n_low_diversity.fetch_add(1, Ordering::SeqCst);
-                                            tumoral.annotations.push(
-                                                AnnotationType::VariantCategory(
-                                                    VariantCategory::LowDiversity,
-                                                ),
-                                            );
-                                            continue;
-                                        }
-                                    }
-                                }
-                            }
-
-                            // Check if the base is in constitutionnal pileup
-                            if let ReferenceAlternative::Nucleotide(alt_b) = &tumoral.alternative {
-                                let alt_b = alt_b.clone().into_u8();
-                                let n_alt_mrd = bases.iter().filter(|&&e| e == alt_b).count();
-                                if n_alt_mrd > 0 {
-                                    n_constit.fetch_add(1, Ordering::SeqCst);
-                                    tumoral.annotations.push(AnnotationType::VariantCategory(
-                                        VariantCategory::Constit,
-                                    ));
-                                } else {
-                                    n_somatic.fetch_add(1, Ordering::SeqCst);
-                                    tumoral.annotations.push(AnnotationType::VariantCategory(
-                                        VariantCategory::Somatic,
-                                    ));
-                                }
-                            } else if tumoral.is_ins() {
-                                let n_alt_mrd = bases.iter().filter(|&&e| e == b'I').count();
-                                if n_alt_mrd > 0 {
-                                    n_constit.fetch_add(1, Ordering::SeqCst);
-                                    tumoral.annotations.push(AnnotationType::VariantCategory(
-                                        VariantCategory::Constit,
-                                    ));
-                                } else {
-                                    n_somatic.fetch_add(1, Ordering::SeqCst);
-                                    tumoral.annotations.push(AnnotationType::VariantCategory(
-                                        VariantCategory::Somatic,
-                                    ));
-                                }
-                            } else if tumoral.alt_cat() == AlterationCategory::DEL {
-                                let n_alt_mrd = bases.iter().filter(|&&e| e == b'D').count();
-                                if n_alt_mrd > 0 {
-                                    n_constit.fetch_add(1, Ordering::SeqCst);
-                                    tumoral.annotations.push(AnnotationType::VariantCategory(
-                                        VariantCategory::Constit,
-                                    ));
-                                } else {
-                                    n_somatic.fetch_add(1, Ordering::SeqCst);
-                                    tumoral.annotations.push(AnnotationType::VariantCategory(
-                                        VariantCategory::Somatic,
-                                    ));
-                                }
-                            }
-                        }
-                    }
-                    Err(r) => panic!("{}", r),
-                }
-            }
-        });
-        let n_constit = n_constit.load(Ordering::SeqCst);
-        let n_low_mrd_depth = n_low_mrd_depth.load(Ordering::SeqCst);
-        let n_low_diversity = n_low_diversity.load(Ordering::SeqCst);
-        let n_somatic = n_somatic.load(Ordering::SeqCst);
-        let n_lasting = self.data.len() as i32 - n_already.load(Ordering::SeqCst);
-        self.stats_bam = StatsBAM {
-            n_lasting,
-            n_constit,
-            n_low_mrd_depth,
-            n_low_diversity,
-            n_somatic,
-        };
-        let elapsed = pg.elapsed();
-        pg.finish();
-        info!("{}. Executed in {}s", self.stats_bam, elapsed.as_secs());
-    }
-
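-    /// Return a clone of every variant carrying the given `VariantCategory` annotation.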
-    pub fn get_cat(&mut self, cat: &VariantCategory) -> Vec<Variant> {
-        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
-        pg.set_message(format!("Get cat {:?}", cat));
-        self.data
-            .par_iter()
-            .progress_with(pg)
-            .flat_map(|e| {
-                if e.annotations
-                    .iter()
-                    .filter(|e| match e {
-                        AnnotationType::VariantCategory(vc) => vc == cat,
-                        _ => false,
-                    })
-                    .count()
-                    > 0
-                {
-                    vec![e.clone()]
-                } else {
-                    vec![]
-                }
-            })
-            .collect::<Vec<Variant>>()
-    }
-
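-    /// Write the variants of the given category to a sorted, indexed VCF at `path`.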
-    pub fn write_vcf_cat(&mut self, path: &str, cat: &VariantCategory) -> Result<()> {
-        info!("Writing VCF {}", path);
-
-        let mut to_write = sort_variants(self.get_cat(cat), &self.cfg.dict_file)?;
-        let pg = self.mp.add(new_pg_speed(to_write.len() as u64));
-        pg.set_message("Writing VCF");
-
-        let mut w = VariantWritter::new(path, &self.cfg.dict_file)?;
-        for row in to_write.iter_mut() {
-            w.write_variant(row)?;
-            pg.inc(1);
-        }
-        w.write_index_finish()?;
-        Ok(())
-    }
-
-    /// Keep variants that are either unannotated or annotated Somatic.
-    pub fn keep_somatics_un(&mut self) {
-        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
-        pg.set_message("Filtering Variants");
-
-        self.data = self
-            .data
-            .par_iter_mut()
-            .progress_with(pg)
-            .flat_map(|e| {
-                // keep unannotated and somatic
-                if e.annotations
-                    .iter()
-                    .filter(|a| match a {
-                        AnnotationType::VariantCategory(vc) => match vc {
-                            VariantCategory::Somatic => false,
-                            _ => true,
-                        },
-                        _ => false,
-                    })
-                    .count()
-                    == 0
-                {
-                    vec![e]
-                } else {
-                    vec![]
-                }
-            })
-            .map(|e| e.clone())
-            .collect();
-    }
-
-    /// Annotate with VEP
-    pub fn vep(&mut self) {
-        let pg = self.mp.add(new_pg_speed(self.len() as u64));
-        pg.set_message("VEP");
-        self.data
-            .par_chunks_mut(self.cfg.vep_chunk_size)
-            .progress_with(pg)
-            .for_each(|chunks| vep_chunk(chunks).unwrap());
-    }
-
-    /// Sort `data` in place by dict contig order, then by position (see `sort_variants`).
-    pub fn sort(&mut self) -> Result<()> {
-        let cfg = &self.cfg;
-        self.data = sort_variants(self.data.clone(), &cfg.dict_file)?;
-        Ok(())
-    }
-
-    /// Merge variants that share contig, position, reference and alternative, concatenating and deduplicating their caller data and sources.
-    pub fn merge(&mut self) {
-        let pg = self.mp.add(new_pg_speed(self.len() as u64));
-        pg.set_message("Merging Variants by contig, positions, ref, alt");
-        let hm: DashMap<String, Variant> = DashMap::new();
-        self.data.par_iter().progress_with(pg).for_each(|e| {
-            let k = format!(
-                "{}:{}|{}>{}",
-                e.contig, e.position, e.reference, e.alternative
-            );
-
-            if let Some(mut v) = hm.get_mut(&k) {
-                let v = v.value_mut();
-                e.callers_data.iter().for_each(|cd| {
-                    v.callers_data.push(cd.clone());
-                    v.callers_data.dedup();
-                });
-                v.source.extend(e.source.clone());
-                v.source.dedup();
-            } else {
-                hm.insert(k, e.clone());
-            }
-        });
-        self.data = hm.iter().map(|e| e.value().clone()).collect();
-    }
-
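-    /// Annotate each variant with the NCBI GFF features overlapping its position,
-    /// querying a bgzipped, CSI-indexed GFF in parallel chunks.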
-    pub fn annotate_gff_feature(&mut self, gff_path: &str) -> Result<()> {
-        let gff_path = gff_path.to_string();
-        let len = self.data.len();
-        let pg = self.mp.add(new_pg_speed(self.len() as u64));
-        pg.set_message("GFF Annotate");
-
-        self.data
-            .par_chunks_mut((len / 33).max(1)) // ~33 chunks; avoid a zero chunk size on small inputs
-            .progress_with(pg)
-            .for_each(|chunk| {
-                let mut reader = File::open(gff_path.to_string())
-                    .map(noodles_bgzf::Reader::new)
-                    .map(gff::Reader::new)
-                    .unwrap();
-
-                let index = noodles_csi::read(format!("{}.csi", gff_path)).unwrap();
-
-                for v in chunk.iter_mut() {
-                    let start = Position::try_from(v.position as usize).unwrap();
-                    let r = Region::new(v.contig.to_string(), start..=start);
-                    if let std::result::Result::Ok(rows) = reader.query(&index, &r.clone()) {
-                        for row in rows {
-                            let ncbi = NCBIGFF::try_from(row.unwrap()).unwrap();
-                            v.annotations.push(AnnotationType::NCBIGFF(ncbi));
-                        }
-                    }
-                }
-            });
-        Ok(())
-    }
-
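-    /// Annotate variants with COSMIC and gnomAD values: each chunk is written to a
-    /// temporary VCF, run through echtvar and parsed back in input order.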
-    pub fn echtvar_annotate(&mut self, header_path: &str) -> Result<()> {
-        let len = self.len();
-        let header = vcf_header_from(header_path)?;
-        let pg = self.mp.add(new_pg_speed(len as u64));
-        pg.set_message("Echtvar Annotate");
-
-        self.data
-            .par_chunks_mut((len / 33).max(1)) // ~33 chunks; avoid a zero chunk size on small inputs
-            .progress_with(pg)
-            .for_each(|chunk| {
-                let in_tmp = format!(
-                    "{}/echtvar_in_{}.vcf",
-                    temp_dir().to_str().unwrap(),
-                    uuid::Uuid::new_v4()
-                );
-
-                let out_tmp = format!(
-                    "{}/echtvar_in_{}.vcf.gz",
-                    temp_dir().to_str().unwrap(),
-                    uuid::Uuid::new_v4()
-                );
-                let mut vcf = File::create(&in_tmp).unwrap();
-
-                let _ = writeln!(vcf, "{}", header);
-
-                for (i, row) in chunk.iter().enumerate() {
-                    let _ = writeln!(
-                        vcf,
-                        "{}\t{}\t{}\t{}\t{}\t{}\tPASS\t.\t{}\t{}",
-                        row.contig,
-                        row.position,
-                        i + 1,
-                        row.reference,
-                        row.alternative,
-                        ".",
-                        ".",
-                        "."
-                    );
-                }
-
-                run_echtvar(&in_tmp, &out_tmp).unwrap();
-
-                let mut reader = ReaderBuilder::new()
-                    .delimiter(b'\t')
-                    .has_headers(false)
-                    .comment(Some(b'#'))
-                    .flexible(true)
-                    .from_reader(get_reader(&out_tmp).unwrap());
-
-                // let mut lines: HashMap<u64, Vec<VEPLine>> = HashMap::new();
-                let mut last: usize = 1;
-                for line in reader.deserialize::<VCFRow>() {
-                    if let std::result::Result::Ok(row) = line {
-                        let (cosmic, gnomad) = parse_echtvar_val(&row.info).unwrap();
-                        let id: usize = row.id.parse().unwrap();
-                        if id != last {
-                            panic!("Echtvar output not in input order!");
-                        }
-                        if let Some(c) = cosmic {
-                            chunk[id - 1].annotations.push(AnnotationType::Cosmic(c));
-                        }
-                        if let Some(g) = gnomad {
-                            chunk[id - 1].annotations.push(AnnotationType::GnomAD(g));
-                        }
-                        last += 1;
-                    }
-                }
-            });
-        Ok(())
-    }
-
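-    /// Borrow every variant carrying the given `VariantCategory` annotation.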
-    pub fn category_iter(&self, category: &VariantCategory) -> Vec<&Variant> {
-        self.data
-            .par_iter()
-            .filter(|v| {
-                for annotation in v.annotations.iter() {
-                    match annotation {
-                        AnnotationType::VariantCategory(cat) => {
-                            if cat == category {
-                                return true;
-                            }
-                        }
-                        _ => (),
-                    }
-                }
-                return false;
-            })
-            .collect::<Vec<&Variant>>()
-    }
-
-    /// Drop variants whose GnomAD AF is >= `max_gnomad_af`; returns the number of removed (likely SNP) variants.
-    pub fn filter_snp(&mut self) -> Result<i32> {
-        let n_snp = AtomicI32::new(0);
-        self.data = self
-            .data
-            .clone()
-            .into_par_iter()
-            .filter(|e| {
-                let mut res = true;
-                e.annotations.iter().for_each(|a| {
-                    match a {
-                        AnnotationType::GnomAD(g) => {
-                            res = g.gnomad_af < self.cfg.max_gnomad_af;
-                        }
-                        _ => (),
-                    };
-                });
-                if !res {
-                    n_snp.fetch_add(1, Ordering::SeqCst);
-                }
-                res
-            })
-            .collect();
-        let n = n_snp.load(Ordering::SeqCst);
-        Ok(n)
-    }
-
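-    /// Run Pangolin on the current variants and attach the resulting `Pangolin` annotation
-    /// to every matching contig/position/ref/alt, asserting that all results are consumed.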
-    pub fn pangolin(&mut self) -> Result<()> {
-        let tmp_file = pangolin_save_variants(&self)?;
-        let res_file = run_pangolin(&tmp_file)?;
-        
-        fs::remove_file(tmp_file)?;
-        let res = pangolin_parse_results(&res_file)?;
-        let mut res = res.iter();
-        fs::remove_file(res_file)?;
-        info!("Adding pangolin results for {} variants.", res.len());
-
-        let mut n_added = 0;
-        if let Some(r) = res.next() {
-            let mut curr = r.clone();
-            for variant in self.data.iter_mut() {
-                if variant.contig == curr.0
-                    && variant.position == curr.1
-                    && variant.reference == curr.2
-                    && variant.alternative == curr.3
-                {
-                    variant.annotations.push(AnnotationType::Pangolin(curr.4));
-                    n_added += 1;
-                    if let Some(r) = res.next() {
-                        curr = r.clone();
-                    } else {
-                        break;
-                    }
-                }
-            }
-        }
-
-        assert_eq!(res.len(), 0);
-
-        Ok(())
-    }
-
-    pub fn len(&self) -> usize {
-        self.data.len()
-    }
-
-    pub fn constit_len(&self) -> usize {
-        self.constit.len()
-    }
-
-    pub fn get_variant(&self, contig: &str, pos: u32) -> Vec<Variant> {
-        self.data
-            .par_iter()
-            .filter(|v| v.contig == contig && v.position == pos)
-            .map(|v| v.clone())
-            .collect()
-    }
-
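-    /// Count caller combinations, variant categories, NCBI features, COSMIC hits > 1 and
-    /// best-VEP consequences; print per-category summaries and return `Stat` entries for
-    /// consequences, variant categories, NCBI features and callers.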
-    pub fn stats(&self) -> Result<Vec<Stat>> {
-        let mut callers_cat = HashMap::new();
-        let mut n_caller_data = 0;
-
-        let mut variants_cat = HashMap::new();
-        let mut n_variants_wcat = 0;
-
-        let mut ncbi_feature = HashMap::new();
-        let mut n_ncbi_feature = 0;
-
-        let mut cosmic_sup_1 = HashMap::new();
-        let mut n_cosmic_sup_1 = 0;
-
-        let mut cons_cat = HashMap::new();
-        let mut n_csq = 0;
-
-        let add_hm = |hm: &mut HashMap<String, u32>, k: &str| {
-            let (_, v) = hm.raw_entry_mut().from_key(k).or_insert(k.to_string(), 1);
-            *v += 1;
-        };
-
-        for ele in self.data.iter() {
-            // Callers
-            let mut callers = Vec::new();
-            for cd in &ele.callers_data {
-                callers.push(
-                    match cd.format {
-                        Format::DeepVariant(_) => "DeepVariant",
-                        Format::ClairS(_) => "ClairS",
-                        Format::Sniffles(_) => "Sniffles",
-                        Format::Nanomonsv(_) => "Nanomonsv",
-                    }
-                    .to_string(),
-                );
-            }
-
-            if !callers.is_empty() {
-                n_caller_data += 1;
-                callers.sort();
-                let k = callers.join(",");
-
-                let (_, v) = callers_cat
-                    .raw_entry_mut()
-                    .from_key(&k)
-                    .or_insert(k.clone(), 1);
-                *v += 1;
-            }
-
-            // Var cat
-
-            // Annotations
-            for annot in ele.annotations.iter() {
-                let mut features = Vec::new();
-                let mut variant_cat = Vec::new();
-                let mut cosmic_m1 = false;
-
-                match annot {
-                    AnnotationType::NCBIGFF(ncbi) => {
-                        features.push(ncbi.feature.to_string());
-                    }
-                    AnnotationType::Cosmic(c) => {
-                        if c.cosmic_cnt > 1 {
-                            cosmic_m1 = true;
-                        }
-                    }
-                    AnnotationType::VariantCategory(vc) => {
-                        let s = serde_json::to_string(vc)?;
-                        variant_cat.push(s);
-                    }
-                    _ => (),
-                };
-
-                if !features.is_empty() {
-                    features.sort();
-                    add_hm(&mut ncbi_feature, &features.join(","));
-                    n_ncbi_feature += 1;
-                }
-
-                if !variant_cat.is_empty() {
-                    add_hm(&mut variants_cat, &variant_cat.join(","));
-                    n_variants_wcat += 1;
-                }
-
-                if cosmic_m1 {
-                    add_hm(&mut cosmic_sup_1, "Cosmic > 1");
-                    n_cosmic_sup_1 += 1;
-                }
-            }
-
-            // VEP
-            let d: Vec<VEP> = ele
-                .annotations
-                .iter()
-                .flat_map(|e| {
-                    if let AnnotationType::VEP(e) = e {
-                        e.clone()
-                    } else {
-                        vec![]
-                    }
-                })
-                .collect();
-            if let std::result::Result::Ok(vep) = get_best_vep(&d) {
-                if let Some(csq) = vep.consequence {
-                    n_csq += 1;
-                    let csq = csq.join(",");
-                    let (_, v) = cons_cat
-                        .raw_entry_mut()
-                        .from_key(&csq)
-                        .or_insert(csq.clone(), 1);
-                    *v += 1;
-                }
-            }
-        }
-
-        print_stat_cat(&cons_cat, n_csq as u32);
-        print_stat_cat(&ncbi_feature, n_ncbi_feature as u32);
-        print_stat_cat(&cosmic_sup_1, n_cosmic_sup_1 as u32);
-        print_stat_cat(&callers_cat, n_caller_data as u32);
-
-        // let file = File::create(path)?;
-        // let mut writer = BufWriter::new(file);
-        let mut results = Vec::new();
-        results.push(Stat::new(
-            "consequences".to_string(),
-            cons_cat,
-            n_csq as u32,
-        ));
-        results.push(Stat::new(
-            "variants_cat".to_string(),
-            variants_cat,
-            n_variants_wcat as u32,
-        ));
-        results.push(Stat::new(
-            "ncbi_feature".to_string(),
-            ncbi_feature,
-            n_ncbi_feature as u32,
-        ));
-        results.push(Stat::new(
-            "callers_cat".to_string(),
-            callers_cat,
-            n_caller_data as u32,
-        ));
-
-        // let res = serde_json::to_string(&results)?;
-
-        Ok(results)
-    }
-
-    pub fn save_sql(&self, path: &str) -> Result<()> {
-        insert_variants(self, path)
-    }
-
-    pub fn stats_sql(&self, path: &str) -> Result<()> {
-        insert_stats(
-            "VCF".to_string(),
-            serde_json::to_string(&self.stats_vcf)?,
-            path,
-        )?;
-        insert_stats(
-            "BAM".to_string(),
-            serde_json::to_string(&self.stats_bam)?,
-            path,
-        )?;
-        Ok(())
-    }
-
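-    /// Serialize `data` with pot and write it bgzf-compressed to `path`.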
-    pub fn save_bytes(&self, path: &str) -> Result<()> {
-        let serialized = pot::to_vec(&self.data)?;
-        let mut w = noodles_bgzf::writer::Builder::default().build_with_writer(File::create(path)?);
-        w.write_all(&serialized)?;
-        Ok(())
-    }
-
-    pub fn new_from_bytes(name: &str, path: &str, mp: MultiProgress) -> Result<Self> {
-        info!("Loading variants from: {path}");
-        let r = in_out::get_reader_progress(path, &mp)?;
-
-        let data: Vec<Variant> = pot::from_reader(r)?;
-        Ok(Self {
-            name: name.to_string(),
-            data,
-            constit: DashMap::new(),
-            stats_vcf: StatsVCF::default(),
-            stats_bam: StatsBAM::default(),
-            cfg: Config::get()?,
-            mp,
-        })
-    }
-
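-    /// Borrow the variants matching every category in `and_categories` (logical AND).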
-    pub fn filter_category(&self, and_categories: &Vec<Category>) -> Vec<&Variant> {
-        self.data
-            .par_iter()
-            .flat_map(|v| {
-                if v.is_from_category(and_categories) {
-                    vec![v]
-                } else {
-                    vec![]
-                }
-            })
-            .collect()
-    }
-}
-
-#[derive(Debug, Clone, Serialize, ToSchema)]
-pub struct Stat {
-    name: String,
-    counts: HashMap<String, u32>,
-    n_with_annotation: u32,
-}
-
-impl Stat {
-    pub fn new(name: String, counts: HashMap<String, u32>, n_with_annotation: u32) -> Self {
-        Stat {
-            counts,
-            n_with_annotation,
-            name,
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
-pub struct Variant {
-    pub contig: String,
-    pub position: u32,
-    pub reference: ReferenceAlternative,
-    pub alternative: ReferenceAlternative,
-    pub callers_data: Vec<CallerData>,
-    pub n_alt: Option<u32>,
-    pub n_ref: Option<u32>,
-    pub vaf: Option<f32>,
-    pub depth: Option<u32>,
-    pub variant_type: VariantType,
-    pub source: Vec<VCFSource>,
-    pub annotations: Vec<AnnotationType>,
-}
-
-#[derive(Debug, PartialEq, Serialize, Deserialize, Clone, ToSchema)]
-pub struct CallerData {
-    pub qual: Option<f32>,
-    pub format: Format,
-    pub info: Info,
-}
-
-impl CallerData {
-    pub fn get_vaf(&self) -> f64 {
-        match &self.format {
-            Format::DeepVariant(v) => v.vaf as f64,
-            Format::ClairS(v) => v.af,
-            Format::Sniffles(v) => v.dv as f64 / (v.dv as f64 + v.dr as f64),
-            Format::Nanomonsv(v) => v.vr as f64 / v.tr as f64,
-        }
-    }
-    pub fn get_depth(&mut self) -> u32 {
-        match &self.format {
-            Format::DeepVariant(v) => v.dp,
-            Format::ClairS(v) => v.dp,
-            Format::Sniffles(v) => v.dv + v.dr,
-            Format::Nanomonsv(v) => v.tr,
-        }
-    }
-    pub fn get_n_alt(&mut self) -> u32 {
-        match &self.format {
-            Format::DeepVariant(v) => v.ad.get(1).unwrap().to_owned(),
-            Format::ClairS(v) => v.ad.get(1).unwrap().to_owned(),
-            Format::Sniffles(v) => v.dv,
-            Format::Nanomonsv(v) => v.vr, // VR = variant-supporting reads (TR - VR would be the reference count)
-        }
-    }
-
-    /// Caller-specific filter rule: Sniffles calls are filtered out when tagged IMPRECISE or supported by fewer than 3 reads; everything else passes.
-    pub fn should_filter(&self) -> bool {
-        if let Info::Sniffles(info) = &self.info {
-            let imprecise = info
-                .tags
-                .iter()
-                .filter(|s| s.to_string() == "IMPRECISE".to_string())
-                .count();
-            let mut n_alt = 0;
-            if let Format::Sniffles(f) = &self.format {
-                n_alt = f.dv;
-            }
-            if imprecise == 0 && n_alt >= 3 {
-                return false;
-            } else {
-                return true;
-            }
-        } else {
-            return false;
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Eq, PartialEq, Deserialize, ToSchema)]
-pub enum VariantType {
-    Somatic,
-    Constitutionnal,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
-pub enum VCFSource {
-    DeepVariant,
-    ClairS,
-    Sniffles,
-    Nanomonsv,
-}
-
-impl FromStr for VCFSource {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> Result<Self> {
-        match s {
-            "DeepVariant" => Ok(VCFSource::DeepVariant),
-            "ClairS" => Ok(VCFSource::ClairS),
-            "Sniffles" => Ok(VCFSource::Sniffles),
-            "Nanomonsv" => Ok(VCFSource::Nanomonsv),
-            _ => Err(anyhow!("Error parsing VCFSource")),
-        }
-    }
-}
-
-impl ToString for VCFSource {
-    fn to_string(&self) -> String {
-        let s = match self {
-            VCFSource::DeepVariant => "DeepVariant",
-            VCFSource::ClairS => "ClairS",
-            VCFSource::Sniffles => "Sniffles",
-            VCFSource::Nanomonsv => "Nanomonsv",
-        };
-        s.to_string()
-    }
-}
-
-impl Variant {
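-    /// Build a `Variant` from a raw VCF row, parsing the caller-specific INFO and FORMAT fields.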
-    pub fn from_vcfrow(row: &VCFRow, source: VCFSource, variant_type: VariantType) -> Result<Self> {
-        let callers_data = vec![CallerData {
-            qual: row.qual.parse::<f32>().ok(),
-            info: parse_info(&row.info, &source).context(anyhow!(
-                "Can't parse {:?} info for {}",
-                source,
-                row.info
-            ))?,
-            format: parse_format(&source, &row.value).context(anyhow!(
-                "Can't parse {:?} format for {}",
-                source,
-                row.value
-            ))?,
-        }];
-
-        Ok(Variant {
-            contig: row.chr.to_string(),
-            position: row.pos,
-            reference: row
-                .reference
-                .parse()
-                .context(anyhow!("Error while parsing {}", row.reference))?,
-            alternative: row
-                .alt
-                .parse()
-                .context(anyhow!("Error while parsing {}", row.alt))?,
-            n_ref: None,
-            n_alt: None,
-            vaf: None,
-            depth: None,
-            callers_data,
-            source: vec![source],
-            variant_type,
-            annotations: Vec::new(),
-        })
-    }
-
-    pub fn get_depth(&mut self) -> u32 {
-        if let Some(depth) = self.depth {
-            return depth;
-        } else {
-            let depth = self
-                .callers_data
-                .iter_mut()
-                .map(|v| v.get_depth())
-                .max()
-                .unwrap();
-            self.depth = Some(depth);
-            return depth;
-        }
-    }
-
-    pub fn get_n_alt(&mut self) -> u32 {
-        if let Some(n_alt) = self.n_alt {
-            return n_alt;
-        } else {
-            let n_alt = self
-                .callers_data
-                .iter_mut()
-                .map(|v| v.get_n_alt())
-                .max()
-                .unwrap();
-            self.n_alt = Some(n_alt);
-            return n_alt;
-        }
-    }
-
-    pub fn vaf(&mut self) -> f32 {
-        let n_alt = self.get_n_alt() as f32;
-        let depth = self.get_depth() as f32;
-        self.vaf = Some(n_alt / depth);
-        self.vaf.unwrap()
-    }
-
-    fn is_ins(&self) -> bool {
-        match (&self.reference, &self.alternative) {
-            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => true,
-            _ => false,
-        }
-    }
-
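-    /// Classify the alteration as SNV, INS, DEL or REP from the ref/alt lengths;
-    /// unstructured alleles map to Other.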
-    fn alt_cat(&self) -> AlterationCategory {
-        match (&self.reference, &self.alternative) {
-            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
-                AlterationCategory::SNV
-            }
-            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
-                AlterationCategory::INS
-            }
-            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
-                AlterationCategory::Other
-            }
-            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
-                AlterationCategory::DEL
-            }
-            (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b)) => {
-                let a = a.len();
-                let b = b.len();
-                if a < b {
-                    AlterationCategory::INS
-                } else if a > b {
-                    AlterationCategory::DEL
-                } else {
-                    AlterationCategory::REP
-                }
-            }
-            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
-                AlterationCategory::Other
-            }
-            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
-                AlterationCategory::Other
-            }
-            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
-                AlterationCategory::Other
-            }
-            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
-                AlterationCategory::Other
-            }
-        }
-    }
-
-    pub fn to_min_string(&mut self) -> String {
-        let depth = self.get_depth();
-        let n_alt = self.get_n_alt();
-
-        format!(
-            "DP:AD\t{}:{}",
-            depth,
-            vec![(depth - n_alt).to_string(), n_alt.to_string()].join(",")
-        )
-    }
-
-    pub fn get_veps(&self) -> Vec<VEP> {
-        self.annotations
-            .iter()
-            .flat_map(|e| {
-                if let AnnotationType::VEP(e) = e {
-                    e.clone()
-                } else {
-                    vec![]
-                }
-            })
-            .collect()
-    }
-    pub fn get_best_vep(&self) -> Result<VEP> {
-        get_best_vep(&self.get_veps())
-    }
-
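-    /// True when the variant satisfies every filter in `and_categories` (category,
-    /// position range, COSMIC count, NCBI feature, VAF range, Pangolin presence);
-    /// `Category::VCFSource` is currently ignored.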
-    pub fn is_from_category(&self, and_categories: &Vec<Category>) -> bool {
-        let mut vec_bools = Vec::new();
-        for category in and_categories.iter() {
-            match category {
-                Category::VariantCategory(vc) => {
-                    for annotations in self.annotations.iter() {
-                        match annotations {
-                            AnnotationType::VariantCategory(vvc) => {
-                                if vc == vvc {
-                                    vec_bools.push(true);
-                                    break;
-                                }
-                            }
-                            _ => (),
-                        }
-                    }
-                }
-                Category::PositionRange { contig, from, to } => {
-                    if self.contig == *contig {
-                        match (from, to) {
-                            (None, None) => vec_bools.push(true),
-                            (None, Some(to)) => vec_bools.push(self.position <= *to),
-                            (Some(from), None) => vec_bools.push(self.position >= *from),
-                            (Some(from), Some(to)) => {
-                                vec_bools.push(self.position >= *from && self.position <= *to)
-                            }
-                        }
-                    } else {
-                        vec_bools.push(false);
-                    }
-                }
-                Category::VCFSource(_) => (),
-                Category::NCosmic(n) => {
-                    let mut bools = Vec::new();
-                    for annotations in self.annotations.iter() {
-                        match annotations {
-                            AnnotationType::Cosmic(c) => {
-                                bools.push(c.cosmic_cnt >= *n);
-                                break;
-                            }
-                            _ => (),
-                        }
-                    }
-                    vec_bools.push(bools.iter().any(|&b| b));
-                }
-                Category::NCBIFeature(ncbi_feature) => {
-                    let mut bools = Vec::new();
-                    for annotations in self.annotations.iter() {
-                        match annotations {
-                            AnnotationType::NCBIGFF(v) => {
-                                bools.push(v.feature == *ncbi_feature);
-                            }
-                            _ => (),
-                        }
-                    }
-                    vec_bools.push(bools.iter().any(|&b| b));
-                }
-                Category::VAF { min, max } => {
-                    let v = if self.vaf.is_none() {
-                        let mut s = self.clone();
-                        s.vaf()
-                    } else {
-                        self.vaf.unwrap()
-                    };
-                    vec_bools.push(v >= *min && v <= *max);
-                }
-                Category::Pangolin => {
-                    vec_bools.push(
-                        self.annotations
-                            .iter()
-                            .any(|a| matches!(a, AnnotationType::Pangolin(_))),
-                    );
-                }
-            }
-        }
-        vec_bools.iter().all(|&x| x)
-    }
-
-    pub fn callers(&self) -> Vec<String> {
-        self.source
-            .iter()
-            .map(|source| source.to_string())
-            .collect()
-    }
-}
-#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
-enum AlterationCategory {
-    SNV,
-    INS,
-    DEL,
-    REP,
-    Other,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
-pub enum AnnotationType {
-    VariantCategory(VariantCategory),
-    VEP(Vec<VEP>),
-    Cluster(i32),
-    Cosmic(Cosmic),
-    GnomAD(GnomAD),
-    NCBIGFF(NCBIGFF),
-    Pangolin(Pangolin)
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
-pub enum VariantCategory {
-    Somatic,
-    LowMRDDepth,
-    LOH,
-    Constit,
-    LowDiversity,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, ToSchema)]
-pub enum ReferenceAlternative {
-    Nucleotide(Base),
-    Nucleotides(Vec<Base>),
-    Unstructured(String),
-}
-
-impl FromStr for ReferenceAlternative {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> Result<Self> {
-        let mut possible_bases = s.as_bytes().iter();
-        let mut res: Vec<Base> = Vec::new();
-        while let Some(&base) = possible_bases.next() {
-            match base.try_into() {
-                std::result::Result::Ok(b) => res.push(b),
-                Err(_) => {
-                    return Ok(Self::Unstructured(s.to_string()));
-                }
-            }
-        }
-
-        if res.len() == 1 {
-            return Ok(Self::Nucleotide(res.pop().unwrap()));
-        } else {
-            return Ok(Self::Nucleotides(res));
-        }
-    }
-}
-
-impl fmt::Display for ReferenceAlternative {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let string = match self {
-            ReferenceAlternative::Nucleotide(b) => b.to_string(),
-            ReferenceAlternative::Nucleotides(bases) => bases
-                .iter()
-                .fold(String::new(), |acc, e| format!("{}{}", acc, e.to_string())),
-            ReferenceAlternative::Unstructured(s) => s.to_string(),
-        };
-        write!(f, "{}", string)
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, ToSchema)]
-pub enum Base {
-    A,
-    T,
-    C,
-    G,
-    N,
-}
-
-impl TryFrom<u8> for Base {
-    type Error = anyhow::Error;
-    fn try_from(base: u8) -> Result<Self> {
-        match base {
-            b'A' => Ok(Base::A),
-            b'T' => Ok(Base::T),
-            b'C' => Ok(Base::C),
-            b'G' => Ok(Base::G),
-            b'N' => Ok(Base::N),
-            _ => Err(anyhow!(
-                "Unknown base: {}",
-                String::from_utf8_lossy(&vec![base])
-            )),
-        }
-    }
-}
-
-impl Base {
-    pub fn into_u8(self) -> u8 {
-        return match self {
-            Base::A => b'A',
-            Base::T => b'T',
-            Base::C => b'C',
-            Base::G => b'G',
-            Base::N => b'N',
-        };
-    }
-}
-
-impl fmt::Display for Base {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        // Use `self.number` to refer to each positional data point.
-        let str = match self {
-            Base::A => "A",
-            Base::T => "T",
-            Base::C => "C",
-            Base::G => "G",
-            Base::N => "N",
-        };
-        write!(f, "{}", str)
-    }
-}
-
-#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, ToSchema)]
-pub enum Format {
-    DeepVariant(DeepVariantFormat),
-    ClairS(ClairSFormat),
-    Sniffles(SnifflesFormat),
-    Nanomonsv(NanomonsvFormat),
-}
-
-#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, ToSchema)]
-pub enum Info {
-    #[schema(value_type=String)]
-    DeepVariant(DeepVariantInfo),
-    #[schema(value_type=String)]
-    ClairS(ClairSInfo),
-    #[schema(value_type=String)]
-    Sniffles(SnifflesInfo),
-    #[schema(value_type=String)]
-    Nanomonsv(NanomonsvInfo),
-}
-
-fn parse_info(s: &str, source: &VCFSource) -> Result<Info> {
-    match source {
-        VCFSource::DeepVariant => Ok(Info::DeepVariant(s.parse()?)),
-        VCFSource::ClairS => Ok(Info::ClairS(s.parse()?)),
-        VCFSource::Sniffles => Ok(Info::Sniffles(s.parse()?)),
-        VCFSource::Nanomonsv => Ok(Info::Nanomonsv(s.parse()?)),
-    }
-}
-
-fn parse_format(vcf_source: &VCFSource, data: &str) -> Result<Format> {
-    let res = match vcf_source {
-        VCFSource::DeepVariant => Format::DeepVariant(data.parse()?),
-        VCFSource::ClairS => Format::ClairS(data.parse()?),
-        VCFSource::Sniffles => Format::Sniffles(data.parse()?),
-        VCFSource::Nanomonsv => Format::Nanomonsv(data.parse()?),
-    };
-    Ok(res)
-}
-
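-// Groups variants by contig, sorts each contig by position, then emits the
-// contigs in the order given by the reference .dict file, so e.g. all chr1
-// variants precede chr2 variants in the output.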
-pub fn sort_variants(d: Vec<Variant>, dict_path: &str) -> Result<Vec<Variant>> {
-    info!("Sorting {} entries", d.len());
-    let dict = read_dict(dict_path)?;
-
-    let mut store: HashMap<String, Vec<Variant>> = HashMap::new();
-
-    // add to store
-    d.iter().for_each(|e| {
-        store
-            .entry(e.contig.to_string())
-            .or_default()
-            .push(e.clone());
-    });
-
-    // sort in each contig
-    store
-        .iter_mut()
-        .for_each(|(_, vec)| vec.sort_by(|a, b| a.position.partial_cmp(&b.position).unwrap()));
-
-    // return contig in the order of dict file
-    Ok(dict
-        .iter()
-        .flat_map(|(chr, _)| store.remove(chr).unwrap_or_default())
-        .collect())
-}
-
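-// Criteria used to select subsets of variants: by category, genomic range,
-// caller, COSMIC count, NCBI feature, VAF window, or Pangolin annotation.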
-#[derive(Debug)]
-pub enum Category {
-    VariantCategory(VariantCategory),
-    PositionRange {
-        contig: String,
-        from: Option<u32>,
-        to: Option<u32>,
-    },
-    VCFSource(VCFSource),
-    NCosmic(u64),
-    NCBIFeature(String),
-    VAF {
-        min: f32,
-        max: f32,
-    },
-    Pangolin
-}
-
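-// End-to-end pipeline for one sample: verifies that the required caller
-// outputs exist, loads the diag/mrd VCFs, applies the VCF filters (writing
-// the LOH calls to their own VCF) and the BAM filters, saves the
-// constitutional variants, then merges, sorts, annotates (VEP, Pangolin,
-// GFF features, echtvar), filters SNPs and persists the result as a bytes
-// archive and an SQLite database.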
-pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
-    let cfg = config::Config::get()?;
-    let deepvariant_diag_vcf = format!(
-        "{}/{name}/diag/DeepVariant/{name}_diag_DeepVariant_PASSED.vcf.gz",
-        cfg.longreads_results_dir
-    );
-    if !std::path::Path::new(&deepvariant_diag_vcf).exists() {
-        return Err(anyhow!("{deepvariant_diag_vcf} is required"));
-        // panic!("{deepvariant_diag_vcf} is required")
-    }
-    let deepvariant_mrd_vcf = format!(
-        "{}/{name}/mrd/DeepVariant/{name}_mrd_DeepVariant_PASSED.vcf.gz",
-        cfg.longreads_results_dir
-    );
-    if !std::path::Path::new(&deepvariant_mrd_vcf).exists() {
-        return Err(anyhow!("{deepvariant_mrd_vcf} is required"));
-    }
-    let mrd_bam = format!(
-        "{}/{name}/mrd/{name}_mrd_hs1.bam",
-        cfg.longreads_results_dir
-    );
-    if !std::path::Path::new(&mrd_bam).exists() {
-        return Err(anyhow!("{mrd_bam} is required"));
-    }
-    let clairs_vcf = format!(
-        "{}/{name}/diag/ClairS/{name}_diag_clairs_PASSED.vcf.gz",
-        cfg.longreads_results_dir
-    );
-    if !std::path::Path::new(&clairs_vcf).exists() {
-        return Err(anyhow!("{clairs_vcf} is required"));
-    }
-    let clairs_indels_vcf = format!(
-        "{}/{name}/diag/ClairS/{name}_diag_clairs_indel_PASSED.vcf.gz",
-        cfg.longreads_results_dir
-    );
-    if !std::path::Path::new(&clairs_indels_vcf).exists() {
-        return Err(anyhow!("{clairs_indels_vcf} is required"));
-    }
-    let sniffles_vcf = format!(
-        "{}/{name}/diag/Sniffles/{name}_diag_sniffles.vcf",
-        cfg.longreads_results_dir
-    );
-    let sniffles_mrd_vcf = format!(
-        "{}/{name}/mrd/Sniffles/{name}_mrd_sniffles.vcf",
-        cfg.longreads_results_dir
-    );
-    if !std::path::Path::new(&sniffles_vcf).exists() {
-        return Err(anyhow!("{sniffles_vcf} is required"));
-    }
-    let nanomonsv_vcf = format!(
-        "{}/{name}/diag/nanomonsv/{name}_diag_nanomonsv_PASSED.vcf.gz",
-        cfg.longreads_results_dir
-    );
-    if !std::path::Path::new(&nanomonsv_vcf).exists() {
-        return Err(anyhow!("{nanomonsv_vcf} is required"));
-    }
-
-    // let db_path = "/data/db_results.sqlite".to_string();
-    // `${data_dir}/${name}/diag/${name}_variants.sqlite`
-    let db_path = format!(
-        "{}/{name}/diag/{name}_variants.sqlite",
-        cfg.longreads_results_dir
-    );
-    let bytes_path = format!(
-        "{}/{name}/diag/{name}_variants.bytes.gz",
-        cfg.longreads_results_dir
-    );
-
-    let loh_path = format!(
-        "{}/{name}/diag/{name}_loh.vcf.gz",
-        cfg.longreads_results_dir
-    );
-    // let db_constit_path = format!(
-    //     "{}/{name}/diag/{name}_constit.sqlite",
-    //     cfg.longreads_results_dir
-    // );
-    let bytes_constit_path = format!(
-        "{}/{name}/diag/{name}_constit.bytes.gz",
-        cfg.longreads_results_dir
-    );
-
-    let sources = vec![
-        (
-            deepvariant_diag_vcf.as_str(),
-            &VCFSource::DeepVariant,
-            &VariantType::Somatic,
-        ),
-        (
-            deepvariant_mrd_vcf.as_str(),
-            &VCFSource::DeepVariant,
-            &VariantType::Constitutionnal,
-        ),
-        (
-            clairs_vcf.as_str(),
-            &VCFSource::ClairS,
-            &VariantType::Somatic,
-        ),
-        (
-            sniffles_vcf.as_str(),
-            &VCFSource::Sniffles,
-            &VariantType::Somatic,
-        ),
-        (
-            sniffles_mrd_vcf.as_str(),
-            &VCFSource::Sniffles,
-            &VariantType::Constitutionnal,
-        ),
-        (
-            nanomonsv_vcf.as_str(),
-            &VCFSource::Nanomonsv,
-            &VariantType::Somatic,
-        ),
-    ];
-    let mut variants = Variants::from_vcfs(name.to_string(), sources, &cfg, multi.clone())?;
-
-    variants.vcf_filters();
-    variants.write_vcf_cat(&loh_path, &VariantCategory::LOH)?;
-    variants.bam_filters(&mrd_bam);
-
-    let constits = variants.get_cat(&VariantCategory::Constit);
-    let constits = Variants::from_vec(name.to_string(), &multi, constits);
-    constits.save_bytes(&bytes_constit_path)?;
-
-    variants.keep_somatics_un();
-    info!("Variants retained: {}", variants.len());
-
-    // TODO: check that the SNPs match between samples
-    if variants.len() > 100_000 {
-        return Err(anyhow!(
-            "Too many variants; verify that the somatic and tumoral samples match."
-        ));
-    }
-
-    variants.merge();
-    variants.sort()?;
-    info!("Variants retained: {}", variants.len());
-    variants.vep();
-    variants.pangolin()?;
-
-    variants.annotate_gff_feature(&cfg.gff_path)?;
-
-    variants.echtvar_annotate(&deepvariant_mrd_vcf)?;
-    variants.filter_snp()?;
-
-    variants.save_bytes(&bytes_path)?;
-    // variants.stats()?;
-    //
-    // if std::path::Path::new(&db_path).exists() {
-    //     crate::sql::variants_sql::remove_variants_names(&db_path, &name)?;
-    // }
-    //
-    variants.save_sql(&db_path)?;
-    variants.stats_sql(&db_path)?;
-    info!("Variants : {}", variants.len());
-
-    Ok(())
-}
-
-// pub fn cluster_variants(d: &mut Vec<Variant>, max_dist: u32) -> i32 {
-//     let mut cluster_id = 0;
-//     let first = d.get(0).unwrap();
-//     let mut last_pos = first.position;
-//     let mut last_contig = first.contig.to_string();
-//
-//     d.iter_mut().for_each(|e| {
-//         if e.contig != last_contig {
-//             cluster_id += 1;
-//             last_contig = e.contig.to_string();
-//         } else if e.position - last_pos > max_dist {
-//             cluster_id += 1;
-//         }
-//         e.annotations.push(AnnotationType::Cluster(cluster_id));
-//         last_pos = e.position;
-//     });
-//
-//     cluster_id
-// }

+ 8 - 5
src/annotations/echtvar.rs

@@ -1,7 +1,10 @@
-use std::{process::{Command, Stdio}, io::{BufReader, BufRead}};
+use std::{
+    io::{BufRead, BufReader},
+    process::{Command, Stdio},
+};
 
 use anyhow::{Context, Ok, Result};
-use log::{info, warn};
+use log::warn;
 
 use super::{cosmic::Cosmic, gnomad::GnomAD};
 
@@ -9,7 +12,7 @@ use super::{cosmic::Cosmic, gnomad::GnomAD};
 pub fn run_echtvar(in_path: &str, out_path: &str) -> Result<()> {
     let bin_dir = "/data/tools";
 
-    let annot_sources: Vec<&str> = vec![
+    let annot_sources: Vec<&str> = [
         "/data/ref/hs1/CosmicCodingMuts.echtvar.zip",
         "/data/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
     ]
@@ -31,8 +34,8 @@ pub fn run_echtvar(in_path: &str, out_path: &str) -> Result<()> {
     let reader = BufReader::new(stderr);
     reader
         .lines()
-        .filter_map(|line| line.ok())
-        .filter(|line| line.find("error").is_some())
+        .map_while(Result::ok)
+        .filter(|line| line.contains("error"))
         .for_each(|line| warn!("{}", line));
 
     cmd.wait()?;

+ 17 - 23
src/annotations/ncbi_gff.rs

@@ -19,11 +19,7 @@ impl From<noodles_gff::Record> for NCBIGFF {
         let attr = r.attributes();
 
         let inner_string = |name: &str| {
-            if let Some(e) = attr.get(name) {
-                Some(e.to_string())
-            } else {
-                None
-            }
+            attr.get(name).map(|e| e.to_string())
         };
 
         NCBIGFF {
@@ -60,24 +56,16 @@ impl FromStr for NCBIAcc {
                 number: num.parse().context("Error parsing NCBI accession number")?,
                 version: v.parse().context("Error parsing NCBI accession version")?,
             })
-        } else {
-            if s.contains("_") {
-                if s.contains(".") {
-                    let (rest, v) = s.split_once(".").context("first split error")?;
-                    let (pref, num) = rest.split_once("_").context("second split error")?;
-                    let v = v.replace("_", ".");
-                    Ok(NCBIAcc {
-                        prefix: pref.to_string(),
-                        number: num.parse().context("Error parsing NCBI accession number")?,
-                        version: v.parse().context("Error parsing NCBI accession version")?,
-                    })
-                } else {
-                    Ok(NCBIAcc {
-                        prefix: s.to_string(),
-                        number: u64::MAX,
-                        version: 0.0,
-                    })
-                }
+        } else if s.contains("_") {
+            if s.contains(".") {
+                let (rest, v) = s.split_once(".").context("first split error")?;
+                let (pref, num) = rest.split_once("_").context("second split error")?;
+                let v = v.replace("_", ".");
+                Ok(NCBIAcc {
+                    prefix: pref.to_string(),
+                    number: num.parse().context("Error parsing NCBI accession number")?,
+                    version: v.parse().context("Error parsing NCBI accession version")?,
+                })
             } else {
                 Ok(NCBIAcc {
                     prefix: s.to_string(),
@@ -85,6 +73,12 @@ impl FromStr for NCBIAcc {
                     version: 0.0,
                 })
             }
+        } else {
+            Ok(NCBIAcc {
+                prefix: s.to_string(),
+                number: u64::MAX,
+                version: 0.0,
+            })
         }
     }
 }

+ 11 - 9
src/annotations/pangolin.rs

@@ -2,21 +2,21 @@ use anyhow::{Context, Result};
 use log::{info, warn};
 use regex::Regex;
 use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
-use std::io::{self, Write};
+use std::io::Write;
 use std::str::FromStr;
 use std::{
     fs::File,
     io::{BufRead, BufReader},
     process::{Command, Stdio},
 };
+use utoipa::ToSchema;
 use uuid::Uuid;
 
 use crate::variants::{ReferenceAlternative, Variants};
 
 #[derive(Debug, ToSchema, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Pangolin {
-    pub predictions: Vec<(u32, f64)>
+    pub predictions: Vec<(u32, f64)>,
 }
 
 // pangolin -c CHROM,POS,REF,ALT -s 0.1 /tmp/hattab_test_pango.csv /data/ref/hs1/hs1_simple_chr.fa /data/ref/hs1/gencode.v44.liftedTohs1.db gg.vcf
@@ -45,8 +45,8 @@ pub fn run_pangolin(in_path: &str) -> Result<String> {
     let reader = BufReader::new(stdout);
     reader
         .lines()
-        .filter_map(|line| line.ok())
-        .filter(|line| line.find("error").is_some())
+        .map_while(Result::ok)
+        .filter(|line| line.contains("error"))
         .for_each(|line| warn!("{}", line));
 
     cmd.wait()?;
@@ -69,7 +69,7 @@ pub fn pangolin_save_variants(variants: &Variants) -> Result<String> {
         writeln!(
             file,
             "{}",
-            vec![
+            [
                 v.contig.to_string(),
                 v.position.to_string(),
                 v.reference.to_string(),
@@ -110,7 +110,7 @@ pub fn pangolin_parse_results(
         if parts.len() != 5 {
             continue;
         }
-        if parts[4] == "" {
+        if parts[4].is_empty() {
             continue;
         }
 
@@ -124,13 +124,15 @@ pub fn pangolin_parse_results(
             })
             .collect();
 
-        if pangolin_res.len() > 0 {
+        if !pangolin_res.is_empty() {
             res.push((
                 parts[0].to_string(),
                 parts[1].parse::<u32>()?,
                 ReferenceAlternative::from_str(parts[2])?,
                 ReferenceAlternative::from_str(parts[3])?,
-                Pangolin { predictions: pangolin_res}
+                Pangolin {
+                    predictions: pangolin_res,
+                },
             ));
         }
     }

+ 21 - 39
src/annotations/vep.rs

@@ -1,9 +1,8 @@
 use anyhow::{anyhow, Context, Ok, Result};
 use csv::ReaderBuilder;
 use hashbrown::HashMap;
-use log::{info, warn};
+use log::warn;
 use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
 use std::io::Write;
 use std::{
     env::temp_dir,
@@ -12,6 +11,7 @@ use std::{
     process::{Command, Stdio},
     str::FromStr,
 };
+use utoipa::ToSchema;
 
 use crate::variants::{AnnotationType, Variant};
 
@@ -56,11 +56,8 @@ impl VEP {
             _ => Some(s.to_string()),
         };
 
-        let consequence = if let Some(c) = or_opt(&d.consequence) {
-            Some(c.split(",").map(|e| e.to_string()).collect::<Vec<String>>())
-        } else {
-            None
-        };
+        let consequence = or_opt(&d.consequence)
+            .map(|c| c.split(",").map(|e| e.to_string()).collect::<Vec<String>>());
 
         Ok(VEP {
             gene: or_opt(&d.gene),
@@ -98,7 +95,7 @@ impl FromStr for VEPExtra {
 
         for e in elements.iter() {
             let (k, v) = e.split_once("=").ok_or(err("in split '='"))?;
-            if !kv.insert(k, v).is_none() {
+            if kv.insert(k, v).is_some() {
                 return Err(err("kv insert"));
             };
         }
@@ -108,26 +105,14 @@ impl FromStr for VEPExtra {
         } else {
             None
         };
-        let symbol: Option<String> = if let Some(v) = kv.get("SYMBOL") {
-            Some(v.to_string())
-        } else {
-            None
-        };
+        let symbol: Option<String> = kv.get("SYMBOL").map(|v| v.to_string());
         let distance: Option<u32> = if let Some(v) = kv.get("DISTANCE") {
             Some(v.parse()?)
         } else {
             None
         };
-        let hgvs_c: Option<String> = if let Some(v) = kv.get("HGVSc") {
-            Some(v.to_string())
-        } else {
-            None
-        };
-        let hgvs_p: Option<String> = if let Some(v) = kv.get("HGVSp") {
-            Some(v.to_string())
-        } else {
-            None
-        };
+        let hgvs_c: Option<String> = kv.get("HGVSc").map(|v| v.to_string());
+        let hgvs_p: Option<String> = kv.get("HGVSp").map(|v| v.to_string());
 
         Ok(VEPExtra {
             impact,
@@ -207,15 +192,12 @@ pub fn vep_chunk(data: &mut [Variant]) -> Result<()> {
     for (i, row) in data.iter().enumerate() {
         writeln!(
             vcf,
-            "{}\t{}\t{}\t{}\t{}\t{}\tPASS\t.\t{}\t{}",
+            "{}\t{}\t{}\t{}\t{}\t.\tPASS\t.\t.\t.",
             row.contig,
             row.position,
             i + 1,
             row.reference,
-            row.alternative,
-            ".",
-            ".",
-            "."
+            row.alternative
         )?;
     }
 
@@ -310,27 +292,27 @@ fn run_vep(in_path: &str, out_path: &str) -> Result<()> {
         // .stderr(Stdio::null())
         .spawn()
         .expect("VEP failed to start");
-        // .stderr
-        // .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "Could not capture standard output.")).unwrap();
+    // .stderr
+    // .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "Could not capture standard output.")).unwrap();
 
     let stderr = cmd.stderr.take().unwrap();
     let reader = BufReader::new(stderr);
     reader
         .lines()
-        .filter_map(|line| line.ok())
-        .filter(|line| line.find("error").is_some())
+        .map_while(Result::ok)
+        .filter(|line| line.contains("error"))
         .for_each(|line| warn!("{}", line));
 
     cmd.wait()?;
     Ok(())
 }
 
-pub fn get_best_vep(d: &Vec<VEP>) -> Result<VEP> {
-    if d.len() == 0 {
+pub fn get_best_vep(d: &[VEP]) -> Result<VEP> {
+    if d.is_empty() {
         return Err(anyhow!("No element in VEP vector"));
     }
     if d.len() == 1 {
-        return Ok(d.get(0).unwrap().clone());
+        return Ok(d.first().unwrap().clone());
     }
 
     let mut parsed: Vec<(usize, NCBIAcc)> = Vec::new();
@@ -352,14 +334,14 @@ pub fn get_best_vep(d: &Vec<VEP>) -> Result<VEP> {
     let nm: Vec<(usize, NCBIAcc)> = parsed
         .clone()
         .into_iter()
-        .filter(|(_, e)| e.prefix == "NM".to_string())
+        .filter(|(_, e)| e.prefix == *"NM")
         .collect();
 
-    if nm.len() > 0 {
-        let (k, _) = nm.get(0).unwrap();
+    if !nm.is_empty() {
+        let (k, _) = nm.first().unwrap();
         return Ok(d.get(*k).unwrap().clone());
     } else {
-        let (k, _) = parsed.get(0).unwrap();
+        let (k, _) = parsed.first().unwrap();
         return Ok(d.get(*k).unwrap().clone());
     }
 }

+ 4 - 13
src/in_out/mod.rs

@@ -1,12 +1,9 @@
 use anyhow::{Ok, Result};
 use bgzip::BGZFReader;
 use indicatif::MultiProgress;
-use std::{
-    fs::{File, Metadata},
-    io::BufReader,
-};
+use std::{fs::File, io::BufReader};
 
-use crate::utils::{new_pg_speed, new_pg_bytes};
+use crate::utils::new_pg_bytes;
 
 pub mod dict_reader;
 pub mod vcf_reader;
@@ -14,7 +11,6 @@ pub mod vcf_writer;
 
 pub fn get_reader(path: &str) -> Result<Box<dyn std::io::Read>> {
     let file_type = *path.split(".").collect::<Vec<&str>>().last().unwrap();
-
     assert!(file_type == "gz" || file_type == "vcf");
 
     let raw_reader: Box<dyn std::io::Read> = Box::new(File::open(path)?);
@@ -25,7 +21,6 @@ pub fn get_reader(path: &str) -> Result<Box<dyn std::io::Read>> {
             Ok(Box::new(BufReader::new(reader)))
         }
         "vcf" => {
-            // let reader = Box::new(BzDecoder::new(raw_reader));
             Ok(Box::new(BufReader::new(raw_reader)))
         }
         t => {
@@ -33,16 +28,13 @@ pub fn get_reader(path: &str) -> Result<Box<dyn std::io::Read>> {
         }
     }
 }
-pub fn get_reader_progress(
-    path: &str,
-    mp: &MultiProgress,
-) -> Result<Box<dyn std::io::Read>> {
+pub fn get_reader_progress(path: &str, mp: &MultiProgress) -> Result<Box<dyn std::io::Read>> {
     let file_type = *path.split(".").collect::<Vec<&str>>().last().unwrap();
 
     assert!(file_type == "gz" || file_type == "vcf");
     let file = File::open(path)?;
     let metadata = file.metadata()?;
-    let pg = mp.add(new_pg_bytes(metadata.len() as u64));
+    let pg = mp.add(new_pg_bytes(metadata.len()));
     pg.set_message(format!("Reading {path}"));
 
     let raw_reader: Box<dyn std::io::Read> = Box::new(file);
@@ -54,7 +46,6 @@ pub fn get_reader_progress(
             Ok(Box::new(BufReader::new(reader)))
         }
         "vcf" => {
-            // let reader = Box::new(BzDecoder::new(raw_reader));
             Ok(Box::new(BufReader::new(raw_reader)))
         }
         t => {

+ 17 - 24
src/in_out/vcf_reader.rs

@@ -1,14 +1,12 @@
-use std::{fs, fmt::Write};
-
+use std::{fmt::Write, fs};
 use crate::{
     in_out::get_reader,
-    utils::new_pg_speed,
     variants::{VCFSource, Variant, VariantType},
 };
 use anyhow::{Ok, Result};
 use csv::ReaderBuilder;
-use indicatif::{MultiProgress, ProgressBar, ProgressStyle, ProgressState};
-use log::{info, warn};
+use indicatif::{MultiProgress, ProgressBar, ProgressState, ProgressStyle};
+use log::info;
 use rayon::prelude::*;
 
 #[derive(Debug, serde::Deserialize, Eq, PartialEq, Clone)]
@@ -37,12 +35,12 @@ pub fn read_vcf(
         .comment(Some(b'#'))
         .has_headers(false)
         .flexible(true)
-        .from_reader(get_reader(&path)?);
-    let mut iter = reader.deserialize();
+        .from_reader(get_reader(path)?);
+    let iter = reader.deserialize();
 
     let mut all = Vec::new();
 
-    while let Some(result) = iter.next() {
+    for result in iter {
         let record: VCFRow = result?;
 
         // Normalize into multirows
@@ -62,7 +60,7 @@ pub fn read_vcf(
                     vals[0],
                     vals[1],
                     vals[2],
-                    vec![ads[0], ads[i + 1]].join(","),
+                    [ads[0], ads[i + 1]].join(","),
                     vafs[i],
                     cp.join(",")
                 );
@@ -84,12 +82,10 @@ pub fn read_vcf(
         .par_iter_mut()
         .map(|row| {
             // for Sniffles normalize insertion/deletion position (after the pos)
-            if source == &VCFSource::Sniffles {
-                if row.reference == base_n && row.alt.len() > 1 {
-                    row.pos -= 1;
-                }
+            if source == &VCFSource::Sniffles && row.reference == base_n && row.alt.len() > 1 {
+                row.pos -= 1;
             }
-            return Variant::from_vcfrow(row, source.clone(), variant_type.clone()).unwrap();
+            Variant::from_vcfrow(row, source.clone(), variant_type.clone()).unwrap()
         })
         .filter(|v| {
             for cd in v.callers_data.iter() {
@@ -97,7 +93,7 @@ pub fn read_vcf(
                     return false;
                 }
             }
-            return true;
+            true
         })
         .collect();
 
@@ -113,7 +109,6 @@ pub fn read_vcf_progress(
     info!("Reading VCF {}", path);
     let metadata = fs::metadata(path)?;
 
-    let mut downloaded = 0;
     let total_size = metadata.len();
 
     let pb = ProgressBar::new(total_size);
@@ -127,7 +122,7 @@ pub fn read_vcf_progress(
         .comment(Some(b'#'))
         .has_headers(false)
         .flexible(true)
-        .from_reader(get_reader(&path)?);
+        .from_reader(get_reader(path)?);
     let mut iter = reader.deserialize();
     // let r = iter.reader();
 
@@ -154,7 +149,7 @@ pub fn read_vcf_progress(
                     vals[0],
                     vals[1],
                     vals[2],
-                    vec![ads[0], ads[i + 1]].join(","),
+                    [ads[0], ads[i + 1]].join(","),
                     vafs[i],
                     cp.join(",")
                 );
@@ -178,12 +173,10 @@ pub fn read_vcf_progress(
         .par_iter_mut()
         .map(|row| {
             // for Sniffles normalize insertion/deletion position (after the pos)
-            if source == &VCFSource::Sniffles {
-                if row.reference == base_n && row.alt.len() > 1 {
-                    row.pos -= 1;
-                }
+            if source == &VCFSource::Sniffles && row.reference == base_n && row.alt.len() > 1 {
+                row.pos -= 1;
             }
-            return Variant::from_vcfrow(row, source.clone(), variant_type.clone()).unwrap();
+            Variant::from_vcfrow(row, source.clone(), variant_type.clone()).unwrap()
         })
         .filter(|v| {
             for cd in v.callers_data.iter() {
@@ -191,7 +184,7 @@ pub fn read_vcf_progress(
                     return false;
                 }
             }
-            return true;
+            true
         })
         .collect();
 

+ 2 - 4
src/lib.rs

@@ -13,11 +13,9 @@ mod tests {
     use indicatif_log_bridge::LogWrapper;
     use log::info;
     use noodles_core::{Position, Region};
-
     use crate::{
         annotations::phase, config::Config, sql::variants_sql::load_variants_name, utils::count_repetitions, variants::{AnnotationType, Category, Variants}
     };
-
     use self::annotations::phase::PhaserConfig;
 
     use super::*;
@@ -179,7 +177,7 @@ mod tests {
 
     #[test]
     fn phasing() -> anyhow::Result<()> {
-        let id = "SALICETTO";
+        let id = "MACCAGN";
         let min_records = 2;
 
         let logger =
@@ -188,7 +186,7 @@ mod tests {
         let multi = MultiProgress::new();
         LogWrapper::new(multi.clone(), logger).try_init().unwrap();
 
-        let config = PhaserConfig::new(id, "/data/longreads_basic_pipe", min_records, 0.22);
+        let config = PhaserConfig::new(id, "/data/longreads_basic_pipe", min_records, 0.35);
         phase::phase(config, multi)
     }
 

+ 5 - 11
src/utils.rs

@@ -1,9 +1,8 @@
-use std::time::Duration;
-
 use anyhow::{Context, Ok, Result};
 use hashbrown::HashMap;
 use indicatif::{ProgressBar, ProgressStyle};
 use statrs::distribution::{ChiSquared, ContinuousCDF};
+use std::time::Duration;
 
 pub fn chi_square_test_impl(observed: &[f64], expected: &[f64]) -> anyhow::Result<f64> {
     if observed.len() != expected.len() {
@@ -82,10 +81,8 @@ pub fn get_hts_nt_pileup(
                             if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
                                 bases.push(b);
                             }
-                        } else {
-                            if alignment.is_del() {
-                                bases.push(b'D');
-                            }
+                        } else if alignment.is_del() {
+                            bases.push(b'D');
                         }
                     }
                 }
@@ -170,13 +167,13 @@ pub fn estimate_shannon_entropy(dna_sequence: &str) -> f64 {
 
 pub fn print_stat_cat(s: &HashMap<String, u32>, denum: u32) {
     let denum = denum as f32;
-    let mut v: Vec<(&String, &u32)> = s.iter().map(|e| e).collect();
+    let mut v: Vec<(&String, &u32)> = s.iter().collect();
     v.sort_by(|(_, a), (_, b)| b.cmp(a));
 
     let mut table = prettytable::table!(["category", "n", "%"]);
 
     v.iter().for_each(|(k, v)| {
-        let p = (**v as f32) * 100 as f32 / denum;
+        let p = (**v as f32) * 100_f32 / denum;
         let p = format!("{:.2}", p);
         table.add_row([*k, &v.to_string(), &p].into());
     });
@@ -248,6 +245,3 @@ pub fn count_repetitions(sequence: &str, pattern_size: usize) -> usize {
 
     max_repetitions
 }
-
-
-