Thomas 11 hónapja
szülő
commit
7d4f9c06da

+ 97 - 24
src/annotation/mod.rs

@@ -11,13 +11,16 @@ use std::{
     sync::Arc,
 };
 
-use crate::{helpers::{mean, Blake3BuildHasher, Hash128}, variant::variant_collection::VariantCollection};
+use crate::{
+    helpers::{mean, Blake3BuildHasher, Hash128},
+    variant::{variant::AlterationCategory, variant_collection::VariantCollection},
+};
 use cosmic::Cosmic;
 use dashmap::DashMap;
 use gnomad::GnomAD;
 use log::info;
 use rayon::prelude::*;
-use vep::VEP;
+use vep::{get_best_vep, VepConsequence, VepImpact, VEP};
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum Annotation {
@@ -26,6 +29,7 @@ pub enum Annotation {
     Callers(Caller),
     Germline,
     Somatic,
+    AlterationCategory(AlterationCategory),
     ShannonEntropy(f64),
     ConstitDepth(u16),
     ConstitAlt(u16),
@@ -54,6 +58,7 @@ impl fmt::Display for Annotation {
             Annotation::GnomAD(_) => "GnomAD",
             Annotation::LowEntropy => "LowEntropy",
             Annotation::VEP(_) => "VEP",
+            Annotation::AlterationCategory(alt_cat) => &alt_cat.to_string(),
         };
         write!(f, "{}", str)
     }
@@ -67,6 +72,10 @@ impl FromStr for Annotation {
             "SoloTumor" => Ok(Annotation::SoloTumor),
             "SoloConstit" => Ok(Annotation::SoloConstit),
             "DeepVariant" => Ok(Annotation::Callers(Caller::DeepVariant)),
+            "DeepSomatic" => Ok(Annotation::Callers(Caller::DeepSomatic)),
+            "Savana" => Ok(Annotation::Callers(Caller::Savana)),
+            "NanomonSV" => Ok(Annotation::Callers(Caller::NanomonSV)),
+            "Severus" => Ok(Annotation::Callers(Caller::Severus)),
             "ClairS" => Ok(Annotation::Callers(Caller::ClairS)),
             "Germline" => Ok(Annotation::Germline),
             "Somatic" => Ok(Annotation::Somatic),
@@ -124,6 +133,7 @@ pub struct AnnotationsStats {
     pub numeric: DashMap<String, HashMap<String, Vec<f64>>>,
 }
 
+#[allow(clippy::type_complexity)]
 impl Annotations {
     pub fn insert_update(&self, key: Hash128, add: &[Annotation]) {
         self.store
@@ -132,16 +142,27 @@ impl Annotations {
             .extend(add.iter().cloned())
     }
 
-    pub fn callers_stat(&self) -> AnnotationsStats {
+    pub fn callers_stat(
+        &self,
+        annotations: Option<Box<dyn Fn(&Annotation) -> bool + Send + Sync>>,
+    ) -> AnnotationsStats {
         let map: DashMap<String, u64> = DashMap::new();
         let num_maps: DashMap<String, HashMap<String, Vec<f64>>> = DashMap::new();
 
         self.store.par_iter().for_each(|e| {
-            let anns = e.value();
+            let anns = if let Some(sel) = &annotations {
+                e.value()
+                    .iter()
+                    .filter(|a| sel(a))
+                    .cloned()
+                    .collect()
+            } else {
+                e.value().clone()
+            };
 
             let mut categorical = Vec::new();
             let mut numerical = Vec::new();
-            for ann in anns {
+            for ann in anns.iter() {
                 match ann {
                     Annotation::SoloTumor
                     | Annotation::SoloConstit
@@ -158,6 +179,9 @@ impl Annotations {
                         numerical.push((ann.to_string(), *v as f64));
                     }
                     Annotation::Cosmic(c) => numerical.push((ann.to_string(), c.cosmic_cnt as f64)),
+                    Annotation::AlterationCategory(alt_cat) => {
+                        categorical.push(alt_cat.to_string())
+                    }
                 }
             }
 
@@ -177,24 +201,28 @@ impl Annotations {
             }
         });
         println!("\nCallers stats:");
-        println!("\tcategories: {}", map.len());
+        println!("\tn categories: {}", map.len());
         let mut n = 0;
-        map.iter().for_each(|e| {
-            let k = e.key();
-            let v = e.value();
-            n += v;
-            let mut num_str = Vec::new();
-            if let Some(nums) = num_maps.get(k) {
-                num_str.extend(
-                    nums.iter()
-                        .map(|(k_n, v_n)| format!("{k_n} {:.2}", mean(v_n))),
-                )
-            }
-            num_str.sort();
-            println!("\t{k}\t{v}\t{}", num_str.join("\t"));
-        });
+        let lines: Vec<String> = map
+            .iter()
+            .map(|e| {
+                let k = e.key();
+                let v = e.value();
+                n += v;
+                let mut num_str = Vec::new();
+                if let Some(nums) = num_maps.get(k) {
+                    num_str.extend(
+                        nums.iter()
+                            .map(|(k_n, v_n)| format!("{k_n} {:.2}", mean(v_n))),
+                    )
+                }
+                num_str.sort();
+                format!("\t- {k}\t{v}\t{}", num_str.join("\t"))
+            })
+            .collect();
 
-        println!("Total\t{n}");
+        println!("{}", lines.join("\n"));
+        println!("Total\t{n}\n");
 
         AnnotationsStats {
             categorical: map,
@@ -202,6 +230,47 @@ impl Annotations {
         }
     }
 
+    pub fn vep_stats(&self) -> anyhow::Result<()> {
+        let (n_coding, n_syn, total) = self
+            .store
+            .par_iter()
+            .map(|v| {
+                v.value()
+                    .iter()
+                    .find_map(|ann| {
+                        if let Annotation::VEP(veps) = ann {
+                            get_best_vep(veps).ok()
+                        } else {
+                            None
+                        }
+                    })
+                    .and_then(|vep| vep.consequence.clone())
+                    .map_or((0, 0, 0), |consequences| {
+                        let impact = consequences
+                            .iter()
+                            .map(VepImpact::from)
+                            .min()
+                            .unwrap_or(VepImpact::MODIFIER);
+                        match impact {
+                            VepImpact::HIGH | VepImpact::MODERATE => (1, 0, 1),
+                            _ => {
+                                if consequences.contains(&VepConsequence::SynonymousVariant) {
+                                    (0, 1, 1)
+                                } else {
+                                    (0, 0, 1)
+                                }
+                            }
+                        }
+                    })
+            })
+            .reduce(|| (0, 0, 0), |a, b| (a.0 + b.0, a.1 + b.1, a.2 + b.2));
+
+        println!(
+            "VEP annotations\n\t- Coding: {n_coding}\n\t- Synonymous: {n_syn}\nTotal: {total}"
+        );
+        Ok(())
+    }
+
     pub fn get_keys_filter(
         &self,
         filter: impl Fn(&Vec<Annotation>) -> bool + Send + Sync,
@@ -218,7 +287,6 @@ impl Annotations {
         variants: &mut Vec<VariantCollection>,
         filter: impl Fn(&Vec<Annotation>) -> bool + Send + Sync,
     ) -> usize {
-        info!("Variant Keys lookup");
         let mut keys = HashSet::new();
         self.store.retain(|key, value| {
             if filter(value) {
@@ -230,7 +298,6 @@ impl Annotations {
         });
         info!("{} unique Variants to keep", keys.len());
 
-        info!("Removing variants from collections");
         let n_removed: usize = variants
             .par_iter_mut()
             .map(|c| {
@@ -244,7 +311,13 @@ impl Annotations {
                 //     .cloned()
                 //     .collect();
                 let after = c.variants.len();
-                info!("{} {}\t{}/{}", c.caller, c.category, before - after, before);
+                info!(
+                    "\t- {} {}\t{}/{}",
+                    c.caller,
+                    c.category,
+                    before - after,
+                    before
+                );
                 before - after
             })
             .sum();

+ 49 - 49
src/annotation/vep.rs

@@ -213,55 +213,55 @@ impl From<&VepConsequence> for VepImpact {
     }
 }
 
-impl VepImpact {
-    // pub fn from_consequence(consequence: &VepConsequence) -> VepImpact {
-    //     match consequence {
-    //         VepConsequence::TranscriptAblation
-    //         | VepConsequence::SpliceAcceptorVariant
-    //         | VepConsequence::SpliceDonorVariant
-    //         | VepConsequence::StopGained
-    //         | VepConsequence::FrameshiftVariant
-    //         | VepConsequence::StopLost
-    //         | VepConsequence::StartLost
-    //         | VepConsequence::TranscriptAmplification
-    //         | VepConsequence::FeatureElongation
-    //         | VepConsequence::FeatureTruncation => VepImpact::HIGH,
-    //
-    //         VepConsequence::InframeInsertion
-    //         | VepConsequence::InframeDeletion
-    //         | VepConsequence::MissenseVariant
-    //         | VepConsequence::ProteinAlteringVariant => VepImpact::MODERATE,
-    //
-    //         VepConsequence::SpliceDonor5thBaseVariant
-    //         | VepConsequence::SpliceRegionVariant
-    //         | VepConsequence::SpliceDonorRegionVariant
-    //         | VepConsequence::SplicePolyrimidineTractVariant
-    //         | VepConsequence::IncompleteTerminalCodonVariant
-    //         | VepConsequence::StartRetainedVariant
-    //         | VepConsequence::StopRetainedVariant
-    //         | VepConsequence::SynonymousVariant => VepImpact::LOW,
-    //
-    //         VepConsequence::CodingSequenceVariant
-    //         | VepConsequence::MatureMiRnaVariant
-    //         | VepConsequence::FivePrimeUtrVariant
-    //         | VepConsequence::ThreePrimeUtrVariant
-    //         | VepConsequence::NonCodingTranscriptExonVariant
-    //         | VepConsequence::IntronVariant
-    //         | VepConsequence::NmdTranscriptVariant
-    //         | VepConsequence::NonCodingTranscriptVariant
-    //         | VepConsequence::UpstreamGeneVariant
-    //         | VepConsequence::DownstreamGeneVariant
-    //         | VepConsequence::TfbsAblation
-    //         | VepConsequence::TfbsAmplification
-    //         | VepConsequence::TfBindingSiteVariant
-    //         | VepConsequence::RegulatoryRegionAblation
-    //         | VepConsequence::RegulatoryRegionAmplification
-    //         | VepConsequence::RegulatoryRegionVariant
-    //         | VepConsequence::SequenceVariant
-    //         | VepConsequence::IntergenicVariant => VepImpact::MODIFIER,
-    //     }
-    // }
-}
+// impl VepImpact {
+//     pub fn from_consequence(consequence: &VepConsequence) -> VepImpact {
+//         match consequence {
+//             VepConsequence::TranscriptAblation
+//             | VepConsequence::SpliceAcceptorVariant
+//             | VepConsequence::SpliceDonorVariant
+//             | VepConsequence::StopGained
+//             | VepConsequence::FrameshiftVariant
+//             | VepConsequence::StopLost
+//             | VepConsequence::StartLost
+//             | VepConsequence::TranscriptAmplification
+//             | VepConsequence::FeatureElongation
+//             | VepConsequence::FeatureTruncation => VepImpact::HIGH,
+//
+//             VepConsequence::InframeInsertion
+//             | VepConsequence::InframeDeletion
+//             | VepConsequence::MissenseVariant
+//             | VepConsequence::ProteinAlteringVariant => VepImpact::MODERATE,
+//
+//             VepConsequence::SpliceDonor5thBaseVariant
+//             | VepConsequence::SpliceRegionVariant
+//             | VepConsequence::SpliceDonorRegionVariant
+//             | VepConsequence::SplicePolyrimidineTractVariant
+//             | VepConsequence::IncompleteTerminalCodonVariant
+//             | VepConsequence::StartRetainedVariant
+//             | VepConsequence::StopRetainedVariant
+//             | VepConsequence::SynonymousVariant => VepImpact::LOW,
+//
+//             VepConsequence::CodingSequenceVariant
+//             | VepConsequence::MatureMiRnaVariant
+//             | VepConsequence::FivePrimeUtrVariant
+//             | VepConsequence::ThreePrimeUtrVariant
+//             | VepConsequence::NonCodingTranscriptExonVariant
+//             | VepConsequence::IntronVariant
+//             | VepConsequence::NmdTranscriptVariant
+//             | VepConsequence::NonCodingTranscriptVariant
+//             | VepConsequence::UpstreamGeneVariant
+//             | VepConsequence::DownstreamGeneVariant
+//             | VepConsequence::TfbsAblation
+//             | VepConsequence::TfbsAmplification
+//             | VepConsequence::TfBindingSiteVariant
+//             | VepConsequence::RegulatoryRegionAblation
+//             | VepConsequence::RegulatoryRegionAmplification
+//             | VepConsequence::RegulatoryRegionVariant
+//             | VepConsequence::SequenceVariant
+//             | VepConsequence::IntergenicVariant => VepImpact::MODIFIER,
+//         }
+//     }
+// }
 
 impl From<VepConsequence> for String {
     fn from(consequence: VepConsequence) -> Self {

+ 9 - 1
src/callers/nanomonsv.rs

@@ -274,7 +274,15 @@ impl Variants for NanomonSVSolo {
         let variants = read_vcf(&self.vcf_passed)?;
 
         variants.par_iter().for_each(|v| {
-            annotations.insert_update(v.hash(), &add);
+            let var_cat = v.alteration_category();
+            annotations.insert_update(
+                v.hash(),
+                &vec![
+                    Annotation::Callers(caller.clone()),
+                    category.clone(),
+                    Annotation::AlterationCategory(var_cat),
+                ],
+            );
         });
         Ok(VariantCollection {
             variants,

+ 13 - 9
src/callers/savana.rs

@@ -6,7 +6,6 @@ use crate::{
         longphase::{LongphaseConfig, LongphaseHap, LongphasePhase},
     },
     config::Config,
-    helpers::force_or_not,
     io::vcf::read_vcf,
     runners::{run_wait, CommandRun, Run},
     variant::{
@@ -64,15 +63,15 @@ impl Initialize for Savana {
 
 impl Run for Savana {
     fn run(&mut self) -> anyhow::Result<()> {
-        force_or_not(
-            &self.config.savana_output_vcf(&self.id),
-            self.config.savana_force,
-        )?;
-        info!("Running Savana v{}", Savana::version(&self.config)?);
+        // force_or_not(
+        //     &self.config.savana_output_vcf(&self.id),
+        //     self.config.savana_force,
+        // )?;
 
         let id = &self.id;
         let output_vcf = &self.config.savana_output_vcf(id);
         if !Path::new(&output_vcf).exists() {
+            info!("Running Savana v{}", Savana::version(&self.config)?);
             // Check for haplotagged bam
             if !Path::new(&self.normal_hp_bam).exists() {
                 LongphaseHap::new(
@@ -196,14 +195,19 @@ impl Variants for Savana {
         let (caller, category) = self.caller_cat();
         let add = vec![Annotation::Callers(caller.clone()), category.clone()];
         info!(
-            "Loading variants from Savana {} with annotations: {:?}",
-            self.id, add
+            "Loading variants from {} {}: {}",
+            caller, category, self.passed_vcf
         );
         let variants = read_vcf(&self.passed_vcf)?;
-
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
         });
+        info!(
+            "{} {}, {} variants loaded.",
+            caller,
+            category,
+            variants.len()
+        );
         Ok(VariantCollection {
             variants,
             vcf: Vcf::new(self.passed_vcf.clone().into())?,

+ 2 - 2
src/config.rs

@@ -113,7 +113,7 @@ impl Default for Config {
             savana_bin: "savana".to_string(),
             savana_threads: 150,
             savana_output_dir: "{result_dir}/{id}/diag/savana".to_string(),
-            savana_passed_vcf: "{output_dir}/{id}_diag_savana_PASSED.vcf".to_string(),
+            savana_passed_vcf: "{output_dir}/{id}_diag_savana_PASSED.vcf.gz".to_string(),
             savana_force: false,
 
             // Severus
@@ -353,7 +353,7 @@ impl Config {
     pub fn savana_output_vcf(&self, id: &str) -> String {
         let output_dir = self.savana_output_dir(id);
 
-        format!("{output_dir}/{id}_diag_hs1_hp.classified.somatic.vcf")
+        format!("{output_dir}/{id}_{}_{}_{}.classified.somatic.vcf", self.tumoral_name, self.reference_name, self.haplotagged_bam_tag_name)
     }
 
     pub fn savana_passed_vcf(&self, id: &str) -> String {

+ 214 - 14
src/pipes/somatic.rs

@@ -1,8 +1,8 @@
-use log::info;
-use std::{fs::File, sync::Arc};
+use log::{info, kv::Source};
+use std::{collections::HashMap, fs::File, sync::Arc};
 
 use crate::{
-    annotation::{Annotation, Annotations, Caller},
+    annotation::{Annotation, Annotations, AnnotationsStats, Caller},
     callers::{
         clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::NanomonSV,
         savana::Savana, severus::Severus,
@@ -83,6 +83,125 @@ impl SomaticStats {
             ..Default::default()
         }
     }
+    pub fn annot_init(&self, stats: AnnotationsStats) {
+        let stats: Vec<(Vec<Annotation>, u64)> = stats
+            .categorical
+            .iter()
+            .map(|e| {
+                let anns = e
+                    .key()
+                    .split(" + ")
+                    .map(|k| k.parse())
+                    .collect::<anyhow::Result<Vec<Annotation>>>()
+                    .unwrap();
+                (anns, *e.value())
+            })
+            .collect();
+
+        let callers_somatic_solo_tumor = [
+            self.input
+                .somatic
+                .iter()
+                .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::Somatic])
+                .collect::<Vec<Vec<Annotation>>>(),
+            self.input
+                .solo_tumor
+                .iter()
+                .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::SoloTumor])
+                .collect(),
+        ]
+        .concat();
+
+        let callers_germline_solo_constit = [
+            self.input
+                .germline
+                .iter()
+                .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::Germline])
+                .collect::<Vec<Vec<Annotation>>>(),
+            self.input
+                .solo_constit
+                .iter()
+                .map(|(caller, _)| {
+                    vec![Annotation::Callers(caller.clone()), Annotation::SoloConstit]
+                })
+                .collect(),
+        ]
+        .concat();
+
+        let mut with_germline: HashMap<String, HashMap<String, u64>> = HashMap::new();
+        stats.iter().for_each(|(anns, v)| {
+            if anns
+                .iter()
+                .any(|a| matches!(a, Annotation::SoloConstit | Annotation::Germline))
+            {
+                let n_by_tumor: Vec<(String, u64)> = callers_somatic_solo_tumor
+                    .iter()
+                    .flat_map(|tumor| {
+                        if tumor.iter().all(|a| anns.contains(a)) {
+                            let tum_call =
+                                format!("{} {}", tumor.first().unwrap(), tumor.get(1).unwrap());
+                            vec![(tum_call, *v)]
+                        } else {
+                            vec![]
+                        }
+                    })
+                    .collect();
+
+                let mut germline_caller: Vec<String> = callers_germline_solo_constit
+                    .iter()
+                    .flat_map(|germ| {
+                        if germ.iter().all(|a| anns.contains(a)) {
+                            let germ_call =
+                                format!("{} {}", germ.first().unwrap(), germ.get(1).unwrap());
+                            vec![germ_call]
+                        } else {
+                            vec![]
+                        }
+                    })
+                    .collect();
+                germline_caller.sort();
+                let germline_caller = germline_caller.join(" + ");
+
+                
+                n_by_tumor.iter().for_each(|(tumoral_caller, n)| {
+                    if let Some(row) = with_germline.get_mut(tumoral_caller) {
+                        // germline_caller.iter().for_each(|germline_caller| {
+                            if tumoral_caller == "ClairS Somatic" {
+                                println!("{tumoral_caller} {germline_caller} {n}");
+                            }
+                            if let Some(col) = row.get_mut(&germline_caller) {
+                                *col += *n;
+                            } else {
+                                row.insert(germline_caller.to_string(), *n);
+                            }
+                        // });
+                    } else {
+                        let mut row = HashMap::new();
+                        // germline_caller.iter().for_each(|germline_caller| {
+                            row.insert(germline_caller.to_string(), *n);
+                        // });
+                        with_germline.insert(tumoral_caller.to_string(), row);
+                    }
+                });
+            }
+        });
+
+        let mut germlines_callers: Vec<String> = with_germline.iter().flat_map(|(_, r)| {
+            r.iter().map(|(k,_)| k.to_string()).collect::<Vec<String>>()
+        }).collect();
+        germlines_callers.sort();
+        germlines_callers.dedup();
+
+        with_germline.iter().for_each(|(tumor, row)| {
+            print!("{tumor}\t");
+            germlines_callers.iter().for_each(|g| {
+                let v = row.get(g).unwrap_or(&0);
+                print!("{g}:{v}\t");
+            });
+            println!();
+        });
+        println!();
+    }
 }
 
 impl Run for Somatic {
@@ -137,7 +256,18 @@ impl Run for Somatic {
 
         let mut annotations = Arc::try_unwrap(annotations)
             .map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
-        annotations.callers_stat();
+        let caller_cat_anns = |v: &Annotation| {
+            matches!(
+                v,
+                Annotation::Callers(_)
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::SoloConstit
+                    | Annotation::SoloTumor
+            )
+        };
+        let annot_init = annotations.callers_stat(Some(Box::new(caller_cat_anns)));
+        somatic_stats.annot_init(annot_init);
 
         // Filter: Variants neither Germline nor SoloConstit
         info!("Keeping somatic variants (variants neither in solo nor in germline).");
@@ -145,7 +275,7 @@ impl Run for Somatic {
             annotations.retain_variants(&mut variants_collections, |anns| {
                 !anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit)
             });
-        annotations.callers_stat();
+        annotations.callers_stat(Some(Box::new(caller_cat_anns)));
 
         // Annotation: BAM depth, n_alt
         info!("Reading Constit BAM file for depth and pileup annotation.");
@@ -156,7 +286,20 @@ impl Run for Somatic {
             self.config.solo_max_alt_constit,
             self.config.solo_min_constit_depth,
         );
-        annotations.callers_stat();
+        annotations.callers_stat(Some(Box::new(|v| {
+            matches!(
+                v,
+                Annotation::Callers(_)
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::SoloConstit
+                    | Annotation::SoloTumor
+                    | Annotation::ConstitAlt(_)
+                    | Annotation::ConstitDepth(_)
+                    | Annotation::HighConstitAlt
+                    | Annotation::LowConstitDepth
+            )
+        })));
 
         // Filter: Remove LowConstitDepth from annotations and variants collections
         info!(
@@ -167,7 +310,6 @@ impl Run for Somatic {
             .retain_variants(&mut variants_collections, |anns| {
                 !anns.contains(&Annotation::LowConstitDepth)
             });
-        annotations.callers_stat();
         info!(
             "{} variants removed when depth in constit bam < {}.",
             somatic_stats.n_low_constit, self.config.solo_min_constit_depth
@@ -182,12 +324,24 @@ impl Run for Somatic {
             .retain_variants(&mut variants_collections, |anns| {
                 !anns.contains(&Annotation::HighConstitAlt)
             });
-        annotations.callers_stat();
         info!(
             "{} variants removed with SNP/indel inside the constit alignements > {}",
             somatic_stats.n_high_alt_constit, self.config.solo_max_alt_constit
         );
 
+        annotations.callers_stat(Some(Box::new(|v| {
+            matches!(
+                v,
+                Annotation::Callers(_)
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::SoloConstit
+                    | Annotation::SoloTumor
+                    | Annotation::ConstitAlt(_)
+                    | Annotation::ConstitDepth(_)
+            )
+        })));
+
         // Annotation: Entropy
         info!(
             "Entropy annotation from {} sequences.",
@@ -196,7 +350,6 @@ impl Run for Somatic {
         variants_collections.iter().for_each(|c| {
             c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
         });
-        annotations.callers_stat();
 
         // Annotation: Cosmic and GnomAD
         info!("Annotation with Cosmic and GnomAD.");
@@ -207,7 +360,18 @@ impl Run for Somatic {
                 ext_annot.annotate(&c.variants, &annotations)?;
                 Ok(())
             })?;
-        annotations.callers_stat();
+        annotations.callers_stat(Some(Box::new(|v| {
+            matches!(
+                v,
+                Annotation::Callers(_)
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::SoloConstit
+                    | Annotation::SoloTumor
+                    | Annotation::ConstitAlt(_)
+                    | Annotation::GnomAD(_)
+            )
+        })));
 
         // Filter: Remove variants in Gnomad and in constit bam
         info!("Filtering out variants in GnomAD and in constit bam at low AF.");
@@ -235,23 +399,57 @@ impl Run for Somatic {
                     })
                     .unwrap_or(false)
             });
+
         info!(
             "{} variants filtered, with constit alt <= max contig alt ({}) and in GnomAD.",
             somatic_stats.n_high_alt_constit_gnomad, self.config.solo_max_alt_constit
         );
-        annotations.callers_stat();
+        annotations.callers_stat(Some(Box::new(|v| {
+            matches!(
+                v,
+                Annotation::Callers(_)
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::SoloConstit
+                    | Annotation::SoloTumor
+                    | Annotation::ConstitAlt(_)
+                    | Annotation::GnomAD(_)
+            )
+        })));
 
         // Annotation low entropy
         annotations.low_shannon_entropy(self.config.min_shannon_entropy);
-        annotations.callers_stat();
+        // annotations.callers_stat();
 
         // Filtering low entropy for solo variants.
         info!("Filtering low entropies");
+        annotations.callers_stat(Some(Box::new(|v| {
+            matches!(
+                v,
+                Annotation::Callers(_)
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::SoloConstit
+                    | Annotation::SoloTumor
+                    | Annotation::LowEntropy
+            )
+        })));
+
         somatic_stats.n_low_entropies = annotations
             .retain_variants(&mut variants_collections, |anns| {
                 !anns.contains(&Annotation::LowEntropy)
             });
-        annotations.callers_stat();
+        annotations.callers_stat(Some(Box::new(|v| {
+            matches!(
+                v,
+                Annotation::Callers(_)
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::SoloConstit
+                    | Annotation::SoloTumor
+                    | Annotation::LowEntropy
+            )
+        })));
 
         // VEP
         info!("VEP annotation.");
@@ -262,7 +460,9 @@ impl Run for Somatic {
                 ext_annot.annotate_vep(&c.variants, &annotations)?;
                 Ok(())
             })?;
-        annotations.callers_stat();
+        annotations.callers_stat(Some(Box::new(caller_cat_anns)));
+
+        annotations.vep_stats()?;
 
         Ok(())
     }

+ 119 - 28
src/variant/variant.rs

@@ -156,58 +156,149 @@ impl VcfVariant {
         self.infos.0.iter().any(|i| matches!(i, Info::SVTYPE(_)))
     }
 
+    pub fn svtype(&self) -> Option<SVType> {
+        self.infos.0.iter().find_map(|e| {
+            if let Info::SVTYPE(sv_type) = e {
+                Some(sv_type.clone())
+            } else {
+                None
+            }
+        })
+    }
+
     pub fn alteration_category(&self) -> AlterationCategory {
         match (&self.reference, &self.alternative) {
             (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
-                AlterationCategory::Snv
+                AlterationCategory::SNV
             }
             (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
-                AlterationCategory::Ins
+                AlterationCategory::INS
             }
             (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
                 AlterationCategory::Other
             }
             (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
-                AlterationCategory::Del
+                AlterationCategory::DEL
             }
             (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
                 if a.len() < b.len() =>
             {
-                AlterationCategory::Ins
+                AlterationCategory::INS
             }
             (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
                 if a.len() > b.len() =>
             {
-                AlterationCategory::Del
-            }
-            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotides(_)) => {
-                AlterationCategory::Rep
-            }
-            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
-                AlterationCategory::Other
-            }
-            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
-                AlterationCategory::Other
-            }
-            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
-                AlterationCategory::Other
-            }
-            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
-                AlterationCategory::Other
+                AlterationCategory::DEL
             }
+            _ => match self.svtype() {
+                Some(sv_type) => AlterationCategory::from(sv_type),
+                None => AlterationCategory::Other,
+            }, // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotides(_)) => {
+               //     AlterationCategory::Rep
+               // }
+               // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
+               //     AlterationCategory::Other
+               // }
+               // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
+               //     AlterationCategory::Other
+               // }
+               // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
+               //     AlterationCategory::Other
+               // }
+               // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
+               //     AlterationCategory::Other
+               // }
         }
     }
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
 pub enum AlterationCategory {
-    Snv,
-    Ins,
-    Del,
-    Rep,
+    SNV,
+    DEL,
+    INS,
+    DUP,
+    INV,
+    CNV,
+    BND,
     Other,
 }
 
+impl fmt::Display for AlterationCategory {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                AlterationCategory::SNV => "SNV",
+                AlterationCategory::DEL => "DEL",
+                AlterationCategory::INS => "INS",
+                AlterationCategory::DUP => "DUP",
+                AlterationCategory::INV => "INV",
+                AlterationCategory::CNV => "CNV",
+                AlterationCategory::BND => "BND",
+                AlterationCategory::Other => "Other",
+            }
+        )
+    }
+}
+
+impl From<SVType> for AlterationCategory {
+    fn from(sv_type: SVType) -> Self {
+        match sv_type {
+            SVType::DEL => AlterationCategory::DEL,
+            SVType::INS => AlterationCategory::INS,
+            SVType::DUP => AlterationCategory::DUP,
+            SVType::INV => AlterationCategory::INV,
+            SVType::CNV => AlterationCategory::CNV,
+            SVType::BND => AlterationCategory::BND,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
+pub enum SVType {
+    DEL,
+    INS,
+    DUP,
+    INV,
+    CNV,
+    BND,
+}
+
+impl FromStr for SVType {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        match s {
+            "DEL" => Ok(SVType::DEL),
+            "INS" => Ok(SVType::INS),
+            "DUP" => Ok(SVType::DUP),
+            "INV" => Ok(SVType::INV),
+            "CNV" => Ok(SVType::CNV),
+            "BND" => Ok(SVType::BND),
+            _ => Err(anyhow!("Can't parse SVTYPE={s}")),
+        }
+    }
+}
+
+impl fmt::Display for SVType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                SVType::DEL => "DEL",
+                SVType::INS => "INS",
+                SVType::DUP => "DUP",
+                SVType::INV => "INV",
+                SVType::CNV => "CNV",
+                SVType::BND => "BND",
+            }
+        )
+    }
+}
+
 impl VariantId for VcfVariant {
     fn variant_id(&self) -> String {
         format!("{}_{}>{}", self.position, self.reference, self.alternative)
@@ -277,7 +368,7 @@ pub enum Info {
     RCU(u32),
     RGU(u32),
     RTU(u32),
-    SVTYPE(String),
+    SVTYPE(SVType),
     SVLEN(i32),
     END(u32),
     MATEID(String),
@@ -334,7 +425,7 @@ impl FromStr for Info {
                         .parse()
                         .context(format!("Can't parse into u32: {value}"))?,
                 ),
-                "SVTYPE" => Info::SVTYPE(value.to_string()),
+                "SVTYPE" => Info::SVTYPE(value.parse()?),
                 "SVLEN" => Info::SVLEN(
                     value
                         .parse()
@@ -484,8 +575,8 @@ impl TryFrom<(&str, &str)> for Format {
                     .map(|e| e.parse().context("Failed to parse AD"))
                     .collect::<anyhow::Result<Vec<_>>>()?,
             ),
-            "TR" =>Format::TR(value.parse()?),
-            "VR" =>Format::TR(value.parse()?),
+            "TR" => Format::TR(value.parse()?),
+            "VR" => Format::TR(value.parse()?),
             _ => Format::Other((key.to_string(), value.to_string())),
         })
     }

+ 2 - 2
src/variant/variant_collection.rs

@@ -150,11 +150,11 @@ impl VariantCollection {
                         let mut depth = 0;
                         let mut alt_seq = var.alternative.to_string();
 
-                        let c = if var.alteration_category() == AlterationCategory::Ins {
+                        let c = if var.alteration_category() == AlterationCategory::INS {
                             // TODO: Add stretch comparison
                             alt_seq = alt_seq.split_off(1);
                             counts_ins_at(&mut bam, &var.position.contig(), var.position.position)?
-                        } else if var.alteration_category() == AlterationCategory::Del {
+                        } else if var.alteration_category() == AlterationCategory::DEL {
                             alt_seq = "D".to_string();
                             counts_at(&mut bam, &var.position.contig(), var.position.position + 1)?
                         } else {