Browse Source

get_alteration_cats

Thomas 8 months ago
parent
commit
fb515b9bd7
3 changed files with 108 additions and 29 deletions
  1. 53 10
      src/annotation/mod.rs
  2. 28 19
      src/lib.rs
  3. 27 0
      src/variant/variant_collection.rs

+ 53 - 10
src/annotation/mod.rs

@@ -651,17 +651,60 @@ pub trait CallerCat {
     fn caller_cat(&self) -> Annotation;
 }
 
-/// Returns true if the annotations include both:
-/// - a GnomAD entry with AF > 0
-/// - and a ConstitAlt entry with n_alt > 0
+/// Determines whether a variant annotation meets either of the following criteria:
+///
+/// 1. It is observed in gnomAD with a non-zero allele frequency *and* has 
+///    at least one constitutive alternate read.
+/// 2. All variant calls are from `SoloTumor` samples and there is at least one 
+///    constitutive alternate read.
+///
+/// This function is typically used to identify variants that are either likely 
+/// germline (based on population frequency and constitutive signal) or artifacts 
+/// (appearing only in tumor calls but with constitutive support).
+///
+/// # Arguments
+///
+/// * `anns` - A slice of `Annotation` values describing variant annotations,
+///            such as gnomAD frequency, sample types, and alternate counts.
+///
+/// # Returns
+///
+/// * `true` if the variant matches either of the criteria described above.
+/// * `false` otherwise.
+///
+/// # Example
+///
+/// ```rust
+/// let anns = vec![
+///     Annotation::GnomAD(GnomAD { gnomad_af: 0.01 }),
+///     Annotation::ConstitAlt(2),
+/// ];
+/// assert!(is_gnomad_and_constit_alt(&anns));
+/// ```
 pub fn is_gnomad_and_constit_alt(anns: &[Annotation]) -> bool {
-    let gnomad = anns
-        .iter()
-        .any(|a| matches!(a, Annotation::GnomAD(g) if g.gnomad_af > 0.0));
+    let mut has_gnomad = false;
+    let mut constit_alt_count = 0;
+    let mut n_callers = 0;
+    let mut n_tumor_callers = 0;
+
+    for ann in anns {
+        match ann {
+            Annotation::GnomAD(g) if g.gnomad_af > 0.0 => has_gnomad = true,
+            Annotation::ConstitAlt(n) => constit_alt_count += *n,
+            Annotation::Callers(_, Sample::SoloTumor) => {
+                n_callers += 1;
+                n_tumor_callers += 1;
+            }
+            Annotation::Callers(..) => {
+                n_callers += 1;
+            }
+            _ => {}
+        }
+    }
 
-    let constit_alt = anns
-        .iter()
-        .any(|a| matches!(a, Annotation::ConstitAlt(n) if *n > 0));
+    let has_constit_alt = constit_alt_count > 0;
+    let only_tumor_with_constit = n_callers > 0 && n_callers == n_tumor_callers && has_constit_alt;
 
-    gnomad && constit_alt
+    (has_gnomad && has_constit_alt) || only_tumor_with_constit
 }
+

+ 28 - 19
src/lib.rs

@@ -177,7 +177,7 @@ mod tests {
 
     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
     use super::*;
-    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::gff::features_ranges, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::AlterationCategory, variants_stats::{self, VariantsStats}}};
+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::AlterationCategory, variants_stats::{self, VariantsStats}}};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -665,24 +665,24 @@ mod tests {
     #[test]
     fn pipe_somatic() -> anyhow::Result<()> {   
         init();
-        let collections = Collections::new(
-            CollectionsConfig::default()
-        )?;
-        for (a, _) in collections.bam_pairs().iter() {
-            if a.id.as_str() != "PASSARD" {
-                continue;
-            }
-            if let Err(e) = SomaticPipe::initialize(&a.id, Config::default()).map(|mut p| if p.should_run() {
-                if let Err(e) = p.run() {
-                    error!("{e}");
-                }
-            }) {
-                error!("{e}");
-            }
-        }
-        Ok(())
-        // let id = "ACHITE";
-        // SomaticPipe::initialize(id, Config::default())?.run()
+        // let collections = Collections::new(
+        //     CollectionsConfig::default()
+        // )?;
+        // for (a, _) in collections.bam_pairs().iter() {
+        //     if a.id.as_str() != "CHAMPION" {
+        //         continue;
+        //     }
+        //     if let Err(e) = SomaticPipe::initialize(&a.id, Config::default()).map(|mut p| if p.should_run() {
+        //         if let Err(e) = p.run() {
+        //             error!("{e}");
+        //         }
+        //     }) {
+        //         error!("{e}");
+        //     }
+        // }
+        // Ok(())
+        let id = "ACHITE";
+        SomaticPipe::initialize(id, Config::default())?.run()
     }
 
     #[test]
@@ -713,6 +713,15 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_read_dict() -> anyhow::Result<()> {
+        init();
+        let genome = read_dict(&Config::default().dict_file)?;
+        let genome_length: usize = genome.into_iter().map(|(_, len)| len as usize).sum();
+        println!("{genome_length}");
+        Ok(())
+    }
+
     #[test]
     fn bases_at() -> anyhow::Result<()> {
         init();

+ 27 - 0
src/variant/variant_collection.rs

@@ -701,6 +701,33 @@ impl Variants {
             .collect()
     }
 
+    /// Returns all variants that match at least one of the specified alteration categories.
+    ///
+    /// This method filters the internal variant list and returns those for which
+    /// `alteration_category()` contains any of the provided categories.
+    ///
+    /// # Arguments
+    ///
+    /// * `cats` - A slice of `AlterationCategory` values to match against each variant.
+    ///
+    /// # Returns
+    ///
+    /// A `Vec<Variant>` containing all variants with a matching alteration category.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// let categories = &[AlterationCategory::DEL, AlterationCategory::INS];
+    /// let selected = dataset.get_alteration_cats(categories);
+    /// ```
+    pub fn get_alteration_cats(&self, cats: &[AlterationCategory]) -> Vec<Variant> {
+        self.data
+            .iter()
+            .filter(|v| cats.iter().any(|cat| v.alteration_category().contains(cat)))
+            .cloned()
+            .collect()
+    }
+
     /// Annotates variants overlapping a list of genome ranges and meeting specified constraints.
     ///
     /// # Arguments