|
|
@@ -29,29 +29,124 @@ use rayon::prelude::*;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
use vep::{get_best_vep, VEP};
|
|
|
|
|
|
+/// One annotation that can be attached to a genomic variant.
|
|
|
+///
|
|
|
+/// Kinds include caller/sample provenance, alteration category, depth and entropy values and
|
|
|
+/// flags, database hits (COSMIC, GnomAD), VEP results, trinucleotide context and replication timing.
|
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode)]
|
|
|
pub enum Annotation {
|
|
|
+ /// Variant caller and the associated [`Sample`] type; displayed as `{caller} {sample}`.
|
|
|
Callers(Caller, Sample),
|
|
|
+
|
|
|
+ /// Category of the alteration (presumably SNV, indel, ... - see `AlterationCategory`).
|
|
|
AlterationCategory(AlterationCategory),
|
|
|
+
|
|
|
+ /// Shannon entropy value. NOTE(review): genomic region or read distribution - confirm which.
|
|
|
ShannonEntropy(f64),
|
|
|
+
|
|
|
+ /// Depth of coverage in the constitutional (normal) sample.
|
|
|
ConstitDepth(u16),
|
|
|
+
|
|
|
+ /// Alternate allele count in the constitutional (normal) sample.
|
|
|
ConstitAlt(u16),
|
|
|
+
|
|
|
+ /// Flag: depth in the constitutional sample is considered low (threshold not defined here).
|
|
|
LowConstitDepth,
|
|
|
+
|
|
|
+ /// Flag: alternate allele count in the constitutional sample is considered high.
|
|
|
HighConstitAlt,
|
|
|
+
|
|
|
+ /// COSMIC database hit (cancer-associated mutation).
|
|
|
Cosmic(Cosmic),
|
|
|
+
|
|
|
+ /// GnomAD population-frequency annotation (exposes an AF value as `gnomad_af`).
|
|
|
GnomAD(GnomAD),
|
|
|
+
|
|
|
+ /// Flag indicating low Shannon entropy (possibly less reliable region).
|
|
|
LowEntropy,
|
|
|
+
|
|
|
+ /// Trinucleotide context of the variant as three [`Base`] values; displayed as `Trinucleotides(XYZ)`.
|
|
|
+ TriNucleotides([Base; 3]),
|
|
|
+
|
|
|
+ /// Variant Effect Predictor (VEP) annotations, one `VEP` value per element.
|
|
|
VEP(Vec<VEP>),
|
|
|
+
|
|
|
+ /// Replication timing class (early or late) for the variant's genomic position.
|
|
|
+ ReplicationTiming(ReplicationClass),
|
|
|
}
|
|
|
|
|
|
+/// Sample type a variant call is associated with; paired with a `Caller` in `Annotation::Callers`.
|
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Encode, Decode)]
|
|
|
pub enum Sample {
|
|
|
+ /// Tumor sample analysed on its own, without a matched normal.
|
|
|
SoloTumor,
|
|
|
+
|
|
|
+ /// Constitutional (normal) sample analysed on its own, without a matched tumor.
|
|
|
SoloConstit,
|
|
|
+
|
|
|
+ /// Variant observed in a germline context.
|
|
|
Germline,
|
|
|
+
|
|
|
+ /// Variant classified as somatic (tumor-specific).
|
|
|
Somatic,
|
|
|
}
|
|
|
|
|
|
+/// A single DNA nucleotide.
+///
+/// Covers the four standard bases (A, T, C, G) plus `N` for an unknown or
+/// ambiguous base. The enum is fieldless, so besides `Copy` it also derives
+/// `Eq` and `Hash`; that lets a `[Base; 3]` context be compared totally and
+/// used as a `HashMap`/`HashSet` key.
+#[derive(Copy, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Encode, Decode)]
+pub enum Base {
+    /// Adenine
+    A,
+    /// Thymine
+    T,
+    /// Cytosine
+    C,
+    /// Guanine
+    G,
+    /// Unknown or ambiguous nucleotide
+    N,
+}
|
|
|
+
|
|
|
+/// Renders a [`Base`] as its one-letter code: `A`, `T`, `C`, `G` or `N`.
+impl fmt::Display for Base {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Pick the static letter first, then emit it in a single write.
+        let letter = match self {
+            Base::A => "A",
+            Base::T => "T",
+            Base::C => "C",
+            Base::G => "G",
+            Base::N => "N",
+        };
+        f.write_str(letter)
+    }
+}
|
|
|
+
|
|
|
+/// Converts the first three characters of `s` into a `[Base; 3]` array.
+///
+/// Matching is ASCII case-insensitive. Any character other than `A`, `T`, `C`
+/// or `G` becomes [`Base::N`], and characters after the third are ignored.
+/// When `s` holds fewer than three characters the missing positions are
+/// filled with [`Base::N`], so the function never panics and never fails.
+pub fn parse_trinuc(s: &str) -> [Base; 3] {
+    /// Maps one character to its base, falling back to `Base::N`.
+    fn char_to_base(c: char) -> Base {
+        match c.to_ascii_uppercase() {
+            'A' => Base::A,
+            'T' => Base::T,
+            'C' => Base::C,
+            'G' => Base::G,
+            _ => Base::N,
+        }
+    }
+
+    // Start from "all unknown" and overwrite only the positions `s` actually
+    // provides; `zip` stops as soon as either side runs out, which replaces
+    // the previous unchecked `chars[0..=2]` indexing and its `Vec` allocation.
+    let mut bases = [Base::N; 3];
+    for (slot, c) in bases.iter_mut().zip(s.chars()) {
+        *slot = char_to_base(c);
+    }
+    bases
+}
|
|
|
+
|
|
|
+/// Replication timing class carried by [`Annotation::ReplicationTiming`].
+///
+/// The enum is fieldless, so it derives `Copy` and `Hash` in addition to the
+/// comparison traits; values can be passed by value and used as map keys.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Encode, Decode)]
+pub enum ReplicationClass {
+    /// Early class; rendered as `ReplicationEarly` by `Annotation`'s `Display` impl.
+    Early,
+    /// Late class; rendered as `ReplicationLate` by `Annotation`'s `Display` impl.
+    Late,
+}
|
|
|
+
|
|
|
impl fmt::Display for Sample {
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
write!(
|
|
|
@@ -81,22 +176,35 @@ impl FromStr for Sample {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/// Formats an `Annotation` as a short text label.
+///
+/// Most variants print only their name and omit the payload. `Callers` prints
+/// `{caller} {sample}`, `AlterationCategory` prints the inner value's own
+/// string form, `TriNucleotides` prints `Trinucleotides(XYZ)` and
+/// `ReplicationTiming` prints `ReplicationEarly` or `ReplicationLate`.
+///
+/// The exact strings matter: `to_string()` output of annotations is also
+/// pushed as a categorical label elsewhere in this file.
impl fmt::Display for Annotation {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let str = match self {
-            Annotation::Callers(caller, sample) => &format!("{caller} {sample}"),
-            Annotation::ShannonEntropy(_) => "ShannonEntropy",
-            Annotation::ConstitDepth(_) => "ConstitDepth",
-            Annotation::ConstitAlt(_) => "ConstitAlt",
-            Annotation::LowConstitDepth => "LowConstitDepth",
-            Annotation::HighConstitAlt => "HighConstitAlt",
-            Annotation::Cosmic(_) => "Cosmic",
-            Annotation::GnomAD(_) => "GnomAD",
-            Annotation::LowEntropy => "LowEntropy",
-            Annotation::VEP(_) => "VEP",
-            Annotation::AlterationCategory(alt_cat) => &alt_cat.to_string(),
-        };
-        write!(f, "{}", str)
+        // Write straight into the formatter instead of first building a
+        // `String` for every variant; the emitted text is unchanged.
+        match self {
+            Annotation::Callers(caller, sample) => write!(f, "{caller} {sample}"),
+            // Only `to_string()` is visibly available on the category type,
+            // so keep using it rather than assuming a `Display` impl.
+            Annotation::AlterationCategory(alt_cat) => f.write_str(&alt_cat.to_string()),
+            Annotation::ShannonEntropy(_) => f.write_str("ShannonEntropy"),
+            Annotation::ConstitDepth(_) => f.write_str("ConstitDepth"),
+            Annotation::ConstitAlt(_) => f.write_str("ConstitAlt"),
+            Annotation::LowConstitDepth => f.write_str("LowConstitDepth"),
+            Annotation::HighConstitAlt => f.write_str("HighConstitAlt"),
+            Annotation::Cosmic(_) => f.write_str("Cosmic"),
+            Annotation::GnomAD(_) => f.write_str("GnomAD"),
+            Annotation::LowEntropy => f.write_str("LowEntropy"),
+            Annotation::VEP(_) => f.write_str("VEP"),
+            Annotation::TriNucleotides(bases) => {
+                // Label spelling ("Trinucleotides") is kept byte-for-byte.
+                f.write_str("Trinucleotides(")?;
+                for base in bases {
+                    write!(f, "{base}")?;
+                }
+                f.write_str(")")
+            }
+            Annotation::ReplicationTiming(ReplicationClass::Early) => {
+                f.write_str("ReplicationEarly")
+            }
+            Annotation::ReplicationTiming(ReplicationClass::Late) => {
+                f.write_str("ReplicationLate")
+            }
+        }
    }
}
|
|
|
|
|
|
@@ -234,6 +342,8 @@ impl Annotations {
|
|
|
| Annotation::LowEntropy
|
|
|
| Annotation::GnomAD(_)
|
|
|
| Annotation::VEP(_)
|
|
|
+ | Annotation::TriNucleotides(_)
|
|
|
+ | Annotation::ReplicationTiming(_)
|
|
|
| Annotation::HighConstitAlt => categorical.push(ann.to_string()),
|
|
|
Annotation::Callers(caller, sample) => {
|
|
|
categorical.push(format!("{caller} {sample}"))
|
|
|
@@ -533,14 +643,13 @@ pub trait CallerCat {
|
|
|
/// - a GnomAD entry with AF > 0
|
|
|
/// - and a ConstitAlt entry with n_alt > 0
|
|
|
pub fn is_gnomad_and_constit_alt(anns: &[Annotation]) -> bool {
-    let gnomad = anns.iter().any(|a| {
-        matches!(a, Annotation::GnomAD(g) if g.gnomad_af > 0.0)
-    });
-
-    let constit_alt = anns.iter().any(|a| {
-        matches!(a, Annotation::ConstitAlt(n) if *n > 0)
-    });
-
-    gnomad && constit_alt
+    // One walk over the annotations, remembering whether each of the two
+    // required kinds has been seen: a GnomAD entry with `gnomad_af > 0.0`
+    // and a ConstitAlt entry with a count above zero.
+    let mut seen_gnomad_af = false;
+    let mut seen_constit_alt = false;
+
+    for ann in anns {
+        match ann {
+            Annotation::GnomAD(g) if g.gnomad_af > 0.0 => seen_gnomad_af = true,
+            Annotation::ConstitAlt(n) if *n > 0 => seen_constit_alt = true,
+            _ => {}
+        }
+    }
+
+    seen_gnomad_af && seen_constit_alt
}
|
|
|
-
|