|
|
@@ -0,0 +1,200 @@
|
|
|
+use ordered_float::OrderedFloat;
|
|
|
+use serde::Serialize;
|
|
|
+
|
|
|
+use crate::{
|
|
|
+ helpers::estimate_shannon_entropy,
|
|
|
+ positions::contig_to_num,
|
|
|
+ variant::{variant_collection::Variant, vcf_variant::{AlterationCategory, BNDDesc}},
|
|
|
+};
|
|
|
+
|
|
|
+#[derive(Debug, Clone, Default, Eq, PartialEq, Serialize)]
|
|
|
+pub enum Representation {
|
|
|
+ #[default]
|
|
|
+ SNV,
|
|
|
+ Deletion {
|
|
|
+ start: u32,
|
|
|
+ end: u32,
|
|
|
+ len: u32,
|
|
|
+ inserted_sequence: Option<String>,
|
|
|
+ },
|
|
|
+ DeletionInversion {
|
|
|
+ start: u32,
|
|
|
+ start_sens: bool,
|
|
|
+ end: u32,
|
|
|
+ end_sens: bool,
|
|
|
+ len: u32,
|
|
|
+ inserted_sequence: Option<String>,
|
|
|
+ },
|
|
|
+ Insertion {
|
|
|
+ len: u32,
|
|
|
+ entropy: OrderedFloat<f64>,
|
|
|
+ inserted_sequence: String,
|
|
|
+ },
|
|
|
+ Translocation {
|
|
|
+ left_sens: bool,
|
|
|
+ left_contig: String,
|
|
|
+ left_position: u32,
|
|
|
+ left_genes: Vec<String>,
|
|
|
+ right_contig: String,
|
|
|
+ right_position: u32,
|
|
|
+ right_sens: bool,
|
|
|
+ right_genes: Vec<String>,
|
|
|
+ inserted_sequence: Option<String>,
|
|
|
+ },
|
|
|
+ Duplication {
|
|
|
+ start: u32,
|
|
|
+ end: u32,
|
|
|
+ len: u32,
|
|
|
+ },
|
|
|
+ Inversion {
|
|
|
+ inserted_sequence: Option<String>,
|
|
|
+ start: u32,
|
|
|
+ start_sens: bool,
|
|
|
+ end: u32,
|
|
|
+ end_sens: bool,
|
|
|
+ len: u32,
|
|
|
+ },
|
|
|
+}
|
|
|
+
|
|
|
+impl TryFrom<&Variant> for Representation {
|
|
|
+ type Error = anyhow::Error;
|
|
|
+
|
|
|
+ fn try_from(variant: &Variant) -> anyhow::Result<Self> {
|
|
|
+ let mut contig = variant.position.contig;
|
|
|
+ let mut position = variant.position.position + 1;
|
|
|
+
|
|
|
+ use AlterationCategory::*;
|
|
|
+
|
|
|
+ let category = variant
|
|
|
+ .alteration_category()
|
|
|
+ .first()
|
|
|
+ .cloned()
|
|
|
+ .ok_or_else(|| anyhow::anyhow!("variant has no alteration category"))?;
|
|
|
+
|
|
|
+ let repr: Option<Representation> = match category {
|
|
|
+ SNV => Some(Representation::SNV),
|
|
|
+
|
|
|
+ DEL => variant
|
|
|
+ .vcf_variants
|
|
|
+ .iter()
|
|
|
+ .find_map(|v| v.deletion_desc())
|
|
|
+ .map(|d| Representation::Deletion {
|
|
|
+ start: d.start,
|
|
|
+ end: d.end,
|
|
|
+ len: variant.deletion_length().unwrap_or_default(),
|
|
|
+ inserted_sequence: None,
|
|
|
+ }),
|
|
|
+
|
|
|
+ INS => variant.vcf_variants.iter().find_map(|v| {
|
|
|
+ v.inserted_seq().map(|inserted_sequence| {
|
|
|
+ let entropy = estimate_shannon_entropy(&inserted_sequence).into();
|
|
|
+
|
|
|
+ Representation::Insertion {
|
|
|
+ len: inserted_sequence.len() as u32,
|
|
|
+ entropy,
|
|
|
+ inserted_sequence,
|
|
|
+ }
|
|
|
+ })
|
|
|
+ }),
|
|
|
+
|
|
|
+ TRL => variant
|
|
|
+ .vcf_variants
|
|
|
+ .iter()
|
|
|
+ .find_map(|v| v.bnd_desc().ok())
|
|
|
+ .map(|d| {
|
|
|
+ let d = normalize_bnd(d);
|
|
|
+
|
|
|
+ contig = contig_to_num(&d.a_contig);
|
|
|
+ position = d.a_position;
|
|
|
+
|
|
|
+ let inserted_sequence = (!d.added_nt.is_empty()).then(|| d.added_nt.clone());
|
|
|
+
|
|
|
+ Representation::Translocation {
|
|
|
+ left_sens: d.a_sens,
|
|
|
+ left_contig: d.a_contig,
|
|
|
+ left_position: d.a_position,
|
|
|
+ right_contig: d.b_contig,
|
|
|
+ right_position: d.b_position,
|
|
|
+ right_sens: d.b_sens,
|
|
|
+ inserted_sequence,
|
|
|
+ left_genes: Vec::new(),
|
|
|
+ right_genes: Vec::new(),
|
|
|
+ }
|
|
|
+ }),
|
|
|
+
|
|
|
+ DUP => variant
|
|
|
+ .vcf_variants
|
|
|
+ .iter()
|
|
|
+ .find_map(|v| v.bnd_desc().ok())
|
|
|
+ .map(|d| Representation::Duplication {
|
|
|
+ start: d.b_position,
|
|
|
+ end: d.a_position,
|
|
|
+ len: d.a_position.saturating_sub(d.b_position),
|
|
|
+ }),
|
|
|
+
|
|
|
+ DELINV => variant
|
|
|
+ .vcf_variants
|
|
|
+ .iter()
|
|
|
+ .find_map(|v| v.bnd_desc().ok())
|
|
|
+ .map(|d| {
|
|
|
+ let inserted_sequence = (!d.added_nt.is_empty()).then(|| d.added_nt.clone());
|
|
|
+
|
|
|
+ let d = normalize_bnd(d);
|
|
|
+
|
|
|
+ position = d.a_position;
|
|
|
+
|
|
|
+ Representation::DeletionInversion {
|
|
|
+ inserted_sequence,
|
|
|
+ start: d.a_position,
|
|
|
+ start_sens: d.a_sens,
|
|
|
+ end: d.b_position,
|
|
|
+ end_sens: d.b_sens,
|
|
|
+ len: d.b_position.saturating_sub(d.a_position),
|
|
|
+ }
|
|
|
+ }),
|
|
|
+
|
|
|
+ INV => variant
|
|
|
+ .vcf_variants
|
|
|
+ .iter()
|
|
|
+ .find_map(|v| v.bnd_desc().ok())
|
|
|
+ .map(|d| {
|
|
|
+ let inserted_sequence = (!d.added_nt.is_empty()).then(|| d.added_nt.clone());
|
|
|
+
|
|
|
+ let d = normalize_bnd(d);
|
|
|
+
|
|
|
+ position = d.a_position;
|
|
|
+
|
|
|
+ Representation::Inversion {
|
|
|
+ inserted_sequence,
|
|
|
+ start: d.a_position,
|
|
|
+ start_sens: d.a_sens,
|
|
|
+ end: d.b_position,
|
|
|
+ end_sens: d.b_sens,
|
|
|
+ len: d.b_position.saturating_sub(d.a_position),
|
|
|
+ }
|
|
|
+ }),
|
|
|
+
|
|
|
+ CNV | BND | Other => None,
|
|
|
+ };
|
|
|
+
|
|
|
+ repr.ok_or_else(|| {
|
|
|
+ anyhow::anyhow!(
|
|
|
+ "could not build Representation for category {:?} at {}:{}",
|
|
|
+ category,
|
|
|
+ contig,
|
|
|
+ position
|
|
|
+ )
|
|
|
+ })
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn normalize_bnd(d: BNDDesc) -> BNDDesc {
|
|
|
+ let a = contig_to_num(&d.a_contig);
|
|
|
+ let b = contig_to_num(&d.b_contig);
|
|
|
+
|
|
|
+ if a > b || (a == b && d.a_position > d.b_position) {
|
|
|
+ d.rc()
|
|
|
+ } else {
|
|
|
+ d
|
|
|
+ }
|
|
|
+}
|