STEIMLE Thomas 2 недели назад
Родитель
Коммит
66b85fb388
2 изменённых файла: 201 добавлено и 0 удалено
  1. 1 0
      src/variant/mod.rs
  2. 200 0
      src/variant/variant_representation.rs

+ 1 - 0
src/variant/mod.rs

@@ -42,3 +42,4 @@
 pub mod variant_collection;
 pub mod variants_stats;
 pub mod vcf_variant;
+pub mod variant_representation;

+ 200 - 0
src/variant/variant_representation.rs

@@ -0,0 +1,200 @@
+use ordered_float::OrderedFloat;
+use serde::Serialize;
+
+use crate::{
+    helpers::estimate_shannon_entropy,
+    positions::contig_to_num,
+    variant::{variant_collection::Variant, vcf_variant::{AlterationCategory, BNDDesc}},
+};
+
+#[derive(Debug, Clone, Default, Eq, PartialEq, Serialize)]
+pub enum Representation { // Serializable summary of a variant; one shape per alteration category (see the `TryFrom<&Variant>` impl).
+    #[default]
+    SNV, // Carries no payload; also the `Default` value.
+    Deletion { // Built from a VCF record's deletion description.
+        start: u32, // `start` of the deletion description.
+        end: u32, // `end` of the deletion description.
+        len: u32, // `Variant::deletion_length()`, falling back to the type's default when unavailable.
+        inserted_sequence: Option<String>, // Always `None` when produced by the conversion in this file.
+    },
+    DeletionInversion { // Built from a breakend description after `normalize_bnd`.
+        start: u32, // `a_position` of the normalised breakend.
+        start_sens: bool, // `a_sens` of the normalised breakend (presumably an orientation flag; confirm).
+        end: u32, // `b_position` of the normalised breakend.
+        end_sens: bool, // `b_sens` of the normalised breakend (presumably an orientation flag; confirm).
+        len: u32, // `end - start`, saturating at 0.
+        inserted_sequence: Option<String>, // The breakend's `added_nt`, or `None` when it is empty.
+    },
+    Insertion { // Built from the first VCF record that reports an inserted sequence.
+        len: u32, // Byte length of `inserted_sequence`.
+        entropy: OrderedFloat<f64>, // Result of `estimate_shannon_entropy` on `inserted_sequence`.
+        inserted_sequence: String, // The inserted sequence as reported by the VCF record.
+    },
+    Translocation { // Built from a breakend description after `normalize_bnd`; "left" is its `a` side, "right" its `b` side.
+        left_sens: bool, // `a_sens` of the normalised breakend (presumably an orientation flag; confirm).
+        left_contig: String, // `a_contig` of the normalised breakend.
+        left_position: u32, // `a_position` of the normalised breakend.
+        left_genes: Vec<String>, // Left empty by the conversion in this file.
+        right_contig: String, // `b_contig` of the normalised breakend.
+        right_position: u32, // `b_position` of the normalised breakend.
+        right_sens: bool, // `b_sens` of the normalised breakend (presumably an orientation flag; confirm).
+        right_genes: Vec<String>, // Left empty by the conversion in this file.
+        inserted_sequence: Option<String>, // The breakend's `added_nt`, or `None` when it is empty.
+    },
+    Duplication { // Built from a breakend description (not passed through `normalize_bnd`).
+        start: u32, // One breakpoint position taken from the breakend description.
+        end: u32, // The other breakpoint position taken from the breakend description.
+        len: u32, // Distance between the two breakpoint positions.
+    },
+    Inversion { // Built from a breakend description after `normalize_bnd`.
+        inserted_sequence: Option<String>, // The breakend's `added_nt`, or `None` when it is empty.
+        start: u32, // `a_position` of the normalised breakend.
+        start_sens: bool, // `a_sens` of the normalised breakend (presumably an orientation flag; confirm).
+        end: u32, // `b_position` of the normalised breakend.
+        end_sens: bool, // `b_sens` of the normalised breakend (presumably an orientation flag; confirm).
+        len: u32, // `end - start`, saturating at 0.
+    },
+}
+
+impl TryFrom<&Variant> for Representation {
+    type Error = anyhow::Error;
+
+    /// Builds a [`Representation`] from the first alteration category of `variant`.
+    ///
+    /// Only the first entry returned by `alteration_category()` is considered. The payload
+    /// comes from the first VCF record of the variant that can supply what the category needs
+    /// (a deletion description, an inserted sequence or a breakend description).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the variant has no alteration category, when the category is
+    /// `CNV`, `BND` or `Other`, or when no VCF record yields the required description. The
+    /// message reports the variant's contig and its position plus one.
+    fn try_from(variant: &Variant) -> anyhow::Result<Self> {
+        use AlterationCategory::*;
+
+        let category = variant
+            .alteration_category()
+            .first()
+            .cloned()
+            .ok_or_else(|| anyhow::anyhow!("variant has no alteration category"))?;
+
+        // First breakend description that can be obtained; records for which `bnd_desc()`
+        // fails are skipped rather than reported.
+        let first_bnd = || variant.vcf_variants.iter().find_map(|v| v.bnd_desc().ok());
+
+        let repr: Option<Representation> = match category {
+            SNV => Some(Representation::SNV),
+
+            DEL => variant
+                .vcf_variants
+                .iter()
+                .find_map(|v| v.deletion_desc())
+                .map(|d| Representation::Deletion {
+                    start: d.start,
+                    end: d.end,
+                    len: variant.deletion_length().unwrap_or_default(),
+                    inserted_sequence: None,
+                }),
+
+            INS => variant
+                .vcf_variants
+                .iter()
+                .find_map(|v| v.inserted_seq())
+                .map(|inserted_sequence| Representation::Insertion {
+                    // Saturate instead of silently wrapping if the length exceeds `u32`.
+                    len: u32::try_from(inserted_sequence.len()).unwrap_or(u32::MAX),
+                    entropy: estimate_shannon_entropy(&inserted_sequence).into(),
+                    inserted_sequence,
+                }),
+
+            TRL => first_bnd().map(|d| {
+                let d = normalize_bnd(d);
+
+                let inserted_sequence = (!d.added_nt.is_empty()).then(|| d.added_nt.clone());
+
+                Representation::Translocation {
+                    left_sens: d.a_sens,
+                    left_contig: d.a_contig,
+                    left_position: d.a_position,
+                    right_contig: d.b_contig,
+                    right_position: d.b_position,
+                    right_sens: d.b_sens,
+                    inserted_sequence,
+                    left_genes: Vec::new(),
+                    right_genes: Vec::new(),
+                }
+            }),
+
+            DUP => first_bnd().map(|d| {
+                // The breakend may be reported from either side; order the two positions so
+                // that `start <= end` and `len` is the real span instead of collapsing to 0.
+                let start = d.a_position.min(d.b_position);
+                let end = d.a_position.max(d.b_position);
+
+                Representation::Duplication {
+                    start,
+                    end,
+                    len: end - start,
+                }
+            }),
+
+            DELINV => first_bnd().map(|d| {
+                // NOTE(review): the added nucleotides are read before normalisation here,
+                // but after it for translocations; confirm which one is intended.
+                let inserted_sequence = (!d.added_nt.is_empty()).then(|| d.added_nt.clone());
+
+                let d = normalize_bnd(d);
+
+                Representation::DeletionInversion {
+                    inserted_sequence,
+                    start: d.a_position,
+                    start_sens: d.a_sens,
+                    end: d.b_position,
+                    end_sens: d.b_sens,
+                    len: d.b_position.saturating_sub(d.a_position),
+                }
+            }),
+
+            INV => first_bnd().map(|d| {
+                // NOTE(review): the added nucleotides are read before normalisation here,
+                // but after it for translocations; confirm which one is intended.
+                let inserted_sequence = (!d.added_nt.is_empty()).then(|| d.added_nt.clone());
+
+                let d = normalize_bnd(d);
+
+                Representation::Inversion {
+                    inserted_sequence,
+                    start: d.a_position,
+                    start_sens: d.a_sens,
+                    end: d.b_position,
+                    end_sens: d.b_sens,
+                    len: d.b_position.saturating_sub(d.a_position),
+                }
+            }),
+
+            CNV | BND | Other => None,
+        };
+
+        // The location is only needed for the error message, so it is read lazily here.
+        repr.ok_or_else(|| {
+            anyhow::anyhow!(
+                "could not build Representation for category {:?} at {}:{}",
+                category,
+                variant.position.contig,
+                variant.position.position + 1
+            )
+        })
+    }
+}
+
+/// Orients a breakend description so that its `a` side does not sort after its `b` side.
+///
+/// Sides are ordered by `contig_to_num` of their contig first and by position second.
+/// When `a` sorts strictly after `b`, the result of `d.rc()` is returned (presumably the
+/// description seen from the other side; confirm against `BNDDesc::rc`); otherwise `d` is
+/// returned unchanged.
+fn normalize_bnd(d: BNDDesc) -> BNDDesc {
+    // Tuples compare lexicographically: contig number first, then position.
+    let a_key = (contig_to_num(&d.a_contig), d.a_position);
+    let b_key = (contig_to_num(&d.b_contig), d.b_position);
+
+    if a_key > b_key {
+        return d.rc();
+    }
+
+    d
+}