|
|
@@ -1,12 +1,20 @@
|
|
|
-use crate::variant::variant_collection::VariantCollection;
|
|
|
+use crate::{
|
|
|
+ annotation::Annotations,
|
|
|
+ positions::{GenomePosition, GetGenomePosition, VcfPosition},
|
|
|
+ variant::variant_collection::VariantCollection,
|
|
|
+};
|
|
|
use anyhow::{anyhow, Context, Ok};
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
-use std::{cmp::Ordering, fmt, str::FromStr};
|
|
|
+use std::{
|
|
|
+ cmp::Ordering,
|
|
|
+ fmt,
|
|
|
+ hash::{Hash, Hasher},
|
|
|
+ str::FromStr,
|
|
|
+};
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
pub struct VcfVariant {
|
|
|
- pub contig: String,
|
|
|
- pub position: u32,
|
|
|
+ pub position: GenomePosition,
|
|
|
pub id: String,
|
|
|
pub reference: ReferenceAlternative,
|
|
|
pub alternative: ReferenceAlternative,
|
|
|
@@ -19,19 +27,31 @@ pub struct VcfVariant {
|
|
|
impl PartialEq for VcfVariant {
|
|
|
fn eq(&self, other: &Self) -> bool {
|
|
|
// Nota bene: id, filter, info, format and quality is intentionally not compared
|
|
|
- self.contig == other.contig
|
|
|
- && self.position == other.position
|
|
|
+ self.position == other.position
|
|
|
&& self.reference == other.reference
|
|
|
&& self.alternative == other.alternative
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+impl Hash for VcfVariant {
|
|
|
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
|
|
+ self.position.hash(state);
|
|
|
+ self.reference.hash(state);
|
|
|
+ self.alternative.hash(state);
|
|
|
+ }
|
|
|
+}
|
|
|
impl Eq for VcfVariant {}
|
|
|
impl FromStr for VcfVariant {
|
|
|
type Err = anyhow::Error;
|
|
|
|
|
|
fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
let v: Vec<&str> = s.split('\t').collect();
|
|
|
+ let vcf_position: VcfPosition = (
|
|
|
+ *v.first().ok_or(anyhow!("Can't get contig from: {s}"))?,
|
|
|
+ *v.get(1).ok_or(anyhow!("Can't get position from: {s}"))?,
|
|
|
+ )
|
|
|
+ .try_into()
|
|
|
+ .context(format!("Can't parse position from: {s}"))?;
|
|
|
|
|
|
let formats = if v.len() == 10 {
|
|
|
(
|
|
|
@@ -45,15 +65,7 @@ impl FromStr for VcfVariant {
|
|
|
};
|
|
|
|
|
|
Ok(Self {
|
|
|
- contig: v
|
|
|
- .first()
|
|
|
- .ok_or(anyhow!("Can't parse contig from: {s}"))?
|
|
|
- .to_string(),
|
|
|
- position: v
|
|
|
- .get(1)
|
|
|
- .ok_or(anyhow!("Can't parse contig from: {s}"))?
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse position from: {s}"))?,
|
|
|
+ position: vcf_position.into(),
|
|
|
id: v
|
|
|
.get(2)
|
|
|
.ok_or(anyhow!("Can't parse id from: {s}"))?
|
|
|
@@ -90,9 +102,12 @@ impl FromStr for VcfVariant {
|
|
|
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ADJAGBA_diag
|
|
|
impl VcfVariant {
|
|
|
pub fn into_vcf_row(&self) -> String {
|
|
|
+ let vcf_position: VcfPosition = self.position.clone().into();
|
|
|
+ let (contig, position) = vcf_position.into();
|
|
|
+
|
|
|
let mut columns = vec![
|
|
|
- self.contig.to_string(),
|
|
|
- self.position.to_string(),
|
|
|
+ contig,
|
|
|
+ position,
|
|
|
self.id.to_string(),
|
|
|
self.reference.to_string(),
|
|
|
self.alternative.to_string(),
|
|
|
@@ -112,17 +127,9 @@ impl VcfVariant {
|
|
|
columns.join("\t")
|
|
|
}
|
|
|
|
|
|
- pub fn chr_num(&self) -> u32 {
|
|
|
- self.contig
|
|
|
- .trim_start_matches("chr")
|
|
|
- .parse()
|
|
|
- .unwrap_or(u32::MAX)
|
|
|
- }
|
|
|
-
|
|
|
pub fn commun_deepvariant_clairs(&self) -> VcfVariant {
|
|
|
VcfVariant {
|
|
|
- contig: self.contig.clone(),
|
|
|
- position: self.position,
|
|
|
+ position: self.position.clone(),
|
|
|
id: self.id.clone(),
|
|
|
reference: self.reference.clone(),
|
|
|
alternative: self.alternative.clone(),
|
|
|
@@ -132,6 +139,18 @@ impl VcfVariant {
|
|
|
formats: self.formats.commun_deepvariant_clairs(),
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ pub fn hash_variant(&self) -> u64 {
|
|
|
+ let mut hasher = std::collections::hash_map::DefaultHasher::new();
|
|
|
+ self.hash(&mut hasher);
|
|
|
+ hasher.finish()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl GetGenomePosition for VcfVariant {
|
|
|
+ fn position(&self) -> &GenomePosition {
|
|
|
+ &self.position
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
impl PartialOrd for VcfVariant {
|
|
|
@@ -142,25 +161,10 @@ impl PartialOrd for VcfVariant {
|
|
|
|
|
|
impl Ord for VcfVariant {
|
|
|
fn cmp(&self, other: &Self) -> Ordering {
|
|
|
- let self_num = self.chr_num();
|
|
|
- let other_num = other.chr_num();
|
|
|
-
|
|
|
- match self_num.cmp(&other_num) {
|
|
|
- Ordering::Equal => self.position.cmp(&other.position),
|
|
|
- other => other,
|
|
|
- }
|
|
|
+ self.position.cmp(&other.position)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-// Tag
|
|
|
-#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
|
-pub enum Annotation {
|
|
|
- Source(String),
|
|
|
- Diag,
|
|
|
- Constit,
|
|
|
- Other((String, String)), // (key, value)
|
|
|
-}
|
|
|
-
|
|
|
/// Info
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
|
|
|
pub struct Infos(Vec<Info>);
|
|
|
@@ -393,6 +397,7 @@ impl From<Format> for (String, String) {
|
|
|
.collect::<Vec<_>>()
|
|
|
.join(",")
|
|
|
};
|
|
|
+
|
|
|
match format {
|
|
|
Format::GT(value) => ("GT".to_string(), value),
|
|
|
Format::GQ(value) => ("GQ".to_string(), value.to_string()),
|
|
|
@@ -469,7 +474,7 @@ impl fmt::Display for Filter {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
|
|
|
pub enum ReferenceAlternative {
|
|
|
Nucleotide(Base),
|
|
|
Nucleotides(Vec<Base>),
|
|
|
@@ -512,7 +517,7 @@ impl fmt::Display for ReferenceAlternative {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
|
|
|
pub enum Base {
|
|
|
A,
|
|
|
T,
|
|
|
@@ -565,5 +570,5 @@ impl fmt::Display for Base {
|
|
|
}
|
|
|
|
|
|
pub trait Variants {
|
|
|
- fn variants(&self) -> anyhow::Result<VariantCollection>;
|
|
|
+ fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection>;
|
|
|
}
|