|
@@ -1,9 +1,10 @@
|
|
|
|
|
+use crate::variant::variant_collection::VariantCollection;
|
|
|
use anyhow::{anyhow, Context, Ok};
|
|
use anyhow::{anyhow, Context, Ok};
|
|
|
use serde::{Deserialize, Serialize};
|
|
use serde::{Deserialize, Serialize};
|
|
|
-use std::{fmt, str::FromStr};
|
|
|
|
|
|
|
+use std::{cmp::Ordering, fmt, str::FromStr};
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
-pub struct Variant {
|
|
|
|
|
|
|
+pub struct VcfVariant {
|
|
|
pub contig: String,
|
|
pub contig: String,
|
|
|
pub position: u32,
|
|
pub position: u32,
|
|
|
pub id: String,
|
|
pub id: String,
|
|
@@ -11,12 +12,11 @@ pub struct Variant {
|
|
|
pub alternative: ReferenceAlternative,
|
|
pub alternative: ReferenceAlternative,
|
|
|
pub quality: Option<f32>,
|
|
pub quality: Option<f32>,
|
|
|
pub filter: Filter,
|
|
pub filter: Filter,
|
|
|
- pub info: String,
|
|
|
|
|
|
|
+ pub infos: Infos,
|
|
|
pub formats: Formats,
|
|
pub formats: Formats,
|
|
|
- pub annotations: Vec<Annotation>,
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-impl PartialEq for Variant {
|
|
|
|
|
|
|
+impl PartialEq for VcfVariant {
|
|
|
fn eq(&self, other: &Self) -> bool {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
|
// Nota bene: id, filter, info, format and quality is intentionally not compared
|
|
// Nota bene: id, filter, info, format and quality is intentionally not compared
|
|
|
self.contig == other.contig
|
|
self.contig == other.contig
|
|
@@ -26,8 +26,8 @@ impl PartialEq for Variant {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-impl Eq for Variant {}
|
|
|
|
|
-impl FromStr for Variant {
|
|
|
|
|
|
|
+impl Eq for VcfVariant {}
|
|
|
|
|
+impl FromStr for VcfVariant {
|
|
|
type Err = anyhow::Error;
|
|
type Err = anyhow::Error;
|
|
|
|
|
|
|
|
fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
@@ -77,18 +77,18 @@ impl FromStr for Variant {
|
|
|
.ok_or(anyhow!("Can't parse filter from: {s}"))?
|
|
.ok_or(anyhow!("Can't parse filter from: {s}"))?
|
|
|
.parse()
|
|
.parse()
|
|
|
.context(format!("Can't parse filter from: {s}"))?,
|
|
.context(format!("Can't parse filter from: {s}"))?,
|
|
|
- info: v
|
|
|
|
|
|
|
+ infos: v
|
|
|
.get(7)
|
|
.get(7)
|
|
|
- .ok_or(anyhow!("Can't parse id from: {s}"))?
|
|
|
|
|
- .to_string(),
|
|
|
|
|
|
|
+ .ok_or(anyhow!("Can't parse infos from: {s}"))?
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse infos from: {s}"))?,
|
|
|
formats,
|
|
formats,
|
|
|
- annotations: Vec::new(),
|
|
|
|
|
})
|
|
})
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ADJAGBA_diag
|
|
// #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ADJAGBA_diag
|
|
|
-impl Variant {
|
|
|
|
|
|
|
+impl VcfVariant {
|
|
|
pub fn into_vcf_row(&self) -> String {
|
|
pub fn into_vcf_row(&self) -> String {
|
|
|
let mut columns = vec![
|
|
let mut columns = vec![
|
|
|
self.contig.to_string(),
|
|
self.contig.to_string(),
|
|
@@ -100,7 +100,7 @@ impl Variant {
|
|
|
.map(|v| v.to_string())
|
|
.map(|v| v.to_string())
|
|
|
.unwrap_or(".".to_string()),
|
|
.unwrap_or(".".to_string()),
|
|
|
self.filter.to_string(),
|
|
self.filter.to_string(),
|
|
|
- self.info.to_string(),
|
|
|
|
|
|
|
+ self.infos.to_string(),
|
|
|
];
|
|
];
|
|
|
|
|
|
|
|
if !self.formats.0.is_empty() {
|
|
if !self.formats.0.is_empty() {
|
|
@@ -111,6 +111,45 @@ impl Variant {
|
|
|
|
|
|
|
|
columns.join("\t")
|
|
columns.join("\t")
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ pub fn chr_num(&self) -> u32 {
|
|
|
|
|
+ self.contig
|
|
|
|
|
+ .trim_start_matches("chr")
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .unwrap_or(u32::MAX)
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn commun_deepvariant_clairs(&self) -> VcfVariant {
|
|
|
|
|
+ VcfVariant {
|
|
|
|
|
+ contig: self.contig.clone(),
|
|
|
|
|
+ position: self.position,
|
|
|
|
|
+ id: self.id.clone(),
|
|
|
|
|
+ reference: self.reference.clone(),
|
|
|
|
|
+ alternative: self.alternative.clone(),
|
|
|
|
|
+ quality: self.quality,
|
|
|
|
|
+ filter: Filter::Other(".".to_string()),
|
|
|
|
|
+ infos: Infos(vec![Info::Empty]),
|
|
|
|
|
+ formats: self.formats.commun_deepvariant_clairs(),
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl PartialOrd for VcfVariant {
|
|
|
|
|
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
|
|
|
|
+ Some(self.cmp(other))
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl Ord for VcfVariant {
|
|
|
|
|
+ fn cmp(&self, other: &Self) -> Ordering {
|
|
|
|
|
+ let self_num = self.chr_num();
|
|
|
|
|
+ let other_num = other.chr_num();
|
|
|
|
|
+
|
|
|
|
|
+ match self_num.cmp(&other_num) {
|
|
|
|
|
+ Ordering::Equal => self.position.cmp(&other.position),
|
|
|
|
|
+ other => other,
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Tag
|
|
// Tag
|
|
@@ -122,15 +161,161 @@ pub enum Annotation {
|
|
|
Other((String, String)), // (key, value)
|
|
Other((String, String)), // (key, value)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/// Info
|
|
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
|
|
|
|
|
+pub struct Infos(Vec<Info>);
|
|
|
|
|
+
|
|
|
|
|
+impl FromStr for Infos {
|
|
|
|
|
+ type Err = anyhow::Error;
|
|
|
|
|
+
|
|
|
|
|
+ fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
|
|
+ Ok(Self(
|
|
|
|
|
+ s.split(";")
|
|
|
|
|
+ .map(Info::from_str)
|
|
|
|
|
+ .collect::<Result<Vec<Info>, _>>()
|
|
|
|
|
+ .map_err(|e| anyhow::anyhow!("Failed to parse info: {e}"))?,
|
|
|
|
|
+ ))
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl fmt::Display for Infos {
|
|
|
|
|
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
|
|
+ write!(
|
|
|
|
|
+ f,
|
|
|
|
|
+ "{}",
|
|
|
|
|
+ self.0
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .map(|e| e.to_string())
|
|
|
|
|
+ .collect::<Vec<String>>()
|
|
|
|
|
+ .join(";")
|
|
|
|
|
+ )
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
|
|
|
|
+pub enum Info {
|
|
|
|
|
+ Empty,
|
|
|
|
|
+ H,
|
|
|
|
|
+ F,
|
|
|
|
|
+ P,
|
|
|
|
|
+ FAU(u32),
|
|
|
|
|
+ FCU(u32),
|
|
|
|
|
+ FGU(u32),
|
|
|
|
|
+ FTU(u32),
|
|
|
|
|
+ RAU(u32),
|
|
|
|
|
+ RCU(u32),
|
|
|
|
|
+ RGU(u32),
|
|
|
|
|
+ RTU(u32),
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl FromStr for Info {
|
|
|
|
|
+ type Err = anyhow::Error;
|
|
|
|
|
+
|
|
|
|
|
+ fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
|
|
+ if s.contains("=") {
|
|
|
|
|
+ let (key, value) = s
|
|
|
|
|
+ .split_once('=')
|
|
|
|
|
+ .context(format!("Can't split with `=` {s}"))?;
|
|
|
|
|
+ Ok(match key {
|
|
|
|
|
+ "FAU" => Info::FAU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+ "FCU" => Info::FCU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+ "FGU" => Info::FGU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+ "FTU" => Info::FTU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+ "RAU" => Info::RAU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+ "RCU" => Info::RCU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+ "RGU" => Info::RGU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+ "RTU" => Info::RTU(
|
|
|
|
|
+ value
|
|
|
|
|
+ .parse()
|
|
|
|
|
+ .context(format!("Can't parse into u32: {value}"))?,
|
|
|
|
|
+ ),
|
|
|
|
|
+
|
|
|
|
|
+ _ => Info::Empty,
|
|
|
|
|
+ })
|
|
|
|
|
+ } else {
|
|
|
|
|
+ Ok(match s {
|
|
|
|
|
+ "H" => Info::H,
|
|
|
|
|
+ "F" => Info::F,
|
|
|
|
|
+ "P" => Info::P,
|
|
|
|
|
+
|
|
|
|
|
+ _ => Info::Empty,
|
|
|
|
|
+ })
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl fmt::Display for Info {
|
|
|
|
|
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
|
|
+ match self {
|
|
|
|
|
+ Info::Empty => write!(f, "."),
|
|
|
|
|
+ Info::H => write!(f, "H"),
|
|
|
|
|
+ Info::F => write!(f, "F"),
|
|
|
|
|
+ Info::P => write!(f, "P"),
|
|
|
|
|
+ Info::FAU(v) => write!(f, "FAU={v}"),
|
|
|
|
|
+ Info::FCU(v) => write!(f, "FCU={v}"),
|
|
|
|
|
+ Info::FGU(v) => write!(f, "FGU={v}"),
|
|
|
|
|
+ Info::FTU(v) => write!(f, "FTU={v}"),
|
|
|
|
|
+ Info::RAU(v) => write!(f, "RAU={v}"),
|
|
|
|
|
+ Info::RCU(v) => write!(f, "RCU={v}"),
|
|
|
|
|
+ Info::RGU(v) => write!(f, "RGU={v}"),
|
|
|
|
|
+ Info::RTU(v) => write!(f, "RTU={v}"),
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
/// Format
|
|
/// Format
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
|
|
pub enum Format {
|
|
pub enum Format {
|
|
|
|
|
+ // DeepVariant
|
|
|
GT(String),
|
|
GT(String),
|
|
|
GQ(u32),
|
|
GQ(u32),
|
|
|
DP(u32),
|
|
DP(u32),
|
|
|
AD(Vec<u32>),
|
|
AD(Vec<u32>),
|
|
|
VAF(f32),
|
|
VAF(f32),
|
|
|
PL(Vec<u32>),
|
|
PL(Vec<u32>),
|
|
|
|
|
+
|
|
|
|
|
+ // Clairs
|
|
|
|
|
+ AF(f32),
|
|
|
|
|
+ NAF(u32),
|
|
|
|
|
+ NDP(u32),
|
|
|
|
|
+ NAD(Vec<u32>),
|
|
|
|
|
+ AU(u32),
|
|
|
|
|
+ CU(u32),
|
|
|
|
|
+ GU(u32),
|
|
|
|
|
+ TU(u32),
|
|
|
|
|
+ NAU(u32),
|
|
|
|
|
+ NCU(u32),
|
|
|
|
|
+ NGU(u32),
|
|
|
|
|
+ NTU(u32),
|
|
|
|
|
+
|
|
|
Other((String, String)), // (key, value)
|
|
Other((String, String)), // (key, value)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -201,32 +386,62 @@ impl TryFrom<(&str, &str)> for Format {
|
|
|
|
|
|
|
|
impl From<Format> for (String, String) {
|
|
impl From<Format> for (String, String) {
|
|
|
fn from(format: Format) -> Self {
|
|
fn from(format: Format) -> Self {
|
|
|
|
|
+ let concat = |values: Vec<u32>| -> String {
|
|
|
|
|
+ values
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .map(|v| v.to_string())
|
|
|
|
|
+ .collect::<Vec<_>>()
|
|
|
|
|
+ .join(",")
|
|
|
|
|
+ };
|
|
|
match format {
|
|
match format {
|
|
|
Format::GT(value) => ("GT".to_string(), value),
|
|
Format::GT(value) => ("GT".to_string(), value),
|
|
|
Format::GQ(value) => ("GQ".to_string(), value.to_string()),
|
|
Format::GQ(value) => ("GQ".to_string(), value.to_string()),
|
|
|
Format::DP(value) => ("DP".to_string(), value.to_string()),
|
|
Format::DP(value) => ("DP".to_string(), value.to_string()),
|
|
|
- Format::AD(values) => {
|
|
|
|
|
- let value_str = values
|
|
|
|
|
- .iter()
|
|
|
|
|
- .map(|v| v.to_string())
|
|
|
|
|
- .collect::<Vec<_>>()
|
|
|
|
|
- .join(",");
|
|
|
|
|
- ("AD".to_string(), value_str)
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ Format::AD(values) => ("AD".to_string(), concat(values)),
|
|
|
Format::VAF(value) => ("VAF".to_string(), value.to_string()),
|
|
Format::VAF(value) => ("VAF".to_string(), value.to_string()),
|
|
|
- Format::PL(values) => {
|
|
|
|
|
- let value_str = values
|
|
|
|
|
- .iter()
|
|
|
|
|
- .map(|v| v.to_string())
|
|
|
|
|
- .collect::<Vec<_>>()
|
|
|
|
|
- .join(",");
|
|
|
|
|
- ("PL".to_string(), value_str)
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ Format::PL(values) => ("PL".to_string(), concat(values)),
|
|
|
Format::Other((key, value)) => (key, value),
|
|
Format::Other((key, value)) => (key, value),
|
|
|
|
|
+ Format::AF(value) => ("AF".to_string(), value.to_string()),
|
|
|
|
|
+ Format::NAF(value) => ("NAF".to_string(), value.to_string()),
|
|
|
|
|
+ Format::NDP(value) => ("NDP".to_string(), value.to_string()),
|
|
|
|
|
+ Format::NAD(values) => ("NAD".to_string(), concat(values)),
|
|
|
|
|
+ Format::AU(value) => ("AU".to_string(), value.to_string()),
|
|
|
|
|
+ Format::CU(value) => ("CU".to_string(), value.to_string()),
|
|
|
|
|
+ Format::GU(value) => ("GU".to_string(), value.to_string()),
|
|
|
|
|
+ Format::TU(value) => ("TU".to_string(), value.to_string()),
|
|
|
|
|
+ Format::NAU(value) => ("NAU".to_string(), value.to_string()),
|
|
|
|
|
+ Format::NCU(value) => ("NCU".to_string(), value.to_string()),
|
|
|
|
|
+ Format::NGU(value) => ("NGU".to_string(), value.to_string()),
|
|
|
|
|
+ Format::NTU(value) => ("NTU".to_string(), value.to_string()),
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+impl Formats {
|
|
|
|
|
+ pub fn commun_deepvariant_clairs(&self) -> Self {
|
|
|
|
|
+ let filtered_vec: Vec<Format> = self
|
|
|
|
|
+ .0
|
|
|
|
|
+ .clone()
|
|
|
|
|
+ .into_iter()
|
|
|
|
|
+ .map(|e| {
|
|
|
|
|
+ if let Format::VAF(v) = e {
|
|
|
|
|
+ Format::AF(v)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ e
|
|
|
|
|
+ }
|
|
|
|
|
+ })
|
|
|
|
|
+ .filter(|format| {
|
|
|
|
|
+ matches!(
|
|
|
|
|
+ format,
|
|
|
|
|
+ Format::GT(_) | Format::GQ(_) | Format::DP(_) | Format::AD(_) | Format::AF(_)
|
|
|
|
|
+ )
|
|
|
|
|
+ })
|
|
|
|
|
+ .collect();
|
|
|
|
|
+
|
|
|
|
|
+ Formats(filtered_vec)
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
/// Filter
|
|
/// Filter
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
|
pub enum Filter {
|
|
pub enum Filter {
|
|
@@ -350,5 +565,5 @@ impl fmt::Display for Base {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub trait Variants {
|
|
pub trait Variants {
|
|
|
- fn variants(&self) -> anyhow::Result<Vec<Variant>>;
|
|
|
|
|
|
|
+ fn variants(&self) -> anyhow::Result<VariantCollection>;
|
|
|
}
|
|
}
|