|
|
@@ -1,23 +1,19 @@
|
|
|
-use anyhow::{anyhow, Context};
|
|
|
-use csv::ReaderBuilder;
|
|
|
+use anyhow::anyhow;
|
|
|
use hashbrown::HashMap;
|
|
|
+use itertools::Itertools;
|
|
|
use log::warn;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
-use std::io::Write;
|
|
|
use std::{
|
|
|
- env::temp_dir,
|
|
|
- fs::{self, File},
|
|
|
+ cmp::{Ordering, Reverse},
|
|
|
io::{BufRead, BufReader},
|
|
|
process::{Command, Stdio},
|
|
|
str::FromStr,
|
|
|
};
|
|
|
|
|
|
-use crate::io::vcf::vcf_header;
|
|
|
-
|
|
|
use super::ncbi::NCBIAcc;
|
|
|
|
|
|
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
|
|
-pub struct VEPLine {
|
|
|
+pub struct VepLine {
|
|
|
pub uploaded_variation: String,
|
|
|
pub location: String,
|
|
|
pub allele: String,
|
|
|
@@ -34,6 +30,34 @@ pub struct VEPLine {
|
|
|
pub extra: String,
|
|
|
}
|
|
|
|
|
|
+impl FromStr for VepLine {
|
|
|
+ type Err = anyhow::Error;
|
|
|
+
|
|
|
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
|
+ let parts: Vec<&str> = s.split('\t').collect();
|
|
|
+ if parts.len() != 14 {
|
|
|
+ return Err(anyhow!("Invalid number of fields in VEP line"));
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(VepLine {
|
|
|
+ uploaded_variation: parts[0].to_string(),
|
|
|
+ location: parts[1].to_string(),
|
|
|
+ allele: parts[2].to_string(),
|
|
|
+ gene: parts[3].to_string(),
|
|
|
+ feature: parts[4].to_string(),
|
|
|
+ feature_type: parts[5].to_string(),
|
|
|
+ consequence: parts[6].to_string(),
|
|
|
+ cdna_position: parts[7].to_string(),
|
|
|
+ cds_position: parts[8].to_string(),
|
|
|
+ protein_position: parts[9].to_string(),
|
|
|
+ amino_acids: parts[10].to_string(),
|
|
|
+ codons: parts[11].to_string(),
|
|
|
+ existing_variation: parts[12].to_string(),
|
|
|
+ extra: parts[13].to_string(),
|
|
|
+ })
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
|
pub struct VEP {
|
|
|
pub gene: Option<String>,
|
|
|
@@ -94,8 +118,7 @@ pub enum VepConsequence {
|
|
|
SequenceVariant,
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
|
|
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
|
pub enum VepImpact {
|
|
|
HIGH,
|
|
|
MODERATE,
|
|
|
@@ -103,56 +126,143 @@ pub enum VepImpact {
|
|
|
MODIFIER,
|
|
|
}
|
|
|
|
|
|
-impl VepImpact {
|
|
|
- pub fn from_conseque(consequence: &VepConsequence) -> VepImpact {
|
|
|
+impl PartialOrd for VepImpact {
|
|
|
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
|
|
+ Some(self.cmp(other))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Ord for VepImpact {
|
|
|
+ fn cmp(&self, other: &Self) -> Ordering {
|
|
|
+ match (self, other) {
|
|
|
+ (VepImpact::HIGH, VepImpact::HIGH) => Ordering::Equal,
|
|
|
+ (VepImpact::HIGH, _) => Ordering::Less,
|
|
|
+ (VepImpact::MODERATE, VepImpact::HIGH) => Ordering::Greater,
|
|
|
+ (VepImpact::MODERATE, VepImpact::MODERATE) => Ordering::Equal,
|
|
|
+ (VepImpact::MODERATE, _) => Ordering::Less,
|
|
|
+ (VepImpact::LOW, VepImpact::HIGH | VepImpact::MODERATE) => Ordering::Greater,
|
|
|
+ (VepImpact::LOW, VepImpact::LOW) => Ordering::Equal,
|
|
|
+ (VepImpact::LOW, VepImpact::MODIFIER) => Ordering::Less,
|
|
|
+ (VepImpact::MODIFIER, VepImpact::MODIFIER) => Ordering::Equal,
|
|
|
+ (VepImpact::MODIFIER, _) => Ordering::Greater,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl FromStr for VepImpact {
|
|
|
+ type Err = anyhow::Error;
|
|
|
+
|
|
|
+ fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
+ match s {
|
|
|
+ "LOW" => Ok(VepImpact::LOW),
|
|
|
+ "MODERATE" => Ok(VepImpact::MODERATE),
|
|
|
+ "HIGH" => Ok(VepImpact::HIGH),
|
|
|
+ "MODIFIER" => Ok(VepImpact::MODIFIER),
|
|
|
+ _ => Err(anyhow!("Unexpected VEP Impact value")),
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl From<&VepConsequence> for VepImpact {
|
|
|
+ fn from(consequence: &VepConsequence) -> Self {
|
|
|
match consequence {
|
|
|
- VepConsequence::TranscriptAblation |
|
|
|
- VepConsequence::SpliceAcceptorVariant |
|
|
|
- VepConsequence::SpliceDonorVariant |
|
|
|
- VepConsequence::StopGained |
|
|
|
- VepConsequence::FrameshiftVariant |
|
|
|
- VepConsequence::StopLost |
|
|
|
- VepConsequence::StartLost |
|
|
|
- VepConsequence::TranscriptAmplification |
|
|
|
- VepConsequence::FeatureElongation |
|
|
|
- VepConsequence::FeatureTruncation => VepImpact::HIGH,
|
|
|
-
|
|
|
- VepConsequence::InframeInsertion |
|
|
|
- VepConsequence::InframeDeletion |
|
|
|
- VepConsequence::MissenseVariant |
|
|
|
- VepConsequence::ProteinAlteringVariant => VepImpact::MODERATE,
|
|
|
-
|
|
|
- VepConsequence::SpliceDonor5thBaseVariant |
|
|
|
- VepConsequence::SpliceRegionVariant |
|
|
|
- VepConsequence::SpliceDonorRegionVariant |
|
|
|
- VepConsequence::SplicePolyrimidineTractVariant |
|
|
|
- VepConsequence::IncompleteTerminalCodonVariant |
|
|
|
- VepConsequence::StartRetainedVariant |
|
|
|
- VepConsequence::StopRetainedVariant |
|
|
|
- VepConsequence::SynonymousVariant => VepImpact::LOW,
|
|
|
-
|
|
|
- VepConsequence::CodingSequenceVariant |
|
|
|
- VepConsequence::MatureMiRnaVariant |
|
|
|
- VepConsequence::FivePrimeUtrVariant |
|
|
|
- VepConsequence::ThreePrimeUtrVariant |
|
|
|
- VepConsequence::NonCodingTranscriptExonVariant |
|
|
|
- VepConsequence::IntronVariant |
|
|
|
- VepConsequence::NmdTranscriptVariant |
|
|
|
- VepConsequence::NonCodingTranscriptVariant |
|
|
|
- VepConsequence::UpstreamGeneVariant |
|
|
|
- VepConsequence::DownstreamGeneVariant |
|
|
|
- VepConsequence::TfbsAblation |
|
|
|
- VepConsequence::TfbsAmplification |
|
|
|
- VepConsequence::TfBindingSiteVariant |
|
|
|
- VepConsequence::RegulatoryRegionAblation |
|
|
|
- VepConsequence::RegulatoryRegionAmplification |
|
|
|
- VepConsequence::RegulatoryRegionVariant |
|
|
|
- VepConsequence::SequenceVariant |
|
|
|
- VepConsequence::IntergenicVariant => VepImpact::MODIFIER,
|
|
|
+ VepConsequence::TranscriptAblation
|
|
|
+ | VepConsequence::SpliceAcceptorVariant
|
|
|
+ | VepConsequence::SpliceDonorVariant
|
|
|
+ | VepConsequence::StopGained
|
|
|
+ | VepConsequence::FrameshiftVariant
|
|
|
+ | VepConsequence::StopLost
|
|
|
+ | VepConsequence::StartLost
|
|
|
+ | VepConsequence::TranscriptAmplification
|
|
|
+ | VepConsequence::FeatureElongation
|
|
|
+ | VepConsequence::FeatureTruncation => VepImpact::HIGH,
|
|
|
+
|
|
|
+ VepConsequence::InframeInsertion
|
|
|
+ | VepConsequence::InframeDeletion
|
|
|
+ | VepConsequence::MissenseVariant
|
|
|
+ | VepConsequence::ProteinAlteringVariant => VepImpact::MODERATE,
|
|
|
+
|
|
|
+ VepConsequence::SpliceDonor5thBaseVariant
|
|
|
+ | VepConsequence::SpliceRegionVariant
|
|
|
+ | VepConsequence::SpliceDonorRegionVariant
|
|
|
+ | VepConsequence::SplicePolyrimidineTractVariant
|
|
|
+ | VepConsequence::IncompleteTerminalCodonVariant
|
|
|
+ | VepConsequence::StartRetainedVariant
|
|
|
+ | VepConsequence::StopRetainedVariant
|
|
|
+ | VepConsequence::SynonymousVariant => VepImpact::LOW,
|
|
|
+
|
|
|
+ VepConsequence::CodingSequenceVariant
|
|
|
+ | VepConsequence::MatureMiRnaVariant
|
|
|
+ | VepConsequence::FivePrimeUtrVariant
|
|
|
+ | VepConsequence::ThreePrimeUtrVariant
|
|
|
+ | VepConsequence::NonCodingTranscriptExonVariant
|
|
|
+ | VepConsequence::IntronVariant
|
|
|
+ | VepConsequence::NmdTranscriptVariant
|
|
|
+ | VepConsequence::NonCodingTranscriptVariant
|
|
|
+ | VepConsequence::UpstreamGeneVariant
|
|
|
+ | VepConsequence::DownstreamGeneVariant
|
|
|
+ | VepConsequence::TfbsAblation
|
|
|
+ | VepConsequence::TfbsAmplification
|
|
|
+ | VepConsequence::TfBindingSiteVariant
|
|
|
+ | VepConsequence::RegulatoryRegionAblation
|
|
|
+ | VepConsequence::RegulatoryRegionAmplification
|
|
|
+ | VepConsequence::RegulatoryRegionVariant
|
|
|
+ | VepConsequence::SequenceVariant
|
|
|
+ | VepConsequence::IntergenicVariant => VepImpact::MODIFIER,
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+impl VepImpact {
|
|
|
+ // pub fn from_consequence(consequence: &VepConsequence) -> VepImpact {
|
|
|
+ // match consequence {
|
|
|
+ // VepConsequence::TranscriptAblation
|
|
|
+ // | VepConsequence::SpliceAcceptorVariant
|
|
|
+ // | VepConsequence::SpliceDonorVariant
|
|
|
+ // | VepConsequence::StopGained
|
|
|
+ // | VepConsequence::FrameshiftVariant
|
|
|
+ // | VepConsequence::StopLost
|
|
|
+ // | VepConsequence::StartLost
|
|
|
+ // | VepConsequence::TranscriptAmplification
|
|
|
+ // | VepConsequence::FeatureElongation
|
|
|
+ // | VepConsequence::FeatureTruncation => VepImpact::HIGH,
|
|
|
+ //
|
|
|
+ // VepConsequence::InframeInsertion
|
|
|
+ // | VepConsequence::InframeDeletion
|
|
|
+ // | VepConsequence::MissenseVariant
|
|
|
+ // | VepConsequence::ProteinAlteringVariant => VepImpact::MODERATE,
|
|
|
+ //
|
|
|
+ // VepConsequence::SpliceDonor5thBaseVariant
|
|
|
+ // | VepConsequence::SpliceRegionVariant
|
|
|
+ // | VepConsequence::SpliceDonorRegionVariant
|
|
|
+ // | VepConsequence::SplicePolyrimidineTractVariant
|
|
|
+ // | VepConsequence::IncompleteTerminalCodonVariant
|
|
|
+ // | VepConsequence::StartRetainedVariant
|
|
|
+ // | VepConsequence::StopRetainedVariant
|
|
|
+ // | VepConsequence::SynonymousVariant => VepImpact::LOW,
|
|
|
+ //
|
|
|
+ // VepConsequence::CodingSequenceVariant
|
|
|
+ // | VepConsequence::MatureMiRnaVariant
|
|
|
+ // | VepConsequence::FivePrimeUtrVariant
|
|
|
+ // | VepConsequence::ThreePrimeUtrVariant
|
|
|
+ // | VepConsequence::NonCodingTranscriptExonVariant
|
|
|
+ // | VepConsequence::IntronVariant
|
|
|
+ // | VepConsequence::NmdTranscriptVariant
|
|
|
+ // | VepConsequence::NonCodingTranscriptVariant
|
|
|
+ // | VepConsequence::UpstreamGeneVariant
|
|
|
+ // | VepConsequence::DownstreamGeneVariant
|
|
|
+ // | VepConsequence::TfbsAblation
|
|
|
+ // | VepConsequence::TfbsAmplification
|
|
|
+ // | VepConsequence::TfBindingSiteVariant
|
|
|
+ // | VepConsequence::RegulatoryRegionAblation
|
|
|
+ // | VepConsequence::RegulatoryRegionAmplification
|
|
|
+ // | VepConsequence::RegulatoryRegionVariant
|
|
|
+ // | VepConsequence::SequenceVariant
|
|
|
+ // | VepConsequence::IntergenicVariant => VepImpact::MODIFIER,
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+}
|
|
|
+
|
|
|
impl From<VepConsequence> for String {
|
|
|
fn from(consequence: VepConsequence) -> Self {
|
|
|
match consequence {
|
|
|
@@ -169,7 +279,9 @@ impl From<VepConsequence> for String {
|
|
|
VepConsequence::MissenseVariant => "missense_variant".to_string(),
|
|
|
VepConsequence::ProteinAlteringVariant => "protein_altering_variant".to_string(),
|
|
|
VepConsequence::SpliceRegionVariant => "splice_region_variant".to_string(),
|
|
|
- VepConsequence::IncompleteTerminalCodonVariant => "incomplete_terminal_codon_variant".to_string(),
|
|
|
+ VepConsequence::IncompleteTerminalCodonVariant => {
|
|
|
+ "incomplete_terminal_codon_variant".to_string()
|
|
|
+ }
|
|
|
VepConsequence::StartRetainedVariant => "start_retained_variant".to_string(),
|
|
|
VepConsequence::StopRetainedVariant => "stop_retained_variant".to_string(),
|
|
|
VepConsequence::SynonymousVariant => "synonymous_variant".to_string(),
|
|
|
@@ -177,23 +289,33 @@ impl From<VepConsequence> for String {
|
|
|
VepConsequence::MatureMiRnaVariant => "mature_miRNA_variant".to_string(),
|
|
|
VepConsequence::FivePrimeUtrVariant => "5_prime_UTR_variant".to_string(),
|
|
|
VepConsequence::ThreePrimeUtrVariant => "3_prime_UTR_variant".to_string(),
|
|
|
- VepConsequence::NonCodingTranscriptExonVariant => "non_coding_transcript_exon_variant".to_string(),
|
|
|
+ VepConsequence::NonCodingTranscriptExonVariant => {
|
|
|
+ "non_coding_transcript_exon_variant".to_string()
|
|
|
+ }
|
|
|
VepConsequence::IntronVariant => "intron_variant".to_string(),
|
|
|
VepConsequence::NmdTranscriptVariant => "NMD_transcript_variant".to_string(),
|
|
|
- VepConsequence::NonCodingTranscriptVariant => "non_coding_transcript_variant".to_string(),
|
|
|
+ VepConsequence::NonCodingTranscriptVariant => {
|
|
|
+ "non_coding_transcript_variant".to_string()
|
|
|
+ }
|
|
|
VepConsequence::UpstreamGeneVariant => "upstream_gene_variant".to_string(),
|
|
|
VepConsequence::DownstreamGeneVariant => "downstream_gene_variant".to_string(),
|
|
|
VepConsequence::TfbsAblation => "TFBS_ablation".to_string(),
|
|
|
VepConsequence::TfbsAmplification => "TFBS_amplification".to_string(),
|
|
|
VepConsequence::TfBindingSiteVariant => "TF_binding_site_variant".to_string(),
|
|
|
VepConsequence::RegulatoryRegionAblation => "regulatory_region_ablation".to_string(),
|
|
|
- VepConsequence::RegulatoryRegionAmplification => "regulatory_region_amplification".to_string(),
|
|
|
+ VepConsequence::RegulatoryRegionAmplification => {
|
|
|
+ "regulatory_region_amplification".to_string()
|
|
|
+ }
|
|
|
VepConsequence::FeatureElongation => "feature_elongation".to_string(),
|
|
|
VepConsequence::RegulatoryRegionVariant => "regulatory_region_variant".to_string(),
|
|
|
VepConsequence::FeatureTruncation => "feature_truncation".to_string(),
|
|
|
- VepConsequence::SpliceDonor5thBaseVariant => "splice_donor_5th_base_variant".to_string(),
|
|
|
+ VepConsequence::SpliceDonor5thBaseVariant => {
|
|
|
+ "splice_donor_5th_base_variant".to_string()
|
|
|
+ }
|
|
|
VepConsequence::SpliceDonorRegionVariant => "splice_donor_region_variant".to_string(),
|
|
|
- VepConsequence::SplicePolyrimidineTractVariant => "splice_polyrimidine_tract_variant".to_string(),
|
|
|
+ VepConsequence::SplicePolyrimidineTractVariant => {
|
|
|
+ "splice_polyrimidine_tract_variant".to_string()
|
|
|
+ }
|
|
|
VepConsequence::SequenceVariant => "sequence_variant".to_string(),
|
|
|
VepConsequence::IntergenicVariant => "intergenic_variant".to_string(),
|
|
|
}
|
|
|
@@ -223,9 +345,13 @@ impl FromStr for VepConsequence {
|
|
|
"splice_donor_5th_base_variant" => Ok(VepConsequence::SpliceDonor5thBaseVariant),
|
|
|
"splice_region_variant" => Ok(VepConsequence::SpliceRegionVariant),
|
|
|
"splice_donor_region_variant" => Ok(VepConsequence::SpliceDonorRegionVariant),
|
|
|
- "splice_polypyrimidine_tract_variant" => Ok(VepConsequence::SplicePolyrimidineTractVariant),
|
|
|
+ "splice_polypyrimidine_tract_variant" => {
|
|
|
+ Ok(VepConsequence::SplicePolyrimidineTractVariant)
|
|
|
+ }
|
|
|
|
|
|
- "incomplete_terminal_codon_variant" => Ok(VepConsequence::IncompleteTerminalCodonVariant),
|
|
|
+ "incomplete_terminal_codon_variant" => {
|
|
|
+ Ok(VepConsequence::IncompleteTerminalCodonVariant)
|
|
|
+ }
|
|
|
"start_retained_variant" => Ok(VepConsequence::StartRetainedVariant),
|
|
|
"stop_retained_variant" => Ok(VepConsequence::StopRetainedVariant),
|
|
|
"synonymous_variant" => Ok(VepConsequence::SynonymousVariant),
|
|
|
@@ -233,7 +359,9 @@ impl FromStr for VepConsequence {
|
|
|
"mature_miRNA_variant" => Ok(VepConsequence::MatureMiRnaVariant),
|
|
|
"5_prime_UTR_variant" => Ok(VepConsequence::FivePrimeUtrVariant),
|
|
|
"3_prime_UTR_variant" => Ok(VepConsequence::ThreePrimeUtrVariant),
|
|
|
- "non_coding_transcript_exon_variant" => Ok(VepConsequence::NonCodingTranscriptExonVariant),
|
|
|
+ "non_coding_transcript_exon_variant" => {
|
|
|
+ Ok(VepConsequence::NonCodingTranscriptExonVariant)
|
|
|
+ }
|
|
|
"intron_variant" => Ok(VepConsequence::IntronVariant),
|
|
|
|
|
|
"NMD_transcript_variant" => Ok(VepConsequence::NmdTranscriptVariant),
|
|
|
@@ -254,15 +382,20 @@ impl FromStr for VepConsequence {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-impl VEP {
|
|
|
- fn from_vep_line(d: &VEPLine) -> anyhow::Result<VEP> {
|
|
|
+impl TryFrom<&VepLine> for VEP {
|
|
|
+ type Error = anyhow::Error;
|
|
|
+
|
|
|
+ fn try_from(d: &VepLine) -> anyhow::Result<Self> {
|
|
|
let or_opt = |s: &str| match s {
|
|
|
"-" => None,
|
|
|
_ => Some(s.to_string()),
|
|
|
};
|
|
|
|
|
|
- let consequence = or_opt(&d.consequence)
|
|
|
- .map(|c| c.split(",").map(|e| e.parse()).collect::<Vec<VepConsequence>>());
|
|
|
+ let consequence = or_opt(&d.consequence).map(|c| {
|
|
|
+ c.split(',')
|
|
|
+ .filter_map(|e| e.parse::<VepConsequence>().ok())
|
|
|
+ .collect::<Vec<VepConsequence>>()
|
|
|
+ });
|
|
|
|
|
|
Ok(VEP {
|
|
|
gene: or_opt(&d.gene),
|
|
|
@@ -282,42 +415,24 @@ impl VEP {
|
|
|
|
|
|
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
|
pub struct VEPExtra {
|
|
|
- pub impact: Option<VEPImpact>,
|
|
|
+ pub impact: Option<VepImpact>,
|
|
|
pub symbol: Option<String>,
|
|
|
pub distance: Option<u32>,
|
|
|
pub hgvs_c: Option<String>,
|
|
|
pub hgvs_p: Option<String>,
|
|
|
}
|
|
|
+
|
|
|
impl FromStr for VEPExtra {
|
|
|
type Err = anyhow::Error;
|
|
|
|
|
|
fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
- let err = |c| anyhow!("Error {} parsing VEP Extra field {}", c, s);
|
|
|
-
|
|
|
- let elements = s.split(";").collect::<Vec<&str>>();
|
|
|
-
|
|
|
- let mut kv = HashMap::new();
|
|
|
+ let kv: HashMap<_, _> = s.split(';').filter_map(|e| e.split_once('=')).collect();
|
|
|
|
|
|
- for e in elements.iter() {
|
|
|
- let (k, v) = e.split_once("=").ok_or(err("in split '='"))?;
|
|
|
- if kv.insert(k, v).is_some() {
|
|
|
- return Err(err("kv insert"));
|
|
|
- };
|
|
|
- }
|
|
|
-
|
|
|
- let impact: Option<VEPImpact> = if let Some(v) = kv.get("IMPACT") {
|
|
|
- Some(v.parse()?)
|
|
|
- } else {
|
|
|
- None
|
|
|
- };
|
|
|
- let symbol: Option<String> = kv.get("SYMBOL").map(|v| v.to_string());
|
|
|
- let distance: Option<u32> = if let Some(v) = kv.get("DISTANCE") {
|
|
|
- Some(v.parse()?)
|
|
|
- } else {
|
|
|
- None
|
|
|
- };
|
|
|
- let hgvs_c: Option<String> = kv.get("HGVSc").map(|v| v.to_string());
|
|
|
- let hgvs_p: Option<String> = kv.get("HGVSp").map(|v| v.to_string());
|
|
|
+ let impact = kv.get("IMPACT").map(|&v| v.parse()).transpose()?;
|
|
|
+ let symbol = kv.get("SYMBOL").map(ToString::to_string);
|
|
|
+ let distance = kv.get("DISTANCE").map(|&v| v.parse()).transpose()?;
|
|
|
+ let hgvs_c = kv.get("HGVSc").map(ToString::to_string);
|
|
|
+ let hgvs_p = kv.get("HGVSp").map(ToString::to_string);
|
|
|
|
|
|
Ok(VEPExtra {
|
|
|
impact,
|
|
|
@@ -329,120 +444,8 @@ impl FromStr for VEPExtra {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-// #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
|
-// pub enum VEPImpact {
|
|
|
-// Low,
|
|
|
-// Moderate,
|
|
|
-// High,
|
|
|
-// Modifier,
|
|
|
-// }
|
|
|
-//
|
|
|
-// impl FromStr for VEPImpact {
|
|
|
-// type Err = anyhow::Error;
|
|
|
-//
|
|
|
-// fn from_str(s: &str) -> Result<Self> {
|
|
|
-// match s {
|
|
|
-// "LOW" => Ok(VEPImpact::Low),
|
|
|
-// "MODERATE" => Ok(VEPImpact::Moderate),
|
|
|
-// "HIGH" => Ok(VEPImpact::High),
|
|
|
-// "MODIFIER" => Ok(VEPImpact::Modifier),
|
|
|
-// _ => Err(anyhow!("Unexpected VEP Impact value")),
|
|
|
-// }
|
|
|
-// }
|
|
|
-// }
|
|
|
-// pub fn vep_chunk(data: &mut [Variant]) -> Result<()> {
|
|
|
-// let in_vcf = format!(
|
|
|
-// "{}/vep_{}.vcf",
|
|
|
-// temp_dir().to_str().unwrap(),
|
|
|
-// uuid::Uuid::new_v4()
|
|
|
-// );
|
|
|
-// let out_vep = format!(
|
|
|
-// "{}/vep_{}.txt",
|
|
|
-// temp_dir().to_str().unwrap(),
|
|
|
-// uuid::Uuid::new_v4()
|
|
|
-// );
|
|
|
-//
|
|
|
-// let mut vcf = File::create(&in_vcf).unwrap();
|
|
|
-// let vcf_header = vcf_header("/data/ref/hs1/chm13v2.0.dict")?;
|
|
|
-//
|
|
|
-// writeln!(vcf, "{}", vcf_header.join("\n")).unwrap();
|
|
|
-//
|
|
|
-// for (i, row) in data.iter().enumerate() {
|
|
|
-// writeln!(
|
|
|
-// vcf,
|
|
|
-// "{}\t{}\t{}\t{}\t{}\t.\tPASS\t.\t.\t.",
|
|
|
-// row.contig,
|
|
|
-// row.position,
|
|
|
-// i + 1,
|
|
|
-// row.reference,
|
|
|
-// row.alternative
|
|
|
-// )?;
|
|
|
-// }
|
|
|
-//
|
|
|
-// if let Err(err) = run_vep(&in_vcf, &out_vep) {
|
|
|
-// panic!("{err}");
|
|
|
-// };
|
|
|
-//
|
|
|
-// // read the results in txt file, parse and add to HashMap
|
|
|
-// let mut reader_vep = ReaderBuilder::new()
|
|
|
-// .delimiter(b'\t')
|
|
|
-// .has_headers(false)
|
|
|
-// .comment(Some(b'#'))
|
|
|
-// .flexible(true)
|
|
|
-// .from_reader(fs::File::open(out_vep.clone())?);
|
|
|
-//
|
|
|
-// let mut lines: HashMap<u64, Vec<VEPLine>> = HashMap::new();
|
|
|
-// for line in reader_vep.deserialize::<VEPLine>() {
|
|
|
-// if let std::result::Result::Ok(line) = line {
|
|
|
-// if let std::result::Result::Ok(k) = line.uploaded_variation.parse::<u64>() {
|
|
|
-// lines
|
|
|
-// .raw_entry_mut()
|
|
|
-// .from_key(&k)
|
|
|
-// .or_insert_with(|| (k, vec![]))
|
|
|
-// .1
|
|
|
-// .push(line);
|
|
|
-// } else {
|
|
|
-// return Err(anyhow!("Error while parsing: {:?}", line));
|
|
|
-// }
|
|
|
-// } else {
|
|
|
-// return Err(anyhow!("Error while parsing: {:?}", line));
|
|
|
-// }
|
|
|
-// }
|
|
|
-//
|
|
|
-// // remove input and result file
|
|
|
-// fs::remove_file(in_vcf)?;
|
|
|
-// fs::remove_file(out_vep)?;
|
|
|
-//
|
|
|
-// let mut n_not_vep = 0;
|
|
|
-// data.iter_mut().enumerate().for_each(|(i, entry)| {
|
|
|
-// let k = (i + 1) as u64;
|
|
|
-//
|
|
|
-// match lines.get(&k) {
|
|
|
-// Some(vep_lines) => {
|
|
|
-// let vep: Vec<VEP> = vep_lines
|
|
|
-// .iter()
|
|
|
-// .map(|e| match VEP::from_vep_line(e) {
|
|
|
-// std::result::Result::Ok(r) => r,
|
|
|
-// Err(err) => panic!("Error while parsing: {} line: {:?}", err, e),
|
|
|
-// })
|
|
|
-// .collect();
|
|
|
-// entry.annotations.push(AnnotationType::VEP(vep.to_vec()));
|
|
|
-// }
|
|
|
-// None => {
|
|
|
-// n_not_vep += 1;
|
|
|
-// }
|
|
|
-// };
|
|
|
-// });
|
|
|
-//
|
|
|
-// if n_not_vep > 0 {
|
|
|
-// warn!("{} variants not annotated by VEP", n_not_vep);
|
|
|
-// }
|
|
|
-//
|
|
|
-// Ok(())
|
|
|
-// }
|
|
|
-//
|
|
|
// VEP needs the Downstream and SpliceRegion plugins (see /home/prom/.vep/Plugins)
|
|
|
-fn run_vep(in_path: &str, out_path: &str) -> Result<()> {
|
|
|
+pub fn run_vep(in_path: &str, out_path: &str) -> anyhow::Result<()> {
|
|
|
let bin_dir = "/data/tools/ensembl-vep";
|
|
|
let dir_cache = "/data/ref/hs1/vepcache/";
|
|
|
let fasta = "/data/ref/hs1/chm13v2.0.fa";
|
|
|
@@ -490,42 +493,41 @@ fn run_vep(in_path: &str, out_path: &str) -> Result<()> {
|
|
|
}
|
|
|
|
|
|
pub fn get_best_vep(d: &[VEP]) -> anyhow::Result<VEP> {
|
|
|
- d.into_iter().filter(|v| v.)
|
|
|
-
|
|
|
- if d.is_empty() {
|
|
|
- return Err(anyhow!("No element in VEP vector"));
|
|
|
- }
|
|
|
- if d.len() == 1 {
|
|
|
- return Ok(d.first().unwrap().clone());
|
|
|
- }
|
|
|
+ let best_impact_veps = d
|
|
|
+ .iter()
|
|
|
+ .chunk_by(|vep| {
|
|
|
+ vep.consequence.as_ref().map_or(VepImpact::MODIFIER, |c| {
|
|
|
+ c.iter()
|
|
|
+ .map(VepImpact::from)
|
|
|
+ .min()
|
|
|
+ .unwrap_or(VepImpact::MODIFIER)
|
|
|
+ })
|
|
|
+ })
|
|
|
+ .into_iter()
|
|
|
+ .min_by_key(|(impact, _)| impact.clone())
|
|
|
+ .map(|(_, group)| group.cloned().collect::<Vec<_>>())
|
|
|
+ .ok_or_else(|| anyhow!("No element in VEP vector"))?;
|
|
|
|
|
|
- let mut parsed: Vec<(usize, NCBIAcc)> = Vec::new();
|
|
|
- for (i, vep) in d.iter().enumerate() {
|
|
|
- if let Some(feat) = &vep.feature {
|
|
|
- if let std::result::Result::Ok(f) = feat
|
|
|
- .parse::<NCBIAcc>()
|
|
|
- .context("Error parsing NCBI accession")
|
|
|
- {
|
|
|
- parsed.push((i, f));
|
|
|
- } else {
|
|
|
- warn!("Can't parse {}", feat);
|
|
|
- }
|
|
|
- }
|
|
|
+ if best_impact_veps.len() == 1 {
|
|
|
+ return Ok(best_impact_veps[0].clone());
|
|
|
}
|
|
|
|
|
|
- parsed.sort_by(|(_, a), (_, b)| a.number.cmp(&b.number));
|
|
|
+ let parsed_veps = best_impact_veps
|
|
|
+ .iter()
|
|
|
+ .enumerate()
|
|
|
+ .filter_map(|(i, vep)| {
|
|
|
+ vep.feature.as_ref().and_then(|feat| {
|
|
|
+ feat.parse::<NCBIAcc>()
|
|
|
+ .map(|acc| (i, acc))
|
|
|
+ .map_err(|e| warn!("Can't parse {}: {}", feat, e))
|
|
|
+ .ok()
|
|
|
+ })
|
|
|
+ })
|
|
|
+ .sorted_by_key(|(_, acc)| (Reverse(acc.prefix == "NM"), acc.number))
|
|
|
+ .collect::<Vec<_>>();
|
|
|
|
|
|
- let nm: Vec<(usize, NCBIAcc)> = parsed
|
|
|
- .clone()
|
|
|
- .into_iter()
|
|
|
- .filter(|(_, e)| e.prefix == *"NM")
|
|
|
- .collect();
|
|
|
-
|
|
|
- if !nm.is_empty() {
|
|
|
- let (k, _) = nm.first().unwrap();
|
|
|
- return Ok(d.get(*k).unwrap().clone());
|
|
|
- } else {
|
|
|
- let (k, _) = parsed.first().unwrap();
|
|
|
- return Ok(d.get(*k).unwrap().clone());
|
|
|
- }
|
|
|
+ parsed_veps
|
|
|
+ .first()
|
|
|
+ .map(|(k, _)| best_impact_veps[*k].clone())
|
|
|
+ .ok_or_else(|| anyhow!("No valid NCBI accession found"))
|
|
|
}
|