|
|
@@ -1,4 +1,4 @@
|
|
|
-use anyhow::{anyhow, Context, Ok, Result};
|
|
|
+use anyhow::{anyhow, Context};
|
|
|
use csv::ReaderBuilder;
|
|
|
use hashbrown::HashMap;
|
|
|
use log::warn;
|
|
|
@@ -39,7 +39,7 @@ pub struct VEP {
|
|
|
pub gene: Option<String>,
|
|
|
pub feature: Option<String>,
|
|
|
pub feature_type: Option<String>,
|
|
|
- pub consequence: Option<Vec<String>>,
|
|
|
+ pub consequence: Option<Vec<VepConsequence>>,
|
|
|
pub cdna_position: Option<String>,
|
|
|
pub cds_position: Option<String>,
|
|
|
pub protein_position: Option<String>,
|
|
|
@@ -50,55 +50,219 @@ pub struct VEP {
|
|
|
}
|
|
|
|
|
|
// ensembl.org/info/genome/variation/prediction/predicted_data.html
|
|
|
-#[derive(Debug, PartialEq, Eq)]
|
|
|
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
|
pub enum VepConsequence {
|
|
|
- Transcript_ablation,
|
|
|
- Splice_acceptor_variant,
|
|
|
- Splice_donor_variant,
|
|
|
- Stop_gained,
|
|
|
- Frameshift_variant,
|
|
|
- Stop_lost,
|
|
|
- Start_lost,
|
|
|
- Transcript_amplification,
|
|
|
- Inframe_insertion,
|
|
|
- Inframe_deletion,
|
|
|
- Missense_variant,
|
|
|
- Protein_altering_variant,
|
|
|
- Splice_region_variant,
|
|
|
- Incomplete_terminal_codon_variant,
|
|
|
- Start_retained_variant,
|
|
|
- Stop_retained_variant,
|
|
|
- Synonymous_variant,
|
|
|
- Coding_sequence_variant,
|
|
|
- Mature_miRNA_variant,
|
|
|
- Five_prime_UTR_variant,
|
|
|
- Three_prime_UTR_variant,
|
|
|
- Non_coding_transcript_exon_variant,
|
|
|
- Intron_variant,
|
|
|
- NMD_transcript_variant,
|
|
|
- Non_coding_transcript_variant,
|
|
|
- Upstream_gene_variant,
|
|
|
- Downstream_gene_variant,
|
|
|
- TFBS_ablation,
|
|
|
- TFBS_amplification,
|
|
|
- TF_binding_site_variant,
|
|
|
- Regulatory_region_ablation,
|
|
|
- Regulatory_region_amplification,
|
|
|
- Feature_elongation,
|
|
|
- Regulatory_region_variant,
|
|
|
- Feature_truncation,
|
|
|
- Intergenic_variant,
|
|
|
+ TranscriptAblation,
|
|
|
+ SpliceAcceptorVariant,
|
|
|
+ SpliceDonorVariant,
|
|
|
+ StopGained,
|
|
|
+ FrameshiftVariant,
|
|
|
+ StopLost,
|
|
|
+ StartLost,
|
|
|
+ TranscriptAmplification,
|
|
|
+ InframeInsertion,
|
|
|
+ InframeDeletion,
|
|
|
+ MissenseVariant,
|
|
|
+ ProteinAlteringVariant,
|
|
|
+ SpliceDonor5thBaseVariant,
|
|
|
+ SpliceRegionVariant,
|
|
|
+ SpliceDonorRegionVariant,
|
|
|
+ SplicePolyrimidineTractVariant,
|
|
|
+ IncompleteTerminalCodonVariant,
|
|
|
+ StartRetainedVariant,
|
|
|
+ StopRetainedVariant,
|
|
|
+ SynonymousVariant,
|
|
|
+ CodingSequenceVariant,
|
|
|
+ MatureMiRnaVariant,
|
|
|
+ FivePrimeUtrVariant,
|
|
|
+ ThreePrimeUtrVariant,
|
|
|
+ NonCodingTranscriptExonVariant,
|
|
|
+ IntronVariant,
|
|
|
+ NmdTranscriptVariant,
|
|
|
+ NonCodingTranscriptVariant,
|
|
|
+ UpstreamGeneVariant,
|
|
|
+ DownstreamGeneVariant,
|
|
|
+ TfbsAblation,
|
|
|
+ TfbsAmplification,
|
|
|
+ TfBindingSiteVariant,
|
|
|
+ RegulatoryRegionAblation,
|
|
|
+ RegulatoryRegionAmplification,
|
|
|
+ FeatureElongation,
|
|
|
+ RegulatoryRegionVariant,
|
|
|
+ FeatureTruncation,
|
|
|
+ IntergenicVariant,
|
|
|
+ SequenceVariant,
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
|
|
+pub enum VepImpact {
|
|
|
+ HIGH,
|
|
|
+ MODERATE,
|
|
|
+ LOW,
|
|
|
+ MODIFIER,
|
|
|
+}
|
|
|
+
|
|
|
+impl VepImpact {
|
|
|
+ pub fn from_conseque(consequence: &VepConsequence) -> VepImpact {
|
|
|
+ match consequence {
|
|
|
+ VepConsequence::TranscriptAblation |
|
|
|
+ VepConsequence::SpliceAcceptorVariant |
|
|
|
+ VepConsequence::SpliceDonorVariant |
|
|
|
+ VepConsequence::StopGained |
|
|
|
+ VepConsequence::FrameshiftVariant |
|
|
|
+ VepConsequence::StopLost |
|
|
|
+ VepConsequence::StartLost |
|
|
|
+ VepConsequence::TranscriptAmplification |
|
|
|
+ VepConsequence::FeatureElongation |
|
|
|
+ VepConsequence::FeatureTruncation => VepImpact::HIGH,
|
|
|
+
|
|
|
+ VepConsequence::InframeInsertion |
|
|
|
+ VepConsequence::InframeDeletion |
|
|
|
+ VepConsequence::MissenseVariant |
|
|
|
+ VepConsequence::ProteinAlteringVariant => VepImpact::MODERATE,
|
|
|
+
|
|
|
+ VepConsequence::SpliceDonor5thBaseVariant |
|
|
|
+ VepConsequence::SpliceRegionVariant |
|
|
|
+ VepConsequence::SpliceDonorRegionVariant |
|
|
|
+ VepConsequence::SplicePolyrimidineTractVariant |
|
|
|
+ VepConsequence::IncompleteTerminalCodonVariant |
|
|
|
+ VepConsequence::StartRetainedVariant |
|
|
|
+ VepConsequence::StopRetainedVariant |
|
|
|
+ VepConsequence::SynonymousVariant => VepImpact::LOW,
|
|
|
+
|
|
|
+ VepConsequence::CodingSequenceVariant |
|
|
|
+ VepConsequence::MatureMiRnaVariant |
|
|
|
+ VepConsequence::FivePrimeUtrVariant |
|
|
|
+ VepConsequence::ThreePrimeUtrVariant |
|
|
|
+ VepConsequence::NonCodingTranscriptExonVariant |
|
|
|
+ VepConsequence::IntronVariant |
|
|
|
+ VepConsequence::NmdTranscriptVariant |
|
|
|
+ VepConsequence::NonCodingTranscriptVariant |
|
|
|
+ VepConsequence::UpstreamGeneVariant |
|
|
|
+ VepConsequence::DownstreamGeneVariant |
|
|
|
+ VepConsequence::TfbsAblation |
|
|
|
+ VepConsequence::TfbsAmplification |
|
|
|
+ VepConsequence::TfBindingSiteVariant |
|
|
|
+ VepConsequence::RegulatoryRegionAblation |
|
|
|
+ VepConsequence::RegulatoryRegionAmplification |
|
|
|
+ VepConsequence::RegulatoryRegionVariant |
|
|
|
+ VepConsequence::SequenceVariant |
|
|
|
+ VepConsequence::IntergenicVariant => VepImpact::MODIFIER,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl From<VepConsequence> for String {
|
|
|
+ fn from(consequence: VepConsequence) -> Self {
|
|
|
+ match consequence {
|
|
|
+ VepConsequence::TranscriptAblation => "transcript_ablation".to_string(),
|
|
|
+ VepConsequence::SpliceAcceptorVariant => "splice_acceptor_variant".to_string(),
|
|
|
+ VepConsequence::SpliceDonorVariant => "splice_donor_variant".to_string(),
|
|
|
+ VepConsequence::StopGained => "stop_gained".to_string(),
|
|
|
+ VepConsequence::FrameshiftVariant => "frameshift_variant".to_string(),
|
|
|
+ VepConsequence::StopLost => "stop_lost".to_string(),
|
|
|
+ VepConsequence::StartLost => "start_lost".to_string(),
|
|
|
+ VepConsequence::TranscriptAmplification => "transcript_amplification".to_string(),
|
|
|
+ VepConsequence::InframeInsertion => "inframe_insertion".to_string(),
|
|
|
+ VepConsequence::InframeDeletion => "inframe_deletion".to_string(),
|
|
|
+ VepConsequence::MissenseVariant => "missense_variant".to_string(),
|
|
|
+ VepConsequence::ProteinAlteringVariant => "protein_altering_variant".to_string(),
|
|
|
+ VepConsequence::SpliceRegionVariant => "splice_region_variant".to_string(),
|
|
|
+ VepConsequence::IncompleteTerminalCodonVariant => "incomplete_terminal_codon_variant".to_string(),
|
|
|
+ VepConsequence::StartRetainedVariant => "start_retained_variant".to_string(),
|
|
|
+ VepConsequence::StopRetainedVariant => "stop_retained_variant".to_string(),
|
|
|
+ VepConsequence::SynonymousVariant => "synonymous_variant".to_string(),
|
|
|
+ VepConsequence::CodingSequenceVariant => "coding_sequence_variant".to_string(),
|
|
|
+ VepConsequence::MatureMiRnaVariant => "mature_miRNA_variant".to_string(),
|
|
|
+ VepConsequence::FivePrimeUtrVariant => "5_prime_UTR_variant".to_string(),
|
|
|
+ VepConsequence::ThreePrimeUtrVariant => "3_prime_UTR_variant".to_string(),
|
|
|
+ VepConsequence::NonCodingTranscriptExonVariant => "non_coding_transcript_exon_variant".to_string(),
|
|
|
+ VepConsequence::IntronVariant => "intron_variant".to_string(),
|
|
|
+ VepConsequence::NmdTranscriptVariant => "NMD_transcript_variant".to_string(),
|
|
|
+ VepConsequence::NonCodingTranscriptVariant => "non_coding_transcript_variant".to_string(),
|
|
|
+ VepConsequence::UpstreamGeneVariant => "upstream_gene_variant".to_string(),
|
|
|
+ VepConsequence::DownstreamGeneVariant => "downstream_gene_variant".to_string(),
|
|
|
+ VepConsequence::TfbsAblation => "TFBS_ablation".to_string(),
|
|
|
+ VepConsequence::TfbsAmplification => "TFBS_amplification".to_string(),
|
|
|
+ VepConsequence::TfBindingSiteVariant => "TF_binding_site_variant".to_string(),
|
|
|
+ VepConsequence::RegulatoryRegionAblation => "regulatory_region_ablation".to_string(),
|
|
|
+ VepConsequence::RegulatoryRegionAmplification => "regulatory_region_amplification".to_string(),
|
|
|
+ VepConsequence::FeatureElongation => "feature_elongation".to_string(),
|
|
|
+ VepConsequence::RegulatoryRegionVariant => "regulatory_region_variant".to_string(),
|
|
|
+ VepConsequence::FeatureTruncation => "feature_truncation".to_string(),
|
|
|
+ VepConsequence::SpliceDonor5thBaseVariant => "splice_donor_5th_base_variant".to_string(),
|
|
|
+ VepConsequence::SpliceDonorRegionVariant => "splice_donor_region_variant".to_string(),
|
|
|
+ VepConsequence::SplicePolyrimidineTractVariant => "splice_polypyrimidine_tract_variant".to_string(),
|
|
|
+ VepConsequence::SequenceVariant => "sequence_variant".to_string(),
|
|
|
+ VepConsequence::IntergenicVariant => "intergenic_variant".to_string(),
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl FromStr for VepConsequence {
|
|
|
+ type Err = anyhow::Error;
|
|
|
+
|
|
|
+ fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
+ match s {
|
|
|
+ "transcript_ablation" => Ok(VepConsequence::TranscriptAblation),
|
|
|
+ "splice_acceptor_variant" => Ok(VepConsequence::SpliceAcceptorVariant),
|
|
|
+ "splice_donor_variant" => Ok(VepConsequence::SpliceDonorVariant),
|
|
|
+ "stop_gained" => Ok(VepConsequence::StopGained),
|
|
|
+ "frameshift_variant" => Ok(VepConsequence::FrameshiftVariant),
|
|
|
+ "stop_lost" => Ok(VepConsequence::StopLost),
|
|
|
+ "start_lost" => Ok(VepConsequence::StartLost),
|
|
|
+ "transcript_amplification" => Ok(VepConsequence::TranscriptAmplification),
|
|
|
+ "feature_elongation" => Ok(VepConsequence::FeatureElongation),
|
|
|
+ "feature_truncation" => Ok(VepConsequence::FeatureTruncation),
|
|
|
+
|
|
|
+ "inframe_insertion" => Ok(VepConsequence::InframeInsertion),
|
|
|
+ "inframe_deletion" => Ok(VepConsequence::InframeDeletion),
|
|
|
+ "missense_variant" => Ok(VepConsequence::MissenseVariant),
|
|
|
+ "protein_altering_variant" => Ok(VepConsequence::ProteinAlteringVariant),
|
|
|
+ "splice_donor_5th_base_variant" => Ok(VepConsequence::SpliceDonor5thBaseVariant),
|
|
|
+ "splice_region_variant" => Ok(VepConsequence::SpliceRegionVariant),
|
|
|
+ "splice_donor_region_variant" => Ok(VepConsequence::SpliceDonorRegionVariant),
|
|
|
+ "splice_polypyrimidine_tract_variant" => Ok(VepConsequence::SplicePolyrimidineTractVariant),
|
|
|
+
|
|
|
+ "incomplete_terminal_codon_variant" => Ok(VepConsequence::IncompleteTerminalCodonVariant),
|
|
|
+ "start_retained_variant" => Ok(VepConsequence::StartRetainedVariant),
|
|
|
+ "stop_retained_variant" => Ok(VepConsequence::StopRetainedVariant),
|
|
|
+ "synonymous_variant" => Ok(VepConsequence::SynonymousVariant),
|
|
|
+ "coding_sequence_variant" => Ok(VepConsequence::CodingSequenceVariant),
|
|
|
+ "mature_miRNA_variant" => Ok(VepConsequence::MatureMiRnaVariant),
|
|
|
+ "5_prime_UTR_variant" => Ok(VepConsequence::FivePrimeUtrVariant),
|
|
|
+ "3_prime_UTR_variant" => Ok(VepConsequence::ThreePrimeUtrVariant),
|
|
|
+ "non_coding_transcript_exon_variant" => Ok(VepConsequence::NonCodingTranscriptExonVariant),
|
|
|
+ "intron_variant" => Ok(VepConsequence::IntronVariant),
|
|
|
+
|
|
|
+ "NMD_transcript_variant" => Ok(VepConsequence::NmdTranscriptVariant),
|
|
|
+ "non_coding_transcript_variant" => Ok(VepConsequence::NonCodingTranscriptVariant),
|
|
|
+ "upstream_gene_variant" => Ok(VepConsequence::UpstreamGeneVariant),
|
|
|
+ "downstream_gene_variant" => Ok(VepConsequence::DownstreamGeneVariant),
|
|
|
+ "TFBS_ablation" => Ok(VepConsequence::TfbsAblation),
|
|
|
+ "TFBS_amplification" => Ok(VepConsequence::TfbsAmplification),
|
|
|
+ "TF_binding_site_variant" => Ok(VepConsequence::TfBindingSiteVariant),
|
|
|
+ "regulatory_region_ablation" => Ok(VepConsequence::RegulatoryRegionAblation),
|
|
|
+ "regulatory_region_amplification" => Ok(VepConsequence::RegulatoryRegionAmplification),
|
|
|
+ "regulatory_region_variant" => Ok(VepConsequence::RegulatoryRegionVariant),
|
|
|
+
|
|
|
+ "intergenic_variant" => Ok(VepConsequence::IntergenicVariant),
|
|
|
+ "sequence_variant" => Ok(VepConsequence::SequenceVariant),
|
|
|
+ _ => Err(anyhow!("Unknown VepConsequence: {s}")),
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
impl VEP {
|
|
|
- fn from_vep_line(d: &VEPLine) -> Result<VEP> {
|
|
|
+ fn from_vep_line(d: &VEPLine) -> anyhow::Result<VEP> {
|
|
|
let or_opt = |s: &str| match s {
|
|
|
"-" => None,
|
|
|
_ => Some(s.to_string()),
|
|
|
};
|
|
|
|
|
|
let consequence = or_opt(&d.consequence)
|
|
|
- .map(|c| c.split(",").map(|e| e.to_string()).collect::<Vec<String>>());
|
|
|
+ .map(|c| c.split(",").map(|e| e.parse::<VepConsequence>()).collect::<anyhow::Result<Vec<_>>>()).transpose()?;
|
|
|
|
|
|
Ok(VEP {
|
|
|
gene: or_opt(&d.gene),
|
|
|
@@ -127,7 +291,7 @@ pub struct VEPExtra {
|
|
|
impl FromStr for VEPExtra {
|
|
|
type Err = anyhow::Error;
|
|
|
|
|
|
- fn from_str(s: &str) -> Result<Self> {
|
|
|
+ fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
let err = |c| anyhow!("Error {} parsing VEP Extra field {}", c, s);
|
|
|
|
|
|
let elements = s.split(";").collect::<Vec<&str>>();
|
|
|
@@ -165,27 +329,27 @@ impl FromStr for VEPExtra {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
|
-pub enum VEPImpact {
|
|
|
- Low,
|
|
|
- Moderate,
|
|
|
- High,
|
|
|
- Modifier,
|
|
|
-}
|
|
|
-
|
|
|
-impl FromStr for VEPImpact {
|
|
|
- type Err = anyhow::Error;
|
|
|
-
|
|
|
- fn from_str(s: &str) -> Result<Self> {
|
|
|
- match s {
|
|
|
- "LOW" => Ok(VEPImpact::Low),
|
|
|
- "MODERATE" => Ok(VEPImpact::Moderate),
|
|
|
- "HIGH" => Ok(VEPImpact::High),
|
|
|
- "MODIFIER" => Ok(VEPImpact::Modifier),
|
|
|
- _ => Err(anyhow!("Unexpected VEP Impact value")),
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
+// #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
|
|
+// pub enum VEPImpact {
|
|
|
+// Low,
|
|
|
+// Moderate,
|
|
|
+// High,
|
|
|
+// Modifier,
|
|
|
+// }
|
|
|
+//
|
|
|
+// impl FromStr for VEPImpact {
|
|
|
+// type Err = anyhow::Error;
|
|
|
+//
|
|
|
+// fn from_str(s: &str) -> Result<Self> {
|
|
|
+// match s {
|
|
|
+// "LOW" => Ok(VEPImpact::Low),
|
|
|
+// "MODERATE" => Ok(VEPImpact::Moderate),
|
|
|
+// "HIGH" => Ok(VEPImpact::High),
|
|
|
+// "MODIFIER" => Ok(VEPImpact::Modifier),
|
|
|
+// _ => Err(anyhow!("Unexpected VEP Impact value")),
|
|
|
+// }
|
|
|
+// }
|
|
|
+// }
|
|
|
// pub fn vep_chunk(data: &mut [Variant]) -> Result<()> {
|
|
|
// let in_vcf = format!(
|
|
|
// "{}/vep_{}.vcf",
|
|
|
@@ -325,7 +489,7 @@ fn run_vep(in_path: &str, out_path: &str) -> Result<()> {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
-pub fn get_best_vep(d: &[VEP]) -> Result<VEP> {
|
|
|
+pub fn get_best_vep(d: &[VEP]) -> anyhow::Result<VEP> {
|
|
|
d.into_iter().filter(|v| v.)
|
|
|
|
|
|
if d.is_empty() {
|