Thomas 3 settimane fa
parent
commit
ae20fc03a4
2 file modificati con 48 aggiunte e 40 eliminazioni
  1. 39 29
      src/annotation/vep.rs
  2. 9 11
      src/variant/variant_collection.rs

+ 39 - 29
src/annotation/vep.rs

@@ -249,17 +249,17 @@ pub enum VepConsequence {
     /// Deletion of a regulatory region
     RegulatoryRegionAblation,
     /// Amplification of a regulatory region
-RegulatoryRegionAmplification,
-/// Variant causing a feature to be extended
-FeatureElongation,
-/// Variant in a regulatory region
-RegulatoryRegionVariant,
-/// Variant causing a feature to be shortened
-FeatureTruncation,
-/// Variant in intergenic region
-IntergenicVariant,
-/// General sequence variant
-SequenceVariant,
+    RegulatoryRegionAmplification,
+    /// Variant causing a feature to be extended
+    FeatureElongation,
+    /// Variant in a regulatory region
+    RegulatoryRegionVariant,
+    /// Variant causing a feature to be shortened
+    FeatureTruncation,
+    /// Variant in intergenic region
+    IntergenicVariant,
+    /// General sequence variant
+    SequenceVariant,
 }
 
 /// Represents the severity of a variant's impact as predicted by the
@@ -268,27 +268,27 @@ SequenceVariant,
 /// The impact categories are ordered from most severe (HIGH) to least severe (MODIFIER).
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Encode, Decode)]
 pub enum VepImpact {
-/// High impact variants are expected to have high (disruptive) impact in the protein,
-/// probably causing protein truncation, loss of function or triggering nonsense mediated decay.
-HIGH,
-/// Moderate impact variants are non-disruptive variants that might change protein effectiveness.
-MODERATE,
-/// Low impact variants are mostly harmless or unlikely to change protein behavior.
-LOW,
-/// Modifier variants are usually non-coding variants or variants affecting non-coding genes,
-/// where predictions are difficult or there is no evidence of impact.
-MODIFIER,
+    /// High impact variants are expected to have high (disruptive) impact in the protein,
+    /// probably causing protein truncation, loss of function or triggering nonsense mediated decay.
+    HIGH,
+    /// Moderate impact variants are non-disruptive variants that might change protein effectiveness.
+    MODERATE,
+    /// Low impact variants are mostly harmless or unlikely to change protein behavior.
+    LOW,
+    /// Modifier variants are usually non-coding variants or variants affecting non-coding genes,
+    /// where predictions are difficult or there is no evidence of impact.
+    MODIFIER,
 }
 
 impl Display for VepImpact {
-fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-    write!(
-        f,
-        "{}",
-        match self {
-            VepImpact::HIGH => "HIGH",
-            VepImpact::MODERATE => "MODERATE",
-            VepImpact::LOW => "LOW",
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                VepImpact::HIGH => "HIGH",
+                VepImpact::MODERATE => "MODERATE",
+                VepImpact::LOW => "LOW",
                 VepImpact::MODIFIER => "MODIFIER",
             }
         )
@@ -649,6 +649,16 @@ pub struct VepJob {
     config: Config,
 }
 
+impl VepJob {
+    pub fn new(in_path: &str, out_path: &str, config: &Config) -> Self {
+        VepJob {
+            in_vcf: in_path.into(),
+            out_vcf: out_path.into(),
+            config: config.clone(),
+        }
+    }
+}
+
 impl JobCommand for VepJob {
     fn cmd(&self) -> String {
         let bind_flags = singularity_bind_flags([

+ 9 - 11
src/variant/variant_collection.rs

@@ -20,12 +20,7 @@ use super::vcf_variant::{
 };
 use crate::{
     annotation::{
-        cosmic::Cosmic,
-        echtvar::{parse_echtvar_val, run_echtvar},
-        gnomad::GnomAD,
-        parse_trinuc,
-        vep::{get_best_vep, run_vep, VepLine, VEP},
-        Annotation, Annotations,
+        Annotation, Annotations, cosmic::Cosmic, echtvar::{parse_echtvar_val, run_echtvar}, gnomad::GnomAD, parse_trinuc, vep::{VEP, VepJob, VepLine, get_best_vep, run_vep}
     },
     collection::{
         bam::{counts_at, counts_ins_at},
@@ -33,11 +28,10 @@ use crate::{
     },
     config::Config,
     helpers::{
-        app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path,
-        Hash128, Repeat,
+        Hash128, Repeat, app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path
     },
     io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header, writers::get_gz_writer},
-    positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
+    positions::{GenomePosition, GenomeRange, GetGenomePosition, overlaps_par}, run,
 };
 
 /// A collection of VCF variants along with associated metadata.
@@ -1700,6 +1694,8 @@ impl ExternalAnnotation {
         let min_chunk_size = 1000;
         let max_chunks = 150;
 
+        let config: &Config = &self.config;
+
         let mut results: Vec<(Hash128, Vec<VEP>)> = if !unfound.is_empty() {
             let optimal_chunk_size = unfound.len().div_ceil(max_chunks as usize);
             let optimal_chunk_size = optimal_chunk_size.max(min_chunk_size);
@@ -1710,7 +1706,7 @@ impl ExternalAnnotation {
                 .enumerate()
                 .map(|(chunk_i, chunk)| {
                     debug!("Processing chunk {chunk_i}");
-                    process_vep_chunk(chunk, &header).map_err(|e| {
+                    process_vep_chunk(chunk, &header, config).map_err(|e| {
                         error!("Error processing chunk {chunk_i}: {e}");
                         e
                     })
@@ -1848,6 +1844,7 @@ impl ExternalAnnotation {
 fn process_vep_chunk(
     chunk: &[VcfVariant],
     header: &str,
+    config: &Config,
 ) -> anyhow::Result<Vec<(Hash128, Vec<VEP>)>> {
     let in_tmp = temp_file_path("vcf")?
         .to_str()
@@ -1876,7 +1873,8 @@ fn process_vep_chunk(
         )?;
     }
 
-    if let Err(e) = run_vep(&in_tmp, &out_vep) {
+    let mut vep_job= VepJob::new(&in_tmp, &out_vep, config);
+    if let Err(e) = run!(config, &mut vep_job) {
         error!("VEP error: {e}");
         return Err(anyhow::anyhow!("VEP execution failed: {}", e)); // Propagate the error.
     }