Thomas 3 週間 前
コミット
61e40ca4b2
2 ファイル変更、16 行追加、94 行削除
  1. 1 88
      src/annotation/vep.rs
  2. 15 6
      src/variant/variant_collection.rs

+ 1 - 88
src/annotation/vep.rs

@@ -2,14 +2,12 @@ use anyhow::anyhow;
 use bitcode::{Decode, Encode};
 use hashbrown::HashMap;
 use itertools::Itertools;
-use log::{debug, warn};
+use log::{ warn};
 use serde::{Deserialize, Serialize};
 use std::{
     cmp::{Ordering, Reverse},
     fmt::Display,
-    io::{BufRead, BufReader},
     path::PathBuf,
-    process::{Command, Stdio},
     str::FromStr,
 };
 
@@ -722,91 +720,6 @@ impl SbatchRunner for VepJob {
 //     }
 //     // add code here
 // }
-/// Runs the Variant Effect Predictor (VEP) on a given input file and outputs the results.
-///
-/// This function executes the VEP tool with specific parameters to annotate genetic variants.
-/// It uses a predefined set of reference data and plugins to enhance the annotation process.
-///
-/// # Arguments
-/// * `in_path` - A string slice that holds the path to the input file
-/// * `out_path` - A string slice that holds the path where the output should be written
-///
-/// # Returns
-/// * `Ok(())` if VEP runs successfully
-/// * `Err(anyhow::Error)` if there's an error during execution
-///
-/// # Configuration
-/// The function uses hardcoded paths for:
-/// - VEP binary directory
-/// - VEP cache directory
-/// - Reference FASTA file
-/// - GFF annotation file
-///
-/// # VEP Parameters
-/// - Uses offline cache
-/// - Includes symbol information
-/// - Applies SpliceRegion and Downstream plugins
-/// - Generates HGVS notations
-///
-/// # Error Handling
-/// - Logs any lines containing "error" from VEP's stderr output as warnings
-/// - Returns an error if the VEP process fails to complete
-///
-/// # Example
-/// ```
-/// match run_vep("input.vcf", "output.vcf") {
-///     Ok(()) => println!("VEP annotation completed successfully"),
-///     Err(e) => eprintln!("VEP annotation failed: {}", e),
-/// }
-/// ```
-///
-/// # Note
-/// Ensure that the VEP tool and all necessary reference data are correctly installed
-/// and accessible at the specified paths before running this function.
-pub fn run_vep(in_path: &str, out_path: &str) -> anyhow::Result<()> {
-    // VEP need plugin Downstream and SpliceRegion /home/prom/.vep/Plugins
-    debug!("Run VEP for {in_path} and ouput {out_path}");
-
-    let bin_dir = "/data/tools/ensembl-vep";
-    let dir_cache = "/data/ref/hs1/vepcache/";
-    let fasta = "/data/ref/hs1/chm13v2.0.fa";
-    let gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz";
-
-    let mut cmd = Command::new(format!("{}/vep", bin_dir))
-        .arg("--dir_cache")
-        .arg(dir_cache)
-        .arg("--cache")
-        .arg("--offline")
-        .arg("--fasta")
-        .arg(fasta)
-        .arg("--gff")
-        .arg(gff)
-        .arg("--symbol")
-        .arg("--plugin")
-        .arg("SpliceRegion")
-        .arg("--plugin")
-        .arg("Downstream")
-        .arg("--hgvs")
-        .arg("-i")
-        .arg(in_path)
-        .arg("-o")
-        .arg(out_path)
-        .stderr(Stdio::piped())
-        .spawn()
-        .expect("VEP failed to start");
-
-    let stderr = cmd.stderr.take().unwrap();
-    let reader = BufReader::new(stderr);
-    reader
-        .lines()
-        .map_while(Result::ok)
-        // .inspect(|y| println!("{y}"))
-        .filter(|line| line.contains("error"))
-        .for_each(|line| warn!("{}", line));
-
-    cmd.wait()?;
-    Ok(())
-}
 
 /// Selects the "best" VEP annotation from a list, based on biological impact and transcript priority.
 ///

+ 15 - 6
src/variant/variant_collection.rs

@@ -2,7 +2,8 @@ use std::{
     collections::{HashMap, HashSet},
     fs::{self, File},
     io::{Read, Write},
-    path::{Path, PathBuf}, sync::Arc,
+    path::{Path, PathBuf},
+    sync::Arc,
 };
 
 use anyhow::Context;
@@ -20,7 +21,12 @@ use super::vcf_variant::{
 };
 use crate::{
     annotation::{
-        Annotation, Annotations, cosmic::Cosmic, echtvar::{parse_echtvar_val, run_echtvar}, gnomad::GnomAD, parse_trinuc, vep::{VEP, VepJob, VepLine, get_best_vep, run_vep}
+        cosmic::Cosmic,
+        echtvar::{parse_echtvar_val, run_echtvar},
+        gnomad::GnomAD,
+        parse_trinuc,
+        vep::{get_best_vep, VepJob, VepLine, VEP},
+        Annotation, Annotations,
     },
     collection::{
         bam::{counts_at, counts_ins_at},
@@ -28,10 +34,12 @@ use crate::{
     },
     config::Config,
     helpers::{
-        Hash128, Repeat, app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path
+        app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path,
+        Hash128, Repeat,
     },
     io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header, writers::get_gz_writer},
-    positions::{GenomePosition, GenomeRange, GetGenomePosition, overlaps_par}, run,
+    positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
+    run,
 };
 
 /// A collection of VCF variants along with associated metadata.
@@ -1750,7 +1758,8 @@ impl ExternalAnnotation {
                         writeln!(vcf, "{s}",)?;
                     }
 
-                    run_vep(&in_tmp, &out_vep).context("Error while running VEP.")?;
+                    let mut vep_job = VepJob::new(&in_tmp, &out_vep, config);
+                    run!(config, &mut vep_job).context("Error while running VEP.")?;
 
                     let mut reader_vep = ReaderBuilder::new()
                         .delimiter(b'\t')
@@ -1873,7 +1882,7 @@ fn process_vep_chunk(
         )?;
     }
 
-    let mut vep_job= VepJob::new(&in_tmp, &out_vep, config);
+    let mut vep_job = VepJob::new(&in_tmp, &out_vep, config);
     if let Err(e) = run!(config, &mut vep_job) {
         error!("VEP error: {e}");
         return Err(anyhow::anyhow!("VEP execution failed: {}", e)); // Propagate the error.