|
@@ -2,7 +2,7 @@ use std::{
|
|
|
collections::{HashMap, HashSet},
|
|
collections::{HashMap, HashSet},
|
|
|
fs::{self, File},
|
|
fs::{self, File},
|
|
|
io::{Read, Write},
|
|
io::{Read, Write},
|
|
|
- path::{Path, PathBuf},
|
|
|
|
|
|
|
+ path::PathBuf,
|
|
|
sync::Arc,
|
|
sync::Arc,
|
|
|
};
|
|
};
|
|
|
|
|
|
|
@@ -34,8 +34,8 @@ use crate::{
|
|
|
},
|
|
},
|
|
|
config::Config,
|
|
config::Config,
|
|
|
helpers::{
|
|
helpers::{
|
|
|
- app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path,
|
|
|
|
|
- Hash128, Repeat,
|
|
|
|
|
|
|
+ app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, Hash128, Repeat,
|
|
|
|
|
+ TempFileGuard,
|
|
|
},
|
|
},
|
|
|
io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header, writers::get_gz_writer},
|
|
io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header, writers::get_gz_writer},
|
|
|
positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
|
|
positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
|
|
@@ -1705,8 +1705,10 @@ impl ExternalAnnotation {
|
|
|
let config: &Config = &self.config;
|
|
let config: &Config = &self.config;
|
|
|
|
|
|
|
|
let mut results: Vec<(Hash128, Vec<VEP>)> = if !unfound.is_empty() {
|
|
let mut results: Vec<(Hash128, Vec<VEP>)> = if !unfound.is_empty() {
|
|
|
- let optimal_chunk_size = unfound.len().div_ceil(max_chunks as usize);
|
|
|
|
|
- let optimal_chunk_size = optimal_chunk_size.max(min_chunk_size);
|
|
|
|
|
|
|
+ let optimal_chunk_size = unfound
|
|
|
|
|
+ .len()
|
|
|
|
|
+ .div_ceil(max_chunks as usize)
|
|
|
|
|
+ .max(min_chunk_size);
|
|
|
|
|
|
|
|
debug!("{} chunks to process.", unfound.len() / optimal_chunk_size);
|
|
debug!("{} chunks to process.", unfound.len() / optimal_chunk_size);
|
|
|
unfound
|
|
unfound
|
|
@@ -1719,7 +1721,7 @@ impl ExternalAnnotation {
|
|
|
e
|
|
e
|
|
|
})
|
|
})
|
|
|
})
|
|
})
|
|
|
- .collect::<Result<Vec<_>, _>>()? // Collect results into a Result<Vec<_>>
|
|
|
|
|
|
|
+ .collect::<Result<Vec<_>, _>>()?
|
|
|
.into_iter()
|
|
.into_iter()
|
|
|
.flatten()
|
|
.flatten()
|
|
|
.collect::<Vec<_>>()
|
|
.collect::<Vec<_>>()
|
|
@@ -1728,107 +1730,106 @@ impl ExternalAnnotation {
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
if !sv.is_empty() {
|
|
if !sv.is_empty() {
|
|
|
- let optimal_chunk_size = sv.len().div_ceil(max_chunks as usize);
|
|
|
|
|
- let optimal_chunk_size = optimal_chunk_size.max(min_chunk_size);
|
|
|
|
|
|
|
+ let optimal_chunk_size = sv.len().div_ceil(max_chunks as usize).max(min_chunk_size);
|
|
|
|
|
|
|
|
- let results_sv = sv
|
|
|
|
|
|
|
+ let results_sv: Vec<(Hash128, Vec<VEP>)> = sv
|
|
|
.par_chunks(optimal_chunk_size)
|
|
.par_chunks(optimal_chunk_size)
|
|
|
- .flat_map(|chunk| -> anyhow::Result<Vec<_>> {
|
|
|
|
|
- let in_tmp = temp_file_path(".vcf")
|
|
|
|
|
- .context("Can't create tmp path for in tmp")?
|
|
|
|
|
- .to_str()
|
|
|
|
|
- .unwrap()
|
|
|
|
|
- .to_string();
|
|
|
|
|
- let out_vep = temp_file_path("_vep.txt")
|
|
|
|
|
- .context("Can't create tmp path for in tmp")?
|
|
|
|
|
- .to_str()
|
|
|
|
|
- .unwrap()
|
|
|
|
|
- .to_string();
|
|
|
|
|
-
|
|
|
|
|
- let out_summary = format!("{out_vep}_summary.html");
|
|
|
|
|
- let out_warnings = format!("{out_vep}_warnings.txt");
|
|
|
|
|
-
|
|
|
|
|
- // Write input VCF
|
|
|
|
|
- let mut vcf =
|
|
|
|
|
- File::create(&in_tmp).context("Can't create input vcf file for VEP.")?;
|
|
|
|
|
- writeln!(vcf, "{}", header)?;
|
|
|
|
|
- for (i, mut row) in chunk.iter().cloned().enumerate() {
|
|
|
|
|
- row.id = (i + 1).to_string();
|
|
|
|
|
- let s = row.into_vcf_row();
|
|
|
|
|
- writeln!(vcf, "{s}",)?;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ .enumerate()
|
|
|
|
|
+ .map(
|
|
|
|
|
+ |(chunk_i, chunk)| -> anyhow::Result<Vec<(Hash128, Vec<VEP>)>> {
|
|
|
|
|
+ debug!("Processing SV chunk {chunk_i}");
|
|
|
|
|
+
|
|
|
|
|
+ let mut guard = TempFileGuard::new();
|
|
|
|
|
+ let in_tmp = guard.tmp_path(".vcf", &config.tmp_dir);
|
|
|
|
|
+ let out_vep = guard.tmp_path("_vep.txt", &config.tmp_dir);
|
|
|
|
|
+
|
|
|
|
|
+ let out_summary =
|
|
|
|
|
+ PathBuf::from(format!("{}_summary.html", out_vep.display()));
|
|
|
|
|
+ let out_warnings =
|
|
|
|
|
+ PathBuf::from(format!("{}_warnings.txt", out_vep.display()));
|
|
|
|
|
+ guard.track(out_summary.clone());
|
|
|
|
|
+ guard.track(out_warnings.clone());
|
|
|
|
|
+
|
|
|
|
|
+ // Write input file
|
|
|
|
|
+ {
|
|
|
|
|
+ let mut vcf =
|
|
|
|
|
+ File::create(&in_tmp).context("Can't create input file.")?;
|
|
|
|
|
+ writeln!(vcf, "{}", header)?;
|
|
|
|
|
+ for (i, mut row) in chunk.iter().cloned().enumerate() {
|
|
|
|
|
+ row.id = (i + 1).to_string();
|
|
|
|
|
+ let s = row.into_vcf_row();
|
|
|
|
|
+ writeln!(vcf, "{s}")?;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- let mut vep_job = VepJob::new(&in_tmp, &out_vep, config);
|
|
|
|
|
- run!(config, &mut vep_job).context("Error while running VEP.")?;
|
|
|
|
|
-
|
|
|
|
|
- let mut reader_vep = ReaderBuilder::new()
|
|
|
|
|
- .delimiter(b'\t')
|
|
|
|
|
- .has_headers(false)
|
|
|
|
|
- .comment(Some(b'#'))
|
|
|
|
|
- .flexible(true)
|
|
|
|
|
- .from_reader(
|
|
|
|
|
- fs::File::open(&out_vep).context("Can't open VEP result file.")?,
|
|
|
|
|
- );
|
|
|
|
|
-
|
|
|
|
|
- let mut lines: HashMap<u64, Vec<VepLine>> = HashMap::new();
|
|
|
|
|
- for line in reader_vep.deserialize() {
|
|
|
|
|
- let line: VepLine = line.context("Failed to deserialize VepLine")?;
|
|
|
|
|
- let key = line
|
|
|
|
|
- .uploaded_variation
|
|
|
|
|
- .parse::<u64>()
|
|
|
|
|
- .context("Failed to parse uploaded_variation as u64")?;
|
|
|
|
|
-
|
|
|
|
|
- lines.entry(key).or_default().push(line);
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ let mut vep_job = VepJob::new(&in_tmp, &out_vep, config);
|
|
|
|
|
+ run!(config, &mut vep_job).context("Error while running VEP.")?;
|
|
|
|
|
|
|
|
- fs::remove_file(&in_tmp).context(format!("Can't remove file {in_tmp}"))?;
|
|
|
|
|
|
|
+ let mut reader_vep = ReaderBuilder::new()
|
|
|
|
|
+ .delimiter(b'\t')
|
|
|
|
|
+ .has_headers(false)
|
|
|
|
|
+ .comment(Some(b'#'))
|
|
|
|
|
+ .flexible(true)
|
|
|
|
|
+ .from_reader(
|
|
|
|
|
+ fs::File::open(&out_vep).context("Can't open result file.")?,
|
|
|
|
|
+ );
|
|
|
|
|
|
|
|
- let mut n_not_vep = 0;
|
|
|
|
|
- let mut chunk_results: Vec<(Hash128, Vec<VEP>)> = Vec::new();
|
|
|
|
|
|
|
+ let mut lines: HashMap<u64, Vec<VepLine>> = HashMap::new();
|
|
|
|
|
+ for line in reader_vep.deserialize() {
|
|
|
|
|
+ let line: VepLine = line.context("Failed to deserialize VepLine")?;
|
|
|
|
|
+ let key = line
|
|
|
|
|
+ .uploaded_variation
|
|
|
|
|
+ .parse::<u64>()
|
|
|
|
|
+ .context("Failed to parse uploaded_variation as u64")?;
|
|
|
|
|
+ lines.entry(key).or_default().push(line);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ let mut n_not_vep = 0usize;
|
|
|
|
|
+ let mut chunk_results: Vec<(Hash128, Vec<VEP>)> = Vec::new();
|
|
|
|
|
|
|
|
- chunk.iter().enumerate().for_each(|(i, entry)| {
|
|
|
|
|
- let k = (i + 1) as u64;
|
|
|
|
|
|
|
+ chunk.iter().enumerate().for_each(|(i, entry)| {
|
|
|
|
|
+ let k = (i + 1) as u64;
|
|
|
|
|
|
|
|
- if let Some(vep_lines) = lines.get(&k) {
|
|
|
|
|
- if let Ok(veps) = vep_lines.iter().map(VEP::try_from).collect() {
|
|
|
|
|
- chunk_results.push((entry.hash(), veps));
|
|
|
|
|
|
|
+ if let Some(vep_lines) = lines.get(&k) {
|
|
|
|
|
+ if let Ok(veps) = vep_lines.iter().map(VEP::try_from).collect() {
|
|
|
|
|
+ chunk_results.push((entry.hash(), veps));
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ warn!(
|
|
|
|
|
+ "No entry for {}\t{}\t{}",
|
|
|
|
|
+ entry.position, entry.reference, entry.alternative
|
|
|
|
|
+ );
|
|
|
|
|
+ n_not_vep += 1;
|
|
|
}
|
|
}
|
|
|
- } else {
|
|
|
|
|
- warn!(
|
|
|
|
|
- "No VEP entry for {}\t{}\t{}",
|
|
|
|
|
- entry.position, entry.reference, entry.alternative
|
|
|
|
|
- );
|
|
|
|
|
- n_not_vep += 1;
|
|
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ if n_not_vep > 0 && out_warnings.exists() {
|
|
|
|
|
+ debug!("{n_not_vep} entries not annotated.");
|
|
|
|
|
+ let warnings =
|
|
|
|
|
+ fs::read_to_string(&out_warnings).with_context(|| {
|
|
|
|
|
+ format!("Can't read warnings file {}", out_warnings.display())
|
|
|
|
|
+ })?;
|
|
|
|
|
+ warn!("Warnings:\n{warnings}");
|
|
|
}
|
|
}
|
|
|
- });
|
|
|
|
|
- fs::remove_file(&out_vep).context(format!("Can't remove file {out_vep}"))?;
|
|
|
|
|
-
|
|
|
|
|
- if n_not_vep > 0 {
|
|
|
|
|
- debug!("{n_not_vep} variants not annotated by VEP.");
|
|
|
|
|
- let warnings = fs::read_to_string(&out_warnings)
|
|
|
|
|
- .context(format!("Can't read VEP warnings: {out_warnings}"))?;
|
|
|
|
|
- warn!("VEP warnings:\n{warnings}");
|
|
|
|
|
- }
|
|
|
|
|
- if Path::new(&out_warnings).exists() {
|
|
|
|
|
- fs::remove_file(&out_warnings)
|
|
|
|
|
- .context(format!("Can't remove file {out_warnings}"))?;
|
|
|
|
|
- }
|
|
|
|
|
- if Path::new(&out_summary).exists() {
|
|
|
|
|
- fs::remove_file(&out_summary)
|
|
|
|
|
- .context(format!("Can't remove file {out_summary}"))?;
|
|
|
|
|
- }
|
|
|
|
|
- Ok(chunk_results)
|
|
|
|
|
- })
|
|
|
|
|
|
|
+
|
|
|
|
|
+ // Success: remove temps and silence Drop warning.
|
|
|
|
|
+ guard.cleanup();
|
|
|
|
|
+ guard.disarm();
|
|
|
|
|
+
|
|
|
|
|
+ Ok(chunk_results)
|
|
|
|
|
+ },
|
|
|
|
|
+ )
|
|
|
|
|
+ .collect::<Result<Vec<_>, _>>()?
|
|
|
|
|
+ .into_iter()
|
|
|
.flatten()
|
|
.flatten()
|
|
|
- .collect::<Vec<_>>();
|
|
|
|
|
|
|
+ .collect();
|
|
|
|
|
|
|
|
results.extend(results_sv);
|
|
results.extend(results_sv);
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
info!("{} total variants annotaded by VEP.", results.len());
|
|
info!("{} total variants annotaded by VEP.", results.len());
|
|
|
|
|
|
|
|
for (hash, veps) in results {
|
|
for (hash, veps) in results {
|
|
|
- // self.update_database(hash, "vep", &serde_json::to_vec(&veps)?)?;
|
|
|
|
|
-
|
|
|
|
|
annotations
|
|
annotations
|
|
|
.store
|
|
.store
|
|
|
.entry(hash)
|
|
.entry(hash)
|
|
@@ -1855,17 +1856,15 @@ fn process_vep_chunk(
|
|
|
header: &str,
|
|
header: &str,
|
|
|
config: &Config,
|
|
config: &Config,
|
|
|
) -> anyhow::Result<Vec<(Hash128, Vec<VEP>)>> {
|
|
) -> anyhow::Result<Vec<(Hash128, Vec<VEP>)>> {
|
|
|
- let in_tmp = temp_file_path("vcf")?
|
|
|
|
|
- .to_str()
|
|
|
|
|
- .ok_or_else(|| anyhow::anyhow!("Failed to convert temp file path to string"))?
|
|
|
|
|
- .to_string();
|
|
|
|
|
- let out_vep = temp_file_path("_vep.txt")?
|
|
|
|
|
- .to_str()
|
|
|
|
|
- .ok_or_else(|| anyhow::anyhow!("Failed to convert temp file path to string"))?
|
|
|
|
|
- .to_string();
|
|
|
|
|
-
|
|
|
|
|
- let out_summary = format!("{out_vep}_summary.html");
|
|
|
|
|
- let out_warnings = format!("{out_vep}_warnings.txt");
|
|
|
|
|
|
|
+ let mut guard = TempFileGuard::new();
|
|
|
|
|
+
|
|
|
|
|
+ let in_tmp = guard.tmp_path(".vcf", &config.tmp_dir);
|
|
|
|
|
+ let out_vep = guard.tmp_path("_vep.txt", &config.tmp_dir);
|
|
|
|
|
+
|
|
|
|
|
+ let out_summary = PathBuf::from(format!("{}_summary.html", out_vep.display()));
|
|
|
|
|
+ let out_warnings = PathBuf::from(format!("{}_warnings.txt", out_vep.display()));
|
|
|
|
|
+ guard.track(out_summary.clone());
|
|
|
|
|
+ guard.track(out_warnings.clone());
|
|
|
|
|
|
|
|
let mut vcf = File::create(&in_tmp)?;
|
|
let mut vcf = File::create(&in_tmp)?;
|
|
|
writeln!(vcf, "{}", header)?;
|
|
writeln!(vcf, "{}", header)?;
|
|
@@ -1885,7 +1884,7 @@ fn process_vep_chunk(
|
|
|
let mut vep_job = VepJob::new(&in_tmp, &out_vep, config);
|
|
let mut vep_job = VepJob::new(&in_tmp, &out_vep, config);
|
|
|
if let Err(e) = run!(config, &mut vep_job) {
|
|
if let Err(e) = run!(config, &mut vep_job) {
|
|
|
error!("VEP error: {e}");
|
|
error!("VEP error: {e}");
|
|
|
- return Err(anyhow::anyhow!("VEP execution failed: {}", e)); // Propagate the error.
|
|
|
|
|
|
|
+ return Err(anyhow::anyhow!("VEP execution failed: {}", e));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
let mut reader_vep = ReaderBuilder::new()
|
|
let mut reader_vep = ReaderBuilder::new()
|
|
@@ -1893,21 +1892,19 @@ fn process_vep_chunk(
|
|
|
.has_headers(false)
|
|
.has_headers(false)
|
|
|
.comment(Some(b'#'))
|
|
.comment(Some(b'#'))
|
|
|
.flexible(true)
|
|
.flexible(true)
|
|
|
- .from_reader(fs::File::open(&out_vep)?); // If this fails, the error is propagated.
|
|
|
|
|
|
|
+ .from_reader(fs::File::open(&out_vep)?);
|
|
|
|
|
|
|
|
let mut lines: HashMap<u64, Vec<VepLine>> = HashMap::new();
|
|
let mut lines: HashMap<u64, Vec<VepLine>> = HashMap::new();
|
|
|
for line in reader_vep.deserialize() {
|
|
for line in reader_vep.deserialize() {
|
|
|
- let line: VepLine = line.context("Failed to deserialize VepLine")?; // Propagate the error.
|
|
|
|
|
|
|
+ let line: VepLine = line.context("Failed to deserialize VepLine")?;
|
|
|
let key = line
|
|
let key = line
|
|
|
.uploaded_variation
|
|
.uploaded_variation
|
|
|
.parse::<u64>()
|
|
.parse::<u64>()
|
|
|
- .context("Failed to parse uploaded_variation as u64")?; // Propagate the error.
|
|
|
|
|
|
|
+ .context("Failed to parse uploaded_variation as u64")?;
|
|
|
lines.entry(key).or_default().push(line);
|
|
lines.entry(key).or_default().push(line);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- fs::remove_file(&in_tmp).context(format!("Can't remove file {in_tmp}"))?;
|
|
|
|
|
-
|
|
|
|
|
- let mut n_not_vep = 0;
|
|
|
|
|
|
|
+ let mut n_not_vep = 0usize;
|
|
|
let mut chunk_results: Vec<(Hash128, Vec<VEP>)> = Vec::new();
|
|
let mut chunk_results: Vec<(Hash128, Vec<VEP>)> = Vec::new();
|
|
|
|
|
|
|
|
chunk.iter().enumerate().for_each(|(i, entry)| {
|
|
chunk.iter().enumerate().for_each(|(i, entry)| {
|
|
@@ -1925,21 +1922,17 @@ fn process_vep_chunk(
|
|
|
n_not_vep += 1;
|
|
n_not_vep += 1;
|
|
|
}
|
|
}
|
|
|
});
|
|
});
|
|
|
- fs::remove_file(&out_vep).context(format!("Can't remove file {out_vep}"))?;
|
|
|
|
|
|
|
|
|
|
- if n_not_vep > 0 {
|
|
|
|
|
- debug!("{n_not_vep} variants not annotated by VEP.");
|
|
|
|
|
|
|
+ if n_not_vep > 0 && out_warnings.exists() {
|
|
|
|
|
+ debug!("{n_not_vep} variants not annotated.");
|
|
|
let warnings = fs::read_to_string(&out_warnings)
|
|
let warnings = fs::read_to_string(&out_warnings)
|
|
|
- .context(format!("Can't read VEP warnings: {out_warnings}"))?;
|
|
|
|
|
- warn!("VEP warnings:\n{warnings}");
|
|
|
|
|
|
|
+ .with_context(|| format!("Can't read warnings file {}", out_warnings.display()))?;
|
|
|
|
|
+ warn!("Warnings:\n{warnings}");
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- if Path::new(&out_warnings).exists() {
|
|
|
|
|
- fs::remove_file(&out_warnings).context(format!("Can't remove file {out_warnings}"))?;
|
|
|
|
|
- }
|
|
|
|
|
- if Path::new(&out_summary).exists() {
|
|
|
|
|
- fs::remove_file(&out_summary).context(format!("Can't remove file {out_summary}"))?;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ // Success: remove temps and avoid Drop warning.
|
|
|
|
|
+ guard.cleanup();
|
|
|
|
|
+ guard.disarm();
|
|
|
|
|
|
|
|
- Ok(chunk_results) // Return the successful result.
|
|
|
|
|
|
|
+ Ok(chunk_results)
|
|
|
}
|
|
}
|