|
@@ -8,8 +8,8 @@ use std::{
|
|
|
|
|
|
|
|
use anyhow::Context;
|
|
use anyhow::Context;
|
|
|
// use bgzip::{BGZFReader, BGZFWriter};
|
|
// use bgzip::{BGZFReader, BGZFWriter};
|
|
|
-use bitcode::{Decode, Encode};
|
|
|
|
|
use crate::io::tsv::TsvLine;
|
|
use crate::io::tsv::TsvLine;
|
|
|
|
|
+use bitcode::{Decode, Encode};
|
|
|
use dashmap::DashMap;
|
|
use dashmap::DashMap;
|
|
|
use log::{debug, error, info, warn};
|
|
use log::{debug, error, info, warn};
|
|
|
use rayon::prelude::*;
|
|
use rayon::prelude::*;
|
|
@@ -22,15 +22,21 @@ use super::vcf_variant::{
|
|
|
};
|
|
};
|
|
|
use crate::{
|
|
use crate::{
|
|
|
annotation::{
|
|
annotation::{
|
|
|
- Annotation, Annotations, cosmic::Cosmic, echtvar::{parse_echtvar_val, run_echtvar}, gnomad::GnomAD, parse_trinuc, vep::{VEP, VepJob, VepLine, get_best_vep}
|
|
|
|
|
|
|
+ cosmic::Cosmic,
|
|
|
|
|
+ echtvar::{parse_echtvar_val, run_echtvar},
|
|
|
|
|
+ gnomad::GnomAD,
|
|
|
|
|
+ parse_trinuc,
|
|
|
|
|
+ vep::{get_best_vep, VepJob, VepLine, VEP},
|
|
|
|
|
+ Annotation, Annotations,
|
|
|
},
|
|
},
|
|
|
collection::{
|
|
collection::{
|
|
|
- bam::{PileBase, counts_at},
|
|
|
|
|
|
|
+ bam::{counts_at, PileBase},
|
|
|
vcf::Vcf,
|
|
vcf::Vcf,
|
|
|
},
|
|
},
|
|
|
config::Config,
|
|
config::Config,
|
|
|
helpers::{
|
|
helpers::{
|
|
|
- Hash128, Repeat, TempFileGuard, app_storage_dir, detect_repetition, estimate_shannon_entropy, mean
|
|
|
|
|
|
|
+ app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, Hash128, Repeat,
|
|
|
|
|
+ TempFileGuard,
|
|
|
},
|
|
},
|
|
|
io::{
|
|
io::{
|
|
|
fasta::{open_indexed_fasta, sequence_at},
|
|
fasta::{open_indexed_fasta, sequence_at},
|
|
@@ -39,7 +45,7 @@ use crate::{
|
|
|
vcf::vcf_header,
|
|
vcf::vcf_header,
|
|
|
writers::{finalize_bgzf_file, get_gz_writer},
|
|
writers::{finalize_bgzf_file, get_gz_writer},
|
|
|
},
|
|
},
|
|
|
- positions::{GenomePosition, GenomeRange, GetGenomePosition, overlaps_par},
|
|
|
|
|
|
|
+ positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
|
|
|
run,
|
|
run,
|
|
|
};
|
|
};
|
|
|
|
|
|
|
@@ -1186,7 +1192,6 @@ impl Variants {
|
|
|
});
|
|
});
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-
|
|
|
|
|
/// Merges this Variants collection with another VariantCollection, combining overlapping variants.
|
|
/// Merges this Variants collection with another VariantCollection, combining overlapping variants.
|
|
|
///
|
|
///
|
|
|
/// This method performs a merge operation, combining the current Variants with a VariantCollection.
|
|
/// This method performs a merge operation, combining the current Variants with a VariantCollection.
|
|
@@ -1259,7 +1264,11 @@ impl Variants {
|
|
|
|
|
|
|
|
// Drain all other-variants at this position, merging into
|
|
// Drain all other-variants at this position, merging into
|
|
|
// the matching self-variant by (REF, ALT) or creating a new one.
|
|
// the matching self-variant by (REF, ALT) or creating a new one.
|
|
|
- while others_iter.peek().map(|v| v.position == pos).unwrap_or(false) {
|
|
|
|
|
|
|
+ while others_iter
|
|
|
|
|
+ .peek()
|
|
|
|
|
+ .map(|v| v.position == pos)
|
|
|
|
|
+ .unwrap_or(false)
|
|
|
|
|
+ {
|
|
|
let other = others_iter.next().unwrap();
|
|
let other = others_iter.next().unwrap();
|
|
|
if let Some(existing) = self_at_pos.iter_mut().find(|v| {
|
|
if let Some(existing) = self_at_pos.iter_mut().find(|v| {
|
|
|
v.reference == other.reference && v.alternative == other.alternative
|
|
v.reference == other.reference && v.alternative == other.alternative
|
|
@@ -1637,7 +1646,9 @@ impl Variants {
|
|
|
/// This function assumes that all VcfVariants in the input vector represent the same genomic variant
|
|
/// This function assumes that all VcfVariants in the input vector represent the same genomic variant
|
|
|
/// and should be consolidated. It's the caller's responsibility to ensure this is the case.
|
|
/// and should be consolidated. It's the caller's responsibility to ensure this is the case.
|
|
|
fn create_variant(vcf_variants: Vec<VcfVariant>, annotations: &Annotations) -> Variant {
|
|
fn create_variant(vcf_variants: Vec<VcfVariant>, annotations: &Annotations) -> Variant {
|
|
|
- let first = &vcf_variants[0];
|
|
|
|
|
|
|
+ let first = vcf_variants
|
|
|
|
|
+ .first()
|
|
|
|
|
+ .expect("create_variant called with empty vec");
|
|
|
let annotations = annotations
|
|
let annotations = annotations
|
|
|
.store
|
|
.store
|
|
|
.get(&first.hash)
|
|
.get(&first.hash)
|
|
@@ -1803,9 +1814,8 @@ impl ExternalAnnotation {
|
|
|
// fs::remove_file(in_tmp)?;
|
|
// fs::remove_file(in_tmp)?;
|
|
|
|
|
|
|
|
// Parse echtvar output
|
|
// Parse echtvar output
|
|
|
- let mut echtvar_rdr = std::io::BufReader::new(
|
|
|
|
|
- get_reader(out_tmp.to_str().unwrap())?
|
|
|
|
|
- );
|
|
|
|
|
|
|
+ let mut echtvar_rdr =
|
|
|
|
|
+ std::io::BufReader::new(get_reader(out_tmp.to_str().unwrap())?);
|
|
|
let mut echtvar_line = TsvLine::new();
|
|
let mut echtvar_line = TsvLine::new();
|
|
|
let mut chunk_results = Vec::new();
|
|
let mut chunk_results = Vec::new();
|
|
|
let mut i = 0usize;
|
|
let mut i = 0usize;
|
|
@@ -1814,7 +1824,9 @@ impl ExternalAnnotation {
|
|
|
if echtvar_line.as_str().starts_with('#') || echtvar_line.as_str().is_empty() {
|
|
if echtvar_line.as_str().starts_with('#') || echtvar_line.as_str().is_empty() {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- let row: crate::io::vcf::VCFRow = echtvar_line.as_str().parse()
|
|
|
|
|
|
|
+ let row: crate::io::vcf::VCFRow = echtvar_line
|
|
|
|
|
+ .as_str()
|
|
|
|
|
+ .parse()
|
|
|
.context("Failed to parse echtvar VCF row")?;
|
|
.context("Failed to parse echtvar VCF row")?;
|
|
|
|
|
|
|
|
// Verify that the ID corresponds to the input
|
|
// Verify that the ID corresponds to the input
|
|
@@ -1822,7 +1834,8 @@ impl ExternalAnnotation {
|
|
|
if id != i + 1 {
|
|
if id != i + 1 {
|
|
|
return Err(anyhow::anyhow!(
|
|
return Err(anyhow::anyhow!(
|
|
|
"Echtvar output ID {} does not match expected ID {}",
|
|
"Echtvar output ID {} does not match expected ID {}",
|
|
|
- id, i + 1
|
|
|
|
|
|
|
+ id,
|
|
|
|
|
+ i + 1
|
|
|
));
|
|
));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -2003,7 +2016,9 @@ impl ExternalAnnotation {
|
|
|
if vep_line.as_str().starts_with('#') || vep_line.as_str().is_empty() {
|
|
if vep_line.as_str().starts_with('#') || vep_line.as_str().is_empty() {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- let line: VepLine = vep_line.as_str().parse()
|
|
|
|
|
|
|
+ let line: VepLine = vep_line
|
|
|
|
|
+ .as_str()
|
|
|
|
|
+ .parse()
|
|
|
.context("Failed to parse VepLine")?;
|
|
.context("Failed to parse VepLine")?;
|
|
|
let key = line
|
|
let key = line
|
|
|
.uploaded_variation
|
|
.uploaded_variation
|
|
@@ -2122,7 +2137,9 @@ fn process_vep_chunk(
|
|
|
if vep_line.as_str().starts_with('#') || vep_line.as_str().is_empty() {
|
|
if vep_line.as_str().starts_with('#') || vep_line.as_str().is_empty() {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- let line: VepLine = vep_line.as_str().parse()
|
|
|
|
|
|
|
+ let line: VepLine = vep_line
|
|
|
|
|
+ .as_str()
|
|
|
|
|
+ .parse()
|
|
|
.context("Failed to parse VepLine")?;
|
|
.context("Failed to parse VepLine")?;
|
|
|
let key = line
|
|
let key = line
|
|
|
.uploaded_variation
|
|
.uploaded_variation
|