|
@@ -107,7 +107,13 @@
|
|
|
//! ```
|
|
//! ```
|
|
|
|
|
|
|
|
use crate::{
|
|
use crate::{
|
|
|
- annotation::Annotations, helpers::{Hash128, estimate_shannon_entropy, mean, revcomp}, io::fasta::sequence_range, pipes::ShouldRun, positions::{GenomePosition, GetGenomePosition, VcfPosition, contig_to_num}, runners::Run, variant::variant_collection::VariantCollection
|
|
|
|
|
|
|
+ annotation::Annotations,
|
|
|
|
|
+ helpers::{estimate_shannon_entropy, mean, revcomp, Hash128},
|
|
|
|
|
+ io::fasta::sequence_range,
|
|
|
|
|
+ pipes::ShouldRun,
|
|
|
|
|
+ positions::{contig_to_num, GenomePosition, GetGenomePosition, VcfPosition},
|
|
|
|
|
+ runners::Run,
|
|
|
|
|
+ variant::variant_collection::VariantCollection,
|
|
|
};
|
|
};
|
|
|
use anyhow::{anyhow, Context};
|
|
use anyhow::{anyhow, Context};
|
|
|
use bitcode::{Decode, Encode};
|
|
use bitcode::{Decode, Encode};
|
|
@@ -116,7 +122,12 @@ use log::{error, info};
|
|
|
use rayon::prelude::*;
|
|
use rayon::prelude::*;
|
|
|
use serde::{Deserialize, Serialize};
|
|
use serde::{Deserialize, Serialize};
|
|
|
use std::{
|
|
use std::{
|
|
|
- cmp::Ordering, collections::{BTreeSet, HashSet}, fmt, fs::File, hash::Hash, str::FromStr
|
|
|
|
|
|
|
+ cmp::Ordering,
|
|
|
|
|
+ collections::{BTreeSet, HashSet},
|
|
|
|
|
+ fmt,
|
|
|
|
|
+ fs::File,
|
|
|
|
|
+ hash::Hash,
|
|
|
|
|
+ str::FromStr,
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
/// Represents a variant in the Variant Call Format (VCF).
|
|
/// Represents a variant in the Variant Call Format (VCF).
|
|
@@ -315,7 +326,9 @@ impl VcfVariant {
|
|
|
|
|
|
|
|
// Fetch REF from target assembly
|
|
// Fetch REF from target assembly
|
|
|
let mut new_ref = sequence_range(fasta, tgt_contig_name, tgt_start0, tgt_end0_inclusive)
|
|
let mut new_ref = sequence_range(fasta, tgt_contig_name, tgt_start0, tgt_end0_inclusive)
|
|
|
- .with_context(|| format!("FASTA query failed at {tgt_contig_name}:{tgt_start0}-{tgt_end0_inclusive}"))?;
|
|
|
|
|
|
|
+ .with_context(|| {
|
|
|
|
|
+ format!("FASTA query failed at {tgt_contig_name}:{tgt_start0}-{tgt_end0_inclusive}")
|
|
|
|
|
+ })?;
|
|
|
|
|
|
|
|
if on_reverse {
|
|
if on_reverse {
|
|
|
new_ref = revcomp(&new_ref);
|
|
new_ref = revcomp(&new_ref);
|
|
@@ -327,18 +340,14 @@ impl VcfVariant {
|
|
|
contig: tgt_contig,
|
|
contig: tgt_contig,
|
|
|
position: tgt_start0 as u32,
|
|
position: tgt_start0 as u32,
|
|
|
};
|
|
};
|
|
|
- out.reference = new_ref
|
|
|
|
|
- .parse()
|
|
|
|
|
- .context("Failed to parse rewritten REF")?;
|
|
|
|
|
|
|
+ out.reference = new_ref.parse().context("Failed to parse rewritten REF")?;
|
|
|
|
|
|
|
|
if on_reverse {
|
|
if on_reverse {
|
|
|
// Only revcomp ALT if it is a plain sequence allele.
|
|
// Only revcomp ALT if it is a plain sequence allele.
|
|
|
// If you have symbolic alleles (<DEL>, etc.), guard here.
|
|
// If you have symbolic alleles (<DEL>, etc.), guard here.
|
|
|
let alt = out.alternative.to_string();
|
|
let alt = out.alternative.to_string();
|
|
|
let alt_rc = revcomp(&alt);
|
|
let alt_rc = revcomp(&alt);
|
|
|
- out.alternative = alt_rc
|
|
|
|
|
- .parse()
|
|
|
|
|
- .context("Failed to parse rewritten ALT")?;
|
|
|
|
|
|
|
+ out.alternative = alt_rc.parse().context("Failed to parse rewritten ALT")?;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
Ok(Some(out))
|
|
Ok(Some(out))
|
|
@@ -1379,6 +1388,10 @@ pub enum Info {
|
|
|
MATE_ID(String),
|
|
MATE_ID(String),
|
|
|
INSIDE_VNTR(String),
|
|
INSIDE_VNTR(String),
|
|
|
ALINGED_POS(String),
|
|
ALINGED_POS(String),
|
|
|
|
|
+ // dbSNP
|
|
|
|
|
+ FREQ(String),
|
|
|
|
|
+ COMMON,
|
|
|
|
|
+ RS(u32),
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl FromStr for Info {
|
|
impl FromStr for Info {
|
|
@@ -1459,6 +1472,8 @@ impl FromStr for Info {
|
|
|
"MATE_ID" => Info::MATE_ID(value.to_string()),
|
|
"MATE_ID" => Info::MATE_ID(value.to_string()),
|
|
|
"INSIDE_VNTR" => Info::INSIDE_VNTR(value.to_string()),
|
|
"INSIDE_VNTR" => Info::INSIDE_VNTR(value.to_string()),
|
|
|
"ALINGED_POS" => Info::ALINGED_POS(value.to_string()),
|
|
"ALINGED_POS" => Info::ALINGED_POS(value.to_string()),
|
|
|
|
|
+ "FREQ" => Info::FREQ(value.to_string()),
|
|
|
|
|
+ "RS" => Info::RS(parse_value(value, key)?),
|
|
|
|
|
|
|
|
_ => Info::Empty,
|
|
_ => Info::Empty,
|
|
|
})
|
|
})
|
|
@@ -1469,6 +1484,7 @@ impl FromStr for Info {
|
|
|
"P" => Info::P,
|
|
"P" => Info::P,
|
|
|
"PRECISE" => Info::PRECISE,
|
|
"PRECISE" => Info::PRECISE,
|
|
|
"IMPRECISE" => Info::IMPRECISE,
|
|
"IMPRECISE" => Info::IMPRECISE,
|
|
|
|
|
+ "COMMON" => Info::COMMON,
|
|
|
|
|
|
|
|
_ => Info::Empty,
|
|
_ => Info::Empty,
|
|
|
})
|
|
})
|
|
@@ -1553,6 +1569,11 @@ impl fmt::Display for Info {
|
|
|
Info::MATE_ID(v) => write!(f, "MATE_ID={v}"),
|
|
Info::MATE_ID(v) => write!(f, "MATE_ID={v}"),
|
|
|
Info::INSIDE_VNTR(v) => write!(f, "INSIDE_VNTR={v}"),
|
|
Info::INSIDE_VNTR(v) => write!(f, "INSIDE_VNTR={v}"),
|
|
|
Info::ALINGED_POS(v) => write!(f, "ALINGED_POS={v}"),
|
|
Info::ALINGED_POS(v) => write!(f, "ALINGED_POS={v}"),
|
|
|
|
|
+
|
|
|
|
|
+ // dbSNP
|
|
|
|
|
+ Info::FREQ(v) => write!(f, "FREQ={v}"),
|
|
|
|
|
+ Info::RS(v) => write!(f, "RS={v}"),
|
|
|
|
|
+ Info::COMMON => write!(f, "COMMON"),
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|