|
|
@@ -5,6 +5,7 @@ use crate::{
|
|
|
gnomad::GnomAD,
|
|
|
ncbi_gff::NCBIGFF,
|
|
|
pangolin::{pangolin_parse_results, pangolin_save_variants, run_pangolin, Pangolin},
|
|
|
+ phase::Phase,
|
|
|
vep::{get_best_vep, vep_chunk, VEP},
|
|
|
},
|
|
|
callers::{
|
|
|
@@ -38,10 +39,11 @@ use noodles_fasta::indexed_reader::Builder as FastaBuilder;
|
|
|
use noodles_gff as gff;
|
|
|
|
|
|
use rayon::prelude::*;
|
|
|
+use rust_htslib::bam::IndexedReader;
|
|
|
use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
|
|
|
use std::{
|
|
|
env::temp_dir,
|
|
|
- fmt::{self, Display},
|
|
|
+ fmt,
|
|
|
fs::File,
|
|
|
str::FromStr,
|
|
|
sync::{
|
|
|
@@ -331,7 +333,7 @@ impl Variants {
|
|
|
n_already.fetch_add(1, Ordering::SeqCst);
|
|
|
continue;
|
|
|
}
|
|
|
- let (pos, is_ins) = match tumoral.alt_cat() {
|
|
|
+ let (pos, is_ins) = match tumoral.alteration_category() {
|
|
|
AlterationCategory::Ins => (tumoral.position, true),
|
|
|
AlterationCategory::Del => (tumoral.position, false),
|
|
|
_ => (tumoral.position, false),
|
|
|
@@ -449,7 +451,7 @@ impl Variants {
|
|
|
VariantCategory::Somatic,
|
|
|
));
|
|
|
}
|
|
|
- } else if tumoral.alt_cat() == AlterationCategory::Del {
|
|
|
+ } else if tumoral.alteration_category() == AlterationCategory::Del {
|
|
|
let n_alt_mrd =
|
|
|
bases.clone().into_iter().filter(|e| *e == b'D').count();
|
|
|
if n_alt_mrd > 0 {
|
|
|
@@ -511,6 +513,30 @@ impl Variants {
|
|
|
.collect::<Vec<Variant>>()
|
|
|
}
|
|
|
|
|
|
+ pub fn with_annotation(&self, annotation: &AnnotationType) -> Vec<Variant> {
|
|
|
+ self.data
|
|
|
+ .clone()
|
|
|
+ .into_iter()
|
|
|
+ .filter(|v| {
|
|
|
+ v.annotations.iter().any(|a| {
|
|
|
+ matches!(
|
|
|
+ (annotation, a),
|
|
|
+ (
|
|
|
+ AnnotationType::VariantCategory(_),
|
|
|
+ AnnotationType::VariantCategory(_)
|
|
|
+ ) | (AnnotationType::VEP(_), AnnotationType::VEP(_))
|
|
|
+ | (AnnotationType::Cluster(_), AnnotationType::Cluster(_))
|
|
|
+ | (AnnotationType::Cosmic(_), AnnotationType::Cosmic(_))
|
|
|
+ | (AnnotationType::GnomAD(_), AnnotationType::GnomAD(_))
|
|
|
+ | (AnnotationType::NCBIGFF(_), AnnotationType::NCBIGFF(_))
|
|
|
+ | (AnnotationType::Pangolin(_), AnnotationType::Pangolin(_))
|
|
|
+ | (AnnotationType::Phase(_), AnnotationType::Phase(_))
|
|
|
+ )
|
|
|
+ })
|
|
|
+ })
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
+
|
|
|
pub fn write_vcf_cat(&mut self, path: &str, cat: &VariantCategory) -> Result<()> {
|
|
|
info!("Writing VCF {}", path);
|
|
|
|
|
|
@@ -777,6 +803,10 @@ impl Variants {
|
|
|
self.data.len()
|
|
|
}
|
|
|
|
|
|
+ pub fn is_empty(&self) -> bool {
|
|
|
+ self.data.is_empty()
|
|
|
+ }
|
|
|
+
|
|
|
pub fn constit_len(&self) -> usize {
|
|
|
self.constit.len()
|
|
|
}
|
|
|
@@ -789,6 +819,87 @@ impl Variants {
|
|
|
.collect()
|
|
|
}
|
|
|
|
|
|
+ pub fn phase_contig(
|
|
|
+ &mut self,
|
|
|
+ phases: &[pandora_lib_scan::phase::Phase],
|
|
|
+ bam_path: &str,
|
|
|
+ contig: &str,
|
|
|
+ ) {
|
|
|
+ type Iv = rust_lapper::Interval<u64, pandora_lib_scan::phase::Phase>;
|
|
|
+
|
|
|
+ let data: Vec<Iv> = phases
|
|
|
+ .iter()
|
|
|
+ .map(|p| Iv {
|
|
|
+ start: p.range.unwrap().0,
|
|
|
+ stop: p.range.unwrap().3 + 1, // TODO verif if inclusif
|
|
|
+ val: p.clone(),
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ let lapper = rust_lapper::Lapper::new(data);
|
|
|
+
|
|
|
+ let mut bam = IndexedReader::from_path(bam_path).unwrap();
|
|
|
+ let mut annotations = 0;
|
|
|
+ for v in self.data.iter_mut().filter(|v| v.contig == contig) {
|
|
|
+ let over_phases: Vec<&pandora_lib_scan::phase::Phase> = lapper
|
|
|
+ .find(v.position as u64, v.position as u64)
|
|
|
+ .map(|iv| &iv.val)
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ if !over_phases.is_empty() {
|
|
|
+ let reads: Vec<String> = match v.alteration_category() {
|
|
|
+ AlterationCategory::Snv => {
|
|
|
+ let base = v.alternative.to_string().as_bytes()[0];
|
|
|
+ pandora_lib_pileup::qnames_at_base(
|
|
|
+ &mut bam,
|
|
|
+ &v.contig,
|
|
|
+ v.position as i32,
|
|
|
+ false,
|
|
|
+ )
|
|
|
+ .unwrap()
|
|
|
+ .iter()
|
|
|
+ .filter(|(_, b)| *b == base)
|
|
|
+ .map(|(r, _)| r.to_string())
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
+ AlterationCategory::Ins => pandora_lib_pileup::qnames_at_base(
|
|
|
+ &mut bam,
|
|
|
+ &v.contig,
|
|
|
+ v.position as i32,
|
|
|
+ true,
|
|
|
+ )
|
|
|
+ .unwrap()
|
|
|
+ .iter()
|
|
|
+ .filter(|(_, b)| *b == b'I')
|
|
|
+ .map(|(r, _)| r.to_string())
|
|
|
+ .collect(),
|
|
|
+ AlterationCategory::Del => pandora_lib_pileup::qnames_at_base(
|
|
|
+ &mut bam,
|
|
|
+ &v.contig,
|
|
|
+ v.position as i32,
|
|
|
+ false,
|
|
|
+ )
|
|
|
+ .unwrap()
|
|
|
+ .iter()
|
|
|
+ .filter(|(_, b)| *b == b'D')
|
|
|
+ .map(|(r, _)| r.to_string())
|
|
|
+ .collect(),
|
|
|
+ AlterationCategory::Rep => Vec::new(),
|
|
|
+ AlterationCategory::Other => Vec::new(),
|
|
|
+ };
|
|
|
+
|
|
|
+ over_phases
|
|
|
+ .iter()
|
|
|
+ .filter(|p| p.contains(&reads))
|
|
|
+ .for_each(|p| {
|
|
|
+ if let Some(id) = p.id() {
|
|
|
+ annotations += 1;
|
|
|
+ v.annotations.push(AnnotationType::Phase(Phase { id }));
|
|
|
+ };
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
pub fn stats(&self) -> Result<Vec<Stat>> {
|
|
|
let mut callers_cat = HashMap::new();
|
|
|
let mut n_caller_data = 0;
|
|
|
@@ -1193,7 +1304,7 @@ impl Variant {
|
|
|
)
|
|
|
}
|
|
|
|
|
|
- pub fn alt_cat(&self) -> AlterationCategory {
|
|
|
+ pub fn alteration_category(&self) -> AlterationCategory {
|
|
|
match (&self.reference, &self.alternative) {
|
|
|
(ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
|
|
|
AlterationCategory::Snv
|
|
|
@@ -1358,7 +1469,7 @@ pub enum AnnotationType {
|
|
|
GnomAD(GnomAD),
|
|
|
NCBIGFF(NCBIGFF),
|
|
|
Pangolin(Pangolin),
|
|
|
- // Phase(Phase)
|
|
|
+ Phase(Phase),
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
|
|
|
@@ -1441,13 +1552,13 @@ impl TryFrom<u8> for Base {
|
|
|
|
|
|
impl Base {
|
|
|
pub fn into_u8(self) -> u8 {
|
|
|
- return match self {
|
|
|
+ match self {
|
|
|
Base::A => b'A',
|
|
|
Base::T => b'T',
|
|
|
Base::C => b'C',
|
|
|
Base::G => b'G',
|
|
|
Base::N => b'N',
|
|
|
- };
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|