|
@@ -22,8 +22,7 @@ use crate::{
|
|
|
},
|
|
},
|
|
|
sql::{stats_sql::insert_stats, variants_sql::insert_variants},
|
|
sql::{stats_sql::insert_stats, variants_sql::insert_variants},
|
|
|
utils::{
|
|
utils::{
|
|
|
- chi_square_test_for_proportions, estimate_shannon_entropy, get_hts_nt_pileup, new_pg,
|
|
|
|
|
- new_pg_speed, print_stat_cat,
|
|
|
|
|
|
|
+ chi_square_test_for_proportions, count_repetitions, estimate_shannon_entropy, get_hts_nt_pileup, new_pg, new_pg_speed, print_stat_cat
|
|
|
},
|
|
},
|
|
|
};
|
|
};
|
|
|
use anyhow::{anyhow, Context, Ok, Result};
|
|
use anyhow::{anyhow, Context, Ok, Result};
|
|
@@ -38,7 +37,6 @@ use noodles_gff as gff;
|
|
|
|
|
|
|
|
use rayon::prelude::*;
|
|
use rayon::prelude::*;
|
|
|
use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
|
|
use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
|
|
|
-use utoipa::ToSchema;
|
|
|
|
|
use std::io::Write;
|
|
use std::io::Write;
|
|
|
use std::{
|
|
use std::{
|
|
|
env::temp_dir,
|
|
env::temp_dir,
|
|
@@ -50,6 +48,7 @@ use std::{
|
|
|
Arc,
|
|
Arc,
|
|
|
},
|
|
},
|
|
|
};
|
|
};
|
|
|
|
|
+use utoipa::ToSchema;
|
|
|
|
|
|
|
|
// chr12:25116542|G>T KRAS
|
|
// chr12:25116542|G>T KRAS
|
|
|
#[derive(Debug, Clone)]
|
|
#[derive(Debug, Clone)]
|
|
@@ -362,17 +361,63 @@ impl Variants {
|
|
|
let ent = estimate_shannon_entropy(&s.to_lowercase());
|
|
let ent = estimate_shannon_entropy(&s.to_lowercase());
|
|
|
|
|
|
|
|
if ent < cfg.min_diversity {
|
|
if ent < cfg.min_diversity {
|
|
|
- if tumoral.position == 148725437 {
|
|
|
|
|
- warn!("POS {}", ent);
|
|
|
|
|
- }
|
|
|
|
|
n_low_diversity.fetch_add(1, Ordering::SeqCst);
|
|
n_low_diversity.fetch_add(1, Ordering::SeqCst);
|
|
|
tumoral.annotations.push(AnnotationType::VariantCategory(
|
|
tumoral.annotations.push(AnnotationType::VariantCategory(
|
|
|
VariantCategory::LowDiversity,
|
|
VariantCategory::LowDiversity,
|
|
|
));
|
|
));
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ // Check triplets or doublets if DeepVariant
|
|
|
|
|
+ let callers = tumoral.callers();
|
|
|
|
|
+
|
|
|
|
|
+ if callers.len() == 1 {
|
|
|
|
|
+ if callers[0] == "DeepVariant".to_string() {
|
|
|
|
|
+ let seq_left = &s[0..20];
|
|
|
|
|
+ let seq_right = &s[21..s.len() - 1];
|
|
|
|
|
+
|
|
|
|
|
+ // Triplet right
|
|
|
|
|
+ if count_repetitions(seq_right, 3) >= 3 {
|
|
|
|
|
+ n_low_diversity.fetch_add(1, Ordering::SeqCst);
|
|
|
|
|
+ tumoral.annotations.push(AnnotationType::VariantCategory(
|
|
|
|
|
+ VariantCategory::LowDiversity,
|
|
|
|
|
+ ));
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Doublet right
|
|
|
|
|
+ if count_repetitions(seq_right, 2) >= 4 {
|
|
|
|
|
+ n_low_diversity.fetch_add(1, Ordering::SeqCst);
|
|
|
|
|
+ tumoral.annotations.push(AnnotationType::VariantCategory(
|
|
|
|
|
+ VariantCategory::LowDiversity,
|
|
|
|
|
+ ));
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Triplet left
|
|
|
|
|
+ if count_repetitions(seq_left, 3) >= 3 {
|
|
|
|
|
+ n_low_diversity.fetch_add(1, Ordering::SeqCst);
|
|
|
|
|
+ tumoral.annotations.push(AnnotationType::VariantCategory(
|
|
|
|
|
+ VariantCategory::LowDiversity,
|
|
|
|
|
+ ));
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Doublet left
|
|
|
|
|
+ if count_repetitions(seq_left, 2) >= 4 {
|
|
|
|
|
+ n_low_diversity.fetch_add(1, Ordering::SeqCst);
|
|
|
|
|
+ tumoral.annotations.push(AnnotationType::VariantCategory(
|
|
|
|
|
+ VariantCategory::LowDiversity,
|
|
|
|
|
+ ));
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
// Check if the base is in constitutionnal pileup
|
|
// Check if the base is in constitutionnal pileup
|
|
|
if let ReferenceAlternative::Nucleotide(alt_b) = &tumoral.alternative {
|
|
if let ReferenceAlternative::Nucleotide(alt_b) = &tumoral.alternative {
|
|
|
let alt_b = alt_b.clone().into_u8();
|
|
let alt_b = alt_b.clone().into_u8();
|
|
@@ -1044,6 +1089,18 @@ impl FromStr for VCFSource {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+impl ToString for VCFSource {
|
|
|
|
|
+ fn to_string(&self) -> String {
|
|
|
|
|
+ let s = match self {
|
|
|
|
|
+ VCFSource::DeepVariant => "DeepVariant",
|
|
|
|
|
+ VCFSource::ClairS => "ClairS",
|
|
|
|
|
+ VCFSource::Sniffles => "Sniffles",
|
|
|
|
|
+ VCFSource::Nanomonsv => "Nanomonsv",
|
|
|
|
|
+ };
|
|
|
|
|
+ s.to_string()
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
impl Variant {
|
|
impl Variant {
|
|
|
pub fn from_vcfrow(row: &VCFRow, source: VCFSource, variant_type: VariantType) -> Result<Self> {
|
|
pub fn from_vcfrow(row: &VCFRow, source: VCFSource, variant_type: VariantType) -> Result<Self> {
|
|
|
let callers_data = vec![CallerData {
|
|
let callers_data = vec![CallerData {
|
|
@@ -1059,6 +1116,7 @@ impl Variant {
|
|
|
row.value
|
|
row.value
|
|
|
))?,
|
|
))?,
|
|
|
}];
|
|
}];
|
|
|
|
|
+
|
|
|
Ok(Variant {
|
|
Ok(Variant {
|
|
|
contig: row.chr.to_string(),
|
|
contig: row.chr.to_string(),
|
|
|
position: row.pos,
|
|
position: row.pos,
|
|
@@ -1262,6 +1320,13 @@ impl Variant {
|
|
|
}
|
|
}
|
|
|
vec_bools.iter().all(|&x| x)
|
|
vec_bools.iter().all(|&x| x)
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ pub fn callers(&self) -> Vec<String> {
|
|
|
|
|
+ self.source
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .map(|source| source.to_string())
|
|
|
|
|
+ .collect()
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
|
enum AlterationCategory {
|
|
enum AlterationCategory {
|