|
|
@@ -1,137 +1,94 @@
|
|
|
-use anyhow::Context;
|
|
|
use log::info;
|
|
|
-use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
|
|
-use std::{
|
|
|
- collections::HashMap, fs::File, io::{BufRead, BufReader}, ops::Range, sync::Arc, thread
|
|
|
-};
|
|
|
+use rayon::prelude::*;
|
|
|
+use std::{collections::HashSet, sync::Arc};
|
|
|
|
|
|
use crate::{
|
|
|
- annotation::Annotations, callers::{clairs::ClairS, deep_variant::DeepVariant}, collection::{Initialize, InitializeSolo}, config::Config, helpers::{par_intersection, VectorIntersection}, io::{bed::read_bed, vcf::write_vcf}, positions::{overlaps_par, par_overlaps}, runners::Run, variant::variant::Variants
|
|
|
+ annotation::{Annotation, Annotations},
|
|
|
+ callers::{clairs::ClairS, deep_variant::DeepVariant},
|
|
|
+ collection::{Initialize, InitializeSolo},
|
|
|
+ config::Config,
|
|
|
+ runners::Run,
|
|
|
+ variant::variant::{load_variants, parallel_intersection, RunnerVariants},
|
|
|
};
|
|
|
|
|
|
-#[derive(Debug)]
|
|
|
pub struct Somatic { // State for one somatic pipeline run; built by Initialize::initialize and driven by Run::run.
|
|
|
pub id: String, // Identifier passed to each caller's initialize() and printed in the log messages.
|
|
|
pub config: Config, // Cloned into every caller that run() initializes.
|
|
|
+ pub annotations: Annotations, // Set to Annotations::default() by initialize(); run() works on a clone of it.
|
|
|
}
|
|
|
|
|
|
impl Initialize for Somatic {
|
|
|
fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> { // Builds a Somatic from an id and a config; the visible body always returns Ok.
|
|
|
let id = id.to_string(); // Owned copy of the borrowed id, stored in the struct.
|
|
|
- Ok(Self { id, config })
|
|
|
+ Ok(Self {
|
|
|
+ id,
|
|
|
+ config,
|
|
|
+ annotations: Annotations::default(), // No annotations are supplied by the caller; start from the Default value.
|
|
|
+ })
|
|
|
}
|
|
|
}
|
|
|
|
|
|
impl Run for Somatic {
|
|
|
fn run(&mut self) -> anyhow::Result<()> { // Loads variants from ClairS and DeepVariant (diag, mrd), then keeps only the keys selected by the annotation filter below.
|
|
|
- info!("Running Somatic pipe for {}", self.id);
|
|
|
+ info!("Running somatic pipe for {}.", self.id);
|
|
|
+ let id = self.id.clone(); // Owned copy of the id, passed by reference to each initialize() call below.
|
|
|
|
|
|
info!("Initialization...");
|
|
|
+ let mut v: Vec<Box<dyn RunnerVariants + Send + Sync>> = vec![ // Variant sources handed to load_variants: ClairS, DeepVariant "diag", DeepVariant "mrd".
|
|
|
+ Box::new(ClairS::initialize(&id, self.config.clone())?),
|
|
|
+ Box::new(DeepVariant::initialize(&id, "diag", self.config.clone())?),
|
|
|
+ Box::new(DeepVariant::initialize(&id, "mrd", self.config.clone())?),
|
|
|
+ ];
|
|
|
+
|
|
|
+ let annotations = Arc::new(self.annotations.clone()); // Clone of self.annotations wrapped in an Arc; the loaders below receive a reference to it.
|
|
|
+
|
|
|
+ let mut variants_collection = load_variants(&mut v, &annotations)?;
|
|
|
+ let clairs_germline =
|
|
|
+ ClairS::initialize(&id, self.config.clone())?.germline(&annotations)?; // A separate ClairS instance is initialized here to obtain its germline() result.
|
|
|
+ variants_collection.push(clairs_germline);
|
|
|
+ info!("Variants sources loaded: {}", variants_collection.len());
|
|
|
+
|
|
|
+ // Annotation stats before filtering (callers_stat() is called again after retain_keys below).
|
|
|
+ let mut annotations = Arc::unwrap_or_clone(annotations); // Take the Annotations back out of the Arc (Arc::unwrap_or_clone clones only if other references remain).
|
|
|
+ annotations.callers_stat();
|
|
|
+
|
|
|
+ // Filter somatic variants: keep keys annotated Somatic, or SoloDiag with no Germline/SoloConstit annotation.
|
|
|
+ info!("Filtering somatic variants (variants from somatic callers or not found in germline or in a constit sample).");
|
|
|
+ let germline_or_somatic_keys = annotations.get_keys_filter(|anns| {
|
|
|
+ anns.contains(&Annotation::Somatic)
|
|
|
+ | (anns.contains(&Annotation::SoloDiag) // NOTE(review): `|` here is the non-short-circuiting boolean OR; `||` looks intended - confirm.
|
|
|
+ && !anns
|
|
|
+ .iter()
|
|
|
+ .any(|ann| matches!(ann, Annotation::Germline | Annotation::SoloConstit)))
|
|
|
+ });
|
|
|
+
|
|
|
+ let (somatic_keys, germline_keys, remains) = parallel_intersection( // NOTE(review): the roles of the three returned collections are inferred from their names - confirm against parallel_intersection.
|
|
|
+ &variants_collection
|
|
|
+ .iter()
|
|
|
+ .flat_map(|e| e.keys())
|
|
|
+ .collect::<Vec<u64>>(),
|
|
|
+ &germline_or_somatic_keys,
|
|
|
+ );
|
|
|
+ assert_eq!(0, remains.len()); // Panics if `remains` is not empty.
|
|
|
|
|
|
- let mut clairs = ClairS::initialize(&self.id, self.config.clone())?;
|
|
|
- let mut deep_variant_mrd = DeepVariant::initialize(&self.id, "mrd", self.config.clone())?;
|
|
|
-
|
|
|
- info!("Running callers if necessary...");
|
|
|
- clairs.run()?;
|
|
|
- deep_variant_mrd.run()?;
|
|
|
-
|
|
|
- info!("Loading Germlines VCF from DeepVariant mrd and ClairS germline, in parallel...");
|
|
|
- let annotations = Arc::new(Annotations::default());
|
|
|
-
|
|
|
- let clairs_handle = {
|
|
|
- let clairs = clairs.clone();
|
|
|
- let annotations = Arc::clone(&annotations);
|
|
|
-
|
|
|
- thread::spawn(move || clairs.germline(&annotations))
|
|
|
- };
|
|
|
-
|
|
|
- let deep_variant_mrd_handle = {
|
|
|
- let deep_variant_mrd = deep_variant_mrd.clone();
|
|
|
- let annotations = Arc::clone(&annotations);
|
|
|
-
|
|
|
- thread::spawn(move || deep_variant_mrd.variants(&annotations))
|
|
|
- };
|
|
|
-
|
|
|
- let annotations = Arc::unwrap_or_clone(annotations);
|
|
|
-
|
|
|
- let clairs_germline = clairs_handle
|
|
|
- .join()
|
|
|
- .map_err(|e| anyhow::anyhow!("Thread panic in clairs germline: {:?}", e))
|
|
|
- .context("Failed to join clairs_handle thread")?
|
|
|
- .context(format!("Error in clairs germline loading for {}", self.id))?;
|
|
|
-
|
|
|
- let deep_variant_germline = deep_variant_mrd_handle
|
|
|
- .join()
|
|
|
- .map_err(|e| anyhow::anyhow!("Thread panic in clairs germline: {:?}", e))
|
|
|
- .context("Failed to join deep_variant_mrd_handle thread")?
|
|
|
- .context(format!(
|
|
|
- "Error in deepvariant germline loading for {}",
|
|
|
- self.id
|
|
|
- ))?;
|
|
|
-
|
|
|
- info!("Merging variants");
|
|
|
+ info!("Somatic variants positions {}.", somatic_keys.len());
|
|
|
+ info!("Germline variants positions {}.", germline_keys.len());
|
|
|
|
|
|
- let VectorIntersection {
|
|
|
- common: germline_common,
|
|
|
- only_in_first: only_in_deep_variant_mrd,
|
|
|
- only_in_second: only_in_clairs_germline,
|
|
|
- } = par_intersection(&deep_variant_germline.variants, &clairs_germline.variants);
|
|
|
+ let somatic_keys: HashSet<u64> = somatic_keys.into_iter().collect(); // Shadow the key list as a HashSet, the form passed to both retain_keys calls below.
|
|
|
+ annotations.retain_keys(&somatic_keys);
|
|
|
+ annotations.callers_stat(); // Stats again, after retain_keys.
|
|
|
|
|
|
- info!(
|
|
|
- "common: {}, only DeepVariant: {}, only ClairS: {}",
|
|
|
- germline_common.len(),
|
|
|
- only_in_deep_variant_mrd.len(),
|
|
|
- only_in_clairs_germline.len()
|
|
|
- );
|
|
|
+ variants_collection.par_iter_mut().for_each(|c| { // Filter every loaded collection with the same key set, via rayon's par_iter_mut.
|
|
|
+ let before = c.variants.len();
|
|
|
+ c.retain_keys(&somatic_keys);
|
|
|
+ let after = c.variants.len();
|
|
|
+ info!("Variants removed from {}: {}", c.vcf.path.display(), before - after); // Logs how many variants retain_keys dropped for this collection's VCF path.
|
|
|
+ });
|
|
|
|
|
|
- // Write vcf
|
|
|
- // [
|
|
|
- // (germline_common, "common.vcf.gz"),
|
|
|
- // (only_in_deep_variant_mrd, "deep_variant_only.vcf.gz"),
|
|
|
- // (only_in_clairs_germline, "clairs_only.vcf.gz"),
|
|
|
- // ]
|
|
|
- // .par_iter()
|
|
|
- // .for_each(|(v, p)| {
|
|
|
- // write_vcf(v, p).unwrap();
|
|
|
- // });
|
|
|
-
|
|
|
- let deep_variant_diag =
|
|
|
- DeepVariant::initialize(&self.id, "diag", self.config.clone())?.variants(&annotations)?;
|
|
|
-
|
|
|
- info!("Intersection...");
|
|
|
- // filter common
|
|
|
- let deep_diag_int =
|
|
|
- par_intersection(&deep_variant_diag.variants, &germline_common);
|
|
|
- println!("filtering out common germline\n{deep_diag_int}");
|
|
|
- let somatic = deep_diag_int.only_in_first;
|
|
|
- println!("N somatic: {}", somatic.len());
|
|
|
-
|
|
|
- // filter only in clairs
|
|
|
- let deep_diag_int =
|
|
|
- par_intersection(&somatic, &only_in_clairs_germline);
|
|
|
- println!("filtering out germline only in clairs\n{deep_diag_int}");
|
|
|
- let somatic = deep_diag_int.only_in_first;
|
|
|
- println!("N somatic: {}", somatic.len());
|
|
|
-
|
|
|
- // filter only in deepvariant mrd
|
|
|
- let deep_diag_int =
|
|
|
- par_intersection(&somatic, &only_in_deep_variant_mrd);
|
|
|
- println!("fiiltering out germline only in deep_variant_mrd\n{deep_diag_int}");
|
|
|
- let deep_variant_diag_f = deep_diag_int.only_in_first;
|
|
|
- println!("N somatic: {}", somatic.len());
|
|
|
-
|
|
|
- // Load clairs somatic
|
|
|
- let clairs_somatic = ClairS::initialize(&self.id, self.config.clone())?.variants(&annotations)?;
|
|
|
- let masked = read_bed(&self.config.mask_bed(&self.id))?;
|
|
|
-
|
|
|
-
|
|
|
- let somatic_int = par_intersection(&deep_variant_diag_f, &clairs_somatic.variants);
|
|
|
-
|
|
|
- println!("deep_variant_diag_filtered & clairs\n{somatic_int}");
|
|
|
- let clairs_masked = par_overlaps(&clairs_somatic.variants, &masked);
|
|
|
- let deepvariant_masked = par_overlaps(&deep_variant_diag_f, &masked);
|
|
|
- info!("Clairs masked: {}", clairs_masked.len());
|
|
|
- info!("Deepvariant masked: {}", deepvariant_masked.len());
|
|
|
Ok(())
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+pub fn filter_entropy() { // TODO(review): empty stub - takes no parameters, has no body, and does nothing when called.
|
|
|
+
|
|
|
+}
|