use log::info; use rayon::prelude::*; use std::{collections::HashSet, fs::File, sync::Arc}; use crate::{ annotation::{Annotation, Annotations}, callers::{clairs::ClairS, deep_variant::DeepVariant}, collection::{Initialize, InitializeSolo}, config::Config, io::vcf::write_vcf, runners::Run, variant::variant::{load_variants, parallel_intersection, RunnerVariants, VcfVariant}, }; pub struct Somatic { pub id: String, pub config: Config, pub annotations: Annotations, } impl Initialize for Somatic { fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result { let id = id.to_string(); Ok(Self { id, config, annotations: Annotations::default(), }) } } impl Run for Somatic { fn run(&mut self) -> anyhow::Result<()> { info!("Running somatic pipe for {}.", self.id); let id = self.id.clone(); info!("Initialization..."); let mut v: Vec> = vec![ Box::new(ClairS::initialize(&id, self.config.clone())?), Box::new(DeepVariant::initialize(&id, "diag", self.config.clone())?), Box::new(DeepVariant::initialize(&id, "mrd", self.config.clone())?), ]; let annotations = Arc::new(self.annotations.clone()); let mut variants_collection = load_variants(&mut v, &annotations)?; let clairs_germline = ClairS::initialize(&id, self.config.clone())?.germline(&annotations)?; variants_collection.push(clairs_germline); info!("Variants sources loaded: {}", variants_collection.len()); // Annotations Stats let mut annotations = Arc::unwrap_or_clone(annotations); annotations.callers_stat(); // TODO: look at variants: ClairS + DeepVariant + SoloConstit + SoloDiag + Somatic (error // in ClairS somatic) // Filtering Somatic variants info!("Filtering somatic variants (variants not in salo callers on constit sample or germline)."); let germline_or_somatic_keys = annotations.get_keys_filter(|anns| { !anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit) }); let (somatic_keys, germline_keys, remains) = parallel_intersection( &variants_collection .iter() .flat_map(|e| e.keys()) .collect::>(), &germline_or_somatic_keys, ); assert_eq!(0, remains.len()); info!("Somatic variants positions {}.", somatic_keys.len()); info!("Germline variants positions {}.", germline_keys.len()); let somatic_keys: HashSet = somatic_keys.into_iter().collect(); annotations.retain_keys(&somatic_keys); annotations.callers_stat(); variants_collection.par_iter_mut().for_each(|c| { let before = c.variants.len(); c.retain_keys(&somatic_keys); let after = c.variants.len(); info!( "Variants removed from {}: {}", c.vcf.path.display(), before - after ); }); variants_collection.retain(|e| !e.variants.is_empty()); info!("Constit Bam annotation..."); variants_collection.iter().try_for_each(|c| { c.annotate_with_constit_bam(&annotations, &self.config.normal_bam(&id), 150) })?; annotations.solo_constit_boundaries( self.config.solo_max_alt_constit, self.config.solo_min_constit_depth, ); info!("Entropy annotation..."); variants_collection.iter().for_each(|c| { c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150); }); annotations.callers_stat(); let prob_keys: HashSet = annotations .get_keys_filter(|anns| { // let contains = anns.iter().any(|item| matches!(item, Annotation::SoloDiag)); // let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic)); // // contains && contains_not anns.contains(&Annotation::SoloDiag) && !anns.contains(&Annotation::Somatic) && !anns.contains(&Annotation::LowConstitDepth) && !anns.contains(&Annotation::HighConstitAlt) }) .into_iter() .collect(); info!("Problematic variants {}", prob_keys.len()); let mut problematic_variants = variants_collection.clone(); let problematic_variants: Vec = problematic_variants .iter_mut() .flat_map(|e| { e.retain_keys(&prob_keys); e.variants.clone() }) .collect(); write_vcf(&problematic_variants, "prob.vcf.gz")?; Ok(()) } } // 0-based position pub fn sequence_at( fasta_reader: &mut noodles_fasta::IndexedReader>, contig: &str, position: usize, len: usize, ) -> anyhow::Result { // convert to 1-based let position = position + 1; let start = position.saturating_sub(len / 2).max(1); let end = start + len - 1; // debug!("Region {contig}:{start}-{end} (1-based inclusive)"); let start = noodles_core::Position::try_from(start)?; let end = noodles_core::Position::try_from(end)?; let interval = noodles_core::region::interval::Interval::from(start..=end); let r = noodles_core::Region::new(contig.to_string(), interval); let record = fasta_reader.query(&r)?; let s = String::from_utf8(record.sequence().as_ref().to_vec())?.to_uppercase(); Ok(s) }