Thomas 1 год назад
Родитель
Сommit
19e0144f68
1 измененных файлов с 20 добавлено и 257 удалено
  1. 20 257
      src/pipes/somatic.rs

+ 20 - 257
src/pipes/somatic.rs

@@ -1,6 +1,5 @@
 use log::info;
-use rayon::prelude::*;
-use std::{collections::HashSet, fs::File, sync::Arc};
+use std::{fs::File, sync::Arc};
 
 use crate::{
     annotation::{Annotation, Annotations, Caller},
@@ -10,7 +9,7 @@ use crate::{
     init_solo_callers, init_somatic_callers,
     runners::Run,
     variant::{
-        variant::{load_variants, parallel_intersection, CallerBox},
+        variant::{load_variants, CallerBox},
         variant_collection::{ExternalAnnotation, VariantCollection},
     },
 };
@@ -83,30 +82,6 @@ impl SomaticStats {
     }
 }
 
-// pub fn to_callers_cat(s: &str) -> (Annotation, Annotation) {
-//     let splits: Vec<&str> = s.split("_").collect();
-//
-//     if splits.contains(&"clairs") {
-//         (Annotation::Callers(Caller::ClairS), Annotation::Somatic)
-//     } else if splits.contains(&"clair3-germline") {
-//         (Annotation::Callers(Caller::ClairS), Annotation::Germline)
-//     } else if splits.contains(&"DeepVariant") && splits.contains(&"mrd") {
-//         (
-//             Annotation::Callers(Caller::DeepVariant),
-//             Annotation::SoloConstit,
-//         )
-//     } else if splits.contains(&"DeepVariant") && splits.contains(&"diag") {
-//         (
-//             Annotation::Callers(Caller::DeepVariant),
-//             Annotation::SoloTumor,
-//         )
-//     } else if splits.contains(&"nanomonsv") {
-//         (Annotation::Callers(Caller::NanomonSV), Annotation::Somatic)
-//     } else {
-//         panic!("unknown caller: {s}");
-//     }
-// }
-
 impl Run for Somatic {
     fn run(&mut self) -> anyhow::Result<()> {
         let id = self.id.clone();
@@ -140,30 +115,12 @@ impl Run for Somatic {
         info!(
             "Variants collections from {} vcf ({} variants)",
             variants_collections.len(),
-            variants_collections.iter().map(|v| v.variants.len()).sum::<usize>()
+            variants_collections
+                .iter()
+                .map(|v| v.variants.len())
+                .sum::<usize>()
         );
 
-        // Annotations Stats
-        // let mut annotations = Arc::unwrap_or_clone(annotations);
-        // somatic_stats.push_annotations_stats();
-
-        // TODO: look at variants: ClairS + DeepVariant + SoloConstit + SoloDiag + Somatic (error
-        // in ClairS somatic)
-
-        // Filtering Somatic variants
-        // let mut variants_collections: Vec<VariantCollection> = variants_collections
-        //     .into_iter()
-        //     .map(|var| {
-        //         let total = var.variants.len();
-        //         let (left, _) = annotations.filter_variants(var, |anns| {
-        //             !anns.contains(&Annotation::Germline)
-        //                 && !anns.contains(&Annotation::SoloConstit)
-        //         });
-        //         info!("{} {}\t{}/{}", left.caller, left.category, left.variants.len(), total);
-        //         left
-        //     })
-        //     .collect();
-
         let mut annotations = Arc::try_unwrap(annotations)
             .map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
         annotations.callers_stat();
@@ -176,51 +133,6 @@ impl Run for Somatic {
             });
         annotations.callers_stat();
 
-        // return Ok(());
-        // let tumor_or_somatic_keys = annotations.get_keys_filter(|anns| {
-        //     !anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit)
-        // });
-        //
-        // let (somatic_keys, germline_keys, remains) = parallel_intersection(
-        //     &variants_collections
-        //         .iter()
-        //         .flat_map(|e| e.keys())
-        //         .collect::<Vec<u128>>(),
-        //     &tumor_or_somatic_keys,
-        // );
-        // assert_eq!(0, remains.len());
-        //
-        // info!(
-        //     "Germline variants positions removed {}.",
-        //     germline_keys.len()
-        // );
-        // germline_keys.len();
-        //
-        // let somatic_keys: HashSet<u128> = somatic_keys.into_iter().collect();
-        // // let mut annotations = Arc::try_unwrap(annotations)
-        // //     .map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
-        // annotations.retain_keys(&somatic_keys);
-        // annotations.callers_stat();
-        //
-        // somatic_stats.n_constit_germline = variants_collections
-        //     .par_iter_mut()
-        //     .map(|c| {
-        //         let before = c.variants.len();
-        //         c.retain_keys(&somatic_keys);
-        //         let after = c.variants.len();
-        //         info!(
-        //             "Variants removed from {}: {}",
-        //             c.vcf.path.display(),
-        //             before - after
-        //         );
-        //         before - after
-        //     })
-        //     .sum();
-        //
-        // variants_collections.retain(|e| !e.variants.is_empty());
-        //
-        // variants_collections.iter().for_each(|c| c.stats());
-
         // Annotation: BAM depth, n_alt
         info!("Reading Constit Bam file for depth and pileup annotation.");
         variants_collections.iter().try_for_each(|c| {
@@ -234,7 +146,7 @@ impl Run for Somatic {
 
         // Filter: Remove LowConstitDepth from annotations and variants collections
         info!(
-            "Removing low constit depth (depth < {})",
+            "Removing variants when depth in constit bam < {}.",
             self.config.solo_min_constit_depth
         );
         somatic_stats.n_low_constit = annotations
@@ -242,66 +154,31 @@ impl Run for Somatic {
                 !anns.contains(&Annotation::LowConstitDepth)
             });
         annotations.callers_stat();
-
-        // let low_constit_keys: HashSet<u128> = annotations
-        //     .get_keys_filter(|anns| anns.contains(&Annotation::LowConstitDepth))
-        //     .into_iter()
-        //     .collect();
-        //
-        // annotations.remove_keys(&low_constit_keys);
-        //
-        // somatic_stats.n_low_constit = variants_collections
-        //     .par_iter_mut()
-        //     .map(|c| {
-        //         let before = c.variants.len();
-        //         c.remove_keys(&low_constit_keys);
-        //         let after = c.variants.len();
-        //         let rm = before - after;
-        //         info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
-        //         rm
-        //     })
-        //     .sum();
-        //
-        // variants_collections.retain(|e| !e.variants.is_empty());
-        //
-        // variants_collections.iter().for_each(|c| c.stats());
+        info!(
+            "{} variants removed when depth in constit bam < {}.",
+            somatic_stats.n_low_constit, self.config.solo_min_constit_depth
+        );
 
         // Filter: remove HighConstitAlt
         info!(
-            "Removing high constit alternative allele (alt constit > {})",
+            "Removing variants with SNP/indel inside the constit alignements > {}",
             self.config.solo_max_alt_constit
         );
-
         somatic_stats.n_high_alt_constit = annotations
             .retain_variants(&mut variants_collections, |anns| {
                 !anns.contains(&Annotation::HighConstitAlt)
             });
         annotations.callers_stat();
-
-        // let high_alt_keys: HashSet<u128> = annotations
-        //     .get_keys_filter(|anns| anns.contains(&Annotation::HighConstitAlt))
-        //     .into_iter()
-        //     .collect();
-        //
-        // annotations.remove_keys(&high_alt_keys);
-        // somatic_stats.n_high_alt_constit = variants_collections
-        //     .par_iter_mut()
-        //     .map(|c| {
-        //         let before = c.variants.len();
-        //         c.remove_keys(&high_alt_keys);
-        //         let after = c.variants.len();
-        //         let rm = before - after;
-        //         info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
-        //         rm
-        //     })
-        //     .sum();
-        // variants_collections.retain(|e| !e.variants.is_empty());
-        //
-        // variants_collections.iter().for_each(|c| c.stats());
-
+        info!(
+            "{} variants removed with SNP/indel inside the constit alignements > {}",
+            somatic_stats.n_high_alt_constit, self.config.solo_max_alt_constit
+        );
 
         // Annotation: Entropy
-        info!("Entropy annotation from {} sequences.", self.config.reference);
+        info!(
+            "Entropy annotation from {} sequences.",
+            self.config.reference
+        );
         variants_collections.iter().for_each(|c| {
             c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
         });
@@ -350,98 +227,6 @@ impl Run for Somatic {
         );
         annotations.callers_stat();
 
-        // let high_alt_gnomad_keys: HashSet<u128> = annotations
-        //     .get_keys_filter(|anns| {
-        //         anns.iter()
-        //             .find_map(|a| {
-        //                 if let Annotation::GnomAD(gnomad) = a {
-        //                     Some(gnomad.gnomad_af > 0.0)
-        //                 } else {
-        //                     None
-        //                 }
-        //             })
-        //             .and_then(|gnomad_condition| {
-        //                 anns.iter()
-        //                     .find_map(|a| {
-        //                         if let Annotation::ConstitAlt(n_alt) = a {
-        //                             Some(*n_alt > 0)
-        //                         } else {
-        //                             None
-        //                         }
-        //                     })
-        //                     .map(|constit_alt_condition| gnomad_condition && constit_alt_condition)
-        //             })
-        //             .unwrap_or(false)
-        //     })
-        //     .into_iter()
-        //     .collect();
-        //
-        // info!(
-        //     "Filtering out {} positions, with constit alt <= max contig alt ({}) and in GnomAD.",
-        //     high_alt_gnomad_keys.len(),
-        //     self.config.solo_max_alt_constit
-        // );
-        //
-        // annotations.remove_keys(&high_alt_gnomad_keys);
-        // somatic_stats.n_high_alt_constit_gnomad = variants_collections
-        //     .par_iter_mut()
-        //     .map(|c| {
-        //         let before = c.variants.len();
-        //         c.remove_keys(&high_alt_gnomad_keys);
-        //         let after = c.variants.len();
-        //         let rm = before - after;
-        //         info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
-        //         rm
-        //     })
-        //     .sum();
-        // variants_collections.retain(|e| !e.variants.is_empty());
-        
-
-        // let prob_keys: HashSet<u128> = annotations
-        //     .get_keys_filter(|anns| {
-        //         anns.contains(&Annotation::SoloTumor)
-        //             && !anns.contains(&Annotation::Callers(Caller::ClairS))
-        //     })
-        //     .into_iter()
-        //     .collect();
-        //
-        // info!("Problematic variants {}", prob_keys.len());
-        //
-        // let mut problematic_variants = variants_collection.clone();
-        //
-        // let problematic_variants: Vec<VcfVariant> = problematic_variants
-        //     .iter_mut()
-        //     .flat_map(|e| {
-        //         info!("{} {}:\t{}", e.vcf.caller, e.vcf.time, e.variants.len());
-        //         e.retain_keys(&prob_keys);
-        //         e.variants.clone()
-        //     })
-        //     .collect();
-        // write_vcf(&problematic_variants, "prob.vcf.gz")?;
-        //
-        // // Entropies
-        // let entropies: Vec<f64> = problematic_variants
-        //     .iter()
-        //     .flat_map(|v| {
-        //         annotations
-        //             .store
-        //             .get(&v.hash_variant())
-        //             .unwrap()
-        //             .iter()
-        //             .flat_map(|a| {
-        //                 if let Annotation::ShannonEntropy(ent) = a {
-        //                     vec![*ent]
-        //                 } else {
-        //                     Vec::new()
-        //                 }
-        //             })
-        //             .collect::<Vec<_>>()
-        //     })
-        //     .collect();
-        // let json_string = serde_json::to_string(&entropies)?;
-        // let mut file = File::create("/data/entropies.json")?;
-        // file.write_all(json_string.as_bytes())?;
-
         // Annotation low entropy
         annotations.low_shannon_entropy(self.config.min_shannon_entropy);
         annotations.callers_stat();
@@ -454,28 +239,6 @@ impl Run for Somatic {
             });
         annotations.callers_stat();
 
-        // let low_ent_keys: HashSet<u128> = annotations
-        //     .get_keys_filter(|anns| anns.contains(&Annotation::LowEntropy))
-        //     .into_iter()
-        //     .collect();
-        //
-        // annotations.remove_keys(&low_ent_keys);
-        // annotations.callers_stat();
-        // somatic_stats.n_low_entropies = variants_collections
-        //     .par_iter_mut()
-        //     .map(|c| {
-        //         let before = c.variants.len();
-        //         c.remove_keys(&low_ent_keys);
-        //         let after = c.variants.len();
-        //         let rm = before - after;
-        //         info!("Variants removed from {}: {rm}", c.vcf.path.display());
-        //         rm
-        //     })
-        //     .sum();
-        // variants_collections.retain(|e| !e.variants.is_empty());
-        // let tw: Vec<VcfVariant> = variants_collection.iter().flat_map(|c| c.variants.clone()).collect();
-        // write_vcf(&tw, "low_ent.vcf.gz")?;
-
         // VEP
         info!("VEP annotation.");
         variants_collections