somatic.rs 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. use log::info;
  2. use rayon::prelude::*;
  3. use std::{collections::HashSet, fs::File, sync::Arc};
  4. use crate::{
  5. annotation::{Annotation, Annotations},
  6. callers::{clairs::ClairS, deep_variant::DeepVariant},
  7. collection::{Initialize, InitializeSolo},
  8. config::Config,
  9. io::vcf::write_vcf,
  10. runners::Run,
  11. variant::variant::{load_variants, parallel_intersection, RunnerVariants, VcfVariant},
  12. };
  13. pub struct Somatic {
  14. pub id: String,
  15. pub config: Config,
  16. pub annotations: Annotations,
  17. }
  18. impl Initialize for Somatic {
  19. fn initialize(id: &str, config: crate::config::Config) -> anyhow::Result<Self> {
  20. let id = id.to_string();
  21. Ok(Self {
  22. id,
  23. config,
  24. annotations: Annotations::default(),
  25. })
  26. }
  27. }
  28. impl Run for Somatic {
  29. fn run(&mut self) -> anyhow::Result<()> {
  30. info!("Running somatic pipe for {}.", self.id);
  31. let id = self.id.clone();
  32. info!("Initialization...");
  33. let mut v: Vec<Box<dyn RunnerVariants + Send + Sync>> = vec![
  34. Box::new(ClairS::initialize(&id, self.config.clone())?),
  35. Box::new(DeepVariant::initialize(&id, "diag", self.config.clone())?),
  36. Box::new(DeepVariant::initialize(&id, "mrd", self.config.clone())?),
  37. ];
  38. let annotations = Arc::new(self.annotations.clone());
  39. let mut variants_collection = load_variants(&mut v, &annotations)?;
  40. let clairs_germline =
  41. ClairS::initialize(&id, self.config.clone())?.germline(&annotations)?;
  42. variants_collection.push(clairs_germline);
  43. info!("Variants sources loaded: {}", variants_collection.len());
  44. // Annotations Stats
  45. let mut annotations = Arc::unwrap_or_clone(annotations);
  46. annotations.callers_stat();
  47. // TODO: look at variants: ClairS + DeepVariant + SoloConstit + SoloDiag + Somatic (error
  48. // in ClairS somatic)
  49. // Filtering Somatic variants
  50. info!("Filtering somatic variants (variants not in salo callers on constit sample or germline).");
  51. let germline_or_somatic_keys = annotations.get_keys_filter(|anns| {
  52. !anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit)
  53. });
  54. let (somatic_keys, germline_keys, remains) = parallel_intersection(
  55. &variants_collection
  56. .iter()
  57. .flat_map(|e| e.keys())
  58. .collect::<Vec<u128>>(),
  59. &germline_or_somatic_keys,
  60. );
  61. assert_eq!(0, remains.len());
  62. info!("Somatic variants positions {}.", somatic_keys.len());
  63. info!("Germline variants positions {}.", germline_keys.len());
  64. let somatic_keys: HashSet<u128> = somatic_keys.into_iter().collect();
  65. annotations.retain_keys(&somatic_keys);
  66. annotations.callers_stat();
  67. variants_collection.par_iter_mut().for_each(|c| {
  68. let before = c.variants.len();
  69. c.retain_keys(&somatic_keys);
  70. let after = c.variants.len();
  71. info!(
  72. "Variants removed from {}: {}",
  73. c.vcf.path.display(),
  74. before - after
  75. );
  76. });
  77. variants_collection.retain(|e| !e.variants.is_empty());
  78. info!("Constit Bam annotation...");
  79. variants_collection.iter().try_for_each(|c| {
  80. c.annotate_with_constit_bam(&annotations, &self.config.normal_bam(&id), 150)
  81. })?;
  82. annotations.solo_constit_boundaries(
  83. self.config.solo_max_alt_constit,
  84. self.config.solo_min_constit_depth,
  85. );
  86. info!("Entropy annotation...");
  87. variants_collection.iter().for_each(|c| {
  88. c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
  89. });
  90. annotations.callers_stat();
  91. let prob_keys: HashSet<u128> = annotations
  92. .get_keys_filter(|anns| {
  93. // let contains = anns.iter().any(|item| matches!(item, Annotation::SoloDiag));
  94. // let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic));
  95. //
  96. // contains && contains_not
  97. anns.contains(&Annotation::SoloDiag)
  98. && !anns.contains(&Annotation::Somatic)
  99. && !anns.contains(&Annotation::LowConstitDepth)
  100. && !anns.contains(&Annotation::HighConstitAlt)
  101. })
  102. .into_iter()
  103. .collect();
  104. info!("Problematic variants {}", prob_keys.len());
  105. let mut problematic_variants = variants_collection.clone();
  106. let problematic_variants: Vec<VcfVariant> = problematic_variants
  107. .iter_mut()
  108. .flat_map(|e| {
  109. e.retain_keys(&prob_keys);
  110. e.variants.clone()
  111. })
  112. .collect();
  113. write_vcf(&problematic_variants, "prob.vcf.gz")?;
  114. Ok(())
  115. }
  116. }
  117. // 0-based position
  118. pub fn sequence_at(
  119. fasta_reader: &mut noodles_fasta::IndexedReader<noodles_fasta::io::BufReader<File>>,
  120. contig: &str,
  121. position: usize,
  122. len: usize,
  123. ) -> anyhow::Result<String> {
  124. // convert to 1-based
  125. let position = position + 1;
  126. let start = position.saturating_sub(len / 2).max(1);
  127. let end = start + len - 1;
  128. // debug!("Region {contig}:{start}-{end} (1-based inclusive)");
  129. let start = noodles_core::Position::try_from(start)?;
  130. let end = noodles_core::Position::try_from(end)?;
  131. let interval = noodles_core::region::interval::Interval::from(start..=end);
  132. let r = noodles_core::Region::new(contig.to_string(), interval);
  133. let record = fasta_reader.query(&r)?;
  134. let s = String::from_utf8(record.sequence().as_ref().to_vec())?.to_uppercase();
  135. Ok(s)
  136. }