Bladeren bron

group_variants_by_bnd_rc

Thomas 7 maanden geleden
bovenliggende
commit
6054343630
2 gewijzigde bestanden met toevoegingen van 48 en 7 verwijderingen
  1. 6 3
      src/lib.rs
  2. 42 4
      src/variant/variant_collection.rs

+ 6 - 3
src/lib.rs

@@ -178,7 +178,7 @@ mod tests {
 
     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
     use super::*;
-    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, GroupByThreshold, ToBNDGraph}, variant_collection::{group_variants_by_bnd_desc, Variant}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, GroupByThreshold, ToBNDGraph}, variant_collection::{group_variants_by_bnd_desc, group_variants_by_bnd_rc, Variant}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -895,10 +895,13 @@ mod tests {
 
         let translocations = variants.get_alteration_cat(AlterationCategory::TRL);
         println!("{} translocations", translocations.len());
+        let threshold = 5;
         let res = group_variants_by_bnd_desc(&translocations, 5);
+        
+        let rres = group_variants_by_bnd_rc(&res, threshold);
 
-        res.iter().for_each(|group| {
-            println!("{:?}", group.len());
+        rres.iter().for_each(|group| {
+            println!("{} {}", group.0.len(), group.1.len());
         });
        
         Ok(())

+ 42 - 4
src/variant/variant_collection.rs

@@ -14,7 +14,9 @@ use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
-use super::variant::{AlterationCategory, Formats, Info, Infos, ReferenceAlternative, VcfVariant};
+use super::variant::{
+    AlterationCategory, BNDDesc, Formats, Info, Infos, ReferenceAlternative, VcfVariant,
+};
 use crate::{
     annotation::{
         cosmic::Cosmic,
@@ -614,15 +616,13 @@ impl Variant {
     }
 
     pub fn callers_vec(&self) -> Vec<Annotation> {
-        self
-            .annotations
+        self.annotations
             .iter()
             .filter(|a| matches!(a, Annotation::Callers(..)))
             .cloned()
             .collect()
     }
 
-
     pub fn callers(&self) -> String {
         let mut callers: Vec<String> = self
             .annotations
@@ -757,6 +757,44 @@ pub fn group_variants_by_bnd_desc(variants: &[Variant], threshold: u32) -> Vec<V
     groups
 }
 
+/// Pairs groups whose B side matches another group's A side with inverted `sens`.
+///
+/// Each group is described by `bnd_desc()` of its first variant's first `vcf_variants` entry.
+/// If a stored key has `a_contig == b_contig`, `a_sens == !b_sens` and `a_position` strictly
+/// within `threshold` of `b_position`, the group overwrites that pair's second slot; otherwise it
+/// starts a new pair. Groups with no usable descriptor are skipped. Output order is unspecified.
+pub fn group_variants_by_bnd_rc(
+    res: &[Vec<Variant>],
+    threshold: i64,
+) -> Vec<(Vec<Variant>, Vec<Variant>)> {
+    let mut map: HashMap<BNDDesc, (Vec<Variant>, Vec<Variant>)> = HashMap::new();
+    for variant_group in res.iter() {
+        // `first()` instead of `[0]`: an empty group is skipped rather than causing a panic.
+        let head = variant_group.first().and_then(|v| v.vcf_variants.first());
+        if let Some(Ok(desc)) = head.map(|v| v.bnd_desc()) {
+            let existing_key = map
+                .keys()
+                .find(|key| {
+                    key.a_contig == desc.b_contig
+                        && key.a_sens == !desc.b_sens
+                        && (key.a_position as i64 - desc.b_position as i64).abs() < threshold
+                })
+                .cloned();
+            match existing_key {
+                Some(key) => {
+                    if let Some(existing_entry) = map.get_mut(&key) {
+                        existing_entry.1 = variant_group.clone();
+                    }
+                }
+                None => {
+                    map.insert(desc, (variant_group.clone(), Vec::new()));
+                }
+            }
+        }
+    }
+    map.into_values().collect()
+}
+
 /// A collection of genomic variants.
 ///
 /// This struct represents a set of `Variant` instances, providing a container