Ver código fonte

group_variants_by_bnd_desc

Thomas 7 meses atrás
pai
commit
3eeb7b514b
3 arquivos alterados com 73 adições e 6 exclusões
  1. 17 6
      src/lib.rs
  2. 32 0
      src/variant/variant.rs
  3. 24 0
      src/variant/variant_collection.rs

+ 17 - 6
src/lib.rs

@@ -168,6 +168,7 @@ mod tests {
     use functions::assembler::{Assembler, AssemblerConfig};
     use helpers::estimate_shannon_entropy;
     use io::bed::read_bed;
+    use itertools::Itertools;
     use log::{error, info, warn};
     use pipes::somatic::SomaticPipe;
     use positions::{overlaps_par, GenomePosition, GenomeRange};
@@ -177,7 +178,7 @@ mod tests {
 
     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
     use super::*;
-    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, ToBNDGraph}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, GroupByThreshold, ToBNDGraph}, variant_collection::{group_variants_by_bnd_desc, Variant}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -846,6 +847,8 @@ mod tests {
         )?;
         let bams = collections.bam.by_id_completed(15.0, 10.0);
         let n = bams.len();
+        let mut config = Config::default();
+        // config.somatic_scan_force = true;
         warn!("{n} cases");
         for (i, bam) in bams.iter().enumerate() {
             let id = &bam.id;
@@ -858,7 +861,7 @@ mod tests {
             }
 
 
-            match SomaticPipe::initialize(id, Config::default())?.run() {
+            match SomaticPipe::initialize(id, config.clone())?.run() {
                 Ok(_) => (),
                 Err(e) => error!("{id} {e}"),
             };
@@ -881,14 +884,23 @@ mod tests {
     #[test]
     fn load_variants() -> anyhow::Result<()> {
         init();
-        let id = "ADJAGBA";
+        let id = "ACHITE";
         let config = Config::default();
-        let path = format!("{}/{id}/diag/somatic_variants.json.gz", config.result_dir);
-        let variants = variant_collection::Variants::load_from_json(&path)?;
+        let path = format!("{}/{id}/diag/{id}_somatic_variants.bit", config.result_dir);
+        let variants = variant_collection::Variants::load_from_file(&path)?;
         println!("n variants {}", variants.data.len());
 
         let n_vep: usize = variants.data.iter().map(|v| v.vep().len()).sum();
         println!("VEP: {n_vep}");
+
+        let translocations = variants.get_alteration_cat(AlterationCategory::TRL);
+        println!("{} translocations", translocations.len());
+        let res = group_variants_by_bnd_desc(&translocations, 5);
+
+        res.iter().for_each(|group| {
+            println!("{:?}", group.len());
+        });
+       
         Ok(())
     }
 
@@ -1319,5 +1331,4 @@ mod tests {
         assert_eq!(comps[0].len(), 3);
         assert_eq!(comps[1].len(), 3);
     }
-
 }

+ 32 - 0
src/variant/variant.rs

@@ -369,6 +369,38 @@ pub struct BNDDesc {
     pub added_nt: String,
 }
 
+pub trait GroupByThreshold {
+    fn group_by(&self, threshold: u32) -> Vec<Vec<BNDDesc>>;
+}
+
+impl GroupByThreshold for Vec<BNDDesc> {
+    fn group_by(&self, threshold: u32) -> Vec<Vec<BNDDesc>> {
+        let mut sorted_vec = self.clone();
+        sorted_vec.sort_by(|a, b| {
+            (&a.a_contig, a.a_sens, a.a_position).cmp(&(&b.a_contig, b.a_sens, b.a_position))
+        });
+
+        let mut grouped: Vec<Vec<BNDDesc>> = Vec::new();
+        for desc in sorted_vec {
+            if let Some(last_group) = grouped.last_mut() {
+                if let Some(last_desc) = last_group.last() {
+                    if last_desc.a_contig == desc.a_contig
+                        && last_desc.a_sens == desc.a_sens
+                        && (desc.a_position as i64 - last_desc.a_position as i64).abs()
+                            <= threshold as i64
+                    {
+                        last_group.push(desc);
+                        continue;
+                    }
+                }
+            }
+            grouped.push(vec![desc]);
+        }
+
+        grouped
+    }
+}
+
 impl BNDDesc {
     /// Construct a `BNDDesc` from VCF `CHROM`, `POS`, and break‑end `ALT` string.
     /// Supports the four VCF break‑end ALT forms:

+ 24 - 0
src/variant/variant_collection.rs

@@ -718,6 +718,30 @@ impl Variant {
     }
 }
 
+pub fn group_variants_by_bnd_desc(variants: &[Variant], threshold: u32) -> Vec<Vec<Variant>> {
+    let mut groups: Vec<Vec<Variant>> = Vec::new();
+    let mut mapping: HashMap<(String, bool, u32), usize> = HashMap::new();
+
+    for variant in variants {
+        if let Some(Some(first_bnd)) = variant.vcf_variants.first().map(|e| e.bnd_desc().ok()) {
+            let key = (
+                first_bnd.a_contig.clone(),
+                first_bnd.a_sens,
+                first_bnd.a_position / threshold,
+            );
+
+            if let Some(&index) = mapping.get(&key) {
+                groups[index].push(variant.clone());
+            } else {
+                mapping.insert(key, groups.len());
+                groups.push(vec![variant.clone()]);
+            }
+        }
+    }
+
+    groups
+}
+
 /// A collection of genomic variants.
 ///
 /// This struct represents a set of `Variant` instances, providing a container