Browse Source

Variant n_alt_depth(&self) -> (f64, f64)

Thomas 9 months ago
parent
commit
6d1afb30dd
4 changed files with 95 additions and 49 deletions
  1. 5 5
      src/callers/clairs.rs
  2. 45 1
      src/lib.rs
  3. 31 42
      src/variant/variant.rs
  4. 14 1
      src/variant/variant_collection.rs

+ 5 - 5
src/callers/clairs.rs

@@ -102,6 +102,8 @@ impl Run for ClairS {
                 "--include_all_ctgs",
                 "--print_germline_calls",
                 "--enable_clair3_germline_output",
+                "--use_longphase_for_intermediate_haplotagging",
+                "true",
                 "--output_dir",
                 &self.output_dir,
                 "-s",
@@ -143,7 +145,7 @@ impl Run for ClairS {
 
         if !Path::new(&self.vcf_passed).exists() {
             // Concat output and indels
-            let tmp_file = temp_file_path("vcf.gz")?.to_str().unwrap().to_string();
+            let tmp_file = temp_file_path(".vcf.gz")?.to_str().unwrap().to_string();
             let report = bcftools_concat(
                 vec![
                     self.output_vcf.to_string(),
@@ -156,7 +158,6 @@ impl Run for ClairS {
                 "Error while running bcftools concat for {} and {}",
                 &self.output_vcf, &self.output_indels_vcf
             ))?;
-
             let log_file = format!("{}/bcftools_concat_", self.log_dir);
             report
                 .save_to_file(&log_file)
@@ -192,9 +193,8 @@ impl Variants for ClairS {
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
         info!("Loading variants from {}: {}", caller, self.vcf_passed);
-        let variants = read_vcf(&self.vcf_passed).map_err(|e| {
-            anyhow::anyhow!("Failed to read ClairS VCF {}.\n{e}", self.vcf_passed)
-        })?;
+        let variants = read_vcf(&self.vcf_passed)
+            .map_err(|e| anyhow::anyhow!("Failed to read ClairS VCF {}.\n{e}", self.vcf_passed))?;
 
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);

+ 45 - 1
src/lib.rs

@@ -449,6 +449,33 @@ mod tests {
         let row = "chr1\t161417408\tr_10_0\tT\t[chr1:161417447[TTGGCAGGTTCC\t.\tPASS\tSVTYPE=BND;MATEID=r_10_1;SVINSLEN=11;SVINSSEQ=TTGGCAGGTTC\tTR:VR\t22:3\t12:0";
         let variant: VcfVariant = row.parse()?;
         println!("{variant:#?}");
+        let u = variant.bnd_desc();
+        println!("{u:#?}");
+
+        // Severus mates are not in RC
+        let vcf = "chr7\t27304522\tseverus_BND6747_1\tN\t[chr6:32688062[N\t60\tPASS\tPRECISE;SVTYPE=BND;MATE_ID=severus_BND6747_2;STRANDS=--;MAPQ=60;CLUSTERID=severus_2\tGT:VAF:hVAF:DR:DV\t0/1:0.29:0.29,0,0:12:5";
+        let variant: VcfVariant = vcf.parse()?;
+        let bnd_a = variant.bnd_desc()?;
+
+        let vcf = "chr6\t32688062\tseverus_BND6747_2\tN\t[chr7:27304522[N\t60\tPASS\tPRECISE;SVTYPE=BND;MATE_ID=severus_BND6747_1;STRANDS=--;MAPQ=60;CLUSTERID=severus_2 GT:VAF:hVAF:DR:DV\t0/1:0.29:0.29,0,0:12:5";
+        let variant: VcfVariant = vcf.parse()?;
+        let bnd_b = variant.bnd_desc()?;
+        // assert_eq!(bnd_a, bnd_b);
+
+        println!("{bnd_a:#?}\n{bnd_b:#?}");
+
+        // Savana here each mate are in RC but the problem is in BP_notation
+        let vcf = "chr10\t102039096\tID_35957_2\tG\t]chr10:101973386]G\t.\tPASS\tSVTYPE=BND;MATEID=ID_35957_1;TUMOUR_READ_SUPPORT=7;TUMOUR_ALN_SUPPORT=7;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=65710;BP_NOTATION=+-;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=7;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.35;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=7.248;ORIGIN_EVENT_SIZE_MEDIAN=65710;ORIGIN_EVENT_SIZE_MEAN=65705.4;END_STARTS_STD_DEV=7.007;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=7.248;END_EVENT_SIZE_MEDIAN=65710;END_EVENT_SIZE_MEAN=65705.4;TUMOUR_DP_BEFORE=38,29;TUMOUR_DP_AT=44,21;TUMOUR_DP_AFTER=44,21;NORMAL_DP_BEFORE=13,15;NORMAL_DP_AT=13,15;NORMAL_DP_AFTER=13,15;TUMOUR_AF=0.159,0.333;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=20,16,8;NORMAL_TOTAL_HP_AT=6,7,0;TUMOUR_ALT_HP=0,1,6;TUMOUR_PS=101917152;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
+        let variant: VcfVariant = vcf.parse()?;
+        let bnd_a = variant.bnd_desc()?;
+
+        let vcf = "chr10\t101973386\tID_35957_1\tA\tA[chr10:102039096[\t.\tPASS\tSVTYPE=BND;MATEID=ID_35957_2;TUMOUR_READ_SUPPORT=7;TUMOUR_ALN_SUPPORT=7;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=65710;BP_NOTATION=+-;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=7;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.35;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=7.248;ORIGIN_EVENT_SIZE_MEDIAN=65710;ORIGIN_EVENT_SIZE_MEAN=65705.4;END_STARTS_STD_DEV=7.007;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=7.248;END_EVENT_SIZE_MEDIAN=65710;END_EVENT_SIZE_MEAN=65705.4;TUMOUR_DP_BEFORE=29,38;TUMOUR_DP_AT=21,44;TUMOUR_DP_AFTER=21,44;NORMAL_DP_BEFORE=15,13;NORMAL_DP_AT=15,13;NORMAL_DP_AFTER=15,13;TUMOUR_AF=0.333,0.159;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=17,0,4;NORMAL_TOTAL_HP_AT=5,7,3;TUMOUR_ALT_HP=0,6,1;TUMOUR_PS=101917152;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
+        let variant: VcfVariant = vcf.parse()?;
+        let bnd_b = variant.bnd_desc()?;
+        assert_eq!(bnd_a, bnd_b);
+
+        println!("{bnd_a:#?}\n{bnd_b:#?}");
+
 
         Ok(())
     }
@@ -677,7 +704,7 @@ mod tests {
     #[test]
     fn run_somatic_case() -> anyhow::Result<()> {
         init();
-        let id = "COIFFET";
+        let id = "TAHIR";
         let mut config = Config::default();
         config.somatic_pipe_force = true;
         match Somatic::initialize(id, config)?.run() {
@@ -749,4 +776,21 @@ Ok(())
 
         const_stats(id.to_string(), config);
     }
+
+    #[test]
+    fn test_bnd() -> anyhow::Result<()> {
+        init();
+        let id = "COIFFET";
+        let config = Config::default();
+
+        let annotations = Annotations::default();
+        let s = Savana::initialize(id, config)?.variants(&annotations)?;
+        s.variants.iter().for_each(|e| {
+            if let Ok(bnd) = e.bnd_desc() {
+                println!("{}\t{}\t{}", e.position , e.reference, e.alternative);
+                println!("{:#?}", bnd);
+            }
+        });
+        Ok(())
+    }
 }

+ 31 - 42
src/variant/variant.rs

@@ -280,7 +280,6 @@ impl VcfVariant {
     pub fn bnd_desc(&self) -> anyhow::Result<BNDDesc> {
         let alt = self.alternative.to_string();
         if alt.contains('[') || alt.contains(']') {
-            let extending_right = alt.contains('[');
             let alt_rep = alt.replace("[", ";").replace("]", ";");
             let alt_is_joined_after = !alt_rep.starts_with(";");
             let parts = alt_rep
@@ -334,47 +333,6 @@ impl VcfVariant {
                     added_nt,
                 })
             }
-
-            // let b_sens = alt.contains('[');
-            //
-            // let a_sens = if b_sens {
-            //     !alt.starts_with('[')
-            // } else {
-            //     !alt.starts_with(']')
-            // };
-            //
-            // let parts: Vec<&str> = alt
-            //     .split(&['[', ']', ':'])
-            //     .filter(|v| !v.is_empty())
-            //     .collect();
-            //
-            // if parts.len() != 3 {
-            //     return Err(anyhow::anyhow!("Failed to parse parts: {parts:?}"));
-            // }
-            //
-            // let (nt, b_contig, b_position) = if a_sens {
-            //     (parts[0], parts[1], parts[2])
-            // } else {
-            //     (parts[2], parts[0], parts[1])
-            // };
-            //
-            // let added_nt = if nt.len() > 1 {
-            //     nt[1..].to_string()
-            // } else {
-            //     nt.to_string()
-            // };
-            //
-            // Ok(BNDDesc {
-            //     a_contig: self.position.contig(),
-            //     a_position: self.position.position + 1,
-            //     a_sens,
-            //     b_contig: b_contig.to_string(),
-            //     b_position: b_position
-            //         .parse()
-            //         .map_err(|e| anyhow::anyhow!("Failed to parse: {b_position}\n{e}"))?,
-            //     b_sens,
-            //     added_nt,
-            // })
         } else {
             Err(anyhow::anyhow!("The alteration is not BND: {alt}"))
         }
@@ -766,6 +724,37 @@ pub enum Format {
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
 pub struct Formats(pub Vec<Format>);
 
+impl Formats {
+    /// Get the tumoral alternative read depth and total depth as an Option<(u32, u32)>.
+    pub fn n_alt_depth(&self) -> Option<(u32, u32)> {
+        let mut tumor_alt_depth: Option<u32> = None;
+        let mut tumor_total_depth: Option<u32> = None;
+
+        for format in &self.0 {
+            match format {
+                // Tumor Allelic Depth (AD)
+                Format::AD(values) => {
+                    if values.len() > 1 {
+                        // Sum all alternative allele depths (excluding reference allele)
+                        tumor_alt_depth = Some(values[1..].iter().sum());
+                    }
+                }
+                // Tumor Total Depth (DP)
+                Format::DP(value) => {
+                    tumor_total_depth = Some(*value);
+                }
+                _ => {}
+            }
+        }
+
+        // Return a tuple (tumor_alt_depth, tumor_total_depth) if both are available
+        match (tumor_alt_depth, tumor_total_depth) {
+            (Some(alt), Some(total)) => Some((alt, total)),
+            _ => None,
+        }
+    }
+}
+
 impl TryFrom<(&str, &str)> for Formats {
     type Error = anyhow::Error;
 

+ 14 - 1
src/variant/variant_collection.rs

@@ -30,6 +30,7 @@ use crate::{
     helpers::{app_storage_dir, estimate_shannon_entropy, mean, temp_file_path, Hash128},
     io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header},
     positions::GenomePosition,
+    variant::variant::Format,
 };
 
 /// A collection of VCF variants along with associated metadata.
@@ -444,6 +445,16 @@ impl Variant {
             .into_iter()
             .collect()
     }
+
+    pub fn n_alt_depth(&self) -> (f64, f64) {
+        let (n_alts, depths): (Vec<u32>, Vec<u32>) = self
+            .vcf_variants
+            .iter()
+            .filter_map(|vv| vv.formats.n_alt_depth())
+            .unzip();
+
+        (mean(&n_alts), mean(&depths))
+    }
 }
 
 /// A collection of genomic variants.
@@ -763,7 +774,9 @@ impl VariantsStats {
             alteration_category_str.sort();
             alteration_category_str.dedup();
 
-            *alteration_categories.entry(alteration_category_str.join(", ")).or_default() += 1;
+            *alteration_categories
+                .entry(alteration_category_str.join(", "))
+                .or_default() += 1;
         });
 
         gnomad.iter().for_each(|e| {