Quellcode durchsuchen

clearer parse bnddesc

Thomas vor 7 Monaten
Ursprung
Commit
5a323d50bc
2 geänderte Dateien mit 64 neuen und 33 gelöschten Zeilen
  1. 5 5
      src/lib.rs
  2. 59 28
      src/variant/variant.rs

+ 5 - 5
src/lib.rs

@@ -555,7 +555,7 @@ mod tests {
 
     #[test]
     fn variant_parse() -> anyhow::Result<()> {
-        let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.666667:6,4,0";
+        let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.66667:6,4,0";
         let variant: VcfVariant = row.parse()?;
         let var_string = variant.into_vcf_row();
         assert_eq!(row, &var_string);
@@ -597,11 +597,11 @@ mod tests {
         let vcf = "chr6\t32688062\tseverus_BND6747_2\tN\t[chr7:27304522[N\t60\tPASS\tPRECISE;SVTYPE=BND;MATE_ID=severus_BND6747_1;STRANDS=--;MAPQ=60;CLUSTERID=severus_2 GT:VAF:hVAF:DR:DV\t0/1:0.29:0.29,0,0:12:5";
         let variant: VcfVariant = vcf.parse()?;
         let bnd_b = variant.bnd_desc()?;
-        // assert_eq!(bnd_a, bnd_b);
+        assert_eq!(bnd_a, bnd_b.rc());
 
         println!("{bnd_a:#?}\n{bnd_b:#?}");
 
-        // Savana here each mate are in RC but the problem is in BP_notation
+        // Savana here each mate are in RC 
         let vcf = "chr10\t102039096\tID_35957_2\tG\t]chr10:101973386]G\t.\tPASS\tSVTYPE=BND;MATEID=ID_35957_1;TUMOUR_READ_SUPPORT=7;TUMOUR_ALN_SUPPORT=7;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=65710;BP_NOTATION=+-;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=7;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.35;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=7.248;ORIGIN_EVENT_SIZE_MEDIAN=65710;ORIGIN_EVENT_SIZE_MEAN=65705.4;END_STARTS_STD_DEV=7.007;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=7.248;END_EVENT_SIZE_MEDIAN=65710;END_EVENT_SIZE_MEAN=65705.4;TUMOUR_DP_BEFORE=38,29;TUMOUR_DP_AT=44,21;TUMOUR_DP_AFTER=44,21;NORMAL_DP_BEFORE=13,15;NORMAL_DP_AT=13,15;NORMAL_DP_AFTER=13,15;TUMOUR_AF=0.159,0.333;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=20,16,8;NORMAL_TOTAL_HP_AT=6,7,0;TUMOUR_ALT_HP=0,1,6;TUMOUR_PS=101917152;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
         let variant: VcfVariant = vcf.parse()?;
         let bnd_a = variant.bnd_desc()?;
@@ -612,7 +612,7 @@ mod tests {
         assert_eq!(bnd_a, bnd_b);
 
         println!("{bnd_a:#?}\n{bnd_b:#?}");
-
+        println!("{variant:#?}");
 
         Ok(())
     }
@@ -872,7 +872,7 @@ mod tests {
     #[test]
     fn somatic_cases() -> anyhow::Result<()> {
         init();
-        let id = "CHENU";
+        let id = "ACHITE";
         let config = Config { somatic_pipe_force: true, ..Default::default() };
         match SomaticPipe::initialize(id, config)?.run() {
             Ok(_) => (),

+ 59 - 28
src/variant/variant.rs

@@ -405,13 +405,13 @@ impl BNDDesc {
     /// Construct a `BNDDesc` from VCF `CHROM`, `POS`, and break‑end `ALT` string.
     /// Supports the four VCF break‑end ALT forms:
     ///
-    /// 1) `t[p[` → A(5′)=t flank, B(3′)=p flank (forward-forward)
-    /// 2) `t]p]` → A(5′)=t flank, B(3′)=p flank but on reverse (`a_sens=true`, `b_sens=false`)
-    /// 3) `]p]t` → A(5′)=p flank, B(3′)=t flank (`a_sens=false`, `b_sens=true`)
-    /// 4) `[p[t` → A(5′)=p flank, B(3′)=t flank with both on reverse (`a_sens=false`, `b_sens=false`)
+    /// 1) `t[p[` → A(5′)=t flank, B(3′)=p flank (+ + )
+    /// 2) `t]p]` → A(5′)=t flank, B(3′)=p flank but on reverse (+ -)
+    /// 3) `]p]t` → A(5′)=p flank, B(3′)=t flank (-  -)
+    /// 4) `[p[t` → A(5′)=p flank, B(3′)=t flank with both on reverse (- +)
     ///
     /// Here `t` is the local flank sequence, `p` is the remote contig:pos.
-    pub fn from_vcf_breakend(chrom: &str, pos: u32, alt: &str) -> Option<Self> {
+    pub fn from_vcf_breakend(t_chrom: &str, t_pos: u32, alt: &str) -> Option<Self> {
         // locate first bracket
         let (open, bracket) = alt.char_indices().find(|&(_, c)| c == '[' || c == ']')?;
         // find matching bracket
@@ -419,32 +419,49 @@ impl BNDDesc {
         // parse remote contig:pos between brackets
         let addr = &alt[open + 1..close];
         let mut parts = addr.splitn(2, ':');
-        let rc = parts.next()?;
-        let rp: u32 = parts.next()?.parse().ok()?;
+        let p_contig = parts.next()?;
+        let p_position: u32 = parts.next()?.parse().ok()?;
         // inserted sequence
         let seq_before = &alt[..open];
         let seq_after = &alt[close + 1..];
         let mut added_nt = String::new();
         added_nt.push_str(seq_before);
         added_nt.push_str(seq_after);
+        added_nt.remove(0);
         // determine form and orientation
         // form 1 & 2: bracket after t (open>0): local before remote
         // form 3 & 4: bracket before t (open==0): remote before local
         let after_local = open > 0;
-        let (a_sens, b_sens) = match (bracket, after_local) {
-            ('[', true) => (true, true),    // form 1
-            (']', true) => (true, false),   // form 2
-            (']', false) => (false, true),  // form 3
-            ('[', false) => (false, false), // form 4
+        let (t_sens, p_sens) = match (bracket, after_local) {
+            ('[', true) => (true, true),   // form 1 t[p[
+            (']', true) => (true, false),  // form 2 t]p]
+            (']', false) => (true, true),  // form 3 ]p]t
+            ('[', false) => (true, false), // form 4 [p[t
             _ => return None,
         };
+
         // assign A/B depending on form
-        let (a_contig, a_position, b_contig, b_position) = if after_local {
-            // forms 1 & 2: A is local, B is remote
-            (chrom.into(), pos, rc.into(), rp)
+        let (a_contig, a_position, a_sens, b_contig, b_position, b_sens, added_nt) = if after_local {
+            (
+                t_chrom.into(),
+                t_pos,
+                t_sens,
+                p_contig.into(),
+                p_position,
+                p_sens,
+                added_nt
+            )
         } else {
             // forms 3 & 4: A is remote, B is local
-            (rc.into(), rp, chrom.into(), pos)
+            (
+                p_contig.into(),
+                p_position,
+                p_sens,
+                t_chrom.into(),
+                t_pos,
+                t_sens,
+                reverse_complement(&added_nt)
+            )
         };
         Some(Self {
             a_contig,
@@ -457,22 +474,36 @@ impl BNDDesc {
         })
     }
 
-    /// Serialize this `BNDDesc` back into a VCF ALT break‑end string.
-    /// Reconstructs one of the four canonical forms using `added_nt` as the flank
-    /// sequence (`t`) and `b_contig:b_position` as the remote site (`p`).
-    pub fn to_vcf_breakend(&self) -> String {
-        // `t` is inserted NTs, `p` is remote site
-        let t = &self.added_nt;
-        let p = format!("{}:{}", self.b_contig, self.b_position);
-        match (self.a_sens, self.b_sens) {
-            (true, true) => format!("{}[{}[", t, p),   // t[p[
-            (true, false) => format!("{}]{}]", t, p),  // t]p]
-            (false, true) => format!("]{}]{}", p, t),  // ]p]t
-            (false, false) => format!("[{}[{}", p, t), // [p[t
+    pub fn rc(&self) -> Self {
+        Self {
+            a_contig: self.b_contig.clone(),
+            a_position: self.b_position,
+            a_sens: !self.b_sens,
+            b_contig: self.a_contig.clone(),
+            b_position: self.a_position,
+            b_sens: !self.a_sens,
+            // TODO change seq to RC nd should find base on fa
+            added_nt: reverse_complement(&self.added_nt),
         }
     }
 }
 
+pub fn reverse_complement(dna: &str) -> String {
+    fn complement(base: char) -> char {
+        match base {
+            'A' => 'T', 'T' => 'A', 'C' => 'G', 'G' => 'C',
+            'a' => 't', 't' => 'a', 'c' => 'g', 'g' => 'c',
+            'N' => 'N', 'n' => 'n',
+            _ => 'N',
+        }
+    }
+
+    dna.chars()
+        .rev()
+        .map(complement)
+        .collect()
+}
+
 impl core::fmt::Display for BNDDesc {
     /// Compact textual form: `(A:contig:pos<arrow>;B:contig:pos<arrow>;ins=...)`.
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {