|
|
@@ -405,13 +405,13 @@ impl BNDDesc {
|
|
|
/// Construct a `BNDDesc` from VCF `CHROM`, `POS`, and break‑end `ALT` string.
|
|
|
/// Supports the four VCF break‑end ALT forms:
|
|
|
///
|
|
|
- /// 1) `t[p[` → A(5′)=t flank, B(3′)=p flank (forward-forward)
|
|
|
- /// 2) `t]p]` → A(5′)=t flank, B(3′)=p flank but on reverse (`a_sens=true`, `b_sens=false`)
|
|
|
- /// 3) `]p]t` → A(5′)=p flank, B(3′)=t flank (`a_sens=false`, `b_sens=true`)
|
|
|
- /// 4) `[p[t` → A(5′)=p flank, B(3′)=t flank with both on reverse (`a_sens=false`, `b_sens=false`)
|
|
|
+ /// 1) `t[p[` → A(5′)=t flank, B(3′)=p flank (+ + )
|
|
|
+ /// 2) `t]p]` → A(5′)=t flank, B(3′)=p flank but on reverse (+ -)
|
|
|
+ /// 3) `]p]t` → A(5′)=p flank, B(3′)=t flank (- -)
|
|
|
+ /// 4) `[p[t` → A(5′)=p flank, B(3′)=t flank with both on reverse (- +)
|
|
|
///
|
|
|
/// Here `t` is the local flank sequence, `p` is the remote contig:pos.
|
|
|
- pub fn from_vcf_breakend(chrom: &str, pos: u32, alt: &str) -> Option<Self> {
|
|
|
+ pub fn from_vcf_breakend(t_chrom: &str, t_pos: u32, alt: &str) -> Option<Self> {
|
|
|
// locate first bracket
|
|
|
let (open, bracket) = alt.char_indices().find(|&(_, c)| c == '[' || c == ']')?;
|
|
|
// find matching bracket
|
|
|
@@ -419,32 +419,49 @@ impl BNDDesc {
|
|
|
// parse remote contig:pos between brackets
|
|
|
let addr = &alt[open + 1..close];
|
|
|
let mut parts = addr.splitn(2, ':');
|
|
|
- let rc = parts.next()?;
|
|
|
- let rp: u32 = parts.next()?.parse().ok()?;
|
|
|
+ let p_contig = parts.next()?;
|
|
|
+ let p_position: u32 = parts.next()?.parse().ok()?;
|
|
|
// inserted sequence
|
|
|
let seq_before = &alt[..open];
|
|
|
let seq_after = &alt[close + 1..];
|
|
|
let mut added_nt = String::new();
|
|
|
added_nt.push_str(seq_before);
|
|
|
added_nt.push_str(seq_after);
|
|
|
+ added_nt.remove(0);
|
|
|
// determine form and orientation
|
|
|
// form 1 & 2: bracket after t (open>0): local before remote
|
|
|
// form 3 & 4: bracket before t (open==0): remote before local
|
|
|
let after_local = open > 0;
|
|
|
- let (a_sens, b_sens) = match (bracket, after_local) {
|
|
|
- ('[', true) => (true, true), // form 1
|
|
|
- (']', true) => (true, false), // form 2
|
|
|
- (']', false) => (false, true), // form 3
|
|
|
- ('[', false) => (false, false), // form 4
|
|
|
+ let (t_sens, p_sens) = match (bracket, after_local) {
|
|
|
+ ('[', true) => (true, true), // form 1 t[p[
|
|
|
+ (']', true) => (true, false), // form 2 t]p]
|
|
|
+ (']', false) => (true, true), // form 3 ]p]t
|
|
|
+ ('[', false) => (true, false), // form 4 [p[t
|
|
|
_ => return None,
|
|
|
};
|
|
|
+
|
|
|
// assign A/B depending on form
|
|
|
- let (a_contig, a_position, b_contig, b_position) = if after_local {
|
|
|
- // forms 1 & 2: A is local, B is remote
|
|
|
- (chrom.into(), pos, rc.into(), rp)
|
|
|
+ let (a_contig, a_position, a_sens, b_contig, b_position, b_sens, added_nt) = if after_local {
|
|
|
+ (
|
|
|
+ t_chrom.into(),
|
|
|
+ t_pos,
|
|
|
+ t_sens,
|
|
|
+ p_contig.into(),
|
|
|
+ p_position,
|
|
|
+ p_sens,
|
|
|
+ added_nt
|
|
|
+ )
|
|
|
} else {
|
|
|
// forms 3 & 4: A is remote, B is local
|
|
|
- (rc.into(), rp, chrom.into(), pos)
|
|
|
+ (
|
|
|
+ p_contig.into(),
|
|
|
+ p_position,
|
|
|
+ p_sens,
|
|
|
+ t_chrom.into(),
|
|
|
+ t_pos,
|
|
|
+ t_sens,
|
|
|
+ reverse_complement(&added_nt)
|
|
|
+ )
|
|
|
};
|
|
|
Some(Self {
|
|
|
a_contig,
|
|
|
@@ -457,22 +474,36 @@ impl BNDDesc {
|
|
|
})
|
|
|
}
|
|
|
|
|
|
- /// Serialize this `BNDDesc` back into a VCF ALT break‑end string.
|
|
|
- /// Reconstructs one of the four canonical forms using `added_nt` as the flank
|
|
|
- /// sequence (`t`) and `b_contig:b_position` as the remote site (`p`).
|
|
|
- pub fn to_vcf_breakend(&self) -> String {
|
|
|
- // `t` is inserted NTs, `p` is remote site
|
|
|
- let t = &self.added_nt;
|
|
|
- let p = format!("{}:{}", self.b_contig, self.b_position);
|
|
|
- match (self.a_sens, self.b_sens) {
|
|
|
- (true, true) => format!("{}[{}[", t, p), // t[p[
|
|
|
- (true, false) => format!("{}]{}]", t, p), // t]p]
|
|
|
- (false, true) => format!("]{}]{}", p, t), // ]p]t
|
|
|
- (false, false) => format!("[{}[{}", p, t), // [p[t
|
|
|
+ pub fn rc(&self) -> Self {
|
|
|
+ Self {
|
|
|
+ a_contig: self.b_contig.clone(),
|
|
|
+ a_position: self.b_position,
|
|
|
+ a_sens: !self.b_sens,
|
|
|
+ b_contig: self.a_contig.clone(),
|
|
|
+ b_position: self.a_position,
|
|
|
+ b_sens: !self.a_sens,
|
|
|
+ // TODO change seq to RC nd should find base on fa
|
|
|
+ added_nt: reverse_complement(&self.added_nt),
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+pub fn reverse_complement(dna: &str) -> String {
|
|
|
+ fn complement(base: char) -> char {
|
|
|
+ match base {
|
|
|
+ 'A' => 'T', 'T' => 'A', 'C' => 'G', 'G' => 'C',
|
|
|
+ 'a' => 't', 't' => 'a', 'c' => 'g', 'g' => 'c',
|
|
|
+ 'N' => 'N', 'n' => 'n',
|
|
|
+ _ => 'N',
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ dna.chars()
|
|
|
+ .rev()
|
|
|
+ .map(complement)
|
|
|
+ .collect()
|
|
|
+}
|
|
|
+
|
|
|
impl core::fmt::Display for BNDDesc {
|
|
|
/// Compact textual form: `(A:contig:pos<arrow>;B:contig:pos<arrow>;ins=...)`.
|
|
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|