Переглянути джерело

fix vcf_variant.rs: liftover position overflow and redundant allocation

liftover (line 347): tgt_start0 as u32 silently truncates if the liftover
target position exceeds 4 Gbp. Replaced with u32::try_from(...).context(...)
to propagate an error rather than produce a wrong genomic coordinate.
Affects only assemblies > 4 Gbp (none of the current targets), but the cast
was incorrect regardless.

bnd_desc() (line 514): self.alternative.to_string() was called twice in
consecutive lines — once to bind `alt`, then again for the comparison.
The second call allocated a new String for no reason. Use `alt` directly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Thomas 1 місяць тому
батько
коміт
bd5f0f912a
1 змінених файлів з 28 додано та 15 видалено
  1. 28 15
      src/variant/vcf_variant.rs

+ 28 - 15
src/variant/vcf_variant.rs

@@ -118,7 +118,7 @@ use crate::{
 use anyhow::{anyhow, Context};
 use bitcode::{Decode, Encode};
 use chainfile::liftover::Machine;
-use log::{error, info};
+use log::{error, info, warn};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{
@@ -186,6 +186,12 @@ impl FromStr for VcfVariant {
         )
             .try_into()
             .context(format!("Can't parse position from: {s}"))?;
+        if vcf_position.position == 0 {
+            warn!(
+                "Parsed VCF record with POS=0 telomere sentinel on {}; internal GenomePosition cannot preserve POS=0 on VCF serialization",
+                v.first().copied().unwrap_or("<unknown>")
+            );
+        }
 
         let formats = if v.len() >= 10 {
             (
@@ -321,10 +327,8 @@ impl VcfVariant {
         let tgt_start0 = target.start().position().get() as usize; // 0-based
         let tgt_end0_inclusive = tgt_start0 + ref_len - 1;
 
-        // Strand: adjust accessor to your omics version if needed.
         let on_reverse = target.strand().to_string() == "-";
 
-        // Fetch REF from target assembly
         let mut new_ref = sequence_range(fasta, tgt_contig_name, tgt_start0, tgt_end0_inclusive)
             .with_context(|| {
                 format!("FASTA query failed at {tgt_contig_name}:{tgt_start0}-{tgt_end0_inclusive}")
@@ -334,11 +338,13 @@ impl VcfVariant {
             new_ref = revcomp(&new_ref);
         }
 
-        // Rewrite alleles
         let mut out = self.clone();
         out.position = GenomePosition {
             contig: tgt_contig,
-            position: tgt_start0 as u32,
+            // Liftover positions come from a usize; guard against silent truncation
+            // on unusually large assemblies (> 4 Gbp).
+            position: u32::try_from(tgt_start0)
+                .with_context(|| format!("Liftover target position {tgt_start0} overflows u32"))?,
         };
         out.reference = new_ref.parse().context("Failed to parse rewritten REF")?;
 
@@ -505,7 +511,7 @@ impl VcfVariant {
     /// - The alteration category is not BND
     pub fn bnd_desc(&self) -> anyhow::Result<BNDDesc> {
         let alt = self.alternative.to_string();
-        if self.alternative.to_string() == "<DUP>" {
+        if alt == "<DUP>" {
             //         let row = "chr1\t9218455\tr_0\tC\t<DUP>\t.\tPASS\tSVTYPE=DUP;SVLEN=12572;END=9231027\tTR:VR\t37:9";
 
             if let Some(len) = self.infos.0.iter().find_map(|i| {
@@ -1315,10 +1321,13 @@ impl Infos {
 
     pub fn freq_maf(&self) -> Option<f32> {
         self.0.iter().find_map(|i| {
-            if let Info::FREQ(s) = i { parse_maf_from_freq(s) } else { None }
+            if let Info::FREQ(s) = i {
+                parse_maf_from_freq(s)
+            } else {
+                None
+            }
         })
     }
-
 }
 
 /// Average ALT frequency across real population sources in a dbSNP FREQ field.
@@ -1340,17 +1349,21 @@ pub fn parse_maf_from_freq(freq: &str) -> Option<f32> {
             if EXCLUDED.contains(&name) {
                 return None;
             }
-            let alt = alleles
-                .split(',')
-                .nth(1)?
-                .parse::<f32>()
-                .ok()?;
+            let alt = alleles.split(',').nth(1)?.parse::<f32>().ok()?;
             // 0.0 means not observed in this source, skip rather than pulling MAF down
-            if alt <= 0.0 { None } else { Some(alt) }
+            if alt <= 0.0 {
+                None
+            } else {
+                Some(alt)
+            }
         })
         .fold((0.0_f32, 0usize), |(s, c), af| (s + af, c + 1));
 
-    if count == 0 { None } else { Some(sum / count as f32) }
+    if count == 0 {
+        None
+    } else {
+        Some(sum / count as f32)
+    }
 }
 
 /// Enum representing a single INFO field in a VCF record.