|
|
@@ -118,7 +118,7 @@ use crate::{
|
|
|
use anyhow::{anyhow, Context};
|
|
|
use bitcode::{Decode, Encode};
|
|
|
use chainfile::liftover::Machine;
|
|
|
-use log::{error, info};
|
|
|
+use log::{error, info, warn};
|
|
|
use rayon::prelude::*;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
use std::{
|
|
|
@@ -186,6 +186,12 @@ impl FromStr for VcfVariant {
|
|
|
)
|
|
|
.try_into()
|
|
|
.context(format!("Can't parse position from: {s}"))?;
|
|
|
+ if vcf_position.position == 0 {
|
|
|
+ warn!(
|
|
|
+ "Parsed VCF record with POS=0 telomere sentinel on {}; internal GenomePosition cannot preserve POS=0 on VCF serialization",
|
|
|
+ v.first().copied().unwrap_or("<unknown>")
|
|
|
+ );
|
|
|
+ }
|
|
|
|
|
|
let formats = if v.len() >= 10 {
|
|
|
(
|
|
|
@@ -321,10 +327,8 @@ impl VcfVariant {
|
|
|
let tgt_start0 = target.start().position().get() as usize; // 0-based
|
|
|
let tgt_end0_inclusive = tgt_start0 + ref_len - 1;
|
|
|
|
|
|
- // Strand: adjust accessor to your omics version if needed.
|
|
|
let on_reverse = target.strand().to_string() == "-";
|
|
|
|
|
|
- // Fetch REF from target assembly
|
|
|
let mut new_ref = sequence_range(fasta, tgt_contig_name, tgt_start0, tgt_end0_inclusive)
|
|
|
.with_context(|| {
|
|
|
format!("FASTA query failed at {tgt_contig_name}:{tgt_start0}-{tgt_end0_inclusive}")
|
|
|
@@ -334,11 +338,13 @@ impl VcfVariant {
|
|
|
new_ref = revcomp(&new_ref);
|
|
|
}
|
|
|
|
|
|
- // Rewrite alleles
|
|
|
let mut out = self.clone();
|
|
|
out.position = GenomePosition {
|
|
|
contig: tgt_contig,
|
|
|
- position: tgt_start0 as u32,
|
|
|
+ // Liftover positions come from a usize; guard against silent truncation
|
|
|
+ // on unusually large assemblies (> 4 Gbp).
|
|
|
+ position: u32::try_from(tgt_start0)
|
|
|
+ .with_context(|| format!("Liftover target position {tgt_start0} overflows u32"))?,
|
|
|
};
|
|
|
out.reference = new_ref.parse().context("Failed to parse rewritten REF")?;
|
|
|
|
|
|
@@ -505,7 +511,7 @@ impl VcfVariant {
|
|
|
/// - The alteration category is not BND
|
|
|
pub fn bnd_desc(&self) -> anyhow::Result<BNDDesc> {
|
|
|
let alt = self.alternative.to_string();
|
|
|
- if self.alternative.to_string() == "<DUP>" {
|
|
|
+ if alt == "<DUP>" {
|
|
|
// let row = "chr1\t9218455\tr_0\tC\t<DUP>\t.\tPASS\tSVTYPE=DUP;SVLEN=12572;END=9231027\tTR:VR\t37:9";
|
|
|
|
|
|
if let Some(len) = self.infos.0.iter().find_map(|i| {
|
|
|
@@ -1315,10 +1321,13 @@ impl Infos {
|
|
|
|
|
|
pub fn freq_maf(&self) -> Option<f32> {
|
|
|
self.0.iter().find_map(|i| {
|
|
|
- if let Info::FREQ(s) = i { parse_maf_from_freq(s) } else { None }
|
|
|
+ if let Info::FREQ(s) = i {
|
|
|
+ parse_maf_from_freq(s)
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
})
|
|
|
}
|
|
|
-
|
|
|
}
|
|
|
|
|
|
/// Average ALT frequency across real population sources in a dbSNP FREQ field.
|
|
|
@@ -1340,17 +1349,21 @@ pub fn parse_maf_from_freq(freq: &str) -> Option<f32> {
|
|
|
if EXCLUDED.contains(&name) {
|
|
|
return None;
|
|
|
}
|
|
|
- let alt = alleles
|
|
|
- .split(',')
|
|
|
- .nth(1)?
|
|
|
- .parse::<f32>()
|
|
|
- .ok()?;
|
|
|
+ let alt = alleles.split(',').nth(1)?.parse::<f32>().ok()?;
|
|
|
// 0.0 means not observed in this source, skip rather than pulling MAF down
|
|
|
- if alt <= 0.0 { None } else { Some(alt) }
|
|
|
+ if alt <= 0.0 {
|
|
|
+ None
|
|
|
+ } else {
|
|
|
+ Some(alt)
|
|
|
+ }
|
|
|
})
|
|
|
.fold((0.0_f32, 0usize), |(s, c), af| (s + af, c + 1));
|
|
|
|
|
|
- if count == 0 { None } else { Some(sum / count as f32) }
|
|
|
+ if count == 0 {
|
|
|
+ None
|
|
|
+ } else {
|
|
|
+ Some(sum / count as f32)
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/// Enum representing a single INFO field in a VCF record.
|