use std::{fmt, str::FromStr}; use bitcode::{Decode, Encode}; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Encode, Decode)] pub struct DbSnpFreqEntry { pub source: String, pub values: Vec>, } impl fmt::Display for DbSnpFreqEntry { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let values = self .values .iter() .map(|v| match v { Some(v) => v.to_string(), None => ".".to_string(), }) .collect::>() .join(","); write!(f, "{}:{}", self.source, values) } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Encode, Decode)] pub struct DbSnpFreq(pub Vec); impl FromStr for DbSnpFreq { type Err = anyhow::Error; fn from_str(s: &str) -> anyhow::Result { let entries = s .split('|') .filter(|x| !x.is_empty()) .map(|entry| { let (source, values) = entry .split_once(':') .ok_or_else(|| anyhow::anyhow!("Invalid FREQ entry: {entry}"))?; let values = values .split(',') .map(|v| { if v == "." { Ok(None) } else { v.parse::().map(Some).map_err(|e| { anyhow::anyhow!("Invalid FREQ value `{v}` in {source}: {e}") }) } }) .collect::>>()?; Ok(DbSnpFreqEntry { source: source.to_string(), values, }) }) .collect::>>()?; Ok(DbSnpFreq(entries)) } } impl fmt::Display for DbSnpFreq { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let s = self .0 .iter() .map(|e| e.to_string()) .collect::>() .join("|"); write!(f, "{s}") } } impl DbSnpFreq { /// Average ALT frequency across real population sources in a dbSNP FREQ field. pub fn maf(&self) -> Option { const EXCLUDED: &[&str] = &["SGDP_PRJ", "dbGaP_PopFreq"]; let (sum, count) = self .0 .iter() .filter_map(|entry| { if EXCLUDED.contains(&entry.source.as_str()) { return None; } let alt = entry.values.get(1).copied().flatten()? as f32; if alt <= 0.0 { None } else { Some(alt) } }) .fold((0.0_f32, 0usize), |(s, c), af| (s + af, c + 1)); (count > 0).then_some(sum / count as f32) } }