|
|
@@ -1312,6 +1312,45 @@ impl Infos {
|
|
|
_ => None,
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ pub fn freq_maf(&self) -> Option<f32> {
|
|
|
+ self.0.iter().find_map(|i| {
|
|
|
+ if let Info::FREQ(s) = i { parse_maf_from_freq(s) } else { None }
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+/// Average ALT frequency across real population sources in a dbSNP FREQ field.
|
|
|
+///
|
|
|
+/// Format: `KOREAN:0.92,0.08|TOMMO:0.94,0.06|SGDP_PRJ:0.5,0.5|...`
|
|
|
+/// - Index 0 = REF freq, index 1 = ALT freq
|
|
|
+/// - Excludes SGDP_PRJ (encodes presence as 0.5, not real frequency)
|
|
|
+/// - Excludes dbGaP_PopFreq (ascertainment bias from disease cohorts)
|
|
|
+/// - Skips sources with ALT freq == 0.0 (not observed, not absent)
|
|
|
+///
|
|
|
+/// Returns `None` if no valid sources found.
|
|
|
+pub fn parse_maf_from_freq(freq: &str) -> Option<f32> {
|
|
|
+ const EXCLUDED: &[&str] = &["SGDP_PRJ", "dbGaP_PopFreq"];
|
|
|
+
|
|
|
+ let (sum, count) = freq
|
|
|
+ .split('|')
|
|
|
+ .filter_map(|source| {
|
|
|
+ let (name, alleles) = source.split_once(':')?;
|
|
|
+ if EXCLUDED.contains(&name) {
|
|
|
+ return None;
|
|
|
+ }
|
|
|
+ let alt = alleles
|
|
|
+ .split(',')
|
|
|
+ .nth(1)?
|
|
|
+ .parse::<f32>()
|
|
|
+ .ok()?;
|
|
|
+ // 0.0 means not observed in this source, skip rather than pulling MAF down
|
|
|
+ if alt <= 0.0 { None } else { Some(alt) }
|
|
|
+ })
|
|
|
+ .fold((0.0_f32, 0usize), |(s, c), af| (s + af, c + 1));
|
|
|
+
|
|
|
+ if count == 0 { None } else { Some(sum / count as f32) }
|
|
|
}
|
|
|
|
|
|
/// Enum representing a single INFO field in a VCF record.
|