dbsnp.rs 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. use std::{fmt, str::FromStr};
  2. use bitcode::{Decode, Encode};
  3. use serde::{Deserialize, Serialize};
  4. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Encode, Decode)]
  5. pub struct DbSnpFreqEntry {
  6. pub source: String,
  7. pub values: Vec<Option<f64>>,
  8. }
  9. impl fmt::Display for DbSnpFreqEntry {
  10. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  11. let values = self
  12. .values
  13. .iter()
  14. .map(|v| match v {
  15. Some(v) => v.to_string(),
  16. None => ".".to_string(),
  17. })
  18. .collect::<Vec<_>>()
  19. .join(",");
  20. write!(f, "{}:{}", self.source, values)
  21. }
  22. }
  23. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Encode, Decode)]
  24. pub struct DbSnpFreq(pub Vec<DbSnpFreqEntry>);
  25. impl FromStr for DbSnpFreq {
  26. type Err = anyhow::Error;
  27. fn from_str(s: &str) -> anyhow::Result<Self> {
  28. let entries = s
  29. .split('|')
  30. .filter(|x| !x.is_empty())
  31. .map(|entry| {
  32. let (source, values) = entry
  33. .split_once(':')
  34. .ok_or_else(|| anyhow::anyhow!("Invalid FREQ entry: {entry}"))?;
  35. let values = values
  36. .split(',')
  37. .map(|v| {
  38. if v == "." {
  39. Ok(None)
  40. } else {
  41. v.parse::<f64>().map(Some).map_err(|e| {
  42. anyhow::anyhow!("Invalid FREQ value `{v}` in {source}: {e}")
  43. })
  44. }
  45. })
  46. .collect::<anyhow::Result<Vec<_>>>()?;
  47. Ok(DbSnpFreqEntry {
  48. source: source.to_string(),
  49. values,
  50. })
  51. })
  52. .collect::<anyhow::Result<Vec<_>>>()?;
  53. Ok(DbSnpFreq(entries))
  54. }
  55. }
  56. impl fmt::Display for DbSnpFreq {
  57. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  58. let s = self
  59. .0
  60. .iter()
  61. .map(|e| e.to_string())
  62. .collect::<Vec<_>>()
  63. .join("|");
  64. write!(f, "{s}")
  65. }
  66. }
  67. impl DbSnpFreq {
  68. /// Average ALT frequency across real population sources in a dbSNP FREQ field.
  69. pub fn maf(&self) -> Option<f32> {
  70. const EXCLUDED: &[&str] = &["SGDP_PRJ", "dbGaP_PopFreq"];
  71. let (sum, count) = self
  72. .0
  73. .iter()
  74. .filter_map(|entry| {
  75. if EXCLUDED.contains(&entry.source.as_str()) {
  76. return None;
  77. }
  78. let alt = entry.values.get(1).copied().flatten()? as f32;
  79. if alt <= 0.0 {
  80. None
  81. } else {
  82. Some(alt)
  83. }
  84. })
  85. .fold((0.0_f32, 0usize), |(s, c), af| (s + af, c + 1));
  86. (count > 0).then_some(sum / count as f32)
  87. }
  88. }