variant.rs 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845
  1. use crate::{
  2. annotation::Annotations,
  3. helpers::Hash128,
  4. positions::{GenomePosition, GetGenomePosition, VcfPosition},
  5. runners::Run,
  6. variant::variant_collection::VariantCollection,
  7. };
  8. use anyhow::{anyhow, Context, Ok};
  9. use rayon::prelude::*;
  10. use serde::{Deserialize, Serialize};
  11. use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
  12. #[derive(Debug, Clone, Serialize, Deserialize)]
  13. pub struct VcfVariant {
  14. pub hash: Hash128,
  15. pub position: GenomePosition,
  16. pub id: String,
  17. pub reference: ReferenceAlternative,
  18. pub alternative: ReferenceAlternative,
  19. pub quality: Option<f32>,
  20. pub filter: Filter,
  21. pub infos: Infos,
  22. pub formats: Formats,
  23. }
  24. impl PartialEq for VcfVariant {
  25. fn eq(&self, other: &Self) -> bool {
  26. // Nota bene: id, filter, info, format and quality is intentionally not compared
  27. self.position == other.position
  28. && self.reference == other.reference
  29. && self.alternative == other.alternative
  30. }
  31. }
  32. impl Eq for VcfVariant {}
  33. impl FromStr for VcfVariant {
  34. type Err = anyhow::Error;
  35. fn from_str(s: &str) -> anyhow::Result<Self> {
  36. let v: Vec<&str> = s.split('\t').collect();
  37. let vcf_position: VcfPosition = (
  38. *v.first().ok_or(anyhow!("Can't get contig from: {s}"))?,
  39. *v.get(1).ok_or(anyhow!("Can't get position from: {s}"))?,
  40. )
  41. .try_into()
  42. .context(format!("Can't parse position from: {s}"))?;
  43. let formats = if v.len() == 10 {
  44. (
  45. *v.get(8).ok_or(anyhow!("Can't parse formats from: {s}"))?,
  46. *v.get(9).ok_or(anyhow!("Can't parse formats from: {s}"))?,
  47. )
  48. .try_into()
  49. .context(format!("Can't parse formats from: {s}"))?
  50. } else {
  51. Formats::default()
  52. };
  53. let position: GenomePosition = vcf_position.into();
  54. let reference: ReferenceAlternative = v
  55. .get(3)
  56. .ok_or(anyhow!("Can't parse reference from: {s}"))?
  57. .parse()
  58. .context(format!("Can't parse reference from: {s}"))?;
  59. let alternative: ReferenceAlternative = v
  60. .get(4)
  61. .ok_or(anyhow!("Can't parse alternative from: {s}"))?
  62. .parse()
  63. .context(format!("Can't parse alternative from: {s}"))?;
  64. // Blake3 128 bytes Hash
  65. let mut hasher = blake3::Hasher::new();
  66. hasher.update(&position.contig.to_ne_bytes()); // Convert position to bytes
  67. hasher.update(&position.position.to_ne_bytes()); // Convert position to bytes
  68. hasher.update(reference.to_string().as_bytes()); // Reference string as bytes
  69. hasher.update(alternative.to_string().as_bytes()); // Alternative string as bytes
  70. let hash = hasher.finalize();
  71. let hash = Hash128::new(hash.as_bytes()[..16].try_into().unwrap());
  72. Ok(Self {
  73. hash,
  74. position,
  75. id: v
  76. .get(2)
  77. .ok_or(anyhow!("Can't parse id from: {s}"))?
  78. .to_string(),
  79. reference,
  80. alternative,
  81. quality: v
  82. .get(5)
  83. .map(|s| s.parse::<f32>().ok()) // Try to parse as f64; returns Option<f64>
  84. .unwrap_or(None),
  85. filter: v
  86. .get(6)
  87. .ok_or(anyhow!("Can't parse filter from: {s}"))?
  88. .parse()
  89. .context(format!("Can't parse filter from: {s}"))?,
  90. infos: v
  91. .get(7)
  92. .ok_or(anyhow!("Can't parse infos from: {s}"))?
  93. .parse()
  94. .context(format!("Can't parse infos from: {s}"))?,
  95. formats,
  96. })
  97. }
  98. }
  // VCF data-line columns (tab-separated): #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT <SAMPLE>
  // The tenth column is named after the sample, e.g. `ADJAGBA_diag`.
  100. impl VcfVariant {
  101. pub fn into_vcf_row(&self) -> String {
  102. let vcf_position: VcfPosition = self.position.clone().into();
  103. let (contig, position) = vcf_position.into();
  104. let mut columns = vec![
  105. contig,
  106. position,
  107. self.id.to_string(),
  108. self.reference.to_string(),
  109. self.alternative.to_string(),
  110. self.quality
  111. .map(|v| v.to_string())
  112. .unwrap_or(".".to_string()),
  113. self.filter.to_string(),
  114. self.infos.to_string(),
  115. ];
  116. if !self.formats.0.is_empty() {
  117. let (format, values) = self.formats.clone().into();
  118. columns.push(format);
  119. columns.push(values);
  120. }
  121. columns.join("\t")
  122. }
  123. pub fn hash(&self) -> Hash128 {
  124. self.hash
  125. }
  126. pub fn commun_deepvariant_clairs(&self) -> VcfVariant {
  127. VcfVariant {
  128. hash: self.hash,
  129. position: self.position.clone(),
  130. id: self.id.clone(),
  131. reference: self.reference.clone(),
  132. alternative: self.alternative.clone(),
  133. quality: self.quality,
  134. filter: Filter::Other(".".to_string()),
  135. infos: Infos(vec![Info::Empty]),
  136. formats: self.formats.commun_deepvariant_clairs(),
  137. }
  138. }
  139. pub fn has_svtype(&self) -> bool {
  140. self.infos.0.iter().any(|i| matches!(i, Info::SVTYPE(_)))
  141. }
  142. pub fn svtype(&self) -> Option<SVType> {
  143. self.infos.0.iter().find_map(|e| {
  144. if let Info::SVTYPE(sv_type) = e {
  145. Some(sv_type.clone())
  146. } else {
  147. None
  148. }
  149. })
  150. }
  151. pub fn alteration_category(&self) -> AlterationCategory {
  152. match (&self.reference, &self.alternative) {
  153. (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
  154. AlterationCategory::SNV
  155. }
  156. (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
  157. AlterationCategory::INS
  158. }
  159. (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
  160. AlterationCategory::Other
  161. }
  162. (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
  163. AlterationCategory::DEL
  164. }
  165. (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
  166. if a.len() < b.len() =>
  167. {
  168. AlterationCategory::INS
  169. }
  170. (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
  171. if a.len() > b.len() =>
  172. {
  173. AlterationCategory::DEL
  174. }
  175. _ => match self.svtype() {
  176. Some(sv_type) => AlterationCategory::from(sv_type),
  177. None => AlterationCategory::Other,
  178. }, // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotides(_)) => {
  179. // AlterationCategory::Rep
  180. // }
  181. // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
  182. // AlterationCategory::Other
  183. // }
  184. // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
  185. // AlterationCategory::Other
  186. // }
  187. // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
  188. // AlterationCategory::Other
  189. // }
  190. // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
  191. // AlterationCategory::Other
  192. // }
  193. }
  194. }
  195. }
  196. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
  197. pub enum AlterationCategory {
  198. SNV,
  199. DEL,
  200. INS,
  201. DUP,
  202. INV,
  203. CNV,
  204. BND,
  205. Other,
  206. }
  207. impl fmt::Display for AlterationCategory {
  208. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  209. write!(
  210. f,
  211. "{}",
  212. match self {
  213. AlterationCategory::SNV => "SNV",
  214. AlterationCategory::DEL => "DEL",
  215. AlterationCategory::INS => "INS",
  216. AlterationCategory::DUP => "DUP",
  217. AlterationCategory::INV => "INV",
  218. AlterationCategory::CNV => "CNV",
  219. AlterationCategory::BND => "BND",
  220. AlterationCategory::Other => "Other",
  221. }
  222. )
  223. }
  224. }
  225. impl From<SVType> for AlterationCategory {
  226. fn from(sv_type: SVType) -> Self {
  227. match sv_type {
  228. SVType::DEL => AlterationCategory::DEL,
  229. SVType::INS => AlterationCategory::INS,
  230. SVType::DUP => AlterationCategory::DUP,
  231. SVType::INV => AlterationCategory::INV,
  232. SVType::CNV => AlterationCategory::CNV,
  233. SVType::BND => AlterationCategory::BND,
  234. }
  235. }
  236. }
  237. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
  238. pub enum SVType {
  239. DEL,
  240. INS,
  241. DUP,
  242. INV,
  243. CNV,
  244. BND,
  245. }
  246. impl FromStr for SVType {
  247. type Err = anyhow::Error;
  248. fn from_str(s: &str) -> anyhow::Result<Self> {
  249. match s {
  250. "DEL" => Ok(SVType::DEL),
  251. "INS" => Ok(SVType::INS),
  252. "DUP" => Ok(SVType::DUP),
  253. "INV" => Ok(SVType::INV),
  254. "CNV" => Ok(SVType::CNV),
  255. "BND" => Ok(SVType::BND),
  256. _ => Err(anyhow!("Can't parse SVTYPE={s}")),
  257. }
  258. }
  259. }
  260. impl fmt::Display for SVType {
  261. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  262. write!(
  263. f,
  264. "{}",
  265. match self {
  266. SVType::DEL => "DEL",
  267. SVType::INS => "INS",
  268. SVType::DUP => "DUP",
  269. SVType::INV => "INV",
  270. SVType::CNV => "CNV",
  271. SVType::BND => "BND",
  272. }
  273. )
  274. }
  275. }
  276. impl VariantId for VcfVariant {
  277. fn variant_id(&self) -> String {
  278. format!("{}_{}>{}", self.position, self.reference, self.alternative)
  279. }
  280. }
  281. impl GetGenomePosition for VcfVariant {
  282. fn position(&self) -> &GenomePosition {
  283. &self.position
  284. }
  285. }
  286. impl PartialOrd for VcfVariant {
  287. fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
  288. Some(self.cmp(other))
  289. }
  290. }
  291. impl Ord for VcfVariant {
  292. fn cmp(&self, other: &Self) -> Ordering {
  293. self.position.cmp(&other.position)
  294. }
  295. }
  296. /// Info
  297. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
  298. pub struct Infos(Vec<Info>);
  299. impl FromStr for Infos {
  300. type Err = anyhow::Error;
  301. fn from_str(s: &str) -> anyhow::Result<Self> {
  302. Ok(Self(
  303. s.split(";")
  304. .map(Info::from_str)
  305. .collect::<Result<Vec<Info>, _>>()
  306. .map_err(|e| anyhow::anyhow!("Failed to parse info: {e}"))?,
  307. ))
  308. }
  309. }
  310. impl fmt::Display for Infos {
  311. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  312. write!(
  313. f,
  314. "{}",
  315. self.0
  316. .iter()
  317. .map(|e| e.to_string())
  318. .collect::<Vec<String>>()
  319. .join(";")
  320. )
  321. }
  322. }
  323. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
  324. pub enum Info {
  325. Empty,
  326. H,
  327. F,
  328. P,
  329. FAU(u32),
  330. FCU(u32),
  331. FGU(u32),
  332. FTU(u32),
  333. RAU(u32),
  334. RCU(u32),
  335. RGU(u32),
  336. RTU(u32),
  337. SVTYPE(SVType),
  338. SVLEN(i32),
  339. END(u32),
  340. MATEID(String),
  341. SVINSLEN(u32),
  342. SVINSSEQ(String),
  343. }
  344. impl FromStr for Info {
  345. type Err = anyhow::Error;
  346. fn from_str(s: &str) -> anyhow::Result<Self> {
  347. if s.contains("=") {
  348. let (key, value) = s
  349. .split_once('=')
  350. .context(format!("Can't split with `=` {s}"))?;
  351. Ok(match key {
  352. "FAU" => Info::FAU(
  353. value
  354. .parse()
  355. .context(format!("Can't parse into u32: {value}"))?,
  356. ),
  357. "FCU" => Info::FCU(
  358. value
  359. .parse()
  360. .context(format!("Can't parse into u32: {value}"))?,
  361. ),
  362. "FGU" => Info::FGU(
  363. value
  364. .parse()
  365. .context(format!("Can't parse into u32: {value}"))?,
  366. ),
  367. "FTU" => Info::FTU(
  368. value
  369. .parse()
  370. .context(format!("Can't parse into u32: {value}"))?,
  371. ),
  372. "RAU" => Info::RAU(
  373. value
  374. .parse()
  375. .context(format!("Can't parse into u32: {value}"))?,
  376. ),
  377. "RCU" => Info::RCU(
  378. value
  379. .parse()
  380. .context(format!("Can't parse into u32: {value}"))?,
  381. ),
  382. "RGU" => Info::RGU(
  383. value
  384. .parse()
  385. .context(format!("Can't parse into u32: {value}"))?,
  386. ),
  387. "RTU" => Info::RTU(
  388. value
  389. .parse()
  390. .context(format!("Can't parse into u32: {value}"))?,
  391. ),
  392. "SVTYPE" => Info::SVTYPE(value.parse()?),
  393. "SVLEN" => Info::SVLEN(
  394. value
  395. .parse()
  396. .context(format!("Can't parse into u32: {value}"))?,
  397. ),
  398. "END" => Info::END(
  399. value
  400. .parse()
  401. .context(format!("Can't parse into u32: {value}"))?,
  402. ),
  403. "MATEID" => Info::MATEID(value.to_string()),
  404. "SVINSLEN" => Info::SVINSLEN(
  405. value
  406. .parse()
  407. .context(format!("Can't parse into u32: {value}"))?,
  408. ),
  409. "SVINSSEQ" => Info::SVINSSEQ(value.to_string()),
  410. _ => Info::Empty,
  411. })
  412. } else {
  413. Ok(match s {
  414. "H" => Info::H,
  415. "F" => Info::F,
  416. "P" => Info::P,
  417. _ => Info::Empty,
  418. })
  419. }
  420. }
  421. }
  422. impl fmt::Display for Info {
  423. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  424. match self {
  425. Info::Empty => write!(f, "."),
  426. Info::H => write!(f, "H"),
  427. Info::F => write!(f, "F"),
  428. Info::P => write!(f, "P"),
  429. Info::FAU(v) => write!(f, "FAU={v}"),
  430. Info::FCU(v) => write!(f, "FCU={v}"),
  431. Info::FGU(v) => write!(f, "FGU={v}"),
  432. Info::FTU(v) => write!(f, "FTU={v}"),
  433. Info::RAU(v) => write!(f, "RAU={v}"),
  434. Info::RCU(v) => write!(f, "RCU={v}"),
  435. Info::RGU(v) => write!(f, "RGU={v}"),
  436. Info::RTU(v) => write!(f, "RTU={v}"),
  437. Info::SVTYPE(v) => write!(f, "SVTYPE={v}"),
  438. Info::SVLEN(v) => write!(f, "SVLEN={v}"),
  439. Info::END(v) => write!(f, "END={v}"),
  440. Info::MATEID(v) => write!(f, "MATEID={v}"),
  441. Info::SVINSLEN(v) => write!(f, "SVINSLEN={v}"),
  442. Info::SVINSSEQ(v) => write!(f, "SVINSSEQ={v}"),
  443. }
  444. }
  445. }
  446. /// Format
  447. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
  448. pub enum Format {
  449. // DeepVariant
  450. GT(String),
  451. GQ(u32),
  452. DP(u32),
  453. AD(Vec<u32>),
  454. VAF(f32),
  455. PL(Vec<u32>),
  456. // Clairs
  457. AF(f32),
  458. NAF(u32),
  459. NDP(u32),
  460. NAD(Vec<u32>),
  461. AU(u32),
  462. CU(u32),
  463. GU(u32),
  464. TU(u32),
  465. NAU(u32),
  466. NCU(u32),
  467. NGU(u32),
  468. NTU(u32),
  469. // nanomonsv
  470. TR(u32),
  471. VR(u32),
  472. Other((String, String)), // (key, value)
  473. }
  474. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
  475. pub struct Formats(Vec<Format>);
  476. impl TryFrom<(&str, &str)> for Formats {
  477. type Error = anyhow::Error;
  478. fn try_from((k, v): (&str, &str)) -> anyhow::Result<Self> {
  479. let keys: Vec<&str> = k.split(':').collect();
  480. let values: Vec<&str> = v.split(':').collect();
  481. if keys.len() != values.len() {
  482. anyhow::bail!("Mismatch between keys and values count for {k} {v}");
  483. }
  484. Ok(Self(
  485. keys.into_iter()
  486. .zip(values)
  487. .map(|(key, value)| Format::try_from((key, value)))
  488. .collect::<Result<Vec<Format>, _>>()
  489. .map_err(|e| anyhow::anyhow!("Failed to parse format: {e}"))?,
  490. ))
  491. }
  492. }
  493. impl From<Formats> for (String, String) {
  494. fn from(formats: Formats) -> Self {
  495. let mut keys = Vec::new();
  496. let mut values = Vec::new();
  497. for format in formats.0 {
  498. let (key, value): (String, String) = format.into();
  499. keys.push(key);
  500. values.push(value);
  501. }
  502. (keys.join(":"), values.join(":"))
  503. }
  504. }
  505. impl TryFrom<(&str, &str)> for Format {
  506. type Error = anyhow::Error;
  507. fn try_from((key, value): (&str, &str)) -> anyhow::Result<Self> {
  508. Ok(match key {
  509. "GT" => Format::GT(value.to_string()),
  510. "GQ" => Format::GQ(value.parse().context(format!("Can't parse GQ: {value}"))?),
  511. "DP" => Format::DP(value.parse().context(format!("Can't parse DP: {value}"))?),
  512. "AD" => Format::AD(
  513. value
  514. .split(',')
  515. .map(|e| e.parse().context("Failed to parse AD"))
  516. .collect::<anyhow::Result<Vec<_>>>()?,
  517. ),
  518. "VAF" => Format::VAF(value.parse().context(format!("Can't parse VAF: {value}"))?),
  519. "PL" => Format::PL(
  520. value
  521. .split(',')
  522. .map(|e| e.parse().context("Failed to parse AD"))
  523. .collect::<anyhow::Result<Vec<_>>>()?,
  524. ),
  525. "TR" => Format::TR(value.parse()?),
  526. "VR" => Format::TR(value.parse()?),
  527. _ => Format::Other((key.to_string(), value.to_string())),
  528. })
  529. }
  530. }
  531. impl From<Format> for (String, String) {
  532. fn from(format: Format) -> Self {
  533. let concat = |values: Vec<u32>| -> String {
  534. values
  535. .iter()
  536. .map(|v| v.to_string())
  537. .collect::<Vec<_>>()
  538. .join(",")
  539. };
  540. match format {
  541. Format::GT(value) => ("GT".to_string(), value),
  542. Format::GQ(value) => ("GQ".to_string(), value.to_string()),
  543. Format::DP(value) => ("DP".to_string(), value.to_string()),
  544. Format::AD(values) => ("AD".to_string(), concat(values)),
  545. Format::VAF(value) => ("VAF".to_string(), value.to_string()),
  546. Format::PL(values) => ("PL".to_string(), concat(values)),
  547. Format::Other((key, value)) => (key, value),
  548. Format::AF(value) => ("AF".to_string(), value.to_string()),
  549. Format::NAF(value) => ("NAF".to_string(), value.to_string()),
  550. Format::NDP(value) => ("NDP".to_string(), value.to_string()),
  551. Format::NAD(values) => ("NAD".to_string(), concat(values)),
  552. Format::AU(value) => ("AU".to_string(), value.to_string()),
  553. Format::CU(value) => ("CU".to_string(), value.to_string()),
  554. Format::GU(value) => ("GU".to_string(), value.to_string()),
  555. Format::TU(value) => ("TU".to_string(), value.to_string()),
  556. Format::NAU(value) => ("NAU".to_string(), value.to_string()),
  557. Format::NCU(value) => ("NCU".to_string(), value.to_string()),
  558. Format::NGU(value) => ("NGU".to_string(), value.to_string()),
  559. Format::NTU(value) => ("NTU".to_string(), value.to_string()),
  560. Format::TR(value) => ("TR".to_string(), value.to_string()),
  561. Format::VR(value) => ("VR".to_string(), value.to_string()),
  562. }
  563. }
  564. }
  565. impl Formats {
  566. pub fn commun_deepvariant_clairs(&self) -> Self {
  567. let filtered_vec: Vec<Format> = self
  568. .0
  569. .clone()
  570. .into_iter()
  571. .map(|e| {
  572. if let Format::VAF(v) = e {
  573. Format::AF(v)
  574. } else {
  575. e
  576. }
  577. })
  578. .filter(|format| {
  579. matches!(
  580. format,
  581. Format::GT(_) | Format::GQ(_) | Format::DP(_) | Format::AD(_) | Format::AF(_)
  582. )
  583. })
  584. .collect();
  585. Formats(filtered_vec)
  586. }
  587. }
  588. /// Filter
  589. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
  590. pub enum Filter {
  591. PASS,
  592. Other(String),
  593. }
  594. impl FromStr for Filter {
  595. type Err = anyhow::Error;
  596. fn from_str(s: &str) -> anyhow::Result<Self> {
  597. match s {
  598. "PASS" => Ok(Filter::PASS),
  599. _ => Ok(Filter::Other(s.to_string())),
  600. }
  601. }
  602. }
  603. impl fmt::Display for Filter {
  604. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  605. match self {
  606. Filter::PASS => write!(f, "PASS"),
  607. Filter::Other(ref s) => write!(f, "{}", s),
  608. }
  609. }
  610. }
  611. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
  612. pub enum ReferenceAlternative {
  613. Nucleotide(Base),
  614. Nucleotides(Vec<Base>),
  615. Unstructured(String),
  616. }
  617. impl FromStr for ReferenceAlternative {
  618. type Err = anyhow::Error;
  619. fn from_str(s: &str) -> anyhow::Result<Self> {
  620. let possible_bases = s.as_bytes().iter();
  621. let mut res: Vec<Base> = Vec::new();
  622. for &base in possible_bases {
  623. match base.try_into() {
  624. std::result::Result::Ok(b) => res.push(b),
  625. Err(_) => {
  626. return Ok(Self::Unstructured(s.to_string()));
  627. }
  628. }
  629. }
  630. if res.len() == 1 {
  631. Ok(Self::Nucleotide(res.pop().unwrap()))
  632. } else {
  633. Ok(Self::Nucleotides(res))
  634. }
  635. }
  636. }
  637. impl fmt::Display for ReferenceAlternative {
  638. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  639. let string = match self {
  640. ReferenceAlternative::Nucleotide(b) => b.to_string(),
  641. ReferenceAlternative::Nucleotides(bases) => bases
  642. .iter()
  643. .fold(String::new(), |acc, e| format!("{}{}", acc, e)),
  644. ReferenceAlternative::Unstructured(s) => s.to_string(),
  645. };
  646. write!(f, "{}", string)
  647. }
  648. }
  649. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
  650. pub enum Base {
  651. A,
  652. T,
  653. C,
  654. G,
  655. N,
  656. }
  657. impl TryFrom<u8> for Base {
  658. type Error = anyhow::Error;
  659. fn try_from(base: u8) -> anyhow::Result<Self> {
  660. match base {
  661. b'A' => Ok(Base::A),
  662. b'T' => Ok(Base::T),
  663. b'C' => Ok(Base::C),
  664. b'G' => Ok(Base::G),
  665. b'N' => Ok(Base::N),
  666. _ => Err(anyhow::anyhow!(
  667. "Unknown base: {}",
  668. String::from_utf8_lossy(&[base])
  669. )),
  670. }
  671. }
  672. }
  673. impl Base {
  674. pub fn into_u8(self) -> u8 {
  675. match self {
  676. Base::A => b'A',
  677. Base::T => b'T',
  678. Base::C => b'C',
  679. Base::G => b'G',
  680. Base::N => b'N',
  681. }
  682. }
  683. }
  684. impl fmt::Display for Base {
  685. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  686. // Use `self.number` to refer to each positional data point.
  687. let str = match self {
  688. Base::A => "A",
  689. Base::T => "T",
  690. Base::C => "C",
  691. Base::G => "G",
  692. Base::N => "N",
  693. };
  694. write!(f, "{}", str)
  695. }
  696. }
  697. pub trait Variants {
  698. fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection>;
  699. }
  /// Implemented by types that can describe themselves with a textual
  /// variant identifier.
  pub trait VariantId {
      /// Returns the identifier as an owned string.
      fn variant_id(&self) -> String;
  }
  703. pub trait RunnerVariants: Run + Variants + Send + Sync {}
  704. pub type CallerBox = Box<dyn RunnerVariants + Send + Sync>;
  /// Builds a `Vec<CallerBox>` with one entry per listed runner type.
  ///
  /// Each entry is `Box::new(<Runner>::initialize($id, $config.clone())?)`, so:
  /// * the macro must be used inside a function whose return type accepts `?`
  ///   on the result of `initialize`;
  /// * `CallerBox` must be in scope at the call site;
  /// * `$id` and `$config` are expanded once per runner.
  ///
  /// A trailing comma after the last runner type is accepted.
  #[macro_export]
  macro_rules! init_somatic_callers {
      ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
          vec![
              $(
                  Box::new(<$runner>::initialize($id, $config.clone())?) as CallerBox
              ),+
          ]
      };
  }
  /// Builds a `Vec<CallerBox>` from `RunnerType, argument` pairs.
  ///
  /// Each entry is `Box::new(<Runner>::initialize($id, $arg, $config.clone())?)`,
  /// so:
  /// * the macro must be used inside a function whose return type accepts `?`
  ///   on the result of `initialize`;
  /// * `CallerBox` must be in scope at the call site;
  /// * `$id` and `$config` are expanded once per pair.
  ///
  /// A trailing comma after the last pair is accepted.
  #[macro_export]
  macro_rules! init_solo_callers {
      ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
          vec![
              $(
                  Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as CallerBox
              ),+
          ]
      };
  }
  725. pub fn load_variants(
  726. iterable: &mut [CallerBox],
  727. annotations: &Annotations,
  728. ) -> anyhow::Result<Vec<VariantCollection>> {
  729. // First, run all items in parallel
  730. iterable
  731. .par_iter_mut()
  732. .try_for_each(|runner| runner.run())?;
  733. // Then, collect variants from all items in parallel
  734. let variants: Vec<VariantCollection> = iterable
  735. .par_iter()
  736. .map(|runner| runner.variants(annotations))
  737. .collect::<anyhow::Result<Vec<_>>>()?;
  738. Ok(variants)
  739. }
  740. pub fn parallel_intersection<T: Hash + Eq + Clone + Send + Sync>(
  741. vec1: &[T],
  742. vec2: &[T],
  743. ) -> (Vec<T>, Vec<T>, Vec<T>) {
  744. let set1: HashSet<_> = vec1.par_iter().cloned().collect();
  745. let set2: HashSet<_> = vec2.par_iter().cloned().collect();
  746. let common: Vec<T> = set1
  747. .par_iter()
  748. .filter(|item| set2.contains(item))
  749. .cloned()
  750. .collect();
  751. let only_in_first: Vec<T> = set1
  752. .par_iter()
  753. .filter(|item| !set2.contains(item))
  754. .cloned()
  755. .collect();
  756. let only_in_second: Vec<T> = set2
  757. .par_iter()
  758. .filter(|item| !set1.contains(item))
  759. .cloned()
  760. .collect();
  761. (common, only_in_first, only_in_second)
  762. }