variant.rs 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130
  1. use crate::{
  2. annotation::Annotations,
  3. helpers::Hash128,
  4. positions::{GenomePosition, GetGenomePosition, VcfPosition},
  5. runners::Run,
  6. variant::variant_collection::VariantCollection,
  7. };
  8. use anyhow::{anyhow, Context};
  9. use log::warn;
  10. use rayon::prelude::*;
  11. use serde::{Deserialize, Serialize};
  12. use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
  13. /// Represents a variant in the Variant Call Format (VCF).
  14. #[derive(Debug, Clone, Serialize, Deserialize)]
  15. pub struct VcfVariant {
  16. /// A 128-bit hash of the variant's key properties for efficient comparison and storage.
  17. pub hash: Hash128,
  18. /// The genomic position of the variant.
  19. pub position: GenomePosition,
  20. /// The identifier of the variant.
  21. pub id: String,
  22. /// The reference allele.
  23. pub reference: ReferenceAlternative,
  24. /// The alternative allele.
  25. pub alternative: ReferenceAlternative,
  26. /// The quality score of the variant call, if available.
  27. pub quality: Option<f32>,
  28. /// The filter status of the variant.
  29. pub filter: Filter,
  30. /// Additional information about the variant.
  31. pub infos: Infos,
  32. /// Genotype information and other sample-specific data.
  33. pub formats: Formats,
  34. }
  35. impl PartialEq for VcfVariant {
  36. /// Compares two VcfVariants for equality.
  37. ///
  38. /// Note: This comparison only considers position, reference, and alternative.
  39. /// It intentionally ignores id, filter, info, format, and quality.
  40. fn eq(&self, other: &Self) -> bool {
  41. // Nota bene: id, filter, info, format and quality is intentionally not compared
  42. self.position == other.position
  43. && self.reference == other.reference
  44. && self.alternative == other.alternative
  45. }
  46. }
  47. impl Eq for VcfVariant {}
  48. impl FromStr for VcfVariant {
  49. type Err = anyhow::Error;
  50. /// Parses a VcfVariant from a string representation.
  51. ///
  52. /// The input string is expected to be a tab-separated VCF line.
  53. ///
  54. /// # Errors
  55. ///
  56. /// Returns an error if parsing fails for any field.
  57. fn from_str(s: &str) -> anyhow::Result<Self> {
  58. let v: Vec<&str> = s.split('\t').collect();
  59. let vcf_position: VcfPosition = (
  60. *v.first().ok_or(anyhow!("Can't get contig from: {s}"))?,
  61. *v.get(1).ok_or(anyhow!("Can't get position from: {s}"))?,
  62. )
  63. .try_into()
  64. .context(format!("Can't parse position from: {s}"))?;
  65. let formats = if v.len() >= 10 {
  66. (
  67. *v.get(8).ok_or(anyhow!("Can't parse formats from: {s}"))?,
  68. *v.get(9).ok_or(anyhow!("Can't parse formats from: {s}"))?,
  69. )
  70. .try_into()
  71. .context(format!("Can't parse formats from: {s}"))?
  72. } else {
  73. Formats::default()
  74. };
  75. let position: GenomePosition = vcf_position.into();
  76. let reference: ReferenceAlternative = v
  77. .get(3)
  78. .ok_or(anyhow!("Can't parse reference from: {s}"))?
  79. .parse()
  80. .context(format!("Can't parse reference from: {s}"))?;
  81. let alternative: ReferenceAlternative = v
  82. .get(4)
  83. .ok_or(anyhow!("Can't parse alternative from: {s}"))?
  84. .parse()
  85. .context(format!("Can't parse alternative from: {s}"))?;
  86. // Blake3 128 bytes Hash
  87. let mut hasher = blake3::Hasher::new();
  88. hasher.update(&position.contig.to_ne_bytes()); // Convert position to bytes
  89. hasher.update(&position.position.to_ne_bytes()); // Convert position to bytes
  90. hasher.update(reference.to_string().as_bytes()); // Reference string as bytes
  91. hasher.update(alternative.to_string().as_bytes()); // Alternative string as bytes
  92. let hash = hasher.finalize();
  93. let hash = Hash128::new(hash.as_bytes()[..16].try_into().unwrap());
  94. Ok(Self {
  95. hash,
  96. position,
  97. id: v
  98. .get(2)
  99. .ok_or(anyhow!("Can't parse id from: {s}"))?
  100. .to_string(),
  101. reference,
  102. alternative,
  103. quality: v
  104. .get(5)
  105. .map(|s| s.parse::<f32>().ok()) // Try to parse as f64; returns Option<f64>
  106. .unwrap_or(None),
  107. filter: v
  108. .get(6)
  109. .ok_or(anyhow!("Can't parse filter from: {s}"))?
  110. .parse()
  111. .context(format!("Can't parse filter from: {s}"))?,
  112. infos: v
  113. .get(7)
  114. .ok_or(anyhow!("Can't parse infos from: {s}"))?
  115. .parse()
  116. .context(format!("Can't parse infos from: {s}"))?,
  117. formats,
  118. })
  119. }
  120. }
  121. // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ADJAGBA_diag
  122. impl VcfVariant {
  123. /// Converts the VcfVariant into a VCF-formatted row string.
  124. ///
  125. /// This method creates a tab-separated string representation of the variant,
  126. /// suitable for writing to a VCF file.
  127. pub fn into_vcf_row(&self) -> String {
  128. let vcf_position: VcfPosition = self.position.clone().into();
  129. let (contig, position) = vcf_position.into();
  130. let mut columns = vec![
  131. contig,
  132. position,
  133. self.id.to_string(),
  134. self.reference.to_string(),
  135. self.alternative.to_string(),
  136. self.quality
  137. .map(|v| v.to_string())
  138. .unwrap_or(".".to_string()),
  139. self.filter.to_string(),
  140. self.infos.to_string(),
  141. ];
  142. if !self.formats.0.is_empty() {
  143. let (format, values) = self.formats.clone().into();
  144. columns.push(format);
  145. columns.push(values);
  146. }
  147. columns.join("\t")
  148. }
  149. /// Returns the hash of the variant.
  150. pub fn hash(&self) -> Hash128 {
  151. self.hash
  152. }
  153. /// Creates a new VcfVariant with common attributes from DeepVariant and CLAIRS.
  154. ///
  155. /// This method generates a new variant with shared properties, resetting some fields
  156. /// to default or empty values.
  157. pub fn commun_deepvariant_clairs(&self) -> VcfVariant {
  158. VcfVariant {
  159. hash: self.hash,
  160. position: self.position.clone(),
  161. id: self.id.clone(),
  162. reference: self.reference.clone(),
  163. alternative: self.alternative.clone(),
  164. quality: self.quality,
  165. filter: Filter::Other(".".to_string()),
  166. infos: Infos(vec![Info::Empty]),
  167. formats: self.formats.commun_deepvariant_clairs(),
  168. }
  169. }
  170. /// Checks if the variant has an SVTYPE info field.
  171. ///
  172. /// Returns true if the variant contains structural variation type information.
  173. pub fn has_svtype(&self) -> bool {
  174. self.infos.0.iter().any(|i| matches!(i, Info::SVTYPE(_)))
  175. }
  176. /// Retrieves the structural variation type of the variant, if present.
  177. ///
  178. /// Returns Some(SVType) if the variant has an SVTYPE info field,
  179. pub fn svtype(&self) -> Option<SVType> {
  180. self.infos.0.iter().find_map(|e| {
  181. if let Info::SVTYPE(sv_type) = e {
  182. Some(sv_type.clone())
  183. } else {
  184. None
  185. }
  186. })
  187. }
  188. /// Determines the alteration category of the variant.
  189. ///
  190. /// This method analyzes the reference and alternative alleles to classify
  191. /// the variant into one of several alteration categories:
  192. /// - SNV (Single Nucleotide Variant)
  193. /// - INS (Insertion)
  194. /// - DEL (Deletion)
  195. /// - Other (including structural variants and complex alterations)
  196. ///
  197. /// The classification is based on the following rules:
  198. /// 1. If both reference and alternative are single nucleotides, it's an SNV.
  199. /// 2. If reference is a single nucleotide and alternative is multiple nucleotides, it's an insertion.
  200. /// 3. If reference is multiple nucleotides and alternative is a single nucleotide, it's a deletion.
  201. /// 4. For cases where both are multiple nucleotides, the longer one determines if it's an insertion or deletion.
  202. /// 5. If none of the above apply, it checks for structural variant types.
  203. /// 6. If no structural variant type is found, it's classified as "Other".
  204. ///
  205. /// # Returns
  206. /// An `AlterationCategory` enum representing the type of alteration.
  207. pub fn alteration_category(&self) -> AlterationCategory {
  208. match (&self.reference, &self.alternative) {
  209. (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
  210. AlterationCategory::SNV
  211. }
  212. (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
  213. AlterationCategory::INS
  214. }
  215. (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
  216. AlterationCategory::Other
  217. }
  218. (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
  219. AlterationCategory::DEL
  220. }
  221. (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
  222. if a.len() < b.len() =>
  223. {
  224. AlterationCategory::INS
  225. }
  226. (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
  227. if a.len() > b.len() =>
  228. {
  229. AlterationCategory::DEL
  230. }
  231. _ => match self.svtype() {
  232. Some(sv_type) => {
  233. if let Ok(bnd_desc) = self.bnd_desc() {
  234. if bnd_desc.a_contig != bnd_desc.b_contig {
  235. AlterationCategory::TRL
  236. } else {
  237. AlterationCategory::BND
  238. }
  239. } else {
  240. AlterationCategory::from(sv_type)
  241. }
  242. }
  243. None => AlterationCategory::Other,
  244. },
  245. }
  246. }
  247. /// Parses and constructs a BND (breakend) description from the alternative string.
  248. ///
  249. /// This function interprets the BND notation in the alternative string and creates
  250. /// a `BNDDesc` struct containing detailed information about the breakend.
  251. ///
  252. /// # Returns
  253. /// - `Ok(BNDDesc)` if parsing is successful
  254. /// - `Err` if parsing fails or if the alteration is not a BND
  255. ///
  256. /// # Errors
  257. /// This function will return an error if:
  258. /// - The alteration category is not BND
  259. /// - The alternative string cannot be parsed into exactly 3 parts
  260. /// - The b_position cannot be parsed as a number
  261. pub fn bnd_desc(&self) -> anyhow::Result<BNDDesc> {
  262. let alt = self.alternative.to_string();
  263. if alt.contains('[') || alt.contains(']') {
  264. let extending_right = alt.contains('[');
  265. let alt_rep = alt.replace("[", ";").replace("]", ";");
  266. let alt_is_joined_after = !alt_rep.starts_with(";");
  267. let parts = alt_rep
  268. .split(";")
  269. .filter(|c| !c.is_empty())
  270. .collect::<Vec<&str>>();
  271. if alt_is_joined_after {
  272. // a is ref b is alt
  273. let a_sens = true;
  274. let a_contig = self.position.contig();
  275. let a_position = self.position.position + 1;
  276. let added_nt = parts[0][1..].to_string();
  277. let b_sens = alt.contains('[');
  278. let (contig, pos) = parts[1].split_once(':').unwrap();
  279. let b_contig = contig.to_string();
  280. let b_position: u32 = pos.parse()?;
  281. Ok(BNDDesc {
  282. a_contig,
  283. a_position,
  284. a_sens,
  285. b_contig,
  286. b_position,
  287. b_sens,
  288. added_nt,
  289. })
  290. } else {
  291. // a is alt b is ref
  292. let b_sens = true;
  293. let b_contig = self.position.contig();
  294. let b_position = self.position.position + 1;
  295. let mut added_nt = parts[1].to_string();
  296. added_nt.pop();
  297. let a_sens = alt.contains(']');
  298. let (contig, pos) = parts[0].split_once(':').unwrap();
  299. let a_contig = contig.to_string();
  300. let a_position: u32 = pos.parse()?;
  301. Ok(BNDDesc {
  302. a_contig,
  303. a_position,
  304. a_sens,
  305. b_contig,
  306. b_position,
  307. b_sens,
  308. added_nt,
  309. })
  310. }
  311. // let b_sens = alt.contains('[');
  312. //
  313. // let a_sens = if b_sens {
  314. // !alt.starts_with('[')
  315. // } else {
  316. // !alt.starts_with(']')
  317. // };
  318. //
  319. // let parts: Vec<&str> = alt
  320. // .split(&['[', ']', ':'])
  321. // .filter(|v| !v.is_empty())
  322. // .collect();
  323. //
  324. // if parts.len() != 3 {
  325. // return Err(anyhow::anyhow!("Failed to parse parts: {parts:?}"));
  326. // }
  327. //
  328. // let (nt, b_contig, b_position) = if a_sens {
  329. // (parts[0], parts[1], parts[2])
  330. // } else {
  331. // (parts[2], parts[0], parts[1])
  332. // };
  333. //
  334. // let added_nt = if nt.len() > 1 {
  335. // nt[1..].to_string()
  336. // } else {
  337. // nt.to_string()
  338. // };
  339. //
  340. // Ok(BNDDesc {
  341. // a_contig: self.position.contig(),
  342. // a_position: self.position.position + 1,
  343. // a_sens,
  344. // b_contig: b_contig.to_string(),
  345. // b_position: b_position
  346. // .parse()
  347. // .map_err(|e| anyhow::anyhow!("Failed to parse: {b_position}\n{e}"))?,
  348. // b_sens,
  349. // added_nt,
  350. // })
  351. } else {
  352. Err(anyhow::anyhow!("The alteration is not BND: {alt}"))
  353. }
  354. }
  355. }
  356. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
  357. pub struct BNDDesc {
  358. pub a_contig: String,
  359. pub a_position: u32, // 1-based
  360. pub a_sens: bool,
  361. pub b_contig: String,
  362. pub b_position: u32, // 1-based
  363. pub b_sens: bool,
  364. pub added_nt: String,
  365. }
  366. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
  367. pub enum AlterationCategory {
  368. SNV,
  369. DEL,
  370. INS,
  371. DUP,
  372. INV,
  373. CNV,
  374. TRL,
  375. BND,
  376. Other,
  377. }
  378. impl fmt::Display for AlterationCategory {
  379. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  380. write!(
  381. f,
  382. "{}",
  383. match self {
  384. AlterationCategory::SNV => "SNV",
  385. AlterationCategory::DEL => "DEL",
  386. AlterationCategory::INS => "INS",
  387. AlterationCategory::DUP => "DUP",
  388. AlterationCategory::INV => "INV",
  389. AlterationCategory::CNV => "CNV",
  390. AlterationCategory::BND | AlterationCategory::TRL => "TRL",
  391. AlterationCategory::Other => "Other",
  392. }
  393. )
  394. }
  395. }
  396. impl From<SVType> for AlterationCategory {
  397. fn from(sv_type: SVType) -> Self {
  398. match sv_type {
  399. SVType::DEL => AlterationCategory::DEL,
  400. SVType::INS => AlterationCategory::INS,
  401. SVType::DUP => AlterationCategory::DUP,
  402. SVType::INV => AlterationCategory::INV,
  403. SVType::CNV => AlterationCategory::CNV,
  404. SVType::BND => AlterationCategory::BND,
  405. }
  406. }
  407. }
  408. #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
  409. pub enum SVType {
  410. DEL,
  411. INS,
  412. DUP,
  413. INV,
  414. CNV,
  415. BND,
  416. }
  417. impl FromStr for SVType {
  418. type Err = anyhow::Error;
  419. fn from_str(s: &str) -> anyhow::Result<Self> {
  420. match s {
  421. "DEL" => Ok(SVType::DEL),
  422. "INS" => Ok(SVType::INS),
  423. "DUP" => Ok(SVType::DUP),
  424. "INV" => Ok(SVType::INV),
  425. "CNV" => Ok(SVType::CNV),
  426. "BND" => Ok(SVType::BND),
  427. _ => Err(anyhow!("Can't parse SVTYPE={s}")),
  428. }
  429. }
  430. }
  431. impl fmt::Display for SVType {
  432. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  433. write!(
  434. f,
  435. "{}",
  436. match self {
  437. SVType::DEL => "DEL",
  438. SVType::INS => "INS",
  439. SVType::DUP => "DUP",
  440. SVType::INV => "INV",
  441. SVType::CNV => "CNV",
  442. SVType::BND => "BND",
  443. }
  444. )
  445. }
  446. }
  447. impl VariantId for VcfVariant {
  448. fn variant_id(&self) -> String {
  449. format!("{}_{}>{}", self.position, self.reference, self.alternative)
  450. }
  451. }
  452. impl GetGenomePosition for VcfVariant {
  453. fn position(&self) -> &GenomePosition {
  454. &self.position
  455. }
  456. }
  457. impl PartialOrd for VcfVariant {
  458. fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
  459. Some(self.cmp(other))
  460. }
  461. }
  462. impl Ord for VcfVariant {
  463. fn cmp(&self, other: &Self) -> Ordering {
  464. self.position.cmp(&other.position)
  465. }
  466. }
  467. /// Info
  468. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
  469. pub struct Infos(pub Vec<Info>);
  470. impl FromStr for Infos {
  471. type Err = anyhow::Error;
  472. fn from_str(s: &str) -> anyhow::Result<Self> {
  473. Ok(Self(
  474. s.split(";")
  475. .map(Info::from_str)
  476. .collect::<Result<Vec<Info>, _>>()
  477. .map_err(|e| anyhow::anyhow!("Failed to parse info: {e}"))?,
  478. ))
  479. }
  480. }
  481. impl fmt::Display for Infos {
  482. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  483. write!(
  484. f,
  485. "{}",
  486. self.0
  487. .iter()
  488. .map(|e| e.to_string())
  489. .collect::<Vec<String>>()
  490. .join(";")
  491. )
  492. }
  493. }
  494. #[allow(non_camel_case_types)]
  495. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
  496. pub enum Info {
  497. Empty,
  498. H,
  499. F,
  500. P,
  501. FAU(u32),
  502. FCU(u32),
  503. FGU(u32),
  504. FTU(u32),
  505. RAU(u32),
  506. RCU(u32),
  507. RGU(u32),
  508. RTU(u32),
  509. SVTYPE(SVType),
  510. MATEID(String),
  511. NORMAL_READ_SUPPORT(u32),
  512. TUMOUR_READ_SUPPORT(u32),
  513. NORMAL_ALN_SUPPORT(u32),
  514. TUMOUR_ALN_SUPPORT(u32),
  515. SVLEN(i32),
  516. TUMOUR_DP_BEFORE(Vec<u32>),
  517. TUMOUR_DP_AT(Vec<u32>),
  518. TUMOUR_DP_AFTER(Vec<u32>),
  519. NORMAL_DP_BEFORE(Vec<u32>),
  520. NORMAL_DP_AT(Vec<u32>),
  521. NORMAL_DP_AFTER(Vec<u32>),
  522. TUMOUR_AF(Vec<f32>),
  523. NORMAL_AF(Vec<f32>),
  524. BP_NOTATION(String),
  525. SOURCE(String),
  526. CLUSTERED_READS_TUMOUR(u32),
  527. CLUSTERED_READS_NORMAL(u32),
  528. TUMOUR_ALT_HP(Vec<u32>),
  529. TUMOUR_PS(Vec<String>),
  530. NORMAL_ALT_HP(Vec<u32>),
  531. NORMAL_PS(Vec<String>),
  532. TUMOUR_TOTAL_HP_AT(Vec<u32>),
  533. NORMAL_TOTAL_HP_AT(Vec<u32>),
  534. ORIGIN_STARTS_STD_DEV(f32),
  535. ORIGIN_MAPQ_MEAN(f32),
  536. ORIGIN_EVENT_SIZE_STD_DEV(f32),
  537. ORIGIN_EVENT_SIZE_MEDIAN(f32),
  538. ORIGIN_EVENT_SIZE_MEAN(f32),
  539. END_STARTS_STD_DEV(f32),
  540. END_MAPQ_MEAN(f32),
  541. END_EVENT_SIZE_STD_DEV(f32),
  542. END_EVENT_SIZE_MEDIAN(f32),
  543. END_EVENT_SIZE_MEAN(f32),
  544. CLASS(String),
  545. END(u32),
  546. SVINSLEN(u32),
  547. SVINSSEQ(String),
  548. }
  549. impl FromStr for Info {
  550. type Err = anyhow::Error;
  551. fn from_str(s: &str) -> anyhow::Result<Self> {
  552. if s.contains('=') {
  553. let (key, value) = s
  554. .split_once('=')
  555. .context(format!("Can't split with `=` in string: {s}"))?;
  556. Ok(match key {
  557. "FAU" => Info::FAU(parse_value(value, key)?),
  558. "FCU" => Info::FCU(parse_value(value, key)?),
  559. "FGU" => Info::FGU(parse_value(value, key)?),
  560. "FTU" => Info::FTU(parse_value(value, key)?),
  561. "RAU" => Info::RAU(parse_value(value, key)?),
  562. "RCU" => Info::RCU(parse_value(value, key)?),
  563. "RGU" => Info::RGU(parse_value(value, key)?),
  564. "RTU" => Info::RTU(parse_value(value, key)?),
  565. "SVLEN" => Info::SVLEN(parse_value(value, key)?),
  566. "END" => Info::END(parse_value(value, key)?),
  567. "SVINSLEN" => Info::SVINSLEN(parse_value(value, key)?),
  568. "SVTYPE" => Info::SVTYPE(value.parse()?),
  569. "MATEID" => Info::MATEID(value.to_string()),
  570. "NORMAL_READ_SUPPORT" => Info::NORMAL_READ_SUPPORT(parse_value(value, key)?),
  571. "TUMOUR_READ_SUPPORT" => Info::TUMOUR_READ_SUPPORT(parse_value(value, key)?),
  572. "NORMAL_ALN_SUPPORT" => Info::NORMAL_ALN_SUPPORT(parse_value(value, key)?),
  573. "TUMOUR_ALN_SUPPORT" => Info::TUMOUR_ALN_SUPPORT(parse_value(value, key)?),
  574. "SVINSSEQ" => Info::SVINSSEQ(value.to_string()),
  575. "TUMOUR_DP_BEFORE" => Info::TUMOUR_DP_BEFORE(parse_vec_value(value, key)?),
  576. "TUMOUR_DP_AT" => Info::TUMOUR_DP_AT(parse_vec_value(value, key)?),
  577. "TUMOUR_DP_AFTER" => Info::TUMOUR_DP_AFTER(parse_vec_value(value, key)?),
  578. "NORMAL_DP_BEFORE" => Info::NORMAL_DP_BEFORE(parse_vec_value(value, key)?),
  579. "NORMAL_DP_AT" => Info::NORMAL_DP_AT(parse_vec_value(value, key)?),
  580. "NORMAL_DP_AFTER" => Info::NORMAL_DP_AFTER(parse_vec_value(value, key)?),
  581. "TUMOUR_AF" => Info::TUMOUR_AF(parse_vec_value(value, key)?),
  582. "NORMAL_AF" => Info::NORMAL_AF(parse_vec_value(value, key)?),
  583. "BP_NOTATION" => Info::BP_NOTATION(value.to_string()),
  584. "SOURCE" => Info::SOURCE(value.to_string()),
  585. "CLUSTERED_READS_TUMOUR" => Info::CLUSTERED_READS_TUMOUR(parse_value(value, key)?),
  586. "CLUSTERED_READS_NORMAL" => Info::CLUSTERED_READS_NORMAL(parse_value(value, key)?),
  587. "TUMOUR_ALT_HP" => Info::TUMOUR_ALT_HP(parse_vec_value(value, key)?),
  588. "TUMOUR_PS" => Info::TUMOUR_PS(parse_vec_value(value, key)?),
  589. "NORMAL_ALT_HP" => Info::NORMAL_ALT_HP(parse_vec_value(value, key)?),
  590. "NORMAL_PS" => Info::NORMAL_PS(parse_vec_value(value, key)?),
  591. "TUMOUR_TOTAL_HP_AT" => Info::TUMOUR_TOTAL_HP_AT(parse_vec_value(value, key)?),
  592. "NORMAL_TOTAL_HP_AT" => Info::NORMAL_TOTAL_HP_AT(parse_vec_value(value, key)?),
  593. "ORIGIN_STARTS_STD_DEV" => Info::ORIGIN_STARTS_STD_DEV(parse_value(value, key)?),
  594. "ORIGIN_MAPQ_MEAN" => Info::ORIGIN_MAPQ_MEAN(parse_value(value, key)?),
  595. "ORIGIN_EVENT_SIZE_STD_DEV" => {
  596. Info::ORIGIN_EVENT_SIZE_STD_DEV(parse_value(value, key)?)
  597. }
  598. "ORIGIN_EVENT_SIZE_MEDIAN" => {
  599. Info::ORIGIN_EVENT_SIZE_MEDIAN(parse_value(value, key)?)
  600. }
  601. "ORIGIN_EVENT_SIZE_MEAN" => Info::ORIGIN_EVENT_SIZE_MEAN(parse_value(value, key)?),
  602. "END_STARTS_STD_DEV" => Info::END_STARTS_STD_DEV(parse_value(value, key)?),
  603. "END_MAPQ_MEAN" => Info::END_MAPQ_MEAN(parse_value(value, key)?),
  604. "END_EVENT_SIZE_STD_DEV" => Info::END_EVENT_SIZE_STD_DEV(parse_value(value, key)?),
  605. "END_EVENT_SIZE_MEDIAN" => Info::END_EVENT_SIZE_MEDIAN(parse_value(value, key)?),
  606. "END_EVENT_SIZE_MEAN" => Info::END_EVENT_SIZE_MEAN(parse_value(value, key)?),
  607. "CLASS" => Info::CLASS(value.to_string()),
  608. _ => Info::Empty,
  609. })
  610. } else {
  611. Ok(match s {
  612. "H" => Info::H,
  613. "F" => Info::F,
  614. "P" => Info::P,
  615. _ => Info::Empty,
  616. })
  617. }
  618. }
  619. }
  620. impl fmt::Display for Info {
  621. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  622. match self {
  623. Info::Empty => write!(f, "."),
  624. Info::H => write!(f, "H"),
  625. Info::F => write!(f, "F"),
  626. Info::P => write!(f, "P"),
  627. Info::FAU(v) => write!(f, "FAU={v}"),
  628. Info::FCU(v) => write!(f, "FCU={v}"),
  629. Info::FGU(v) => write!(f, "FGU={v}"),
  630. Info::FTU(v) => write!(f, "FTU={v}"),
  631. Info::RAU(v) => write!(f, "RAU={v}"),
  632. Info::RCU(v) => write!(f, "RCU={v}"),
  633. Info::RGU(v) => write!(f, "RGU={v}"),
  634. Info::RTU(v) => write!(f, "RTU={v}"),
  635. Info::SVTYPE(v) => write!(f, "SVTYPE={v}"),
  636. Info::SVLEN(v) => write!(f, "SVLEN={v}"),
  637. Info::END(v) => write!(f, "END={v}"),
  638. Info::MATEID(v) => write!(f, "MATEID={v}"),
  639. Info::SVINSLEN(v) => write!(f, "SVINSLEN={v}"),
  640. Info::SVINSSEQ(v) => write!(f, "SVINSSEQ={v}"),
  641. Info::NORMAL_READ_SUPPORT(v) => write!(f, "NORMAL_READ_SUPPORT={v}"),
  642. Info::TUMOUR_READ_SUPPORT(v) => write!(f, "TUMOUR_READ_SUPPORT={v}"),
  643. Info::NORMAL_ALN_SUPPORT(v) => write!(f, "NORMAL_ALN_SUPPORT={v}"),
  644. Info::TUMOUR_ALN_SUPPORT(v) => write!(f, "TUMOUR_ALN_SUPPORT={v}"),
  645. Info::TUMOUR_DP_BEFORE(v) => write!(f, "TUMOUR_DP_BEFORE={}", concat_numbers(v)),
  646. Info::TUMOUR_DP_AT(v) => write!(f, "TUMOUR_DP_AT={}", concat_numbers(v)),
  647. Info::TUMOUR_DP_AFTER(v) => write!(f, "TUMOUR_DP_AFTER={}", concat_numbers(v)),
  648. Info::NORMAL_DP_BEFORE(v) => write!(f, "NORMAL_DP_BEFORE={}", concat_numbers(v)),
  649. Info::NORMAL_DP_AT(v) => write!(f, "NORMAL_DP_AT={}", concat_numbers(v)),
  650. Info::NORMAL_DP_AFTER(v) => write!(f, "NORMAL_DP_AFTER={}", concat_numbers(v)),
  651. Info::TUMOUR_AF(v) => write!(f, "TUMOUR_AF={}", concat_numbers(v)),
  652. Info::NORMAL_AF(v) => write!(f, "NORMAL_AF={}", concat_numbers(v)),
  653. Info::BP_NOTATION(v) => write!(f, "BP_NOTATION={v}"),
  654. Info::SOURCE(v) => write!(f, "SOURCE={v}"),
  655. Info::CLUSTERED_READS_TUMOUR(v) => write!(f, "CLUSTERED_READS_TUMOUR={v}"),
  656. Info::CLUSTERED_READS_NORMAL(v) => write!(f, "CLUSTERED_READS_NORMAL={v}"),
  657. Info::TUMOUR_ALT_HP(v) => write!(f, "TUMOUR_ALT_HP={}", concat_numbers(v)),
  658. Info::TUMOUR_PS(v) => write!(f, "TUMOUR_PS={}", v.join(",")),
  659. Info::NORMAL_ALT_HP(v) => write!(f, "NORMAL_ALT_HP={}", concat_numbers(v)),
  660. Info::NORMAL_PS(v) => write!(f, "NORMAL_PS={}", v.join(",")),
  661. Info::TUMOUR_TOTAL_HP_AT(v) => write!(f, "TUMOUR_TOTAL_HP_AT={}", concat_numbers(v)),
  662. Info::NORMAL_TOTAL_HP_AT(v) => write!(f, "NORMAL_TOTAL_HP_AT={}", concat_numbers(v)),
  663. Info::ORIGIN_STARTS_STD_DEV(v) => write!(f, "ORIGIN_STARTS_STD_DEV={v}"),
  664. Info::ORIGIN_MAPQ_MEAN(v) => write!(f, "ORIGIN_MAPQ_MEAN={v}"),
  665. Info::ORIGIN_EVENT_SIZE_STD_DEV(v) => write!(f, "ORIGIN_EVENT_SIZE_STD_DEV={v}"),
  666. Info::ORIGIN_EVENT_SIZE_MEDIAN(v) => write!(f, "ORIGIN_EVENT_SIZE_MEDIAN={v}"),
  667. Info::ORIGIN_EVENT_SIZE_MEAN(v) => write!(f, "ORIGIN_EVENT_SIZE_MEAN={v}"),
  668. Info::END_STARTS_STD_DEV(v) => write!(f, "END_STARTS_STD_DEV={v}"),
  669. Info::END_MAPQ_MEAN(v) => write!(f, "END_MAPQ_MEAN={v}"),
  670. Info::END_EVENT_SIZE_STD_DEV(v) => write!(f, "END_EVENT_SIZE_STD_DEV={v}"),
  671. Info::END_EVENT_SIZE_MEDIAN(v) => write!(f, "END_EVENT_SIZE_MEDIAN={v}"),
  672. Info::END_EVENT_SIZE_MEAN(v) => write!(f, "END_EVENT_SIZE_MEAN={v}"),
  673. Info::CLASS(v) => write!(f, "CLASS={v}"),
  674. }
  675. }
  676. }
  677. pub fn concat_numbers<T: ToString>(v: &[T]) -> String {
  678. v.iter()
  679. .map(|n| n.to_string())
  680. .collect::<Vec<String>>()
  681. .join(",")
  682. }
  683. /// Format
  684. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
  685. pub enum Format {
  686. // DeepVariant
  687. GT(String),
  688. GQ(u32),
  689. DP(u32),
  690. AD(Vec<u32>),
  691. VAF(f32),
  692. PL(Vec<u32>),
  693. // Clairs
  694. // when format begins with N: normal
  695. // AF(f32),
  696. // NAF(f32), // DP(u32),
  697. NDP(u32),
  698. NAD(Vec<u32>),
  699. AU(u32),
  700. CU(u32),
  701. GU(u32),
  702. TU(u32),
  703. NAU(u32),
  704. NCU(u32),
  705. NGU(u32),
  706. NTU(u32),
  707. // nanomonsv
  708. TR(u32),
  709. VR(u32),
  710. Other((String, String)), // (key, value)
  711. }
  712. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
  713. pub struct Formats(pub Vec<Format>);
  714. impl TryFrom<(&str, &str)> for Formats {
  715. type Error = anyhow::Error;
  716. fn try_from((k, v): (&str, &str)) -> anyhow::Result<Self> {
  717. let keys: Vec<&str> = k.split(':').collect();
  718. let values: Vec<&str> = v.split(':').collect();
  719. if keys.len() != values.len() {
  720. anyhow::bail!("Mismatch between keys and values count for {k} {v}");
  721. }
  722. Ok(Self(
  723. keys.into_iter()
  724. .zip(values)
  725. .map(|(key, value)| Format::try_from((key, value)))
  726. .collect::<Result<Vec<Format>, _>>()
  727. .map_err(|e| anyhow::anyhow!("Failed to parse format: {e}"))?,
  728. ))
  729. }
  730. }
  731. impl From<Formats> for (String, String) {
  732. fn from(formats: Formats) -> Self {
  733. let mut keys = Vec::new();
  734. let mut values = Vec::new();
  735. for format in formats.0 {
  736. let (key, value): (String, String) = format.into();
  737. keys.push(key);
  738. values.push(value);
  739. }
  740. (keys.join(":"), values.join(":"))
  741. }
  742. }
  743. impl TryFrom<(&str, &str)> for Format {
  744. type Error = anyhow::Error;
  745. fn try_from((key, value): (&str, &str)) -> anyhow::Result<Self> {
  746. let format = match key {
  747. "GT" => Format::GT(value.to_string()),
  748. "GQ" => Format::GQ(parse_value(value, key)?),
  749. "DP" => Format::DP(parse_value(value, key)?),
  750. "AD" => Format::AD(parse_vec_value(value, key)?),
  751. "VAF" => Format::VAF(parse_value(value, key)?),
  752. // "AF" => Format::AF(parse_value(value, key)?),
  753. // "NAF" => Format::NAF(parse_value(value, key)?),
  754. "NDP" => Format::NDP(parse_value(value, key)?),
  755. "NAD" => Format::NAD(parse_vec_value(value, key)?),
  756. "AU" => Format::AU(parse_value(value, key)?),
  757. "CU" => Format::CU(parse_value(value, key)?),
  758. "GU" => Format::GU(parse_value(value, key)?),
  759. "TU" => Format::TU(parse_value(value, key)?),
  760. "NAU" => Format::NAU(parse_value(value, key)?),
  761. "NCU" => Format::NCU(parse_value(value, key)?),
  762. "NGU" => Format::NGU(parse_value(value, key)?),
  763. "NTU" => Format::NTU(parse_value(value, key)?),
  764. "PL" => Format::PL(parse_vec_value(value, key)?),
  765. "TR" => Format::TR(parse_value(value, key)?),
  766. "VR" => Format::VR(parse_value(value, key)?),
  767. _ => Format::Other((key.to_string(), value.to_string())),
  768. };
  769. Ok(format)
  770. }
  771. }
  772. // Helper function to parse a single value (DeepSeek)
  773. fn parse_value<T: std::str::FromStr>(value: &str, key: &str) -> anyhow::Result<T>
  774. where
  775. T::Err: std::fmt::Debug,
  776. {
  777. value
  778. .parse()
  779. .map_err(|e| anyhow::anyhow!("{:?}", e)) // Convert the error to `anyhow::Error`
  780. .context(format!("Can't parse {}: {}", key, value)) // Add context
  781. }
  782. // Helper function to parse comma-separated values (DeepSeek)
  783. fn parse_vec_value<T: std::str::FromStr>(value: &str, key: &str) -> anyhow::Result<Vec<T>>
  784. where
  785. T::Err: std::fmt::Debug,
  786. {
  787. value
  788. .split(',')
  789. .map(|e| {
  790. e.parse()
  791. .map_err(|e| anyhow::anyhow!("{:?}", e)) // Convert the error to `anyhow::Error`
  792. .context(format!("Failed to parse {}: {}", key, e)) // Add context
  793. })
  794. .collect()
  795. }
  796. impl From<Format> for (String, String) {
  797. fn from(format: Format) -> Self {
  798. let concat = |values: Vec<u32>| -> String {
  799. values
  800. .iter()
  801. .map(|v| v.to_string())
  802. .collect::<Vec<_>>()
  803. .join(",")
  804. };
  805. match format {
  806. Format::GT(value) => ("GT".to_string(), value),
  807. Format::GQ(value) => ("GQ".to_string(), value.to_string()),
  808. Format::DP(value) => ("DP".to_string(), value.to_string()),
  809. Format::AD(values) => ("AD".to_string(), concat(values)),
  810. Format::VAF(value) => ("VAF".to_string(), value.to_string()),
  811. Format::PL(values) => ("PL".to_string(), concat(values)),
  812. Format::Other((key, value)) => (key, value),
  813. // Format::AF(value) => ("AF".to_string(), value.to_string()),
  814. // Format::NAF(value) => ("NAF".to_string(), value.to_string()),
  815. Format::NDP(value) => ("NDP".to_string(), value.to_string()),
  816. Format::NAD(values) => ("NAD".to_string(), concat(values)),
  817. Format::AU(value) => ("AU".to_string(), value.to_string()),
  818. Format::CU(value) => ("CU".to_string(), value.to_string()),
  819. Format::GU(value) => ("GU".to_string(), value.to_string()),
  820. Format::TU(value) => ("TU".to_string(), value.to_string()),
  821. Format::NAU(value) => ("NAU".to_string(), value.to_string()),
  822. Format::NCU(value) => ("NCU".to_string(), value.to_string()),
  823. Format::NGU(value) => ("NGU".to_string(), value.to_string()),
  824. Format::NTU(value) => ("NTU".to_string(), value.to_string()),
  825. Format::TR(value) => ("TR".to_string(), value.to_string()),
  826. Format::VR(value) => ("VR".to_string(), value.to_string()),
  827. }
  828. }
  829. }
  830. impl Formats {
  831. pub fn commun_deepvariant_clairs(&self) -> Self {
  832. let filtered_vec: Vec<Format> = self
  833. .0
  834. .clone()
  835. .into_iter()
  836. .map(|e| {
  837. if let Format::VAF(_v) = e {
  838. e
  839. // Format::AF(v)
  840. } else {
  841. e
  842. }
  843. })
  844. .filter(|format| {
  845. matches!(
  846. format,
  847. Format::GT(_) | Format::GQ(_) | Format::DP(_) | Format::AD(_) /* | Format::AF(_) */
  848. )
  849. })
  850. .collect();
  851. Formats(filtered_vec)
  852. }
  853. }
/// Filter value: either the literal `PASS` or any other text, kept verbatim.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub enum Filter {
    /// The exact string `PASS`.
    PASS,
    /// Any string other than `PASS`, stored unchanged.
    Other(String),
}
  860. impl FromStr for Filter {
  861. type Err = anyhow::Error;
  862. fn from_str(s: &str) -> anyhow::Result<Self> {
  863. match s {
  864. "PASS" => Ok(Filter::PASS),
  865. _ => Ok(Filter::Other(s.to_string())),
  866. }
  867. }
  868. }
  869. impl fmt::Display for Filter {
  870. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  871. match self {
  872. Filter::PASS => write!(f, "PASS"),
  873. Filter::Other(ref s) => write!(f, "{}", s),
  874. }
  875. }
  876. }
/// Reference or alternative allele text, decoded into [`Base`]s when possible.
///
/// The `FromStr` implementation picks the variant as follows.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum ReferenceAlternative {
    /// Exactly one base.
    Nucleotide(Base),
    /// A sequence of bases whose length is not one (this includes the empty
    /// sequence produced by an empty input string).
    Nucleotides(Vec<Base>),
    /// The raw input string, used when at least one byte is not a valid [`Base`].
    Unstructured(String),
}
  883. impl FromStr for ReferenceAlternative {
  884. type Err = anyhow::Error;
  885. fn from_str(s: &str) -> anyhow::Result<Self> {
  886. let possible_bases = s.as_bytes().iter();
  887. let mut res: Vec<Base> = Vec::new();
  888. for &base in possible_bases {
  889. match base.try_into() {
  890. std::result::Result::Ok(b) => res.push(b),
  891. Err(_) => {
  892. return Ok(Self::Unstructured(s.to_string()));
  893. }
  894. }
  895. }
  896. if res.len() == 1 {
  897. Ok(Self::Nucleotide(res.pop().unwrap()))
  898. } else {
  899. Ok(Self::Nucleotides(res))
  900. }
  901. }
  902. }
  903. impl fmt::Display for ReferenceAlternative {
  904. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  905. let string = match self {
  906. ReferenceAlternative::Nucleotide(b) => b.to_string(),
  907. ReferenceAlternative::Nucleotides(bases) => bases
  908. .iter()
  909. .fold(String::new(), |acc, e| format!("{}{}", acc, e)),
  910. ReferenceAlternative::Unstructured(s) => s.to_string(),
  911. };
  912. write!(f, "{}", string)
  913. }
  914. }
/// A single base letter. Only the upper-case ASCII bytes `A`, `T`, `C`, `G`
/// and `N` convert to a `Base` (see `TryFrom<u8>`).
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum Base {
    /// Letter `A`.
    A,
    /// Letter `T`.
    T,
    /// Letter `C`.
    C,
    /// Letter `G`.
    G,
    /// Letter `N` (presumably "any/unknown base" — not enforced by this type).
    N,
}
  923. impl TryFrom<u8> for Base {
  924. type Error = anyhow::Error;
  925. fn try_from(base: u8) -> anyhow::Result<Self> {
  926. match base {
  927. b'A' => Ok(Base::A),
  928. b'T' => Ok(Base::T),
  929. b'C' => Ok(Base::C),
  930. b'G' => Ok(Base::G),
  931. b'N' => Ok(Base::N),
  932. _ => Err(anyhow::anyhow!(
  933. "Unknown base: {}",
  934. String::from_utf8_lossy(&[base])
  935. )),
  936. }
  937. }
  938. }
  939. impl Base {
  940. pub fn into_u8(self) -> u8 {
  941. match self {
  942. Base::A => b'A',
  943. Base::T => b'T',
  944. Base::C => b'C',
  945. Base::G => b'G',
  946. Base::N => b'N',
  947. }
  948. }
  949. }
  950. impl fmt::Display for Base {
  951. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  952. // Use `self.number` to refer to each positional data point.
  953. let str = match self {
  954. Base::A => "A",
  955. Base::T => "T",
  956. Base::C => "C",
  957. Base::G => "G",
  958. Base::N => "N",
  959. };
  960. write!(f, "{}", str)
  961. }
  962. }
/// Implemented by types that can produce a [`VariantCollection`].
pub trait Variants {
    /// Builds the variant collection for `self`, or returns an error.
    ///
    /// NOTE(review): how `annotations` is used is up to each implementor —
    /// not visible from this trait definition.
    fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection>;
}
/// Implemented by types that can describe themselves with a string identifier.
pub trait VariantId {
    /// Returns the identifier as an owned `String`.
    ///
    /// NOTE(review): the identifier's format and uniqueness guarantees are
    /// defined by implementors — confirm before relying on them.
    fn variant_id(&self) -> String;
}
/// Method-less trait bundling `Run + Variants + Send + Sync`, so a caller can
/// be stored as a single trait object (see [`CallerBox`]).
pub trait RunnerVariants: Run + Variants + Send + Sync {}
/// Boxed, dynamically dispatched [`RunnerVariants`] implementor.
pub type CallerBox = Box<dyn RunnerVariants + Send + Sync>;
/// Builds a `Vec<CallerBox>` from a list of runner types.
///
/// For each listed type the expansion evaluates
/// `<Runner>::initialize($id, $config.clone())?` and boxes the result.
///
/// Requirements visible in the expansion:
/// - it uses `?`, so it must be invoked where `?` can propagate the error
///   returned by `initialize`;
/// - `CallerBox` is referenced unqualified and must be in scope at the call site;
/// - `$id` and `$config` are repeated once per runner, so both expressions
///   are evaluated once per runner.
///
/// At least one runner type is required; a trailing comma is accepted.
#[macro_export]
macro_rules! init_somatic_callers {
    ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
        vec![
            $(
                Box::new(<$runner>::initialize($id, $config.clone())?) as CallerBox
            ),+
        ]
    };
}
/// Builds a `Vec<CallerBox>` from a list of `RunnerType, extra_argument` pairs.
///
/// For each pair the expansion evaluates
/// `<Runner>::initialize($id, $arg, $config.clone())?` and boxes the result.
///
/// Requirements visible in the expansion:
/// - it uses `?`, so it must be invoked where `?` can propagate the error
///   returned by `initialize`;
/// - `CallerBox` is referenced unqualified and must be in scope at the call site;
/// - `$id` and `$config` are repeated once per pair, so both expressions
///   are evaluated once per pair.
///
/// At least one pair is required; a trailing comma is accepted.
#[macro_export]
macro_rules! init_solo_callers {
    ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
        vec![
            $(
                Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as CallerBox
            ),+
        ]
    };
}
  991. pub fn run_variants(iterable: &mut [CallerBox]) -> anyhow::Result<()> {
  992. iterable
  993. .iter_mut()
  994. .try_for_each(|runner| runner.run())
  995. .map_err(|e| anyhow::anyhow!("Error while calling run_variants.\n{e}"))
  996. }
  997. pub fn load_variants(
  998. iterable: &mut [CallerBox],
  999. annotations: &Annotations,
  1000. ) -> anyhow::Result<Vec<VariantCollection>> {
  1001. iterable
  1002. .par_iter()
  1003. .map(|runner| {
  1004. let r = runner.variants(annotations);
  1005. if let Err(ref e) = r {
  1006. warn!("{e}");
  1007. };
  1008. r
  1009. })
  1010. .filter(|r| r.is_ok())
  1011. .collect::<anyhow::Result<Vec<_>>>()
  1012. .map_err(|e| anyhow::anyhow!("Failed to load variants.\n{e}"))
  1013. }
  1014. pub fn parallel_intersection<T: Hash + Eq + Clone + Send + Sync>(
  1015. vec1: &[T],
  1016. vec2: &[T],
  1017. ) -> (Vec<T>, Vec<T>, Vec<T>) {
  1018. let set1: HashSet<_> = vec1.par_iter().cloned().collect();
  1019. let set2: HashSet<_> = vec2.par_iter().cloned().collect();
  1020. let common: Vec<T> = set1
  1021. .par_iter()
  1022. .filter(|item| set2.contains(item))
  1023. .cloned()
  1024. .collect();
  1025. let only_in_first: Vec<T> = set1
  1026. .par_iter()
  1027. .filter(|item| !set2.contains(item))
  1028. .cloned()
  1029. .collect();
  1030. let only_in_second: Vec<T> = set2
  1031. .par_iter()
  1032. .filter(|item| !set1.contains(item))
  1033. .cloned()
  1034. .collect();
  1035. (common, only_in_first, only_in_second)
  1036. }