| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845 |
- use crate::{
- annotation::Annotations,
- helpers::Hash128,
- positions::{GenomePosition, GetGenomePosition, VcfPosition},
- runners::Run,
- variant::variant_collection::VariantCollection,
- };
- use anyhow::{anyhow, Context, Ok};
- use rayon::prelude::*;
- use serde::{Deserialize, Serialize};
- use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
- #[derive(Debug, Clone, Serialize, Deserialize)]
- pub struct VcfVariant {
- pub hash: Hash128,
- pub position: GenomePosition,
- pub id: String,
- pub reference: ReferenceAlternative,
- pub alternative: ReferenceAlternative,
- pub quality: Option<f32>,
- pub filter: Filter,
- pub infos: Infos,
- pub formats: Formats,
- }
- impl PartialEq for VcfVariant {
- fn eq(&self, other: &Self) -> bool {
- // Nota bene: id, filter, info, format and quality is intentionally not compared
- self.position == other.position
- && self.reference == other.reference
- && self.alternative == other.alternative
- }
- }
- impl Eq for VcfVariant {}
- impl FromStr for VcfVariant {
- type Err = anyhow::Error;
- fn from_str(s: &str) -> anyhow::Result<Self> {
- let v: Vec<&str> = s.split('\t').collect();
- let vcf_position: VcfPosition = (
- *v.first().ok_or(anyhow!("Can't get contig from: {s}"))?,
- *v.get(1).ok_or(anyhow!("Can't get position from: {s}"))?,
- )
- .try_into()
- .context(format!("Can't parse position from: {s}"))?;
- let formats = if v.len() == 10 {
- (
- *v.get(8).ok_or(anyhow!("Can't parse formats from: {s}"))?,
- *v.get(9).ok_or(anyhow!("Can't parse formats from: {s}"))?,
- )
- .try_into()
- .context(format!("Can't parse formats from: {s}"))?
- } else {
- Formats::default()
- };
- let position: GenomePosition = vcf_position.into();
- let reference: ReferenceAlternative = v
- .get(3)
- .ok_or(anyhow!("Can't parse reference from: {s}"))?
- .parse()
- .context(format!("Can't parse reference from: {s}"))?;
- let alternative: ReferenceAlternative = v
- .get(4)
- .ok_or(anyhow!("Can't parse alternative from: {s}"))?
- .parse()
- .context(format!("Can't parse alternative from: {s}"))?;
- // Blake3 128 bytes Hash
- let mut hasher = blake3::Hasher::new();
- hasher.update(&position.contig.to_ne_bytes()); // Convert position to bytes
- hasher.update(&position.position.to_ne_bytes()); // Convert position to bytes
- hasher.update(reference.to_string().as_bytes()); // Reference string as bytes
- hasher.update(alternative.to_string().as_bytes()); // Alternative string as bytes
- let hash = hasher.finalize();
- let hash = Hash128::new(hash.as_bytes()[..16].try_into().unwrap());
- Ok(Self {
- hash,
- position,
- id: v
- .get(2)
- .ok_or(anyhow!("Can't parse id from: {s}"))?
- .to_string(),
- reference,
- alternative,
- quality: v
- .get(5)
- .map(|s| s.parse::<f32>().ok()) // Try to parse as f64; returns Option<f64>
- .unwrap_or(None),
- filter: v
- .get(6)
- .ok_or(anyhow!("Can't parse filter from: {s}"))?
- .parse()
- .context(format!("Can't parse filter from: {s}"))?,
- infos: v
- .get(7)
- .ok_or(anyhow!("Can't parse infos from: {s}"))?
- .parse()
- .context(format!("Can't parse infos from: {s}"))?,
- formats,
- })
- }
- }
- // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ADJAGBA_diag
- impl VcfVariant {
- pub fn into_vcf_row(&self) -> String {
- let vcf_position: VcfPosition = self.position.clone().into();
- let (contig, position) = vcf_position.into();
- let mut columns = vec![
- contig,
- position,
- self.id.to_string(),
- self.reference.to_string(),
- self.alternative.to_string(),
- self.quality
- .map(|v| v.to_string())
- .unwrap_or(".".to_string()),
- self.filter.to_string(),
- self.infos.to_string(),
- ];
- if !self.formats.0.is_empty() {
- let (format, values) = self.formats.clone().into();
- columns.push(format);
- columns.push(values);
- }
- columns.join("\t")
- }
- pub fn hash(&self) -> Hash128 {
- self.hash
- }
- pub fn commun_deepvariant_clairs(&self) -> VcfVariant {
- VcfVariant {
- hash: self.hash,
- position: self.position.clone(),
- id: self.id.clone(),
- reference: self.reference.clone(),
- alternative: self.alternative.clone(),
- quality: self.quality,
- filter: Filter::Other(".".to_string()),
- infos: Infos(vec![Info::Empty]),
- formats: self.formats.commun_deepvariant_clairs(),
- }
- }
- pub fn has_svtype(&self) -> bool {
- self.infos.0.iter().any(|i| matches!(i, Info::SVTYPE(_)))
- }
- pub fn svtype(&self) -> Option<SVType> {
- self.infos.0.iter().find_map(|e| {
- if let Info::SVTYPE(sv_type) = e {
- Some(sv_type.clone())
- } else {
- None
- }
- })
- }
- pub fn alteration_category(&self) -> AlterationCategory {
- match (&self.reference, &self.alternative) {
- (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
- AlterationCategory::SNV
- }
- (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
- AlterationCategory::INS
- }
- (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
- AlterationCategory::Other
- }
- (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
- AlterationCategory::DEL
- }
- (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
- if a.len() < b.len() =>
- {
- AlterationCategory::INS
- }
- (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
- if a.len() > b.len() =>
- {
- AlterationCategory::DEL
- }
- _ => match self.svtype() {
- Some(sv_type) => AlterationCategory::from(sv_type),
- None => AlterationCategory::Other,
- }, // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotides(_)) => {
- // AlterationCategory::Rep
- // }
- // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
- // AlterationCategory::Other
- // }
- // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
- // AlterationCategory::Other
- // }
- // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
- // AlterationCategory::Other
- // }
- // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
- // AlterationCategory::Other
- // }
- }
- }
- }
- #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
- pub enum AlterationCategory {
- SNV,
- DEL,
- INS,
- DUP,
- INV,
- CNV,
- BND,
- Other,
- }
- impl fmt::Display for AlterationCategory {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(
- f,
- "{}",
- match self {
- AlterationCategory::SNV => "SNV",
- AlterationCategory::DEL => "DEL",
- AlterationCategory::INS => "INS",
- AlterationCategory::DUP => "DUP",
- AlterationCategory::INV => "INV",
- AlterationCategory::CNV => "CNV",
- AlterationCategory::BND => "BND",
- AlterationCategory::Other => "Other",
- }
- )
- }
- }
- impl From<SVType> for AlterationCategory {
- fn from(sv_type: SVType) -> Self {
- match sv_type {
- SVType::DEL => AlterationCategory::DEL,
- SVType::INS => AlterationCategory::INS,
- SVType::DUP => AlterationCategory::DUP,
- SVType::INV => AlterationCategory::INV,
- SVType::CNV => AlterationCategory::CNV,
- SVType::BND => AlterationCategory::BND,
- }
- }
- }
- #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
- pub enum SVType {
- DEL,
- INS,
- DUP,
- INV,
- CNV,
- BND,
- }
- impl FromStr for SVType {
- type Err = anyhow::Error;
- fn from_str(s: &str) -> anyhow::Result<Self> {
- match s {
- "DEL" => Ok(SVType::DEL),
- "INS" => Ok(SVType::INS),
- "DUP" => Ok(SVType::DUP),
- "INV" => Ok(SVType::INV),
- "CNV" => Ok(SVType::CNV),
- "BND" => Ok(SVType::BND),
- _ => Err(anyhow!("Can't parse SVTYPE={s}")),
- }
- }
- }
- impl fmt::Display for SVType {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(
- f,
- "{}",
- match self {
- SVType::DEL => "DEL",
- SVType::INS => "INS",
- SVType::DUP => "DUP",
- SVType::INV => "INV",
- SVType::CNV => "CNV",
- SVType::BND => "BND",
- }
- )
- }
- }
- impl VariantId for VcfVariant {
- fn variant_id(&self) -> String {
- format!("{}_{}>{}", self.position, self.reference, self.alternative)
- }
- }
- impl GetGenomePosition for VcfVariant {
- fn position(&self) -> &GenomePosition {
- &self.position
- }
- }
- impl PartialOrd for VcfVariant {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- Some(self.cmp(other))
- }
- }
- impl Ord for VcfVariant {
- fn cmp(&self, other: &Self) -> Ordering {
- self.position.cmp(&other.position)
- }
- }
- /// Info
- #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
- pub struct Infos(Vec<Info>);
- impl FromStr for Infos {
- type Err = anyhow::Error;
- fn from_str(s: &str) -> anyhow::Result<Self> {
- Ok(Self(
- s.split(";")
- .map(Info::from_str)
- .collect::<Result<Vec<Info>, _>>()
- .map_err(|e| anyhow::anyhow!("Failed to parse info: {e}"))?,
- ))
- }
- }
- impl fmt::Display for Infos {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(
- f,
- "{}",
- self.0
- .iter()
- .map(|e| e.to_string())
- .collect::<Vec<String>>()
- .join(";")
- )
- }
- }
- #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
- pub enum Info {
- Empty,
- H,
- F,
- P,
- FAU(u32),
- FCU(u32),
- FGU(u32),
- FTU(u32),
- RAU(u32),
- RCU(u32),
- RGU(u32),
- RTU(u32),
- SVTYPE(SVType),
- SVLEN(i32),
- END(u32),
- MATEID(String),
- SVINSLEN(u32),
- SVINSSEQ(String),
- }
- impl FromStr for Info {
- type Err = anyhow::Error;
- fn from_str(s: &str) -> anyhow::Result<Self> {
- if s.contains("=") {
- let (key, value) = s
- .split_once('=')
- .context(format!("Can't split with `=` {s}"))?;
- Ok(match key {
- "FAU" => Info::FAU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "FCU" => Info::FCU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "FGU" => Info::FGU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "FTU" => Info::FTU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "RAU" => Info::RAU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "RCU" => Info::RCU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "RGU" => Info::RGU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "RTU" => Info::RTU(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "SVTYPE" => Info::SVTYPE(value.parse()?),
- "SVLEN" => Info::SVLEN(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "END" => Info::END(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "MATEID" => Info::MATEID(value.to_string()),
- "SVINSLEN" => Info::SVINSLEN(
- value
- .parse()
- .context(format!("Can't parse into u32: {value}"))?,
- ),
- "SVINSSEQ" => Info::SVINSSEQ(value.to_string()),
- _ => Info::Empty,
- })
- } else {
- Ok(match s {
- "H" => Info::H,
- "F" => Info::F,
- "P" => Info::P,
- _ => Info::Empty,
- })
- }
- }
- }
- impl fmt::Display for Info {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match self {
- Info::Empty => write!(f, "."),
- Info::H => write!(f, "H"),
- Info::F => write!(f, "F"),
- Info::P => write!(f, "P"),
- Info::FAU(v) => write!(f, "FAU={v}"),
- Info::FCU(v) => write!(f, "FCU={v}"),
- Info::FGU(v) => write!(f, "FGU={v}"),
- Info::FTU(v) => write!(f, "FTU={v}"),
- Info::RAU(v) => write!(f, "RAU={v}"),
- Info::RCU(v) => write!(f, "RCU={v}"),
- Info::RGU(v) => write!(f, "RGU={v}"),
- Info::RTU(v) => write!(f, "RTU={v}"),
- Info::SVTYPE(v) => write!(f, "SVTYPE={v}"),
- Info::SVLEN(v) => write!(f, "SVLEN={v}"),
- Info::END(v) => write!(f, "END={v}"),
- Info::MATEID(v) => write!(f, "MATEID={v}"),
- Info::SVINSLEN(v) => write!(f, "SVINSLEN={v}"),
- Info::SVINSSEQ(v) => write!(f, "SVINSSEQ={v}"),
- }
- }
- }
- /// Format
- #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
- pub enum Format {
- // DeepVariant
- GT(String),
- GQ(u32),
- DP(u32),
- AD(Vec<u32>),
- VAF(f32),
- PL(Vec<u32>),
- // Clairs
- AF(f32),
- NAF(u32),
- NDP(u32),
- NAD(Vec<u32>),
- AU(u32),
- CU(u32),
- GU(u32),
- TU(u32),
- NAU(u32),
- NCU(u32),
- NGU(u32),
- NTU(u32),
- // nanomonsv
- TR(u32),
- VR(u32),
- Other((String, String)), // (key, value)
- }
- #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
- pub struct Formats(Vec<Format>);
- impl TryFrom<(&str, &str)> for Formats {
- type Error = anyhow::Error;
- fn try_from((k, v): (&str, &str)) -> anyhow::Result<Self> {
- let keys: Vec<&str> = k.split(':').collect();
- let values: Vec<&str> = v.split(':').collect();
- if keys.len() != values.len() {
- anyhow::bail!("Mismatch between keys and values count for {k} {v}");
- }
- Ok(Self(
- keys.into_iter()
- .zip(values)
- .map(|(key, value)| Format::try_from((key, value)))
- .collect::<Result<Vec<Format>, _>>()
- .map_err(|e| anyhow::anyhow!("Failed to parse format: {e}"))?,
- ))
- }
- }
- impl From<Formats> for (String, String) {
- fn from(formats: Formats) -> Self {
- let mut keys = Vec::new();
- let mut values = Vec::new();
- for format in formats.0 {
- let (key, value): (String, String) = format.into();
- keys.push(key);
- values.push(value);
- }
- (keys.join(":"), values.join(":"))
- }
- }
- impl TryFrom<(&str, &str)> for Format {
- type Error = anyhow::Error;
- fn try_from((key, value): (&str, &str)) -> anyhow::Result<Self> {
- Ok(match key {
- "GT" => Format::GT(value.to_string()),
- "GQ" => Format::GQ(value.parse().context(format!("Can't parse GQ: {value}"))?),
- "DP" => Format::DP(value.parse().context(format!("Can't parse DP: {value}"))?),
- "AD" => Format::AD(
- value
- .split(',')
- .map(|e| e.parse().context("Failed to parse AD"))
- .collect::<anyhow::Result<Vec<_>>>()?,
- ),
- "VAF" => Format::VAF(value.parse().context(format!("Can't parse VAF: {value}"))?),
- "PL" => Format::PL(
- value
- .split(',')
- .map(|e| e.parse().context("Failed to parse AD"))
- .collect::<anyhow::Result<Vec<_>>>()?,
- ),
- "TR" => Format::TR(value.parse()?),
- "VR" => Format::TR(value.parse()?),
- _ => Format::Other((key.to_string(), value.to_string())),
- })
- }
- }
- impl From<Format> for (String, String) {
- fn from(format: Format) -> Self {
- let concat = |values: Vec<u32>| -> String {
- values
- .iter()
- .map(|v| v.to_string())
- .collect::<Vec<_>>()
- .join(",")
- };
- match format {
- Format::GT(value) => ("GT".to_string(), value),
- Format::GQ(value) => ("GQ".to_string(), value.to_string()),
- Format::DP(value) => ("DP".to_string(), value.to_string()),
- Format::AD(values) => ("AD".to_string(), concat(values)),
- Format::VAF(value) => ("VAF".to_string(), value.to_string()),
- Format::PL(values) => ("PL".to_string(), concat(values)),
- Format::Other((key, value)) => (key, value),
- Format::AF(value) => ("AF".to_string(), value.to_string()),
- Format::NAF(value) => ("NAF".to_string(), value.to_string()),
- Format::NDP(value) => ("NDP".to_string(), value.to_string()),
- Format::NAD(values) => ("NAD".to_string(), concat(values)),
- Format::AU(value) => ("AU".to_string(), value.to_string()),
- Format::CU(value) => ("CU".to_string(), value.to_string()),
- Format::GU(value) => ("GU".to_string(), value.to_string()),
- Format::TU(value) => ("TU".to_string(), value.to_string()),
- Format::NAU(value) => ("NAU".to_string(), value.to_string()),
- Format::NCU(value) => ("NCU".to_string(), value.to_string()),
- Format::NGU(value) => ("NGU".to_string(), value.to_string()),
- Format::NTU(value) => ("NTU".to_string(), value.to_string()),
- Format::TR(value) => ("TR".to_string(), value.to_string()),
- Format::VR(value) => ("VR".to_string(), value.to_string()),
- }
- }
- }
- impl Formats {
- pub fn commun_deepvariant_clairs(&self) -> Self {
- let filtered_vec: Vec<Format> = self
- .0
- .clone()
- .into_iter()
- .map(|e| {
- if let Format::VAF(v) = e {
- Format::AF(v)
- } else {
- e
- }
- })
- .filter(|format| {
- matches!(
- format,
- Format::GT(_) | Format::GQ(_) | Format::DP(_) | Format::AD(_) | Format::AF(_)
- )
- })
- .collect();
- Formats(filtered_vec)
- }
- }
- /// Filter
- #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
- pub enum Filter {
- PASS,
- Other(String),
- }
- impl FromStr for Filter {
- type Err = anyhow::Error;
- fn from_str(s: &str) -> anyhow::Result<Self> {
- match s {
- "PASS" => Ok(Filter::PASS),
- _ => Ok(Filter::Other(s.to_string())),
- }
- }
- }
- impl fmt::Display for Filter {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match self {
- Filter::PASS => write!(f, "PASS"),
- Filter::Other(ref s) => write!(f, "{}", s),
- }
- }
- }
- #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
- pub enum ReferenceAlternative {
- Nucleotide(Base),
- Nucleotides(Vec<Base>),
- Unstructured(String),
- }
- impl FromStr for ReferenceAlternative {
- type Err = anyhow::Error;
- fn from_str(s: &str) -> anyhow::Result<Self> {
- let possible_bases = s.as_bytes().iter();
- let mut res: Vec<Base> = Vec::new();
- for &base in possible_bases {
- match base.try_into() {
- std::result::Result::Ok(b) => res.push(b),
- Err(_) => {
- return Ok(Self::Unstructured(s.to_string()));
- }
- }
- }
- if res.len() == 1 {
- Ok(Self::Nucleotide(res.pop().unwrap()))
- } else {
- Ok(Self::Nucleotides(res))
- }
- }
- }
- impl fmt::Display for ReferenceAlternative {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- let string = match self {
- ReferenceAlternative::Nucleotide(b) => b.to_string(),
- ReferenceAlternative::Nucleotides(bases) => bases
- .iter()
- .fold(String::new(), |acc, e| format!("{}{}", acc, e)),
- ReferenceAlternative::Unstructured(s) => s.to_string(),
- };
- write!(f, "{}", string)
- }
- }
- #[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
- pub enum Base {
- A,
- T,
- C,
- G,
- N,
- }
- impl TryFrom<u8> for Base {
- type Error = anyhow::Error;
- fn try_from(base: u8) -> anyhow::Result<Self> {
- match base {
- b'A' => Ok(Base::A),
- b'T' => Ok(Base::T),
- b'C' => Ok(Base::C),
- b'G' => Ok(Base::G),
- b'N' => Ok(Base::N),
- _ => Err(anyhow::anyhow!(
- "Unknown base: {}",
- String::from_utf8_lossy(&[base])
- )),
- }
- }
- }
- impl Base {
- pub fn into_u8(self) -> u8 {
- match self {
- Base::A => b'A',
- Base::T => b'T',
- Base::C => b'C',
- Base::G => b'G',
- Base::N => b'N',
- }
- }
- }
- impl fmt::Display for Base {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- // Use `self.number` to refer to each positional data point.
- let str = match self {
- Base::A => "A",
- Base::T => "T",
- Base::C => "C",
- Base::G => "G",
- Base::N => "N",
- };
- write!(f, "{}", str)
- }
- }
- pub trait Variants {
- fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection>;
- }
- pub trait VariantId {
- fn variant_id(&self) -> String;
- }
- pub trait RunnerVariants: Run + Variants + Send + Sync {}
- pub type CallerBox = Box<dyn RunnerVariants + Send + Sync>;
- #[macro_export]
- macro_rules! init_somatic_callers {
- ($id:expr, $config:expr, $($runner:ty),+ $(,)?) => {
- vec![
- $(
- Box::new(<$runner>::initialize($id, $config.clone())?) as CallerBox
- ),+
- ]
- };
- }
- #[macro_export]
- macro_rules! init_solo_callers {
- ($id:expr, $config:expr, $($runner:ty, $arg:expr),+ $(,)?) => {
- vec![
- $(
- Box::new(<$runner>::initialize($id, $arg, $config.clone())?) as CallerBox
- ),+
- ]
- };
- }
- pub fn load_variants(
- iterable: &mut [CallerBox],
- annotations: &Annotations,
- ) -> anyhow::Result<Vec<VariantCollection>> {
- // First, run all items in parallel
- iterable
- .par_iter_mut()
- .try_for_each(|runner| runner.run())?;
- // Then, collect variants from all items in parallel
- let variants: Vec<VariantCollection> = iterable
- .par_iter()
- .map(|runner| runner.variants(annotations))
- .collect::<anyhow::Result<Vec<_>>>()?;
- Ok(variants)
- }
- pub fn parallel_intersection<T: Hash + Eq + Clone + Send + Sync>(
- vec1: &[T],
- vec2: &[T],
- ) -> (Vec<T>, Vec<T>, Vec<T>) {
- let set1: HashSet<_> = vec1.par_iter().cloned().collect();
- let set2: HashSet<_> = vec2.par_iter().cloned().collect();
- let common: Vec<T> = set1
- .par_iter()
- .filter(|item| set2.contains(item))
- .cloned()
- .collect();
- let only_in_first: Vec<T> = set1
- .par_iter()
- .filter(|item| !set2.contains(item))
- .cloned()
- .collect();
- let only_in_second: Vec<T> = set2
- .par_iter()
- .filter(|item| !set1.contains(item))
- .cloned()
- .collect();
- (common, only_in_first, only_in_second)
- }
|