|
|
@@ -0,0 +1,407 @@
|
|
|
+use serde::{Deserialize, Serialize};
|
|
|
+
|
|
|
+#[derive(Debug, Clone, Serialize, Eq, PartialEq, Deserialize)]
|
|
|
+pub enum VariantType {
|
|
|
+ Somatic,
|
|
|
+ Constitutionnal,
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+impl Variant {
|
|
|
+ pub fn from_vcfrow(row: &VCFRow, source: VCFSource, variant_type: VariantType) -> anyhow::Result<Self> {
|
|
|
+ let callers_data = vec![CallerData {
|
|
|
+ qual: row.qual.parse::<f32>().ok(),
|
|
|
+ info: parse_info(&row.info, &source).context(anyhow!(
|
|
|
+ "Can't parse {:?} info for {}",
|
|
|
+ source,
|
|
|
+ row.info
|
|
|
+ ))?,
|
|
|
+ format: parse_format(&source, &row.value).context(anyhow!(
|
|
|
+ "Can't parse {:?} format for {}",
|
|
|
+ source,
|
|
|
+ row.value
|
|
|
+ ))?,
|
|
|
+ }];
|
|
|
+
|
|
|
+ Ok(Variant {
|
|
|
+ contig: row.chr.to_string(),
|
|
|
+ position: row.pos,
|
|
|
+ reference: row
|
|
|
+ .reference
|
|
|
+ .parse()
|
|
|
+ .context(anyhow!("Error while parsing {}", row.reference))?,
|
|
|
+ alternative: row
|
|
|
+ .alt
|
|
|
+ .parse()
|
|
|
+ .context(anyhow!("Error while parsing {}", row.alt))?,
|
|
|
+ n_ref: None,
|
|
|
+ n_alt: None,
|
|
|
+ vaf: None,
|
|
|
+ depth: None,
|
|
|
+ callers_data,
|
|
|
+ source: vec![source],
|
|
|
+ variant_type,
|
|
|
+ annotations: Vec::new(),
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn get_depth(&mut self) -> u32 {
|
|
|
+ if let Some(depth) = self.depth {
|
|
|
+ depth
|
|
|
+ } else {
|
|
|
+ let depth = self
|
|
|
+ .callers_data
|
|
|
+ .iter_mut()
|
|
|
+ .map(|v| v.get_depth())
|
|
|
+ .max()
|
|
|
+ .unwrap();
|
|
|
+ self.depth = Some(depth);
|
|
|
+ depth
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn get_n_alt(&mut self) -> u32 {
|
|
|
+ if let Some(n_alt) = self.n_alt {
|
|
|
+ n_alt
|
|
|
+ } else {
|
|
|
+ let n_alt = self
|
|
|
+ .callers_data
|
|
|
+ .iter_mut()
|
|
|
+ .map(|v| v.get_n_alt())
|
|
|
+ .max()
|
|
|
+ .unwrap();
|
|
|
+ self.n_alt = Some(n_alt);
|
|
|
+ n_alt
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn vaf(&mut self) -> f32 {
|
|
|
+ let n_alt = self.get_n_alt() as f32;
|
|
|
+ let depth = self.get_depth() as f32;
|
|
|
+ self.vaf = Some(n_alt / depth);
|
|
|
+ self.vaf.unwrap()
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn is_ins(&self) -> bool {
|
|
|
+ matches!(
|
|
|
+ (&self.reference, &self.alternative),
|
|
|
+ (
|
|
|
+ ReferenceAlternative::Nucleotide(_),
|
|
|
+ ReferenceAlternative::Nucleotides(_)
|
|
|
+ )
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn alteration_category(&self) -> AlterationCategory {
|
|
|
+ match (&self.reference, &self.alternative) {
|
|
|
+ (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
|
|
|
+ AlterationCategory::Snv
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
|
|
|
+ AlterationCategory::Ins
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
|
|
|
+ AlterationCategory::Other
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
|
|
|
+ AlterationCategory::Del
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
|
|
|
+ if a.len() < b.len() =>
|
|
|
+ {
|
|
|
+ AlterationCategory::Ins
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
|
|
|
+ if a.len() > b.len() =>
|
|
|
+ {
|
|
|
+ AlterationCategory::Del
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotides(_)) => {
|
|
|
+ AlterationCategory::Rep
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
|
|
|
+ AlterationCategory::Other
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
|
|
|
+ AlterationCategory::Other
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
|
|
|
+ AlterationCategory::Other
|
|
|
+ }
|
|
|
+ (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
|
|
|
+ AlterationCategory::Other
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn to_min_string(&mut self) -> String {
|
|
|
+ let depth = self.get_depth();
|
|
|
+ let n_alt = self.get_n_alt();
|
|
|
+
|
|
|
+ format!(
|
|
|
+ "DP:AD\t{}:{}",
|
|
|
+ depth,
|
|
|
+ [(depth - n_alt).to_string(), n_alt.to_string()].join(",")
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn get_veps(&self) -> Vec<VEP> {
|
|
|
+ self.annotations
|
|
|
+ .iter()
|
|
|
+ .flat_map(|e| {
|
|
|
+ if let AnnotationType::VEP(e) = e {
|
|
|
+ e.clone()
|
|
|
+ } else {
|
|
|
+ vec![]
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
+ pub fn get_best_vep(&self) -> Result<VEP> {
|
|
|
+ get_best_vep(&self.get_veps())
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn is_from_category(&self, and_categories: &[Category]) -> bool {
|
|
|
+ let mut vec_bools = Vec::new();
|
|
|
+ for category in and_categories.iter() {
|
|
|
+ match category {
|
|
|
+ Category::VariantCategory(vc) => {
|
|
|
+ for annotations in self.annotations.iter() {
|
|
|
+ if let AnnotationType::VariantCategory(vvc) = annotations {
|
|
|
+ if vc == vvc {
|
|
|
+ vec_bools.push(true);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Category::PositionRange { contig, from, to } => {
|
|
|
+ if self.contig == *contig {
|
|
|
+ match (from, to) {
|
|
|
+ (None, None) => vec_bools.push(true),
|
|
|
+ (None, Some(to)) => vec_bools.push(self.position <= *to),
|
|
|
+ (Some(from), None) => vec_bools.push(self.position >= *from),
|
|
|
+ (Some(from), Some(to)) => {
|
|
|
+ vec_bools.push(self.position >= *from && self.position <= *to)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ vec_bools.push(false);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Category::VCFSource(_) => (),
|
|
|
+ Category::NCosmic(n) => {
|
|
|
+ let mut bools = Vec::new();
|
|
|
+ for annotations in self.annotations.iter() {
|
|
|
+ if let AnnotationType::Cosmic(c) = annotations {
|
|
|
+ bools.push(c.cosmic_cnt >= *n);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ vec_bools.push(bools.iter().any(|&b| b));
|
|
|
+ }
|
|
|
+ Category::NCBIFeature(ncbi_feature) => {
|
|
|
+ let mut bools = Vec::new();
|
|
|
+ for annotations in self.annotations.iter() {
|
|
|
+ if let AnnotationType::NCBIGFF(v) = annotations {
|
|
|
+ bools.push(v.feature == *ncbi_feature);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ vec_bools.push(bools.iter().any(|&b| b));
|
|
|
+ }
|
|
|
+ Category::VAF { min, max } => {
|
|
|
+ let v = if self.vaf.is_none() {
|
|
|
+ let mut s = self.clone();
|
|
|
+ s.vaf()
|
|
|
+ } else {
|
|
|
+ self.vaf.unwrap()
|
|
|
+ };
|
|
|
+ vec_bools.push(v >= *min && v <= *max);
|
|
|
+ }
|
|
|
+ Category::Pangolin => {
|
|
|
+ vec_bools.push(
|
|
|
+ self.annotations
|
|
|
+ .iter()
|
|
|
+ .filter(|a| matches!(a, AnnotationType::Pangolin(_)))
|
|
|
+ .count()
|
|
|
+ > 0,
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ vec_bools.iter().all(|&x| x)
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn callers(&self) -> Vec<String> {
|
|
|
+ self.source
|
|
|
+ .iter()
|
|
|
+ .map(|source| source.to_string())
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
+}
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
|
|
|
+pub enum AlterationCategory {
|
|
|
+ Snv,
|
|
|
+ Ins,
|
|
|
+ Del,
|
|
|
+ Rep,
|
|
|
+ Other,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
|
|
|
+pub enum AnnotationType {
|
|
|
+ VariantCategory(VariantCategory),
|
|
|
+ VEP(Vec<VEP>),
|
|
|
+ Cluster(i32),
|
|
|
+ Cosmic(Cosmic),
|
|
|
+ GnomAD(GnomAD),
|
|
|
+ NCBIGFF(NCBIGFF),
|
|
|
+ Pangolin(Pangolin),
|
|
|
+ Phase(PhaseAnnotation),
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
|
|
|
+pub enum VariantCategory {
|
|
|
+ Somatic,
|
|
|
+ LowMRDDepth,
|
|
|
+ LOH,
|
|
|
+ Constit,
|
|
|
+ LowDiversity,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, ToSchema)]
|
|
|
+pub enum ReferenceAlternative {
|
|
|
+ Nucleotide(Base),
|
|
|
+ Nucleotides(Vec<Base>),
|
|
|
+ Unstructured(String),
|
|
|
+}
|
|
|
+
|
|
|
+impl FromStr for ReferenceAlternative {
|
|
|
+ type Err = anyhow::Error;
|
|
|
+
|
|
|
+ fn from_str(s: &str) -> Result<Self> {
|
|
|
+ let possible_bases = s.as_bytes().iter();
|
|
|
+ let mut res: Vec<Base> = Vec::new();
|
|
|
+ for &base in possible_bases {
|
|
|
+ match base.try_into() {
|
|
|
+ std::result::Result::Ok(b) => res.push(b),
|
|
|
+ Err(_) => {
|
|
|
+ return Ok(Self::Unstructured(s.to_string()));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if res.len() == 1 {
|
|
|
+ Ok(Self::Nucleotide(res.pop().unwrap()))
|
|
|
+ } else {
|
|
|
+ Ok(Self::Nucleotides(res))
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl fmt::Display for ReferenceAlternative {
|
|
|
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
+ let string = match self {
|
|
|
+ ReferenceAlternative::Nucleotide(b) => b.to_string(),
|
|
|
+ ReferenceAlternative::Nucleotides(bases) => bases
|
|
|
+ .iter()
|
|
|
+ .fold(String::new(), |acc, e| format!("{}{}", acc, e)),
|
|
|
+ ReferenceAlternative::Unstructured(s) => s.to_string(),
|
|
|
+ };
|
|
|
+ write!(f, "{}", string)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, ToSchema)]
|
|
|
+pub enum Base {
|
|
|
+ A,
|
|
|
+ T,
|
|
|
+ C,
|
|
|
+ G,
|
|
|
+ N,
|
|
|
+}
|
|
|
+
|
|
|
+impl TryFrom<u8> for Base {
|
|
|
+ type Error = anyhow::Error;
|
|
|
+ fn try_from(base: u8) -> Result<Self> {
|
|
|
+ match base {
|
|
|
+ b'A' => Ok(Base::A),
|
|
|
+ b'T' => Ok(Base::T),
|
|
|
+ b'C' => Ok(Base::C),
|
|
|
+ b'G' => Ok(Base::G),
|
|
|
+ b'N' => Ok(Base::N),
|
|
|
+ _ => Err(anyhow!(
|
|
|
+ "Unknown base: {}",
|
|
|
+ String::from_utf8_lossy(&[base])
|
|
|
+ )),
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Base {
|
|
|
+ pub fn into_u8(self) -> u8 {
|
|
|
+ match self {
|
|
|
+ Base::A => b'A',
|
|
|
+ Base::T => b'T',
|
|
|
+ Base::C => b'C',
|
|
|
+ Base::G => b'G',
|
|
|
+ Base::N => b'N',
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl fmt::Display for Base {
|
|
|
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
+ // Use `self.number` to refer to each positional data point.
|
|
|
+ let str = match self {
|
|
|
+ Base::A => "A",
|
|
|
+ Base::T => "T",
|
|
|
+ Base::C => "C",
|
|
|
+ Base::G => "G",
|
|
|
+ Base::N => "N",
|
|
|
+ };
|
|
|
+ write!(f, "{}", str)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, ToSchema)]
|
|
|
+pub enum Format {
|
|
|
+ DeepVariant(DeepVariantFormat),
|
|
|
+ ClairS(ClairSFormat),
|
|
|
+ Sniffles(SnifflesFormat),
|
|
|
+ Nanomonsv(NanomonsvFormat),
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, ToSchema)]
|
|
|
+pub enum Info {
|
|
|
+ #[schema(value_type=String)]
|
|
|
+ DeepVariant(DeepVariantInfo),
|
|
|
+ #[schema(value_type=String)]
|
|
|
+ ClairS(ClairSInfo),
|
|
|
+ #[schema(value_type=String)]
|
|
|
+ Sniffles(SnifflesInfo),
|
|
|
+ #[schema(value_type=String)]
|
|
|
+ Nanomonsv(NanomonsvInfo),
|
|
|
+}
|
|
|
+
|
|
|
+fn parse_info(s: &str, source: &VCFSource) -> Result<Info> {
|
|
|
+ match source {
|
|
|
+ VCFSource::DeepVariant => Ok(Info::DeepVariant(s.parse()?)),
|
|
|
+ VCFSource::ClairS => Ok(Info::ClairS(s.parse()?)),
|
|
|
+ VCFSource::Sniffles => Ok(Info::Sniffles(s.parse()?)),
|
|
|
+ VCFSource::Nanomonsv => Ok(Info::Nanomonsv(s.parse()?)),
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn parse_format(vcf_source: &VCFSource, data: &str) -> Result<Format> {
|
|
|
+ let res = match vcf_source {
|
|
|
+ VCFSource::DeepVariant => Format::DeepVariant(data.parse()?),
|
|
|
+ VCFSource::ClairS => Format::ClairS(data.parse()?),
|
|
|
+ VCFSource::Sniffles => Format::Sniffles(data.parse()?),
|
|
|
+ VCFSource::Nanomonsv => Format::Nanomonsv(data.parse()?),
|
|
|
+ };
|
|
|
+ Ok(res)
|
|
|
+}
|
|
|
+
|
|
|
+
|