Sfoglia il codice sorgente

Info fields for dbSNP

Thomas 2 settimane fa
parent
commit
9e3c086a44
1 file modificato con 30 aggiunte e 9 eliminazioni
  1. 30 9
      src/variant/vcf_variant.rs

+ 30 - 9
src/variant/vcf_variant.rs

@@ -107,7 +107,13 @@
 //! ```
 
 use crate::{
-    annotation::Annotations, helpers::{Hash128, estimate_shannon_entropy, mean, revcomp}, io::fasta::sequence_range, pipes::ShouldRun, positions::{GenomePosition, GetGenomePosition, VcfPosition, contig_to_num}, runners::Run, variant::variant_collection::VariantCollection
+    annotation::Annotations,
+    helpers::{estimate_shannon_entropy, mean, revcomp, Hash128},
+    io::fasta::sequence_range,
+    pipes::ShouldRun,
+    positions::{contig_to_num, GenomePosition, GetGenomePosition, VcfPosition},
+    runners::Run,
+    variant::variant_collection::VariantCollection,
 };
 use anyhow::{anyhow, Context};
 use bitcode::{Decode, Encode};
@@ -116,7 +122,12 @@ use log::{error, info};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{
-    cmp::Ordering, collections::{BTreeSet, HashSet}, fmt, fs::File, hash::Hash, str::FromStr
+    cmp::Ordering,
+    collections::{BTreeSet, HashSet},
+    fmt,
+    fs::File,
+    hash::Hash,
+    str::FromStr,
 };
 
 /// Represents a variant in the Variant Call Format (VCF).
@@ -315,7 +326,9 @@ impl VcfVariant {
 
         // Fetch REF from target assembly
         let mut new_ref = sequence_range(fasta, tgt_contig_name, tgt_start0, tgt_end0_inclusive)
-            .with_context(|| format!("FASTA query failed at {tgt_contig_name}:{tgt_start0}-{tgt_end0_inclusive}"))?;
+            .with_context(|| {
+                format!("FASTA query failed at {tgt_contig_name}:{tgt_start0}-{tgt_end0_inclusive}")
+            })?;
 
         if on_reverse {
             new_ref = revcomp(&new_ref);
@@ -327,18 +340,14 @@ impl VcfVariant {
             contig: tgt_contig,
             position: tgt_start0 as u32,
         };
-        out.reference = new_ref
-            .parse()
-            .context("Failed to parse rewritten REF")?;
+        out.reference = new_ref.parse().context("Failed to parse rewritten REF")?;
 
         if on_reverse {
             // Only revcomp ALT if it is a plain sequence allele.
             // If you have symbolic alleles (<DEL>, etc.), guard here.
             let alt = out.alternative.to_string();
             let alt_rc = revcomp(&alt);
-            out.alternative = alt_rc
-                .parse()
-                .context("Failed to parse rewritten ALT")?;
+            out.alternative = alt_rc.parse().context("Failed to parse rewritten ALT")?;
         }
 
         Ok(Some(out))
@@ -1379,6 +1388,10 @@ pub enum Info {
     MATE_ID(String),
     INSIDE_VNTR(String),
     ALINGED_POS(String),
+    // dbSNP
+    FREQ(String),
+    COMMON,
+    RS(u32),
 }
 
 impl FromStr for Info {
@@ -1459,6 +1472,8 @@ impl FromStr for Info {
                 "MATE_ID" => Info::MATE_ID(value.to_string()),
                 "INSIDE_VNTR" => Info::INSIDE_VNTR(value.to_string()),
                 "ALINGED_POS" => Info::ALINGED_POS(value.to_string()),
+                "FREQ" => Info::FREQ(value.to_string()),
+                "RS" => Info::RS(parse_value(value, key)?),
 
                 _ => Info::Empty,
             })
@@ -1469,6 +1484,7 @@ impl FromStr for Info {
                 "P" => Info::P,
                 "PRECISE" => Info::PRECISE,
                 "IMPRECISE" => Info::IMPRECISE,
+                "COMMON" => Info::COMMON,
 
                 _ => Info::Empty,
             })
@@ -1553,6 +1569,11 @@ impl fmt::Display for Info {
             Info::MATE_ID(v) => write!(f, "MATE_ID={v}"),
             Info::INSIDE_VNTR(v) => write!(f, "INSIDE_VNTR={v}"),
             Info::ALINGED_POS(v) => write!(f, "ALINGED_POS={v}"),
+
+            // dbSNP
+            Info::FREQ(v) => write!(f, "FREQ={v}"),
+            Info::RS(v) => write!(f, "RS={v}"),
+            Info::COMMON => write!(f, "COMMON"),
         }
     }
 }