Quellcode durchsuchen

comments + pub Infos Formats

Thomas vor 1 Jahr
Ursprung
Commit
8332eff1e3
2 geänderte Dateien mit 58 neuen und 3 gelöschten Zeilen
  1. 1 1
      src/lib.rs
  2. 57 2
      src/variant/variant.rs

+ 1 - 1
src/lib.rs

@@ -574,7 +574,7 @@ mod tests {
         let position = 716766;
 
         let mut fasta_reader  = noodles_fasta::indexed_reader::Builder::default().build_from_path(c.reference)?;
-        let r = pipes::somatic::sequence_at(&mut fasta_reader, chr, position, 10)?;
+        let r = io::fasta::sequence_at(&mut fasta_reader, chr, position, 10)?;
         println!("{r} ({} {:.2})", r.len(), estimate_shannon_entropy(r.as_str()));
         Ok(())
     }

+ 57 - 2
src/variant/variant.rs

@@ -10,20 +10,34 @@ use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
 
+/// Represents a variant in the Variant Call Format (VCF).
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct VcfVariant {
+    /// A 128-bit hash of the variant's key properties for efficient comparison and storage.
     pub hash: Hash128,
+    /// The genomic position of the variant.
     pub position: GenomePosition,
+    /// The identifier of the variant.
     pub id: String,
+    /// The reference allele.
     pub reference: ReferenceAlternative,
+    /// The alternative allele.
     pub alternative: ReferenceAlternative,
+    /// The quality score of the variant call, if available.
     pub quality: Option<f32>,
+    /// The filter status of the variant.
     pub filter: Filter,
+    /// Additional information about the variant.
     pub infos: Infos,
+    /// Genotype information and other sample-specific data.
     pub formats: Formats,
 }
 
 impl PartialEq for VcfVariant {
+    /// Compares two VcfVariants for equality.
+    ///
+    /// Note: This comparison only considers position, reference, and alternative.
+    /// It intentionally ignores id, filter, info, format, and quality.
     fn eq(&self, other: &Self) -> bool {
         // Nota bene: id, filter, info, format and quality are intentionally not compared
         self.position == other.position
@@ -37,6 +51,13 @@ impl Eq for VcfVariant {}
 impl FromStr for VcfVariant {
     type Err = anyhow::Error;
 
+    /// Parses a VcfVariant from a string representation.
+    ///
+    /// The input string is expected to be a tab-separated VCF line.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if parsing fails for any field.
     fn from_str(s: &str) -> anyhow::Result<Self> {
         let v: Vec<&str> = s.split('\t').collect();
         let vcf_position: VcfPosition = (
@@ -108,6 +129,10 @@ impl FromStr for VcfVariant {
 
 // #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  ADJAGBA_diag
 impl VcfVariant {
+    /// Converts the VcfVariant into a VCF-formatted row string.
+    ///
+    /// This method creates a tab-separated string representation of the variant,
+    /// suitable for writing to a VCF file.
     pub fn into_vcf_row(&self) -> String {
         let vcf_position: VcfPosition = self.position.clone().into();
         let (contig, position) = vcf_position.into();
@@ -134,10 +159,15 @@ impl VcfVariant {
         columns.join("\t")
     }
 
+    /// Returns the hash of the variant.
     pub fn hash(&self) -> Hash128 {
         self.hash
     }
 
+    /// Creates a new VcfVariant with common attributes from DeepVariant and CLAIRS.
+    ///
+    /// This method generates a new variant with shared properties, resetting some fields
+    /// to default or empty values.
     pub fn commun_deepvariant_clairs(&self) -> VcfVariant {
         VcfVariant {
             hash: self.hash,
@@ -152,10 +182,16 @@ impl VcfVariant {
         }
     }
 
+    /// Checks if the variant has an SVTYPE info field.
+    ///
+    /// Returns true if the variant contains structural variation type information.
     pub fn has_svtype(&self) -> bool {
         self.infos.0.iter().any(|i| matches!(i, Info::SVTYPE(_)))
     }
 
+    /// Retrieves the structural variation type of the variant, if present.
+    ///
+    /// Returns Some(SVType) if the variant has an SVTYPE info field, or None otherwise.
     pub fn svtype(&self) -> Option<SVType> {
         self.infos.0.iter().find_map(|e| {
             if let Info::SVTYPE(sv_type) = e {
@@ -166,6 +202,25 @@ impl VcfVariant {
         })
     }
 
+    /// Determines the alteration category of the variant.
+    ///
+    /// This method analyzes the reference and alternative alleles to classify
+    /// the variant into one of several alteration categories:
+    /// - SNV (Single Nucleotide Variant)
+    /// - INS (Insertion)
+    /// - DEL (Deletion)
+    /// - Other (including structural variants and complex alterations)
+    ///
+    /// The classification is based on the following rules:
+    /// 1. If both reference and alternative are single nucleotides, it's an SNV.
+    /// 2. If reference is a single nucleotide and alternative is multiple nucleotides, it's an insertion.
+    /// 3. If reference is multiple nucleotides and alternative is a single nucleotide, it's a deletion.
+    /// 4. For cases where both are multiple nucleotides, the longer one determines if it's an insertion or deletion.
+    /// 5. If none of the above apply, it checks for structural variant types.
+    /// 6. If no structural variant type is found, it's classified as "Other".
+    ///
+    /// # Returns
+    /// An `AlterationCategory` enum representing the type of alteration.
     pub fn alteration_category(&self) -> AlterationCategory {
         match (&self.reference, &self.alternative) {
             (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
@@ -384,7 +439,7 @@ impl Ord for VcfVariant {
 
 /// Info
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
-pub struct Infos(Vec<Info>);
+pub struct Infos(pub Vec<Info>);
 
 impl FromStr for Infos {
     type Err = anyhow::Error;
@@ -642,7 +697,7 @@ pub enum Format {
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
-pub struct Formats(Vec<Format>);
+pub struct Formats(pub Vec<Format>);
 
 impl TryFrom<(&str, &str)> for Formats {
     type Error = anyhow::Error;