|
|
@@ -1,3 +1,111 @@
|
|
|
+//! # VCF Variant Types and Parsing
|
|
|
+//!
|
|
|
+//! This module provides core types for representing and parsing VCF (Variant Call Format)
|
|
|
+//! variants, including SNVs, indels, and structural variants (SVs).
|
|
|
+//!
|
|
|
+//! ## Core Types
|
|
|
+//!
|
|
|
+//! ### Variant Representation
|
|
|
+//!
|
|
|
+//! - [`VcfVariant`] - Main VCF variant struct with genomic position, alleles, quality, filters, and annotations
|
|
|
+//! - [`ReferenceAlternative`] - Reference or alternative allele sequences
|
|
|
+//! - [`Filter`] - VCF FILTER field values (PASS, LowQual, etc.)
|
|
|
+//! - [`AlterationCategory`] - High-level variant classification (SNV, Insertion, Deletion, SV, etc.)
|
|
|
+//! - [`SVType`] - Structural variant types (DEL, INS, DUP, INV, BND, CNV)
|
|
|
+//!
|
|
|
+//! ### Structural Variants
|
|
|
+//!
|
|
|
+//! - [`BNDDesc`] - Breakend (BND) description for translocations and complex rearrangements
|
|
|
+//! - [`BNDGraph`] - Graph structure for analyzing breakend connections
|
|
|
+//! - [`DeletionDesc`] - Deletion-specific metadata
|
|
|
+//!
|
|
|
+//! ### VCF Fields
|
|
|
+//!
|
|
|
+//! - [`Infos`] / [`Info`] - Parsed INFO field key-value pairs with typed values
|
|
|
+//! - [`Formats`] / [`Format`] - Parsed FORMAT and sample genotype fields
|
|
|
+//!
|
|
|
+//! ## Key Traits
|
|
|
+//!
|
|
|
+//! - [`Variants`] - Load variants from variant caller outputs
|
|
|
+//! - [`Label`] - Provide human-readable caller labels
|
|
|
+//! - [`VariantId`] - Generate unique variant identifiers
|
|
|
+//! - [`GroupByThreshold`] - Group breakends by genomic proximity
|
|
|
+//! - [`ToBNDGraph`] - Convert breakend lists to graph representations
|
|
|
+//!
|
|
|
+//! ## Parsing
|
|
|
+//!
|
|
|
+//! VCF variants can be parsed from tab-separated strings using `FromStr`:
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::variant::vcf_variant::VcfVariant;
|
|
|
+//! use std::str::FromStr;
|
|
|
+//!
|
|
|
+//! let vcf_line = "chr1\t1000\t.\tA\tT\t30.0\tPASS\tDP=50\tGT:AD:DP\t0/1:25,25:50";
|
|
|
+//! let variant = VcfVariant::from_str(vcf_line)?;
|
|
|
+//!
|
|
|
+//! assert_eq!(variant.position.contig, "chr1");
|
|
|
+//! assert_eq!(variant.position.position, 1000);
|
|
|
+//! assert_eq!(variant.reference.seq, "A");
|
|
|
+//! assert_eq!(variant.alternative.seq, "T");
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## Structural Variant Analysis
|
|
|
+//!
|
|
|
+//! Breakends (BND) can be grouped by proximity and analyzed as graphs:
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::variant::vcf_variant::{BNDDesc, GroupByThreshold, ToBNDGraph};
|
|
|
+//!
|
|
|
+//! let breakends: Vec<BNDDesc> = variants
|
|
|
+//!     .into_iter()
|
|
|
+//!     .filter_map(|v| v.to_bnd_desc().ok())
|
|
|
+//!     .collect();
|
|
|
+//!
|
|
|
+//! // Group nearby breakends (within 1000 bp)
|
|
|
+//! let groups = breakends.group_by_threshold(1000);
|
|
|
+//!
|
|
|
+//! // Convert to graph for complex SV analysis
|
|
|
+//! let graph = breakends.to_bnd_graph();
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## Variant Classification
|
|
|
+//!
|
|
|
+//! Variants are automatically classified into categories:
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::variant::vcf_variant::AlterationCategory;
|
|
|
+//!
|
|
|
+//! match variant.alteration_category() {
|
|
|
+//!     AlterationCategory::SNV => println!("Single nucleotide variant"),
|
|
|
+//!     AlterationCategory::Insertion => println!("Insertion"),
|
|
|
+//!     AlterationCategory::Deletion => println!("Deletion"),
|
|
|
+//!     AlterationCategory::SV(sv_type) => println!("Structural variant: {}", sv_type),
|
|
|
+//!     _ => {}
|
|
|
+//! }
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## INFO and FORMAT Field Access
|
|
|
+//!
|
|
|
+//! INFO and FORMAT fields are parsed into typed enums for safe access:
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::variant::vcf_variant::Info;
|
|
|
+//!
|
|
|
+//! // Access INFO fields
|
|
|
+//! if let Some(Info::DP(depth)) = variant.infos.get_key("DP") {
|
|
|
+//!     println!("Read depth: {}", depth);
|
|
|
+//! }
|
|
|
+//!
|
|
|
+//! // Access FORMAT fields
|
|
|
+//! if let Some(genotype) = variant.formats.get_genotype() {
|
|
|
+//!     println!("Genotype: {}", genotype);
|
|
|
+//! }
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+
|
|
|
use crate::{
|
|
|
annotation::Annotations,
|
|
|
helpers::{estimate_shannon_entropy, mean, Hash128},
|