Browse Source

Infos Formats Update

Thomas 8 months ago
parent
commit
1f81fa84cc
1 changed files with 633 additions and 28 deletions
  1. 633 28
      src/variant/variant.rs

+ 633 - 28
src/variant/variant.rs

@@ -11,7 +11,13 @@ use bitcode::{Decode, Encode};
 use log::{error, info};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
-use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
+use std::{
+    cmp::Ordering,
+    collections::{BTreeSet, HashSet},
+    fmt,
+    hash::Hash,
+    str::FromStr,
+};
 
 /// Represents a variant in the Variant Call Format (VCF).
 #[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
@@ -372,7 +378,7 @@ impl VcfVariant {
         self.deletion_len().map(|len| DeletionDesc {
             contig: self.position.contig(),
             start: self.position.position + 1,
-            end: self.position.position.checked_add(len).unwrap_or(u32::MAX),
+            end: self.position.position.checked_add(len).unwrap_or(u32::MAX), // TODO
         })
     }
 }
@@ -517,13 +523,25 @@ impl Ord for VcfVariant {
     }
 }
 
-/// Info
+/// A container for a list of VCF `INFO` fields.
+///
+/// Represents a parsed set of key-value annotations or flags found in the INFO column.
+///
+/// # Example
+/// ```
+/// use your_crate::Infos;
+/// use std::str::FromStr;
+///
+/// let infos = Infos::from_str("SVTYPE=DEL;END=12345;TUMOUR_AF=0.25,0.15").unwrap();
+/// println!("{}", infos); // Displays: SVTYPE=DEL;END=12345;TUMOUR_AF=0.25,0.15
+/// ```
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default, Encode, Decode)]
 pub struct Infos(pub Vec<Info>);
 
 impl FromStr for Infos {
     type Err = anyhow::Error;
 
+    /// Parses a semicolon-separated list of INFO fields from a VCF record.
     fn from_str(s: &str) -> anyhow::Result<Self> {
         Ok(Self(
             s.split(";")
@@ -535,6 +553,7 @@ impl FromStr for Infos {
 }
 
 impl fmt::Display for Infos {
+    /// Formats the `Infos` as a semicolon-separated VCF-style INFO string.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(
             f,
@@ -548,6 +567,12 @@ impl fmt::Display for Infos {
     }
 }
 
+/// Enum representing a single INFO field in a VCF record.
+///
+/// Supports standard fields as well as caller-specific annotations (ClairS, nanomonsv, SAVANA, Severus).
+/// Handles string values, numeric values, vectors, and flags.
+///
+/// Variants with `Vec<_>` represent fields with multiple comma-separated values.
 #[allow(non_camel_case_types)]
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Encode, Decode)]
 pub enum Info {
@@ -602,11 +627,28 @@ pub enum Info {
     END(u32),
     SVINSLEN(u32),
     SVINSSEQ(String),
+    // Severus
+    PRECISE,
+    IMPRECISE,
+    STRANDS(String),
+    DETAILED_TYPE(String),
+    INSLEN(i32),
+    MAPQ(u32),
+    PHASESETID(String),
+    HP(u32),
+    CLUSTERID(String),
+    INSSEQ(String),
+    MATE_ID(String),
+    INSIDE_VNTR(String),
+    ALINGED_POS(String),
 }
 
 impl FromStr for Info {
     type Err = anyhow::Error;
 
+    /// Parses a single `INFO` key or key=value string into a typed `Info` variant.
+    ///
+    /// Handles both presence/absence flags and key-value fields.
     fn from_str(s: &str) -> anyhow::Result<Self> {
         if s.contains('=') {
             let (key, value) = s
@@ -665,6 +707,21 @@ impl FromStr for Info {
                 "END_EVENT_SIZE_MEDIAN" => Info::END_EVENT_SIZE_MEDIAN(parse_value(value, key)?),
                 "END_EVENT_SIZE_MEAN" => Info::END_EVENT_SIZE_MEAN(parse_value(value, key)?),
                 "CLASS" => Info::CLASS(value.to_string()),
+
+                "PRECISE" => Info::PRECISE,
+                "IMPRECISE" => Info::IMPRECISE,
+                "STRANDS" => Info::STRANDS(value.to_string()),
+                "DETAILED_TYPE" => Info::DETAILED_TYPE(value.to_string()),
+                "INSLEN" => Info::INSLEN(parse_value(value, key)?),
+                "MAPQ" => Info::MAPQ(parse_value(value, key)?),
+                "PHASESETID" => Info::PHASESETID(value.to_string()),
+                "HP" => Info::HP(parse_value(value, key)?),
+                "CLUSTERID" => Info::CLUSTERID(value.to_string()),
+                "INSSEQ" => Info::INSSEQ(value.to_string()),
+                "MATE_ID" => Info::MATE_ID(value.to_string()),
+                "INSIDE_VNTR" => Info::INSIDE_VNTR(value.to_string()),
+                "ALINGED_POS" => Info::ALINGED_POS(value.to_string()),
+
                 _ => Info::Empty,
             })
         } else {
@@ -672,6 +729,9 @@ impl FromStr for Info {
                 "H" => Info::H,
                 "F" => Info::F,
                 "P" => Info::P,
+                "PRECISE" => Info::PRECISE,
+                "IMPRECISE" => Info::IMPRECISE,
+
                 _ => Info::Empty,
             })
         }
@@ -679,12 +739,15 @@ impl FromStr for Info {
 }
 
 impl fmt::Display for Info {
+    /// Converts the `Info` enum into a VCF-compliant string (key=value or flag).
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             Info::Empty => write!(f, "."),
             Info::H => write!(f, "H"),
             Info::F => write!(f, "F"),
             Info::P => write!(f, "P"),
+
+            // ClairS
             Info::FAU(v) => write!(f, "FAU={v}"),
             Info::FCU(v) => write!(f, "FCU={v}"),
             Info::FGU(v) => write!(f, "FGU={v}"),
@@ -693,12 +756,16 @@ impl fmt::Display for Info {
             Info::RCU(v) => write!(f, "RCU={v}"),
             Info::RGU(v) => write!(f, "RGU={v}"),
             Info::RTU(v) => write!(f, "RTU={v}"),
+
+            // Nanomonsv
             Info::SVTYPE(v) => write!(f, "SVTYPE={v}"),
             Info::SVLEN(v) => write!(f, "SVLEN={v}"),
             Info::END(v) => write!(f, "END={v}"),
             Info::MATEID(v) => write!(f, "MATEID={v}"),
             Info::SVINSLEN(v) => write!(f, "SVINSLEN={v}"),
             Info::SVINSSEQ(v) => write!(f, "SVINSSEQ={v}"),
+
+            // SAVANA
             Info::NORMAL_READ_SUPPORT(v) => write!(f, "NORMAL_READ_SUPPORT={v}"),
             Info::TUMOUR_READ_SUPPORT(v) => write!(f, "TUMOUR_READ_SUPPORT={v}"),
             Info::NORMAL_ALN_SUPPORT(v) => write!(f, "NORMAL_ALN_SUPPORT={v}"),
@@ -731,7 +798,23 @@ impl fmt::Display for Info {
             Info::END_EVENT_SIZE_STD_DEV(v) => write!(f, "END_EVENT_SIZE_STD_DEV={v}"),
             Info::END_EVENT_SIZE_MEDIAN(v) => write!(f, "END_EVENT_SIZE_MEDIAN={v}"),
             Info::END_EVENT_SIZE_MEAN(v) => write!(f, "END_EVENT_SIZE_MEAN={v}"),
+
             Info::CLASS(v) => write!(f, "CLASS={v}"),
+
+            // Severus
+            Info::PRECISE => write!(f, "PRECISE"),
+            Info::IMPRECISE => write!(f, "IMPRECISE"),
+            Info::STRANDS(v) => write!(f, "STRANDS={v}"),
+            Info::DETAILED_TYPE(v) => write!(f, "DETAILED_TYPE={v}"),
+            Info::INSLEN(v) => write!(f, "INSLEN={v}"),
+            Info::MAPQ(v) => write!(f, "MAPQ={v}"),
+            Info::PHASESETID(v) => write!(f, "PHASESETID={v}"),
+            Info::HP(v) => write!(f, "HP={v}"),
+            Info::CLUSTERID(v) => write!(f, "CLUSTERID={v}"),
+            Info::INSSEQ(v) => write!(f, "INSSEQ={v}"),
+            Info::MATE_ID(v) => write!(f, "MATE_ID={v}"),
+            Info::INSIDE_VNTR(v) => write!(f, "INSIDE_VNTR={v}"),
+            Info::ALINGED_POS(v) => write!(f, "ALINGED_POS={v}"),
         }
     }
 }
@@ -743,58 +826,398 @@ pub fn concat_numbers<T: ToString>(v: &[T]) -> String {
         .join(",")
 }
 
+impl Info {
+    /// Returns the complete set of known VCF `INFO` header definitions used by `Info` variants.
+    ///
+    /// # Example
+    /// ```
+    /// let headers = Info::header_definitions();
+    /// for line in headers {
+    ///     println!("{line}");
+    /// }
+    /// ```
+    pub fn header_definitions() -> BTreeSet<String> {
+        let mut set = BTreeSet::new();
+
+        macro_rules! push {
+            ($id:expr, $num:expr, $typ:expr, $desc:expr) => {
+                set.insert(format!(
+                    r#"##INFO=<ID={},Number={},Type={},Description="{}">"#,
+                    $id, $num, $typ, $desc
+                ));
+            };
+        }
+
+        // Flags
+        push!("H", 0, "Flag", "H flag");
+        push!("F", 0, "Flag", "F flag");
+        push!("P", 0, "Flag", "P flag");
+
+        // Allelic support
+        push!("FAU", 1, "Integer", "Forward A support in tumour");
+        push!("FCU", 1, "Integer", "Forward C support in tumour");
+        push!("FGU", 1, "Integer", "Forward G support in tumour");
+        push!("FTU", 1, "Integer", "Forward T support in tumour");
+        push!("RAU", 1, "Integer", "Reverse A support in tumour");
+        push!("RCU", 1, "Integer", "Reverse C support in tumour");
+        push!("RGU", 1, "Integer", "Reverse G support in tumour");
+        push!("RTU", 1, "Integer", "Reverse T support in tumour");
+
+        // Structural variant metadata
+        push!("SVTYPE", 1, "String", "Structural variant type");
+        push!("MATEID", 1, "String", "ID of the mate breakend");
+        push!("SVLEN", 1, "Integer", "Length of structural variant");
+        push!("SVINSLEN", 1, "Integer", "Length of inserted sequence");
+        push!("SVINSSEQ", 1, "String", "Inserted sequence");
+
+        // Positions and read support
+        push!("END", 1, "Integer", "End position of the variant");
+        push!(
+            "NORMAL_READ_SUPPORT",
+            1,
+            "Integer",
+            "Supporting reads in normal sample"
+        );
+        push!(
+            "TUMOUR_READ_SUPPORT",
+            1,
+            "Integer",
+            "Supporting reads in tumour sample"
+        );
+        push!(
+            "NORMAL_ALN_SUPPORT",
+            1,
+            "Integer",
+            "Aligned reads in normal sample"
+        );
+        push!(
+            "TUMOUR_ALN_SUPPORT",
+            1,
+            "Integer",
+            "Aligned reads in tumour sample"
+        );
+
+        // Depth profiles
+        push!(
+            "TUMOUR_DP_BEFORE",
+            ".",
+            "Integer",
+            "Depth before breakpoint in tumour"
+        );
+        push!(
+            "TUMOUR_DP_AT",
+            ".",
+            "Integer",
+            "Depth at breakpoint in tumour"
+        );
+        push!(
+            "TUMOUR_DP_AFTER",
+            ".",
+            "Integer",
+            "Depth after breakpoint in tumour"
+        );
+        push!(
+            "NORMAL_DP_BEFORE",
+            ".",
+            "Integer",
+            "Depth before breakpoint in normal"
+        );
+        push!(
+            "NORMAL_DP_AT",
+            ".",
+            "Integer",
+            "Depth at breakpoint in normal"
+        );
+        push!(
+            "NORMAL_DP_AFTER",
+            ".",
+            "Integer",
+            "Depth after breakpoint in normal"
+        );
+
+        // Allele frequencies
+        push!(
+            "TUMOUR_AF",
+            ".",
+            "Float",
+            "Variant allele frequencies in tumour"
+        );
+        push!(
+            "NORMAL_AF",
+            ".",
+            "Float",
+            "Variant allele frequencies in normal"
+        );
+
+        // Haplotype/phasing
+        push!(
+            "TUMOUR_ALT_HP",
+            ".",
+            "Integer",
+            "Alternate haplotype support in tumour"
+        );
+        push!("TUMOUR_PS", ".", "String", "Phasing set in tumour");
+        push!(
+            "NORMAL_ALT_HP",
+            ".",
+            "Integer",
+            "Alternate haplotype support in normal"
+        );
+        push!("NORMAL_PS", ".", "String", "Phasing set in normal");
+        push!(
+            "TUMOUR_TOTAL_HP_AT",
+            ".",
+            "Integer",
+            "Total haplotype depth at breakpoint in tumour"
+        );
+        push!(
+            "NORMAL_TOTAL_HP_AT",
+            ".",
+            "Integer",
+            "Total haplotype depth at breakpoint in normal"
+        );
+
+        // Cluster analysis
+        push!(
+            "CLUSTERED_READS_TUMOUR",
+            1,
+            "Integer",
+            "Clustered reads in tumour"
+        );
+        push!(
+            "CLUSTERED_READS_NORMAL",
+            1,
+            "Integer",
+            "Clustered reads in normal"
+        );
+
+        // Origin and end-point statistics
+        push!(
+            "ORIGIN_STARTS_STD_DEV",
+            1,
+            "Float",
+            "STDDEV of read starts at origin"
+        );
+        push!("ORIGIN_MAPQ_MEAN", 1, "Float", "Mean MAPQ at origin");
+        push!(
+            "ORIGIN_EVENT_SIZE_STD_DEV",
+            1,
+            "Float",
+            "STDDEV of event size at origin"
+        );
+        push!(
+            "ORIGIN_EVENT_SIZE_MEDIAN",
+            1,
+            "Float",
+            "Median event size at origin"
+        );
+        push!(
+            "ORIGIN_EVENT_SIZE_MEAN",
+            1,
+            "Float",
+            "Mean event size at origin"
+        );
+
+        push!(
+            "END_STARTS_STD_DEV",
+            1,
+            "Float",
+            "STDDEV of read starts at end"
+        );
+        push!("END_MAPQ_MEAN", 1, "Float", "Mean MAPQ at end");
+        push!(
+            "END_EVENT_SIZE_STD_DEV",
+            1,
+            "Float",
+            "STDDEV of event size at end"
+        );
+        push!(
+            "END_EVENT_SIZE_MEDIAN",
+            1,
+            "Float",
+            "Median event size at end"
+        );
+        push!("END_EVENT_SIZE_MEAN", 1, "Float", "Mean event size at end");
+
+        // Additional
+        push!("BP_NOTATION", 1, "String", "Breakpoint notation");
+        push!("SOURCE", 1, "String", "Caller source name");
+        push!("CLASS", 1, "String", "Variant classification");
+
+        // Severus
+        push!(
+            "PRECISE",
+            0,
+            "Flag",
+            "SV with precise breakpoints coordinates and length"
+        );
+        push!(
+            "IMPRECISE",
+            0,
+            "Flag",
+            "SV with imprecise breakpoints coordinates and length"
+        );
+        push!("STRANDS", 1, "String", "Breakpoint strandedness");
+        push!("DETAILED_TYPE", 1, "String", "Detailed type of the SV");
+        push!(
+            "INSLEN",
+            1,
+            "Integer",
+            "Length of the unmapped sequence between breakpoint"
+        );
+        push!(
+            "MAPQ",
+            1,
+            "Integer",
+            "Median mapping quality of supporting reads"
+        );
+        push!(
+            "PHASESETID",
+            1,
+            "String",
+            "Matching phaseset ID for phased SVs"
+        );
+        push!("HP", 1, "Integer", "Matching haplotype ID for phased SVs");
+        push!("CLUSTERID", 1, "String", "Cluster ID in breakpoint_graph");
+        push!(
+            "INSSEQ",
+            1,
+            "String",
+            "Insertion sequence between breakpoints"
+        );
+        push!("MATE_ID", 1, "String", "MATE ID for breakends");
+        push!(
+            "INSIDE_VNTR",
+            1,
+            "String",
+            "True if an indel is inside a VNTR"
+        );
+        push!("ALINGED_POS", 1, "String", "Position in the reference");
+
+        set
+    }
+}
+
 /// Format
+/// Enum representing individual FORMAT fields from a VCF record.
+///
+/// This enum supports common fields used by DeepVariant, ClairS, nanomonsv, and Severus,
+/// as well as a generic fallback for other key-value pairs.
+///
+/// # Examples
+///
+/// ```
+/// use your_crate::Format;
+///
+/// let gt = Format::GT("0/1".to_string());
+/// let dp = Format::DP(30);
+/// let ad = Format::AD(vec![10, 20]);
+/// ```
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Encode, Decode)]
 pub enum Format {
-    // DeepVariant
+    // --- DeepVariant fields ---
+    /// Genotype string, e.g., "0/1", "1/1".
     GT(String),
+
+    /// Genotype quality.
     GQ(u32),
+
+    /// Read depth (total coverage at the variant position).
     DP(u32),
+
+    /// Allelic depths for the ref and alt alleles (e.g., [ref, alt1, alt2...]).
     AD(Vec<u32>),
+
+    /// Variant allele frequency (e.g., 0.25 for 25%).
     VAF(f32),
+
+    /// Phred-scaled genotype likelihoods.
     PL(Vec<u32>),
 
-    // Clairs
-    // when format begins with N: normal
-    // AF(f32),
-    // NAF(f32), // DP(u32),
+    // --- ClairS fields (an `N` prefix marks the normal sample; unprefixed counts are for the tumor sample) ---
+    /// Normal sample total depth.
     NDP(u32),
+
+    /// Normal sample allelic depths (e.g., [ref, alt1, alt2...]).
     NAD(Vec<u32>),
+
+    /// Allele-specific counts for A, C, G, T bases in the tumor sample (`AU`, `CU`, `GU`, `TU`).
     AU(u32),
     CU(u32),
     GU(u32),
     TU(u32),
+
+    /// Allele-specific counts for A, C, G, T bases in the normal sample (`NAU`, `NCU`, `NGU`, `NTU`).
     NAU(u32),
     NCU(u32),
     NGU(u32),
     NTU(u32),
 
-    // nanomonsv
+    // --- nanomonsv fields ---
+    /// Total number of supporting reads in tumor.
     TR(u32),
+
+    /// Variant-supporting reads in tumor.
     VR(u32),
 
-    Other((String, String)), // (key, value)
+    // --- Severus fields ---
+    DR(u32),
+    DV(u32),
+    HVAF(Vec<f32>),
+
+    /// Fallback for any other key-value pair not explicitly modeled.
+    /// Contains the raw key and value as strings.
+    Other((String, String)),
 }
 
+/// Container for a list of `Format` items.
+/// Represents the full set of FORMAT keys and their values for one sample.
+///
+/// # Examples
+///
+/// ```
+/// use your_crate::{Formats, Format};
+///
+/// let formats = Formats(vec![
+///     Format::GT("0/1".to_string()),
+///     Format::DP(45),
+///     Format::AD(vec![15, 30]),
+/// ]);
+/// ```
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default, Encode, Decode)]
 pub struct Formats(pub Vec<Format>);
 
 impl Formats {
-    /// Get the tumoral alternative read depth and total depth as an Option<(u32, u32)>.
+    /// Returns the tumor alternative read depth and total depth if both are available.
+    ///
+    /// This method looks for:
+    /// - `Format::AD`: to compute the sum of alternative allele depths (excluding reference)
+    /// - `Format::DP`: to get total read depth
+    ///
+    /// Returns `Some((alt_depth, total_depth))` if both are present, else `None`.
+    ///
+    /// # Example
+    /// ```
+    /// use your_crate::{Formats, Format};
+    ///
+    /// let f = Formats(vec![
+    ///     Format::AD(vec![10, 20, 5]),
+    ///     Format::DP(40),
+    /// ]);
+    ///
+    /// assert_eq!(f.n_alt_depth(), Some((25, 40)));
+    /// ```
     pub fn n_alt_depth(&self) -> Option<(u32, u32)> {
         let mut tumor_alt_depth: Option<u32> = None;
         let mut tumor_total_depth: Option<u32> = None;
 
         for format in &self.0 {
             match format {
-                // Tumor Allelic Depth (AD)
                 Format::AD(values) => {
                     if values.len() > 1 {
-                        // Sum all alternative allele depths (excluding reference allele)
                         tumor_alt_depth = Some(values[1..].iter().sum());
                     }
                 }
-                // Tumor Total Depth (DP)
                 Format::DP(value) => {
                     tumor_total_depth = Some(*value);
                 }
@@ -802,7 +1225,6 @@ impl Formats {
             }
         }
 
-        // Return a tuple (tumor_alt_depth, tumor_total_depth) if both are available
         match (tumor_alt_depth, tumor_total_depth) {
             (Some(alt), Some(total)) => Some((alt, total)),
             _ => None,
@@ -813,6 +1235,22 @@ impl Formats {
 impl TryFrom<(&str, &str)> for Formats {
     type Error = anyhow::Error;
 
+    /// Attempts to construct a `Formats` from a pair of colon-separated FORMAT keys and values.
+    ///
+    /// # Arguments
+    /// * `k` - FORMAT field names (e.g., "GT:DP:AD")
+    /// * `v` - Corresponding values (e.g., "0/1:35:10,25")
+    ///
+    /// # Errors
+    /// Returns an error if the number of keys and values do not match or if parsing fails.
+    ///
+    /// # Example
+    /// ```
+    /// use your_crate::Formats;
+    /// use std::convert::TryFrom;
+    ///
+    /// let f = Formats::try_from(("GT:DP:AD", "0/1:40:12,28")).unwrap();
+    /// ```
     fn try_from((k, v): (&str, &str)) -> anyhow::Result<Self> {
         let keys: Vec<&str> = k.split(':').collect();
         let values: Vec<&str> = v.split(':').collect();
@@ -832,6 +1270,23 @@ impl TryFrom<(&str, &str)> for Formats {
 }
 
 impl From<Formats> for (String, String) {
+    /// Converts `Formats` back into a `(keys, values)` tuple of colon-separated strings.
+    ///
+    /// This is the inverse of the `TryFrom<(&str, &str)>` implementation.
+    ///
+    /// # Example
+    /// ```
+    /// use your_crate::{Format, Formats};
+    ///
+    /// let formats = Formats(vec![
+    ///     Format::GT("0/1".to_string()),
+    ///     Format::DP(30),
+    /// ]);
+    ///
+    /// let (k, v): (String, String) = formats.into();
+    /// assert_eq!(k, "GT:DP");
+    /// assert_eq!(v, "0/1:30");
+    /// ```
     fn from(formats: Formats) -> Self {
         let mut keys = Vec::new();
         let mut values = Vec::new();
@@ -849,15 +1304,34 @@ impl From<Formats> for (String, String) {
 impl TryFrom<(&str, &str)> for Format {
     type Error = anyhow::Error;
 
+    /// Tries to convert a `(key, value)` pair into a typed `Format` variant.
+    ///
+    /// This parser supports known FORMAT keys from DeepVariant, ClairS, nanomonsv, and Severus.
+    /// Unknown keys are stored as `Format::Other((key, value))`.
+    ///
+    /// # Arguments
+    /// * `key` - FORMAT field name
+    /// * `value` - raw string value associated with the key
+    ///
+    /// # Example
+    /// ```
+    /// use your_crate::Format;
+    /// use std::convert::TryFrom;
+    ///
+    /// let dp = Format::try_from(("DP", "42")).unwrap();
+    /// assert!(matches!(dp, Format::DP(42)));
+    /// ```
     fn try_from((key, value): (&str, &str)) -> anyhow::Result<Self> {
         let format = match key {
+            // DeepVariant
             "GT" => Format::GT(value.to_string()),
             "GQ" => Format::GQ(parse_value(value, key)?),
             "DP" => Format::DP(parse_value(value, key)?),
             "AD" => Format::AD(parse_vec_value(value, key)?),
             "VAF" => Format::VAF(parse_value(value, key)?),
-            // "AF" => Format::AF(parse_value(value, key)?),
-            // "NAF" => Format::NAF(parse_value(value, key)?),
+            "PL" => Format::PL(parse_vec_value(value, key)?),
+
+            // Clairs
             "NDP" => Format::NDP(parse_value(value, key)?),
             "NAD" => Format::NAD(parse_vec_value(value, key)?),
             "AU" => Format::AU(parse_value(value, key)?),
@@ -868,9 +1342,17 @@ impl TryFrom<(&str, &str)> for Format {
             "NCU" => Format::NCU(parse_value(value, key)?),
             "NGU" => Format::NGU(parse_value(value, key)?),
             "NTU" => Format::NTU(parse_value(value, key)?),
-            "PL" => Format::PL(parse_vec_value(value, key)?),
+
+            // nanomonsv
             "TR" => Format::TR(parse_value(value, key)?),
             "VR" => Format::VR(parse_value(value, key)?),
+
+            // Severus
+            "DR" => Format::DR(parse_value(value, key)?),
+            "DV" => Format::DV(parse_value(value, key)?),
+            "hVAF" => Format::HVAF(parse_vec_value(value, key)?),
+
+            // fallback
             _ => Format::Other((key.to_string(), value.to_string())),
         };
         Ok(format)
@@ -888,7 +1370,7 @@ where
         .context(format!("Can't parse {}: {}", key, value)) // Add context
 }
 
-// Helper function to parse comma-separated values (DeepSeek)
+// Helper function to parse comma-separated values
 fn parse_vec_value<T: std::str::FromStr>(value: &str, key: &str) -> anyhow::Result<Vec<T>>
 where
     T::Err: std::fmt::Debug,
@@ -904,27 +1386,47 @@ where
 }
 
 impl From<Format> for (String, String) {
+    /// Converts a `Format` enum into a `(key, value)` pair, as strings.
+    ///
+    /// This is used to serialize the FORMAT field back into VCF-compatible string values.
+    /// The key corresponds to the field ID (e.g., `"DP"`, `"GT"`), and the value is the encoded string representation.
+    ///
+    /// # Examples
+    /// ```
+    /// use your_crate::Format;
+    /// let f = Format::DP(42);
+    /// let (k, v): (String, String) = f.into();
+    /// assert_eq!(k, "DP");
+    /// assert_eq!(v, "42");
+    /// ```
     fn from(format: Format) -> Self {
-        let concat = |values: Vec<u32>| -> String {
+        let concat_u32 = |values: Vec<u32>| -> String {
             values
                 .iter()
-                .map(|v| v.to_string())
+                .map(u32::to_string)
+                .collect::<Vec<_>>()
+                .join(",")
+        };
+        let concat_f32 = |values: Vec<f32>| -> String {
+            values
+                .iter()
+                .map(|v| format!("{:.5}", v)) // consistent decimal format
                 .collect::<Vec<_>>()
                 .join(",")
         };
 
         match format {
+            // DeepVariant
             Format::GT(value) => ("GT".to_string(), value),
             Format::GQ(value) => ("GQ".to_string(), value.to_string()),
             Format::DP(value) => ("DP".to_string(), value.to_string()),
-            Format::AD(values) => ("AD".to_string(), concat(values)),
-            Format::VAF(value) => ("VAF".to_string(), value.to_string()),
-            Format::PL(values) => ("PL".to_string(), concat(values)),
-            Format::Other((key, value)) => (key, value),
-            // Format::AF(value) => ("AF".to_string(), value.to_string()),
-            // Format::NAF(value) => ("NAF".to_string(), value.to_string()),
+            Format::AD(values) => ("AD".to_string(), concat_u32(values)),
+            Format::VAF(value) => ("VAF".to_string(), format!("{:.5}", value)),
+            Format::PL(values) => ("PL".to_string(), concat_u32(values)),
+
+            // Clairs
             Format::NDP(value) => ("NDP".to_string(), value.to_string()),
-            Format::NAD(values) => ("NAD".to_string(), concat(values)),
+            Format::NAD(values) => ("NAD".to_string(), concat_u32(values)),
             Format::AU(value) => ("AU".to_string(), value.to_string()),
             Format::CU(value) => ("CU".to_string(), value.to_string()),
             Format::GU(value) => ("GU".to_string(), value.to_string()),
@@ -933,8 +1435,18 @@ impl From<Format> for (String, String) {
             Format::NCU(value) => ("NCU".to_string(), value.to_string()),
             Format::NGU(value) => ("NGU".to_string(), value.to_string()),
             Format::NTU(value) => ("NTU".to_string(), value.to_string()),
+
+            // nanomonsv
             Format::TR(value) => ("TR".to_string(), value.to_string()),
             Format::VR(value) => ("VR".to_string(), value.to_string()),
+
+            // Severus
+            Format::DR(value) => ("DR".to_string(), value.to_string()),
+            Format::DV(value) => ("DV".to_string(), value.to_string()),
+            Format::HVAF(values) => ("hVAF".to_string(), concat_f32(values)),
+
+            // fallback
+            Format::Other((key, value)) => (key, value),
         }
     }
 }
@@ -963,6 +1475,99 @@ impl Formats {
 
         Formats(filtered_vec)
     }
+
+    /// Returns a sorted set of VCF header definitions for every explicitly modeled `Format` field (`Format::Other` has none).
+    ///
+    /// # Example
+    /// ```
+    /// let headers = Formats::format_headers();
+    /// for h in headers {
+    ///     println!("{}", h);
+    /// }
+    /// ```
+    pub fn format_headers() -> BTreeSet<String> {
+        let mut headers = BTreeSet::new();
+
+        headers
+            .insert(r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#.to_string());
+        headers.insert(
+            r#"##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">"#.to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">"#.to_string(),
+        );
+        headers.insert(r#"##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles">"#.to_string());
+        headers.insert(
+            r#"##FORMAT=<ID=VAF,Number=1,Type=Float,Description="Variant Allele Frequency">"#
+                .to_string(),
+        );
+        headers.insert(r#"##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Phred-scaled genotype likelihoods">"#.to_string());
+
+        headers.insert(
+            r#"##FORMAT=<ID=NDP,Number=1,Type=Integer,Description="Normal sample read depth">"#
+                .to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=NAD,Number=R,Type=Integer,Description="Normal sample allelic depths">"#
+                .to_string(),
+        );
+
+        headers.insert(
+            r#"##FORMAT=<ID=AU,Number=1,Type=Integer,Description="Tumor A allele count">"#
+                .to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=CU,Number=1,Type=Integer,Description="Tumor C allele count">"#
+                .to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=GU,Number=1,Type=Integer,Description="Tumor G allele count">"#
+                .to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=TU,Number=1,Type=Integer,Description="Tumor T allele count">"#
+                .to_string(),
+        );
+
+        headers.insert(
+            r#"##FORMAT=<ID=NAU,Number=1,Type=Integer,Description="Normal A allele count">"#
+                .to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=NCU,Number=1,Type=Integer,Description="Normal C allele count">"#
+                .to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=NGU,Number=1,Type=Integer,Description="Normal G allele count">"#
+                .to_string(),
+        );
+        headers.insert(
+            r#"##FORMAT=<ID=NTU,Number=1,Type=Integer,Description="Normal T allele count">"#
+                .to_string(),
+        );
+
+        headers.insert(r#"##FORMAT=<ID=TR,Number=1,Type=Integer,Description="Total supporting reads (tumor)">"#.to_string());
+        headers.insert(r#"##FORMAT=<ID=VR,Number=1,Type=Integer,Description="Variant-supporting reads (tumor)">"#.to_string());
+
+        // Severus
+        headers.insert(
+            r#"##FORMAT=<ID=DR,Number=1,Type=Integer,Description="Number of reference reads">"#
+                .to_string(),
+        );
+
+        headers.insert(
+            r#"##FORMAT=<ID=DV,Number=1,Type=Integer,Description="Number of variant reads">"#
+                .to_string(),
+        );
+        headers.insert(r#"##FORMAT=<ID=hVAF,Number=3,Type=Float,Description="Haplotype specific variant Allele frequency (H0,H1,H2)">"#.to_string());
+
+        // headers.insert(
+        //     r#"##FORMAT=<ID=Other,Number=.,Type=String,Description="Unspecified FORMAT field">"#
+        //         .to_string(),
+        // );
+
+        headers
+    }
 }
 
 /// Filter