|
|
@@ -32,6 +32,24 @@ use crate::{
|
|
|
positions::GenomePosition,
|
|
|
};
|
|
|
|
|
|
+/// A collection of VCF variants along with associated metadata.
|
|
|
+///
|
|
|
+/// This struct represents a set of variants from a VCF file, including
|
|
|
+/// the original VCF metadata and the caller annotation.
|
|
|
+///
|
|
|
+/// # Fields
|
|
|
+/// * `variants` - A vector of VcfVariant instances representing individual variants
|
|
|
+/// * `vcf` - The Vcf struct containing metadata from the original VCF file
|
|
|
+/// * `caller` - An Annotation indicating the variant caller used
|
|
|
+///
|
|
|
+/// # Derives
|
|
|
+/// This struct derives Debug and Clone traits, allowing for easy debugging
|
|
|
+/// and cloning of VariantCollection instances.
|
|
|
+///
|
|
|
+/// # Usage
|
|
|
+/// This struct is typically used to group related variants from a single VCF file,
|
|
|
+/// preserving the context in which they were called and allowing for collective
|
|
|
+/// operations on the set of variants.
|
|
|
#[derive(Debug, Clone)]
|
|
|
pub struct VariantCollection {
|
|
|
pub variants: Vec<VcfVariant>,
|
|
|
@@ -40,19 +58,130 @@ pub struct VariantCollection {
|
|
|
}
|
|
|
|
|
|
impl VariantCollection {
|
|
|
+ /// Returns the hash key of every variant in the collection.
|
|
|
+ ///
|
|
|
+ /// Calls `hash()` on each `VcfVariant` in `self.variants`, in order,
|
|
|
+ /// and collects the results into a newly allocated vector.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// A `Vec<Hash128>` with one entry per variant, in the same order as `self.variants`.
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// Makes a single pass over the collection, calling `hash()` once per variant,
|
|
|
+ /// and allocates one output vector.
|
|
|
+ ///
|
|
|
+ /// # Usage
|
|
|
+ /// The returned keys can be collected into a `HashSet<Hash128>` and passed to
|
|
|
+ /// `retain_keys` or `remove_keys` to filter a collection.
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// let variant_keys = variant_collection.keys();
|
|
|
+ /// for key in variant_keys {
|
|
|
+ /// println!("Variant hash: {:?}", key);
|
|
|
+ /// }
|
|
|
+ /// ```
|
|
|
pub fn keys(&self) -> Vec<Hash128> {
|
|
|
self.variants.iter().map(|v| v.hash()).collect()
|
|
|
}
|
|
|
|
|
|
+ /// Retains only the variants whose hash keys are present in the provided set.
|
|
|
+ ///
|
|
|
+ /// Each variant's `hash()` is looked up in `keys_to_keep`; variants whose key is
|
|
|
+ /// absent from the set are dropped from `self.variants`.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `keys_to_keep` - A reference to a `HashSet<Hash128>` holding the hash keys of variants to retain
|
|
|
+ ///
|
|
|
+ /// # Effects
|
|
|
+ /// This method modifies the `VariantCollection` in place, potentially reducing
|
|
|
+ /// the number of variants it contains. Retained variants keep their relative order.
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// A single pass over the variants: each one is hashed once and looked up in the set.
|
|
|
+ /// No second vector is allocated, since `Vec::retain` filters in place.
|
|
|
+ ///
|
|
|
+ /// # Usage
|
|
|
+ /// This method is useful for filtering variants based on a pre-computed set of keys,
|
|
|
+ /// which can be helpful in various scenarios such as:
|
|
|
+ /// - Keeping only the variants also found in another collection (intersection)
|
|
|
+ /// - Keeping only variants that meet certain criteria
|
|
|
+ /// - Synchronizing variants across different collections
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// let mut variant_collection: VariantCollection = /* ... */;
|
|
|
+ /// // ... populate variant_collection ...
|
|
|
+ ///
|
|
|
+ /// let keys_to_keep: HashSet<Hash128> = some_filtering_function();
|
|
|
+ /// variant_collection.retain_keys(&keys_to_keep);
|
|
|
+ /// ```
|
|
|
pub fn retain_keys(&mut self, keys_to_keep: &HashSet<Hash128>) {
|
|
|
self.variants.retain(|v| keys_to_keep.contains(&v.hash()));
|
|
|
}
|
|
|
|
|
|
+ /// Removes variants whose hash keys are present in the provided set.
|
|
|
+ ///
|
|
|
+ /// Each variant's `hash()` is looked up in `keys_to_remove`; variants whose key is
|
|
|
+ /// found in the set are dropped from `self.variants`.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `keys_to_remove` - A reference to a `HashSet<Hash128>` holding the hash keys of variants to remove
|
|
|
+ ///
|
|
|
+ /// # Effects
|
|
|
+ /// This method modifies the `VariantCollection` in place, potentially reducing
|
|
|
+ /// the number of variants it contains. Remaining variants keep their relative order.
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// A single pass over the variants: each one is hashed once and looked up in the set.
|
|
|
+ /// No second vector is allocated, since `Vec::retain` filters in place.
|
|
|
+ ///
|
|
|
+ /// # Usage
|
|
|
+ /// This method is useful for filtering out unwanted variants based on a pre-computed set of keys,
|
|
|
+ /// which can be helpful in various scenarios such as:
|
|
|
+ /// - Removing variants that fail certain criteria
|
|
|
+ /// - Dropping variants already reported elsewhere (every variant with a listed key is removed)
|
|
|
+ /// - Excluding variants present in another dataset
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// let mut variant_collection: VariantCollection = /* ... */;
|
|
|
+ /// // ... populate variant_collection ...
|
|
|
+ ///
|
|
|
+ /// let keys_to_remove: HashSet<Hash128> = some_filtering_function();
|
|
|
+ /// variant_collection.remove_keys(&keys_to_remove);
|
|
|
+ /// ```
|
|
|
pub fn remove_keys(&mut self, keys_to_remove: &HashSet<Hash128>) {
|
|
|
self.variants
|
|
|
.retain(|v| !keys_to_remove.contains(&v.hash()));
|
|
|
}
|
|
|
|
|
|
+ /// Partitions the `VariantCollection` into two collections based on a given predicate.
|
|
|
+ ///
|
|
|
+ /// This function splits the current `VariantCollection` instance into two new instances,
|
|
|
+ /// where the variants are divided based on whether they satisfy the given predicate.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `predicate` - A closure that takes a reference to a VcfVariant and returns a boolean
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// A tuple of two `VariantCollection` instances:
|
|
|
+ /// * The first contains all variants for which the predicate returned true
|
|
|
+ /// * The second contains all variants for which the predicate returned false
|
|
|
+ ///
|
|
|
+ /// # Type Parameters
|
|
|
+ /// * `F` - The type of the predicate function, which must implement Fn(&VcfVariant) -> bool
|
|
|
+ ///
|
|
|
+ /// # Behavior
|
|
|
+ /// - Consumes the original `VariantCollection` instance (it takes `self` by value)
|
|
|
+ /// - Creates two new `VariantCollection` instances with the partitioned data
|
|
|
+ /// - The VCF and caller information are cloned for the first returned instance
|
|
|
+ /// and moved into the second returned instance
|
|
|
+ ///
|
|
|
+ /// # Examples
|
|
|
+ /// ```ignore
|
|
|
+ /// let (passing, failing) = vcf_variants.partition(|variant| variant.quality.map_or(false, |q| q > 30));
|
|
|
+ /// ```
|
|
|
pub fn partition<F>(self, predicate: F) -> (Self, Self)
|
|
|
where
|
|
|
F: Fn(&VcfVariant) -> bool,
|
|
|
@@ -77,7 +206,7 @@ impl VariantCollection {
|
|
|
)
|
|
|
}
|
|
|
|
|
|
- pub fn chunk_size(&self, max_threads: u8) -> usize {
|
|
|
+ fn chunk_size(&self, max_threads: u8) -> usize {
|
|
|
let total_items = self.variants.len();
|
|
|
let min_chunk_size = 1000;
|
|
|
let max_chunks = max_threads;
|
|
|
@@ -86,6 +215,27 @@ impl VariantCollection {
|
|
|
optimal_chunk_size.max(min_chunk_size)
|
|
|
}
|
|
|
|
|
|
+ /// Annotates variants with sequence entropy information.
|
|
|
+ ///
|
|
|
+ /// This function calculates and adds Shannon entropy annotations to the variants
|
|
|
+ /// based on the surrounding sequence context. It processes variants in parallel
|
|
|
+ /// chunks for improved performance.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `annotations` - A reference to the Annotations structure to store the results
|
|
|
+ /// * `reference` - Path to the reference FASTA file
|
|
|
+ /// * `seq_len` - Length of the sequence context to consider for entropy calculation
|
|
|
+ /// * `max_threads` - Maximum number of threads to use for parallel processing
|
|
|
+ ///
|
|
|
+ /// # Behavior
|
|
|
+ /// - Processes variants in parallel chunks
|
|
|
+ /// - For each variant, retrieves the surrounding sequence from the reference
|
|
|
+ /// - Calculates Shannon entropy for the sequence
|
|
|
+ /// - Adds the entropy value as an annotation to the variant
|
|
|
+ /// - Skips variants that already have a Shannon entropy annotation
|
|
|
+ ///
|
|
|
+ /// # Panics
|
|
|
+ /// This function will panic if it fails to build the FASTA reader from the provided reference path.
|
|
|
pub fn annotate_with_sequence_entropy(
|
|
|
&self,
|
|
|
annotations: &Annotations,
|
|
|
@@ -122,6 +272,34 @@ impl VariantCollection {
|
|
|
});
|
|
|
}
|
|
|
|
|
|
+ /// Annotates variants with information from a constitutional BAM file.
|
|
|
+ ///
|
|
|
+ /// This function processes variants in parallel chunks and adds annotations
|
|
|
+ /// based on the read counts from a provided BAM file. It calculates and adds
|
|
|
+ /// annotations for the number of reads supporting the alternative allele and
|
|
|
+ /// the total read depth at each variant position.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `annotations` - A reference to the Annotations structure to store the results
|
|
|
+ /// * `constit_bam_path` - Path to the constitutional BAM file
|
|
|
+ /// * `max_threads` - Maximum number of threads to use for parallel processing
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// * `Ok(())` if the annotation process completes successfully
|
|
|
+ /// * `Err` if there's an error opening the BAM file or processing the variants
|
|
|
+ ///
|
|
|
+ /// # Behavior
|
|
|
+ /// - Processes variants in parallel chunks
|
|
|
+ /// - For each variant, retrieves read counts from the BAM file
|
|
|
+ /// - Calculates the number of reads supporting the alternative allele and total depth
|
|
|
+ /// - Adds ConstitAlt and ConstitDepth annotations to each variant
|
|
|
+ /// - Skips variants that already have both ConstitAlt and ConstitDepth annotations
|
|
|
+ /// - Handles insertions, deletions, and other alteration types differently
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// This function will return an error if:
|
|
|
+ /// - It fails to open the BAM file
|
|
|
+ /// - There's an error while processing the variants or reading from the BAM file
|
|
|
pub fn annotate_with_constit_bam(
|
|
|
&self,
|
|
|
annotations: &Annotations,
|
|
|
@@ -190,6 +368,42 @@ impl VariantCollection {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/// Represents a consolidated genomic variant with associated information and annotations.
|
|
|
+///
|
|
|
+/// This struct encapsulates comprehensive data for a single genomic variant,
|
|
|
+/// including its unique identifier, genomic position, alleles, related VCF entries,
|
|
|
+/// and various annotations.
|
|
|
+///
|
|
|
+/// # Fields
|
|
|
+/// * `hash` - A unique `Hash128` identifier for the variant
|
|
|
+/// * `position` - The `GenomePosition` of the variant (0-based coordinates)
|
|
|
+/// * `reference` - The reference allele as a `ReferenceAlternative`
|
|
|
+/// * `alternative` - The alternative allele as a `ReferenceAlternative`
|
|
|
+/// * `vcf_variants` - A vector of `VcfVariant` instances associated with this variant
|
|
|
+/// * `annotations` - A vector of `Annotation` instances providing additional variant information
|
|
|
+///
|
|
|
+/// # Derives
|
|
|
+/// * `Debug` - For easier debugging and logging
|
|
|
+/// * `Serialize`, `Deserialize` - For serialization and deserialization (e.g., JSON)
|
|
|
+/// * `Clone` - For creating deep copies of the variant
|
|
|
+///
|
|
|
+/// # Usage
|
|
|
+/// This struct is central to variant analysis, providing a unified representation
|
|
|
+/// of a variant that may combine data from multiple VCF entries and include
|
|
|
+/// various annotations. It's useful for advanced variant processing, filtering,
|
|
|
+/// and reporting in genomic analysis pipelines.
|
|
|
+///
|
|
|
+/// # Example
|
|
|
+/// ```ignore
|
|
|
+/// let variant = Variant {
|
|
|
+/// hash: Hash128::new(/* ... */),
|
|
|
+/// position: GenomePosition { contig: 1, position: 1000 },
|
|
|
+/// reference: ReferenceAlternative::new("A"),
|
|
|
+/// alternative: ReferenceAlternative::new("T"),
|
|
|
+/// vcf_variants: vec![/* VcfVariant instances */],
|
|
|
+/// annotations: vec![/* Annotation instances */],
|
|
|
+/// };
|
|
|
+/// ```
|
|
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
|
|
pub struct Variant {
|
|
|
pub hash: Hash128,
|
|
|
@@ -232,17 +446,108 @@ impl Variant {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/// A collection of genomic variants.
|
|
|
+///
|
|
|
+/// This struct wraps an ordered list of `Variant` instances, providing a container
|
|
|
+/// for multiple genomic variants.
|
|
|
+///
|
|
|
+/// # Fields
|
|
|
+/// * `data` - A vector of `Variant` instances
|
|
|
+///
|
|
|
+/// # Derives
|
|
|
+/// * `Debug` - For easier debugging and logging
|
|
|
+/// * `Default` - Allows creation of an empty `Variants` instance
|
|
|
+/// * `Serialize`, `Deserialize` - For serialization and deserialization (e.g., JSON)
|
|
|
+/// * `Clone` - For creating deep copies of the collection
|
|
|
+///
|
|
|
+/// # Usage
|
|
|
+/// This is the working container for consolidated variants: it can be ordered with
|
|
|
+/// `sort`, combined with a `VariantCollection` through `merge`, filtered with
|
|
|
+/// `get_alteration_cat`, and written or read with `save_to_json` / `load_from_json`.
|
|
|
+///
|
|
|
+/// # Example
|
|
|
+/// ```ignore
|
|
|
+/// let mut variants = Variants::default();
|
|
|
+/// variants.data.push(Variant {
|
|
|
+/// // ... variant details ...
|
|
|
+/// });
|
|
|
+/// // Process or analyze the collection of variants
|
|
|
+/// ```
|
|
|
+///
|
|
|
+/// # Note
|
|
|
+/// The `Default` implementation creates an empty vector of variants.
|
|
|
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
|
|
|
pub struct Variants {
|
|
|
pub data: Vec<Variant>,
|
|
|
}
|
|
|
|
|
|
impl Variants {
|
|
|
+ /// Sorts the variants in-place based on their genomic positions.
|
|
|
+ ///
|
|
|
+ /// Orders `data` ascending by each variant's `position`, using that type's `Ord`
|
|
|
+ /// implementation (presumably contig first, then position within the contig; confirm
|
|
|
+ /// against `GenomePosition`). The sort is unstable: equal positions may be reordered.
|
|
|
+ ///
|
|
|
+ /// # Effects
|
|
|
+ /// Modifies the order of variants in the `data` vector in-place.
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// Uses `sort_unstable_by` for better performance, with a time complexity of O(n log n),
|
|
|
+ /// where n is the number of variants.
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// let mut variants = Variants {
|
|
|
+ /// data: vec![
|
|
|
+ /// Variant { position: GenomePosition { contig: 0, position: 100 }, .. },
|
|
|
+ /// Variant { position: GenomePosition { contig: 0, position: 50 }, .. },
|
|
|
+ /// Variant { position: GenomePosition { contig: 1, position: 75 }, .. },
|
|
|
+ /// ]
|
|
|
+ /// };
|
|
|
+ /// variants.sort();
|
|
|
+ /// // Variants are now sorted by position
|
|
|
+ /// ```
|
|
|
pub fn sort(&mut self) {
|
|
|
self.data
|
|
|
.sort_unstable_by(|a, b| a.position.cmp(&b.position));
|
|
|
}
|
|
|
|
|
|
+ /// Merges this Variants collection with another VariantCollection, combining overlapping variants.
|
|
|
+ ///
|
|
|
+ /// This method performs a merge operation, combining the current Variants with a VariantCollection.
|
|
|
+ /// It uses a two-pointer technique to efficiently merge the sorted collections, handling overlapping
|
|
|
+ /// variants and creating new variants as necessary.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `others` - A VariantCollection to merge with the current Variants
|
|
|
+ /// * `annotations` - A reference to Annotations used when creating new variants
|
|
|
+ ///
|
|
|
+ /// # Effects
|
|
|
+ /// * Modifies the current Variants in-place, replacing its data with the merged result
|
|
|
+ /// * Drains the data from the current Variants during the merge process
|
|
|
+ ///
|
|
|
+ /// # Behavior
|
|
|
+ /// * Variants are compared based on their genomic positions
|
|
|
+ /// * When positions are equal, variants with matching reference and alternative alleles are merged
|
|
|
+ /// * Non-matching variants at the same position are kept separate
|
|
|
+ /// * The method logs the number of merged variants
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// * Time complexity: O(n + m), where n and m are the sizes of the two collections
|
|
|
+ /// * Space complexity: O(n + m) for the merged result
|
|
|
+ ///
|
|
|
+ /// # Note
|
|
|
+ /// This method assumes that both the current Variants and the input VariantCollection are sorted
|
|
|
+ /// by genomic position. Use the `sort` method before merging if necessary.
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// let mut variants1 = Variants { /* ... */ };
|
|
|
+ /// let variants2 = VariantCollection { /* ... */ };
|
|
|
+ /// let annotations = Annotations::new();
|
|
|
+ /// variants1.merge(variants2, &annotations);
|
|
|
+ /// // variants1 now contains the merged data
|
|
|
+ /// ```
|
|
|
pub fn merge(&mut self, others: VariantCollection, annotations: &Annotations) {
|
|
|
let mut result = Vec::new();
|
|
|
let mut n_merged = 0;
|
|
|
@@ -290,6 +595,73 @@ impl Variants {
|
|
|
self.data = result;
|
|
|
}
|
|
|
|
|
|
+ /// Filters and returns variants of a specific alteration category.
|
|
|
+ ///
|
|
|
+ /// A variant is selected when `cat` is among the categories returned by its
|
|
|
+ /// `alteration_category()` method; every selected variant is cloned into the result.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `cat` - The `AlterationCategory` to select
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// A `Vec<Variant>` containing clones of all matching variants, in their original order
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// * Time complexity: O(n), where n is the number of variants in the collection
|
|
|
+ /// * Space complexity: O(m), where m is the number of matching variants
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// let variants = Variants { /* ... */ };
|
|
|
+ /// let snvs = variants.get_alteration_cat(AlterationCategory::SNV); // variant name is illustrative
|
|
|
+ /// println!("Found {} SNVs", snvs.len());
|
|
|
+ /// ```
|
|
|
+ ///
|
|
|
+ /// # Note
|
|
|
+ /// This method clones matching variants. If you need to process a large number of variants
|
|
|
+ /// and don't require ownership, consider implementing an iterator-based approach instead.
|
|
|
+ pub fn get_alteration_cat(&self, cat: AlterationCategory) -> Vec<Variant> {
|
|
|
+ self.data
|
|
|
+ .iter()
|
|
|
+ .filter(|v| v.alteration_category().contains(&cat))
|
|
|
+ .cloned()
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Saves the Variants collection to a BGZF-compressed JSON file.
|
|
|
+ ///
|
|
|
+ /// This method serializes the Variants struct to JSON format and writes it to a file
|
|
|
+ /// using BGZF (Blocked GNU Zip Format) compression.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `filename` - A string slice that holds the name of the file to write to
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// * `Ok(())` if the operation is successful
|
|
|
+ /// * `Err(anyhow::Error)` if any error occurs during file creation, serialization, or writing
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// This function will return an error if:
|
|
|
+ /// * The file cannot be created
|
|
|
+ /// * The Variants struct cannot be serialized to JSON
|
|
|
+ /// * The BGZF writer cannot be closed properly
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// The time and space complexity depends on the size of the Variants collection
|
|
|
+ /// and the efficiency of the JSON serialization and BGZF compression.
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// let variants = Variants { /* ... */ };
|
|
|
+ /// match variants.save_to_json("output.json.gz") {
|
|
|
+ /// Ok(()) => println!("Successfully saved variants"),
|
|
|
+ /// Err(e) => eprintln!("Failed to save variants: {}", e),
|
|
|
+ /// }
|
|
|
+ /// ```
|
|
|
+ ///
|
|
|
+ /// # Note
|
|
|
+ /// This method uses BGZF compression, which is compatible with standard gzip decompression.
|
|
|
+ /// The resulting file can be read using standard gzip-aware tools.
|
|
|
pub fn save_to_json(&self, filename: &str) -> anyhow::Result<()> {
|
|
|
let file = File::create(filename)
|
|
|
.with_context(|| format!("Failed to create file: {}", filename))?;
|
|
|
@@ -306,6 +678,39 @@ impl Variants {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+ /// Loads a Variants collection from a BGZF-compressed JSON file.
|
|
|
+ ///
|
|
|
+ /// This method reads a BGZF-compressed file, deserializes its JSON content,
|
|
|
+ /// and constructs a new Variants instance from it.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `filename` - A string slice that holds the name of the file to read from
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// * `Ok(Self)` containing the deserialized Variants if successful
|
|
|
+ /// * `Err(anyhow::Error)` if any error occurs during file opening, reading, or deserialization
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// This function will return an error if:
|
|
|
+ /// * The file cannot be opened
|
|
|
+ /// * A BGZF reader cannot be created for the file
|
|
|
+ /// * The file content cannot be deserialized into a Variants struct
|
|
|
+ ///
|
|
|
+ /// # Performance
|
|
|
+ /// The time and space complexity depends on the size of the input file
|
|
|
+ /// and the efficiency of the JSON deserialization and BGZF decompression.
|
|
|
+ ///
|
|
|
+ /// # Example
|
|
|
+ /// ```ignore
|
|
|
+ /// match Variants::load_from_json("input.json.gz") {
|
|
|
+ /// Ok(variants) => println!("Successfully loaded {} variants", variants.data.len()),
|
|
|
+ /// Err(e) => eprintln!("Failed to load variants: {}", e),
|
|
|
+ /// }
|
|
|
+ /// ```
|
|
|
+ ///
|
|
|
+ /// # Note
|
|
|
+ /// This method expects the input file to be in BGZF-compressed JSON format,
|
|
|
+ /// typically created by the `save_to_json` method of this struct.
|
|
|
pub fn load_from_json(filename: &str) -> anyhow::Result<Self> {
|
|
|
let file =
|
|
|
File::open(filename).with_context(|| format!("Failed to open file: {}", filename))?;
|
|
|
@@ -318,17 +723,40 @@ impl Variants {
|
|
|
debug!("Successfully loaded variants from {}", filename);
|
|
|
Ok(variants)
|
|
|
}
|
|
|
-
|
|
|
- pub fn get_alteration_cat(&self, cat: AlterationCategory) -> Vec<Variant> {
|
|
|
- self
|
|
|
- .data
|
|
|
- .iter()
|
|
|
- .filter(|v| v.alteration_category().contains(&cat))
|
|
|
- .cloned()
|
|
|
- .collect()
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
+/// Creates a new Variant instance from a collection of VcfVariants and annotations.
|
|
|
+///
|
|
|
+/// This function consolidates information from one or more VcfVariants into a single Variant,
|
|
|
+/// using the first VcfVariant as the primary source for most fields. It also retrieves
|
|
|
+/// and includes relevant annotations.
|
|
|
+///
|
|
|
+/// # Arguments
|
|
|
+/// * `vcf_variants` - A vector of VcfVariant instances to be consolidated
|
|
|
+/// * `annotations` - A reference to an Annotations structure containing annotation data
|
|
|
+///
|
|
|
+/// # Returns
|
|
|
+/// A new Variant instance
|
|
|
+///
|
|
|
+/// # Behavior
|
|
|
+/// - Uses the first VcfVariant in the vector as the source for hash, position, reference, and alternative
|
|
|
+/// - Includes all provided VcfVariants in the new Variant
|
|
|
+/// - Retrieves annotations for the variant based on its hash
|
|
|
+/// - If no annotations are found, an empty vector is used
|
|
|
+///
|
|
|
+/// # Panics
|
|
|
+/// This function will panic if `vcf_variants` is empty.
|
|
|
+///
|
|
|
+/// # Example
|
|
|
+/// ```ignore
|
|
|
+/// let vcf_variants = vec![VcfVariant { /* ... */ }];
|
|
|
+/// let annotations = Annotations::new();
|
|
|
+/// let variant = create_variant(vcf_variants, &annotations);
|
|
|
+/// ```
|
|
|
+///
|
|
|
+/// # Note
|
|
|
+/// This function assumes that all VcfVariants in the input vector represent the same genomic variant
|
|
|
+/// and should be consolidated. It's the caller's responsibility to ensure this is the case.
|
|
|
fn create_variant(vcf_variants: Vec<VcfVariant>, annotations: &Annotations) -> Variant {
|
|
|
let first = &vcf_variants[0];
|
|
|
let annotations = annotations
|