|
|
@@ -14,7 +14,7 @@ use rayon::prelude::*;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
use uuid::Uuid;
|
|
|
|
|
|
-use super::variant::{AlterationCategory, Info, ReferenceAlternative, VcfVariant};
|
|
|
+use super::variant::{AlterationCategory, Formats, Info, Infos, ReferenceAlternative, VcfVariant};
|
|
|
use crate::{
|
|
|
annotation::{
|
|
|
cosmic::Cosmic,
|
|
|
@@ -29,7 +29,7 @@ use crate::{
|
|
|
vcf::Vcf,
|
|
|
},
|
|
|
helpers::{app_storage_dir, estimate_shannon_entropy, mean, temp_file_path, Hash128},
|
|
|
- io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header},
|
|
|
+ io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header, writers::get_gz_writer},
|
|
|
positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
|
|
|
};
|
|
|
|
|
|
@@ -587,6 +587,80 @@ impl Variant {
|
|
|
callers.sort();
|
|
|
callers.join(", ")
|
|
|
}
|
|
|
+
|
|
|
+ /// Merge all `Infos` from the list of `VcfVariant`s.
|
|
|
+ pub fn merge_infos(&self) -> Infos {
|
|
|
+ let mut seen_keys = HashSet::new();
|
|
|
+ let mut merged = Vec::new();
|
|
|
+
|
|
|
+ for vcf in self.vcf_variants.iter() {
|
|
|
+ for info in &vcf.infos.0 {
|
|
|
+ let key = info.key();
|
|
|
+ if seen_keys.insert(key.to_string()) {
|
|
|
+ merged.push(info.clone());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Infos(merged)
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn merge_formats(&self) -> Formats {
|
|
|
+ let mut seen_keys = HashSet::new();
|
|
|
+ let mut merged = Vec::new();
|
|
|
+
|
|
|
+ for vcf in self.vcf_variants.iter() {
|
|
|
+ for format in &vcf.formats.0 {
|
|
|
+ let (key, _) = format.clone().into();
|
|
|
+ if seen_keys.insert(key) {
|
|
|
+ merged.push(format.clone());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Formats(merged)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Writes the merged VCF representation of this `Variant` to the provided writer.
|
|
|
+ ///
|
|
|
+ /// Merges INFO and FORMAT fields from all underlying `VcfVariant`s.
|
|
|
+ /// Assumes a single-sample VCF structure.
|
|
|
+ pub fn write_vcf<W: Write>(&self, writer: &mut W) -> std::io::Result<()> {
|
|
|
+ if self.vcf_variants.is_empty() {
|
|
|
+ return Ok(());
|
|
|
+ }
|
|
|
+
|
|
|
+ let vcf = &self.vcf_variants[0];
|
|
|
+ let merged_infos = self.merge_infos();
|
|
|
+ let merged_formats = self.merge_formats();
|
|
|
+
|
|
|
+ let (format_keys, format_values): (String, String) = merged_formats.into();
|
|
|
+
|
|
|
+ writeln!(
|
|
|
+ writer,
|
|
|
+ "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
|
|
|
+ self.position.contig(),
|
|
|
+ self.position.position + 1,
|
|
|
+ if vcf.id.is_empty() { "." } else { &vcf.id },
|
|
|
+ self.reference,
|
|
|
+ self.alternative,
|
|
|
+ vcf.quality
|
|
|
+ .map(|q| format!("{:.2}", q))
|
|
|
+ .unwrap_or_else(|| ".".to_string()),
|
|
|
+ vcf.filter,
|
|
|
+ merged_infos,
|
|
|
+ if format_keys.is_empty() {
|
|
|
+ "."
|
|
|
+ } else {
|
|
|
+ &format_keys
|
|
|
+ },
|
|
|
+ if format_values.is_empty() {
|
|
|
+ "."
|
|
|
+ } else {
|
|
|
+ &format_values
|
|
|
+ },
|
|
|
+ )
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/// A collection of genomic variants.
|
|
|
@@ -953,6 +1027,40 @@ impl Variants {
|
|
|
let decoded: Self = bitcode::decode(&buffer)?;
|
|
|
Ok(decoded)
|
|
|
}
|
|
|
+
|
|
|
+ /// Write the complete VCF to the given output path, using a dict file for contig headers.
|
|
|
+ pub fn write_vcf(&self, output_path: &str, dict_path: &str, force: bool) -> anyhow::Result<()> {
|
|
|
+ let contigs = crate::io::dict::read_dict(dict_path)?;
|
|
|
+
|
|
|
+ let mut writer = get_gz_writer(output_path, force)?;
|
|
|
+
|
|
|
+ // File format and contig headers
|
|
|
+ writeln!(writer, "##fileformat=VCFv4.3")?;
|
|
|
+ for (name, len) in contigs {
|
|
|
+ writeln!(writer, "##contig=<ID={},length={}>", name, len)?;
|
|
|
+ }
|
|
|
+
|
|
|
+ // INFO and FORMAT headers
|
|
|
+ for info_header in Info::header_definitions() {
|
|
|
+ writeln!(writer, "{info_header}")?;
|
|
|
+ }
|
|
|
+ for format_header in Formats::format_headers() {
|
|
|
+ writeln!(writer, "{format_header}")?;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Column header
|
|
|
+ writeln!(
|
|
|
+ writer,
|
|
|
+ "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE"
|
|
|
+ )?;
|
|
|
+
|
|
|
+ // Write all variants
|
|
|
+ for variant in &self.data {
|
|
|
+ variant.write_vcf(&mut writer)?;
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/// Creates a new Variant instance from a collection of VcfVariants and annotations.
|