@@ -1,4 +1,8 @@
-use std::{collections::BTreeMap, io::BufRead, sync::Arc};
+use std::{
+    collections::{BTreeMap, BTreeSet},
+    io::BufRead,
+    sync::Arc,
+};

use anyhow::Context;
use dashmap::DashMap;
@@ -8,10 +12,13 @@ use rayon::prelude::*;
use serde::{Deserialize, Serialize, Serializer};

use crate::{
-    annotation::{vep::VepImpact, Annotation},
+    annotation::{vep::VepImpact, Annotation, ReplicationClass},
config::Config,
helpers::bin_data,
-    io::{dict::read_dict, gff::features_ranges, readers::get_gz_reader, writers::get_gz_writer},
+    io::{
+        bed::read_bed, dict::read_dict, gff::features_ranges, readers::get_gz_reader,
+        writers::get_gz_writer,
+    },
positions::{
contig_to_num, merge_overlapping_genome_ranges, par_overlaps, range_intersection_par,
GenomeRange,
@@ -48,6 +55,7 @@ pub struct VariantsStats {

pub somatic_rates: SomaticVariantRates,
pub high_depth_somatic_rates: SomaticVariantRates,
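+    /// Per-region-set mutation-rate inputs: each entry pairs a label with the
+    /// value returned by `annotate_with_ranges`, assumed (from the reporting
+    /// loop in `new`) to be (covered bp, number of passing variants).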
+    pub mutation_rates: Vec<(String, (u32, usize))>,
}

pub fn serialize_dashmap_sort<S, T>(
@@ -67,7 +75,7 @@ where
}

impl VariantsStats {
-    pub fn new(variants: &Variants, id: &str, config: &Config) -> anyhow::Result<Self> {
+    pub fn new(variants: &mut Variants, id: &str, config: &Config) -> anyhow::Result<Self> {
let n = variants.data.len() as u32;
let alteration_categories: DashMap<String, u32> = DashMap::new();
let vep_impact: DashMap<String, u32> = DashMap::new();
@@ -79,8 +87,10 @@ impl VariantsStats {
let vafs: DashMap<OrderedFloat<f32>, u32> = DashMap::new();
let cosmic: DashMap<u64, u32> = DashMap::new();
let gnomads: DashMap<String, Vec<f64>> = DashMap::new();
+    let context: DashMap<String, Vec<String>> = DashMap::new();

variants.data.par_iter().for_each(|v| {
+            // VEP
if let Ok(best_vep) = v.best_vep() {
if let Some(ref impact) = best_vep.extra.impact {
*vep_impact.entry(impact.to_string()).or_default() += 1;
@@ -102,6 +112,7 @@ impl VariantsStats {
}
}

+            // VAF
let (n_alt, depth) = v.n_alt_depth();
*n_alts.entry(n_alt as u32).or_default() += 1;
*depths.entry(depth as u32).or_default() += 1;
@@ -111,6 +122,9 @@ impl VariantsStats {
*vafs.entry(vaf).or_default() += 1;
}

+            let callers = v.callers();
+
+            // Annotations
v.annotations.iter().for_each(|annotation| {
match annotation {
Annotation::Cosmic(v) => *cosmic.entry(v.cosmic_cnt).or_default() += 1,
@@ -122,9 +136,15 @@ impl VariantsStats {
gnomads.entry(key).or_default().push(value);
});
}
+                Annotation::TriNucleotides(bases) => context
+                    .entry(bases.iter().map(|v| v.to_string()).collect())
+                    .or_default()
+                    .push(callers.clone()),
_ => (),
};
});
+
+            // Alteration category
let mut alteration_category_str = v
.alteration_category()
.iter()
@@ -155,15 +175,168 @@ impl VariantsStats {

let mut high_depth_ranges = high_depth_somatic(id, config)?;
high_depth_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
-
let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
let exons_high_depth = range_intersection_par(
&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
&exon_ranges_ref,
);
-
let high_depth = somatic_rates(&variants.data, &exons_high_depth, config)?;

+        let mut mutation_rates = Vec::new();
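+        // One entry per region set below. annotate_with_ranges presumably also
+        // tags matching variants when Some(ann) is passed (hence &mut variants);
+        // its return value feeds the per-megabase report at the end of this block.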
+
+        // HighDepths
+        let ann = Annotation::HighDepths;
+        let res = variants.annotate_with_ranges(
+            &high_depth_ranges,
+            Some(ann.clone()),
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push((ann.to_string(), res));
+
+        // Exons
+        let res =
+            variants.annotate_with_ranges(&exon_ranges, None, config.min_n_callers, Vec::new());
+        mutation_rates.push(("Exons".to_string(), res));
+
+        // ExonsHighDepths
+        let res = variants.annotate_with_ranges(
+            &exons_high_depth,
+            None,
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push(("Exons HighDepths".to_string(), res));
+
+        // CpG
+        let cpg_ranges: Vec<GenomeRange> = read_bed(&config.cpg_bed)?
+            .into_iter()
+            .map(|e| e.range)
+            .collect();
+        let ann = Annotation::CpG;
+        let res = variants.annotate_with_ranges(
+            &cpg_ranges,
+            Some(ann.clone()),
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push((ann.to_string(), res));
+
+        // CpG HighDepths
+        let cpg_high_depth = range_intersection_par(
+            &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
+            &cpg_ranges.iter().collect::<Vec<&GenomeRange>>(),
+        );
+        let res = variants.annotate_with_ranges(
+            &cpg_high_depth,
+            Some(ann.clone()),
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push(("CpG HighDepths".to_string(), res));
+
+        // Early replication
+        let early_ranges: Vec<GenomeRange> = read_bed(&config.early_bed)?
+            .into_iter()
+            .map(|e| e.range)
+            .collect();
+        let ann = Annotation::ReplicationTiming(ReplicationClass::Early);
+        let res = variants.annotate_with_ranges(
+            &early_ranges,
+            Some(ann.clone()),
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push((ann.to_string(), res));
+
+        // Early replication HighDepths
+        let early_ranges_high_depth = range_intersection_par(
+            &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
+            &early_ranges.iter().collect::<Vec<&GenomeRange>>(),
+        );
+        let res = variants.annotate_with_ranges(
+            &early_ranges_high_depth,
+            Some(ann.clone()),
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push(("Early replication HighDepths".to_string(), res));
+
+        // Late replication
+        let late_ranges: Vec<GenomeRange> = read_bed(&config.late_bed)?
+            .into_iter()
+            .map(|e| e.range)
+            .collect();
+        let ann = Annotation::ReplicationTiming(ReplicationClass::Late);
+        let res = variants.annotate_with_ranges(
+            &late_ranges,
+            Some(ann.clone()),
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push((ann.to_string(), res));
+
+        // Late replication HighDepths
+        let late_ranges_high_depth = range_intersection_par(
+            &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
+            &late_ranges.iter().collect::<Vec<&GenomeRange>>(),
+        );
+        let res = variants.annotate_with_ranges(
+            &late_ranges_high_depth,
+            Some(ann.clone()),
+            config.min_n_callers,
+            Vec::new(),
+        );
+        mutation_rates.push(("Late replication HighDepths".to_string(), res));
+
+        for (name, path) in config.panels.iter() {
+            let panel_ranges: Vec<GenomeRange> =
+                read_bed(path)?.into_iter().map(|e| e.range).collect();
+            let ann = Annotation::Panel(name.to_string());
+            let res = variants.annotate_with_ranges(
+                &panel_ranges,
+                Some(ann.clone()),
+                config.min_n_callers,
+                Vec::new(),
+            );
+            mutation_rates.push((ann.to_string(), res));
+
+            let panel_ranges: Vec<GenomeRange> = range_intersection_par(
+                &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
+                &panel_ranges.iter().collect::<Vec<&GenomeRange>>(),
+            );
+
+            let ann = Annotation::Panel(format!("{}_HighDepths", name));
+            let res = variants.annotate_with_ranges(
+                &panel_ranges,
+                Some(ann.clone()),
+                config.min_n_callers,
+                Vec::new(),
+            );
+            mutation_rates.push((ann.to_string(), res));
+        }
+
+        // mutation_rates.sort_by(|(_, (_, an)), (_, (_, bn))| an.cmp(bn));
+
+        // Output rates per megabase
+        for (feature, (bp, n)) in &mutation_rates {
+            let rate = (*n as f64) / ((*bp as f64) / 1e6);
+            println!("{feature}: {rate:.2} mutations/Mb ({n} / {bp})");
+        }
+
+        // let (glm_rows, rates_per_mb) =
+        //     make_glm_rows_from_regions_par(variants, &high_depth_ranges, &tracked_annotations, config.min_n_callers);
+        //
+        // // Output rates per megabase
+        // for (feature, rate) in &rates_per_mb {
+        //     println!("{feature}: {rate:.2} mutations/Mb");
+        // }
+        //
+        // write_glm_rows(
+        //     &glm_rows,
+        //     &format!("{}/{id}_glm_rows.csv", config.somatic_pipe_stats(id)),
+        // )?;
+
Ok(Self {
n,
alteration_categories,
@@ -179,6 +352,7 @@ impl VariantsStats {
n_gnomad,
somatic_rates: all_somatic_rates,
high_depth_somatic_rates: high_depth,
+            mutation_rates,
})
}

@@ -432,3 +606,245 @@ fn ranges_from_consecutive_true(vec: &[bool], start: u32, contig: &str) -> Vec<G
ranges
}

+/// A region-level data structure for modeling mutation rates using GLMs or other statistical models.
+///
+/// Each `GlmRow` represents a genomic interval (e.g., from a BED file) and contains:
+/// - The region's coordinates (`contig`, `start`, `end`)
+/// - Its length in base pairs
+/// - The number of somatic mutations overlapping the region
+/// - A list of binary feature labels (e.g., "Early", "HighGC")
+///
+/// This structure is designed to be serializable (e.g., to CSV) for downstream statistical modeling.
+///
+/// # Example
+/// ```ignore
+/// GlmRow {
+///     contig: "chr1".to_string(),
+///     start: 100_000,
+///     end: 101_000,
+///     length: 1000,
+///     mutation_count: 3,
+///     features: vec!["Early".into(), "HighGC".into()],
+/// }
+/// ```
+#[derive(Debug, Serialize)]
+pub struct GlmRow {
+    /// Chromosome name (e.g., "chr1")
+    pub contig: String,
+
+    /// Start coordinate (0-based, inclusive)
+    pub start: u32,
+
+    /// End coordinate (0-based, exclusive)
+    pub end: u32,
+
+    /// Length of the region in base pairs (end - start)
+    pub length: usize,
+
+    /// Number of variants overlapping this region
+    pub mutation_count: usize,
+
+    /// Binary feature labels associated with this region
+    pub features: Vec<String>,
+}
+
+/// Builds a GLM-ready table from a set of fixed genomic regions and a list of annotated variants,
+/// in parallel, using a binary search into the sorted variant list followed by a bounded linear scan per region.
+///
+/// For each region, this function:
+/// - Counts how many variants fall within the region
+/// - Extracts the relevant annotations (features) from overlapping variants
+/// - Produces one `GlmRow` with coordinates, mutation count, and feature labels
+///
+/// Additionally, it computes the **mutation rate per megabase** for each tracked annotation
+/// by aggregating counts and total base coverage across regions where each feature is present.
+///
+/// # Assumptions
+/// - `variants.data` must be sorted by `position.contig` then `position.position`
+/// - `regions` must be sorted by `range.contig` then `range.start`
+///
+/// # Arguments
+/// * `variants` - A list of annotated somatic variants (sorted).
+/// * `regions` - A set of genomic bins or intervals (e.g., from BED) used as modeling units (sorted).
+/// * `tracked_annotations` - A list of `Annotation` values to track as binary features.
+/// * `min_callers` - Minimum number of callers required for a variant to be counted.
+///
+/// # Returns
+/// A tuple:
+/// * `Vec<GlmRow>` — one per region, with mutation count and feature labels.
+/// * `Vec<(String, f64)>` — per-feature mutation rate (mutations per megabase).
+///
+/// # Example
+/// ```rust,ignore
+/// let (rows, rates) = make_glm_rows_from_regions_par(
+///     &variants,
+///     &genome_bins,
+///     &[
+///         Annotation::ReplicationTiming(ReplicationClass::Early),
+///         Annotation::HighDepths,
+///     ],
+///     2, // min_callers
+/// );
+///
+/// for (feature, rate) in rates {
+///     println!("{feature}: {:.2} mutations/Mb", rate);
+/// }
+/// ```
+pub fn make_glm_rows_from_regions_par(
+    variants: &Variants,
+    regions: &[GenomeRange],
+    tracked_annotations: &[Annotation],
+    min_callers: u8,
+) -> (Vec<GlmRow>, Vec<(String, f64)>) {
+    let (glm_rows, feature_stats): (Vec<GlmRow>, BTreeMap<String, (usize, usize)>) = regions
+        .par_iter()
+        .map(|region| {
+            let mut mutation_count = 0;
+            let mut feature_set = BTreeSet::new();
+
+            // Binary search into sorted variant list
+            let start_idx = variants.data.partition_point(|v| {
+                v.position.contig < region.contig
+                    || (v.position.contig == region.contig
+                        && v.position.position < region.range.start)
+            });
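+            // start_idx points at the first variant not strictly before this
+            // region; correctness relies on variants.data being sorted by
+            // (contig, position), as stated in the Assumptions above.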
+
+            for var in &variants.data[start_idx..] {
+                let pos = &var.position;
+
+                if pos.contig != region.contig || pos.position >= region.range.end {
+                    break;
+                }
+
+                if region.range.contains(&pos.position) && var.n_callers() >= min_callers {
+                    mutation_count += 1;
+
+                    for ann in &var.annotations {
+                        if tracked_annotations.contains(ann) {
+                            feature_set.insert(ann.to_string());
+                        }
+                    }
+                }
+            }
+
+            let region_len = region.length() as usize;
+
+            let row = GlmRow {
+                contig: region.contig.to_string(),
+                start: region.range.start,
+                end: region.range.end,
+                length: region_len,
+                mutation_count,
+                features: feature_set.clone().into_iter().collect(),
+            };
+
+            // Local (per-thread) accumulation of stats
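+            // Every feature observed in this region contributes the region's
+            // (mutation_count, length) pair; the reduce step below sums these
+            // contributions across regions per feature.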
+            let mut stats = BTreeMap::new();
+            for feat in &feature_set {
+                stats.insert(feat.clone(), (mutation_count, region_len));
+            }
+
+            (vec![row], stats)
+        })
+        .reduce(
+            || (Vec::new(), BTreeMap::new()),
+            |(mut rows_a, mut stats_a), (mut rows_b, stats_b)| {
+                rows_a.append(&mut rows_b);
+                for (feat, (n, bp)) in stats_b {
+                    stats_a
+                        .entry(feat)
+                        .and_modify(|(na, bpa)| {
+                            *na += n;
+                            *bpa += bp;
+                        })
+                        .or_insert((n, bp));
+                }
+                (rows_a, stats_a)
+            },
+        );
+
+    // Compute mutation rates per Mb from aggregate stats
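+    // rate = variant count / (feature-covered bp / 1e6), i.e. mutations per
+    // megabase of sequence covered by regions carrying that feature.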
+    let rates_per_mb: Vec<(String, f64)> = feature_stats
+        .into_iter()
+        .map(|(feat, (n, bp))| {
+            debug!("{bp}");
+            let rate = if bp > 0 {
+                (n as f64) / ((bp as f64) / 1e6)
+            } else {
+                0.0
+            };
+            (feat, rate)
+        })
+        .collect();
+
+    (glm_rows, rates_per_mb)
+}
+
+/// Returns a flat table: one-hot encoded feature columns.
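+/// The first element is the ordered list of feature column names; the second
+/// is one map per region, keyed by column name, holding the region coordinates,
+/// `length`, `log_length`, `mutation_count`, and a "0"/"1" flag per feature.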
+fn flatten_glm_rows(
+    rows: &[GlmRow],
+) -> (Vec<String>, Vec<std::collections::HashMap<String, String>>) {
+    let mut all_features: BTreeSet<String> = BTreeSet::new();
+    for r in rows {
+        all_features.extend(r.features.iter().cloned());
+    }
+
+    let feature_list: Vec<_> = all_features.into_iter().collect();
+
+    let mut table: Vec<_> = rows
+        .iter()
+        .map(|r| {
+            let mut row = std::collections::HashMap::new();
+            row.insert("contig".into(), r.contig.clone());
+            row.insert("start".into(), r.start.to_string());
+            row.insert("end".into(), r.end.to_string());
+            row.insert("length".into(), r.length.to_string());
+            row.insert("mutation_count".into(), r.mutation_count.to_string());
+            row.insert("log_length".into(), (r.length as f64).ln().to_string());
+
+            for f in &feature_list {
+                row.insert(
+                    f.clone(),
+                    if r.features.contains(f) { "1" } else { "0" }.into(),
+                );
+            }
+
+            row
+        })
+        .collect();
+
+    // Parallel sort by contig and start
+    table.par_sort_by(|a, b| {
+        let ca = a.get("contig").unwrap();
+        let cb = b.get("contig").unwrap();
+        let sa = a.get("start").unwrap().parse::<u32>().unwrap();
+        let sb = b.get("start").unwrap().parse::<u32>().unwrap();
+        (ca, sa).cmp(&(cb, sb))
+    });
+
+    (feature_list, table)
+}
+
+pub fn write_glm_rows(all_rows: &[GlmRow], csv_path: &str) -> anyhow::Result<()> {
+    let (features, flat_rows) = flatten_glm_rows(all_rows);
+    let mut writer = csv::Writer::from_path(csv_path)?;
+
+    let mut headers = vec![
+        "contig",
+        "start",
+        "end",
+        "length",
+        "log_length",
+        "mutation_count",
+    ];
+    headers.extend(features.iter().map(|s| s.as_str()));
+    writer.write_record(&headers)?;
+
+    for row in flat_rows {
+        let values: Vec<_> = headers.iter().map(|&h| row.get(h).unwrap()).collect();
+        writer.write_record(values)?;
+    }
+    writer.flush()?;
+    Ok(())
+}