|
@@ -166,7 +166,7 @@ use rayon::prelude::*;
|
|
|
use serde::{Deserialize, Serialize, Serializer};
|
|
use serde::{Deserialize, Serialize, Serializer};
|
|
|
|
|
|
|
|
use crate::{
|
|
use crate::{
|
|
|
- annotation::{vep::VepImpact, Annotation, ReplicationClass},
|
|
|
|
|
|
|
+ annotation::{vep::VepImpact, Annotation},
|
|
|
config::Config,
|
|
config::Config,
|
|
|
helpers::bin_data,
|
|
helpers::bin_data,
|
|
|
io::{
|
|
io::{
|
|
@@ -235,7 +235,12 @@ where
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl VariantsStats {
|
|
impl VariantsStats {
|
|
|
- pub fn new(variants: &mut Variants, _id: &str, config: &Config, high_depth_ranges: &[GenomeRange]) -> anyhow::Result<Self> {
|
|
|
|
|
|
|
+ pub fn new(
|
|
|
|
|
+ variants: &mut Variants,
|
|
|
|
|
+ _id: &str,
|
|
|
|
|
+ config: &Config,
|
|
|
|
|
+ high_depth_ranges: &[GenomeRange],
|
|
|
|
|
+ ) -> anyhow::Result<Self> {
|
|
|
let n = variants.data.len() as u32;
|
|
let n = variants.data.len() as u32;
|
|
|
let alteration_categories: DashMap<String, u32> = DashMap::new();
|
|
let alteration_categories: DashMap<String, u32> = DashMap::new();
|
|
|
let vep_impact: DashMap<String, u32> = DashMap::new();
|
|
let vep_impact: DashMap<String, u32> = DashMap::new();
|
|
@@ -343,8 +348,6 @@ impl VariantsStats {
|
|
|
|
|
|
|
|
let all_somatic_rates = somatic_rates(&variants.data, &exon_ranges, config)?;
|
|
let all_somatic_rates = somatic_rates(&variants.data, &exon_ranges, config)?;
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
|
|
let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
|
|
|
let exons_high_depth = range_intersection_par(
|
|
let exons_high_depth = range_intersection_par(
|
|
|
&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
@@ -378,87 +381,6 @@ impl VariantsStats {
|
|
|
);
|
|
);
|
|
|
mutation_rates.push(("Exons HighDepths".to_string(), res));
|
|
mutation_rates.push(("Exons HighDepths".to_string(), res));
|
|
|
|
|
|
|
|
- // CpG
|
|
|
|
|
- let cpg_ranges: Vec<GenomeRange> = read_bed(&config.cpg_bed)?
|
|
|
|
|
- .into_iter()
|
|
|
|
|
- .map(|e| e.range)
|
|
|
|
|
- .collect();
|
|
|
|
|
- let ann = Annotation::CpG;
|
|
|
|
|
- let res = variants.annotate_with_ranges(
|
|
|
|
|
- &cpg_ranges,
|
|
|
|
|
- Some(ann.clone()),
|
|
|
|
|
- config.min_n_callers,
|
|
|
|
|
- Vec::new(),
|
|
|
|
|
- );
|
|
|
|
|
- mutation_rates.push((ann.to_string(), res));
|
|
|
|
|
-
|
|
|
|
|
- // CpG HighDepths
|
|
|
|
|
- let cpg_high_depth = range_intersection_par(
|
|
|
|
|
- &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
|
|
|
- &cpg_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
|
|
|
- );
|
|
|
|
|
- let res = variants.annotate_with_ranges(
|
|
|
|
|
- &cpg_high_depth,
|
|
|
|
|
- Some(ann.clone()),
|
|
|
|
|
- config.min_n_callers,
|
|
|
|
|
- Vec::new(),
|
|
|
|
|
- );
|
|
|
|
|
- mutation_rates.push(("CpG HighDepths".to_string(), res));
|
|
|
|
|
-
|
|
|
|
|
- // Early replication
|
|
|
|
|
- let early_ranges: Vec<GenomeRange> = read_bed(&config.early_bed)?
|
|
|
|
|
- .into_iter()
|
|
|
|
|
- .map(|e| e.range)
|
|
|
|
|
- .collect();
|
|
|
|
|
- let ann = Annotation::ReplicationTiming(ReplicationClass::Early);
|
|
|
|
|
- let res = variants.annotate_with_ranges(
|
|
|
|
|
- &early_ranges,
|
|
|
|
|
- Some(ann.clone()),
|
|
|
|
|
- config.min_n_callers,
|
|
|
|
|
- Vec::new(),
|
|
|
|
|
- );
|
|
|
|
|
- mutation_rates.push((ann.to_string(), res));
|
|
|
|
|
-
|
|
|
|
|
- // Early replication HighDepths
|
|
|
|
|
- let early_ranges_high_depth = range_intersection_par(
|
|
|
|
|
- &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
|
|
|
- &early_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
|
|
|
- );
|
|
|
|
|
- let res = variants.annotate_with_ranges(
|
|
|
|
|
- &early_ranges_high_depth,
|
|
|
|
|
- Some(ann.clone()),
|
|
|
|
|
- config.min_n_callers,
|
|
|
|
|
- Vec::new(),
|
|
|
|
|
- );
|
|
|
|
|
- mutation_rates.push(("Early replication HighDepths".to_string(), res));
|
|
|
|
|
-
|
|
|
|
|
- // Late replication
|
|
|
|
|
- let late_ranges: Vec<GenomeRange> = read_bed(&config.late_bed)?
|
|
|
|
|
- .into_iter()
|
|
|
|
|
- .map(|e| e.range)
|
|
|
|
|
- .collect();
|
|
|
|
|
- let ann = Annotation::ReplicationTiming(ReplicationClass::Late);
|
|
|
|
|
- let res = variants.annotate_with_ranges(
|
|
|
|
|
- &late_ranges,
|
|
|
|
|
- Some(ann.clone()),
|
|
|
|
|
- config.min_n_callers,
|
|
|
|
|
- Vec::new(),
|
|
|
|
|
- );
|
|
|
|
|
- mutation_rates.push((ann.to_string(), res));
|
|
|
|
|
-
|
|
|
|
|
- // Late replication HighDepths
|
|
|
|
|
- let late_ranges_high_depth = range_intersection_par(
|
|
|
|
|
- &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
|
|
|
- &late_ranges.iter().collect::<Vec<&GenomeRange>>(),
|
|
|
|
|
- );
|
|
|
|
|
- let res = variants.annotate_with_ranges(
|
|
|
|
|
- &late_ranges_high_depth,
|
|
|
|
|
- Some(ann.clone()),
|
|
|
|
|
- config.min_n_callers,
|
|
|
|
|
- Vec::new(),
|
|
|
|
|
- );
|
|
|
|
|
- mutation_rates.push(("Late replication HighDepths".to_string(), res));
|
|
|
|
|
-
|
|
|
|
|
for (name, path) in config.panels.iter() {
|
|
for (name, path) in config.panels.iter() {
|
|
|
let panel_ranges: Vec<GenomeRange> =
|
|
let panel_ranges: Vec<GenomeRange> =
|
|
|
read_bed(path)?.into_iter().map(|e| e.range).collect();
|
|
read_bed(path)?.into_iter().map(|e| e.range).collect();
|
|
@@ -644,7 +566,7 @@ pub fn somatic_depth_quality_ranges(
|
|
|
.into_par_iter()
|
|
.into_par_iter()
|
|
|
.map(|contig| {
|
|
.map(|contig| {
|
|
|
let normal_path = format!("{}/{}_count.tsv.gz", cfg.normal_dir_count(id), contig);
|
|
let normal_path = format!("{}/{}_count.tsv.gz", cfg.normal_dir_count(id), contig);
|
|
|
- let tumor_path = format!("{}/{}_count.tsv.gz", cfg.tumoral_dir_count(id), contig);
|
|
|
|
|
|
|
+ let tumor_path = format!("{}/{}_count.tsv.gz", cfg.tumoral_dir_count(id), contig);
|
|
|
|
|
|
|
|
let normal_rdr = get_gz_reader(&normal_path)
|
|
let normal_rdr = get_gz_reader(&normal_path)
|
|
|
.with_context(|| format!("Failed to open normal file: {}", normal_path))?;
|
|
.with_context(|| format!("Failed to open normal file: {}", normal_path))?;
|
|
@@ -663,33 +585,63 @@ pub fn somatic_depth_quality_ranges(
|
|
|
let t_next = tl.next();
|
|
let t_next = tl.next();
|
|
|
match (n_next, t_next) {
|
|
match (n_next, t_next) {
|
|
|
(None, None) => break,
|
|
(None, None) => break,
|
|
|
- (Some(Err(e)), _) => return Err(anyhow::anyhow!("{} line {}: {}", normal_path, line_no + 1, e)),
|
|
|
|
|
- (_, Some(Err(e))) => return Err(anyhow::anyhow!("{} line {}: {}", tumor_path, line_no + 1, e)),
|
|
|
|
|
|
|
+ (Some(Err(e)), _) => {
|
|
|
|
|
+ return Err(anyhow::anyhow!(
|
|
|
|
|
+ "{} line {}: {}",
|
|
|
|
|
+ normal_path,
|
|
|
|
|
+ line_no + 1,
|
|
|
|
|
+ e
|
|
|
|
|
+ ))
|
|
|
|
|
+ }
|
|
|
|
|
+ (_, Some(Err(e))) => {
|
|
|
|
|
+ return Err(anyhow::anyhow!(
|
|
|
|
|
+ "{} line {}: {}",
|
|
|
|
|
+ tumor_path,
|
|
|
|
|
+ line_no + 1,
|
|
|
|
|
+ e
|
|
|
|
|
+ ))
|
|
|
|
|
+ }
|
|
|
(Some(Ok(n_line)), Some(Ok(t_line))) => {
|
|
(Some(Ok(n_line)), Some(Ok(t_line))) => {
|
|
|
line_no += 1;
|
|
line_no += 1;
|
|
|
|
|
|
|
|
- let n = BinCount::from_tsv_row(&n_line)
|
|
|
|
|
- .with_context(|| format!("Parse error at {}: {}", normal_path, line_no))?;
|
|
|
|
|
- let t = BinCount::from_tsv_row(&t_line)
|
|
|
|
|
- .with_context(|| format!("Parse error at {}: {}", tumor_path, line_no))?;
|
|
|
|
|
|
|
+ let n = BinCount::from_tsv_row(&n_line).with_context(|| {
|
|
|
|
|
+ format!("Parse error at {}: {}", normal_path, line_no)
|
|
|
|
|
+ })?;
|
|
|
|
|
+ let t = BinCount::from_tsv_row(&t_line).with_context(|| {
|
|
|
|
|
+ format!("Parse error at {}: {}", tumor_path, line_no)
|
|
|
|
|
+ })?;
|
|
|
|
|
|
|
|
if n.contig != t.contig {
|
|
if n.contig != t.contig {
|
|
|
- anyhow::bail!("Contig mismatch at line {}: {} vs {}", line_no, n.contig, t.contig);
|
|
|
|
|
|
|
+ anyhow::bail!(
|
|
|
|
|
+ "Contig mismatch at line {}: {} vs {}",
|
|
|
|
|
+ line_no,
|
|
|
|
|
+ n.contig,
|
|
|
|
|
+ t.contig
|
|
|
|
|
+ );
|
|
|
}
|
|
}
|
|
|
if n.start != t.start {
|
|
if n.start != t.start {
|
|
|
- anyhow::bail!("Position mismatch at line {}: {} vs {}", line_no, n.start, t.start);
|
|
|
|
|
|
|
+ anyhow::bail!(
|
|
|
|
|
+ "Position mismatch at line {}: {} vs {}",
|
|
|
|
|
+ line_no,
|
|
|
|
|
+ n.start,
|
|
|
|
|
+ t.start
|
|
|
|
|
+ );
|
|
|
}
|
|
}
|
|
|
// Ensure equal bin widths
|
|
// Ensure equal bin widths
|
|
|
if n.depths.len() != t.depths.len() {
|
|
if n.depths.len() != t.depths.len() {
|
|
|
anyhow::bail!(
|
|
anyhow::bail!(
|
|
|
"Depth vector length mismatch at line {}: {} vs {}",
|
|
"Depth vector length mismatch at line {}: {} vs {}",
|
|
|
- line_no, n.depths.len(), t.depths.len()
|
|
|
|
|
|
|
+ line_no,
|
|
|
|
|
+ n.depths.len(),
|
|
|
|
|
+ t.depths.len()
|
|
|
);
|
|
);
|
|
|
}
|
|
}
|
|
|
if n.low_qualities.len() != t.low_qualities.len() {
|
|
if n.low_qualities.len() != t.low_qualities.len() {
|
|
|
anyhow::bail!(
|
|
anyhow::bail!(
|
|
|
"LowQ vector length mismatch at line {}: {} vs {}",
|
|
"LowQ vector length mismatch at line {}: {} vs {}",
|
|
|
- line_no, n.low_qualities.len(), t.low_qualities.len()
|
|
|
|
|
|
|
+ line_no,
|
|
|
|
|
+ n.low_qualities.len(),
|
|
|
|
|
+ t.low_qualities.len()
|
|
|
);
|
|
);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -699,18 +651,38 @@ pub fn somatic_depth_quality_ranges(
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
// NOTE: if you intended "low-quality regions" (bad), invert predicate.
|
|
// NOTE: if you intended "low-quality regions" (bad), invert predicate.
|
|
|
- let lowq_mask_iter = n.low_qualities.iter().zip(&t.low_qualities).map(|(&nq, &tq)| {
|
|
|
|
|
- nq > cfg.max_depth_low_quality && tq > cfg.max_depth_low_quality
|
|
|
|
|
- });
|
|
|
|
|
-
|
|
|
|
|
- high_runs.extend(ranges_from_consecutive_true_iter(high_mask_iter, n.start, &n.contig));
|
|
|
|
|
- lowq_runs.extend(ranges_from_consecutive_true_iter(lowq_mask_iter, n.start, &n.contig));
|
|
|
|
|
|
|
+ let lowq_mask_iter =
|
|
|
|
|
+ n.low_qualities
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .zip(&t.low_qualities)
|
|
|
|
|
+ .map(|(&nq, &tq)| {
|
|
|
|
|
+ nq > cfg.max_depth_low_quality && tq > cfg.max_depth_low_quality
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ high_runs.extend(ranges_from_consecutive_true_iter(
|
|
|
|
|
+ high_mask_iter,
|
|
|
|
|
+ n.start,
|
|
|
|
|
+ &n.contig,
|
|
|
|
|
+ ));
|
|
|
|
|
+ lowq_runs.extend(ranges_from_consecutive_true_iter(
|
|
|
|
|
+ lowq_mask_iter,
|
|
|
|
|
+ n.start,
|
|
|
|
|
+ &n.contig,
|
|
|
|
|
+ ));
|
|
|
}
|
|
}
|
|
|
(Some(_), None) => {
|
|
(Some(_), None) => {
|
|
|
- anyhow::bail!("Line count mismatch: {} has extra lines after {}", normal_path, line_no);
|
|
|
|
|
|
|
+ anyhow::bail!(
|
|
|
|
|
+ "Line count mismatch: {} has extra lines after {}",
|
|
|
|
|
+ normal_path,
|
|
|
|
|
+ line_no
|
|
|
|
|
+ );
|
|
|
}
|
|
}
|
|
|
(None, Some(_)) => {
|
|
(None, Some(_)) => {
|
|
|
- anyhow::bail!("Line count mismatch: {} has extra lines after {}", tumor_path, line_no);
|
|
|
|
|
|
|
+ anyhow::bail!(
|
|
|
|
|
+ "Line count mismatch: {} has extra lines after {}",
|
|
|
|
|
+ tumor_path,
|
|
|
|
|
+ line_no
|
|
|
|
|
+ );
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -731,14 +703,9 @@ pub fn somatic_depth_quality_ranges(
|
|
|
))
|
|
))
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-
|
|
|
|
|
/// Iterator-based version (no temporary Vec<bool>).
|
|
/// Iterator-based version (no temporary Vec<bool>).
|
|
|
/// Produces end-exclusive ranges in the same shape as your old function.
|
|
/// Produces end-exclusive ranges in the same shape as your old function.
|
|
|
-pub fn ranges_from_consecutive_true_iter<I>(
|
|
|
|
|
- mask: I,
|
|
|
|
|
- start0: u32,
|
|
|
|
|
- contig: &str,
|
|
|
|
|
-) -> Vec<GenomeRange>
|
|
|
|
|
|
|
+pub fn ranges_from_consecutive_true_iter<I>(mask: I, start0: u32, contig: &str) -> Vec<GenomeRange>
|
|
|
where
|
|
where
|
|
|
I: IntoIterator<Item = bool>,
|
|
I: IntoIterator<Item = bool>,
|
|
|
{
|
|
{
|
|
@@ -753,13 +720,19 @@ where
|
|
|
current_start = Some(start0 + i);
|
|
current_start = Some(start0 + i);
|
|
|
}
|
|
}
|
|
|
} else if let Some(s) = current_start.take() {
|
|
} else if let Some(s) = current_start.take() {
|
|
|
- ranges.push(GenomeRange { contig, range: s..(start0 + i) });
|
|
|
|
|
|
|
+ ranges.push(GenomeRange {
|
|
|
|
|
+ contig,
|
|
|
|
|
+ range: s..(start0 + i),
|
|
|
|
|
+ });
|
|
|
}
|
|
}
|
|
|
i += 1;
|
|
i += 1;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if let Some(s) = current_start {
|
|
if let Some(s) = current_start {
|
|
|
- ranges.push(GenomeRange { contig, range: s..(start0 + i) });
|
|
|
|
|
|
|
+ ranges.push(GenomeRange {
|
|
|
|
|
+ contig,
|
|
|
|
|
+ range: s..(start0 + i),
|
|
|
|
|
+ });
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
ranges
|
|
ranges
|
|
@@ -771,9 +744,7 @@ pub fn merge_adjacent_ranges(mut ranges: Vec<GenomeRange>) -> Vec<GenomeRange> {
|
|
|
return ranges;
|
|
return ranges;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- ranges.sort_by(|a, b| {
|
|
|
|
|
- (a.contig, a.range.start).cmp(&(b.contig, b.range.start))
|
|
|
|
|
- });
|
|
|
|
|
|
|
+ ranges.sort_by(|a, b| (a.contig, a.range.start).cmp(&(b.contig, b.range.start)));
|
|
|
|
|
|
|
|
let mut merged = Vec::with_capacity(ranges.len());
|
|
let mut merged = Vec::with_capacity(ranges.len());
|
|
|
let mut cur = ranges[0].clone();
|
|
let mut cur = ranges[0].clone();
|