|
|
@@ -156,7 +156,6 @@
|
|
|
use std::collections::{BTreeMap, BTreeSet};
|
|
|
|
|
|
use anyhow::Context;
|
|
|
-use csv::ByteRecord;
|
|
|
use dashmap::DashMap;
|
|
|
use log::debug;
|
|
|
use ordered_float::OrderedFloat;
|
|
|
@@ -169,7 +168,7 @@ use crate::{
|
|
|
helpers::bin_data,
|
|
|
io::{
|
|
|
bed::read_bed, dict::read_dict, gff::features_ranges, readers::get_gz_reader,
|
|
|
- tsv::tsv_reader, writers::{finalize_bgzf_file, get_gz_writer},
|
|
|
+ tsv::TsvLine, writers::{finalize_bgzf_file, get_gz_writer},
|
|
|
},
|
|
|
positions::{
|
|
|
GenomeRange, contig_to_num, merge_overlapping_genome_ranges, par_overlaps, range_intersection_par
|
|
|
@@ -585,19 +584,16 @@ pub fn somatic_depth_quality_ranges(
|
|
|
let normal_path = format!("{}/{}_count.tsv.gz", cfg.normal_dir_count(id), contig);
|
|
|
let tumor_path = format!("{}/{}_count.tsv.gz", cfg.tumoral_dir_count(id), contig);
|
|
|
|
|
|
- let normal_rdr = get_gz_reader(&normal_path)
|
|
|
+ let mut normal_rdr = get_gz_reader(&normal_path)
|
|
|
.with_context(|| format!("Failed to open normal file: {}", normal_path))?;
|
|
|
- let tumor_rdr = get_gz_reader(&tumor_path)
|
|
|
+ let mut tumor_rdr = get_gz_reader(&tumor_path)
|
|
|
.with_context(|| format!("Failed to open tumor file: {}", tumor_path))?;
|
|
|
|
|
|
let mut high_runs: Vec<GenomeRange> = Vec::new();
|
|
|
let mut lowq_runs: Vec<GenomeRange> = Vec::new();
|
|
|
|
|
|
- let mut n_tsv = tsv_reader(normal_rdr); // normal_rdr: impl Read
|
|
|
- let mut t_tsv = tsv_reader(tumor_rdr);
|
|
|
-
|
|
|
- let mut n_rec = ByteRecord::new();
|
|
|
- let mut t_rec = ByteRecord::new();
|
|
|
+ let mut n_line = TsvLine::new();
|
|
|
+ let mut t_line = TsvLine::new();
|
|
|
|
|
|
let mut n_buf = BinRowBuf::default();
|
|
|
let mut t_buf = BinRowBuf::default();
|
|
|
@@ -605,39 +601,33 @@ pub fn somatic_depth_quality_ranges(
|
|
|
let mut line_no = 0usize;
|
|
|
|
|
|
loop {
|
|
|
- let n_ok = n_tsv.read_byte_record(&mut n_rec).with_context(|| {
|
|
|
- format!("reading normal TSV: {} line {}", normal_path, line_no + 1)
|
|
|
- })?;
|
|
|
-
|
|
|
- let t_ok = t_tsv.read_byte_record(&mut t_rec).with_context(|| {
|
|
|
- format!("reading tumor TSV: {} line {}", tumor_path, line_no + 1)
|
|
|
- })?;
|
|
|
+ let n_ok = n_line.read(&mut normal_rdr)
|
|
|
+ .with_context(|| format!("reading normal TSV: {} line {}", normal_path, line_no + 1))?;
|
|
|
+ let t_ok = t_line.read(&mut tumor_rdr)
|
|
|
+ .with_context(|| format!("reading tumor TSV: {} line {}", tumor_path, line_no + 1))?;
|
|
|
|
|
|
- if n_ok || t_ok {
|
|
|
- line_no += 1;
|
|
|
- }
|
|
|
+ if n_ok || t_ok { line_no += 1; }
|
|
|
|
|
|
match (n_ok, t_ok) {
|
|
|
(false, false) => break,
|
|
|
- (true, false) => {
|
|
|
- anyhow::bail!(
|
|
|
- "{normal_path} has extra lines at {line_no}; last normal record = {:?}",
|
|
|
- String::from_utf8_lossy(n_rec.as_slice())
|
|
|
- )
|
|
|
- }
|
|
|
- (false, true) => {
|
|
|
- anyhow::bail!(
|
|
|
- "{tumor_path} has extra lines at {line_no}; last tumor record = {:?}",
|
|
|
- String::from_utf8_lossy(t_rec.as_slice())
|
|
|
- )
|
|
|
- }
|
|
|
+ (true, false) => anyhow::bail!(
|
|
|
+ "{normal_path} has extra lines at {line_no}; last record = {:?}",
|
|
|
+ n_line.as_str()
|
|
|
+ ),
|
|
|
+ (false, true) => anyhow::bail!(
|
|
|
+ "{tumor_path} has extra lines at {line_no}; last record = {:?}",
|
|
|
+ t_line.as_str()
|
|
|
+ ),
|
|
|
(true, true) => {
|
|
|
+ let n_fields = n_line.split_fields();
|
|
|
+ let t_fields = t_line.split_fields();
|
|
|
+
|
|
|
let (n_start, n_depths, n_lowq) =
|
|
|
- parse_bin_record_into(&n_rec, &mut n_buf, &contig)
|
|
|
+ parse_bin_record_into(&n_fields, &mut n_buf, &contig)
|
|
|
.with_context(|| format!("{} line {}", normal_path, line_no))?;
|
|
|
|
|
|
let (t_start, t_depths, t_lowq) =
|
|
|
- parse_bin_record_into(&t_rec, &mut t_buf, &contig)
|
|
|
+ parse_bin_record_into(&t_fields, &mut t_buf, &contig)
|
|
|
.with_context(|| format!("{} line {}", tumor_path, line_no))?;
|
|
|
|
|
|
anyhow::ensure!(n_start == t_start, "start mismatch at line {}", line_no);
|