|
|
@@ -6,12 +6,14 @@ use log::info;
|
|
|
use rust_htslib::bam::{Format, Header, Read, Record, Writer};
|
|
|
use std::{
|
|
|
collections::{HashMap, HashSet},
|
|
|
- fs,
|
|
|
+ fs::{self, File},
|
|
|
+ io::{BufWriter, Write},
|
|
|
};
|
|
|
|
|
|
pub mod bam;
|
|
|
pub mod bin;
|
|
|
|
|
|
+#[allow(clippy::too_many_arguments)]
|
|
|
pub fn scan_save(
|
|
|
diag_bam_path: &str,
|
|
|
contig: &str,
|
|
|
@@ -19,13 +21,17 @@ pub fn scan_save(
|
|
|
end: u32,
|
|
|
bin_length: u32,
|
|
|
min_reads: usize,
|
|
|
- out_dir: &str,
|
|
|
+ out_scan_file: &str,
|
|
|
+ out_dir_bam: &str,
|
|
|
) -> anyhow::Result<()> {
|
|
|
+ let out_scan = File::create(out_scan_file).expect("Failed to create file");
|
|
|
+ let mut writer = BufWriter::new(out_scan);
|
|
|
+
|
|
|
let outliers_records: Vec<Vec<Record>> =
|
|
|
scan_outliers(diag_bam_path, contig, start, end, bin_length)
|
|
|
.iter()
|
|
|
.filter(|(_, _, _, sa_outlier, _, se_outlier)| *sa_outlier || *se_outlier)
|
|
|
- .map(|(start, _n, _sa, sa_outlier, _se, se_outlier)| {
|
|
|
+ .map(|(start, n, sa, sa_outlier, se, se_outlier)| {
|
|
|
let mut bin = Bin::new(diag_bam_path, contig, *start, bin_length).unwrap();
|
|
|
let mut records = Vec::new();
|
|
|
if *sa_outlier {
|
|
|
@@ -34,12 +40,16 @@ pub fn scan_save(
|
|
|
let (pos, _) = bin.max_start_or_end();
|
|
|
records.extend(bin.se_primary(pos));
|
|
|
};
|
|
|
- // info!(
|
|
|
- // "{}:{start}-{}\t{n}\t{sa}\t{se}\t{}",
|
|
|
- // contig,
|
|
|
- // start + bin_length - 1,
|
|
|
- // records.len()
|
|
|
- // );
|
|
|
+ writeln!(
|
|
|
+ writer,
|
|
|
+ "{}:{start}-{}\t{n}\t{sa}\t{}\t{se}\t{}\t{}",
|
|
|
+ contig,
|
|
|
+ start + bin_length - 1,
|
|
|
+ sa_outlier,
|
|
|
+ se_outlier,
|
|
|
+ records.len(),
|
|
|
+ )
|
|
|
+ .unwrap();
|
|
|
records
|
|
|
})
|
|
|
.collect();
|
|
|
@@ -105,7 +115,7 @@ pub fn scan_save(
|
|
|
.map(|r| r.to_vec())
|
|
|
.collect();
|
|
|
|
|
|
- fs::create_dir_all(out_dir).unwrap();
|
|
|
+ fs::create_dir_all(out_dir_bam).unwrap();
|
|
|
|
|
|
info!(
|
|
|
"{} reads to assemble in {} groups",
|
|
|
@@ -115,7 +125,7 @@ pub fn scan_save(
|
|
|
|
|
|
let header = Header::from_template(&header);
|
|
|
for records in grouped_records {
|
|
|
- let output_bam_path = format!("{out_dir}/{}.bam", uuid::Uuid::new_v4());
|
|
|
+ let output_bam_path = format!("{out_dir_bam}/{}.bam", uuid::Uuid::new_v4());
|
|
|
let mut output_bam = Writer::from_path(output_bam_path, &header, Format::Bam).unwrap();
|
|
|
for record in records {
|
|
|
output_bam.write(&record)?;
|
|
|
@@ -146,6 +156,7 @@ mod tests {
|
|
|
let result_dir = "/data/longreads_basic_pipe";
|
|
|
let diag_bam_path = format!("{result_dir}/{id}/diag/{id}_diag_hs1.bam");
|
|
|
let out_dir = format!("/data/tmp/scan_{}", uuid::Uuid::new_v4());
|
|
|
+ let out_scan_file = "/data/tmp/scan.tsv";
|
|
|
|
|
|
scan_save(
|
|
|
&diag_bam_path,
|
|
|
@@ -154,6 +165,7 @@ mod tests {
|
|
|
end,
|
|
|
bin_length,
|
|
|
min_reads,
|
|
|
+ out_scan_file,
|
|
|
&out_dir,
|
|
|
)?;
|
|
|
let n_files_out = fs::read_dir(out_dir).into_iter().count();
|