|
|
@@ -3,6 +3,7 @@ use crate::{
|
|
|
bin::{scan_outliers, Bin},
|
|
|
};
|
|
|
use anyhow::Context;
|
|
|
+use dict_reader::read_dict;
|
|
|
use log::info;
|
|
|
use rayon::prelude::*;
|
|
|
use rust_htslib::bam::{Format, Header, Read, Record, Writer};
|
|
|
@@ -14,6 +15,7 @@ use std::{
|
|
|
|
|
|
pub mod bam;
|
|
|
pub mod bin;
|
|
|
+pub mod dict_reader;
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
pub struct Config {
|
|
|
@@ -151,7 +153,15 @@ pub fn scan_save(
|
|
|
|
|
|
let header = Header::from_template(&header);
|
|
|
for records in grouped_records {
|
|
|
- let output_bam_path = format!("{out_dir_bam}/{}.bam", uuid::Uuid::new_v4());
|
|
|
+ let positions: Vec<i64> = records.iter().map(|r| r.pos()).collect();
|
|
|
+ let uuid = uuid::Uuid::new_v4().to_string();
|
|
|
+ let output_bam_path = format!(
|
|
|
+ "{out_dir_bam}/{}-{}_{}.bam",
|
|
|
+ *positions.iter().min().unwrap_or(&0),
|
|
|
+ *positions.iter().max().unwrap_or(&0),
|
|
|
+ &uuid[..4]
|
|
|
+ );
|
|
|
+
|
|
|
let mut output_bam = Writer::from_path(output_bam_path, &header, Format::Bam).unwrap();
|
|
|
for record in records {
|
|
|
output_bam.write(&record)?;
|
|
|
@@ -160,7 +170,7 @@ pub fn scan_save(
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
-pub fn par_whole_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Result<()> {
|
|
|
+pub fn par_contig_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Result<()> {
|
|
|
let bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
|
|
|
let header_view = bam.header();
|
|
|
let contig_tid = header_view
|
|
|
@@ -187,7 +197,7 @@ pub fn par_whole_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Re
|
|
|
fs::create_dir_all(&out_dir_bam)?;
|
|
|
let file_out = format!("{out_dir}/counts/{contig}");
|
|
|
fs::create_dir_all(&file_out)?;
|
|
|
- // let u = i.get(1..2).unwrap();
|
|
|
+
|
|
|
let counts_files: Vec<String> = i
|
|
|
.par_iter()
|
|
|
.enumerate()
|
|
|
@@ -230,6 +240,15 @@ pub fn par_whole_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Re
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+pub fn par_whole_scan(dict_file: &str, bam_path: &str, out_dir: &str) -> anyhow::Result<()> {
|
|
|
+ let dict = read_dict(dict_file)?;
|
|
|
+ for (contig, _) in dict {
|
|
|
+ info!("Scanning {contig}");
|
|
|
+ par_contig_scan(&contig, bam_path, out_dir)?;
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+}
|
|
|
+
|
|
|
// pub fn mrd_count_ratio(mrd_bam: &str, count_file: &str) -> anyhow::Result<()> {
|
|
|
// let count_file = File::open(count_file)?;
|
|
|
// let reader = BufReader::new(count_file);
|
|
|
@@ -285,7 +304,7 @@ mod tests {
|
|
|
let tmp_dir = format!("/data/tmp/scan_{}", uuid::Uuid::new_v4());
|
|
|
fs::create_dir_all(&tmp_dir).unwrap();
|
|
|
info!("Creating {tmp_dir}");
|
|
|
- par_whole_scan(
|
|
|
+ par_contig_scan(
|
|
|
"chr9",
|
|
|
&format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam"),
|
|
|
&tmp_dir,
|
|
|
@@ -293,6 +312,15 @@ mod tests {
|
|
|
.unwrap();
|
|
|
}
|
|
|
|
|
|
+ #[test]
|
|
|
+ fn whole() {
|
|
|
+ init();
|
|
|
+ let id = "SPINATO";
|
|
|
+ let bam_path = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
|
|
|
+ let out_dir = format!("/data/longreads_basic_pipe/{id}/diag/scan");
|
|
|
+ par_whole_scan("/data/ref/hs1/chm13v2.0.dict", &bam_path, &out_dir).unwrap();
|
|
|
+ }
|
|
|
+
|
|
|
// #[test]
|
|
|
// fn phasing() -> anyhow::Result<()> {
|
|
|
// let id = "SALICETTO";
|