|
|
@@ -9,7 +9,11 @@ use rayon::prelude::*;
|
|
|
use rust_htslib::bam::IndexedReader;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
use std::{
|
|
|
- cmp::Ordering, collections::{HashSet, VecDeque}, fs::File, io::Read, thread::spawn
|
|
|
+ cmp::Ordering,
|
|
|
+ collections::{HashSet, VecDeque},
|
|
|
+ fs::File,
|
|
|
+ io::Read,
|
|
|
+ thread::spawn,
|
|
|
};
|
|
|
use std::{fs::OpenOptions, io::Write};
|
|
|
|
|
|
@@ -24,19 +28,29 @@ pub struct HeteroVar {
|
|
|
|
|
|
impl HeteroVar {
|
|
|
pub fn new(
|
|
|
- bam: &mut IndexedReader,
|
|
|
+ bam_a: &mut IndexedReader,
|
|
|
+ bam_b: &mut IndexedReader,
|
|
|
chr: &str,
|
|
|
position: i32,
|
|
|
reference: u8,
|
|
|
alternative: u8,
|
|
|
) -> Result<(HeteroVar, HeteroVar)> {
|
|
|
- let rec_base = if let std::result::Result::Ok(rb) =
|
|
|
- pandora_lib_pileup::qnames_at_base(bam, chr, position, false)
|
|
|
+ let mut rec_base = if let std::result::Result::Ok(rb) =
|
|
|
+ pandora_lib_pileup::qnames_at_base(bam_a, chr, position, false)
|
|
|
{
|
|
|
rb
|
|
|
} else {
|
|
|
return Err(anyhow!("Error while reading BAM file."));
|
|
|
};
|
|
|
+
|
|
|
+ if let std::result::Result::Ok(rb) =
|
|
|
+ pandora_lib_pileup::qnames_at_base(bam_b, chr, position, false)
|
|
|
+ {
|
|
|
+ rec_base.extend(rb);
|
|
|
+ } else {
|
|
|
+ return Err(anyhow!("Error while reading BAM file."));
|
|
|
+ };
|
|
|
+
|
|
|
let depth = rec_base.len() as i32;
|
|
|
if depth == 0 {
|
|
|
return Err(anyhow!("No records"));
|
|
|
@@ -133,7 +147,11 @@ impl Phase {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- pub fn range(&self, bam: &mut IndexedReader) -> Result<(i64, i64, i64, i64)> {
|
|
|
+ pub fn range(
|
|
|
+ &self,
|
|
|
+ bam_a: &mut IndexedReader,
|
|
|
+ bam_b: &mut IndexedReader,
|
|
|
+ ) -> Result<(i64, i64, i64, i64)> {
|
|
|
let mut phase = self.clone();
|
|
|
phase.sort_dedup();
|
|
|
let data = &self.data;
|
|
|
@@ -141,10 +159,23 @@ impl Phase {
|
|
|
let left = data.first().unwrap();
|
|
|
let right = data.last().unwrap();
|
|
|
|
|
|
- let left_records =
|
|
|
- pandora_lib_pileup::records_at_base(bam, &left.chr, left.position, true)?;
|
|
|
- let right_records =
|
|
|
- pandora_lib_pileup::records_at_base(bam, &right.chr, right.position, true)?;
|
|
|
+ let mut left_records =
|
|
|
+ pandora_lib_pileup::records_at_base(bam_a, &left.chr, left.position, false)?;
|
|
|
+ let mut right_records =
|
|
|
+ pandora_lib_pileup::records_at_base(bam_a, &right.chr, right.position, false)?;
|
|
|
+
|
|
|
+ left_records.extend(pandora_lib_pileup::records_at_base(
|
|
|
+ bam_b,
|
|
|
+ &left.chr,
|
|
|
+ left.position,
|
|
|
+ false,
|
|
|
+ )?);
|
|
|
+ right_records.extend(pandora_lib_pileup::records_at_base(
|
|
|
+ bam_b,
|
|
|
+ &right.chr,
|
|
|
+ right.position,
|
|
|
+ false,
|
|
|
+ )?);
|
|
|
|
|
|
let left_starts: Vec<_> = left_records
|
|
|
.iter()
|
|
|
@@ -161,11 +192,18 @@ impl Phase {
|
|
|
let min_cov = left_starts.iter().max();
|
|
|
let max_cov = right_ends.iter().min();
|
|
|
let max = right_ends.iter().max();
|
|
|
+ if max.is_none() {
|
|
|
+ println!("{right:?}");
|
|
|
+ println!("{}", String::from_utf8(vec![right.base]).unwrap());
|
|
|
+ println!("{right_records:?}");
|
|
|
+ println!("{right_ends:?}");
|
|
|
+ println!("{min:?} {min_cov:?} {max:?} {max_cov:?}");
|
|
|
+ }
|
|
|
|
|
|
if let (Some(min), Some(min_cov), Some(max_cov), Some(max)) = (min, min_cov, max_cov, max) {
|
|
|
Ok((*min, *min_cov, *max_cov, *max))
|
|
|
} else {
|
|
|
- Err(anyhow!("problem"))
|
|
|
+ Err(anyhow!(format!("can't find range {:#?}", self)))
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -180,9 +218,9 @@ impl Phase {
|
|
|
.dedup_by(|a, b| a.position == b.position && a.chr == b.chr && a.base == b.base);
|
|
|
}
|
|
|
|
|
|
- pub fn bed_string(&self, bam: &mut IndexedReader) -> Result<String> {
|
|
|
+ pub fn bed_string(&self, bam_a: &mut IndexedReader, bam_b: &mut IndexedReader) -> Result<String> {
|
|
|
let first = self.data.first().unwrap();
|
|
|
- let (_min, min_cov, max_cov, _max) = self.range(bam)?;
|
|
|
+ let (_min, min_cov, max_cov, _max) = self.range(bam_a, bam_b)?;
|
|
|
Ok([
|
|
|
first.chr.to_string(),
|
|
|
min_cov.to_string(),
|
|
|
@@ -191,6 +229,20 @@ impl Phase {
|
|
|
]
|
|
|
.join("\t"))
|
|
|
}
|
|
|
+
|
|
|
+ pub fn id(&mut self, bam_a: &mut IndexedReader, bam_b: &mut IndexedReader) -> anyhow::Result<String> {
|
|
|
+ self.sort_dedup();
|
|
|
+ let synteny = String::from_utf8(self.data.iter().map(|var| var.base).collect())?;
|
|
|
+ let (min, _, _, max) = self.range(bam_a, bam_b)?;
|
|
|
+ let f = self.data.first().unwrap().chr.clone();
|
|
|
+ let l = self.data.last().unwrap().chr.clone();
|
|
|
+ let b = if l != f {
|
|
|
+ format!("{l}:")
|
|
|
+ } else {
|
|
|
+ "".to_string()
|
|
|
+ };
|
|
|
+ Ok(format!("{}:{min}-{}{max}[{synteny}]", f, b))
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
impl Ord for Phase {
|
|
|
@@ -293,7 +345,8 @@ pub fn merge_phases(
|
|
|
|
|
|
pub fn variants_phasing(
|
|
|
variants: Vec<Variant>,
|
|
|
- bam_path: &str,
|
|
|
+ bam_path_a: &str,
|
|
|
+ bam_path_b: &str,
|
|
|
min_records: usize,
|
|
|
multi: &MultiProgress,
|
|
|
) -> Vec<Phase> {
|
|
|
@@ -303,7 +356,8 @@ pub fn variants_phasing(
|
|
|
let mut phases = variants
|
|
|
.par_chunks(2_000)
|
|
|
.flat_map(|chunks| {
|
|
|
- let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
|
|
|
+ let mut bam_a = rust_htslib::bam::IndexedReader::from_path(bam_path_a).unwrap();
|
|
|
+ let mut bam_b = rust_htslib::bam::IndexedReader::from_path(bam_path_b).unwrap();
|
|
|
let mut errors = Vec::new();
|
|
|
let mut phases: Vec<Phase> = Vec::new();
|
|
|
for v in chunks {
|
|
|
@@ -314,7 +368,8 @@ pub fn variants_phasing(
|
|
|
continue;
|
|
|
}
|
|
|
match HeteroVar::new(
|
|
|
- &mut bam,
|
|
|
+ &mut bam_a,
|
|
|
+ &mut bam_b,
|
|
|
&v.contig.to_string(),
|
|
|
v.position as i32,
|
|
|
reference.pop().unwrap(),
|
|
|
@@ -356,7 +411,8 @@ pub fn variants_phasing(
|
|
|
pub fn write_phases_bed(
|
|
|
phases: &Vec<Phase>,
|
|
|
min_var: usize,
|
|
|
- bam_path: &str,
|
|
|
+ bam_path_a: &str,
|
|
|
+ bam_path_b: &str,
|
|
|
contig: &str,
|
|
|
file: &str,
|
|
|
) -> Result<()> {
|
|
|
@@ -370,11 +426,12 @@ pub fn write_phases_bed(
|
|
|
let mut ranges: Vec<_> = phases
|
|
|
.par_chunks(100)
|
|
|
.flat_map(|chunks| {
|
|
|
- let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
|
|
|
+ let mut bam_a = rust_htslib::bam::IndexedReader::from_path(bam_path_a).unwrap();
|
|
|
+ let mut bam_b = rust_htslib::bam::IndexedReader::from_path(bam_path_b).unwrap();
|
|
|
chunks
|
|
|
.to_vec()
|
|
|
.iter()
|
|
|
- .map(|p| (p.range(&mut bam), p.mean_vaf(), p.data.len()))
|
|
|
+ .map(|p| (p.range(&mut bam_a, &mut bam_b), p.mean_vaf(), p.data.len()))
|
|
|
.collect::<Vec<_>>()
|
|
|
})
|
|
|
.filter(|(r, _, _)| r.is_ok())
|
|
|
@@ -477,4 +534,3 @@ pub fn load_phases(filename: &str) -> anyhow::Result<Vec<Phase>> {
|
|
|
let phases: Vec<Phase> = postcard::from_bytes(&bytes).expect("Deserialization failed");
|
|
|
Ok(phases)
|
|
|
}
|
|
|
-
|