|
|
@@ -1,9 +1,9 @@
|
|
|
use std::{collections::HashMap, fs::File, io::Write};
|
|
|
|
|
|
-use anyhow::{anyhow, Context, Ok, Result};
|
|
|
-use log::info;
|
|
|
+use anyhow::{anyhow, Context, Result};
|
|
|
+use log::{info, warn};
|
|
|
use rayon::prelude::*;
|
|
|
-use rust_htslib::bam::{self, ext::BamRecordExtensions, record, Header, Read, Record, Writer};
|
|
|
+use rust_htslib::bam::{self, ext::BamRecordExtensions, record::{self, Aux}, Header, HeaderView, IndexedReader, Read, Record, Writer};
|
|
|
|
|
|
pub fn get_hts_nt_pileup(
|
|
|
bam: &mut rust_htslib::bam::IndexedReader,
|
|
|
@@ -103,22 +103,16 @@ pub fn get_n_start(
|
|
|
stop: i32,
|
|
|
) -> Result<usize> {
|
|
|
bam.fetch((chr, start, stop))?;
|
|
|
-
|
|
|
- // let mut start_positions = Vec::new();
|
|
|
let mut n_start = 0;
|
|
|
for read in bam.records() {
|
|
|
- let record = read.context(format!("eRR"))?;
|
|
|
+ let record = read.context("Err".to_string())?;
|
|
|
let rstart = record.pos() as i32;
|
|
|
- if rstart >= start && rstart < stop {
|
|
|
- if record.mapq() > 40 {
|
|
|
- n_start += 1;
|
|
|
- }
|
|
|
- // start_positions.push(rstart);
|
|
|
+ if rstart >= start && rstart < stop && record.mapq() > 40 {
|
|
|
+ n_start += 1;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
Ok(n_start)
|
|
|
- // Ok(start_positions.len())
|
|
|
}
|
|
|
|
|
|
pub fn get_n_start_end_qual(
|
|
|
@@ -130,16 +124,13 @@ pub fn get_n_start_end_qual(
|
|
|
) -> Result<usize> {
|
|
|
bam.fetch((chr, start, stop))?;
|
|
|
|
|
|
- // let mut start_positions = Vec::new();
|
|
|
let mut n_start = 0;
|
|
|
for read in bam.records() {
|
|
|
- let record = read.context(format!("eRR"))?;
|
|
|
+ let record = read.context("eRR".to_string())?;
|
|
|
let rstart = record.pos() as i32;
|
|
|
let rend = record.reference_end() as i32;
|
|
|
- if rstart >= start && rstart < stop || rend >= start && rend < stop {
|
|
|
- if record.mapq() >= mapq {
|
|
|
- n_start += 1;
|
|
|
- }
|
|
|
+ if (rstart >= start && rstart < stop || rend >= start && rend < stop) && record.mapq() >= mapq {
|
|
|
+ n_start += 1;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -155,8 +146,8 @@ pub fn get_start_end_qual(
|
|
|
) -> Result<Vec<i32>> {
|
|
|
bam.fetch((chr, start - 1, stop))?;
|
|
|
let length = stop - start;
|
|
|
- let mut results = Vec::new();
|
|
|
- results.resize(length as usize, 0);
|
|
|
+ let mut results = vec![0; length as usize];
|
|
|
+ // results.resize(length as usize, 0);
|
|
|
|
|
|
for read in bam.records() {
|
|
|
let record = read.context("Error while parsing record")?;
|
|
|
@@ -262,6 +253,37 @@ pub fn primary_record(bam_path: &str, record: &Record) -> Option<Record> {
|
|
|
None
|
|
|
}
|
|
|
|
|
|
+pub fn primary_records(bam: &mut IndexedReader, record: Record) -> Record {
|
|
|
+ if record.is_supplementary() {
|
|
|
+ let qname = record.qname();
|
|
|
+
|
|
|
+ if let Ok(Aux::String(sa)) = record.aux(b"SA") {
|
|
|
+ let positions: Vec<(&str, i32)> = sa
|
|
|
+ .split(';')
|
|
|
+ .filter(|s| !s.is_empty())
|
|
|
+ .map(|s| {
|
|
|
+ let parts: Vec<&str> = s.split(',').take(2).collect();
|
|
|
+ let chr = parts.first().unwrap();
|
|
|
+ let position: i32 = parts.get(1).unwrap().parse().unwrap();
|
|
|
+ (*chr, position)
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ for (chr, start) in positions {
|
|
|
+ bam.fetch((chr, start, start + 1)).unwrap();
|
|
|
+ for record in bam.records().flatten() {
|
|
|
+ if qname == record.qname() && !record.is_supplementary() {
|
|
|
+ return record.clone();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ record
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
pub fn range_depths(
|
|
|
bam: &mut rust_htslib::bam::IndexedReader,
|
|
|
chr: &str,
|
|
|
@@ -741,6 +763,52 @@ pub fn calculate_end_position(start: u32, cigar_str: &str) -> anyhow::Result<u32
|
|
|
Ok(end)
|
|
|
}
|
|
|
|
|
|
+pub fn get_all_positions(
|
|
|
+ record: &Record,
|
|
|
+ header: &HeaderView,
|
|
|
+ bam: &mut IndexedReader,
|
|
|
+) -> anyhow::Result<Vec<(String, u32, u32)>> {
|
|
|
+ let mut positions = Vec::new();
|
|
|
+ let qname = record.qname();
|
|
|
+ let contig = String::from_utf8(header.tid2name(record.tid().try_into()?).to_vec())?;
|
|
|
+ positions.push((
|
|
|
+ contig,
|
|
|
+ record.reference_start() as u32,
|
|
|
+ record.reference_end() as u32,
|
|
|
+ ));
|
|
|
+
|
|
|
+ if let std::result::Result::Ok(record::Aux::String(sa)) = record.aux(b"SA") {
|
|
|
+ let sa_splits: Vec<&str> = sa.split(';').filter(|s| !s.is_empty()).collect();
|
|
|
+ for s in sa_splits {
|
|
|
+ let parts: Vec<&str> = s.split(',').take(4).collect();
|
|
|
+ let chr = parts.first().unwrap();
|
|
|
+ let start: u32 = parts.get(1).unwrap().parse().unwrap();
|
|
|
+ // SAM format is 1-based!
|
|
|
+ let start = start - 1;
|
|
|
+ // let strand = parts.get(2).unwrap();
|
|
|
+ // // CIGAR SA != CIGAR can't calculate the end from it.
|
|
|
+ // let cigar_str = *parts.get(3).unwrap();
|
|
|
+ // let end = calculate_end_position(start, cigar_str).unwrap();
|
|
|
+
|
|
|
+ bam.fetch((chr, start, start + 1))?;
|
|
|
+ let mut founded = false;
|
|
|
+ for record in bam.records().flatten() {
|
|
|
+ if qname == record.qname() {
|
|
|
+ founded = true;
|
|
|
+ positions.push((chr.to_string(), start, record.reference_end() as u32));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if !founded {
|
|
|
+ warn!("Not founded");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ positions.sort_by(|a, b| a.1.cmp(&b.1));
|
|
|
+ positions.sort_by(|a, b| a.0.cmp(&b.0));
|
|
|
+ positions.dedup();
|
|
|
+ Ok(positions)
|
|
|
+}
|
|
|
+
|
|
|
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|