Browse Source

add scan lib fn

Thomas 1 year ago
parent
commit
381addffe7
1 changed files with 88 additions and 20 deletions
  1. 88 20
      src/lib.rs

+ 88 - 20
src/lib.rs

@@ -1,9 +1,9 @@
 use std::{collections::HashMap, fs::File, io::Write};
 
-use anyhow::{anyhow, Context, Ok, Result};
-use log::info;
+use anyhow::{anyhow, Context, Result};
+use log::{info, warn};
 use rayon::prelude::*;
-use rust_htslib::bam::{self, ext::BamRecordExtensions, record, Header, Read, Record, Writer};
+use rust_htslib::bam::{self, ext::BamRecordExtensions, record::{self, Aux}, Header, HeaderView, IndexedReader, Read, Record, Writer};
 
 pub fn get_hts_nt_pileup(
     bam: &mut rust_htslib::bam::IndexedReader,
@@ -103,22 +103,16 @@ pub fn get_n_start(
     stop: i32,
 ) -> Result<usize> {
     bam.fetch((chr, start, stop))?;
-
-    // let mut start_positions = Vec::new();
     let mut n_start = 0;
     for read in bam.records() {
-        let record = read.context(format!("eRR"))?;
+        let record = read.context("Err".to_string())?;
         let rstart = record.pos() as i32;
-        if rstart >= start && rstart < stop {
-            if record.mapq() > 40 {
-                n_start += 1;
-            }
-            // start_positions.push(rstart);
+        if rstart >= start && rstart < stop && record.mapq() > 40 {
+            n_start += 1;
         }
     }
 
     Ok(n_start)
-    // Ok(start_positions.len())
 }
 
 pub fn get_n_start_end_qual(
@@ -130,16 +124,13 @@ pub fn get_n_start_end_qual(
 ) -> Result<usize> {
     bam.fetch((chr, start, stop))?;
 
-    // let mut start_positions = Vec::new();
     let mut n_start = 0;
     for read in bam.records() {
-        let record = read.context(format!("eRR"))?;
+        let record = read.context("eRR".to_string())?;
         let rstart = record.pos() as i32;
         let rend = record.reference_end() as i32;
-        if rstart >= start && rstart < stop || rend >= start && rend < stop {
-            if record.mapq() >= mapq {
-                n_start += 1;
-            }
+        if (rstart >= start && rstart < stop || rend >= start && rend < stop) && record.mapq() >= mapq {
+            n_start += 1;
         }
     }
 
@@ -155,8 +146,8 @@ pub fn get_start_end_qual(
 ) -> Result<Vec<i32>> {
     bam.fetch((chr, start - 1, stop))?;
     let length = stop - start;
-    let mut results = Vec::new();
-    results.resize(length as usize, 0);
+    let mut results = vec![0; length as usize];
+    // results.resize(length as usize, 0);
 
     for read in bam.records() {
         let record = read.context("Error while parsing record")?;
@@ -262,6 +253,37 @@ pub fn primary_record(bam_path: &str, record: &Record) -> Option<Record> {
     None
 }
 
+pub fn primary_records(bam: &mut IndexedReader, record: Record) -> Record {
+    if record.is_supplementary() {
+        let qname = record.qname();
+
+        if let Ok(Aux::String(sa)) = record.aux(b"SA") {
+            let positions: Vec<(&str, i32)> = sa
+                .split(';')
+                .filter(|s| !s.is_empty())
+                .map(|s| {
+                    let parts: Vec<&str> = s.split(',').take(2).collect();
+                    let chr = parts.first().unwrap();
+                    let position: i32 = parts.get(1).unwrap().parse().unwrap();
+                    (*chr, position)
+                })
+                .collect();
+
+            for (chr, start) in positions {
+                bam.fetch((chr, start, start + 1)).unwrap();
+                for record in bam.records().flatten() {
+                    if qname == record.qname() && !record.is_supplementary() {
+                        return record.clone();
+                    }
+                }
+            }
+        }
+    }
+    record
+}
+
+
+
 pub fn range_depths(
     bam: &mut rust_htslib::bam::IndexedReader,
     chr: &str,
@@ -741,6 +763,52 @@ pub fn calculate_end_position(start: u32, cigar_str: &str) -> anyhow::Result<u32
     Ok(end)
 }
 
+pub fn get_all_positions(
+    record: &Record,
+    header: &HeaderView,
+    bam: &mut IndexedReader,
+) -> anyhow::Result<Vec<(String, u32, u32)>> {
+    let mut positions = Vec::new();
+    let qname = record.qname();
+    let contig = String::from_utf8(header.tid2name(record.tid().try_into()?).to_vec())?;
+    positions.push((
+        contig,
+        record.reference_start() as u32,
+        record.reference_end() as u32,
+    ));
+
+    if let std::result::Result::Ok(record::Aux::String(sa)) = record.aux(b"SA") {
+        let sa_splits: Vec<&str> = sa.split(';').filter(|s| !s.is_empty()).collect();
+        for s in sa_splits {
+            let parts: Vec<&str> = s.split(',').take(4).collect();
+            let chr = parts.first().unwrap();
+            let start: u32 = parts.get(1).unwrap().parse().unwrap();
+            // SAM format is 1-based!
+            let start = start - 1;
+            // let strand = parts.get(2).unwrap();
+            // // CIGAR SA != CIGAR can't calculate the end from it.
+            // let cigar_str = *parts.get(3).unwrap();
+            // let end = calculate_end_position(start, cigar_str).unwrap();
+
+            bam.fetch((chr, start, start + 1))?;
+            let mut founded = false;
+            for record in bam.records().flatten() {
+                if qname == record.qname() {
+                    founded = true;
+                    positions.push((chr.to_string(), start, record.reference_end() as u32));
+                }
+            }
+            if !founded {
+                warn!("Not founded");
+            }
+        }
+    }
+    positions.sort_by(|a, b| a.1.cmp(&b.1));
+    positions.sort_by(|a, b| a.0.cmp(&b.0));
+    positions.dedup();
+    Ok(positions)
+}
+
 
 #[cfg(test)]
 mod tests {