Thomas 1 年前
コミット
4234932ead
3 ファイル変更、98 行追加、26 行削除
  1. + 3 - 3
      Cargo.lock
  2. + 20 - 4
      src/lib.rs
  3. + 75 - 19
      src/phase.rs

+ 3 - 3
Cargo.lock

@@ -229,9 +229,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.1.7"
+version = "1.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc"
+checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549"
 dependencies = [
  "jobserver",
  "libc",
@@ -1345,7 +1345,7 @@ dependencies = [
 [[package]]
 name = "pandora_lib_variants"
 version = "0.1.0"
-source = "git+https://git.t0m4.fr/Thomas/pandora_lib_variants.git#97ec4c58d89abb607c93d8e3b41dee8df242cd82"
+source = "git+https://git.t0m4.fr/Thomas/pandora_lib_variants.git#a49815478eac792537b131854bd0b059afcb163f"
 dependencies = [
  "anyhow",
  "bgzip",

+ 20 - 4
src/lib.rs

@@ -282,7 +282,8 @@ mod tests {
     use indicatif::MultiProgress;
     use indicatif_log_bridge::LogWrapper;
     use num_format::{CustomFormat, Grouping, ToFormattedString};
-    use pandora_lib_variants::{in_out::dict_reader::read_dict, variants::Variant};
+    use pandora_lib_variants::{in_out::dict_reader::read_dict, variants::{AlterationCategory, Variant}};
+    use rust_htslib::bam::IndexedReader;
 
     use crate::phase::{load_phases, save_phases, variants_phasing};
 
@@ -326,7 +327,8 @@ mod tests {
         let multi = MultiProgress::new();
         LogWrapper::new(multi.clone(), logger).try_init().unwrap();
 
-        let bam_path = &format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
+        let bam_path_a = &format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
+        let bam_path_b = &format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");
         let somatic_path =
             &format!("/data/longreads_basic_pipe/{id}/diag/{id}_constit.bytes.gz");
         let phases_dir = format!("/data/longreads_basic_pipe/{id}/diag/phases");
@@ -347,6 +349,9 @@ mod tests {
                 let mut v = v.clone();
                 v.vaf() > 0.4 && v.vaf() < 0.6
             })
+            .filter(|v| {
+                matches!(v.alt_cat(), AlterationCategory::Snv)
+            })
             .collect();
 
         let mut contigs = HashSet::new();
@@ -370,9 +375,9 @@ mod tests {
                 .into_par_iter()
                 .filter(|v| v.contig == contig)
                 .collect();
-            if !variants.is_empty() {
+            if variants.len() > 1 {
                 info!("{contig}: {} variants to phase", v.len());
-                let phases = variants_phasing(v, bam_path, min_records, &multi);
+                let phases = variants_phasing(v, bam_path_a, bam_path_b, min_records, &multi);
                 if !phases.is_empty() {
                     save_phases(&phases, &format!("{phases_dir}/{id}_{contig}_phases.postcard.gz"))?;
                 }
@@ -398,6 +403,17 @@ mod tests {
         let phase_path = format!("{phases_dir}/{id}_{contig}_phases.postcard.gz");
         let p = load_phases(&phase_path)?;
         info!("{} phases", p.len());
+
+        let bam_path_a = &format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
+        let bam_path_b = &format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");
+        let mut bam_a = IndexedReader::from_path(bam_path_a)?;
+        let mut bam_b = IndexedReader::from_path(bam_path_b)?;
+
+        for mut phase in p {
+            if phase.data.len() > 1 {
+                info!("{}\t{}", phase.id(&mut bam_a, &mut bam_b)?, phase.mean_vaf());
+            }
+        }
         Ok(())
     }
 }

+ 75 - 19
src/phase.rs

@@ -9,7 +9,11 @@ use rayon::prelude::*;
 use rust_htslib::bam::IndexedReader;
 use serde::{Deserialize, Serialize};
 use std::{
-    cmp::Ordering, collections::{HashSet, VecDeque}, fs::File, io::Read, thread::spawn
+    cmp::Ordering,
+    collections::{HashSet, VecDeque},
+    fs::File,
+    io::Read,
+    thread::spawn,
 };
 use std::{fs::OpenOptions, io::Write};
 
@@ -24,19 +28,29 @@ pub struct HeteroVar {
 
 impl HeteroVar {
     pub fn new(
-        bam: &mut IndexedReader,
+        bam_a: &mut IndexedReader,
+        bam_b: &mut IndexedReader,
         chr: &str,
         position: i32,
         reference: u8,
         alternative: u8,
     ) -> Result<(HeteroVar, HeteroVar)> {
-        let rec_base = if let std::result::Result::Ok(rb) =
-            pandora_lib_pileup::qnames_at_base(bam, chr, position, false)
+        let mut rec_base = if let std::result::Result::Ok(rb) =
+            pandora_lib_pileup::qnames_at_base(bam_a, chr, position, false)
         {
             rb
         } else {
             return Err(anyhow!("Error while reading BAM file."));
         };
+
+        if let std::result::Result::Ok(rb) =
+            pandora_lib_pileup::qnames_at_base(bam_b, chr, position, false)
+        {
+            rec_base.extend(rb);
+        } else {
+            return Err(anyhow!("Error while reading BAM file."));
+        };
+
         let depth = rec_base.len() as i32;
         if depth == 0 {
             return Err(anyhow!("No records"));
@@ -133,7 +147,11 @@ impl Phase {
         }
     }
 
-    pub fn range(&self, bam: &mut IndexedReader) -> Result<(i64, i64, i64, i64)> {
+    pub fn range(
+        &self,
+        bam_a: &mut IndexedReader,
+        bam_b: &mut IndexedReader,
+    ) -> Result<(i64, i64, i64, i64)> {
         let mut phase = self.clone();
         phase.sort_dedup();
         let data = &self.data;
@@ -141,10 +159,23 @@ impl Phase {
         let left = data.first().unwrap();
         let right = data.last().unwrap();
 
-        let left_records =
-            pandora_lib_pileup::records_at_base(bam, &left.chr, left.position, true)?;
-        let right_records =
-            pandora_lib_pileup::records_at_base(bam, &right.chr, right.position, true)?;
+        let mut left_records =
+            pandora_lib_pileup::records_at_base(bam_a, &left.chr, left.position, false)?;
+        let mut right_records =
+            pandora_lib_pileup::records_at_base(bam_a, &right.chr, right.position, false)?;
+
+        left_records.extend(pandora_lib_pileup::records_at_base(
+            bam_b,
+            &left.chr,
+            left.position,
+            false,
+        )?);
+        right_records.extend(pandora_lib_pileup::records_at_base(
+            bam_b,
+            &right.chr,
+            right.position,
+            false,
+        )?);
 
         let left_starts: Vec<_> = left_records
             .iter()
@@ -161,11 +192,18 @@ impl Phase {
         let min_cov = left_starts.iter().max();
         let max_cov = right_ends.iter().min();
         let max = right_ends.iter().max();
+        if max.is_none() {
+            println!("{right:?}");
+            println!("{}", String::from_utf8(vec![right.base]).unwrap());
+            println!("{right_records:?}");
+            println!("{right_ends:?}");
+            println!("{min:?} {min_cov:?} {max:?} {max_cov:?}");
+        }
 
         if let (Some(min), Some(min_cov), Some(max_cov), Some(max)) = (min, min_cov, max_cov, max) {
             Ok((*min, *min_cov, *max_cov, *max))
         } else {
-            Err(anyhow!("problem"))
+            Err(anyhow!(format!("can't find range {:#?}", self)))
         }
     }
 
@@ -180,9 +218,9 @@ impl Phase {
             .dedup_by(|a, b| a.position == b.position && a.chr == b.chr && a.base == b.base);
     }
 
-    pub fn bed_string(&self, bam: &mut IndexedReader) -> Result<String> {
+    pub fn bed_string(&self, bam_a: &mut IndexedReader, bam_b: &mut IndexedReader) -> Result<String> {
         let first = self.data.first().unwrap();
-        let (_min, min_cov, max_cov, _max) = self.range(bam)?;
+        let (_min, min_cov, max_cov, _max) = self.range(bam_a, bam_b)?;
         Ok([
             first.chr.to_string(),
             min_cov.to_string(),
@@ -191,6 +229,20 @@ impl Phase {
         ]
         .join("\t"))
     }
+
+    pub fn id(&mut self, bam_a: &mut IndexedReader, bam_b: &mut IndexedReader) -> anyhow::Result<String> {
+        self.sort_dedup();
+        let synteny = String::from_utf8(self.data.iter().map(|var| var.base).collect())?;
+        let (min, _, _, max) = self.range(bam_a, bam_b)?;
+        let f = self.data.first().unwrap().chr.clone();
+        let l = self.data.last().unwrap().chr.clone();
+        let b = if l != f {
+            format!("{l}:")
+        } else {
+            "".to_string()
+        };
+        Ok(format!("{}:{min}-{}{max}[{synteny}]", f, b))
+    }
 }
 
 impl Ord for Phase {
@@ -293,7 +345,8 @@ pub fn merge_phases(
 
 pub fn variants_phasing(
     variants: Vec<Variant>,
-    bam_path: &str,
+    bam_path_a: &str,
+    bam_path_b: &str,
     min_records: usize,
     multi: &MultiProgress,
 ) -> Vec<Phase> {
@@ -303,7 +356,8 @@ pub fn variants_phasing(
     let mut phases = variants
         .par_chunks(2_000)
         .flat_map(|chunks| {
-            let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
+            let mut bam_a = rust_htslib::bam::IndexedReader::from_path(bam_path_a).unwrap();
+            let mut bam_b = rust_htslib::bam::IndexedReader::from_path(bam_path_b).unwrap();
             let mut errors = Vec::new();
             let mut phases: Vec<Phase> = Vec::new();
             for v in chunks {
@@ -314,7 +368,8 @@ pub fn variants_phasing(
                     continue;
                 }
                 match HeteroVar::new(
-                    &mut bam,
+                    &mut bam_a,
+                    &mut bam_b,
                     &v.contig.to_string(),
                     v.position as i32,
                     reference.pop().unwrap(),
@@ -356,7 +411,8 @@ pub fn variants_phasing(
 pub fn write_phases_bed(
     phases: &Vec<Phase>,
     min_var: usize,
-    bam_path: &str,
+    bam_path_a: &str,
+    bam_path_b: &str,
     contig: &str,
     file: &str,
 ) -> Result<()> {
@@ -370,11 +426,12 @@ pub fn write_phases_bed(
     let mut ranges: Vec<_> = phases
         .par_chunks(100)
         .flat_map(|chunks| {
-            let mut bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
+            let mut bam_a = rust_htslib::bam::IndexedReader::from_path(bam_path_a).unwrap();
+            let mut bam_b = rust_htslib::bam::IndexedReader::from_path(bam_path_b).unwrap();
             chunks
                 .to_vec()
                 .iter()
-                .map(|p| (p.range(&mut bam), p.mean_vaf(), p.data.len()))
+                .map(|p| (p.range(&mut bam_a, &mut bam_b), p.mean_vaf(), p.data.len()))
                 .collect::<Vec<_>>()
         })
         .filter(|(r, _, _)| r.is_ok())
@@ -477,4 +534,3 @@ pub fn load_phases(filename: &str) -> anyhow::Result<Vec<Phase>> {
     let phases: Vec<Phase> = postcard::from_bytes(&bytes).expect("Deserialization failed");
     Ok(phases)
 }
-