Thomas 1 year ago
parent
commit
71164a50b7
6 changed files with 511 additions and 136 deletions
  1. 350 120
      Cargo.lock
  2. 2 1
      Cargo.toml
  3. 1 1
      src/annotations/cosmic.rs
  4. 3 3
      src/annotations/phase.rs
  5. 37 4
      src/lib.rs
  6. 118 7
      src/variants.rs

File diff suppressed because it is too large
+ 350 - 120
Cargo.lock


+ 2 - 1
Cargo.toml

@@ -42,4 +42,5 @@ trc = "1.2.4"
 pot = "=3.0.0"
 utoipa = "4.2.3"
 regex = "1.10.6"
-
+pandora_lib_scan = { git  = "https://git.t0m4.fr/Thomas/pandora_lib_scan.git" }
+pandora_lib_pileup = { git  = "https://git.t0m4.fr/Thomas/pandora_lib_pileup.git" }

+ 1 - 1
src/annotations/cosmic.rs

@@ -21,7 +21,7 @@ impl FromStr for Cosmic {
         }
 
         if vs[0].contains("MISSING") {
-            return Err(anyhow!("MISSING values {}", s));
+            Err(anyhow!("MISSING values {}", s))
         } else {
             let v: Vec<&str> = vs[2].split("=").collect();
 

+ 3 - 3
src/annotations/phase.rs

@@ -1,9 +1,9 @@
 use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;
 
-
-
 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, ToSchema)]
 pub struct Phase {
-    
+    pub id: String,
 }
+
+

+ 37 - 4
src/lib.rs

@@ -21,6 +21,8 @@ mod tests {
         variants::{Category, Variants},
     };
 
+    use self::variants::AnnotationType;
+
     use super::*;
     #[test]
     fn get_config() -> Result<()> {
@@ -110,7 +112,7 @@ mod tests {
 
     #[test]
     fn p_pangolin() -> Result<()> {
-        let name = "KHABER";
+        let id = "KHABER";
         let logger =
             env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
                 .build();
@@ -118,13 +120,13 @@ mod tests {
         LogWrapper::new(multi.clone(), logger).try_init().unwrap();
 
         let mut variants = Variants::new_from_bytes(
-            name,
-            &format!("/data/longreads_basic_pipe/{name}/diag/{name}_variants.bytes.gz"),
+            id,
+            &format!("/data/longreads_basic_pipe/{id}/diag/{id}_variants.bytes.gz"),
             multi,
         )?;
         variants.pangolin()?;
 
-        let u = variants.filter_category(&vec![Category::Pangolin]);
+        let u = variants.filter_category(&[Category::Pangolin]);
 
         println!("{u:?}");
 
@@ -132,4 +134,35 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn phase() -> anyhow::Result<()> {
+        // init();
+        let id = "SALICETTO";
+        let contig = "chr1";
+        let logger =
+            env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+                .build();
+        let multi = MultiProgress::new();
+        LogWrapper::new(multi.clone(), logger).try_init().unwrap();
+
+        let phases = pandora_lib_scan::phase::load_phases(&format!(
+            "/data/longreads_basic_pipe/{id}/diag/phases/{id}_{contig}_phases.postcard.gz"
+        ))?;
+
+        let mut variants = Variants::new_from_bytes(
+            id,
+            &format!("/data/longreads_basic_pipe/{id}/diag/{id}_variants.bytes.gz"),
+            multi,
+        )?;
+
+        variants.phase_contig(
+            &phases,
+            &format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam",),
+            contig,
+        );
+        let n_phased = variants.with_annotation(&AnnotationType::Phase(annotations::phase::Phase{id: "".to_string()})).len();
+        info!("{n_phased} variants phased");
+        Ok(())
+    }
 }

+ 118 - 7
src/variants.rs

@@ -5,6 +5,7 @@ use crate::{
         gnomad::GnomAD,
         ncbi_gff::NCBIGFF,
         pangolin::{pangolin_parse_results, pangolin_save_variants, run_pangolin, Pangolin},
+        phase::Phase,
         vep::{get_best_vep, vep_chunk, VEP},
     },
     callers::{
@@ -38,10 +39,11 @@ use noodles_fasta::indexed_reader::Builder as FastaBuilder;
 use noodles_gff as gff;
 
 use rayon::prelude::*;
+use rust_htslib::bam::IndexedReader;
 use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
 use std::{
     env::temp_dir,
-    fmt::{self, Display},
+    fmt,
     fs::File,
     str::FromStr,
     sync::{
@@ -331,7 +333,7 @@ impl Variants {
                     n_already.fetch_add(1, Ordering::SeqCst);
                     continue;
                 }
-                let (pos, is_ins) = match tumoral.alt_cat() {
+                let (pos, is_ins) = match tumoral.alteration_category() {
                     AlterationCategory::Ins => (tumoral.position, true),
                     AlterationCategory::Del => (tumoral.position, false),
                     _ => (tumoral.position, false),
@@ -449,7 +451,7 @@ impl Variants {
                                         VariantCategory::Somatic,
                                     ));
                                 }
-                            } else if tumoral.alt_cat() == AlterationCategory::Del {
+                            } else if tumoral.alteration_category() == AlterationCategory::Del {
                                 let n_alt_mrd =
                                     bases.clone().into_iter().filter(|e| *e == b'D').count();
                                 if n_alt_mrd > 0 {
@@ -511,6 +513,30 @@ impl Variants {
             .collect::<Vec<Variant>>()
     }
 
+    pub fn with_annotation(&self, annotation: &AnnotationType) -> Vec<Variant> {
+        self.data
+            .clone()
+            .into_iter()
+            .filter(|v| {
+                v.annotations.iter().any(|a| {
+                    matches!(
+                        (annotation, a),
+                        (
+                            AnnotationType::VariantCategory(_),
+                            AnnotationType::VariantCategory(_)
+                        ) | (AnnotationType::VEP(_), AnnotationType::VEP(_))
+                            | (AnnotationType::Cluster(_), AnnotationType::Cluster(_))
+                            | (AnnotationType::Cosmic(_), AnnotationType::Cosmic(_))
+                            | (AnnotationType::GnomAD(_), AnnotationType::GnomAD(_))
+                            | (AnnotationType::NCBIGFF(_), AnnotationType::NCBIGFF(_))
+                            | (AnnotationType::Pangolin(_), AnnotationType::Pangolin(_))
+                            | (AnnotationType::Phase(_), AnnotationType::Phase(_))
+                    )
+                })
+            })
+            .collect()
+    }
+
     pub fn write_vcf_cat(&mut self, path: &str, cat: &VariantCategory) -> Result<()> {
         info!("Writing VCF {}", path);
 
@@ -777,6 +803,10 @@ impl Variants {
         self.data.len()
     }
 
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
     pub fn constit_len(&self) -> usize {
         self.constit.len()
     }
@@ -789,6 +819,87 @@ impl Variants {
             .collect()
     }
 
+    pub fn phase_contig(
+        &mut self,
+        phases: &[pandora_lib_scan::phase::Phase],
+        bam_path: &str,
+        contig: &str,
+    ) {
+        type Iv = rust_lapper::Interval<u64, pandora_lib_scan::phase::Phase>;
+
+        let data: Vec<Iv> = phases
+            .iter()
+            .map(|p| Iv {
+                start: p.range.unwrap().0,
+                stop: p.range.unwrap().3 + 1, // TODO verif if inclusif
+                val: p.clone(),
+            })
+            .collect();
+        let lapper = rust_lapper::Lapper::new(data);
+
+        let mut bam = IndexedReader::from_path(bam_path).unwrap();
+        let mut annotations = 0;
+        for v in self.data.iter_mut().filter(|v| v.contig == contig) {
+            let over_phases: Vec<&pandora_lib_scan::phase::Phase> = lapper
+                .find(v.position as u64, v.position as u64)
+                .map(|iv| &iv.val)
+                .collect();
+
+            if !over_phases.is_empty() {
+                let reads: Vec<String> = match v.alteration_category() {
+                    AlterationCategory::Snv => {
+                        let base = v.alternative.to_string().as_bytes()[0];
+                        pandora_lib_pileup::qnames_at_base(
+                            &mut bam,
+                            &v.contig,
+                            v.position as i32,
+                            false,
+                        )
+                        .unwrap()
+                        .iter()
+                        .filter(|(_, b)| *b == base)
+                        .map(|(r, _)| r.to_string())
+                        .collect()
+                    }
+                    AlterationCategory::Ins => pandora_lib_pileup::qnames_at_base(
+                        &mut bam,
+                        &v.contig,
+                        v.position as i32,
+                        true,
+                    )
+                    .unwrap()
+                    .iter()
+                    .filter(|(_, b)| *b == b'I')
+                    .map(|(r, _)| r.to_string())
+                    .collect(),
+                    AlterationCategory::Del => pandora_lib_pileup::qnames_at_base(
+                        &mut bam,
+                        &v.contig,
+                        v.position as i32,
+                        false,
+                    )
+                    .unwrap()
+                    .iter()
+                    .filter(|(_, b)| *b == b'D')
+                    .map(|(r, _)| r.to_string())
+                    .collect(),
+                    AlterationCategory::Rep => Vec::new(),
+                    AlterationCategory::Other => Vec::new(),
+                };
+
+                over_phases
+                    .iter()
+                    .filter(|p| p.contains(&reads))
+                    .for_each(|p| {
+                        if let Some(id) = p.id() {
+                            annotations += 1;
+                            v.annotations.push(AnnotationType::Phase(Phase { id }));
+                        };
+                    });
+            }
+        }
+    }
+
     pub fn stats(&self) -> Result<Vec<Stat>> {
         let mut callers_cat = HashMap::new();
         let mut n_caller_data = 0;
@@ -1193,7 +1304,7 @@ impl Variant {
         )
     }
 
-    pub fn alt_cat(&self) -> AlterationCategory {
+    pub fn alteration_category(&self) -> AlterationCategory {
         match (&self.reference, &self.alternative) {
             (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
                 AlterationCategory::Snv
@@ -1358,7 +1469,7 @@ pub enum AnnotationType {
     GnomAD(GnomAD),
     NCBIGFF(NCBIGFF),
     Pangolin(Pangolin),
-    // Phase(Phase)
+    Phase(Phase),
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
@@ -1441,13 +1552,13 @@ impl TryFrom<u8> for Base {
 
 impl Base {
     pub fn into_u8(self) -> u8 {
-        return match self {
+        match self {
             Base::A => b'A',
             Base::T => b'T',
             Base::C => b'C',
             Base::G => b'G',
             Base::N => b'N',
-        };
+        }
     }
 }
 

Some files were not shown because too many files changed in this diff