瀏覽代碼

save and restore annot

Thomas 1 年之前
父節點
當前提交
e7b1e6b3dd
共有 3 個文件被更改,包括 90 次插入32 次删除
  1. 10 6
      src/lib.rs
  2. 51 1
      src/sql/variants_sql.rs
  3. 29 25
      src/variants.rs

+ 10 - 6
src/lib.rs

@@ -34,7 +34,7 @@ mod tests {
 
     #[test]
     fn load_from_vcf() -> Result<()> {
-        let name = "CAMARA";
+        let name = "CHAMPION";
 
         let logger =
             env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
@@ -42,26 +42,30 @@ mod tests {
         let multi = MultiProgress::new();
         LogWrapper::new(multi.clone(), logger).try_init().unwrap();
 
-        variants::run_pipe(name, false, &multi)?;
+        variants::run_pipe(name, true, &multi)?;
 
         Ok(())
     }
 
     #[test]
     fn annot() -> anyhow::Result<()> {
-        let id = "CAMARA";
+        let id = "CHAMPION";
         write_annotaded(
             &format!("/data/longreads_basic_pipe/{id}/diag/{id}_variants.sqlite"),
             &format!("/data/longreads_basic_pipe/{id}/diag/report/data/{id}_annot_variants.json"),
         )
+        // update_annotations(
+        //     &format!("/data/longreads_basic_pipe/{id}/diag/{id}_variants.sqlite"),
+        //     &format!("/data/longreads_basic_pipe/{id}/diag/report/data/{id}_annot_variants.json"),
+        // )
     }
 
     #[test]
     fn graph() -> anyhow::Result<()> {
         let id = "CAMARA";
-        let stats = AllStats::load_json(
-            &format!("/data/longreads_basic_pipe/{id}/diag/report/data/{id}_variants_stats.json"),
-        )?;
+        let stats = AllStats::load_json(&format!(
+            "/data/longreads_basic_pipe/{id}/diag/report/data/{id}_variants_stats.json"
+        ))?;
         stats.generate_graph("/data/stats")?;
         Ok(())
     }

+ 51 - 1
src/sql/variants_sql.rs

@@ -1,7 +1,12 @@
-use std::{fs::OpenOptions, io::Write, str::FromStr};
+use std::{
+    fs::{File, OpenOptions},
+    io::{Read, Write},
+    str::FromStr,
+};
 
 use anyhow::{anyhow, Context, Ok, Result};
 use indicatif::MultiProgress;
+use log::warn;
 use serde::{Deserialize, Serialize};
 use serde_rusqlite::*;
 
@@ -283,3 +288,48 @@ pub fn write_annotaded(db_path: &str, json_path: &str) -> anyhow::Result<()> {
 
     Ok(())
 }
+
+pub fn read_annotated(json_path: &str) -> anyhow::Result<Vec<VariantSQL>> {
+    let mut file = File::open(json_path)?;
+    let mut contents = String::new();
+    file.read_to_string(&mut contents)?;
+
+    let variants: Vec<VariantSQL> = serde_json::from_str(&contents)?;
+    Ok(variants)
+}
+
+pub fn update_annotations(db_path: &str, json_path: &str) -> Result<()> {
+    let variants = read_annotated(json_path)?;
+    let connection = rusqlite::Connection::open(db_path)?;
+
+    connection.execute("BEGIN TRANSACTION", [])?;
+
+    let mut stmt = connection.prepare(
+        "UPDATE Variants 
+         SET interpretation = ?1 
+         WHERE contig = ?2 
+         AND position = ?3 
+         AND reference = ?4 
+         AND alternative = ?5",
+    )?;
+
+    for variant in variants {
+        let result = stmt.execute([
+            &variant.interpretation,
+            &Some(variant.contig.clone()),
+            &Some(variant.position.to_string()),
+            &Some(variant.reference.clone()),
+            &Some(variant.alternative.clone()),
+        ])?;
+        if result == 0 {
+            warn!(
+                "Variant not found: contig={}, position={}, reference={}, alternative={}",
+                variant.contig, variant.position, variant.reference, variant.alternative
+            );
+        }
+    }
+
+    connection.execute("COMMIT", [])?;
+
+    Ok(())
+}

+ 29 - 25
src/variants.rs

@@ -22,7 +22,10 @@ use crate::{
         vcf_reader::{read_vcf, VCFRow},
         vcf_writer::{vcf_header_from, VariantWritter},
     },
-    sql::{stats_sql::insert_stats, variants_sql::insert_variants},
+    sql::{
+        stats_sql::insert_stats,
+        variants_sql::{insert_variants, update_annotations, write_annotaded},
+    },
     utils::{
         chi_square_test_for_proportions, count_repetitions, estimate_shannon_entropy,
         get_hts_nt_pileup, new_pg, new_pg_speed, print_stat_cat,
@@ -1886,10 +1889,10 @@ pub enum Category {
     Pangolin,
 }
 
-pub fn run_pipe(name: &str, force: bool, multi: &MultiProgress) -> Result<()> {
+pub fn run_pipe(id: &str, force: bool, multi: &MultiProgress) -> Result<()> {
     let cfg = config::Config::get()?;
     let deepvariant_diag_vcf = format!(
-        "{}/{name}/diag/DeepVariant/{name}_diag_DeepVariant_PASSED.vcf.gz",
+        "{}/{id}/diag/DeepVariant/{id}_diag_DeepVariant_PASSED.vcf.gz",
         cfg.longreads_results_dir
     );
     if !std::path::Path::new(&deepvariant_diag_vcf).exists() {
@@ -1897,28 +1900,25 @@ pub fn run_pipe(name: &str, force: bool, multi: &MultiProgress) -> Result<()> {
         // panic!("{deepvariant_diag_vcf} is required")
     }
     let deepvariant_mrd_vcf = format!(
-        "{}/{name}/mrd/DeepVariant/{name}_mrd_DeepVariant_PASSED.vcf.gz",
+        "{}/{id}/mrd/DeepVariant/{id}_mrd_DeepVariant_PASSED.vcf.gz",
         cfg.longreads_results_dir
     );
     if !std::path::Path::new(&deepvariant_mrd_vcf).exists() {
         return Err(anyhow!("{deepvariant_mrd_vcf} is required"));
     }
-    let mrd_bam = format!(
-        "{}/{name}/mrd/{name}_mrd_hs1.bam",
-        cfg.longreads_results_dir
-    );
+    let mrd_bam = format!("{}/{id}/mrd/{id}_mrd_hs1.bam", cfg.longreads_results_dir);
     if !std::path::Path::new(&mrd_bam).exists() {
         return Err(anyhow!("{mrd_bam} is required"));
     }
     let clairs_vcf = format!(
-        "{}/{name}/diag/ClairS/{name}_diag_clairs_PASSED.vcf.gz",
+        "{}/{id}/diag/ClairS/{id}_diag_clairs_PASSED.vcf.gz",
         cfg.longreads_results_dir
     );
     if !std::path::Path::new(&clairs_vcf).exists() {
         return Err(anyhow!("{clairs_vcf} is required"));
     }
     let clairs_indels_vcf = format!(
-        "{}/{name}/diag/ClairS/{name}_diag_clairs_indel_PASSED.vcf.gz",
+        "{}/{id}/diag/ClairS/{id}_diag_clairs_indel_PASSED.vcf.gz",
         cfg.longreads_results_dir
     );
     if !std::path::Path::new(&clairs_indels_vcf).exists() {
@@ -1936,7 +1936,7 @@ pub fn run_pipe(name: &str, force: bool, multi: &MultiProgress) -> Result<()> {
     //     return Err(anyhow!("{sniffles_vcf} is required"));
     // }
     let nanomonsv_vcf = format!(
-        "{}/{name}/diag/nanomonsv/{name}_diag_nanomonsv_PASSED.vcf.gz",
+        "{}/{id}/diag/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz",
         cfg.longreads_results_dir
     );
     if !std::path::Path::new(&nanomonsv_vcf).exists() {
@@ -1946,38 +1946,37 @@ pub fn run_pipe(name: &str, force: bool, multi: &MultiProgress) -> Result<()> {
     // let db_path = "/data/db_results.sqlite".to_string();
     // `${data_dir}/${name}/diag/${name}_variants.sqlite`
     let db_path = format!(
-        "{}/{name}/diag/{name}_variants.sqlite",
+        "{}/{id}/diag/{id}_variants.sqlite",
         cfg.longreads_results_dir
     );
     let bytes_path = format!(
-        "{}/{name}/diag/{name}_variants.bytes.gz",
+        "{}/{id}/diag/{id}_variants.bytes.gz",
         cfg.longreads_results_dir
     );
 
-    let loh_path = format!(
-        "{}/{name}/diag/{name}_loh.vcf.gz",
-        cfg.longreads_results_dir
-    );
+    let loh_path = format!("{}/{id}/diag/{id}_loh.vcf.gz", cfg.longreads_results_dir);
     // let db_constit_path = format!(
     //     "{}/{name}/diag/{name}_constit.sqlite",
     //     cfg.longreads_results_dir
     // );
     let bytes_constit_path = format!(
-        "{}/{name}/diag/{name}_constit.bytes.gz",
+        "{}/{id}/diag/{id}_constit.bytes.gz",
         cfg.longreads_results_dir
     );
 
-    let report_data_dir = format!("{}/{name}/diag/report/data", cfg.longreads_results_dir);
+    let report_data_dir = format!("{}/{id}/diag/report/data", cfg.longreads_results_dir);
     if !std::path::Path::new(&report_data_dir).exists() {
         fs::create_dir_all(&report_data_dir)?;
     }
 
-    let stats_path = format!("{}/{name}_variants_stats.json", report_data_dir);
+    let stats_path = format!("{}/{id}_variants_stats.json", report_data_dir);
+
+    let af_init_path = format!("{}/{id}_variants_af_init.csv", report_data_dir);
 
-    let af_init_path = format!("{}/{name}_variants_af_init.csv", report_data_dir);
+    let af_final_path = format!("{}/{id}_variants_af_final.csv", report_data_dir);
+    let graphs_prefix = format!("{}/{id}_barcharts", report_data_dir);
 
-    let af_final_path = format!("{}/{name}_variants_af_final.csv", report_data_dir);
-    let graphs_prefix = format!("{}/{name}_barcharts", report_data_dir);
+    let annot_json = format!("{report_data_dir}/{id}_annot_variants.json");
 
     let sources = vec![
         (
@@ -2011,7 +2010,7 @@ pub fn run_pipe(name: &str, force: bool, multi: &MultiProgress) -> Result<()> {
             &VariantType::Somatic,
         ),
     ];
-    let mut variants = Variants::from_vcfs(name.to_string(), sources, &cfg, multi.clone())?;
+    let mut variants = Variants::from_vcfs(id.to_string(), sources, &cfg, multi.clone())?;
 
     write_af_data(&variants, &af_init_path).context("Can't write initial AF data")?;
 
@@ -2022,7 +2021,7 @@ pub fn run_pipe(name: &str, force: bool, multi: &MultiProgress) -> Result<()> {
     variants.bam_filters(&mrd_bam);
 
     let constits = variants.get_cat(&VariantCategory::Constit);
-    let constits = Variants::from_vec(name.to_string(), multi, constits);
+    let constits = Variants::from_vec(id.to_string(), multi, constits);
     constits.save_bytes(&bytes_constit_path)?;
     variants.keep_somatics_un();
     info!("Variants retained: {}", variants.len());
@@ -2055,12 +2054,17 @@ pub fn run_pipe(name: &str, force: bool, multi: &MultiProgress) -> Result<()> {
 
     if std::path::Path::new(&db_path).exists() {
         if force {
+            write_annotaded(&db_path, &annot_json)?;
             fs::remove_file(&db_path)?;
             variants.save_sql(&db_path)?;
         }
     } else {
         variants.save_sql(&db_path)?;
     }
+
+    if std::path::Path::new(&annot_json).exists() {
+        update_annotations(&db_path, &annot_json)?;
+    }
     info!("Variants : {}", variants.len());
 
     Ok(())