浏览代码

better err propagation; new jq consequence filter

Thomas 10 月之前
父节点
当前提交
04ca65ee34
共有 6 个文件被更改,包括 62 次插入18 次删除
  1. 12 0
      README.md
  2. 27 0
      jq_filters/jq_variants.jq
  3. 14 11
      src/callers/deep_somatic.rs
  4. 1 1
      src/lib.rs
  5. 5 3
      src/pipes/somatic.rs
  6. 3 3
      src/variant/variant.rs

+ 12 - 0
README.md

@@ -33,6 +33,18 @@ jq -L ./jq_filters -C 'include "jq_variants"; [.data[] | select(contig("chrM") a
 find /data/longreads_basic_pipe/ -name "*_diag_hs1_info.json" -type f -exec sh -c 'basename $(dirname $(dirname "{}")) | tr -d "\n"' \; -printf "\t" -exec jq -L ./jq_filters -r 'include "jq_bam"; contig_coverage("chrM")' {} \;
 ```
 
+### Using jq and find VEP consequences (cf https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html)
+
+```
+zcat /data/longreads_basic_pipe/ADJAGBA/diag/somatic_variants.json.gz | jq -L ./jq_filters -C 'include "jq_variants"; consequence("SynonymousVariant")' | bat
+```
+
+### Using jq and find to count VEP consequences 
+```
+find /data/longreads_basic_pipe/ -name "somatic_variants.json.gz" -type f -exec sh -c 'dirname=$(basename $(dirname $(dirname "$1"))); count=$(zcat "$1" | jq -L ./jq_filters -r '\''include "jq_variants"; count_consequence("SynonymousVariant") | [.true_count, .total_count, .proportion] | @tsv'\''); echo "${dirname}\t${count}"' sh {} \;
+```
+
+
 ### Reading log files
 ```
 zcat /data/longreads_basic_pipe/ID/log/deepsomatic/deepvariant_e7ed1.log.gz | jq -r '.log'

+ 27 - 0
jq_filters/jq_variants.jq

@@ -39,3 +39,30 @@ def contig(contig_str):
 def get_value(key):
 	map(select(has($key)) | .[$key]) | first // null;
 
+def consequence(csq):
+	[
+		.data[] | 
+		{
+			chr: num_to_contig(.position.contig),
+			position: .position.position,
+			has_consequence: ([ .annotations[] | select(has("VEP")) | .VEP[] | .consequence | contains([csq])] | any) 
+		}
+	];
+
+def count_consequence(csq):
+	[
+		.data[] |
+			[
+				.annotations[] | 
+				select(has("VEP")) | 
+				.VEP[] | 
+				.consequence | 
+				contains([csq])
+			] | 
+		any 
+	] |
+	{
+		true_count: map(select(. == true)) | length, 
+		total_count: length, 
+		proportion: ((map(select(. == true)) | length) / length)
+	};

+ 14 - 11
src/callers/deep_somatic.rs

@@ -116,10 +116,20 @@ impl Run for DeepSomatic {
                 &self.vcf_passed,
                 BcftoolsConfig::default(),
             )
-            .unwrap();
+            .map_err(|e| {
+                anyhow::anyhow!(
+                    "Error while running bcftools pass for {}.\n{e}",
+                    self.output_vcf
+                )
+            })?;
             report
                 .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
-                .unwrap();
+                .map_err(|e| {
+                    anyhow::anyhow!(
+                        "Error while saving bcftools report for {}.\n{e}",
+                        self.output_vcf
+                    )
+                })?;
         }
 
         Ok(())
@@ -136,19 +146,12 @@ impl Variants for DeepSomatic {
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
-        info!(
-            "Loading variants from {}: {}",
-            caller, self.vcf_passed
-        );
+        info!("Loading variants from {}: {}", caller, self.vcf_passed);
         let variants = read_vcf(&self.vcf_passed)?;
         variants.par_iter().for_each(|v| {
             annotations.insert_update(v.hash(), &add);
         });
-        info!(
-            "{}, {} variants loaded.",
-            caller,
-            variants.len()
-        );
+        info!("{}, {} variants loaded.", caller, variants.len());
         Ok(VariantCollection {
             variants,
             vcf: Vcf::new(self.vcf_passed.clone().into())?,

+ 1 - 1
src/lib.rs

@@ -591,7 +591,7 @@ mod tests {
     fn savana_cn() -> anyhow::Result<()> {
         init();
         let id = "CAMARA";
-        let s = SavanaCopyNumber::load_id(id, Config::default())?;
+        // let s = SavanaCopyNumber::load_id(id, Config::default())?;
         let s = SavanaReadCounts::load_id(id, Config::default())?;
         println!("tumoral reads: {}", s.n_tumoral_reads());
         println!("normal reads: {}", s.n_normal_reads());

+ 5 - 3
src/pipes/somatic.rs

@@ -101,10 +101,12 @@ impl SomaticStats {
                     .split(" + ")
                     .map(|k| k.parse())
                     .collect::<anyhow::Result<Vec<Annotation>>>()
-                    .unwrap();
-                (anns, *e.value())
+                    .map_err(|err| {
+                        anyhow::anyhow!("Error while splitting key in AnnotationsStats.\n{err}")
+                    })?;
+                Ok((anns, *e.value()))
             })
-            .collect();
+            .collect::<anyhow::Result<Vec<(Vec<Annotation>, u64)>>>()?;
 
         let callers_somatic_solo_tumor = [
             self.input

+ 3 - 3
src/variant/variant.rs

@@ -721,7 +721,7 @@ impl Formats {
             .clone()
             .into_iter()
             .map(|e| {
-                if let Format::VAF(v) = e {
+                if let Format::VAF(_v) = e {
                     e
                     // Format::AF(v)
                 } else {
@@ -899,7 +899,7 @@ pub fn run_variants(iterable: &mut [CallerBox]) -> anyhow::Result<()> {
     iterable
         .iter_mut()
         .try_for_each(|runner| runner.run())
-        .map_err(|e| anyhow::anyhow!("Error in run variants.\n{e}"))
+        .map_err(|e| anyhow::anyhow!("Error while calling run_variants.\n{e}"))
 }
 
 pub fn load_variants(
@@ -910,7 +910,7 @@ pub fn load_variants(
         .par_iter()
         .map(|runner| runner.variants(annotations))
         .collect::<anyhow::Result<Vec<_>>>()
-        .map_err(|e| anyhow::anyhow!("Error in load variants.\n{e}"))
+        .map_err(|e| anyhow::anyhow!("Error while running load_variants.\n{e}"))
 }
 
 pub fn parallel_intersection<T: Hash + Eq + Clone + Send + Sync>(