|
|
@@ -0,0 +1,140 @@
|
|
|
+use anyhow::{Context, Result};
|
|
|
+use log::{info, warn};
|
|
|
+use regex::Regex;
|
|
|
+use serde::{Deserialize, Serialize};
|
|
|
+use utoipa::ToSchema;
|
|
|
+use std::io::{self, Write};
|
|
|
+use std::str::FromStr;
|
|
|
+use std::{
|
|
|
+ fs::File,
|
|
|
+ io::{BufRead, BufReader},
|
|
|
+ process::{Command, Stdio},
|
|
|
+};
|
|
|
+use uuid::Uuid;
|
|
|
+
|
|
|
+use crate::variants::{ReferenceAlternative, Variants};
|
|
|
+
|
|
|
+#[derive(Debug, ToSchema, Clone, PartialEq, Serialize, Deserialize)]
|
|
|
+pub struct Pangolin {
|
|
|
+ pub predictions: Vec<(u32, f64)>
|
|
|
+}
|
|
|
+
|
|
|
+// pangolin -c CHROM,POS,REF,ALT -s 0.1 /tmp/hattab_test_pango.csv /data/ref/hs1/hs1_simple_chr.fa /data/ref/hs1/gencode.v44.liftedTohs1.db gg.vcf
|
|
|
+pub fn run_pangolin(in_path: &str) -> Result<String> {
|
|
|
+ let tmp_file = format!("/tmp/{}", Uuid::new_v4());
|
|
|
+
|
|
|
+ let bin_dir = "/home/prom/.local/bin";
|
|
|
+ let ref_fa = "/data/ref/hs1/hs1_simple_chr.fa";
|
|
|
+ let db = "/data/ref/hs1/gencode.v44.liftedTohs1.db";
|
|
|
+
|
|
|
+ let mut cmd = Command::new(format!("{bin_dir}/pangolin"))
|
|
|
+ .arg("-c")
|
|
|
+ .arg("CHROM,POS,REF,ALT")
|
|
|
+ .arg("-s")
|
|
|
+ .arg("0.1")
|
|
|
+ .arg(in_path)
|
|
|
+ .arg(ref_fa)
|
|
|
+ .arg(db)
|
|
|
+ .arg(tmp_file.clone())
|
|
|
+ .stdout(Stdio::piped())
|
|
|
+ .spawn()
|
|
|
+ .context("pangolin failed to start")?;
|
|
|
+
|
|
|
+ let stdout = cmd.stdout.take().unwrap();
|
|
|
+
|
|
|
+ let reader = BufReader::new(stdout);
|
|
|
+ reader
|
|
|
+ .lines()
|
|
|
+ .filter_map(|line| line.ok())
|
|
|
+ .filter(|line| line.find("error").is_some())
|
|
|
+ .for_each(|line| warn!("{}", line));
|
|
|
+
|
|
|
+ cmd.wait()?;
|
|
|
+
|
|
|
+ Ok(format!("{}.csv", tmp_file))
|
|
|
+}
|
|
|
+
|
|
|
+pub fn pangolin_save_variants(variants: &Variants) -> Result<String> {
|
|
|
+ let tmp_file = format!("/tmp/{}.csv", Uuid::new_v4());
|
|
|
+ let mut file = File::create(&tmp_file)?;
|
|
|
+
|
|
|
+ writeln!(file, "CHROM,POS,REF,ALT")?;
|
|
|
+ let mut lines = 0;
|
|
|
+ for v in variants.data.iter() {
|
|
|
+ // let u = v.callers();
|
|
|
+ if v.callers().contains(&"Nanomonsv".to_string()) {
|
|
|
+ lines += 1;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ writeln!(
|
|
|
+ file,
|
|
|
+ "{}",
|
|
|
+ vec![
|
|
|
+ v.contig.to_string(),
|
|
|
+ v.position.to_string(),
|
|
|
+ v.reference.to_string(),
|
|
|
+ v.alternative.to_string()
|
|
|
+ ]
|
|
|
+ .join(",")
|
|
|
+ )?;
|
|
|
+ lines += 1;
|
|
|
+ }
|
|
|
+ assert_eq!(lines, variants.len());
|
|
|
+
|
|
|
+ Ok(tmp_file.to_string())
|
|
|
+}
|
|
|
+
|
|
|
+pub fn pangolin_parse_results(
|
|
|
+ path: &str,
|
|
|
+) -> Result<
|
|
|
+ Vec<(
|
|
|
+ String,
|
|
|
+ u32,
|
|
|
+ ReferenceAlternative,
|
|
|
+ ReferenceAlternative,
|
|
|
+ Pangolin,
|
|
|
+ )>,
|
|
|
+> {
|
|
|
+ let re = Regex::new(r"^[0-9]*:[0-9]*").unwrap();
|
|
|
+
|
|
|
+ let file = File::open(path)?;
|
|
|
+ let reader = BufReader::new(file);
|
|
|
+
|
|
|
+ let mut lines = reader.lines();
|
|
|
+ lines.next(); // Skip the first line
|
|
|
+
|
|
|
+ let mut res = Vec::new();
|
|
|
+ for line in lines {
|
|
|
+ let line = line?; // Unwrap the Result
|
|
|
+ let parts: Vec<&str> = line.split(',').collect(); // Split the line by comma
|
|
|
+ if parts.len() != 5 {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if parts[4] == "" {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ let pangolin_res: Vec<&str> = parts[4].split("|").collect();
|
|
|
+ let pangolin_res: Vec<(u32, f64)> = pangolin_res
|
|
|
+ .into_iter()
|
|
|
+ .filter(|s| re.is_match(s))
|
|
|
+ .map(|s| {
|
|
|
+ let (a, b) = s.split_once(":").unwrap();
|
|
|
+ (a.parse().unwrap(), b.parse().unwrap())
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ if pangolin_res.len() > 0 {
|
|
|
+ res.push((
|
|
|
+ parts[0].to_string(),
|
|
|
+ parts[1].parse::<u32>()?,
|
|
|
+ ReferenceAlternative::from_str(parts[2])?,
|
|
|
+ ReferenceAlternative::from_str(parts[3])?,
|
|
|
+ Pangolin { predictions: pangolin_res}
|
|
|
+ ));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ info!("{} pangolin results parsed", res.len());
|
|
|
+ Ok(res)
|
|
|
+}
|