|
|
@@ -3,14 +3,14 @@ mod config;
|
|
|
use std::{
|
|
|
fs::{self, File},
|
|
|
io::Write,
|
|
|
- num::NonZeroI32,
|
|
|
+ // num::NonZeroI32,
|
|
|
process::{Command, Stdio},
|
|
|
};
|
|
|
|
|
|
use anyhow::{anyhow, Context, Ok};
|
|
|
use config::Config;
|
|
|
use log::info;
|
|
|
-use minimap2::{Mapping, Strand};
|
|
|
+// use minimap2::{Mapping, Strand};
|
|
|
use serde::Serialize;
|
|
|
|
|
|
pub struct Blast {
|
|
|
@@ -90,10 +90,10 @@ impl Blast {
|
|
|
self
|
|
|
}
|
|
|
|
|
|
- pub fn to_mapping(self) -> Option<Vec<Mapping>> {
|
|
|
- self.results
|
|
|
- .map(|res| res.iter().map(Mapping::from).collect())
|
|
|
- }
|
|
|
+ // pub fn to_mapping(self) -> Option<Vec<Mapping>> {
|
|
|
+ // self.results
|
|
|
+ // .map(|res| res.iter().map(Mapping::from).collect())
|
|
|
+ // }
|
|
|
|
|
|
pub fn to_json(self) -> anyhow::Result<String> {
|
|
|
if let Some(results) = self.results {
|
|
|
@@ -333,54 +333,54 @@ pub fn best_blastn(fa_path: &str) -> anyhow::Result<Vec<BlastResult>> {
|
|
|
Ok(results)
|
|
|
}
|
|
|
|
|
|
-pub fn blast(fa_path: &str) -> anyhow::Result<Vec<Mapping>> {
|
|
|
- Ok(best_blastn(fa_path)?.iter().map(Mapping::from).collect())
|
|
|
-}
|
|
|
-
|
|
|
-fn evalue_to_mapq(evalue: f64) -> u32 {
|
|
|
- if evalue <= 0.0 {
|
|
|
- return 60; // Max MAPQ for highly significant hits.
|
|
|
- }
|
|
|
-
|
|
|
- // Convert e-value to a logarithmic scale and then to a MAPQ score.
|
|
|
- let mapq = -10.0 * evalue.log10();
|
|
|
- println!("{mapq}");
|
|
|
-
|
|
|
- // Clamp the value between 0 and 60, which is a typical range for MAPQ scores.
|
|
|
- let clamped_mapq = mapq.clamp(0.0, 60.0);
|
|
|
-
|
|
|
- clamped_mapq as u32
|
|
|
-}
|
|
|
-
|
|
|
-impl From<&BlastResult> for Mapping {
|
|
|
- fn from(br: &BlastResult) -> Self {
|
|
|
- let query_len = NonZeroI32::new(br.q_end as i32 - br.q_start as i32 + 1);
|
|
|
- let (strand, target_start, target_end) = if br.s_start < br.s_end {
|
|
|
- (Strand::Forward, br.s_start as i32, br.s_end as i32)
|
|
|
- } else {
|
|
|
- (Strand::Reverse, br.s_end as i32, br.s_start as i32)
|
|
|
- };
|
|
|
- let mapq = evalue_to_mapq(br.e_value);
|
|
|
-
|
|
|
- Mapping {
|
|
|
- query_name: Some(br.query_id.clone()),
|
|
|
- query_len,
|
|
|
- query_start: br.q_start as i32,
|
|
|
- query_end: br.q_end as i32,
|
|
|
- strand,
|
|
|
- target_name: Some(br.subject_id.clone()),
|
|
|
- target_len: (target_end - target_start + 1).abs(),
|
|
|
- target_start,
|
|
|
- target_end,
|
|
|
- match_len: br.alignment_length as i32,
|
|
|
- block_len: br.alignment_length as i32,
|
|
|
- mapq,
|
|
|
- is_primary: true,
|
|
|
- is_supplementary: false,
|
|
|
- alignment: None,
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
+// pub fn blast(fa_path: &str) -> anyhow::Result<Vec<Mapping>> {
|
|
|
+// Ok(best_blastn(fa_path)?.iter().map(Mapping::from).collect())
|
|
|
+// }
|
|
|
+
|
|
|
+// fn evalue_to_mapq(evalue: f64) -> u32 {
|
|
|
+// if evalue <= 0.0 {
|
|
|
+// return 60; // Max MAPQ for highly significant hits.
|
|
|
+// }
|
|
|
+//
|
|
|
+// // Convert e-value to a logarithmic scale and then to a MAPQ score.
|
|
|
+// let mapq = -10.0 * evalue.log10();
|
|
|
+// println!("{mapq}");
|
|
|
+//
|
|
|
+// // Clamp the value between 0 and 60, which is a typical range for MAPQ scores.
|
|
|
+// let clamped_mapq = mapq.clamp(0.0, 60.0);
|
|
|
+//
|
|
|
+// clamped_mapq as u32
|
|
|
+// }
|
|
|
+
|
|
|
+// impl From<&BlastResult> for Mapping {
|
|
|
+// fn from(br: &BlastResult) -> Self {
|
|
|
+// let query_len = NonZeroI32::new(br.q_end as i32 - br.q_start as i32 + 1);
|
|
|
+// let (strand, target_start, target_end) = if br.s_start < br.s_end {
|
|
|
+// (Strand::Forward, br.s_start as i32, br.s_end as i32)
|
|
|
+// } else {
|
|
|
+// (Strand::Reverse, br.s_end as i32, br.s_start as i32)
|
|
|
+// };
|
|
|
+// let mapq = evalue_to_mapq(br.e_value);
|
|
|
+//
|
|
|
+// Mapping {
|
|
|
+// query_name: Some(br.query_id.clone()),
|
|
|
+// query_len,
|
|
|
+// query_start: br.q_start as i32,
|
|
|
+// query_end: br.q_end as i32,
|
|
|
+// strand,
|
|
|
+// target_name: Some(br.subject_id.clone()),
|
|
|
+// target_len: (target_end - target_start + 1).abs(),
|
|
|
+// target_start,
|
|
|
+// target_end,
|
|
|
+// match_len: br.alignment_length as i32,
|
|
|
+// block_len: br.alignment_length as i32,
|
|
|
+// mapq,
|
|
|
+// is_primary: true,
|
|
|
+// is_supplementary: false,
|
|
|
+// alignment: None,
|
|
|
+// }
|
|
|
+// }
|
|
|
+// }
|
|
|
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
@@ -406,43 +406,43 @@ mod tests {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- #[test]
|
|
|
- fn mapping() -> anyhow::Result<()> {
|
|
|
- // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/06276b91-b672-4441-accd-d012945b2992.fasta";
|
|
|
- // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/21664d3a-8c24-4d4d-9321-54fbf0e5e293.fasta";
|
|
|
- // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/320ee236-897b-4258-8d14-47a3be7e3f92.fasta";
|
|
|
- // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/7660b942-237f-4cb7-9fe8-6a7a8944d47d.fasta"; // inv-10
|
|
|
- // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/8e35799d-c100-4fa3-bbd4-61950dd36545.fasta";
|
|
|
- let id = "ROBIN";
|
|
|
-
|
|
|
- let mut genome = desc_seq_lib::Genome::new();
|
|
|
-
|
|
|
- let paths = fs::read_dir(format!("/data/longreads_basic_pipe/{id}/diag/asm_contigs"))?;
|
|
|
- for path in paths {
|
|
|
- let path = path?.path();
|
|
|
- if path.extension().unwrap() == "fasta" {
|
|
|
- let mappings = Blast::init(path.to_str().unwrap())?
|
|
|
- .run()?
|
|
|
- .keep_best()
|
|
|
- .to_mapping()
|
|
|
- .context("No results")?;
|
|
|
-
|
|
|
- let file_contents = fs::read_to_string(path.to_str().unwrap())
|
|
|
- .context("Failed to read the file")?;
|
|
|
- let mut lines: Vec<&str> = file_contents.split('\n').collect();
|
|
|
- lines = lines[1..].to_vec();
|
|
|
- genome.add_contig(
|
|
|
- path.file_stem().unwrap().to_string_lossy().to_string(),
|
|
|
- mappings,
|
|
|
- None,
|
|
|
- lines.join(""),
|
|
|
- )?;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- genome.stats();
|
|
|
- let u: Vec<String> = genome.contigs().filter_map(|c| c.hgvs()).collect();
|
|
|
- println!("{u:#?}");
|
|
|
- Ok(())
|
|
|
- }
|
|
|
+ // #[test]
|
|
|
+ // fn mapping() -> anyhow::Result<()> {
|
|
|
+ // // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/06276b91-b672-4441-accd-d012945b2992.fasta";
|
|
|
+ // // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/21664d3a-8c24-4d4d-9321-54fbf0e5e293.fasta";
|
|
|
+ // // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/320ee236-897b-4258-8d14-47a3be7e3f92.fasta";
|
|
|
+ // // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/7660b942-237f-4cb7-9fe8-6a7a8944d47d.fasta"; // inv-10
|
|
|
+ // // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/8e35799d-c100-4fa3-bbd4-61950dd36545.fasta";
|
|
|
+ // let id = "ROBIN";
|
|
|
+ //
|
|
|
+ // let mut genome = desc_seq_lib::Genome::new();
|
|
|
+ //
|
|
|
+ // let paths = fs::read_dir(format!("/data/longreads_basic_pipe/{id}/diag/asm_contigs"))?;
|
|
|
+ // for path in paths {
|
|
|
+ // let path = path?.path();
|
|
|
+ // if path.extension().unwrap() == "fasta" {
|
|
|
+ // let mappings = Blast::init(path.to_str().unwrap())?
|
|
|
+ // .run()?
|
|
|
+ // .keep_best()
|
|
|
+ // .to_mapping()
|
|
|
+ // .context("No results")?;
|
|
|
+ //
|
|
|
+ // let file_contents = fs::read_to_string(path.to_str().unwrap())
|
|
|
+ // .context("Failed to read the file")?;
|
|
|
+ // let mut lines: Vec<&str> = file_contents.split('\n').collect();
|
|
|
+ // lines = lines[1..].to_vec();
|
|
|
+ // genome.add_contig(
|
|
|
+ // path.file_stem().unwrap().to_string_lossy().to_string(),
|
|
|
+ // mappings,
|
|
|
+ // None,
|
|
|
+ // lines.join(""),
|
|
|
+ // )?;
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // genome.stats();
|
|
|
+ // let u: Vec<String> = genome.contigs().filter_map(|c| c.hgvs()).collect();
|
|
|
+ // println!("{u:#?}");
|
|
|
+ // Ok(())
|
|
|
+ // }
|
|
|
}
|