Browse Source

rm desc_seq & minimap2

Thomas 1 year ago
parent
commit
563548cfbe
3 changed files with 287 additions and 3154 deletions
  1. 192 3059
      Cargo.lock
  2. 2 2
      Cargo.toml
  3. 93 93
      src/lib.rs

File diff suppressed because it is too large
+ 192 - 3059
Cargo.lock


+ 2 - 2
Cargo.toml

@@ -11,6 +11,6 @@ anyhow = "^1.0.75"
 serde_json = "1.0.120"
 serde = { version = "1.0.204", features = ["derive"] }
 confy = "0.6.1"
-minimap2 = { git = "https://github.com/jguhlin/minimap2-rs" }
-desc_seq_lib = { git = "https://git.t0m4.fr/Thomas/desc_seq_lib.git" }
+# minimap2 = { git = "https://github.com/jguhlin/minimap2-rs" }
+# desc_seq_lib = { git = "https://git.t0m4.fr/Thomas/desc_seq_lib.git" }
 num-format = "0.4.4"

+ 93 - 93
src/lib.rs

@@ -3,14 +3,14 @@ mod config;
 use std::{
     fs::{self, File},
     io::Write,
-    num::NonZeroI32,
+    // num::NonZeroI32,
     process::{Command, Stdio},
 };
 
 use anyhow::{anyhow, Context, Ok};
 use config::Config;
 use log::info;
-use minimap2::{Mapping, Strand};
+// use minimap2::{Mapping, Strand};
 use serde::Serialize;
 
 pub struct Blast {
@@ -90,10 +90,10 @@ impl Blast {
         self
     }
 
-    pub fn to_mapping(self) -> Option<Vec<Mapping>> {
-        self.results
-            .map(|res| res.iter().map(Mapping::from).collect())
-    }
+    // pub fn to_mapping(self) -> Option<Vec<Mapping>> {
+    //     self.results
+    //         .map(|res| res.iter().map(Mapping::from).collect())
+    // }
 
     pub fn to_json(self) -> anyhow::Result<String> {
         if let Some(results) = self.results {
@@ -333,54 +333,54 @@ pub fn best_blastn(fa_path: &str) -> anyhow::Result<Vec<BlastResult>> {
     Ok(results)
 }
 
-pub fn blast(fa_path: &str) -> anyhow::Result<Vec<Mapping>> {
-    Ok(best_blastn(fa_path)?.iter().map(Mapping::from).collect())
-}
-
-fn evalue_to_mapq(evalue: f64) -> u32 {
-    if evalue <= 0.0 {
-        return 60; // Max MAPQ for highly significant hits.
-    }
-
-    // Convert e-value to a logarithmic scale and then to a MAPQ score.
-    let mapq = -10.0 * evalue.log10();
-    println!("{mapq}");
-
-    // Clamp the value between 0 and 60, which is a typical range for MAPQ scores.
-    let clamped_mapq = mapq.clamp(0.0, 60.0);
-
-    clamped_mapq as u32
-}
-
-impl From<&BlastResult> for Mapping {
-    fn from(br: &BlastResult) -> Self {
-        let query_len = NonZeroI32::new(br.q_end as i32 - br.q_start as i32 + 1);
-        let (strand, target_start, target_end) = if br.s_start < br.s_end {
-            (Strand::Forward, br.s_start as i32, br.s_end as i32)
-        } else {
-            (Strand::Reverse, br.s_end as i32, br.s_start as i32)
-        };
-        let mapq = evalue_to_mapq(br.e_value);
-
-        Mapping {
-            query_name: Some(br.query_id.clone()),
-            query_len,
-            query_start: br.q_start as i32,
-            query_end: br.q_end as i32,
-            strand,
-            target_name: Some(br.subject_id.clone()),
-            target_len: (target_end - target_start + 1).abs(),
-            target_start,
-            target_end,
-            match_len: br.alignment_length as i32,
-            block_len: br.alignment_length as i32,
-            mapq,
-            is_primary: true,
-            is_supplementary: false,
-            alignment: None,
-        }
-    }
-}
+// pub fn blast(fa_path: &str) -> anyhow::Result<Vec<Mapping>> {
+//     Ok(best_blastn(fa_path)?.iter().map(Mapping::from).collect())
+// }
+
+// fn evalue_to_mapq(evalue: f64) -> u32 {
+//     if evalue <= 0.0 {
+//         return 60; // Max MAPQ for highly significant hits.
+//     }
+//
+//     // Convert e-value to a logarithmic scale and then to a MAPQ score.
+//     let mapq = -10.0 * evalue.log10();
+//     println!("{mapq}");
+//
+//     // Clamp the value between 0 and 60, which is a typical range for MAPQ scores.
+//     let clamped_mapq = mapq.clamp(0.0, 60.0);
+//
+//     clamped_mapq as u32
+// }
+
+// impl From<&BlastResult> for Mapping {
+//     fn from(br: &BlastResult) -> Self {
+//         let query_len = NonZeroI32::new(br.q_end as i32 - br.q_start as i32 + 1);
+//         let (strand, target_start, target_end) = if br.s_start < br.s_end {
+//             (Strand::Forward, br.s_start as i32, br.s_end as i32)
+//         } else {
+//             (Strand::Reverse, br.s_end as i32, br.s_start as i32)
+//         };
+//         let mapq = evalue_to_mapq(br.e_value);
+//
+//         Mapping {
+//             query_name: Some(br.query_id.clone()),
+//             query_len,
+//             query_start: br.q_start as i32,
+//             query_end: br.q_end as i32,
+//             strand,
+//             target_name: Some(br.subject_id.clone()),
+//             target_len: (target_end - target_start + 1).abs(),
+//             target_start,
+//             target_end,
+//             match_len: br.alignment_length as i32,
+//             block_len: br.alignment_length as i32,
+//             mapq,
+//             is_primary: true,
+//             is_supplementary: false,
+//             alignment: None,
+//         }
+//     }
+// }
 
 #[cfg(test)]
 mod tests {
@@ -406,43 +406,43 @@ mod tests {
         Ok(())
     }
 
-    #[test]
-    fn mapping() -> anyhow::Result<()> {
-        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/06276b91-b672-4441-accd-d012945b2992.fasta";
-        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/21664d3a-8c24-4d4d-9321-54fbf0e5e293.fasta";
-        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/320ee236-897b-4258-8d14-47a3be7e3f92.fasta";
-        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/7660b942-237f-4cb7-9fe8-6a7a8944d47d.fasta"; // inv-10
-        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/8e35799d-c100-4fa3-bbd4-61950dd36545.fasta";
-        let id = "ROBIN";
-
-        let mut genome = desc_seq_lib::Genome::new();
-
-        let paths = fs::read_dir(format!("/data/longreads_basic_pipe/{id}/diag/asm_contigs"))?;
-        for path in paths {
-            let path = path?.path();
-            if path.extension().unwrap() == "fasta" {
-                let mappings = Blast::init(path.to_str().unwrap())?
-                    .run()?
-                    .keep_best()
-                    .to_mapping()
-                    .context("No results")?;
-
-                let file_contents = fs::read_to_string(path.to_str().unwrap())
-                    .context("Failed to read the file")?;
-                let mut lines: Vec<&str> = file_contents.split('\n').collect();
-                lines = lines[1..].to_vec();
-                genome.add_contig(
-                    path.file_stem().unwrap().to_string_lossy().to_string(),
-                    mappings,
-                    None,
-                    lines.join(""),
-                )?;
-            }
-        }
-
-        genome.stats();
-        let u: Vec<String> = genome.contigs().filter_map(|c| c.hgvs()).collect();
-        println!("{u:#?}");
-        Ok(())
-    }
+    // #[test]
+    // fn mapping() -> anyhow::Result<()> {
+    //     // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/06276b91-b672-4441-accd-d012945b2992.fasta";
+    //     // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/21664d3a-8c24-4d4d-9321-54fbf0e5e293.fasta";
+    //     // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/320ee236-897b-4258-8d14-47a3be7e3f92.fasta";
+    //     // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/7660b942-237f-4cb7-9fe8-6a7a8944d47d.fasta"; // inv-10
+    //     // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/8e35799d-c100-4fa3-bbd4-61950dd36545.fasta";
+    //     let id = "ROBIN";
+    //
+    //     let mut genome = desc_seq_lib::Genome::new();
+    //
+    //     let paths = fs::read_dir(format!("/data/longreads_basic_pipe/{id}/diag/asm_contigs"))?;
+    //     for path in paths {
+    //         let path = path?.path();
+    //         if path.extension().unwrap() == "fasta" {
+    //             let mappings = Blast::init(path.to_str().unwrap())?
+    //                 .run()?
+    //                 .keep_best()
+    //                 .to_mapping()
+    //                 .context("No results")?;
+    //
+    //             let file_contents = fs::read_to_string(path.to_str().unwrap())
+    //                 .context("Failed to read the file")?;
+    //             let mut lines: Vec<&str> = file_contents.split('\n').collect();
+    //             lines = lines[1..].to_vec();
+    //             genome.add_contig(
+    //                 path.file_stem().unwrap().to_string_lossy().to_string(),
+    //                 mappings,
+    //                 None,
+    //                 lines.join(""),
+    //             )?;
+    //         }
+    //     }
+    //
+    //     genome.stats();
+    //     let u: Vec<String> = genome.contigs().filter_map(|c| c.hgvs()).collect();
+    //     println!("{u:#?}");
+    //     Ok(())
+    // }
 }

Some files were not shown because too many files changed in this diff