Thomas 1 year ago
parent
commit
22041afa7a
3 changed files with 3069 additions and 247 deletions
  1. 3027 229
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 41 18
      src/lib.rs

File diff suppressed because it is too large
+ 3027 - 229
Cargo.lock


+ 1 - 0
Cargo.toml

@@ -12,3 +12,4 @@ serde_json = "1.0.120"
 serde = { version = "1.0.204", features = ["derive"] }
 confy = "0.6.1"
 minimap2 = { git = "https://github.com/jguhlin/minimap2-rs" }
+desc_seq_lib = { git = "https://git.t0m4.fr/Thomas/desc_seq_lib.git" }

+ 41 - 18
src/lib.rs

@@ -70,7 +70,7 @@ impl Blast {
             let mut results: Vec<BlastPile> = results
                 .iter()
                 .take(self.config.max_results)
-                .map(|v| BlastPile::new(v))
+                .map(BlastPile::new)
                 .collect();
 
             allocate_y_positions(&mut results);
@@ -88,11 +88,7 @@ impl Blast {
     }
 
     pub fn to_mapping(self) -> Option<Vec<Mapping>> {
-        if let Some(results) = self.results {
-            Some(results.iter().map(Mapping::from).collect())
-        } else {
-            None
-        }
+        self.results.map(|res| res.iter().map(Mapping::from).collect())
     }
 
     pub fn to_json(self) -> anyhow::Result<String> {
@@ -146,7 +142,7 @@ pub fn run_blastn(query_fa_path: &str) -> anyhow::Result<String> {
 }
 
 pub fn parse_blastn_touput(output: String) -> Vec<BlastResult> {
-    let mut lines: Vec<&str> = output.split("\n").collect();
+    let mut lines: Vec<&str> = output.split('\n').collect();
 
     let mut results = Vec::new();
     for line in lines.drain(..) {
@@ -226,11 +222,11 @@ impl BlastPile {
     }
 }
 
-fn sort_blast_pile(objects: &mut Vec<BlastPile>) {
+fn sort_blast_pile(objects: &mut [BlastPile]) {
     objects.sort_by(|a, b| a.x.partial_cmp(&b.x).unwrap());
 }
 
-fn allocate_y_positions(pile: &mut Vec<BlastPile>) {
+fn allocate_y_positions(pile: &mut [BlastPile]) {
     sort_blast_pile(pile);
 
     let mut active_intervals: Vec<(usize, usize)> = vec![]; // Stores (x + length, y) tuples
@@ -272,7 +268,7 @@ pub fn best_blastn(fa_path: &str) -> anyhow::Result<Vec<BlastResult>> {
     let mut results: Vec<BlastPile> = res
         .iter()
         .take(config.max_results)
-        .map(|v| BlastPile::new(v))
+        .map(BlastPile::new)
         .collect();
     allocate_y_positions(&mut results);
 
@@ -327,7 +323,7 @@ impl From<&BlastResult> for Mapping {
             query_end: br.q_end as i32,
             strand,
             target_name: Some(br.subject_id.clone()),
-            target_len: (target_end - target_start + 1).abs() as i32,
+            target_len: (target_end - target_start + 1).abs(),
             target_start,
             target_end,
             match_len: br.alignment_length as i32,
@@ -366,14 +362,41 @@ mod tests {
 
     #[test]
     fn mapping() -> anyhow::Result<()> {
-        let fa = "test_data/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta";
+        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/06276b91-b672-4441-accd-d012945b2992.fasta";
+        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/21664d3a-8c24-4d4d-9321-54fbf0e5e293.fasta";
+        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/320ee236-897b-4258-8d14-47a3be7e3f92.fasta";
+        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/7660b942-237f-4cb7-9fe8-6a7a8944d47d.fasta"; // inv-10
+        // let fa = "/data/longreads_basic_pipe/SALICETTO/diag/asm_contigs/8e35799d-c100-4fa3-bbd4-61950dd36545.fasta";
+        let id = "ROBIN";
+
+        let mut genome = desc_seq_lib::Genome::new();
+
+        let paths = fs::read_dir(format!("/data/longreads_basic_pipe/{id}/diag/asm_contigs"))?;
+        for path in paths {
+            let path = path?.path();
+            if path.extension().unwrap() == "fasta" {
+                let mappings = Blast::init(path.to_str().unwrap())?
+                    .run()?
+                    .keep_best()
+                    .to_mapping()
+                    .context("No results")?;
+
+                let file_contents = fs::read_to_string(path.to_str().unwrap())
+                    .context("Failed to read the file")?;
+                let mut lines: Vec<&str> = file_contents.split('\n').collect();
+                lines = lines[1..].to_vec();
+                genome.add_contig(
+                    path.file_stem().unwrap().to_string_lossy().to_string(),
+                    mappings,
+                    None,
+                    lines.join(""),
+                )?;
+            }
+        }
 
-        let res = Blast::init(fa)?
-            .run()?
-            .keep_best()
-            .to_mapping()
-            .context("No results")?;
-        println!("{res:#?}");
+        genome.stats();
+        let u: Vec<String> = genome.contigs().filter_map(|c| c.hgvs()).collect();
+        println!("{u:#?}");
         Ok(())
     }
 }

Some files were not shown because too many files changed in this diff