Thomas 1 an în urmă
părinte
comite
6287f14939
8 a modificat fișierele cu 361 adăugiri și 1071 ștergeri
  1. 0 0
      src/assembler/canu.rs
  2. 75 12
      src/assembler/mod.rs
  3. 1 0
      src/assembler/ra.rs
  4. 24 59
      src/assembler/spades.rs
  5. 209 0
      src/assembler/wtdbg2.rs
  6. 0 106
      src/flye.rs
  7. 15 8
      src/io/bam.rs
  8. 37 886
      src/lib.rs

+ 0 - 0
src/assembler/canu.rs


+ 75 - 12
src/assembler/mod.rs

@@ -1,4 +1,4 @@
-use std::path::{Path, PathBuf};
+use std::{collections::HashMap, path::{Path, PathBuf}};
 
 use anyhow::{Context, Ok};
 use log::warn;
@@ -8,11 +8,12 @@ use crate::io::bam::{all_bam_paths, create_bam_leave_two_out};
 
 use self::{
     flye::{Flye, FlyeConfig},
-    spades::{Spades, SpadesConfig},
+    spades::{Spades, SpadesConfig}, wtdbg2::{Wtdbg2, Wtdbg2Config},
 };
 
 pub mod flye;
 pub mod spades;
+pub mod wtdbg2;
 
 pub trait Assemble: Sized {
     fn init(bam_input: &Path, config: &AssembleConfig) -> anyhow::Result<Self>;
@@ -23,9 +24,15 @@ pub trait Assemble: Sized {
 pub enum AssembleConfig {
     Flye(FlyeConfig),
     Spades(SpadesConfig),
+    Wtdbg2(Wtdbg2Config),
 }
 
-pub fn assemble<A: Assemble>(bam_input: &Path, config: &AssembleConfig) -> anyhow::Result<()> {
+pub fn assemble<A: Assemble>(
+    bam_input: &Path,
+    config: &AssembleConfig,
+    max_depth: u8,
+    current_depth: u8,
+) -> anyhow::Result<()> {
     let result = A::init(bam_input, config)
         .and_then(|b| b.assemble())
         .and_then(|c| c.save());
@@ -34,8 +41,16 @@ pub fn assemble<A: Assemble>(bam_input: &Path, config: &AssembleConfig) -> anyho
         std::result::Result::Ok(()) => Ok(()),
         Err(err) => {
             if let Some(AssembleError::NoContig(err_msg)) = err.downcast_ref::<AssembleError>() {
-                warn!("No contig assembled for {}, rescuing...", err_msg);
-                rescue_assembly::<A>(bam_input, config)
+                if current_depth < max_depth {
+                    warn!("No contig assembled for {}, rescuing...", err_msg);
+                    rescue_assembly::<A>(bam_input, config, max_depth, current_depth + 1)
+                } else {
+                    warn!("Max rescue depth reached for {}", err_msg);
+                    Err(AssembleError::MaxRescueDepthReached(
+                        bam_input.to_str().unwrap().to_string(),
+                    )
+                    .into())
+                }
             } else {
                 Err(err)
             }
@@ -43,10 +58,17 @@ pub fn assemble<A: Assemble>(bam_input: &Path, config: &AssembleConfig) -> anyho
     }
 }
 
-fn rescue_assembly<A: Assemble>(bam_input: &Path, config: &AssembleConfig) -> anyhow::Result<()> {
+fn rescue_assembly<A: Assemble>(
+    bam_input: &Path,
+    config: &AssembleConfig,
+    max_depth: u8,
+    current_depth: u8,
+) -> anyhow::Result<()> {
     let bams = create_bam_leave_two_out(bam_input.to_str().context("Invalid path")?)?;
 
-    let has_contig = bams.iter().any(|bam| assemble::<A>(bam, config).is_ok());
+    let has_contig = bams
+        .iter()
+        .any(|bam| assemble::<A>(bam, config, max_depth, current_depth).is_ok());
 
     if has_contig {
         Ok(())
@@ -58,14 +80,23 @@ fn rescue_assembly<A: Assemble>(bam_input: &Path, config: &AssembleConfig) -> an
 pub fn assemble_records(
     bam_input: &PathBuf,
     configs: &Vec<AssembleConfig>,
+    max_depth: u8,
 ) -> Vec<(PathBuf, String, anyhow::Result<()>)> {
     let mut results = Vec::new();
     for config in configs {
         let r = match config {
-            AssembleConfig::Flye(_) => ("flye".to_string(), assemble::<Flye>(bam_input, config)),
-            AssembleConfig::Spades(_) => {
-                ("spades".to_string(), assemble::<Spades>(bam_input, config))
-            }
+            AssembleConfig::Flye(_) => (
+                "flye".to_string(),
+                assemble::<Flye>(bam_input, config, max_depth, 0),
+            ),
+            AssembleConfig::Spades(_) => (
+                "spades".to_string(),
+                assemble::<Spades>(bam_input, config, max_depth, 0),
+            ),
+            AssembleConfig::Wtdbg2(_) => (
+                "wtdbg2".to_string(),
+                assemble::<Wtdbg2>(bam_input, config, max_depth, 0),
+            ),
         };
 
         results.push((bam_input.to_owned(), r.0, r.1));
@@ -76,10 +107,11 @@ pub fn assemble_records(
 pub fn assemble_dir(
     input_dir: &str,
     configs: Vec<AssembleConfig>,
+    max_depth: u8,
 ) -> anyhow::Result<Vec<(PathBuf, String, anyhow::Result<()>)>> {
     Ok(all_bam_paths(input_dir)?
         .iter()
-        .flat_map(|bam_input| assemble_records(bam_input, &configs))
+        .flat_map(|bam_input| assemble_records(bam_input, &configs, max_depth))
         .collect())
 }
 
@@ -90,4 +122,35 @@ pub enum AssembleError {
 
     #[error("No contig assembled after rescue: {0}")]
     NoContigRescue(String),
+
+    #[error("Max rescue depth reached: {0}")]
+    MaxRescueDepthReached(String),
+}
+
+pub fn calculate_shannon_entropy(sequence: &str) -> f64 {
+    let total_length = sequence.len() as f64;
+
+    if total_length == 0.0 {
+        return 0.0;
+    }
+
+    // Count the frequency of each base
+    let mut base_counts = HashMap::new();
+    for base in sequence.chars() {
+        *base_counts.entry(base).or_insert(0) += 1;
+    }
+
+    // Calculate Shannon entropy
+    let mut entropy = 0.0;
+    for &count in base_counts.values() {
+        let probability = count as f64 / total_length;
+        entropy -= probability * probability.log2();
+    }
+
+    // Normalize entropy by dividing by log2(4) since we have 4 possible bases
+    let max_entropy = 4f64.log2();
+    let normalized_entropy = entropy / max_entropy;
+
+    // Round to 4 decimal places
+    (normalized_entropy * 10000.0).round() / 10000.0
 }

+ 1 - 0
src/assembler/ra.rs

@@ -0,0 +1 @@
+// https://github.com/rvaser/ra

+ 24 - 59
src/assembler/spades.rs

@@ -1,5 +1,4 @@
 use std::{
-    collections::HashMap,
     fs::{self, File},
     io::{BufRead, BufReader},
     path::{Path, PathBuf},
@@ -17,7 +16,7 @@ use crate::{
     },
 };
 
-use super::{Assemble, AssembleConfig};
+use super::{calculate_shannon_entropy, Assemble, AssembleConfig};
 
 #[derive(Debug, Clone)]
 pub struct SpadesConfig {
@@ -56,6 +55,8 @@ impl SpadesConfig {
 pub struct Spades {
     pub config: SpadesConfig,
     pub input_id: String,
+    pub tmp_dir: String,
+    pub input_fq: String,
 
     pub input_records: Vec<bam::Record>,
     pub on_contig_bam: String,
@@ -69,12 +70,23 @@ impl Assemble for Spades {
                 let input_id = input_bam.file_stem().unwrap().to_str().unwrap().to_string();
                 let input_records = read_bam(input_bam)?;
 
+                let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
+                info!("Creating tmp directory {tmp_dir}");
+                fs::create_dir(&tmp_dir)?;
+
+                let input_fq = format!("{}/{}.fastq", tmp_dir, input_id);
+                if !Path::new(&input_fq).exists() {
+                    records_to_fastq(&input_records, &input_fq)?;
+                }
+
                 Ok(Self {
                     config: config.clone(),
                     input_id,
                     input_records,
                     on_contig_bam: String::new(),
                     contigs: None,
+                    tmp_dir,
+                    input_fq,
                 })
             }
             _ => Err(anyhow::anyhow!("Wrong config format for Spades.")),
@@ -82,19 +94,13 @@ impl Assemble for Spades {
     }
 
     fn assemble(mut self) -> anyhow::Result<Self> {
-        let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
-        info!("Creating tmp directory {tmp_dir}");
-        fs::create_dir(&tmp_dir).unwrap();
-
-        let input_fq = format!("{}/{}.fastq", tmp_dir, self.input_id);
-        if !Path::new(&input_fq).exists() {
-            records_to_fastq(&self.input_records, &input_fq)?;
-        }
+        let tmp_dir = &self.tmp_dir;
+        let input_fq = &self.input_fq;
 
         let spades_tmp_dir = format!("{tmp_dir}/spades");
         fs::create_dir(&spades_tmp_dir).unwrap();
 
-        run_spades(&input_fq, &spades_tmp_dir, &self.config);
+        run_spades(input_fq, &spades_tmp_dir, &self.config);
 
         let contigs_path = format!("{spades_tmp_dir}/contigs.fasta");
 
@@ -118,9 +124,7 @@ impl Assemble for Spades {
             anyhow::bail!(AssembleError::NoContig(self.input_id));
         }
 
-        // Cleaning
-        fs::remove_dir_all(tmp_dir)?;
-
+        
         Ok(self)
     }
 
@@ -147,25 +151,11 @@ impl Assemble for Spades {
             write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())]);
             fai(&contig_fa)?;
 
-            // Writing bed file from best blastn results
-            // let bed_path = format!("{}/{contig_id}.bed", self.output_dir.display());
-            // let bed = pandora_lib_blastn::Blast::init(&contig_fa)?
-            //     .run()?
-            //     .keep_best()
-            //     .to_bed()?;
-            // let mut f = File::create(bed_path)?;
-            // f.write_all(bed.as_bytes())?;
-            //
             // Remaping input bam to contig
             info!("Mapping  input reads to {contig_id}");
             let new_bam = format!("{}/{contig_id}.bam", self.config.output_dir.display());
             duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
-            let input_fa = format!(
-                "{}/{}.fasta",
-                self.config.output_dir.display(),
-                self.input_id
-            );
-            let bwa = format!("bwa mem {contig_fa} {input_fa}");
+            let bwa = format!("bwa mem {contig_fa} {}", self.input_fq);
             let samtools = "samtools sort /dev/stdin";
             let pipe = format!("{bwa} | {samtools} > {new_bam}");
             duct::cmd!("bash", "-c", pipe).run()?;
@@ -175,11 +165,14 @@ impl Assemble for Spades {
 
             // Run modkit
             let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.config.output_dir.display());
-            duct::cmd!("modkit", "pileup", new_bam, modkit_pileup).run()?;
+            duct::cmd!("modkit", "pileup", "--cpg", "--ref", contig_fa, new_bam, modkit_pileup).run()?;
         }
+
+        // Cleaning
+        fs::remove_dir_all(self.tmp_dir)?;
+
         Ok(())
     }
-    // add code here
 }
 
 pub fn run_spades(input_fq: &str, tmp_dir: &str, config: &SpadesConfig) {
@@ -213,31 +206,3 @@ pub fn run_spades(input_fq: &str, tmp_dir: &str, config: &SpadesConfig) {
     cmd.wait().unwrap();
     cmd.kill().unwrap();
 }
-
-fn calculate_shannon_entropy(sequence: &str) -> f64 {
-    let total_length = sequence.len() as f64;
-
-    if total_length == 0.0 {
-        return 0.0;
-    }
-
-    // Count the frequency of each base
-    let mut base_counts = HashMap::new();
-    for base in sequence.chars() {
-        *base_counts.entry(base).or_insert(0) += 1;
-    }
-
-    // Calculate Shannon entropy
-    let mut entropy = 0.0;
-    for &count in base_counts.values() {
-        let probability = count as f64 / total_length;
-        entropy -= probability * probability.log2();
-    }
-
-    // Normalize entropy by dividing by log2(4) since we have 4 possible bases
-    let max_entropy = 4f64.log2();
-    let normalized_entropy = entropy / max_entropy;
-
-    // Round to 4 decimal places
-    (normalized_entropy * 10000.0).round() / 10000.0
-}

+ 209 - 0
src/assembler/wtdbg2.rs

@@ -0,0 +1,209 @@
+// https://github.com/ruanjue/wtdbg2/
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    path::{Path, PathBuf},
+    process::{Command, Stdio},
+};
+
+use log::{info, warn};
+
+use crate::{
+    assembler::AssembleError,
+    io::{
+        bam::{cp_mod_tags, read_bam},
+        fasta::{fai, write_fasta},
+        fastq::records_to_fastq,
+    },
+};
+
+use super::{calculate_shannon_entropy, Assemble, AssembleConfig};
+
+#[derive(Debug, Clone)]
+pub struct Wtdbg2Config {
+    pub output_dir: PathBuf,
+
+    pub min_cov: u16,
+    pub min_reads: u32,
+    pub min_entropy: f64,
+    pub threads: u16,
+    pub wtdbg2_bin: String,
+}
+
+impl Default for Wtdbg2Config {
+    fn default() -> Self {
+        Self {
+            output_dir: PathBuf::new(),
+            min_cov: 2,
+            min_reads: 3,
+            min_entropy: 0.2,
+            threads: 12,
+            wtdbg2_bin: "/data/tools/wtdbg2/wtdbg2.pl".to_string(),
+        }
+    }
+}
+
+impl Wtdbg2Config {
+    pub fn new(output_dir: &str) -> Self {
+        Wtdbg2Config {
+            output_dir: PathBuf::from(output_dir),
+            ..Default::default()
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Wtdbg2 {
+    pub config: Wtdbg2Config,
+    pub input_id: String,
+    pub tmp_dir: String,
+    pub input_fq: String,
+
+    pub input_records: Vec<bam::Record>,
+    pub on_contig_bam: String,
+    pub contigs: Option<Vec<String>>,
+}
+
+impl Assemble for Wtdbg2 {
+    fn init(input_bam: &std::path::Path, config: &AssembleConfig) -> anyhow::Result<Self> {
+        match config {
+            super::AssembleConfig::Wtdbg2(config) => {
+                let input_id = input_bam.file_stem().unwrap().to_str().unwrap().to_string();
+                let input_records = read_bam(input_bam)?;
+
+                let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
+                info!("Creating tmp directory {tmp_dir}");
+                fs::create_dir(&tmp_dir)?;
+
+                let input_fq = format!("{}/{}.fastq", tmp_dir, input_id);
+                if !Path::new(&input_fq).exists() {
+                    records_to_fastq(&input_records, &input_fq)?;
+                }
+
+                Ok(Self {
+                    config: config.clone(),
+                    input_id,
+                    input_records,
+                    on_contig_bam: String::new(),
+                    contigs: None,
+                    tmp_dir,
+                    input_fq,
+                })
+            }
+            _ => Err(anyhow::anyhow!("Wrong config format for Wtdbg2.")),
+        }
+    }
+
+    fn assemble(mut self) -> anyhow::Result<Self> {
+        let tmp_dir = &self.tmp_dir;
+        let input_fq = &self.input_fq;
+
+        let wtdbg2_tmp_dir = format!("{tmp_dir}/wtdbg2");
+        fs::create_dir(&wtdbg2_tmp_dir).unwrap();
+
+        run_wtdbg2(input_fq, &wtdbg2_tmp_dir, &self.config);
+
+        let contigs_path = format!("{tmp_dir}/wtdbg2.cns.fa");
+        warn!("reading {}", contigs_path);
+
+        if Path::new(&contigs_path).exists() {
+            // Read the assembly fasta
+            let mut reader = File::open(contigs_path)
+                .map(BufReader::new)
+                .map(noodles_fasta::Reader::new)?;
+
+            let mut contigs = Vec::new();
+            for result in reader.records() {
+                let record = result?;
+                let seq = record.sequence().as_ref();
+                let seq = String::from_utf8(seq.to_vec()).unwrap();
+                if calculate_shannon_entropy(&seq) >= self.config.min_entropy {
+                    contigs.push(seq);
+                }
+            }
+            self.contigs = Some(contigs);
+        } else {
+            anyhow::bail!(AssembleError::NoContig(self.input_id));
+        }
+
+        
+        Ok(self)
+    }
+
+    fn save(self) -> anyhow::Result<()> {
+        if self.contigs.is_none() {
+            anyhow::bail!(AssembleError::NoContig(self.input_id));
+        }
+
+        for (i, contig) in self.contigs.unwrap().iter().enumerate() {
+            let suffixe = if i == 0 {
+                "".to_string()
+            } else {
+                format!("_{i}")
+            };
+
+            if !self.config.output_dir.exists() {
+                fs::create_dir_all(&self.config.output_dir)?;
+            }
+
+            let contig_id = format!("{}{suffixe}_wtdbg2", self.input_id);
+            let contig_fa = format!("{}/{contig_id}.fa", self.config.output_dir.display());
+
+            info!("Saving contig {contig_id} in {contig_fa}");
+            write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())]);
+            fai(&contig_fa)?;
+
+            // Remaping input bam to contig
+            info!("Mapping  input reads to {contig_id}");
+            let new_bam = format!("{}/{contig_id}.bam", self.config.output_dir.display());
+            duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
+            let bwa = format!("bwa mem {contig_fa} {}", self.input_fq);
+            let samtools = "samtools sort /dev/stdin";
+            let pipe = format!("{bwa} | {samtools} > {new_bam}");
+            duct::cmd!("bash", "-c", pipe).run()?;
+
+            // Copy modified base tags to new bam
+            cp_mod_tags(&self.input_records, &new_bam)?;
+
+            // Run modkit
+            let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.config.output_dir.display());
+            duct::cmd!("modkit", "pileup", "--cpg", "--ref", contig_fa, new_bam, modkit_pileup).run()?;
+        }
+
+        // Cleaning
+        fs::remove_dir_all(self.tmp_dir)?;
+
+        Ok(())
+    }
+}
+
+pub fn run_wtdbg2(input_fq: &str, tmp_dir: &str, config: &Wtdbg2Config) {
+    info!("Running Wtdbg2 for {input_fq}");
+    let mut cmd = Command::new(&config.wtdbg2_bin)
+        .arg("-x")
+        .arg("ont")
+        .arg("-t")
+        .arg(config.threads.to_string())
+        .arg("-g")
+        .arg("25k")
+        .arg("-o")
+        .arg(tmp_dir)
+        .arg(input_fq)
+        .stderr(Stdio::piped())
+        .spawn()
+        .expect("Wtdbg2 failed to start");
+
+    let stderr = cmd.stderr.take().unwrap();
+    let reader = BufReader::new(stderr);
+    reader
+        .lines()
+        .map_while(Result::ok)
+        // .inspect(|o| info!("{o}"))
+        .filter(|line| line.contains("ERROR"))
+        .for_each(|line| warn!("[Wtdbg2] {line}"));
+
+    cmd.wait().unwrap();
+    cmd.kill().unwrap();
+}
+
+

+ 0 - 106
src/flye.rs

@@ -1,106 +0,0 @@
-use log::{info, warn};
-use std::{
-    fs::{self, File}, io::{BufRead, BufReader}, path::Path, process::{Command, Stdio}
-};
-
-pub fn run_flye(fasta_path: &str, tmp_dir: &str, _size: &str) {
-    info!("Running Flye for {fasta_path}");
-    let flye_bin = "/data/tools/Flye/bin/flye";
-    let mut cmd = Command::new(flye_bin)
-        .arg("--threads")
-        .arg("12")
-        // .arg("--keep-haplotypes")
-        // .arg("--meta")
-        .arg("--min-overlap")
-        .arg("1000")
-        .arg("--out-dir")
-        .arg(tmp_dir)
-        .arg("--nano-hq")
-        .arg(fasta_path)
-        .stderr(Stdio::piped())
-        .spawn()
-        .expect("Flye failed to start");
-
-    let stderr = cmd.stderr.take().unwrap();
-    let reader = BufReader::new(stderr);
-    reader
-        .lines()
-        .map_while(Result::ok)
-        .filter(|line| line.contains("ERROR"))
-        .for_each(|line| warn!("[FLYE] {line}"));
-
-    cmd.wait().unwrap();
-    cmd.kill().unwrap();
-}
-
-pub fn read_flye_coverage(path: &str, min_cov: i32, contig_name: &str) -> (usize, usize) {
-    use std::io::Read;
-    let mut reader = File::open(path).map(flate2::read::GzDecoder::new).unwrap();
-    let mut buf = Vec::new();
-    reader.read_to_end(&mut buf).unwrap();
-
-    let mut line_acc = Vec::new();
-    let mut start = None;
-    let mut end = None;
-    let mut last_end = 0;
-    for b in buf.iter() {
-        match b {
-            b'\n' => {
-                let s = String::from_utf8(line_acc.clone()).unwrap();
-                line_acc.clear();
-                if !s.starts_with(&format!("{contig_name}\t")) {
-                    continue;
-                }
-                let s = s.split('\t').collect::<Vec<&str>>();
-                let cov: i32 = s.get(3).unwrap().parse().unwrap();
-                if start.is_none() && cov >= min_cov {
-                    let st: i32 = s.get(1).unwrap().parse().unwrap();
-                    start = Some(st);
-                } else if end.is_none() && start.is_some() && cov < min_cov {
-                    let en: i32 = s.get(1).unwrap().parse().unwrap();
-                    end = Some(en);
-                    break;
-                }
-                last_end = s.get(2).unwrap().parse().unwrap();
-            }
-            _ => line_acc.push(*b),
-        }
-    }
-    (start.unwrap() as usize, end.unwrap_or(last_end) as usize)
-}
-
-pub fn assemble_flye(reads_path: &str) -> anyhow::Result<Option<Vec<String>>> {
-    let min_cov = 2;
-
-    let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
-    fs::create_dir(tmp_dir.clone()).unwrap();
-
-    run_flye(reads_path, &tmp_dir, "10M");
-
-    let contigs_path = format!("{tmp_dir}/assembly.fasta");
-    let mut res = None;
-
-    if Path::new(&contigs_path).exists() {
-        let assembly_path = format!("{tmp_dir}/assembly.fasta");
-        let flye_cov_path = format!("{tmp_dir}/40-polishing/base_coverage.bed.gz");
-
-        let mut reader = File::open(assembly_path)
-            .map(BufReader::new)
-            .map(noodles_fasta::Reader::new)?;
-        let mut contigs = Vec::new();
-        for result in reader.records() {
-            let record = result?;
-            let contig_name = String::from_utf8(record.name().to_vec()).unwrap();
-            let (s, e) = read_flye_coverage(&flye_cov_path, min_cov, &contig_name);
-            let seq = record.sequence().as_ref();
-            let seq = String::from_utf8(seq.to_vec()).unwrap();
-            let seq: String = seq[s..e].into();
-            contigs.push(seq.clone());
-        }
-        res = Some(contigs);
-    }
-    fs::remove_dir_all(tmp_dir)?;
-    Ok(res)
-}
-
-

+ 15 - 8
src/io/bam.rs

@@ -3,7 +3,7 @@ use bam::{
     record::tags::{StringType, TagValue}, BamReader, Record, RecordWriter
 };
 use log::{info, warn};
-use nom::AsBytes;
+// use nom::AsBytes;
 use std::{
     collections::HashMap,
     fs,
@@ -31,12 +31,16 @@ pub fn read_bam(input_bam: &Path) -> anyhow::Result<Vec<Record>> {
 }
 
 pub fn cp_mod_tags(input_records: &Vec<Record>, output_path: &str) -> anyhow::Result<()> {
+    info!("Transferring SAM tags from records to {output_path}");
     // Read tags from input tags
     let mut mm_hm = HashMap::new();
     let mut ml_hm = HashMap::new();
+    let mut tags_hm = HashMap::new();
     for record in input_records {
         // let record = record?;
         let query_name = String::from_utf8(record.name().to_vec())?;
+        let u = record.tags();
+        tags_hm.insert(query_name.clone(), u.clone());
         if let Some(TagValue::String(value, StringType::String)) = record.tags().get(b"MM") {
             mm_hm.insert(query_name.clone(), value.to_vec());
         }
@@ -63,14 +67,17 @@ pub fn cp_mod_tags(input_records: &Vec<Record>, output_path: &str) -> anyhow::Re
         let mut record = record.clone();
         let record_name = String::from_utf8(record.name().to_vec())?;
         let tags = record.tags_mut();
-        if let Some(mm) = mm_hm.get(&record_name) {
-            tags.push_string(b"MM", mm)
-        }
-        let tags = record.tags_mut();
-        if let Some(ml) = ml_hm.get(&record_name) {
-            let v = ml.as_bytes();
-            tags.push_array(b"ML", v);
+        if let Some(all) = tags_hm.get(&record_name) {
+            *tags = all.clone();
         }
+        // if let Some(mm) = mm_hm.get(&record_name) {
+        //     tags.push_string(b"MM", mm)
+        // }
+        // let tags = record.tags_mut();
+        // if let Some(ml) = ml_hm.get(&record_name) {
+        //     let v = ml.as_bytes();
+        //     tags.push_array(b"ML", v);
+        // }
         w.write(&record)?;
     }
     w.finish()?;

+ 37 - 886
src/lib.rs

@@ -1,824 +1,11 @@
 pub mod assembler;
 pub mod io;
 
-use anyhow::{anyhow, Context};
-
-use io::bed::read_bed;
-use log::{info, warn};
-use nom::AsBytes;
-use pandora_lib_blastn::BlastResult;
-use pandora_lib_igv::{BamTrack, BedTrack, Track};
-use petgraph::{dot::Dot, graph::NodeIndex, stable_graph::StableUnGraph};
-use rayon::prelude::*;
-use rust_htslib::bam::{Read, Reader, Record};
-use std::{
-    collections::{HashMap, HashSet, VecDeque},
-    fs::{self, File},
-    io::{BufRead, BufReader, Write},
-    path::{Path, PathBuf},
-    str::FromStr,
-};
-
-//
-// pub fn make_graph(d: Vec<BlastResult>, records: &Vec<Record>) {
-//     let mut g: StableUnGraph<String, Option<BlastResult>> = StableUnGraph::default();
-//     let mut name_to_id: HashMap<String, NodeIndex> = HashMap::new();
-//
-//     for blast_result in d {
-//         let nid_a = if let Some(nid_a) = name_to_id.get(&blast_result.query_id) {
-//             *nid_a
-//         } else {
-//             let k = blast_result.query_id.clone();
-//             let nid_a = g.add_node(k.clone());
-//             name_to_id.insert(k, nid_a);
-//             nid_a
-//         };
-//         let nid_b = if let Some(nid_b) = name_to_id.get(&blast_result.subject_id) {
-//             *nid_b
-//         } else {
-//             let k = blast_result.subject_id.clone();
-//             let nid_b = g.add_node(k.clone());
-//             name_to_id.insert(k, nid_b);
-//             nid_b
-//         };
-//         g.add_edge(nid_a, nid_b, Some(blast_result));
-//     }
-//
-//     println!(
-//         "{:?}",
-//         Dot::with_config(&g, &[petgraph::dot::Config::EdgeNoLabel])
-//     );
-//     // let order = dijkstra(&g, start_node, None, |_| 0);
-//     // println!("{order:#?}");
-//
-//     // begin with start node
-//     let mut n_neigh: Vec<(usize, NodeIndex)> = g
-//         .node_indices()
-//         .map(|nid| (g.neighbors(nid).count(), nid))
-//         .collect();
-//     n_neigh.sort_by_key(|v| v.0);
-//     let start_node = n_neigh.last().unwrap().1;
-//     let start_name = g.node_weight(start_node).unwrap().to_string();
-//
-//     let neighbors = g.neighbors(start_node);
-//     let mut neighbors_blast = Vec::new();
-//     for neighbor in neighbors {
-//         let e: Vec<BlastResult> = g
-//             .edges_connecting(start_node, neighbor)
-//             .map(|e| e.weight().clone().unwrap())
-//             .collect();
-//         neighbors_blast.extend(e);
-//     }
-//     let mut neighbors_blast: Vec<(BlastResult, usize)> = neighbors_blast
-//         .iter()
-//         .map(|b| {
-//             if b.subject_id != start_name {
-//                 b.swap_query_subject()
-//             } else {
-//                 b.clone()
-//             }
-//         })
-//         .map(|b| (b.clone(), get_seq_len(records, &b.query_id)))
-//         .collect();
-//     neighbors_blast.sort_by_key(|b| b.0.s_end);
-//
-//     let start_len = get_seq_len(records, &start_name);
-//     println!("{start_len}");
-//     println!("{neighbors_blast:#?}");
-// }
-//
-// pub fn get_seq_len(records: &[Record], id: &str) -> usize {
-//     let lens: Vec<usize> = records
-//         .iter()
-//         .filter(|r| String::from_utf8(r.qname().to_vec()).unwrap() == id)
-//         .map(|r| r.seq_len())
-//         .collect();
-//     lens[0]
-// }
-//
-// pub fn order_sequences_with_positions(blast_results: &[BlastResult]) -> Vec<(String, usize)> {
-//     let mut graph: HashMap<String, HashSet<String>> = HashMap::new();
-//     let mut in_degree: HashMap<String, usize> = HashMap::new();
-//     let mut all_sequences: HashSet<String> = HashSet::new();
-//
-//     // Build the graph
-//     for result in blast_results {
-//         all_sequences.insert(result.query_id.clone());
-//         all_sequences.insert(result.subject_id.clone());
-//
-//         if result.q_start < result.s_start {
-//             graph
-//                 .entry(result.query_id.clone())
-//                 .or_insert_with(HashSet::new)
-//                 .insert(result.subject_id.clone());
-//             *in_degree.entry(result.subject_id.clone()).or_insert(0) += 1;
-//         } else {
-//             graph
-//                 .entry(result.subject_id.clone())
-//                 .or_insert_with(HashSet::new)
-//                 .insert(result.query_id.clone());
-//             *in_degree.entry(result.query_id.clone()).or_insert(0) += 1;
-//         }
-//     }
-//
-//     // Topological sort
-//     let mut queue: VecDeque<String> = all_sequences
-//         .iter()
-//         .filter(|&seq| !in_degree.contains_key(seq))
-//         .cloned()
-//         .collect();
-//     let mut ordered_sequences: Vec<String> = Vec::new();
-//
-//     while let Some(seq) = queue.pop_front() {
-//         ordered_sequences.push(seq.clone());
-//
-//         if let Some(neighbors) = graph.get(&seq) {
-//             for neighbor in neighbors {
-//                 if let Some(degree) = in_degree.get_mut(neighbor) {
-//                     *degree -= 1;
-//                     if *degree == 0 {
-//                         queue.push_back(neighbor.clone());
-//                     }
-//                 }
-//             }
-//         }
-//     }
-//
-//     // Handle any remaining sequences (in case of cycles)
-//     for seq in all_sequences {
-//         if !ordered_sequences.contains(&seq) {
-//             ordered_sequences.push(seq);
-//         }
-//     }
-//
-//     // Calculate absolute positions
-//     let mut positions: HashMap<String, usize> = HashMap::new();
-//     for (current_position, seq) in ordered_sequences.iter().enumerate() {
-//         positions.insert(seq.clone(), current_position);
-//     }
-//
-//     let mut res: Vec<_> = ordered_sequences
-//         .into_iter()
-//         .map(|seq| (seq.clone(), *positions.get(&seq).unwrap()))
-//         .collect();
-//     res.sort_by(|a, b| a.1.cmp(&b.1));
-//     res
-// }
-//
-//
-//
-// #[derive(Debug, Default)]
-// pub struct FlyeAsm {
-//     pub input_id: String,
-//     pub asm_dir: String,
-//     pub input_bam: String,
-//     pub input_records: Vec<bam::Record>,
-//     pub on_contig_bam: String,
-//     pub contigs: Option<Vec<String>>,
-//     pub flye_asm_params: FlyeAsmParams,
-// }
-//
-// #[derive(Debug)]
-// pub struct FlyeAsmParams {
-//     pub min_cov: u16,
-// }
-//
-// impl Default for FlyeAsmParams {
-//     fn default() -> Self {
-//         Self { min_cov: 2 }
-//     }
-// }
-//
-// impl FlyeAsm {
-//     pub fn new(asm_dir: &str, file_stem: &str) -> anyhow::Result<Self> {
-//         let input_bam = format!("{asm_dir}/{file_stem}.bam");
-//         let on_contig_bam = format!("{asm_dir}/{file_stem}_contig.bam");
-//
-//         let reader = bam::BamReader::from_path(&input_bam, 3)?;
-//         let mut input_records = Vec::new();
-//         for rec in reader {
-//             input_records.push(rec?);
-//         }
-//         Ok(FlyeAsm {
-//             asm_dir: asm_dir.to_string(),
-//             input_id: file_stem.to_string(),
-//             on_contig_bam,
-//             contigs: None,
-//             flye_asm_params: FlyeAsmParams::default(),
-//             input_records,
-//             input_bam,
-//         })
-//     }
-//
-//     pub fn assemble(mut self) -> anyhow::Result<Self> {
-//         let min_cov = self.flye_asm_params.min_cov;
-//
-//         let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
-//         info!("Creating tmp directory {tmp_dir}");
-//         fs::create_dir(&tmp_dir).unwrap();
-//         let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
-//
-//         if !Path::new(&input_fa).exists() {
-//             records_to_fasta(&self.input_records, &input_fa)?;
-//         }
-//
-//         // let input_fa = format!("{tmp_dir}/input.fa");
-//         // records_to_fasta(&self.input_records, &input_fa)?;
-//
-//         let flye_tmp_dir = format!("{tmp_dir}/flye");
-//         fs::create_dir(&flye_tmp_dir).unwrap();
-//
-//         run_flye(&input_fa, &flye_tmp_dir, "10M");
-//
-//         let contigs_path = format!("{flye_tmp_dir}/assembly.fasta");
-//
-//         if Path::new(&contigs_path).exists() {
-//             let assembly_path = format!("{flye_tmp_dir}/assembly.fasta");
-//             let flye_cov_path = format!("{flye_tmp_dir}/40-polishing/base_coverage.bed.gz");
-//
-//             // Read the assembly fasta
-//             let mut reader = File::open(assembly_path)
-//                 .map(BufReader::new)
-//                 .map(noodles_fasta::Reader::new)?;
-//
-//             let mut contigs = Vec::new();
-//             for result in reader.records() {
-//                 let record = result?;
-//                 let contig_name = String::from_utf8(record.name().to_vec()).unwrap();
-//                 let (s, e) = read_flye_coverage(&flye_cov_path, min_cov.into(), &contig_name);
-//                 let seq = record.sequence().as_ref();
-//                 let seq = String::from_utf8(seq.to_vec()).unwrap();
-//                 let seq: String = seq[s..e].into();
-//                 contigs.push(seq);
-//             }
-//             self.contigs = Some(contigs);
-//         } else {
-//             warn!("❌ No reads assembled for {}", self.input_id);
-//         }
-//
-//         // Cleaning
-//         fs::remove_dir_all(tmp_dir)?;
-//
-//         Ok(self)
-//     }
-//
-//     pub fn save(self) -> anyhow::Result<()> {
-//         if self.contigs.is_none() {
-//             return Err(anyhow!("No reads were assembled"));
-//         }
-//
-//         for (i, contig) in self.contigs.unwrap().iter().enumerate() {
-//             let suffixe = if i == 0 {
-//                 "".to_string()
-//             } else {
-//                 format!("_{i}")
-//             };
-//
-//             let contig_id = format!("{}{suffixe}_flye", self.input_id);
-//
-//             let contig_fa = format!("{}/{contig_id}.fa", self.asm_dir);
-//
-//             info!("Saving contig {contig_id} in {contig_fa}");
-//             write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())]);
-//             fai(&contig_fa)?;
-//
-//             // Writing bed file from best blastn results
-//             let bed_path = format!("{}/{contig_id}.bed", self.asm_dir);
-//             let bed = pandora_lib_blastn::Blast::init(&contig_fa)?
-//                 .run()?
-//                 .keep_best()
-//                 .to_bed()?;
-//             let mut f = File::create(bed_path)?;
-//             f.write_all(bed.as_bytes())?;
-//
-//             // Remaping input bam to contig
-//             info!("Mapping  input reads to {contig_id}");
-//             let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
-//             duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
-//             let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
-//             let bwa = format!("bwa mem {contig_fa} {input_fa}");
-//             let samtools = "samtools sort /dev/stdin";
-//             let pipe = format!("{bwa} | {samtools} > {new_bam}");
-//             duct::cmd!("bash", "-c", pipe).run()?;
-//
-//             // Copy modified base tags to new bam
-//             cp_mod_tags(&self.input_records, &new_bam)?;
-//
-//             // Run modkit
-//             let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
-//             duct::cmd!("modkit", "pileup", new_bam, modkit_pileup).run()?;
-//         }
-//         Ok(())
-//     }
-// }
-
-// fn cluster_reads(input_path: &str) -> anyhow::Result<Vec<PathBuf>> {
-//     // Open the input BAM file
-//     let mut reader = Reader::from_path(input_path)
-//         .with_context(|| format!("Failed to open input BAM file: {}", input_path))?;
-//     let header = reader.header().clone();
-//     let header = rust_htslib::bam::Header::from_template(&header);
-//
-//     // Read all records into a vector
-//     let mut records: HashMap<i32, Vec<Record>> = HashMap::new();
-//     for record in reader.records() {
-//         let record = record?;
-//         records.entry(record.tid()).or_default().push(record);
-//     }
-//
-//     records
-//         .values_mut()
-//         .for_each(|v| v.sort_by_key(|a| a.pos()));
-//
-//     let threshold = 500;
-//     let records: Vec<Vec<Record>> = records
-//         .values()
-//         .flat_map(|v| {
-//             v.iter().fold(vec![vec![]], |mut acc: Vec<Vec<Record>>, r| {
-//                 if let Some(last_group) = acc.last_mut() {
-//                     if last_group.is_empty()
-//                         || r.pos() - last_group.last().unwrap().pos() < threshold
-//                     {
-//                         last_group.push(r.clone());
-//                     } else {
-//                         acc.push(vec![r.clone()]);
-//                     }
-//                 }
-//                 acc
-//             })
-//         })
-//         .collect();
-//
-//     let file_stem = PathBuf::from(input_path)
-//         .file_stem()
-//         .unwrap()
-//         .to_str()
-//         .unwrap()
-//         .to_string();
-//     let dir = PathBuf::from(input_path).parent().unwrap().to_owned();
-//     let mut output_paths = Vec::new();
-//
-//     // Iterate through records, creating a new file for each omitted record
-//     for (i, recs) in records.iter().enumerate() {
-//         if recs.len() < 3 {
-//             continue;
-//         }
-//         let output_filename = format!("{}/{file_stem}_{i}.bam", dir.display());
-//         let output_path = PathBuf::from(&output_filename);
-//         info!("Creating bam {}", output_path.display());
-//
-//         let mut writer = rust_htslib::bam::Writer::from_path(
-//             &output_path,
-//             &header,
-//             rust_htslib::bam::Format::Bam,
-//         )
-//         .with_context(|| {
-//             format!(
-//                 "Failed to create output BAM file: {}",
-//                 output_path.display()
-//             )
-//         })?;
-//
-//         // Write all records except the omitted one
-//         for record in recs.iter() {
-//             writer.write(record).with_context(|| {
-//                 format!("Failed to write record to file: {}", output_path.display())
-//             })?;
-//         }
-//
-//         output_paths.push(output_path);
-//     }
-//     Ok(output_paths)
-// }
-//
-// fn two_by_tow(input_path: &str) -> anyhow::Result<Vec<PathBuf>> {
-//     // Open the input BAM file
-//     let mut reader = Reader::from_path(input_path)
-//         .with_context(|| format!("Failed to open input BAM file: {}", input_path))?;
-//     let header = reader.header().clone();
-//     let header = rust_htslib::bam::Header::from_template(&header);
-//
-//     // Read all records into a vector
-//     let mut records = Vec::new();
-//     for record in reader.records() {
-//         records.push(record?);
-//     }
-//
-//     if records.len() < 3 {
-//         return Err(anyhow!("Not enough reads"));
-//     }
-//
-//     let file_stem = PathBuf::from(input_path)
-//         .file_stem()
-//         .unwrap()
-//         .to_str()
-//         .unwrap()
-//         .to_string();
-//     let dir = PathBuf::from(input_path).parent().unwrap().to_owned();
-//     let mut output_paths = Vec::new();
-//
-//     for i in 0..records.len() {
-//         let output_filename = format!("{}/{file_stem}_{i}.bam", dir.display());
-//         let output_path = PathBuf::from(&output_filename);
-//         info!("Creating bam {}", output_path.display());
-//
-//         let mut writer = rust_htslib::bam::Writer::from_path(
-//             &output_path,
-//             &header,
-//             rust_htslib::bam::Format::Bam,
-//         )
-//         .with_context(|| {
-//             format!(
-//                 "Failed to create output BAM file: {}",
-//                 output_path.display()
-//             )
-//         })?;
-//
-//         // Write two records to the file, skipping the i-th record
-//         for (j, record) in records.iter().enumerate() {
-//             if j != i && j != (i + 1) % records.len() {
-//                 writer.write(record)?;
-//             } else {
-//                 warn!(
-//                     "{} removing {}",
-//                     output_path.display(),
-//                     String::from_utf8(record.qname().to_vec()).unwrap()
-//                 );
-//             }
-//         }
-//
-//         output_paths.push(output_path);
-//     }
-//     Ok(output_paths)
-// }
-//
-// fn two_by_tow_new(input_path: &str) -> anyhow::Result<Vec<PathBuf>> {
-//     // Open the input BAM file
-//     let mut reader = Reader::from_path(input_path)
-//         .with_context(|| format!("Failed to open input BAM file: {}", input_path))?;
-//     let header = reader.header().clone();
-//     let header = rust_htslib::bam::Header::from_template(&header);
-//
-//     // Read all records into a vector
-//     let mut records = Vec::new();
-//     for record in reader.records() {
-//         records.push(record?);
-//     }
-//
-//     if records.len() < 3 {
-//         return Err(anyhow!("Not enough reads"));
-//     }
-//
-//     let file_stem = PathBuf::from(input_path)
-//         .file_stem()
-//         .unwrap()
-//         .to_str()
-//         .unwrap()
-//         .to_string();
-//     let dir = PathBuf::from(input_path).parent().unwrap().to_owned();
-//     let mut output_paths = Vec::new();
-//
-//     // Iterate through records, creating a new file for each pair of records
-//     for i in (0..records.len()).step_by(2) {
-//         let output_filename = format!("{}/{file_stem}_{}.bam", dir.display(), i / 2);
-//         let output_path = PathBuf::from(&output_filename);
-//         info!("Creating bam {}", output_path.display());
-//
-//         let mut writer = rust_htslib::bam::Writer::from_path(
-//             &output_path,
-//             &header,
-//             rust_htslib::bam::Format::Bam,
-//         )
-//         .with_context(|| {
-//             format!(
-//                 "Failed to create output BAM file: {}",
-//                 output_path.display()
-//             )
-//         })?;
-//
-//         // Write two records to the file
-//         writer.write(&records[i])?;
-//         if i + 1 < records.len() {
-//             writer.write(&records[i + 1])?;
-//         }
-//
-//         output_paths.push(output_path);
-//     }
-//     Ok(output_paths)
-// }
-//
-// fn create_bam_files_with_omissions(input_path: &str) -> anyhow::Result<Vec<PathBuf>> {
-//     // Open the input BAM file
-//     let mut reader = Reader::from_path(input_path)
-//         .with_context(|| format!("Failed to open input BAM file: {}", input_path))?;
-//     let header = reader.header().clone();
-//     let header = rust_htslib::bam::Header::from_template(&header);
-//
-//     // Read all records into a vector
-//     let mut records = Vec::new();
-//     for record in reader.records() {
-//         records.push(record?);
-//     }
-//
-//     if records.len() < 3 {
-//         return Err(anyhow!("Not enough reads"));
-//     }
-//
-//     let file_stem = PathBuf::from(input_path)
-//         .file_stem()
-//         .unwrap()
-//         .to_str()
-//         .unwrap()
-//         .to_string();
-//     let dir = PathBuf::from(input_path).parent().unwrap().to_owned();
-//     let mut output_paths = Vec::new();
-//
-//     // Iterate through records, creating a new file for each omitted record
-//     for (i, omitted_record) in records.iter().enumerate() {
-//         let output_filename = format!("{}/{file_stem}_{i}.bam", dir.display());
-//         let output_path = PathBuf::from(&output_filename);
-//         info!("Creating bam {}", output_path.display());
-//
-//         let mut writer = rust_htslib::bam::Writer::from_path(
-//             &output_path,
-//             &header,
-//             rust_htslib::bam::Format::Bam,
-//         )
-//         .with_context(|| {
-//             format!(
-//                 "Failed to create output BAM file: {}",
-//                 output_path.display()
-//             )
-//         })?;
-//
-//         // Write all records except the omitted one
-//         for record in records.iter() {
-//             if record != omitted_record {
-//                 writer.write(record).with_context(|| {
-//                     format!("Failed to write record to file: {}", output_path.display())
-//                 })?;
-//             }
-//         }
-//
-//         output_paths.push(output_path);
-//     }
-//
-//     Ok(output_paths)
-// }
-//
-// pub fn rescue(asm_dir: &str, file_stem: &str) -> anyhow::Result<()> {
-//     let input_bam = format!("{}/{}.bam", asm_dir, file_stem);
-//     // let paths = cluster_reads(&input_bam)?;
-//     let paths = two_by_tow(&input_bam)?;
-//     // let paths = create_bam_files_with_omissions(&input_bam)?;
-//     paths.par_iter().for_each(|path| {
-//         let file_stem = path.file_stem().unwrap().to_str().unwrap().to_string();
-//         info!("Trying to assemble {file_stem}");
-//         match FlyeAsm::new(asm_dir, &file_stem)
-//             .unwrap()
-//             .assemble()
-//             .unwrap()
-//             .save()
-//         {
-//             Ok(_) => {
-//                 info!("{file_stem} has been assembled ✅");
-//                 let link = igv_link(asm_dir, &file_stem).unwrap();
-//                 info!("{link}");
-//             }
-//             Err(err) => {
-//                 warn!("❌ {file_stem} {err}");
-//                 fs::remove_file(path).unwrap();
-//             }
-//         }
-//     });
-//
-//     Ok(())
-// }
-//
-//
-//
-// pub fn dir_flye(dir: &str, force: bool) -> anyhow::Result<()> {
-//     let pattern = format!("{}/*.bam", dir);
-//     // let re = Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$")?;
-//
-//     for entry in glob::glob(&pattern).expect("Failed to read glob pattern") {
-//         match entry {
-//             Ok(path) => {
-//                 if let Some(_file_name) = path.file_name().and_then(|name| name.to_str()) {
-//                     if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
-//                         if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() && !force {
-//                             warn!("{input_id} already assembled: {}", igv_link(dir, input_id)?);
-//                         } else {
-//                             let f = FlyeAsm::new(dir, input_id)?;
-//                             match f.assemble()?.save() {
-//                                 // Ok(flye_asm) => match flye_asm.save() {
-//                                 Ok(_) => {
-//                                     info!("✅ Assembled: {}", igv_link(dir, input_id)?);
-//                                 }
-//                                 // Err(e) => warn!("❌ {e}"),
-//                                 // },
-//                                 Err(err) => {
-//                                     warn!("❌ {input_id} has not been assembled {err}");
-//                                     warn!("Rescuing");
-//                                     rescue(dir, input_id)?;
-//                                 }
-//                             }
-//                         }
-//                     }
-//                 }
-//             }
-//             Err(e) => warn!("{:?}", e),
-//         }
-//     }
-//     Ok(())
-// }
-//
-// pub fn dedup_dir(dir: &str) -> anyhow::Result<()> {
-//     // load bed
-//     let pattern = format!("{}/*_flye.bed", dir);
-//     // let re =
-//     // Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}_flye\.bed$")?;
-//
-//     let mut bed_hm: HashMap<String, Vec<String>> = HashMap::new();
-//     for entry in glob::glob(&pattern).expect("Failed to read glob pattern") {
-//         match entry {
-//             Ok(path) => {
-//                 println!("{path:?}");
-//                 if let Some(_file_name) = path.file_name().and_then(|name| name.to_str()) {
-//                     // if re.is_match(file_name) {
-//                     if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
-//                         let mut bed_blast: Vec<String> = read_blastn_bed(path.to_str().unwrap())?
-//                             .iter()
-//                             .map(|r| format!("{}{}", r.name, r.strand))
-//                             .collect();
-//                         bed_blast.sort();
-//                         let key = bed_blast.join("|");
-//                         bed_hm
-//                             .entry(key)
-//                             .or_default()
-//                             .push(input_id.to_owned().replace("_flye", ""));
-//                     }
-//                     // }
-//                 }
-//             }
-//             Err(e) => println!("{:?}", e),
-//         }
-//     }
-//     println!("{bed_hm:#?}");
-//
-//     for group in bed_hm.values() {
-//         if group.len() < 2 {
-//             continue;
-//         }
-//         let new_locus_id = uuid::Uuid::new_v4();
-//         let output_bam_path = format!("{dir}/{new_locus_id}.bam");
-//         info!("Merging {} bams into {}", group.len(), output_bam_path);
-//         let input_bam_paths = group.iter().map(|s| format!("{dir}/{s}.bam")).collect();
-//         merge_bam_files(input_bam_paths, &output_bam_path)?;
-//         dir_flye(dir, false)?;
-//     }
-//
-//     Ok(())
-// }
-//
-// pub struct BlastnBedRow {
-//     pub contig: String,
-//     pub start: u32,
-//     pub end: u32,
-//     pub name: String,
-//     pub score: String,
-//     pub strand: String,
-// }
-//
-// impl FromStr for BlastnBedRow {
-//     type Err = anyhow::Error;
-//
-//     fn from_str(line: &str) -> anyhow::Result<Self> {
-//         let fields: Vec<&str> = line.split('\t').collect();
-//
-//         if fields.len() != 6 {
-//             return Err(anyhow!(
-//                 "Error while parsing bed row, number of fields doesn't match {}",
-//                 line
-//             ));
-//         }
-//
-//         let row = BlastnBedRow {
-//             contig: fields[0].to_string(),
-//             start: fields[1].parse()?,
-//             end: fields[2].parse()?,
-//             name: fields[3].to_string(),
-//             score: fields[4].to_string(),
-//             strand: fields[5].to_string(),
-//         };
-//
-//         Ok(row)
-//     }
-// }
-//
-// fn read_blastn_bed(file_path: &str) -> anyhow::Result<Vec<BlastnBedRow>> {
-//     let file = File::open(file_path)?;
-//     let reader = BufReader::new(file);
-//
-//     reader
-//         .lines()
-//         .map_while(Result::ok)
-//         .map(|s| BlastnBedRow::from_str(&s))
-//         .collect()
-// }
-//
-// pub fn igv_link(dir: &str, contig_id: &str) -> anyhow::Result<String> {
-//     let contig_fa = format!("{dir}/{contig_id}_flye.fa");
-//     let bam_track = Track::Bam(BamTrack::new(
-//         "Assembled reads",
-//         &format!("{dir}/{contig_id}_flye.bam"),
-//     ));
-//     let bed_track = Track::Bed(BedTrack::new(
-//         "Blastn",
-//         &format!("{dir}/{contig_id}_flye.bed"),
-//     ));
-//     let mod_track = Track::Bed(BedTrack::new(
-//         "Modified bases",
-//         &format!("{dir}/{contig_id}_flye_mod.bed"),
-//     ));
-//
-//     let reference = pandora_lib_igv::ReferenceValues {
-//         id: format!("{contig_id}_flye.fa"),
-//         name: format!("{contig_id}_flye.fa"),
-//         index_url: format!("{contig_fa}.fai"),
-//         fasta_url: contig_fa,
-//         ..Default::default()
-//     };
-//     let igv = pandora_lib_igv::Session::default()
-//         .with_reference(reference)
-//         .add_track(bam_track)?
-//         .add_track(bed_track)?
-//         .add_track(mod_track)?;
-//     let link = igv.link("http://store-desktop.local/igv/")?;
-//     Ok(link)
-// }
-//
-// // Function to merge two BAM files into a new BAM file
-// fn merge_bam_files(input_bam_paths: Vec<String>, output_bam_path: &str) -> anyhow::Result<()> {
-//     // Ensure there is at least one BAM file to merge
-//     if input_bam_paths.is_empty() {
-//         return Err(anyhow!("No input BAM files provided"));
-//     }
-//
-//     // Open the first input BAM file to get the header
-//     let mut bam1 = rust_htslib::bam::Reader::from_path(&input_bam_paths[0])?;
-//     let header = rust_htslib::bam::Header::from_template(bam1.header());
-//
-//     // Create a new BAM writer with the header from the first BAM file
-//     let mut output_bam = rust_htslib::bam::Writer::from_path(
-//         output_bam_path,
-//         &header,
-//         rust_htslib::bam::Format::Bam,
-//     )?;
-//
-//     // Write records from the first BAM file to the output BAM file
-//     for result in bam1.records() {
-//         let record = result?;
-//         output_bam.write(&record)?;
-//     }
-//
-//     // Iterate over the remaining BAM files and write their records to the output BAM file
-//     for bam_path in &input_bam_paths[1..] {
-//         let mut bam = rust_htslib::bam::Reader::from_path(bam_path)?;
-//         for result in bam.records() {
-//             let record = result?;
-//             output_bam.write(&record)?;
-//         }
-//     }
-//
-//     Ok(())
-// }
-
-// pub fn par_whole_assemble(reads_dir: &str, dict_path: &str, force: bool) -> anyhow::Result<()> {
-//     read_dict(dict_path)?.par_iter().for_each(|(contig, _)| {
-//         dir_flye(&format!("{reads_dir}/{contig}"), force).unwrap();
-//     });
-//     Ok(())
-// }
-
-pub fn relevant_links(dir_path: &str) -> anyhow::Result<()> {
-    let pattern = format!("{}/**/*_flye.bed", dir_path);
-    let bed_paths: Vec<PathBuf> = glob::glob(&pattern)
-        .expect("Failed to read glob pattern")
-        .filter_map(Result::ok)
-        .collect();
-    for bed_path in bed_paths {
-        let bed_rows = read_bed(&bed_path)?;
-        if bed_rows.len() > 1 {
-            println!("{bed_rows:#?}");
-        }
-    }
-    Ok(())
-}
-
 #[cfg(test)]
 mod tests {
-    use crate::assembler::spades::SpadesConfig;
+    use std::path::PathBuf;
+
+    use crate::assembler::{spades::SpadesConfig, wtdbg2::Wtdbg2Config};
 
     use self::assembler::{assemble_records, flye::FlyeConfig, AssembleConfig};
 
@@ -842,6 +29,7 @@ mod tests {
         let res = assemble_records(
             &PathBuf::from(bam),
             &vec![AssembleConfig::Flye(FlyeConfig::new("/tmp/test_ass_flye"))],
+            1
         );
         println!("{res:#?}");
     }
@@ -860,6 +48,7 @@ mod tests {
         let res = assemble_records(
             &PathBuf::from(bam),
             &vec![AssembleConfig::Flye(FlyeConfig::new("/tmp/test_ass_flye"))],
+            1
         );
         println!("{res:#?}");
     }
@@ -867,88 +56,50 @@ mod tests {
     #[test]
     fn spades_bam() {
         init();
-        let id = "ROBIN";
-        let input_id = "143063957-143063957_8a24";
+        // let id = "ROBIN";
+        // let input_id = "143063957-143063957_8a24";
+        // let chr = "chr10";
+
+        let id = "LEVASSEUR";
+        let input_id = "12677262-102018167_2c45";
+        let chr = "chr10";
 
         let res_dir = "/data/longreads_basic_pipe";
-        let asm_dir = format!("{res_dir}/{id}/diag/scan/reads/chr9");
+        let asm_dir = format!("{res_dir}/{id}/diag/scan/reads/{chr}");
         let bam = format!("{asm_dir}/{input_id}.bam");
 
         let res = assemble_records(
             &PathBuf::from(bam),
-            &vec![AssembleConfig::Spades(SpadesConfig::new("/tmp/test_ass_spades"))],
+            &vec![AssembleConfig::Spades(SpadesConfig::new(
+                "/data/spades",
+            ))],
+            1
         );
         println!("{res:#?}");
     }
 
-
-    // #[test]
-    // fn flye_test() -> anyhow::Result<()> {
-    //     init();
-    //     let id = "ROBIN";
-    //     let input_id = "143063957-143063957_8a24";
-    //
-    //     // let id = "SALICETTO";
-    //     // let input_id = "cd8e4a8a-27ed-4045-af88-9838201f164f";
-    //
-    //     let res_dir = "/data/longreads_basic_pipe";
-    //     let asm_dir = format!("{res_dir}/{id}/diag/scan/reads/chr9");
-    //
-    //     match FlyeAsm::new(&asm_dir, input_id)?.assemble()?.save() {
-    //         Ok(_) => (),
-    //         Err(err) => {
-    //             warn!("{input_id} has not been assembled ❌ {err}");
-    //             warn!("Rescuing");
-    //             rescue(&asm_dir, input_id)?;
-    //         }
-    //     };
-    //     // let l = igv_link(&asm_dir, input_id)?;
-    //     // info!("{l}");
-    //     Ok(())
-    // }
-    //
-    // #[test]
-    // fn flye_d() -> anyhow::Result<()> {
-    //     init();
-    //     let id = "LEVASSEUR";
-    //     let res_dir = "/data/longreads_basic_pipe";
-    //     let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
-    //     dir_flye(&asm_dir, true)
-    // }
-    //
-    // #[test]
-    // fn tmp() {
-    //     init();
-    //     dir_flye(
-    //         "/data/longreads_basic_pipe/LEVASSEUR/diag/scan/reads/chr10",
-    //         true,
-    //     )
-    //     .unwrap();
-    // }
-    //
-    // #[test]
-    // fn dedup() {
-    //     init();
-    //     dedup_dir("/data/tmp/scan_7ed2f43c-d16d-4dcc-bdb4-fb619d082991").unwrap();
-    // }
-    //
-    // #[test]
-    // fn whole() {
-    //     init();
-    //     let id = "ROBIN";
-    //     if let Err(err) = par_whole_assemble(
-    //         &format!("/data/longreads_basic_pipe/{id}/diag/scan/reads"),
-    //         "/data/ref/hs1/chm13v2.0.dict",
-    //         false,
-    //     ) {
-    //         println!("{err}");
-    //     }
-    // }
-    //
     #[test]
-    fn links() {
+    fn wtdbg2_bam() {
         init();
+        // let id = "LEVASSEUR";
+        // let input_id = "12677262-102018167_2c45";
+        // let chr = "chr10";
+
         let id = "ROBIN";
-        relevant_links(&format!("/data/longreads_basic_pipe/{id}/diag/scan/reads")).unwrap();
+        let input_id = "143063957-143063957_8a24";
+        let chr = "chr9";
+
+        let res_dir = "/data/longreads_basic_pipe";
+        let asm_dir = format!("{res_dir}/{id}/diag/scan/reads/{chr}");
+        let bam = format!("{asm_dir}/{input_id}.bam");
+
+        let res = assemble_records(
+            &PathBuf::from(bam),
+            &vec![AssembleConfig::Wtdbg2(Wtdbg2Config::new(
+                "/data/wtdbg2",
+            ))],
+            1
+        );
+        println!("{res:#?}");
     }
 }