|
@@ -10,10 +10,14 @@ use crate::{
|
|
|
aligner::minimap::{Minimap2AlignOnt, Minimap2Preset},
|
|
aligner::minimap::{Minimap2AlignOnt, Minimap2Preset},
|
|
|
commands::samtools::{SamtoolsIndex, SamtoolsSort},
|
|
commands::samtools::{SamtoolsIndex, SamtoolsSort},
|
|
|
config::Config,
|
|
config::Config,
|
|
|
- de_novo::{Assembler, Polisher},
|
|
|
|
|
|
|
+ de_novo::{flye::FlyeAssembler, medaka::MedakaPolisher, Assembler, Polisher},
|
|
|
helpers::TempFileGuard,
|
|
helpers::TempFileGuard,
|
|
|
- io::{fasta::split_fasta, fastq::write_fastq},
|
|
|
|
|
- run, run_many,
|
|
|
|
|
|
|
+ io::{
|
|
|
|
|
+ bam::{bam_to_aligned_bed, primary_record},
|
|
|
|
|
+ fasta::split_fasta,
|
|
|
|
|
+ fastq::write_fastq,
|
|
|
|
|
+ },
|
|
|
|
|
+ run,
|
|
|
runners::Run,
|
|
runners::Run,
|
|
|
};
|
|
};
|
|
|
|
|
|
|
@@ -26,6 +30,7 @@ pub struct ContigAssemblyResult {
|
|
|
pub contig_fasta: PathBuf,
|
|
pub contig_fasta: PathBuf,
|
|
|
/// Reads realigned to this contig with MM/ML/MN tags from original records.
|
|
/// Reads realigned to this contig with MM/ML/MN tags from original records.
|
|
|
pub aligned_bam: PathBuf,
|
|
pub aligned_bam: PathBuf,
|
|
|
|
|
+ pub contig_ref_bed: PathBuf,
|
|
|
/// Read names excluded because they produced supplementary alignments on
|
|
/// Read names excluded because they produced supplementary alignments on
|
|
|
/// this contig. Non-empty signals that the input region should be tightened.
|
|
/// this contig. Non-empty signals that the input region should be tightened.
|
|
|
/// Note: a read suspicious on contig A but absent here was cleanly placed
|
|
/// Note: a read suspicious on contig A but absent here was cleanly placed
|
|
@@ -46,16 +51,6 @@ pub struct LocalAssemblyConfig {
|
|
|
pub config: Config,
|
|
pub config: Config,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-impl LocalAssemblyConfig {
|
|
|
|
|
- fn from_config(case_id: String, min_records: usize, config: Config) -> Self {
|
|
|
|
|
- Self {
|
|
|
|
|
- min_records,
|
|
|
|
|
- case_id,
|
|
|
|
|
- config,
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
// ─── Pipeline orchestrator ────────────────────────────────────────────────────
|
|
// ─── Pipeline orchestrator ────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
/// Orchestrate local de novo assembly from a set of primary htslib records.
|
|
/// Orchestrate local de novo assembly from a set of primary htslib records.
|
|
@@ -65,12 +60,15 @@ impl LocalAssemblyConfig {
|
|
|
///
|
|
///
|
|
|
/// Steps 1, 5, 6 run locally (pure Rust, Windows-safe).
|
|
/// Steps 1, 5, 6 run locally (pure Rust, Windows-safe).
|
|
|
/// Steps 2–4 are dispatched via `run!` / `run_many!` through SlurmRunner.
|
|
/// Steps 2–4 are dispatched via `run!` / `run_many!` through SlurmRunner.
|
|
|
|
|
+/// Run a single round of local assembly.
|
|
|
|
|
+/// Accepts boxed assembler and polisher to allow dynamic dispatch
|
|
|
|
|
+/// and iterative/recursive invocation with different tools per round.
|
|
|
pub fn run_local_assembly(
|
|
pub fn run_local_assembly(
|
|
|
records: &[bam::Record],
|
|
records: &[bam::Record],
|
|
|
- mut assembler: impl Assembler,
|
|
|
|
|
- mut polisher: impl Polisher,
|
|
|
|
|
|
|
+ mut assembler: BoxedAssembler,
|
|
|
|
|
+ mut polisher: BoxedPolisher,
|
|
|
work_dir: PathBuf,
|
|
work_dir: PathBuf,
|
|
|
- config: LocalAssemblyConfig,
|
|
|
|
|
|
|
+ config: &LocalAssemblyConfig,
|
|
|
) -> anyhow::Result<Vec<ContigAssemblyResult>> {
|
|
) -> anyhow::Result<Vec<ContigAssemblyResult>> {
|
|
|
if records.len() < config.min_records {
|
|
if records.len() < config.min_records {
|
|
|
anyhow::bail!(
|
|
anyhow::bail!(
|
|
@@ -85,11 +83,10 @@ pub fn run_local_assembly(
|
|
|
write_fastq(records, &reads_path).context("FASTQ write failed")?;
|
|
write_fastq(records, &reads_path).context("FASTQ write failed")?;
|
|
|
|
|
|
|
|
// Step 2 — assemble (cluster)
|
|
// Step 2 — assemble (cluster)
|
|
|
- run!(&config.config, &mut assembler)?;
|
|
|
|
|
|
|
+ run!(&config.config, &mut *assembler)?;
|
|
|
|
|
|
|
|
// Step 3 — polish (cluster)
|
|
// Step 3 — polish (cluster)
|
|
|
- // draft set at construction to assembler.assembly_fasta()
|
|
|
|
|
- run!(&config.config, &mut polisher)?;
|
|
|
|
|
|
|
+ run!(&config.config, &mut *polisher)?;
|
|
|
|
|
|
|
|
// Step 4 — split polished FASTA into per-contig files (local)
|
|
// Step 4 — split polished FASTA into per-contig files (local)
|
|
|
let contigs_dir = work_dir.join("contigs");
|
|
let contigs_dir = work_dir.join("contigs");
|
|
@@ -110,7 +107,6 @@ pub fn run_local_assembly(
|
|
|
let mut guard = TempFileGuard::new();
|
|
let mut guard = TempFileGuard::new();
|
|
|
|
|
|
|
|
// Step 5 — realign reads to each contig independently (cluster, sequential)
|
|
// Step 5 — realign reads to each contig independently (cluster, sequential)
|
|
|
- // Asm5(contig_fasta) — no MMI, reference is the single-contig FASTA
|
|
|
|
|
let mut realigners: Vec<Minimap2AlignOnt> = contigs
|
|
let mut realigners: Vec<Minimap2AlignOnt> = contigs
|
|
|
.iter()
|
|
.iter()
|
|
|
.map(|c| {
|
|
.map(|c| {
|
|
@@ -129,7 +125,6 @@ pub fn run_local_assembly(
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Step 6 — align contigs to reference genome (cluster, sequential)
|
|
// Step 6 — align contigs to reference genome (cluster, sequential)
|
|
|
- // MapOnt — MMI precomputed inside init if not already present
|
|
|
|
|
let mut contig_aligners: Vec<Minimap2AlignOnt> = contigs
|
|
let mut contig_aligners: Vec<Minimap2AlignOnt> = contigs
|
|
|
.iter()
|
|
.iter()
|
|
|
.map(|c| {
|
|
.map(|c| {
|
|
@@ -137,8 +132,8 @@ pub fn run_local_assembly(
|
|
|
&config.case_id,
|
|
&config.case_id,
|
|
|
&config.config,
|
|
&config.config,
|
|
|
c.fasta_path.clone(),
|
|
c.fasta_path.clone(),
|
|
|
- work_dir.join(format!("{}.ref.bam", c.name)),
|
|
|
|
|
- Minimap2Preset::MapOntRef,
|
|
|
|
|
|
|
+ work_dir.join(format!("{}_{}.bam", c.name, config.config.reference_name)),
|
|
|
|
|
+ Minimap2Preset::Asm5Reference,
|
|
|
)
|
|
)
|
|
|
})
|
|
})
|
|
|
.collect::<anyhow::Result<Vec<_>>>()?;
|
|
.collect::<anyhow::Result<Vec<_>>>()?;
|
|
@@ -156,15 +151,13 @@ pub fn run_local_assembly(
|
|
|
.zip(contig_aligners.iter())
|
|
.zip(contig_aligners.iter())
|
|
|
.map(|((contig, realigner), contig_aligner)| {
|
|
.map(|((contig, realigner), contig_aligner)| {
|
|
|
let suspicious_qnames = collect_supplementary_qnames(&realigner.final_bam)
|
|
let suspicious_qnames = collect_supplementary_qnames(&realigner.final_bam)
|
|
|
- .with_context(|| {
|
|
|
|
|
- format!("Supplementary scan failed for contig {}", contig.name)
|
|
|
|
|
- })?;
|
|
|
|
|
|
|
+ .with_context(|| format!("Supplementary scan failed for contig {}", contig.name))?;
|
|
|
|
|
|
|
|
if !suspicious_qnames.is_empty() {
|
|
if !suspicious_qnames.is_empty() {
|
|
|
- tracing::warn!(
|
|
|
|
|
- contig = %contig.name,
|
|
|
|
|
- count = suspicious_qnames.len(),
|
|
|
|
|
- "Suspicious reads on contig — consider tightening input region"
|
|
|
|
|
|
|
+ log::warn!(
|
|
|
|
|
+ "Suspicious reads on contig — consider tightening input region | contig={} count={}",
|
|
|
|
|
+ contig.name,
|
|
|
|
|
+ suspicious_qnames.len(),
|
|
|
);
|
|
);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -175,9 +168,7 @@ pub fn run_local_assembly(
|
|
|
&suspicious_qnames,
|
|
&suspicious_qnames,
|
|
|
&out_bam,
|
|
&out_bam,
|
|
|
)
|
|
)
|
|
|
- .with_context(|| {
|
|
|
|
|
- format!("MM/ML tag transfer failed for contig {}", contig.name)
|
|
|
|
|
- })?;
|
|
|
|
|
|
|
+ .with_context(|| format!("MM/ML tag transfer failed for contig {}", contig.name))?;
|
|
|
|
|
|
|
|
let final_bam = work_dir.join(format!("{}.bam", contig.name));
|
|
let final_bam = work_dir.join(format!("{}.bam", contig.name));
|
|
|
let mut sort_job = SamtoolsSort::from_config(&config.config, &out_bam, &final_bam);
|
|
let mut sort_job = SamtoolsSort::from_config(&config.config, &out_bam, &final_bam);
|
|
@@ -187,22 +178,168 @@ pub fn run_local_assembly(
|
|
|
SamtoolsIndex::from_config(&config.config, final_bam.to_str().unwrap());
|
|
SamtoolsIndex::from_config(&config.config, final_bam.to_str().unwrap());
|
|
|
let _log = run!(&config.config, &mut index_job)?;
|
|
let _log = run!(&config.config, &mut index_job)?;
|
|
|
|
|
|
|
|
- // contig_aligner.run() already performed sort + index internally
|
|
|
|
|
|
|
+ // Generate BED from contig→reference alignment (local, pure Rust)
|
|
|
|
|
+ let contig_ref_bed = work_dir.join(format!("{}_{}.bed", contig.name, config.config.reference_name));
|
|
|
|
|
+ bam_to_aligned_bed(&contig_aligner.final_bam, &contig_ref_bed)
|
|
|
|
|
+ .with_context(|| format!("BED generation failed for contig {}", contig.name))?;
|
|
|
|
|
+
|
|
|
Ok(ContigAssemblyResult {
|
|
Ok(ContigAssemblyResult {
|
|
|
- contig_name: contig.name,
|
|
|
|
|
- contig_fasta: contig.fasta_path,
|
|
|
|
|
- aligned_bam: final_bam,
|
|
|
|
|
- contig_ref_bam: contig_aligner.final_bam.clone(),
|
|
|
|
|
|
|
+ contig_name: contig.name,
|
|
|
|
|
+ contig_fasta: contig.fasta_path,
|
|
|
|
|
+ aligned_bam: final_bam,
|
|
|
|
|
+ contig_ref_bam: contig_aligner.final_bam.clone(),
|
|
|
|
|
+ contig_ref_bed,
|
|
|
suspicious_reads: suspicious_qnames.into_iter().collect(),
|
|
suspicious_reads: suspicious_qnames.into_iter().collect(),
|
|
|
})
|
|
})
|
|
|
})
|
|
})
|
|
|
.collect::<anyhow::Result<Vec<_>>>()?;
|
|
.collect::<anyhow::Result<Vec<_>>>()?;
|
|
|
|
|
|
|
|
guard.cleanup();
|
|
guard.cleanup();
|
|
|
|
|
+ Ok(results)
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// ─── Iterative wrapper ────────────────────────────────────────────────────────
|
|
|
|
|
+
|
|
|
|
|
+/// Run local assembly iteratively, extending the record set with suspicious
|
|
|
|
|
+/// reads at each round until convergence or max_rounds is reached.
|
|
|
|
|
+///
|
|
|
|
|
+/// Convergence = no suspicious reads across all contigs in a round.
|
|
|
|
|
+/// Each round outputs to `work_dir/round_{n}/`.
|
|
|
|
|
+pub fn run_local_assembly_iterative(
|
|
|
|
|
+ bam_path: &std::path::Path,
|
|
|
|
|
+ records: Vec<bam::Record>,
|
|
|
|
|
+ builder: &dyn LocalAssemblyBuilder,
|
|
|
|
|
+ work_dir: PathBuf,
|
|
|
|
|
+ config: LocalAssemblyConfig,
|
|
|
|
|
+ max_rounds: u8,
|
|
|
|
|
+) -> anyhow::Result<Vec<ContigAssemblyResult>> {
|
|
|
|
|
+ let mut current_records = records;
|
|
|
|
|
+ let mut results = Vec::new();
|
|
|
|
|
+
|
|
|
|
|
+ for round in 0..max_rounds {
|
|
|
|
|
+ let round_dir = work_dir.join(format!("round_{}", round));
|
|
|
|
|
+ std::fs::create_dir_all(&round_dir)?;
|
|
|
|
|
+
|
|
|
|
|
+ tracing::info!(
|
|
|
|
|
+ round,
|
|
|
|
|
+ records = current_records.len(),
|
|
|
|
|
+ "Starting assembly round"
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ let reads_path = round_dir.join("reads.fastq");
|
|
|
|
|
+ let (assembler, polisher) = builder.build(reads_path, &round_dir);
|
|
|
|
|
+
|
|
|
|
|
+ results = run_local_assembly(
|
|
|
|
|
+ ¤t_records,
|
|
|
|
|
+ assembler,
|
|
|
|
|
+ polisher,
|
|
|
|
|
+ round_dir.clone(),
|
|
|
|
|
+ &config,
|
|
|
|
|
+ )?;
|
|
|
|
|
+
|
|
|
|
|
+ // Collect suspicious qnames across all contigs
|
|
|
|
|
+ let suspicious_qnames: HashSet<Vec<u8>> = results
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .flat_map(|r| r.suspicious_reads.iter().cloned())
|
|
|
|
|
+ .collect();
|
|
|
|
|
+
|
|
|
|
|
+ if suspicious_qnames.is_empty() {
|
|
|
|
|
+ tracing::info!(round, "No suspicious reads — assembly converged");
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ tracing::info!(
|
|
|
|
|
+ round,
|
|
|
|
|
+ count = suspicious_qnames.len(),
|
|
|
|
|
+ "Suspicious reads found — extending record set for next round"
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ // Extend current records, deduplicating by qname
|
|
|
|
|
+ let seen: HashSet<Vec<u8>> = current_records.iter().map(|r| r.qname().to_vec()).collect();
|
|
|
|
|
+
|
|
|
|
|
+ // Fetch suspicious reads using SA tag positions — no full BAM scan
|
|
|
|
|
+ let mut bam_reader = bam::IndexedReader::from_path(bam_path)
|
|
|
|
|
+ .context("Cannot open BAM for suspicious read resolution")?;
|
|
|
|
|
+
|
|
|
|
|
+ let added: Vec<bam::Record> = results
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .flat_map(|r| r.suspicious_reads.iter())
|
|
|
|
|
+ .filter_map(|qname| {
|
|
|
|
|
+ // Find the suspicious record in the realigned BAM to get its SA tag
|
|
|
|
|
+ results.iter().find_map(|r| {
|
|
|
|
|
+ let mut reader = bam::Reader::from_path(&r.aligned_bam).ok()?;
|
|
|
|
|
+ reader.records().find_map(|rec| {
|
|
|
|
|
+ let rec = rec.ok()?;
|
|
|
|
|
+ if rec.qname() == qname.as_slice() && rec.flags() & BAM_FSUPPLEMENTARY != 0
|
|
|
|
|
+ {
|
|
|
|
|
+ Some(rec)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ None
|
|
|
|
|
+ }
|
|
|
|
|
+ })
|
|
|
|
|
+ })
|
|
|
|
|
+ })
|
|
|
|
|
+ .filter_map(|supp_rec| {
|
|
|
|
|
+ // Resolve primary from original BAM using SA tag
|
|
|
|
|
+ let resolved = primary_record(&mut bam_reader, supp_rec);
|
|
|
|
|
+ let qname = resolved.qname().to_vec();
|
|
|
|
|
+ if !seen.contains(&qname) {
|
|
|
|
|
+ Some(resolved)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ None
|
|
|
|
|
+ }
|
|
|
|
|
+ })
|
|
|
|
|
+ .collect();
|
|
|
|
|
+
|
|
|
|
|
+ tracing::info!(
|
|
|
|
|
+ round,
|
|
|
|
|
+ added = added.len(),
|
|
|
|
|
+ total = current_records.len() + added.len(),
|
|
|
|
|
+ "Extended record set"
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ current_records.extend(added);
|
|
|
|
|
+
|
|
|
|
|
+ if round == max_rounds - 1 {
|
|
|
|
|
+ tracing::warn!(max_rounds, "Maximum rounds reached without convergence");
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
Ok(results)
|
|
Ok(results)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/// Constructs a FlyeAssembler + MedakaPolisher for each assembly round.
|
|
|
|
|
+pub struct FlyeMedakaBuilder {
|
|
|
|
|
+ pub config: Config,
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+pub type BoxedAssembler = Box<dyn Assembler>;
|
|
|
|
|
+pub type BoxedPolisher = Box<dyn Polisher>;
|
|
|
|
|
+
|
|
|
|
|
+pub trait LocalAssemblyBuilder {
|
|
|
|
|
+ fn build(&self, reads_path: PathBuf, round_dir: &Path) -> (BoxedAssembler, BoxedPolisher);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl LocalAssemblyBuilder for FlyeMedakaBuilder {
|
|
|
|
|
+ fn build(&self, reads_path: PathBuf, round_dir: &Path) -> (BoxedAssembler, BoxedPolisher) {
|
|
|
|
|
+ let assembler = FlyeAssembler::from_config(
|
|
|
|
|
+ &self.config,
|
|
|
|
|
+ reads_path.clone(),
|
|
|
|
|
+ round_dir.join("flye"),
|
|
|
|
|
+ "10k".to_string(),
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ let polisher = MedakaPolisher::from_config(
|
|
|
|
|
+ &self.config,
|
|
|
|
|
+ reads_path,
|
|
|
|
|
+ assembler.assembly_fasta(),
|
|
|
|
|
+ round_dir.join("medaka"),
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ (Box::new(assembler), Box::new(polisher))
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
// ─── Supplementary detection ──────────────────────────────────────────────────
|
|
// ─── Supplementary detection ──────────────────────────────────────────────────
|
|
|
|
|
|
|
|
const BAM_FSUPPLEMENTARY: u16 = 0x800;
|
|
const BAM_FSUPPLEMENTARY: u16 = 0x800;
|
|
@@ -261,7 +398,7 @@ fn transfer_methylation_tags(
|
|
|
|
|
|
|
|
if let Some(&orig) = original_index.get(rec.qname()) {
|
|
if let Some(&orig) = original_index.get(rec.qname()) {
|
|
|
for tag in METHYLATION_TAGS {
|
|
for tag in METHYLATION_TAGS {
|
|
|
- rec.remove_aux(tag)?;
|
|
|
|
|
|
|
+ let _ = rec.remove_aux(tag);
|
|
|
if let Ok(aux) = orig.aux(tag) {
|
|
if let Ok(aux) = orig.aux(tag) {
|
|
|
rec.push_aux(tag, aux).with_context(|| {
|
|
rec.push_aux(tag, aux).with_context(|| {
|
|
|
format!(
|
|
format!(
|
|
@@ -279,3 +416,115 @@ fn transfer_methylation_tags(
|
|
|
|
|
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use log::info;

    use crate::io::bam::fetch_primary_records;

    use super::*;

    /// Integration test: fetch records from the ABL1 locus on chr9
    /// (chr9:142,958,315-142,958,913) and run the full iterative local
    /// assembly pipeline.
    ///
    /// Requires:
    /// - A valid indexed BAM at the path set in `bam_path`
    /// - A valid Pandora `Config` with minimap2, flye, medaka binaries configured
    /// - Cluster access (or slurm_runner = false for local execution)
    ///
    /// Note: the test deletes and recreates its work directory below
    /// `config.result_dir`.
    #[test]
    fn test_local_assembly() -> anyhow::Result<()> {
        // `env_logger::init` panics when a logger is already installed (e.g.
        // by another test in the same binary); a failed init is harmless here.
        let _ = env_logger::try_init();

        let id = "CML2518";
        let abl_locus = ("chr9", 142_958_315, 142_958_913);

        let config = Config::default();

        let bam_path = PathBuf::from(config.tumoral_bam(id));
        let work_dir = PathBuf::from(format!(
            "{}/{id}/{}/asm",
            config.result_dir, config.tumoral_name
        ));
        // Start from a clean directory so stale outputs cannot satisfy the
        // existence assertions below.
        if work_dir.exists() {
            std::fs::remove_dir_all(&work_dir)?;
        }
        std::fs::create_dir_all(&work_dir)?;

        let assembly_config = LocalAssemblyConfig {
            min_records: 10,
            case_id: id.to_string(),
            config: config.clone(),
        };

        // Fetch initial primary records from locus
        let records = fetch_primary_records(&bam_path, Some(abl_locus), None)?;

        info!("Fetched {} primary records from locus", records.len());

        assert!(
            records.len() >= assembly_config.min_records,
            "Too few records at locus: {} (need at least {})",
            records.len(),
            assembly_config.min_records
        );

        // Run iterative assembly — extends with suspicious reads each round
        let results = run_local_assembly_iterative(
            &bam_path,
            records,
            &FlyeMedakaBuilder {
                config: config.clone(),
            },
            work_dir.clone(),
            assembly_config,
            3,
        )?;

        // Assertions
        assert!(!results.is_empty(), "No contigs assembled");

        info!("Assembled {} contig(s)", results.len());

        for result in &results {
            info!(
                "Contig result | contig={} bam={} ref_bam={} ref_bed={} suspicious={}",
                result.contig_name,
                result.aligned_bam.display(),
                result.contig_ref_bam.display(),
                result.contig_ref_bed.display(),
                result.suspicious_reads.len(),
            );

            assert!(
                result.aligned_bam.exists(),
                "aligned_bam missing for contig {}",
                result.contig_name
            );
            assert!(
                result.contig_ref_bam.exists(),
                "contig_ref_bam missing for contig {}",
                result.contig_name
            );
            assert!(
                result.contig_ref_bed.exists(),
                "contig_ref_bed missing for contig {}",
                result.contig_name
            );
            assert!(
                result.contig_fasta.exists(),
                "contig_fasta missing for contig {}",
                result.contig_name
            );

            if !result.suspicious_reads.is_empty() {
                log::warn!(
                    "Suspicious reads remain after final round | contig={} count={}",
                    result.contig_name,
                    result.suspicious_reads.len(),
                );
            }
        }

        Ok(())
    }
}
|