lib.rs 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. pub mod flye;
  2. use anyhow::anyhow;
  3. use bam::{
  4. header::HeaderEntry,
  5. record::tags::{StringType, TagValue},
  6. RecordWriter,
  7. };
  8. use flye::{read_flye_coverage, run_flye};
  9. use log::{info, warn};
  10. use nom::AsBytes;
  11. use pandora_lib_blastn::BlastResult;
  12. use petgraph::{
  13. algo::dijkstra,
  14. data::{Build, DataMap},
  15. dot::Dot,
  16. graph::NodeIndex,
  17. stable_graph::StableUnGraph,
  18. visit::{EdgeRef, IntoNeighbors, NodeIndexable},
  19. };
  20. use regex::Regex;
  21. use rust_htslib::bam::{Read, Reader, Record};
  22. use std::{
  23. collections::{HashMap, HashSet, VecDeque},
  24. fs::{self, File},
  25. io::{BufReader, Cursor, Write},
  26. path::{Path, PathBuf},
  27. };
  28. use uuid::Uuid;
  29. pub fn read_bam(path: &str) -> anyhow::Result<Vec<Record>> {
  30. let mut res = Vec::new();
  31. let mut bam = Reader::from_path(Path::new(path))?;
  32. // Iterate over all records
  33. for result in bam.records() {
  34. let record = result?;
  35. res.push(record);
  36. }
  37. Ok(res)
  38. }
  39. fn write_fastqs(dir_path: &str, records: &Vec<Record>) -> anyhow::Result<Vec<String>> {
  40. let mut paths = Vec::new();
  41. for record in records {
  42. let qname = String::from_utf8(record.qname().to_vec())?;
  43. let qseq = record.seq().as_bytes();
  44. let qqual = String::from_utf8(record.qual().to_vec())?;
  45. assert_eq!(qseq.len(), qqual.len());
  46. let fastq_record = noodles_fasta::Record::new(
  47. noodles_fasta::record::Definition::new(qname.clone(), None),
  48. qseq.into(),
  49. );
  50. let mut writer = noodles_fasta::io::Writer::new(Vec::new());
  51. writer.write_record(&fastq_record)?;
  52. let file_path = format!("{dir_path}/{qname}.fasta");
  53. let mut file = File::create(&file_path)?;
  54. file.write_all(writer.get_ref())?;
  55. paths.push(file_path);
  56. }
  57. Ok(paths)
  58. }
  59. pub fn lastz_two_by_two(records: &[String], best: bool) -> anyhow::Result<Vec<BlastResult>> {
  60. let lastz_bin = "lastz";
  61. let mut all: Vec<BlastResult> = Vec::new();
  62. for i in 0..records.len() {
  63. for j in (i + 1)..records.len() {
  64. let record_a = &records[i];
  65. let record_b = &records[j];
  66. let out = duct::cmd!(lastz_bin, "--format=blastn", record_a, record_b).read()?;
  67. let mut results: Vec<pandora_lib_blastn::BlastResult> = out
  68. .lines()
  69. .filter(|s| !s.starts_with('#'))
  70. .map(|s| s.to_string())
  71. .filter_map(|s| BlastResult::try_from(s).ok())
  72. .collect();
  73. if !results.is_empty() {
  74. if best {
  75. results.sort_by_key(|r| r.alignment_length);
  76. all.push(results.last().unwrap().clone())
  77. } else {
  78. all.extend(results.into_iter());
  79. }
  80. }
  81. }
  82. }
  83. Ok(all)
  84. }
  85. pub fn lastz_bam(query_path: &str, subject_path: &str) -> anyhow::Result<Vec<bam::Record>> {
  86. let lastz_bin = "lastz";
  87. let pipe =
  88. format!("{lastz_bin} --format=softsam {query_path} {subject_path} | samtools view -b");
  89. let out = duct::cmd!("bash", "-c", pipe).reader()?;
  90. let reader = bam::BamReader::from_stream(out, 4)?;
  91. Ok(reader.into_iter().filter_map(anyhow::Result::ok).collect())
  92. }
  93. pub fn lastz_remap(
  94. bam_path: &str,
  95. ref_fa: &str,
  96. ref_len: u32,
  97. new_bam: &str,
  98. modkit_pileup: &str,
  99. ) -> anyhow::Result<()> {
  100. let tmp = "/tmp";
  101. let tmp_dir = format!("{tmp}/{}", Uuid::new_v4());
  102. fs::create_dir(&tmp_dir)?;
  103. let mut records = Vec::new();
  104. let mut mm_hm = HashMap::new();
  105. let mut ml_hm = HashMap::new();
  106. let reader = bam::BamReader::from_path(bam_path, 4).unwrap();
  107. for record in reader {
  108. let record = record?;
  109. let query_name = String::from_utf8(record.name().to_vec())?;
  110. if let Some(TagValue::String(value, StringType::String)) = record.tags().get(b"MM") {
  111. mm_hm.insert(query_name.clone(), value.to_vec());
  112. }
  113. if let Some(TagValue::IntArray(value)) = record.tags().get(b"ML") {
  114. ml_hm.insert(query_name.clone(), value.raw().to_vec());
  115. }
  116. let query_path = format!("{}/{}.fa", &tmp_dir, query_name);
  117. let query_seq = String::from_utf8(record.sequence().to_vec())?;
  118. write_fasta(&query_path, &vec![(query_name.clone(), query_seq)]);
  119. let mut res = lastz_bam(&query_path, ref_fa)?;
  120. // take best length
  121. res.sort_by_cached_key(|r| r.aligned_query_end() - r.aligned_query_start());
  122. let mut r = res.last().unwrap().clone();
  123. r.set_name(query_name.as_bytes().to_vec().clone());
  124. records.push(r);
  125. }
  126. records.sort_by_cached_key(|b| b.start());
  127. let contig_name = PathBuf::from(&ref_fa)
  128. .file_stem()
  129. .unwrap()
  130. .to_string_lossy()
  131. .to_string();
  132. // Header format
  133. let mut header = bam::header::Header::new();
  134. let mut header_line = HeaderEntry::header_line("1.6".to_string());
  135. header_line.push(b"SO", "Coordinate".to_string());
  136. header.push_entry(header_line).unwrap();
  137. header
  138. .push_entry(HeaderEntry::ref_sequence(contig_name, ref_len))
  139. .unwrap();
  140. let mut bam_writer = bam::bam_writer::BamWriterBuilder::new();
  141. let mut w = bam_writer.from_path(new_bam, header)?;
  142. for record in records.iter() {
  143. let mut record = record.clone();
  144. let record_name = String::from_utf8(record.name().to_vec())?;
  145. let tags = record.tags_mut();
  146. if let Some(mm) = mm_hm.get(&record_name) {
  147. tags.push_string(b"MM", mm)
  148. }
  149. let tags = record.tags_mut();
  150. if let Some(ml) = ml_hm.get(&record_name) {
  151. let v = ml.as_bytes();
  152. tags.push_array(b"ML", v);
  153. }
  154. w.write(&record)?;
  155. }
  156. w.finish()?;
  157. fs::remove_dir_all(tmp_dir)?;
  158. duct::cmd!("samtools", "index", "-@", "10", new_bam).run()?;
  159. duct::cmd!("modkit", "pileup", new_bam, modkit_pileup).run()?;
  160. Ok(())
  161. }
  162. pub fn write_fasta(contig_fa: &str, d: &Vec<(String, String)>) {
  163. let file = File::create(contig_fa).unwrap();
  164. let mut writer = noodles_fasta::writer::Builder::default().build_with_writer(file);
  165. let mut passed = Vec::new();
  166. for (name, sequence) in d {
  167. let name = name.to_string();
  168. if sequence.is_empty() {
  169. continue;
  170. }
  171. if passed.contains(&name) {
  172. continue;
  173. }
  174. passed.push(name.clone());
  175. let record = noodles_fasta::Record::new(
  176. noodles_fasta::record::Definition::new(name.as_str(), None),
  177. noodles_fasta::record::Sequence::from(sequence.as_bytes().to_vec()),
  178. );
  179. writer.write_record(&record).unwrap();
  180. }
  181. }
  182. pub fn make_graph(d: Vec<BlastResult>, records: &Vec<Record>) {
  183. let mut g: StableUnGraph<String, Option<BlastResult>> = StableUnGraph::default();
  184. let mut name_to_id: HashMap<String, NodeIndex> = HashMap::new();
  185. for blast_result in d {
  186. let nid_a = if let Some(nid_a) = name_to_id.get(&blast_result.query_id) {
  187. *nid_a
  188. } else {
  189. let k = blast_result.query_id.clone();
  190. let nid_a = g.add_node(k.clone());
  191. name_to_id.insert(k, nid_a);
  192. nid_a
  193. };
  194. let nid_b = if let Some(nid_b) = name_to_id.get(&blast_result.subject_id) {
  195. *nid_b
  196. } else {
  197. let k = blast_result.subject_id.clone();
  198. let nid_b = g.add_node(k.clone());
  199. name_to_id.insert(k, nid_b);
  200. nid_b
  201. };
  202. g.add_edge(nid_a, nid_b, Some(blast_result));
  203. }
  204. println!(
  205. "{:?}",
  206. Dot::with_config(&g, &[petgraph::dot::Config::EdgeNoLabel])
  207. );
  208. // let order = dijkstra(&g, start_node, None, |_| 0);
  209. // println!("{order:#?}");
  210. // begin with start node
  211. let mut n_neigh: Vec<(usize, NodeIndex)> = g
  212. .node_indices()
  213. .map(|nid| (g.neighbors(nid).count(), nid))
  214. .collect();
  215. n_neigh.sort_by_key(|v| v.0);
  216. let start_node = n_neigh.last().unwrap().1;
  217. let start_name = g.node_weight(start_node).unwrap().to_string();
  218. let neighbors = g.neighbors(start_node);
  219. let mut neighbors_blast = Vec::new();
  220. for neighbor in neighbors {
  221. let e: Vec<BlastResult> = g
  222. .edges_connecting(start_node, neighbor)
  223. .map(|e| e.weight().clone().unwrap())
  224. .collect();
  225. neighbors_blast.extend(e);
  226. }
  227. let mut neighbors_blast: Vec<(BlastResult, usize)> = neighbors_blast
  228. .iter()
  229. .map(|b| {
  230. if b.subject_id != start_name {
  231. b.swap_query_subject()
  232. } else {
  233. b.clone()
  234. }
  235. })
  236. .map(|b| (b.clone(), get_seq_len(records, &b.query_id)))
  237. .collect();
  238. neighbors_blast.sort_by_key(|b| b.0.s_end);
  239. let start_len = get_seq_len(records, &start_name);
  240. println!("{start_len}");
  241. println!("{neighbors_blast:#?}");
  242. }
  243. pub fn get_seq_len(records: &[Record], id: &str) -> usize {
  244. let lens: Vec<usize> = records
  245. .iter()
  246. .filter(|r| String::from_utf8(r.qname().to_vec()).unwrap() == id)
  247. .map(|r| r.seq_len())
  248. .collect();
  249. lens[0]
  250. }
  251. pub fn order_sequences_with_positions(blast_results: &[BlastResult]) -> Vec<(String, usize)> {
  252. let mut graph: HashMap<String, HashSet<String>> = HashMap::new();
  253. let mut in_degree: HashMap<String, usize> = HashMap::new();
  254. let mut all_sequences: HashSet<String> = HashSet::new();
  255. // Build the graph
  256. for result in blast_results {
  257. all_sequences.insert(result.query_id.clone());
  258. all_sequences.insert(result.subject_id.clone());
  259. if result.q_start < result.s_start {
  260. graph
  261. .entry(result.query_id.clone())
  262. .or_insert_with(HashSet::new)
  263. .insert(result.subject_id.clone());
  264. *in_degree.entry(result.subject_id.clone()).or_insert(0) += 1;
  265. } else {
  266. graph
  267. .entry(result.subject_id.clone())
  268. .or_insert_with(HashSet::new)
  269. .insert(result.query_id.clone());
  270. *in_degree.entry(result.query_id.clone()).or_insert(0) += 1;
  271. }
  272. }
  273. // Topological sort
  274. let mut queue: VecDeque<String> = all_sequences
  275. .iter()
  276. .filter(|&seq| !in_degree.contains_key(seq))
  277. .cloned()
  278. .collect();
  279. let mut ordered_sequences: Vec<String> = Vec::new();
  280. while let Some(seq) = queue.pop_front() {
  281. ordered_sequences.push(seq.clone());
  282. if let Some(neighbors) = graph.get(&seq) {
  283. for neighbor in neighbors {
  284. if let Some(degree) = in_degree.get_mut(neighbor) {
  285. *degree -= 1;
  286. if *degree == 0 {
  287. queue.push_back(neighbor.clone());
  288. }
  289. }
  290. }
  291. }
  292. }
  293. // Handle any remaining sequences (in case of cycles)
  294. for seq in all_sequences {
  295. if !ordered_sequences.contains(&seq) {
  296. ordered_sequences.push(seq);
  297. }
  298. }
  299. // Calculate absolute positions
  300. let mut positions: HashMap<String, usize> = HashMap::new();
  301. let mut current_position = 0;
  302. for seq in &ordered_sequences {
  303. positions.insert(seq.clone(), current_position);
  304. current_position += 1; // Increment position for each sequence
  305. }
  306. let mut res: Vec<_> = ordered_sequences
  307. .into_iter()
  308. .map(|seq| (seq.clone(), *positions.get(&seq).unwrap()))
  309. .collect();
  310. res.sort_by(|a, b| a.1.cmp(&b.1));
  311. res
  312. }
  313. // fn order_sequences_with_positions_bam(records: &Vec<bam::Record>, header: &Header) -> anyhow::Result<Vec<(String, usize)>> {
  314. // let mut graph: HashMap<String, HashSet<String>> = HashMap::new();
  315. // let mut in_degree: HashMap<String, usize> = HashMap::new();
  316. // let mut all_sequences: HashSet<String> = HashSet::new();
  317. //
  318. // for record in records {
  319. // let query_id = String::from_utf8(record.name().to_vec());
  320. // let subject_id = record.ref_id().clone();
  321. // let q_start = record.aligned_query_start();
  322. // let s_start = record.start();
  323. //
  324. // all_sequences.insert(query_id.clone());
  325. // all_sequences.insert(subject_id.clone());
  326. //
  327. // if q_start < s_start {
  328. // graph.entry(query_id.clone())
  329. // .or_insert_with(HashSet::new())
  330. // .insert(subject_id.clone());
  331. // *in_degree.entry(subject_id.clone()).or_insert(0) += 1;
  332. // } else {
  333. // graph.entry(subject_id.clone())
  334. // .or_insert_with(HashSet::new())
  335. // .insert(query_id.clone());
  336. // *in_degree.entry(query_id.clone()).or_insert(0) += 1;
  337. // }
  338. // }
  339. //
  340. // let mut queue: VecDeque<String> = all_sequences.iter()
  341. // .filter(|&seq| !in_degree.contains_key(seq))
  342. // .cloned()
  343. // .collect();
  344. //
  345. // let mut ordered_sequences: Vec<String> = Vec::new();
  346. //
  347. // while let Some(seq) = queue.pop_front() {
  348. // ordered_sequences.push(seq.clone());
  349. //
  350. // if let Some(neighbors) = graph.get(&seq) {
  351. // for neighbor in neighbors {
  352. // if let Some(degree) = in_degree.get_mut(neighbor) {
  353. // *degree -= 1;
  354. // if *degree == 0 {
  355. // queue.push_back(neighbor.clone());
  356. // }
  357. // }
  358. // }
  359. // }
  360. // }
  361. //
  362. // for seq in all_sequences {
  363. // if !ordered_sequences.contains(&seq) {
  364. // ordered_sequences.push(seq.clone());
  365. // }
  366. // }
  367. //
  368. // let mut positions: HashMap<String, usize> = HashMap::new();
  369. // let mut current_position = 0;
  370. //
  371. // for seq in &ordered_sequences {
  372. // positions.insert(seq.clone(), current_position);
  373. // current_position += 1;
  374. // }
  375. //
  376. // ordered_sequences.into_iter()
  377. // .map(|seq| (seq.clone(), *positions.get(&seq).unwrap()))
  378. // .collect()
  379. // }
  380. pub fn fai(path: &str) -> Result<std::process::Output, std::io::Error> {
  381. duct::cmd!("samtools", "faidx", path).run()
  382. }
  383. #[derive(Debug, Default)]
  384. pub struct FlyeAsm {
  385. pub input_id: String,
  386. pub asm_dir: String,
  387. pub input_bam: String,
  388. pub input_records: Vec<bam::Record>,
  389. pub on_contig_bam: String,
  390. pub contigs: Option<Vec<String>>,
  391. pub flye_asm_params: FlyeAsmParams,
  392. }
  393. #[derive(Debug)]
  394. pub struct FlyeAsmParams {
  395. pub min_cov: u16,
  396. }
  397. impl Default for FlyeAsmParams {
  398. fn default() -> Self {
  399. Self { min_cov: 2 }
  400. }
  401. }
  402. impl FlyeAsm {
  403. pub fn new(asm_dir: &str, input_id: &str) -> anyhow::Result<Self> {
  404. let input_bam = format!("{asm_dir}/{input_id}.bam");
  405. let on_contig_bam = format!("{asm_dir}/{input_id}_contig.bam");
  406. let reader = bam::BamReader::from_path(&input_bam, 3)?;
  407. let mut input_records = Vec::new();
  408. for rec in reader {
  409. input_records.push(rec?);
  410. }
  411. Ok(FlyeAsm {
  412. asm_dir: asm_dir.to_string(),
  413. input_id: input_id.to_string(),
  414. on_contig_bam,
  415. contigs: None,
  416. flye_asm_params: FlyeAsmParams::default(),
  417. input_records,
  418. input_bam,
  419. })
  420. }
  421. pub fn assemble(mut self) -> anyhow::Result<Self> {
  422. let min_cov = self.flye_asm_params.min_cov;
  423. let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
  424. info!("Creating tmp directory {tmp_dir}");
  425. fs::create_dir(&tmp_dir).unwrap();
  426. let input_fa = format!("{tmp_dir}/input.fa");
  427. records_to_fasta(&self.input_records, &input_fa)?;
  428. let flye_tmp_dir = format!("{tmp_dir}/flye");
  429. fs::create_dir(&flye_tmp_dir).unwrap();
  430. run_flye(&input_fa, &flye_tmp_dir, "10M");
  431. let contigs_path = format!("{flye_tmp_dir}/assembly.fasta");
  432. if Path::new(&contigs_path).exists() {
  433. let assembly_path = format!("{flye_tmp_dir}/assembly.fasta");
  434. let flye_cov_path = format!("{flye_tmp_dir}/40-polishing/base_coverage.bed.gz");
  435. // Read the assembly fasta
  436. let mut reader = File::open(assembly_path)
  437. .map(BufReader::new)
  438. .map(noodles_fasta::Reader::new)?;
  439. let mut contigs = Vec::new();
  440. for result in reader.records() {
  441. let record = result?;
  442. let contig_name = String::from_utf8(record.name().to_vec()).unwrap();
  443. let (s, e) = read_flye_coverage(&flye_cov_path, min_cov.into(), &contig_name);
  444. let seq = record.sequence().as_ref();
  445. let seq = String::from_utf8(seq.to_vec()).unwrap();
  446. let seq: String = seq[s..e].into();
  447. contigs.push(seq.clone());
  448. }
  449. self.contigs = Some(contigs);
  450. } else {
  451. warn!("No reads assembled")
  452. }
  453. // Cleaning
  454. fs::remove_dir_all(tmp_dir)?;
  455. Ok(self)
  456. }
  457. pub fn save(self) -> anyhow::Result<()> {
  458. if self.contigs.is_none() {
  459. return Err(anyhow!("No reads were assembled"));
  460. }
  461. for (i, contig) in self.contigs.unwrap().iter().enumerate() {
  462. let suffixe = if i == 0 {
  463. "".to_string()
  464. } else {
  465. format!("_{i}")
  466. };
  467. let contig_id = format!("{}{suffixe}_flye", self.input_id);
  468. let contig_fa = format!("{}/{contig_id}.fa", self.asm_dir);
  469. info!("Saving contig {contig_id} in {contig_fa}");
  470. write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())]);
  471. fai(&contig_fa)?;
  472. // Writing bed file from best blastn results
  473. let bed_path = format!("{}/{contig_id}.bed", self.asm_dir);
  474. let bed = pandora_lib_blastn::Blast::init(&contig_fa)?
  475. .run()?
  476. .keep_best()
  477. .to_bed()?;
  478. let mut f = File::create(bed_path)?;
  479. f.write_all(bed.as_bytes())?;
  480. // Remaping input bam to contig
  481. info!("Mapping input reads to {contig_id}");
  482. let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
  483. let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
  484. lastz_remap(
  485. &self.input_bam,
  486. &contig_fa,
  487. contig.len() as u32,
  488. &new_bam,
  489. &modkit_pileup,
  490. )?;
  491. }
  492. Ok(())
  493. }
  494. }
  495. pub fn records_to_fasta(r: &Vec<bam::Record>, fa_path: &str) -> anyhow::Result<()> {
  496. let file = File::create(fa_path).unwrap();
  497. let mut writer = noodles_fasta::writer::Builder::default().build_with_writer(file);
  498. let mut names = Vec::new();
  499. for bam_record in r {
  500. let name = bam_record.name();
  501. if !names.contains(&name) {
  502. names.push(name);
  503. let sequence = bam_record.sequence().to_vec();
  504. let record = noodles_fasta::Record::new(
  505. noodles_fasta::record::Definition::new(name, None),
  506. noodles_fasta::record::Sequence::from(sequence),
  507. );
  508. writer.write_record(&record)?;
  509. }
  510. }
  511. Ok(())
  512. }
  513. pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
  514. let pattern = format!("{}/*.bam", dir);
  515. let re = Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$")
  516. .unwrap();
  517. for entry in glob::glob(&pattern).expect("Failed to read glob pattern") {
  518. match entry {
  519. Ok(path) => {
  520. if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
  521. if re.is_match(file_name) {
  522. if let Some(input_id ) = path.file_stem().and_then(|n| n.to_str()) {
  523. if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() {
  524. warn!("{input_id} already assembled");
  525. } else {
  526. let f = FlyeAsm::new(dir, input_id)?;
  527. if let Ok(f) = f.assemble() {
  528. match f.save() {
  529. Ok(_) => info!("✅"),
  530. Err(e) => warn!("❌ {e}"),
  531. }
  532. }
  533. }
  534. }
  535. }
  536. }
  537. }
  538. Err(e) => println!("{:?}", e),
  539. }
  540. }
  541. Ok(())
  542. }
  543. #[cfg(test)]
  544. mod tests {
  545. use std::fs;
  546. use uuid::Uuid;
  547. use super::*;
  548. fn init() {
  549. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  550. .is_test(true)
  551. .try_init();
  552. }
  553. #[test]
  554. fn it_works() -> anyhow::Result<()> {
  555. let dir = "/tmp";
  556. let records = read_bam("/home/prom/Documents/Programmes/pandora_lib_pileup/44d3d2a9-ad53-45f7-9a1a-2e1e229377e4.bam")?;
  557. println!("{} rec", records.len());
  558. let tmp_dir = format!("{dir}/ass_{}", Uuid::new_v4());
  559. println!("{tmp_dir}");
  560. fs::create_dir(&tmp_dir)?;
  561. let paths = write_fastqs(&tmp_dir, &records)?;
  562. println!("kkkk");
  563. let blast_results = lastz_two_by_two(&paths, true)?;
  564. // println!("{blast_results:#?}");
  565. fs::remove_dir_all(tmp_dir)?;
  566. make_graph(blast_results, &records);
  567. // let res = order_sequences_with_positions(&blast_results);
  568. // println!("{res:#?}");
  569. // let leftmost = res.first().unwrap().0.clone();
  570. // let mut on_lm: Vec<BlastResult> = blast_results
  571. // .iter()
  572. // .filter(|b| b.subject_id == leftmost || b.query_id == leftmost)
  573. // .map(|b| {
  574. // if b.query_id == leftmost {
  575. // b.swap_query_subject()
  576. // } else {
  577. // b.clone()
  578. // }
  579. // })
  580. // .collect();
  581. // on_lm.sort_by(|a, b| a.s_start.cmp(&b.s_start));
  582. // println!("{on_lm:#?}");
  583. Ok(())
  584. }
  585. #[test]
  586. fn flye() -> anyhow::Result<()> {
  587. init();
  588. let id = "BECERRA";
  589. let input_id = "6d51d94e-b951-48ef-9cef-63c10f235ebe";
  590. // let id = "SALICETTO";
  591. // let input_id = "cd8e4a8a-27ed-4045-af88-9838201f164f";
  592. let res_dir = "/data/longreads_basic_pipe";
  593. let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
  594. FlyeAsm::new(&asm_dir, input_id)?.assemble()?.save()?;
  595. Ok(())
  596. }
  597. #[test]
  598. fn flye_d() -> anyhow::Result<()> {
  599. init();
  600. let id = "SALICETTO";
  601. let res_dir = "/data/longreads_basic_pipe";
  602. let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
  603. dir_flye(&asm_dir)
  604. }
  605. }