|
|
@@ -1,4 +1,4 @@
|
|
|
-use anyhow::{Ok, Result};
|
|
|
+use anyhow::{anyhow, Ok, Result};
|
|
|
use log::info;
|
|
|
use minimap2::{Aligner, Mapping};
|
|
|
use noodles_fasta as fasta;
|
|
|
@@ -7,8 +7,9 @@ use std::{
|
|
|
collections::{HashMap, VecDeque},
|
|
|
fmt,
|
|
|
fs::{self, File},
|
|
|
- io::{BufWriter, Write},
|
|
|
+ io::{BufReader, BufWriter, Write},
|
|
|
process::{Command, Stdio},
|
|
|
+ thread
|
|
|
};
|
|
|
use uuid::Uuid;
|
|
|
|
|
|
@@ -28,7 +29,7 @@ pub struct Contig {
|
|
|
// contig seq on ref
|
|
|
pub mappings: Vec<Mapping>,
|
|
|
// reads on ref
|
|
|
- pub supporting_records: Vec<Record>,
|
|
|
+ pub supporting_records: Option<Vec<Record>>,
|
|
|
pub sequence: String,
|
|
|
pub contig_ref: ContigRef,
|
|
|
}
|
|
|
@@ -103,6 +104,20 @@ impl ContigRef {
|
|
|
ContigRef::RightAmbiguity(_) => None,
|
|
|
ContigRef::Ambigous(_) => None,
|
|
|
ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
+ let mut v = vec![a, b, c];
|
|
|
+ v.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+
|
|
|
+ let (a, b, c) = (
|
|
|
+ *v.get(0).clone().unwrap(),
|
|
|
+ *v.get(1).clone().unwrap(),
|
|
|
+ *v.get(2).clone().unwrap(),
|
|
|
+ );
|
|
|
+ let a_target_name = a.target_name.clone().unwrap_or(uk.clone());
|
|
|
+ let b_target_name = b.target_name.clone().unwrap_or(uk.clone());
|
|
|
+ let c_target_name = c.target_name.clone().unwrap_or(uk.clone());
|
|
|
+
|
|
|
+ if a_target_name != b_target_name {}
|
|
|
+
|
|
|
// Insertions
|
|
|
// prioritize first len
|
|
|
let (bp_a_1, bp_a_2) = if a.query_end <= b.query_end {
|
|
|
@@ -268,7 +283,7 @@ impl Genome {
|
|
|
&mut self,
|
|
|
id: String,
|
|
|
mappings: Vec<Mapping>,
|
|
|
- supporting_records: Vec<Record>,
|
|
|
+ supporting_records: Option<Vec<Record>>,
|
|
|
sequence: String,
|
|
|
) -> Result<()> {
|
|
|
let new_contig = Contig {
|
|
|
@@ -364,6 +379,19 @@ impl Genome {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+ pub fn add_contig_from_seq(
|
|
|
+ &mut self,
|
|
|
+ name: String,
|
|
|
+ sequence: &Vec<u8>,
|
|
|
+ aligner: &Aligner,
|
|
|
+ ) -> Result<()> {
|
|
|
+ let mappings = aligner
|
|
|
+ .map(sequence, false, false, None, None)
|
|
|
+ .expect("Unable to align");
|
|
|
+ self.add_contig(name, mappings, None, String::from_utf8(sequence.to_vec())?)?;
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
pub fn stats(&self) {
|
|
|
for (k, v) in self.chromosomes.iter() {
|
|
|
info!("{}:{}", k, v.contigs.len());
|
|
|
@@ -386,13 +414,14 @@ impl Chromosome {
|
|
|
}
|
|
|
|
|
|
impl Contig {
|
|
|
- pub fn sort(&mut self) {
|
|
|
- // sorting by target order
|
|
|
- self.mappings
|
|
|
- .sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
- }
|
|
|
+ // pub fn sort(&mut self) {
|
|
|
+ // // sorting by target order
|
|
|
+ // self.mappings
|
|
|
+ // .sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
+ // }
|
|
|
|
|
|
pub fn to_igv(&self, dir_path: &str) -> Result<()> {
|
|
|
+ let supporting_records = self.supporting_records.clone().ok_or(anyhow!("no reads"))?;
|
|
|
let contig_name = if let Some(hgvs) = self.hgvs() {
|
|
|
hgvs
|
|
|
} else {
|
|
|
@@ -406,8 +435,7 @@ impl Contig {
|
|
|
write_fai(&fasta_path);
|
|
|
|
|
|
let reads_path = format!("{contig_dir}/reads.fa");
|
|
|
- let n_reads = self
|
|
|
- .supporting_records
|
|
|
+ let n_reads = supporting_records
|
|
|
.clone()
|
|
|
.into_iter()
|
|
|
.map(|r| {
|
|
|
@@ -450,7 +478,7 @@ impl Contig {
|
|
|
),
|
|
|
];
|
|
|
write_bed(&bed_path, &d)?;
|
|
|
- },
|
|
|
+ }
|
|
|
ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
let d: Vec<(String, i32, i32, String)> = vec![a, b, c]
|
|
|
.iter()
|
|
|
@@ -469,7 +497,7 @@ impl Contig {
|
|
|
})
|
|
|
.collect();
|
|
|
write_bed(&bed_path, &d)?;
|
|
|
- },
|
|
|
+ }
|
|
|
_ => (),
|
|
|
}
|
|
|
|
|
|
@@ -486,7 +514,11 @@ impl Contig {
|
|
|
.expect("Unable to build index");
|
|
|
|
|
|
let mut mappings = Vec::new();
|
|
|
- for record in self.supporting_records.iter() {
|
|
|
+ let supporting_records = self
|
|
|
+ .supporting_records
|
|
|
+ .clone()
|
|
|
+ .ok_or(anyhow!("no supporting records"))?;
|
|
|
+ for record in supporting_records.iter() {
|
|
|
let seq = record.seq().as_bytes();
|
|
|
let alignment = aligner
|
|
|
.map(&seq, false, false, None, None)
|
|
|
@@ -546,7 +578,6 @@ fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
|
|
|
alignments.push(vec![aln.clone()]);
|
|
|
}
|
|
|
}
|
|
|
- println!("{mappings:?}");
|
|
|
|
|
|
Ok(alignments)
|
|
|
}
|
|
|
@@ -665,13 +696,94 @@ pub fn write_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()>
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+pub fn read_fasta(path: &str) -> Result<Vec<(String, Vec<u8>)>> {
|
|
|
+ let mut reader = File::open(&path)
|
|
|
+ .map(BufReader::new)
|
|
|
+ .map(fasta::Reader::new)?;
|
|
|
+
|
|
|
+ let mut res = Vec::new();
|
|
|
+ for result in reader.records() {
|
|
|
+ let record = result?;
|
|
|
+ let u = String::from_utf8(record.name().to_vec())?;
|
|
|
+ let s = record.sequence().as_ref().to_vec();
|
|
|
+ res.push((u, s));
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(res)
|
|
|
+}
|
|
|
+use crossbeam_channel::{unbounded, Receiver, Sender};
|
|
|
+
|
|
|
+pub fn spawn_aligner(reference: String, mmi: Option<String>) -> (Sender<Option<Vec<u8>>>, Receiver<(Vec<u8>, Vec<Mapping>)>) {
|
|
|
+ println!("kkkkkkkkkk");
|
|
|
+ let reference = reference.clone();
|
|
|
+ let mmi = mmi.clone();
|
|
|
+ let (thread_s, out_r) = unbounded::<(Vec<u8>, Vec<Mapping>)>();
|
|
|
+ let (out_s, thread_r) = unbounded::<Option<Vec<u8>>>();
|
|
|
+
|
|
|
+ thread::spawn(move || {
|
|
|
+ println!("Thread spawned");
|
|
|
+ let aligner = Aligner::builder()
|
|
|
+ .map_ont()
|
|
|
+ .with_threads(8)
|
|
|
+ .with_cigar()
|
|
|
+ .with_index(reference, mmi.as_deref())
|
|
|
+ .expect("Unable to build index");
|
|
|
+
|
|
|
+ println!("aligner spawned");
|
|
|
+
|
|
|
+
|
|
|
+ loop {
|
|
|
+ if let std::result::Result::Ok(seq) = thread_r.recv() {
|
|
|
+ println!("rec");
|
|
|
+ match seq {
|
|
|
+ Some(seq) => {
|
|
|
+ let mappings = aligner
|
|
|
+ .map(&seq, false, false, None, None)
|
|
|
+ .expect("Unable to align");
|
|
|
+ let _ = thread_s.send((seq, mappings));
|
|
|
+ }
|
|
|
+ None => break,
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ println!("Out");
|
|
|
+
|
|
|
+ return (out_s, out_r);
|
|
|
+}
|
|
|
+
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
use super::*;
|
|
|
+ use test_log::test;
|
|
|
+
|
|
|
+
|
|
|
+ #[test_log::test]
|
|
|
+ fn it_works() -> Result<()> {
|
|
|
+ let _ = env_logger::builder().is_test(true).try_init();
|
|
|
+ let contig_fa = "./data_test/contig_1.fa";
|
|
|
|
|
|
- #[test]
|
|
|
- fn it_works() {
|
|
|
- // let result = add(2, 2);
|
|
|
- // assert_eq!(result, 4);
|
|
|
+ let reference = "/data/ref/hs1/chm13v2.0.fa";
|
|
|
+ let mmi_path = "/data/ref/hs1/hs1-ont.mmi".to_string();
|
|
|
+ let mmi = Some("/data/ref/hs1/hs1-ont.mmi");
|
|
|
+
|
|
|
+ let mut genome = Genome::new();
|
|
|
+ let (s, r) = spawn_aligner(reference.to_string(), Some(mmi_path.to_string()));
|
|
|
+
|
|
|
+ let sequences = read_fasta(contig_fa)?;
|
|
|
+ for (name, seq) in sequences {
|
|
|
+ println!("Sending to thread");
|
|
|
+ s.send(Some(seq)).unwrap();
|
|
|
+ // genome.add_contig_from_seq(name, &seq, &aligner)?;
|
|
|
+ }
|
|
|
+
|
|
|
+ while let std::result::Result::Ok(res) = r.recv() {
|
|
|
+ println!("Recived {:?}", res);
|
|
|
+ }
|
|
|
+
|
|
|
+ s.send(None)?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
}
|
|
|
}
|