Thomas 1 жил өмнө
parent
commit
603ef156d7
4 өөрчлөгдсөн 77 нэмэгдсэн , 724 устгасан
  1. 15 719
      Cargo.lock
  2. 2 1
      Cargo.toml
  3. 28 0
      src/dict_reader.rs
  4. 32 4
      src/lib.rs

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 15 - 719
Cargo.lock


+ 2 - 1
Cargo.toml

@@ -13,10 +13,11 @@ num-format = "0.4.4"
 rayon = "1.10.0"
 rust-htslib = "0.47.0"
 uuid = { version = "1.10.0", features = ["v4"] }
-pandora_lib_variants = { git = "https://git.t0m4.fr/Thomas/pandora_lib_variants.git" }
+# pandora_lib_variants = { git = "https://git.t0m4.fr/Thomas/pandora_lib_variants.git" }
 pandora_lib_pileup = { git = "https://git.t0m4.fr/Thomas/pandora_lib_pileup.git" }
 indicatif-log-bridge = "0.2.2"
 serde = { version = "1.0.*", default-features = false }
 postcard = { version = "1.0.8", features = ["alloc"] }
 flate2 = "1.0.31"
+csv = "1.3.0"
 

+ 28 - 0
src/dict_reader.rs

@@ -0,0 +1,28 @@
+use std::fs;
+
+use csv::ReaderBuilder;
+use anyhow::{ Ok, Result};
+use log::info;
+
+pub fn read_dict(path: &str) -> Result<Vec<(String, u32)>> {
+    info!("Parsing {}", path);
+
+    let mut reader = ReaderBuilder::new()
+        .delimiter(b'\t')
+        .flexible(true)
+        .has_headers(false)
+        .from_reader(fs::File::open(path)?);
+
+    let mut res = Vec::new();
+    for line in reader.records() {
+        let line = line.unwrap();
+        if line.get(0).unwrap() == "@SQ" {
+            res.push((
+                line.get(1).unwrap().replace("SN:", ""),
+                line.get(2).unwrap().replace("LN:", "").parse().unwrap(),
+            ));
+        }
+    }
+    Ok(res)
+}
+

+ 32 - 4
src/lib.rs

@@ -3,6 +3,7 @@ use crate::{
     bin::{scan_outliers, Bin},
 };
 use anyhow::Context;
+use dict_reader::read_dict;
 use log::info;
 use rayon::prelude::*;
 use rust_htslib::bam::{Format, Header, Read, Record, Writer};
@@ -14,6 +15,7 @@ use std::{
 
 pub mod bam;
 pub mod bin;
+pub mod dict_reader;
 
 #[derive(Debug)]
 pub struct Config {
@@ -151,7 +153,15 @@ pub fn scan_save(
 
     let header = Header::from_template(&header);
     for records in grouped_records {
-        let output_bam_path = format!("{out_dir_bam}/{}.bam", uuid::Uuid::new_v4());
+        let positions: Vec<i64> = records.iter().map(|r| r.pos()).collect();
+        let uuid = uuid::Uuid::new_v4().to_string();
+        let output_bam_path = format!(
+            "{out_dir_bam}/{}-{}_{}.bam",
+            *positions.iter().min().unwrap_or(&0),
+            *positions.iter().max().unwrap_or(&0),
+            &uuid[..4]
+        );
+
         let mut output_bam = Writer::from_path(output_bam_path, &header, Format::Bam).unwrap();
         for record in records {
             output_bam.write(&record)?;
@@ -160,7 +170,7 @@ pub fn scan_save(
     Ok(())
 }
 
-pub fn par_whole_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Result<()> {
+pub fn par_contig_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Result<()> {
     let bam = rust_htslib::bam::IndexedReader::from_path(bam_path).unwrap();
     let header_view = bam.header();
     let contig_tid = header_view
@@ -187,7 +197,7 @@ pub fn par_whole_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Re
     fs::create_dir_all(&out_dir_bam)?;
     let file_out = format!("{out_dir}/counts/{contig}");
     fs::create_dir_all(&file_out)?;
-    // let u = i.get(1..2).unwrap();
+
     let counts_files: Vec<String> = i
         .par_iter()
         .enumerate()
@@ -230,6 +240,15 @@ pub fn par_whole_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Re
     Ok(())
 }
 
+pub fn par_whole_scan(dict_file: &str, bam_path: &str, out_dir: &str) -> anyhow::Result<()> {
+    let dict = read_dict(dict_file)?;
+    for (contig, _) in dict {
+        info!("Scanning {contig}");
+        par_contig_scan(&contig, bam_path, out_dir)?;
+    }
+    Ok(())
+}
+
 // pub fn mrd_count_ratio(mrd_bam: &str, count_file: &str) -> anyhow::Result<()> {
 //     let count_file = File::open(count_file)?;
 //     let reader = BufReader::new(count_file);
@@ -285,7 +304,7 @@ mod tests {
         let tmp_dir = format!("/data/tmp/scan_{}", uuid::Uuid::new_v4());
         fs::create_dir_all(&tmp_dir).unwrap();
         info!("Creating {tmp_dir}");
-        par_whole_scan(
+        par_contig_scan(
             "chr9",
             &format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam"),
             &tmp_dir,
@@ -293,6 +312,15 @@ mod tests {
         .unwrap();
     }
 
+    #[test]
+    fn whole() {
+        init();
+        let id = "SPINATO";
+        let bam_path = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
+        let out_dir = format!("/data/longreads_basic_pipe/{id}/diag/scan");
+        par_whole_scan("/data/ref/hs1/chm13v2.0.dict", &bam_path, &out_dir).unwrap();
+    }
+
     // #[test]
     // fn phasing() -> anyhow::Result<()> {
     //     let id = "SALICETTO";

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно