Ver Fonte

par whole scan

Thomas há 1 ano atrás
pai
commit
26dff6c9d2
2 ficheiros alterados com 132 adições e 4 exclusões
  1. 50 0
      src/bin.rs
  2. 82 4
      src/lib.rs

+ 50 - 0
src/bin.rs

@@ -88,6 +88,36 @@ impl Bin {
         max_pos.unwrap_or((0, 0))
     }
 
+    pub fn max_start(&self) -> (u32, usize) {
+        let mut s: HashMap<u32, usize> = HashMap::new();
+        for record in self.reads_store.values() {
+            let reference_start = record.reference_start() as u32;
+
+            if reference_start >= self.start && reference_start <= self.end {
+                *s.entry(reference_start).or_default() += 1;
+            }
+            
+        }
+
+        let max_pos = s.into_iter().max_by_key(|(_, v)| *v);
+        max_pos.unwrap_or((0, 0))
+    }
+
+    pub fn max_end(&self) -> (u32, usize) {
+        let mut e: HashMap<u32, usize> = HashMap::new();
+        for record in self.reads_store.values() {
+            let reference_end = record.reference_end() as u32;
+
+            if reference_end >= self.start && reference_end <= self.end {
+                *e.entry(reference_end).or_default() += 1;
+            }
+            
+        }
+
+        let max_pos = e.into_iter().max_by_key(|(_, v)| *v);
+        max_pos.unwrap_or((0, 0))
+    }
+
     pub fn se_primary(&mut self, pos: u32) -> Vec<Record> {
         self.reads_store
             .values()
@@ -97,6 +127,26 @@ impl Bin {
             .map(|record| primary_record(&mut self.bam_reader, record.clone()))
             .collect()
     }
+
+    pub fn s_primary(&mut self, pos: u32) -> Vec<Record> {
+        self.reads_store
+            .values()
+            .filter(|record| {
+                record.reference_start() as u32 == pos
+            })
+            .map(|record| primary_record(&mut self.bam_reader, record.clone()))
+            .collect()
+    }
+
+    pub fn e_primary(&mut self, pos: u32) -> Vec<Record> {
+        self.reads_store
+            .values()
+            .filter(|record| {
+                record.reference_end() as u32 == pos
+            })
+            .map(|record| primary_record(&mut self.bam_reader, record.clone()))
+            .collect()
+    }
 }
 
 pub fn scan(

+ 82 - 4
src/lib.rs

@@ -68,14 +68,25 @@ pub fn scan_save(
             })
             .filter(|(_, n, n_low_mapq, _, _, _, _)| *n > **n_low_mapq as usize)
             .filter(|(_, _, _, _, sa_outlier, _, se_outlier)| *sa_outlier || *se_outlier)
-            .map(|(start, _n, _, _sa, sa_outlier, _se, se_outlier)| {
+            .flat_map(|(start, _n, _, _sa, sa_outlier, _se, se_outlier)| {
                 let mut bin = Bin::new(bam_path, contig, *start, bin_length).unwrap();
                 let mut records = Vec::new();
                 if sa_outlier {
-                    records.extend(bin.sa_primary());
+                    records.push(bin.sa_primary())
+                    // records.extend(bin.sa_primary());
                 } else if se_outlier {
                     let (pos, _) = bin.max_start_or_end();
-                    records.extend(bin.se_primary(pos));
+                    // let s = bin.s_primary(pos);
+                    // if !s.is_empty() {
+                    //     records.push(s);
+                    // }
+                    // let e = bin.e_primary(pos);
+                    // if !e.is_empty() {
+                    //     records.push(e);
+                    // }
+                    records.push(bin.se_primary(pos));
+
+                    // records.extend(bin.se_primary(pos));
                 };
                 records
             })
@@ -289,6 +300,8 @@ pub fn par_whole_scan(dict_file: &str, bam_path: &str, out_dir: &str) -> anyhow:
 #[cfg(test)]
 mod tests {
 
+    use rust_htslib::bam::Reader;
+
     use super::*;
 
     fn init() {
@@ -315,10 +328,75 @@ mod tests {
     #[test]
     fn whole() {
         init();
-        let id = "SPINATO";
+        let id = "LEVASSEUR";
         let bam_path = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
         let out_dir = format!("/data/longreads_basic_pipe/{id}/diag/scan");
         par_whole_scan("/data/ref/hs1/chm13v2.0.dict", &bam_path, &out_dir).unwrap();
+        let bam_path = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");
+        let out_dir = format!("/data/longreads_basic_pipe/{id}/mrd/scan");
+        par_whole_scan("/data/ref/hs1/chm13v2.0.dict", &bam_path, &out_dir).unwrap();
+    }
+
+    #[test]
+    fn locus() {
+        init();
+        let len = 1000;
+        let id = "LEVASSEUR";
+        let bam_path = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
+        let out_scan = "/tmp/test.txt";
+        let mut writer = BufWriter::new(File::create(out_scan).unwrap());
+        let out_dir_bam = format!("/tmp/scan_{}", uuid::Uuid::new_v4());
+        fs::create_dir_all(&out_dir_bam).unwrap();
+        let config = Config::default();
+
+        scan_save(
+            &bam_path,
+            "chr10",
+            /* 102020492 - (len * 1000) */ 0,
+            120000000,
+            &mut writer,
+            &out_dir_bam,
+            &config,
+        )
+        .unwrap();
+        let files = fs::read_dir(&out_dir_bam).unwrap();
+        let mut records_ids = Vec::new();
+        for file in files {
+            let file = file.unwrap().path().display().to_string();
+            if file.contains(".bam") {
+                let mut r = Reader::from_path(file).unwrap();
+                for record in r.records() {
+                    let record = record.unwrap();
+                    records_ids.push(String::from_utf8(record.qname().to_vec()).unwrap());
+                }
+            }
+        }
+
+        assert!(records_ids.contains(&"56bbfd4a-0d1d-4d97-b307-7626be446ce8".to_string()));
+        fs::remove_dir_all(out_dir_bam).unwrap();
+        fs::remove_file(out_scan).unwrap();
+    }
+
+    #[test]
+    fn has() {
+        let out_dir_bam = "/data/longreads_basic_pipe/LEVASSEUR/diag/scan/reads/chr10";
+        let files = fs::read_dir(out_dir_bam).unwrap();
+        let mut records_ids = Vec::new();
+        for file in files {
+            let file = file.unwrap().path().display().to_string();
+            if file.contains(".bam") && !file.contains("flye") {
+                let mut r = Reader::from_path(&file).unwrap();
+                for record in r.records() {
+                    let record = record.unwrap();
+                    records_ids.push(String::from_utf8(record.qname().to_vec()).unwrap());
+                }
+                if records_ids.contains(&"56bbfd4a-0d1d-4d97-b307-7626be446ce8".to_string()) {
+                    println!("{}", file);
+                }
+            }
+        }
+
+        assert!(records_ids.contains(&"56bbfd4a-0d1d-4d97-b307-7626be446ce8".to_string()));
     }
 
     // #[test]