Thomas 2 rokov pred
rodič
commit
ea9bc17706
2 zmenil súbory, kde vykonal 96 pridanie a 13 odobranie
  1. 1 0
      .gitignore
  2. 95 13
      src/main.rs

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+target

+ 95 - 13
src/main.rs

@@ -1,4 +1,4 @@
-use rust_htslib::{bam, bam::Read};
+use rust_htslib::{bam, bam::{Read, IndexedReader}};
 use clap::Parser;
 
 #[derive(Parser, Debug)]
@@ -15,29 +15,111 @@ struct Args {
 
 fn main() {
     let args = Args::parse();
-    let bam = bam::Reader::from_path(&args.bam).unwrap();
-    let header = bam::Header::from_template(bam.header());
+    let mut bam = bam::IndexedReader::from_path(&args.bam).unwrap();
 
     // /Turbine-pool/LAL-T_ChIP/BAM/525/merged.bam
-    // chr1:47239205-47239396
+    // /Turbine-pool/LAL-T_ChIP/BAM/885/merged.bam
+    let start = 47239294;
+    let end = 47239298;
 
-    bam.fetch(("chr1", 47239204, 47239399)).unwrap();
+    let results = fetch_ins(&mut bam, "chr1", start, end);
+
+    for result in results {
+        println!("{}", result.tsv());
+    }
+}
+
+#[derive(Debug)]
+struct Result {
+    read_name: String,
+    seqname: String,
+    ref_pos: i64,
+    inserted_seq: String,
+    depth: u32
+}
+
+fn fetch_ins(bam: &mut IndexedReader, seqname: &str, start: u32, end: u32) -> Vec<Result> {
+    bam.fetch((seqname, start, end)).unwrap();
 
     // pileup over all covered sites
+    let mut results: Vec<Result> = Vec::new();
     for p in bam.pileup() {
         let pileup = p.unwrap();
-        println!("{}:{} depth {}", pileup.tid(), pileup.pos(), pileup.depth());
+        if pileup.pos() < start { continue; }
+        if pileup.pos() >= end { break; }
 
         for alignment in pileup.alignments() {
-            if !alignment.is_del() && !alignment.is_refskip() {
-                println!("Base {}", alignment.record().seq()[alignment.qpos().unwrap()]);
-            }
-            // mark indel start
             match alignment.indel() {
-                bam::pileup::Indel::Ins(len) => println!("Insertion of length {} between this and next position.", len),
-                bam::pileup::Indel::Del(len) => println!("Deletion of length {} between this and next position.", len),
-                bam::pileup::Indel::None => ()
+                bam::pileup::Indel::Ins(_len) => {
+                    let record = alignment.record();
+                    let seq = record.seq().as_bytes();
+                    let mut ref_pos = record.cigar().pos();
+                    let mut query_pos = 0;
+                    let mut inserted_seq = String::new();
+                    for cig in record.cigar().iter() {
+                        match cig {
+                            bam::record::Cigar::Match(l) => {
+                                ref_pos += *l as i64;
+                                query_pos += l;
+                            },
+                            bam::record::Cigar::Ins(l) => {
+                                let r = (query_pos as usize)..((query_pos + l) as usize);
+                                inserted_seq = String::from_utf8_lossy(&seq[r]).to_string();
+                                break;
+                            },
+                            bam::record::Cigar::Del(l) => {
+                                ref_pos += *l as i64;
+                            },
+                            bam::record::Cigar::RefSkip(l) => {
+                                ref_pos += *l as i64;
+                            },
+                            bam::record::Cigar::SoftClip(l) => {
+                                query_pos += l;
+                            },
+                            bam::record::Cigar::HardClip(l) => {
+                                query_pos += l;
+                            },
+                            bam::record::Cigar::Pad(_) => {
+                                panic!("??");
+                            },
+                            bam::record::Cigar::Equal(_) => {
+                                panic!("??");
+                            },
+                            bam::record::Cigar::Diff(_) => {
+                                panic!("??");
+                            },
+                        }
+                    }
+
+                    results.push(Result { 
+                        read_name: String::from_utf8_lossy(alignment.record().qname()).to_string(), 
+                        seqname: seqname.to_string(),
+                        ref_pos, 
+                        inserted_seq,
+                        depth: pileup.depth()
+                    });
+
+                    // println!(
+                    //     "{}\t{}:{}_{}ins[{}]\t{}",
+                    //     String::from_utf8_lossy(alignment.record().qname()),
+                    //     alignment.record().tid(), ref_pos, ref_pos + 1, inserted_seq,
+                    //     pileup.depth()
+                    // );
+                },
+                _ => ()
             }
         }
     }
+    results
 }
+
+impl Result {
+    fn hgvs(&self) -> String {
+        format!("{}:{}_{}ins[{}]", self.seqname, 
+        self.ref_pos, self.ref_pos + 1, self.inserted_seq)
+    }
+
+    fn tsv(&self) -> String {
+        format!("{}\t{}\t{}", self.read_name, self.hgvs(), self.depth)
+    }
+}