소스 검색

first commit

Thomas 2 년 전
커밋
4d5ec32d43
4개의 변경된 파일, 483개의 추가작업 그리고 0개의 파일을 삭제
  1. 1 0
      .gitignore
  2. 155 0
      Cargo.lock
  3. 11 0
      Cargo.toml
  4. 316 0
      src/lib.rs

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+/target

+ 155 - 0
Cargo.lock

@@ -0,0 +1,155 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "anyhow"
+version = "1.0.81"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
+
+[[package]]
+name = "bytelines"
+version = "2.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "791e4e40d13e1463dee537b254225c12c46ec7328f1817c6264873bc166f615f"
+
+[[package]]
+name = "cc"
+version = "1.0.90"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "crc32fast"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "desc_seq_lib"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "log",
+ "minimap2",
+]
+
+[[package]]
+name = "fffx"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f33c20b97a5cfd9d56d67e15677c1dae66c52846ee51801af32b94b70438f626"
+dependencies = [
+ "bytelines",
+ "flate2",
+ "simdutf8",
+ "static_assertions",
+]
+
+[[package]]
+name = "flate2"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
+dependencies = [
+ "crc32fast",
+ "libz-sys",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.153"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+
+[[package]]
+name = "libz-sys"
+version = "1.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "log"
+version = "0.4.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+
+[[package]]
+name = "minimap2"
+version = "0.1.16+minimap2.2.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a6cb7c4c240401901de9c5bdfbebbcd8a43a2beb980a00c3b7c558d922ec089"
+dependencies = [
+ "fffx",
+ "flate2",
+ "libc",
+ "minimap2-sys",
+ "simdutf8",
+]
+
+[[package]]
+name = "minimap2-sys"
+version = "0.1.16+minimap2.2.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3116fd091e0b499cd370475c6d03f8c333aa956b1769140dfd07e1a42101c8a"
+dependencies = [
+ "cc",
+ "libz-sys",
+ "pkg-config",
+]
+
+[[package]]
+name = "miniz_oxide"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
+dependencies = [
+ "adler",
+]
+
+[[package]]
+name = "pkg-config"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
+
+[[package]]
+name = "simdutf8"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"

+ 11 - 0
Cargo.toml

@@ -0,0 +1,11 @@
+[package]
+name = "desc_seq_lib"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+minimap2 = "0.1.16+minimap2.2.26"
+anyhow = "1.0.75"
+log = "0.4.19"

+ 316 - 0
src/lib.rs

@@ -0,0 +1,316 @@
+use anyhow::Result;
+use std::{fmt, collections::{HashMap, VecDeque}};
+use log::info;
+use minimap2::Mapping;
+
+
+/// A query contig together with the mappings recorded for it.
+#[derive(Debug)]
+pub struct Contig {
+    /// Identifier of the contig.
+    pub id: String,
+    /// minimap2 `Mapping` records attached to this contig.
+    pub mappings: Vec<Mapping>,
+}
+
+/// Classification of a contig's mappings, as produced by `Contig::get_ref_pos`.
+///
+/// Mappings are first split into groups; the variants describe how many groups
+/// there are and whether the groups at either end hold a single mapping.
+#[derive(Debug, Clone)]
+pub enum ContigsRefRes {
+    /// The contig has exactly one mapping.
+    Unique(Mapping),
+    /// Exactly two groups, each holding a single mapping.
+    Chimeric((Mapping, Mapping)),
+    /// Single-mapping groups at both ends; the mappings of every group in
+    /// between are flattened into the middle `Vec`.
+    ChimericMultiple((Mapping, Vec<Mapping>, Mapping)),
+    /// Several candidate mappings on one side paired with a single mapping on
+    /// the other side.
+    LeftAmbiguity((Vec<Mapping>, Mapping)),
+    /// A single mapping on one side paired with several candidate mappings on
+    /// the other side.
+    RightAmbiguity((Mapping, Vec<Mapping>)),
+    /// No end group could be reduced to a single mapping; holds every mapping.
+    /// (The spelling of the variant name is part of the public API.)
+    Ambigous(Vec<Mapping>),
+}
+
+/// Human-readable rendering of a classification.
+///
+/// Individual mappings are printed with `mapping_to_string`, alternatives
+/// inside one ambiguous region are joined by `mappings_to_string`, and the
+/// parts of a chimeric/ambiguous result are separated by `<->`.
+impl fmt::Display for ContigsRefRes {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        let rendered = match self {
+            ContigsRefRes::Unique(m) => mapping_to_string(m),
+            ContigsRefRes::Chimeric((a, b)) => {
+                format!("{}<->{}", mapping_to_string(a), mapping_to_string(b))
+            }
+            ContigsRefRes::ChimericMultiple((a, v, b)) => format!(
+                "{}<->{}<->{}",
+                mapping_to_string(a),
+                mappings_to_string(v),
+                mapping_to_string(b)
+            ),
+            ContigsRefRes::LeftAmbiguity((v, b)) => {
+                format!("{}<->{}", mappings_to_string(v), mapping_to_string(b))
+            }
+            ContigsRefRes::RightAmbiguity((a, v)) => {
+                format!("{}<->{}", mapping_to_string(a), mappings_to_string(v))
+            }
+            ContigsRefRes::Ambigous(v) => mappings_to_string(v),
+        };
+        // Propagate formatter errors to the caller instead of panicking.
+        fmt.write_str(&rendered)
+    }
+}
+
+/// Renders one mapping as `target:start-end(query:start-end)`.
+///
+/// A missing target or query name is printed as `UNKNOWN`.
+fn mapping_to_string(mapping: &Mapping) -> String {
+    const UNKNOWN: &str = "UNKNOWN";
+    // Borrow the optional names rather than cloning them just to print them.
+    format!(
+        "{}:{}-{}({}:{}-{})",
+        mapping.target_name.as_deref().unwrap_or(UNKNOWN),
+        mapping.target_start,
+        mapping.target_end,
+        mapping.query_name.as_deref().unwrap_or(UNKNOWN),
+        mapping.query_start,
+        mapping.query_end
+    )
+}
+
+/// Renders every mapping with `mapping_to_string` and joins the results with `//`.
+///
+/// Takes a slice so that any contiguous collection of mappings can be passed;
+/// a `&Vec<Mapping>` argument still works through deref coercion.
+fn mappings_to_string(mappings: &[Mapping]) -> String {
+    mappings
+        .iter()
+        .map(mapping_to_string)
+        .collect::<Vec<_>>()
+        .join("//")
+}
+
+impl Contig {
+    /// Classifies the contig from the way its mappings group along the query.
+    ///
+    /// A contig with exactly one mapping is `Unique`. Otherwise the mappings
+    /// are sorted and grouped by `group_mappings` (which reorders
+    /// `self.mappings` in place) and the result depends on the groups:
+    ///
+    /// * fewer than two groups: `Ambigous` with every mapping;
+    /// * single-mapping groups at both ends: `Chimeric` when there is nothing
+    ///   in between, `ChimericMultiple` otherwise;
+    /// * only the first group is a single mapping: `RightAmbiguity`, carrying
+    ///   all remaining mappings;
+    /// * only the last group is a single mapping: `LeftAmbiguity`, carrying
+    ///   all preceding mappings;
+    /// * neither end is a single mapping: `Ambigous` with every mapping.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by `group_mappings`.
+    pub fn get_ref_pos(&mut self) -> Result<ContigsRefRes> {
+        if let [only] = self.mappings.as_slice() {
+            return Ok(ContigsRefRes::Unique(only.clone()));
+        }
+
+        let mut groups: VecDeque<Vec<Mapping>> = group_mappings(&mut self.mappings)?.into();
+        if groups.len() < 2 {
+            return Ok(ContigsRefRes::Ambigous(
+                groups.into_iter().flatten().collect(),
+            ));
+        }
+
+        // Groups come back ordered by query_start, so the front of the deque is
+        // the first group along the query and the back is the last one.
+        let mut first = groups.pop_front().expect("checked: at least two groups");
+        let mut last = groups.pop_back().expect("checked: at least two groups");
+        let mut middle: Vec<Mapping> = groups.into_iter().flatten().collect();
+
+        let res = match (first.len(), last.len()) {
+            (1, 1) => {
+                let a = first.swap_remove(0);
+                let b = last.swap_remove(0);
+                if middle.is_empty() {
+                    ContigsRefRes::Chimeric((a, b))
+                } else {
+                    ContigsRefRes::ChimericMultiple((a, middle, b))
+                }
+            }
+            (1, _) => {
+                // Everything after the single first mapping is ambiguous.
+                middle.extend(last);
+                ContigsRefRes::RightAmbiguity((first.swap_remove(0), middle))
+            }
+            (_, 1) => {
+                // Everything before the single last mapping is ambiguous.
+                first.extend(middle);
+                ContigsRefRes::LeftAmbiguity((first, last.swap_remove(0)))
+            }
+            _ => {
+                first.extend(middle);
+                first.extend(last);
+                ContigsRefRes::Ambigous(first)
+            }
+        };
+        Ok(res)
+    }
+}
+
+/// Contig classifications bucketed by the reference sequence(s) they map to.
+#[derive(Debug, Clone)]
+pub struct Genome {
+    /// Buckets keyed by target name, by `"<a>-<b>"` for a contig whose two
+    /// ends map to different targets, or by the literal `"Ambigous"`.
+    chromosomes: HashMap<String, Chromosome>,
+}
+
+impl Default for Genome {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Genome {
+    /// Creates a genome with no buckets.
+    pub fn new() -> Self {
+        Genome {
+            chromosomes: HashMap::new(),
+        }
+    }
+
+    /// Classifies the contig `id` from its `mappings` and files the result
+    /// into the matching bucket, creating the bucket when it does not exist.
+    ///
+    /// * `Unique`: bucket named after the mapping's target.
+    /// * `Chimeric` / `ChimericMultiple`: bucket named after the shared target
+    ///   when both end mappings hit the same one, `"<a>-<b>"` otherwise.
+    /// * every other classification: the `"Ambigous"` bucket.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when classification fails, or when a mapping that is
+    /// needed to name the bucket has no target name.
+    pub fn add_contig(&mut self, id: String, mappings: Vec<Mapping>) -> Result<()> {
+        let mut new_contig = Contig { id, mappings };
+        let ref_res = new_contig.get_ref_pos()?;
+
+        // Work out the bucket name from a borrow so the result itself can be
+        // moved into the bucket afterwards without cloning it.
+        let bucket = match &ref_res {
+            ContigsRefRes::Unique(mapping) => target_name_of(mapping)?,
+            ContigsRefRes::Chimeric((a, b)) | ContigsRefRes::ChimericMultiple((a, _, b)) => {
+                let a_target_name = target_name_of(a)?;
+                let b_target_name = target_name_of(b)?;
+                if a_target_name == b_target_name {
+                    a_target_name
+                } else {
+                    format!("{}-{}", a_target_name, b_target_name)
+                }
+            }
+            ContigsRefRes::LeftAmbiguity(_)
+            | ContigsRefRes::RightAmbiguity(_)
+            | ContigsRefRes::Ambigous(_) => "Ambigous".to_string(),
+        };
+
+        self.chromosomes
+            .entry(bucket)
+            .or_insert_with(|| Chromosome {
+                contigs: Vec::new(),
+            })
+            .contigs
+            .push(ref_res);
+
+        Ok(())
+    }
+
+    /// Logs, at `info` level, one `name:count` line per bucket.
+    pub fn stats(&self) {
+        for (k, v) in self.chromosomes.iter() {
+            info!("{}:{}", k, v.contigs.len());
+        }
+    }
+}
+
+/// Returns the mapping's target name, or an error when it has none.
+fn target_name_of(mapping: &Mapping) -> Result<String> {
+    mapping
+        .target_name
+        .clone()
+        .ok_or_else(|| anyhow::anyhow!("mapping has no target name"))
+}
+/// One bucket of a `Genome`: the contig classifications filed under one key.
+#[derive(Debug, Clone)]
+pub struct Chromosome {
+    /// Classifications stored in this bucket, in insertion order.
+    contigs: Vec<ContigsRefRes>,
+}
+
+/// Sorts `mappings` by `query_start` (in place) and splits them into runs of
+/// overlapping alignments.
+///
+/// Walking the sorted mappings, a mapping joins the current (latest) group
+/// when at least one member of that group ends more than 30 query positions
+/// after the mapping starts; otherwise it opens a new group. Every returned
+/// group therefore contains at least one mapping, and the groups are ordered
+/// by the `query_start` of their first member.
+///
+/// The function currently never fails; the `Result` is kept for callers that
+/// use `?` on it.
+fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
+    // Stable sort: mappings with the same start keep their incoming order.
+    mappings.sort_by_key(|m| m.query_start);
+
+    let mut groups: Vec<Vec<Mapping>> = Vec::new();
+    for aln in mappings.iter() {
+        match groups.last_mut() {
+            // Overlap of more than 30 positions with any member of the group.
+            Some(group) if group.iter().any(|m| m.query_end - aln.query_start > 30) => {
+                group.push(aln.clone());
+            }
+            _ => groups.push(vec![aln.clone()]),
+        }
+    }
+
+    Ok(groups)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A freshly created genome holds no buckets.
+    #[test]
+    fn new_genome_has_no_chromosomes() {
+        assert!(Genome::new().chromosomes.is_empty());
+    }
+
+    /// With no mappings there is nothing to group, so the contig is reported
+    /// as ambiguous with an empty mapping list.
+    #[test]
+    fn contig_without_mappings_is_ambiguous() {
+        let mut contig = Contig {
+            id: "empty".to_string(),
+            mappings: Vec::new(),
+        };
+        let res = contig.get_ref_pos().unwrap();
+        assert!(matches!(res, ContigsRefRes::Ambigous(ref v) if v.is_empty()));
+    }
+}