Thomas 3 年 前
コミット
6af5f3b727
9 ファイル変更424 行追加0 行削除
  1. 0 0
      README.md
  2. 113 0
      index.js
  3. 80 0
      index.ts
  4. 51 0
      package-lock.json
  5. 20 0
      package.json
  6. 24 0
      test.js
  7. 10 0
      test.ts
  8. 101 0
      tsconfig.json
  9. 25 0
      yarn.lock

+ 0 - 0
README.md


+ 113 - 0
index.js

@@ -0,0 +1,113 @@
+"use strict";
+// https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_trembl_human.xml.gz
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // Reuses this.__awaiter when one already exists; otherwise runs `generator` to completion and returns a promise for its final value.
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // Wraps anything that is not already a P so that `.then` can be called on it.
+    return new (P || (P = Promise))(function (resolve, reject) { // P falls back to the global Promise when the caller passes none.
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // Resumes the generator with a settled value; a synchronous throw rejects the outer promise.
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // Throws the rejection reason into the generator at its current yield.
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // Resolves once the generator is done; otherwise waits for the yielded value.
+        step((generator = generator.apply(thisArg, _arguments || [])).next()); // Instantiates the generator with the given `this`/arguments and takes the first step.
+    });
+};
+var __asyncValues = (this && this.__asyncValues) || function (o) { // Reuses this.__asyncValues when present; otherwise returns an async iterator for `o`.
+    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined."); // The runtime must provide Symbol.asyncIterator.
+    var m = o[Symbol.asyncIterator], i;
+    return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i); // Prefers o's own async iterator; otherwise wraps its sync iterator in object `i`.
+    function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; } // Defines i[n] only when the underlying iterator has a method named n.
+    function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); } // Awaits the produced value before resolving with { value, done }.
+};
+var __importDefault = (this && this.__importDefault) || function (mod) { // Reuses this.__importDefault when present; otherwise normalises a required module.
+    return (mod && mod.__esModule) ? mod : { "default": mod }; // Modules flagged __esModule pass through; anything else is exposed under `default`.
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.getEnrty = exports.readOffset = exports.makeIndex = void 0;
+const fs_1 = __importDefault(require("fs"));
+const readline_1 = __importDefault(require("readline"));
+const fast_xml_parser_1 = require("fast-xml-parser");
+const line$ = (path) => readline_1.default.createInterface({
+    input: fs_1.default.createReadStream(path),
+    crlfDelay: Infinity
+});
+const makeIndex = (filePath, indexPath) => __awaiter(void 0, void 0, void 0, function* () {
+    var e_1, _a;
+    indexPath = indexPath || filePath + '.jsi';
+    let byteAcc = 0;
+    const fromSel = new RegExp("^<entry");
+    const toSel = new RegExp("^</entry>");
+    const valSel = new RegExp('<accession>');
+    let tmp = { values: [] };
+    try {
+        for (var _b = __asyncValues(line$(filePath)), _c; _c = yield _b.next(), !_c.done;) {
+            const line = _c.value;
+            if (fromSel.test(line))
+                tmp['from'] = byteAcc;
+            byteAcc += (line.length + 1);
+            if (valSel.test(line))
+                tmp['values'].push(line.match("<accession>(.*?)</accession>")[1]); // 'uck 
+            if (toSel.test(line)) {
+                yield fs_1.default.promises.appendFile(indexPath, tmp.values.join(';') + '\t' + tmp.from + '\t' + byteAcc + '\n');
+                tmp = { values: [] };
+            }
+        }
+    }
+    catch (e_1_1) { e_1 = { error: e_1_1 }; }
+    finally {
+        try {
+            if (_c && !_c.done && (_a = _b.return)) yield _a.call(_b);
+        }
+        finally { if (e_1) throw e_1.error; }
+    }
+});
+exports.makeIndex = makeIndex;
+const readOffset = (path, from, to) => {
+    return new Promise((resolve, reject) => __awaiter(void 0, void 0, void 0, function* () {
+        const size = to - from;
+        const buffer = Buffer.alloc(size);
+        let filehandle = null;
+        try {
+            filehandle = yield fs_1.default.promises.open(path, 'r+');
+            yield filehandle.read(buffer, 0, buffer.length, from);
+        }
+        finally {
+            if (filehandle) {
+                yield filehandle.close();
+                resolve(buffer.toString());
+            }
+        }
+    }));
+};
+exports.readOffset = readOffset;
+const getEntryOffset = (dbPath, accession) => __awaiter(void 0, void 0, void 0, function* () {
+    var e_2, _d;
+    const indexPath = dbPath + '.jsi';
+    if (!fs_1.default.existsSync(indexPath))
+        yield makeIndex(dbPath);
+    const lineSel = new RegExp(accession);
+    try {
+        for (var _e = __asyncValues(line$(indexPath)), _f; _f = yield _e.next(), !_f.done;) {
+            const line = _f.value;
+            if (lineSel.test(line))
+                return [Number(line.split('\t')[1]), Number(line.split('\t')[2])];
+        }
+    }
+    catch (e_2_1) { e_2 = { error: e_2_1 }; }
+    finally {
+        try {
+            if (_f && !_f.done && (_d = _e.return)) yield _d.call(_e);
+        }
+        finally { if (e_2) throw e_2.error; }
+    }
+    return [0, 0];
+});
+const getEnrty = (dbPath, accession) => __awaiter(void 0, void 0, void 0, function* () {
+    const parser = new fast_xml_parser_1.XMLParser({
+        ignoreAttributes: false,
+        alwaysCreateTextNode: false,
+        attributeNamePrefix: "",
+        textNodeName: "value",
+        allowBooleanAttributes: true,
+    });
+    const offsets = yield getEntryOffset(dbPath, accession);
+    return parser.parse(yield readOffset(dbPath, offsets[0], offsets[1]));
+});
+exports.getEnrty = getEnrty;

+ 80 - 0
index.ts

@@ -0,0 +1,80 @@
+// https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.xml.gz
+
+import fs from 'fs'
+import readline from 'readline'
+import { XMLParser } from 'fast-xml-parser'
+
+/**
+ * Returns a readline interface over the file at `path`.
+ * `crlfDelay: Infinity` is passed so that a `\r\n` pair is always reported as one line break.
+ */
+const line$ = (path: string) => {
+    const input = fs.createReadStream(path)
+    return readline.createInterface({ input, crlfDelay: Infinity })
+}
+
+/**
+ * Scans the XML file at `filePath` line by line and writes a tab-separated offset index.
+ *
+ * One row is written per `<entry>…</entry>` element: `ACC1;ACC2;…<TAB>from<TAB>to`,
+ * where `from` is the byte offset of the line that opens the entry and `to` is the byte
+ * offset just past the line that closes it.
+ *
+ * Offsets assume UTF-8 content with a single `\n` after every line.
+ *
+ * @param filePath  XML file to index.
+ * @param indexPath Destination of the index; defaults to `filePath + '.jsi'`.
+ *                  An existing file at that path is overwritten.
+ */
+const makeIndex = async (filePath: string, indexPath?: string) => {
+    indexPath = indexPath || filePath + '.jsi'
+
+    const fromSel = /^<entry/
+    const toSel = /^<\/entry>/
+    const valSel = /<accession>(.*?)<\/accession>/
+
+    let byteAcc = 0
+    let from: number | undefined
+    let values: string[] = []
+
+    // Opened once with 'w' so that re-indexing replaces old rows instead of appending duplicates.
+    const out = await fs.promises.open(indexPath, 'w')
+    try {
+        for await (const line of line$(filePath)) {
+            if (fromSel.test(line)) from = byteAcc
+
+            // readline yields decoded strings: count encoded bytes, not UTF-16 code units.
+            byteAcc += Buffer.byteLength(line, 'utf8') + 1
+
+            // A line without a complete <accession>…</accession> pair is skipped instead of throwing.
+            const accession = valSel.exec(line)
+            if (accession) values.push(accession[1])
+
+            if (toSel.test(line)) {
+                await out.write(values.join(';') + '\t' + from + '\t' + byteAcc + '\n')
+                from = undefined
+                values = []
+            }
+        }
+    } finally {
+        await out.close()
+    }
+}
+
+/**
+ * Reads the bytes in `[from, to)` of the file at `path` and returns them decoded as UTF-8.
+ *
+ * @param path File to read from.
+ * @param from Byte offset of the first byte to read.
+ * @param to   Byte offset just past the last byte to read.
+ * @returns The decoded slice; shorter than `to - from` bytes when the file ends early.
+ * @throws RangeError when the offsets are not non-negative integers with `from <= to`.
+ *         Errors from opening or reading the file reject the returned promise.
+ */
+const readOffset = async (path: string, from:number, to:number): Promise<string> => {
+    if (!Number.isInteger(from) || !Number.isInteger(to) || from < 0 || to < from) {
+        throw new RangeError('Invalid byte range: ' + from + '-' + to)
+    }
+    const buffer = Buffer.alloc(to - from)
+    // Read-only access is enough; 'r+' would fail on files that are not writable.
+    const filehandle = await fs.promises.open(path, 'r')
+    try {
+        let filled = 0
+        // A single read may return fewer bytes than requested, so loop until full or EOF.
+        while (filled < buffer.length) {
+            const { bytesRead } = await filehandle.read(buffer, filled, buffer.length - filled, from + filled)
+            if (bytesRead === 0) break
+            filled += bytesRead
+        }
+        return buffer.toString('utf8', 0, filled)
+    } finally {
+        await filehandle.close()
+    }
+}
+
+/**
+ * Looks up `accession` in the index file `dbPath + '.jsi'`, building it with `makeIndex`
+ * first when it does not exist.
+ *
+ * @returns `[from, to]` from the first index row that lists the accession,
+ *          or `[0, 0]` when no row lists it.
+ */
+const getEntryOffset = async (dbPath:string, accession:string): Promise<number[]> => {
+    const indexPath = dbPath + '.jsi'
+    if (!fs.existsSync(indexPath)) await makeIndex(dbPath)
+    for await (const line of line$(indexPath)) {
+        // Row layout: "ACC1;ACC2;…\tfrom\tto".
+        const [accessions, from, to] = line.split('\t')
+        // Exact comparison against the accession list: no partial matches, no regex
+        // metacharacters, and the offset columns can never match.
+        if (accessions.split(';').includes(accession)) return [Number(from), Number(to)]
+    }
+    return [0, 0]
+}
+
+/**
+ * Resolves to the fast-xml-parser result for the slice of `dbPath` that `getEntryOffset`
+ * reports for `accession`. Attributes are kept without a name prefix and text nodes are
+ * stored under the key `value`.
+ */
+const getEnrty = async (dbPath:string, accession:string) => {
+    const parserOptions = {
+        ignoreAttributes: false,
+        alwaysCreateTextNode: false,
+        attributeNamePrefix: "",
+        textNodeName: "value",
+        allowBooleanAttributes: true,
+    }
+    const xmlParser = new XMLParser(parserOptions)
+
+    const range = await getEntryOffset(dbPath, accession)
+    const xml = await readOffset(dbPath, range[0], range[1])
+    return xmlParser.parse(xml)
+}
+
+export { makeIndex, readOffset, getEnrty }

+ 51 - 0
package-lock.json

@@ -0,0 +1,51 @@
+{
+  "name": "uniprotparser",
+  "version": "1.0.0",
+  "lockfileVersion": 2,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "uniprotparser",
+      "version": "1.0.0",
+      "license": "ISC",
+      "devDependencies": {
+        "@types/node": "^17.0.21",
+        "typescript": "^4.6.2"
+      }
+    },
+    "node_modules/@types/node": {
+      "version": "17.0.21",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.21.tgz",
+      "integrity": "sha512-DBZCJbhII3r90XbQxI8Y9IjjiiOGlZ0Hr32omXIZvwwZ7p4DMMXGrKXVyPfuoBOri9XNtL0UK69jYIBIsRX3QQ==",
+      "dev": true
+    },
+    "node_modules/typescript": {
+      "version": "4.6.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.6.2.tgz",
+      "integrity": "sha512-HM/hFigTBHZhLXshn9sN37H085+hQGeJHJ/X7LpBWLID/fbc2acUMfU+lGD98X81sKP+pFa9f0DZmCwB9GnbAg==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "tsc": "bin/tsc",
+        "tsserver": "bin/tsserver"
+      },
+      "engines": {
+        "node": ">=4.2.0"
+      }
+    }
+  },
+  "dependencies": {
+    "@types/node": {
+      "version": "17.0.21",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.21.tgz",
+      "integrity": "sha512-DBZCJbhII3r90XbQxI8Y9IjjiiOGlZ0Hr32omXIZvwwZ7p4DMMXGrKXVyPfuoBOri9XNtL0UK69jYIBIsRX3QQ==",
+      "dev": true
+    },
+    "typescript": {
+      "version": "4.6.2",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.6.2.tgz",
+      "integrity": "sha512-HM/hFigTBHZhLXshn9sN37H085+hQGeJHJ/X7LpBWLID/fbc2acUMfU+lGD98X81sKP+pFa9f0DZmCwB9GnbAg==",
+      "dev": true
+    }
+  }
+}

+ 20 - 0
package.json

@@ -0,0 +1,20 @@
+{
+  "name": "uniprotparser",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "build": "tsc",
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "devDependencies": {
+    "@types/node": "^17.0.21",
+    "typescript": "^4.6.2"
+  },
+  "dependencies": {
+    "fast-xml-parser": "^4.0.6"
+  }
+}

+ 24 - 0
test.js

@@ -0,0 +1,24 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // Reuses this.__awaiter when one already exists; otherwise runs `generator` to completion and returns a promise for its final value.
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // Wraps anything that is not already a P so that `.then` can be called on it.
+    return new (P || (P = Promise))(function (resolve, reject) { // P falls back to the global Promise when the caller passes none.
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // Resumes the generator with a settled value; a synchronous throw rejects the outer promise.
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // Throws the rejection reason into the generator at its current yield.
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // Resolves once the generator is done; otherwise waits for the yielded value.
+        step((generator = generator.apply(thisArg, _arguments || [])).next()); // Instantiates the generator with the given `this`/arguments and takes the first step.
+    });
+};
+var __importDefault = (this && this.__importDefault) || function (mod) { // Reuses this.__importDefault when present; otherwise normalises a required module.
+    return (mod && mod.__esModule) ? mod : { "default": mod }; // Modules flagged __esModule pass through; anything else is exposed under `default`.
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const _1 = require(".");
+const fs_1 = __importDefault(require("fs"));
+(() => __awaiter(void 0, void 0, void 0, function* () {
+    const uniprotDB = '/home/thomas/NGS/ref/UNIPROT/uniprot_sprot_human.xml';
+    // await makeIndex(uniprotDB)
+    // const r =  await readOffset(uniprotDB, 118933, 255352)
+    const r = yield (0, _1.getEnrty)(uniprotDB, 'P46531');
+    console.log(r);
+    yield fs_1.default.promises.writeFile('test/test.json', JSON.stringify(r, null, 4));
+}))();

+ 10 - 0
test.ts

@@ -0,0 +1,10 @@
+import { makeIndex, readOffset, getEnrty } from ".";
+import fs from 'fs'
+// Manual smoke test: fetches entry P46531, prints it and stores it as test/test.json.
+// The database path may be passed as the first CLI argument; the original path is the fallback.
+(async()=>{
+    const uniprotDB = process.argv[2] || '/home/thomas/NGS/ref/UNIPROT/uniprot_sprot_human.xml'
+    // await makeIndex(uniprotDB)
+    // const r =  await readOffset(uniprotDB, 118933, 255352)
+    const r = await getEnrty(uniprotDB, 'P46531')
+    console.log(r);
+    // writeFile does not create missing directories.
+    await fs.promises.mkdir('test', { recursive: true })
+    await fs.promises.writeFile('test/test.json', JSON.stringify(r, null, 4))
+})().catch((err) => {
+    // Report the failure and exit non-zero instead of leaving an unhandled rejection.
+    console.error(err)
+    process.exitCode = 1
+})

+ 101 - 0
tsconfig.json

@@ -0,0 +1,101 @@
+{
+  "compilerOptions": {
+    /* Visit https://aka.ms/tsconfig.json to read more about this file */
+
+    /* Projects */
+    // "incremental": true,                              /* Enable incremental compilation */
+    // "composite": true,                                /* Enable constraints that allow a TypeScript project to be used with project references. */
+    // "tsBuildInfoFile": "./",                          /* Specify the folder for .tsbuildinfo incremental compilation files. */
+    // "disableSourceOfProjectReferenceRedirect": true,  /* Disable preferring source files instead of declaration files when referencing composite projects */
+    // "disableSolutionSearching": true,                 /* Opt a project out of multi-project reference checking when editing. */
+    // "disableReferencedProjectLoad": true,             /* Reduce the number of projects loaded automatically by TypeScript. */
+
+    /* Language and Environment */
+    "target": "es2016",                                  /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
+    // "lib": [],                                        /* Specify a set of bundled library declaration files that describe the target runtime environment. */
+    // "jsx": "preserve",                                /* Specify what JSX code is generated. */
+    // "experimentalDecorators": true,                   /* Enable experimental support for TC39 stage 2 draft decorators. */
+    // "emitDecoratorMetadata": true,                    /* Emit design-type metadata for decorated declarations in source files. */
+    // "jsxFactory": "",                                 /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h' */
+    // "jsxFragmentFactory": "",                         /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
+    // "jsxImportSource": "",                            /* Specify module specifier used to import the JSX factory functions when using `jsx: react-jsx*`.` */
+    // "reactNamespace": "",                             /* Specify the object invoked for `createElement`. This only applies when targeting `react` JSX emit. */
+    // "noLib": true,                                    /* Disable including any library files, including the default lib.d.ts. */
+    // "useDefineForClassFields": true,                  /* Emit ECMAScript-standard-compliant class fields. */
+
+    /* Modules */
+    "module": "commonjs",                                /* Specify what module code is generated. */
+    // "rootDir": "./",                                  /* Specify the root folder within your source files. */
+    // "moduleResolution": "node",                       /* Specify how TypeScript looks up a file from a given module specifier. */
+    // "baseUrl": "./",                                  /* Specify the base directory to resolve non-relative module names. */
+    // "paths": {},                                      /* Specify a set of entries that re-map imports to additional lookup locations. */
+    // "rootDirs": [],                                   /* Allow multiple folders to be treated as one when resolving modules. */
+    // "typeRoots": [],                                  /* Specify multiple folders that act like `./node_modules/@types`. */
+    // "types": [],                                      /* Specify type package names to be included without being referenced in a source file. */
+    // "allowUmdGlobalAccess": true,                     /* Allow accessing UMD globals from modules. */
+    // "resolveJsonModule": true,                        /* Enable importing .json files */
+    // "noResolve": true,                                /* Disallow `import`s, `require`s or `<reference>`s from expanding the number of files TypeScript should add to a project. */
+
+    /* JavaScript Support */
+    // "allowJs": true,                                  /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */
+    // "checkJs": true,                                  /* Enable error reporting in type-checked JavaScript files. */
+    // "maxNodeModuleJsDepth": 1,                        /* Specify the maximum folder depth used for checking JavaScript files from `node_modules`. Only applicable with `allowJs`. */
+
+    /* Emit */
+    // "declaration": true,                              /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
+    // "declarationMap": true,                           /* Create sourcemaps for d.ts files. */
+    // "emitDeclarationOnly": true,                      /* Only output d.ts files and not JavaScript files. */
+    // "sourceMap": true,                                /* Create source map files for emitted JavaScript files. */
+    // "outFile": "./",                                  /* Specify a file that bundles all outputs into one JavaScript file. If `declaration` is true, also designates a file that bundles all .d.ts output. */
+    // "outDir": "./",                                   /* Specify an output folder for all emitted files. */
+    // "removeComments": true,                           /* Disable emitting comments. */
+    // "noEmit": true,                                   /* Disable emitting files from a compilation. */
+    // "importHelpers": true,                            /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
+    // "importsNotUsedAsValues": "remove",               /* Specify emit/checking behavior for imports that are only used for types */
+    // "downlevelIteration": true,                       /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
+    // "sourceRoot": "",                                 /* Specify the root path for debuggers to find the reference source code. */
+    // "mapRoot": "",                                    /* Specify the location where debugger should locate map files instead of generated locations. */
+    // "inlineSourceMap": true,                          /* Include sourcemap files inside the emitted JavaScript. */
+    // "inlineSources": true,                            /* Include source code in the sourcemaps inside the emitted JavaScript. */
+    // "emitBOM": true,                                  /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
+    // "newLine": "crlf",                                /* Set the newline character for emitting files. */
+    // "stripInternal": true,                            /* Disable emitting declarations that have `@internal` in their JSDoc comments. */
+    // "noEmitHelpers": true,                            /* Disable generating custom helper functions like `__extends` in compiled output. */
+    // "noEmitOnError": true,                            /* Disable emitting files if any type checking errors are reported. */
+    // "preserveConstEnums": true,                       /* Disable erasing `const enum` declarations in generated code. */
+    // "declarationDir": "./",                           /* Specify the output directory for generated declaration files. */
+    // "preserveValueImports": true,                     /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
+
+    /* Interop Constraints */
+    // "isolatedModules": true,                          /* Ensure that each file can be safely transpiled without relying on other imports. */
+    // "allowSyntheticDefaultImports": true,             /* Allow 'import x from y' when a module doesn't have a default export. */
+    "esModuleInterop": true,                             /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables `allowSyntheticDefaultImports` for type compatibility. */
+    // "preserveSymlinks": true,                         /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
+    "forceConsistentCasingInFileNames": true,            /* Ensure that casing is correct in imports. */
+
+    /* Type Checking */
+    "strict": true,                                      /* Enable all strict type-checking options. */
+    // "noImplicitAny": true,                            /* Enable error reporting for expressions and declarations with an implied `any` type.. */
+    // "strictNullChecks": true,                         /* When type checking, take into account `null` and `undefined`. */
+    // "strictFunctionTypes": true,                      /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
+    // "strictBindCallApply": true,                      /* Check that the arguments for `bind`, `call`, and `apply` methods match the original function. */
+    // "strictPropertyInitialization": true,             /* Check for class properties that are declared but not set in the constructor. */
+    // "noImplicitThis": true,                           /* Enable error reporting when `this` is given the type `any`. */
+    // "useUnknownInCatchVariables": true,               /* Type catch clause variables as 'unknown' instead of 'any'. */
+    // "alwaysStrict": true,                             /* Ensure 'use strict' is always emitted. */
+    // "noUnusedLocals": true,                           /* Enable error reporting when a local variables aren't read. */
+    // "noUnusedParameters": true,                       /* Raise an error when a function parameter isn't read */
+    // "exactOptionalPropertyTypes": true,               /* Interpret optional property types as written, rather than adding 'undefined'. */
+    // "noImplicitReturns": true,                        /* Enable error reporting for codepaths that do not explicitly return in a function. */
+    // "noFallthroughCasesInSwitch": true,               /* Enable error reporting for fallthrough cases in switch statements. */
+    // "noUncheckedIndexedAccess": true,                 /* Include 'undefined' in index signature results */
+    // "noImplicitOverride": true,                       /* Ensure overriding members in derived classes are marked with an override modifier. */
+    // "noPropertyAccessFromIndexSignature": true,       /* Enforces using indexed accessors for keys declared using an indexed type */
+    // "allowUnusedLabels": true,                        /* Disable error reporting for unused labels. */
+    // "allowUnreachableCode": true,                     /* Disable error reporting for unreachable code. */
+
+    /* Completeness */
+    // "skipDefaultLibCheck": true,                      /* Skip type checking .d.ts files that are included with TypeScript. */
+    "skipLibCheck": true                                 /* Skip type checking all .d.ts files. */
+  }
+}

+ 25 - 0
yarn.lock

@@ -0,0 +1,25 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+"@types/node@^17.0.21":
+  version "17.0.21"
+  resolved "https://registry.npmjs.org/@types/node/-/node-17.0.21.tgz"
+  integrity sha512-DBZCJbhII3r90XbQxI8Y9IjjiiOGlZ0Hr32omXIZvwwZ7p4DMMXGrKXVyPfuoBOri9XNtL0UK69jYIBIsRX3QQ==
+
+fast-xml-parser@^4.0.6:
+  version "4.0.6"
+  resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.0.6.tgz#bd0b75badc7abfc55c772f6a0c21e417ad989743"
+  integrity sha512-RHz47iX/DKT6BQwYQUmKG/1fuC5g2s/TibpxNvE+0ysnpSJxePFzsJvRDtfGhLRg3zdKMzO6EJn8n7+AJ6pSHg==
+  dependencies:
+    strnum "^1.0.5"
+
+strnum@^1.0.5:
+  version "1.0.5"
+  resolved "https://registry.yarnpkg.com/strnum/-/strnum-1.0.5.tgz#5c4e829fe15ad4ff0d20c3db5ac97b73c9b072db"
+  integrity sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==
+
+typescript@^4.6.2:
+  version "4.6.2"
+  resolved "https://registry.npmjs.org/typescript/-/typescript-4.6.2.tgz"
+  integrity sha512-HM/hFigTBHZhLXshn9sN37H085+hQGeJHJ/X7LpBWLID/fbc2acUMfU+lGD98X81sKP+pFa9f0DZmCwB9GnbAg==