Thomas há 3 anos atrás
pai
commit
80c0fcc8f3
6 ficheiros alterados com 96 adições e 17 exclusões
  1. 45 7
      index.js
  2. 31 4
      index.ts
  3. 2 1
      package.json
  4. 8 2
      test.js
  5. 5 3
      test.ts
  6. 5 0
      yarn.lock

+ 45 - 7
index.js

@@ -26,7 +26,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.makeRefSeqFromReg = exports.getSymbol = exports.getFromAcc = void 0;
+exports.getData = exports.getOffsets = exports.makeRefSeqFromReg = exports.getSymbol = exports.getFromAcc = void 0;
 const fs_1 = __importDefault(require("fs"));
 const os_1 = __importDefault(require("os"));
 const path_1 = __importDefault(require("path"));
@@ -35,6 +35,7 @@ const readline_1 = __importDefault(require("readline"));
 const buffer_1 = require("buffer");
 const genbank_parser_1 = __importDefault(require("genbank-parser"));
 const aligner_1 = require("aligner");
+const jsonata_1 = __importDefault(require("jsonata"));
 const async_exec = (prog, args, onData) => {
     return new Promise((resolve, reject) => {
         const child = (0, child_process_1.spawn)(prog, args, { shell: true });
@@ -135,6 +136,43 @@ const getOffset = (indexPath, acc) => __awaiter(void 0, void 0, void 0, function
     }
     return res;
 });
+const getOffsets = (indexPath, accessions) => __awaiter(void 0, void 0, void 0, function* () {
+    var e_3, _g;
+    let res = [];
+    const indexPaths = Array.isArray(indexPath) ? indexPath : [indexPath];
+    for (const iP of indexPaths) {
+        try {
+            for (var _h = (e_3 = void 0, __asyncValues(line$(iP))), _j; _j = yield _h.next(), !_j.done;) {
+                const line = _j.value;
+                const tmp = line.split('\t');
+                if (accessions.test(tmp[0])) {
+                    res.push([iP.split('.jsi')[0], tmp[1], tmp[2], tmp[0]]);
+                }
+            }
+        }
+        catch (e_3_1) { e_3 = { error: e_3_1 }; }
+        finally {
+            try {
+                if (_j && !_j.done && (_g = _h.return)) yield _g.call(_h);
+            }
+            finally { if (e_3) throw e_3.error; }
+        }
+    }
+    return res;
+});
+exports.getOffsets = getOffsets;
+const getData = (dbPath, accessionRegex, query) => __awaiter(void 0, void 0, void 0, function* () {
+    dbPath = Array.isArray(dbPath) ? dbPath : [dbPath];
+    const results = [];
+    const allOffsets = yield getOffsets(dbPath.map(e => e + '.jsi'), accessionRegex);
+    for (const offset of allOffsets) {
+        const txt = yield readOffset(offset[0], Number(offset[1]), Number(offset[2]));
+        const json = (0, genbank_parser_1.default)(txt)[0];
+        query ? results.push((0, jsonata_1.default)(query).evaluate(json)) : results.push(json);
+    }
+    return results;
+});
+exports.getData = getData;
 const getFromAcc = (accession, dbPath, indexPath) => __awaiter(void 0, void 0, void 0, function* () {
     dbPath = Array.isArray(dbPath) ? dbPath : [dbPath];
     if (!indexPath) {
@@ -227,7 +265,7 @@ const getJSI = (dbPath) => __awaiter(void 0, void 0, void 0, function* () {
 });
 // Todo: add progress
 const makeRefSeqFromReg = (dbPath, reg, distFile, limit) => __awaiter(void 0, void 0, void 0, function* () {
-    var e_3, _g;
+    var e_4, _k;
     dbPath = Array.isArray(dbPath) ? dbPath : [dbPath];
     const jsiFiles = yield getJSI(dbPath);
     const tmpDir = path_1.default.join(os_1.default.tmpdir(), 'parser-' + Math.random());
@@ -237,8 +275,8 @@ const makeRefSeqFromReg = (dbPath, reg, distFile, limit) => __awaiter(void 0, vo
     for (const jsiFile of jsiFiles) {
         console.log('reading ' + jsiFile);
         try {
-            for (var _h = (e_3 = void 0, __asyncValues(line$(jsiFile))), _j; _j = yield _h.next(), !_j.done;) {
-                const line = _j.value;
+            for (var _l = (e_4 = void 0, __asyncValues(line$(jsiFile))), _m; _m = yield _l.next(), !_m.done;) {
+                const line = _m.value;
                 if (line.match(reg)) {
                     const [accession, from, to] = line.split('\t');
                     const res = yield getFromAcc(accession, jsiFile.split('.jsi')[0]);
@@ -269,12 +307,12 @@ const makeRefSeqFromReg = (dbPath, reg, distFile, limit) => __awaiter(void 0, vo
                         break;
             }
         }
-        catch (e_3_1) { e_3 = { error: e_3_1 }; }
+        catch (e_4_1) { e_4 = { error: e_4_1 }; }
         finally {
             try {
-                if (_j && !_j.done && (_g = _h.return)) yield _g.call(_h);
+                if (_m && !_m.done && (_k = _l.return)) yield _k.call(_l);
             }
-            finally { if (e_3) throw e_3.error; }
+            finally { if (e_4) throw e_4.error; }
         }
         if (limit)
             if (counter === limit)

+ 31 - 4
index.ts

@@ -14,6 +14,7 @@ import readline from 'readline'
 import { Buffer } from 'buffer'
 import genbankParser from 'genbank-parser'
 import { writeSequence } from 'aligner'
+import jsonata from 'jsonata'
 
 const async_exec = (prog: string, args: string[], onData: Function) => {
     return new Promise((resolve, reject) => {
@@ -35,11 +36,11 @@ const line$ = (path: string) => readline.createInterface({
 const readOffset = (path: string, from:number, to:number) => {
     return new Promise<string>(async (resolve, reject) => {
         const size = to - from
-        const buffer = Buffer.alloc(size);
-        let filehandle = null;
+        const buffer = Buffer.alloc(size)
+        let filehandle = null
         try {
             filehandle = await fs.promises.open(path, 'r+');
-            await filehandle.read(buffer, 0, buffer.length, from);
+            await filehandle.read(buffer, 0, buffer.length, from)
         } finally {
             if (filehandle) {
                 await filehandle.close()
@@ -104,6 +105,32 @@ const getOffset = async (indexPath: string, acc: string) => {
     return res
 }
 
+const getOffsets = async (indexPath: string | string[], accessions: RegExp) => {
+    let res: string[][] = []
+    const indexPaths = Array.isArray(indexPath) ? indexPath : [indexPath]
+    for (const iP of indexPaths) {
+        for await (const line of line$(iP)) {
+            const tmp = line.split('\t')
+            if (accessions.test(tmp[0])) {
+                res.push([iP.split('.jsi')[0], tmp[1], tmp[2], tmp[0]])
+            }
+        }
+    }
+    return res
+}
+
+const getData =  async (dbPath: string | string[], accessionRegex: RegExp, query?: string) => {
+    dbPath = Array.isArray(dbPath) ? dbPath : [dbPath]
+    const results = []
+    const allOffsets = await getOffsets(dbPath.map(e => e + '.jsi'), accessionRegex)
+    for (const offset of allOffsets) {
+        const txt = await readOffset(offset[0], Number(offset[1]), Number(offset[2]))
+        const json = genbankParser(txt)[0]
+        query ? results.push(jsonata(query).evaluate(json)) : results.push(json)
+    }
+    return results
+}
+
 const getFromAcc = async (accession: string, dbPath: string | string[], indexPath?: string | string[]) => {
     dbPath = Array.isArray(dbPath) ? dbPath : [dbPath]
     if (!indexPath) {
@@ -253,4 +280,4 @@ const makeRefSeqFromReg = async (
     await async_exec('bwa', ['index', distFile], () => console.log)
 }
 
-export { getFromAcc, getSymbol, makeRefSeqFromReg }
+export { getFromAcc, getSymbol, makeRefSeqFromReg, getOffsets, getData }

+ 2 - 1
package.json

@@ -22,6 +22,7 @@
     "blessed": "^0.1.81",
     "chalk": "^4.1.2",
     "figlet": "^1.5.2",
-    "genbank-parser": "^1.2.4"
+    "genbank-parser": "^1.2.4",
+    "jsonata": "^1.8.6"
   }
 }

+ 8 - 2
test.js

@@ -8,8 +8,12 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 const _1 = require(".");
+const fs_1 = __importDefault(require("fs"));
 (() => __awaiter(void 0, void 0, void 0, function* () {
     // wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
     const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene';
@@ -22,7 +26,9 @@ const _1 = require(".");
     // wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.[1-10].rna.gbff.gz
     const rnaDBPath = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff');
     // const res = await getSymbol('NOTCH1', LRGPath, tablePath, geneDBPath, rnaDBPath)
-    // await fs.promises.writeFile('test/test-getSymbol.json', JSON.stringify(res, null, 4))
     // await makeRefSeqFromReg(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa')
-    yield (0, _1.makeRefSeqFromReg)(rnaDBPath, /NM_/, 'test/human_NM.fa', 10);
+    // await makeRefSeqFromReg(rnaDBPath, /NM_/, 'test/human_NM.fa', 10)
+    // const res = await getOffsets(rnaDBPath.map(e => e + '.jsi'), /NR_/)
+    const res = yield (0, _1.getData)(rnaDBPath, /NR_/, '{"name": features[type="gene"].name, "accession": version}');
+    yield fs_1.default.promises.writeFile('test/test-NR.json', JSON.stringify(res, null, 4));
 }))();

+ 5 - 3
test.ts

@@ -1,4 +1,4 @@
-import { getSymbol, makeRefSeqFromReg } from '.'
+import { getSymbol, makeRefSeqFromReg, getFromAcc, getOffsets, getData } from '.'
 import fs from 'fs'
 ( async () => {
     // wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
@@ -15,8 +15,10 @@ import fs from 'fs'
     const rnaDBPath  = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff')
 
     // const res = await getSymbol('NOTCH1', LRGPath, tablePath, geneDBPath, rnaDBPath)
-    // await fs.promises.writeFile('test/test-getSymbol.json', JSON.stringify(res, null, 4))
     // await makeRefSeqFromReg(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa')
-    await makeRefSeqFromReg(rnaDBPath, /NM_/, 'test/human_NM.fa', 10)
+    // await makeRefSeqFromReg(rnaDBPath, /NM_/, 'test/human_NM.fa', 10)
+    // const res = await getOffsets(rnaDBPath.map(e => e + '.jsi'), /NR_/)
+    const res = await getData(rnaDBPath, /NR_/, '{"name": features[type="gene"].name, "accession": version}')
+    await fs.promises.writeFile('test/test-NR.json', JSON.stringify(res, null, 4))
 })()
 

+ 5 - 0
yarn.lock

@@ -108,6 +108,11 @@ has-flag@^4.0.0:
   resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
   integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
 
+jsonata@^1.8.6:
+  version "1.8.6"
+  resolved "https://registry.yarnpkg.com/jsonata/-/jsonata-1.8.6.tgz#e5f0e6ace870a34bac881a182ca2b31227122791"
+  integrity sha512-ZH2TPYdNP2JecOl/HvrH47Xc+9imibEMQ4YqKy/F/FrM+2a6vfbGxeCX23dB9Fr6uvGwv+ghf1KxWB3iZk09wA==
+
 long@^5.1.0:
   version "5.2.0"
   resolved "https://registry.npmjs.org/long/-/long-5.2.0.tgz"