Thomas пре 3 година
родитељ
комит
ca352e37ce
4 измењених фајлова са 27 додато и 18 уклоњено
  1. 12 5
      index.js
  2. 12 5
      index.ts
  3. 1 6
      test.js
  4. 2 2
      test.ts

+ 12 - 5
index.js

@@ -161,16 +161,23 @@ const getOffsets = (indexPath, accessions) => __awaiter(void 0, void 0, void 0,
     return res;
 });
 exports.getOffsets = getOffsets;
-const getData = (dbPath, accessionRegex, query) => __awaiter(void 0, void 0, void 0, function* () {
+const getData = (dbPath, accessionRegex, outPath, query) => __awaiter(void 0, void 0, void 0, function* () {
     dbPath = Array.isArray(dbPath) ? dbPath : [dbPath];
-    const results = [];
     const allOffsets = yield getOffsets(dbPath.map(e => e + '.jsi'), accessionRegex);
-    for (const offset of allOffsets) {
+    console.log(allOffsets.length + ' entry to parse.');
+    fs_1.default.promises.appendFile(outPath, '[\n');
+    for (let index = 0; index < allOffsets.length; index++) {
+        const offset = allOffsets[index];
         const txt = yield readOffset(offset[0], Number(offset[1]), Number(offset[2]));
         const json = (0, genbank_parser_1.default)(txt)[0];
-        query ? results.push((0, jsonata_1.default)(query).evaluate(json)) : results.push(json);
+        const tmp = query ? (0, jsonata_1.default)(query).evaluate(json) : json;
+        const end = index + 1 === allOffsets.length ? '' : ',';
+        fs_1.default.promises.appendFile(outPath, JSON.stringify(tmp, null, 4) + end + '\n');
+        if ((index + 1) % 100 === 0)
+            console.log('Already ' + (index + 1) + ' sequence parsed');
     }
-    return results;
+    fs_1.default.promises.appendFile(outPath, ']');
+    return 0;
 });
 exports.getData = getData;
 const getFromAcc = (accession, dbPath, indexPath) => __awaiter(void 0, void 0, void 0, function* () {

+ 12 - 5
index.ts

@@ -119,16 +119,23 @@ const getOffsets = async (indexPath: string | string[], accessions: RegExp) => {
     return res
 }
 
-const getData =  async (dbPath: string | string[], accessionRegex: RegExp, query?: string) => {
+const getData =  async (dbPath: string | string[], accessionRegex: RegExp, outPath: string, query?: string) => {
     dbPath = Array.isArray(dbPath) ? dbPath : [dbPath]
-    const results = []
     const allOffsets = await getOffsets(dbPath.map(e => e + '.jsi'), accessionRegex)
-    for (const offset of allOffsets) {
+    console.log(allOffsets.length + ' entry to parse.');
+    
+    fs.promises.appendFile(outPath, '[\n')
+    for (let index = 0; index < allOffsets.length; index++) {
+        const offset = allOffsets[index];
         const txt = await readOffset(offset[0], Number(offset[1]), Number(offset[2]))
         const json = genbankParser(txt)[0]
-        query ? results.push(jsonata(query).evaluate(json)) : results.push(json)
+        const tmp = query ? jsonata(query).evaluate(json) : json
+        const end = index + 1 === allOffsets.length ? '' : ','
+        fs.promises.appendFile(outPath, JSON.stringify(tmp, null, 4) + end + '\n')
+        if ((index + 1)%100 === 0) console.log('Already ' + ( index + 1) + ' sequence parsed')
     }
-    return results
+    fs.promises.appendFile(outPath, ']')
+    return 0
 }
 
 const getFromAcc = async (accession: string, dbPath: string | string[], indexPath?: string | string[]) => {

+ 1 - 6
test.js

@@ -8,12 +8,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 const _1 = require(".");
-const fs_1 = __importDefault(require("fs"));
 (() => __awaiter(void 0, void 0, void 0, function* () {
     // wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
     const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene';
@@ -29,6 +25,5 @@ const fs_1 = __importDefault(require("fs"));
     // await makeRefSeqFromReg(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa')
     // await makeRefSeqFromReg(rnaDBPath, /NM_/, 'test/human_NM.fa', 10)
     // const res = await getOffsets(rnaDBPath.map(e => e + '.jsi'), /NR_/)
-    const res = yield (0, _1.getData)(rnaDBPath, /NR_/, '{"name": features[type="gene"].name, "accession": version}');
-    yield fs_1.default.promises.writeFile('test/test-NR.json', JSON.stringify(res, null, 4));
+    const res = yield (0, _1.getData)(rnaDBPath, /NM_/, 'test/test-NM.json', '{"name": features[type="gene"].name, "accession": version}');
 }))();

+ 2 - 2
test.ts

@@ -18,7 +18,7 @@ import fs from 'fs'
     // await makeRefSeqFromReg(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa')
     // await makeRefSeqFromReg(rnaDBPath, /NM_/, 'test/human_NM.fa', 10)
     // const res = await getOffsets(rnaDBPath.map(e => e + '.jsi'), /NR_/)
-    const res = await getData(rnaDBPath, /NR_/, '{"name": features[type="gene"].name, "accession": version}')
-    await fs.promises.writeFile('test/test-NR.json', JSON.stringify(res, null, 4))
+    const res = await getData(rnaDBPath, /NM_/, 'test/test-NM.json', '{"name": features[type="gene"].name, "accession": version}')
+    
 })()