Thomas 4 anos atrás
pai
commit
0f1afe8471
4 arquivos alterados com 36 adições e 10 exclusões
  1. 16 3
      index.js
  2. 15 5
      index.ts
  3. 3 1
      test.js
  4. 2 1
      test.ts

+ 16 - 3
index.js

@@ -226,14 +226,16 @@ const getJSI = (dbPath) => __awaiter(void 0, void 0, void 0, function* () {
     return indexPath;
 });
 // Todo: add progress
-const makeRefSeqFromReg = (dbPath, reg, distFile) => __awaiter(void 0, void 0, void 0, function* () {
+const makeRefSeqFromReg = (dbPath, reg, distFile, limit) => __awaiter(void 0, void 0, void 0, function* () {
     var e_3, _g;
     dbPath = Array.isArray(dbPath) ? dbPath : [dbPath];
     const jsiFiles = yield getJSI(dbPath);
     const tmpDir = path_1.default.join(os_1.default.tmpdir(), 'parser-' + Math.random());
     yield fs_1.default.promises.mkdir(tmpDir);
     const createdFiles = [];
+    let counter = 0;
     for (const jsiFile of jsiFiles) {
+        console.log('reading ' + jsiFile);
         try {
             for (var _h = (e_3 = void 0, __asyncValues(line$(jsiFile))), _j; _j = yield _h.next(), !_j.done;) {
                 const line = _j.value;
@@ -243,14 +245,22 @@ const makeRefSeqFromReg = (dbPath, reg, distFile) => __awaiter(void 0, void 0, v
                     if (res === null || res === void 0 ? void 0 : res.sequence) {
                         try {
                             const file = path_1.default.join(tmpDir, (res === null || res === void 0 ? void 0 : res.version) || res.accession + '.fa');
-                            yield (0, aligner_1.writeSequence)((res === null || res === void 0 ? void 0 : res.version) || res.accession, res === null || res === void 0 ? void 0 : res.sequence, file);
-                            createdFiles.push(file);
+                            if (!createdFiles.includes(file)) {
+                                yield (0, aligner_1.writeSequence)((res === null || res === void 0 ? void 0 : res.version) || res.accession, res === null || res === void 0 ? void 0 : res.sequence, file);
+                                createdFiles.push(file);
+                                counter++;
+                                if (counter % 100 === 0)
+                                    console.log('Already ' + counter + ' sequence parsed');
+                            }
                         }
                         catch (error) {
                             console.log(error);
                         }
                     }
                 }
+                if (limit)
+                    if (counter === limit)
+                        break;
             }
         }
         catch (e_3_1) { e_3 = { error: e_3_1 }; }
@@ -260,6 +270,9 @@ const makeRefSeqFromReg = (dbPath, reg, distFile) => __awaiter(void 0, void 0, v
             }
             finally { if (e_3) throw e_3.error; }
         }
+        if (limit)
+            if (counter === limit)
+                break;
     }
     console.log(createdFiles.length + ' sequences');
     if (fs_1.default.existsSync(distFile))

+ 15 - 5
index.ts

@@ -209,13 +209,17 @@ const getJSI = async (dbPath: string | string[]) => {
 }
 
 // Todo: add progress
-const makeRefSeqFromReg = async (dbPath: string | string[], reg: RegExp, distFile:string ) => {
+const makeRefSeqFromReg = async (
+    dbPath: string | string[], reg: RegExp, distFile:string, limit?: number
+) => {
     dbPath = Array.isArray(dbPath) ? dbPath : [dbPath]
     const jsiFiles = await getJSI(dbPath)
-    const tmpDir = path.join(os.tmpdir(), 'parser-'+Math.random())
+    const tmpDir = path.join(os.tmpdir(), 'parser-' + Math.random())
     await fs.promises.mkdir(tmpDir)
-    const createdFiles = []
+    const createdFiles: string[] = []
+    let counter = 0
     for (const jsiFile of jsiFiles) {
+        console.log('reading ' + jsiFile)
         for await (const line of line$(jsiFile)) {
             if(line.match(reg)) {
                 const [accession, from, to] = line.split('\t')
@@ -223,14 +227,20 @@ const makeRefSeqFromReg = async (dbPath: string | string[], reg: RegExp, distFil
                 if (res?.sequence) {
                     try {
                         const file = path.join(tmpDir, res?.version || res.accession + '.fa')
-                        await writeSequence(res?.version || res.accession, res?.sequence, file)
-                        createdFiles.push(file)
+                        if (!createdFiles.includes(file)) {
+                            await writeSequence(res?.version || res.accession, res?.sequence, file)
+                            createdFiles.push(file)
+                            counter++
+                            if (counter%100 === 0) console.log('Already ' + counter + ' sequence parsed')
+                        }
                     } catch (error) {
                         console.log(error)
                     }
                 }
             }
+            if (limit) if (counter === limit) break
         }
+        if (limit) if (counter === limit) break
     }
     console.log(createdFiles.length + ' sequences')
     

+ 3 - 1
test.js

@@ -9,6 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+const _1 = require(".");
 (() => __awaiter(void 0, void 0, void 0, function* () {
     // wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
     const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene';
@@ -22,5 +23,6 @@ Object.defineProperty(exports, "__esModule", { value: true });
     const rnaDBPath = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff');
     // const res = await getSymbol('NOTCH1', LRGPath, tablePath, geneDBPath, rnaDBPath)
     // await fs.promises.writeFile('test/test-getSymbol.json', JSON.stringify(res, null, 4))
-    // await makeRefSeqFromReg(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa')
+    yield (0, _1.makeRefSeqFromReg)(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa');
+    // await makeRefSeqFromReg(rnaDBPath, /NM_/, 'test/human_NM.fa', 10)
 }))();

+ 2 - 1
test.ts

@@ -16,6 +16,7 @@ import fs from 'fs'
 
     // const res = await getSymbol('NOTCH1', LRGPath, tablePath, geneDBPath, rnaDBPath)
     // await fs.promises.writeFile('test/test-getSymbol.json', JSON.stringify(res, null, 4))
-    // await makeRefSeqFromReg(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa')
+    await makeRefSeqFromReg(rnaDBPath, /NM_/, '/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa')
+    // await makeRefSeqFromReg(rnaDBPath, /NM_/, 'test/human_NM.fa', 10)
 })()