Thomas 3 anos atrás
pai
commit
4f05833ce4
3 arquivos alterados com 34 adições e 29 exclusões
  1. 1 0
      .gitignore
  2. 15 14
      index.js
  3. 18 15
      index.ts

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+node_modules

+ 15 - 14
index.js

@@ -1,4 +1,5 @@
 "use strict";
+// refactor with https://github.com/piscinajs/piscina
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
@@ -23,7 +24,7 @@ const fs_1 = __importDefault(require("fs"));
 const esearch = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi';
 const efetch = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';
 const regex_sam_restriction = /[>0-9A-Za-z!#$%&+\./:;?@^_|~-]|[\n\t]/g;
-const maxWaiting = 30 * 60 * 1000;
+const maxWaiting = 300 * 60 * 1000;
 // Returns a copy of `string` in which every character that does NOT match
 // `regex` is replaced by `by` (default '_'); matching characters are kept.
 // The input is split with split(''), so the regex is tested against one
 // UTF-16 code unit at a time rather than against the string as a whole.
 const invReplace = (regex, string, by = '_') => {
     return string.split('').map(letter => letter.match(regex) ? letter : by).join('');
 };
@@ -76,7 +77,7 @@ const get_ids_from_query = (arg, cb) => {
     });
 };
 // async
-const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20) => {
+const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20, concurrency = 2) => {
     return new Promise((resolve, reject) => {
         https_1.default.get(`${esearch}?db=nucleotide&term=${query}&usehistory=y&api_key=${NCBI_API}`, (resp) => __awaiter(void 0, void 0, void 0, function* () {
             let data = '';
@@ -85,7 +86,7 @@ const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20)
                 const tmp = data.match(/<Count>(\d+)<\/Count>/) || [0, 0];
                 const count = parseInt(tmp[1]);
                 const nIter = count % retMax === 0 ? count / retMax : Math.trunc(count / retMax) + 1;
-                const q = require('fastq')(get_ids_from_query, 2);
+                const q = require('fastq')(get_ids_from_query, concurrency);
                 let ids = [];
                 const callback = (arg) => {
                     const tmp = Array.isArray(arg) ? arg : [arg];
@@ -110,7 +111,7 @@ const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20)
                     resolve(ids);
                 }
                 else {
-                    reject('Error');
+                    reject(['Error ', ids.length, count].join(' '));
                 }
             }));
         })).on("error", (err) => {
@@ -145,17 +146,17 @@ const saveMultifastaFromIds = (query, path, NCBI_API, onProgress) => {
     }));
 };
 exports.saveMultifastaFromIds = saveMultifastaFromIds;
+// https://linsalrob.github.io/ComputationalGenomicsManual/Databases/NCBI_Edirect.html
 // https://www.ncbi.nlm.nih.gov/books/NBK21091/
 // https://www.ncbi.nlm.nih.gov/books/NBK50679/
 // ""Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]"
 // ""Homo sapiens"[Organism] AND srcdb_refseq[prop] AND biomol_rna[prop] "
-/*
-(async () => {
-    const NCBI_API = '5b283f20e48000e0e9f20874125d4cced808'
-    await saveMultifastaFromIds(
-        '"Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]',
-        '/home/thomas/Human_Transcriptome_RefSeq.fna',
-        NCBI_API,
-        console.log
-    )
-})()*/ 
+// (async () => {
+//     const NCBI_API = '5b283f20e48000e0e9f20874125d4cced808'
+//     await saveMultifastaFromIds(
+//         '"Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]', 
+//         '/home/thomas/Human_Transcriptome_RefSeq.fna',
+//         NCBI_API,
+//         console.log
+//     )
+// })()

+ 18 - 15
index.ts

@@ -1,3 +1,5 @@
+// refactor with https://github.com/piscinajs/piscina
+
 import https from 'https'
 import fs from 'fs'
 
@@ -14,7 +16,7 @@ type TaskBis = {query: string, from: number, max: number, NCBI_API: string}
 const esearch: string               = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
 const efetch: string                = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
 const regex_sam_restriction: RegExp = /[>0-9A-Za-z!#$%&+\./:;?@^_|~-]|[\n\t]/g;
-const maxWaiting                    = 30 * 60 * 1000
+const maxWaiting                    = 300 * 60 * 1000
 
 const invReplace = (regex: RegExp, string: string, by = '_') => {
   return string.split('').map(letter => letter.match(regex) ? letter : by).join('')
@@ -80,7 +82,8 @@ const get_multipage_ids_from_query = (
     query: string,
     onProgress: Function,
     NCBI_API: string,
-    retMax = 20
+    retMax = 20,
+    concurrency = 2
 ) => {
   return new Promise((resolve, reject) => {
     https.get(`${esearch}?db=nucleotide&term=${query}&usehistory=y&api_key=${NCBI_API}`, async (resp) => {
@@ -92,7 +95,7 @@ const get_multipage_ids_from_query = (
 
         const nIter = count%retMax === 0 ? count/retMax : Math.trunc(count/retMax) + 1
 
-        const q: queue<TaskBis> = require('fastq')(get_ids_from_query, 2)
+        const q: queue<TaskBis> = require('fastq')(get_ids_from_query, concurrency)
         let ids: any[] = []
         const callback: done = (arg) => {
             const tmp = Array.isArray(arg) ? arg : [arg]
@@ -116,7 +119,7 @@ const get_multipage_ids_from_query = (
         if(ids.length === count) {
             resolve(ids)
         } else {
-            reject('Error')
+            reject(['Error ',ids.length,count].join(' '))
         }
       })
     }).on("error", (err) => {
@@ -149,18 +152,18 @@ const saveMultifastaFromIds = (
 }
 
 export { saveMultifastaFromIds }
-
+// https://linsalrob.github.io/ComputationalGenomicsManual/Databases/NCBI_Edirect.html
 // https://www.ncbi.nlm.nih.gov/books/NBK21091/
 // https://www.ncbi.nlm.nih.gov/books/NBK50679/
 // ""Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]"
 // ""Homo sapiens"[Organism] AND srcdb_refseq[prop] AND biomol_rna[prop] "
-/*
-(async () => {
-    const NCBI_API = '5b283f20e48000e0e9f20874125d4cced808'
-    await saveMultifastaFromIds(
-        '"Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]', 
-        '/home/thomas/Human_Transcriptome_RefSeq.fna',
-        NCBI_API,
-        console.log
-    )
-})()*/
+
+// (async () => {
+//     const NCBI_API = '5b283f20e48000e0e9f20874125d4cced808'
+//     await saveMultifastaFromIds(
+//         '"Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]', 
+//         '/home/thomas/Human_Transcriptome_RefSeq.fna',
+//         NCBI_API,
+//         console.log
+//     )
+// })()