|
|
@@ -1,4 +1,5 @@
|
|
|
"use strict";
|
|
|
+// TODO: refactor to use Piscina, a Node.js worker-thread pool (https://github.com/piscinajs/piscina)
|
|
|
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
|
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
|
return new (P || (P = Promise))(function (resolve, reject) {
|
|
|
@@ -23,7 +24,7 @@ const fs_1 = __importDefault(require("fs"));
|
|
|
const esearch = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi';
|
|
|
const efetch = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';
|
|
|
const regex_sam_restriction = /[>0-9A-Za-z!#$%&+\./:;?@^_|~-]|[\n\t]/g;
|
|
|
-const maxWaiting = 30 * 60 * 1000;
|
|
|
+const maxWaiting = 300 * 60 * 1000;
|
|
|
/**
 * Replace every character of `string` that does NOT match `regex` with `by`.
 *
 * The string is processed one UTF-16 code unit at a time, so the output has
 * the same number of units as the input whenever `by` is a single character.
 *
 * @param {RegExp} regex - Pattern describing the characters to keep; it is
 *   matched against each single character in turn.
 * @param {string} string - Text to sanitise.
 * @param {string} [by='_'] - Replacement emitted for each rejected character.
 * @returns {string} `string` with every non-matching character replaced by `by`.
 */
const invReplace = (regex, string, by = '_') => {
    // String#match is used on purpose instead of RegExp#test: for a regex with
    // the `g` flag, match() resets lastIndex before searching, whereas test()
    // would carry lastIndex over between characters and reject every other one.
    const keepOrReplace = (letter) => (letter.match(regex) ? letter : by);
    return string.split('').map(keepOrReplace).join('');
};
|
|
|
@@ -76,7 +77,7 @@ const get_ids_from_query = (arg, cb) => {
|
|
|
});
|
|
|
};
|
|
|
// Async: returns a Promise that resolves with the ids gathered from the query's result pages.
|
|
|
-const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20) => {
|
|
|
+const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20, concurrency = 2) => {
|
|
|
return new Promise((resolve, reject) => {
|
|
|
https_1.default.get(`${esearch}?db=nucleotide&term=${query}&usehistory=y&api_key=${NCBI_API}`, (resp) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
let data = '';
|
|
|
@@ -85,7 +86,7 @@ const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20)
|
|
|
const tmp = data.match(/<Count>(\d+)<\/Count>/) || [0, 0];
|
|
|
const count = parseInt(tmp[1]);
|
|
|
const nIter = count % retMax === 0 ? count / retMax : Math.trunc(count / retMax) + 1;
|
|
|
- const q = require('fastq')(get_ids_from_query, 2);
|
|
|
+ const q = require('fastq')(get_ids_from_query, concurrency);
|
|
|
let ids = [];
|
|
|
const callback = (arg) => {
|
|
|
const tmp = Array.isArray(arg) ? arg : [arg];
|
|
|
@@ -110,7 +111,7 @@ const get_multipage_ids_from_query = (query, onProgress, NCBI_API, retMax = 20)
|
|
|
resolve(ids);
|
|
|
}
|
|
|
else {
|
|
|
- reject('Error');
|
|
|
+ reject(['Error ', ids.length, count].join(' '));
|
|
|
}
|
|
|
}));
|
|
|
})).on("error", (err) => {
|
|
|
@@ -145,17 +146,17 @@ const saveMultifastaFromIds = (query, path, NCBI_API, onProgress) => {
|
|
|
}));
|
|
|
};
|
|
|
exports.saveMultifastaFromIds = saveMultifastaFromIds;
|
|
|
+// https://linsalrob.github.io/ComputationalGenomicsManual/Databases/NCBI_Edirect.html
|
|
|
// https://www.ncbi.nlm.nih.gov/books/NBK21091/
|
|
|
// https://www.ncbi.nlm.nih.gov/books/NBK50679/
|
|
|
// ""Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]"
|
|
|
// ""Homo sapiens"[Organism] AND srcdb_refseq[prop] AND biomol_rna[prop] "
|
|
|
-/*
|
|
|
-(async () => {
|
|
|
- const NCBI_API = '5b283f20e48000e0e9f20874125d4cced808'
|
|
|
- await saveMultifastaFromIds(
|
|
|
- '"Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]',
|
|
|
- '/home/thomas/Human_Transcriptome_RefSeq.fna',
|
|
|
- NCBI_API,
|
|
|
- console.log
|
|
|
- )
|
|
|
-})()*/
|
|
|
+// (async () => {
|
|
|
+// const NCBI_API = process.env.NCBI_API_KEY // never commit a real API key; load it from the environment
|
|
|
+// await saveMultifastaFromIds(
|
|
|
+// '"Homo sapiens"[Organism] AND biomol_transcribed_rna[PROP] AND refseq[filter]',
|
|
|
+// '/home/thomas/Human_Transcriptome_RefSeq.fna',
|
|
|
+// NCBI_API,
|
|
|
+// console.log
|
|
|
+// )
|
|
|
+// })()
|