"use strict";
// Data sources (download and decompress before use):
// wget https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/109.20211119/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_feature_table.txt.gz
// wget https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/109.20211119/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_structure/Primary_Assembly/assembled_chromosomes/chr2acc
// wget https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/109.20211119/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.gbff.gz
// wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
// wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/refseqgene.1.genomic.gbff.gz
// wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/biological_region/human.biological_region.gbff.gz
// wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.[1-10].rna.gbff.gz
//
// NOTE(review): this file appears to be TypeScript compiler output. If a .ts
// source exists, port these changes there as well — TODO confirm.
// The down-levelled __awaiter/__asyncValues helpers were replaced by native
// async/await and `for await`; the file already depends on `fs.promises` and on
// async iteration of readline interfaces, so the runtime supports both.

// CommonJS/ES-module interop: guarantees that `.default` resolves to the module.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getSymbol = exports.getFromAcc = void 0;
const fs_1 = __importDefault(require("fs"));
const readline_1 = __importDefault(require("readline"));
const buffer_1 = require("buffer");
const genbank_parser_1 = __importDefault(require("genbank-parser"));

/** Byte value of "\n"; record offsets are computed on raw bytes, not characters. */
const NEWLINE_BYTE = 0x0a;

/** Marker that starts every record in a GenBank flat file. */
const LOCUS_MARKER = 'LOCUS';

/**
 * Column positions in the tab-separated LRG_RefSeqGene file.
 * NOTE(review): layout taken from the NCBI RefSeqGene README
 * (tax_id, GeneID, Symbol, RSG, LRG, RNA, t, Protein, p, Category) — confirm
 * against the downloaded file. The LRG / t / p columns are often empty, which is
 * why fields are addressed by fixed position instead of dropping empty fields.
 */
const LRG_COLUMN = Object.freeze({ geneId: 1, symbol: 2, geneAcc: 3, transcriptAcc: 5, proteinAcc: 7 });

/** Column names applied, in order, to each row of the feature table. */
const FEATURE_TABLE_COLUMNS = Object.freeze([
    'feature',
    'class',
    'assembly',
    'assembly_unit',
    'seq_type',
    'chromosome',
    'genomic_accession',
    'start',
    'end',
    'strand',
    'product_accession',
    'non_redundant_refseq',
    'related_accession',
    'name',
    'symbol',
    'GeneID',
    'locus_tag',
    'feature_interval_length',
    'product_length',
    'attributes'
]);

/**
 * Yields the lines of a text file one at a time.
 *
 * The underlying stream is always destroyed, including when the consumer stops
 * early, so no file descriptor is left open. Stream errors (e.g. a missing
 * file) are rethrown to the consumer instead of surfacing as an unhandled
 * 'error' event.
 *
 * @param {string} path - File to read.
 * @returns {AsyncGenerator<string>} Lines without their line terminator.
 */
async function* line$(path) {
    const input = fs_1.default.createReadStream(path);
    const rl = readline_1.default.createInterface({ input, crlfDelay: Infinity });
    let streamError = null;
    input.on('error', (err) => {
        streamError = err;
        // Closing the interface ends the async iteration below.
        rl.close();
    });
    try {
        for await (const line of rl) {
            yield line;
        }
        if (streamError) {
            throw streamError;
        }
    } finally {
        rl.close();
        input.destroy();
    }
}

/**
 * Reads the byte range [from, to) of a file and returns it as a UTF-8 string.
 *
 * @param {string} path - File to read.
 * @param {number} from - Offset of the first byte.
 * @param {number} to - Offset one past the last byte.
 * @returns {Promise<string>} The decoded bytes; shorter than `to - from` if the
 *     file ends early.
 * @throws Rejects when the range is invalid or the file cannot be opened/read.
 */
const readOffset = async (path, from, to) => {
    const buffer = buffer_1.Buffer.alloc(to - from);
    // Read-only access is enough; 'r+' needlessly required write permission.
    const filehandle = await fs_1.default.promises.open(path, 'r');
    try {
        const { bytesRead } = await filehandle.read(buffer, 0, buffer.length, from);
        // Decode only what was read so a short read does not yield NUL padding.
        return buffer.toString('utf8', 0, bytesRead);
    } finally {
        await filehandle.close();
    }
};

/**
 * Scans a GenBank flat file and records where each LOCUS line starts.
 *
 * Works on raw bytes so the offsets stay correct for CRLF line endings and
 * multi-byte characters.
 *
 * @param {string} filePath - GenBank flat file.
 * @returns {Promise<{starts: Array<{value: string, from: number}>, totalBytes: number}>}
 *     `value` is the second whitespace-separated token of the LOCUS line.
 */
const scanLocusOffsets = async (filePath) => {
    const starts = [];
    let carry = buffer_1.Buffer.alloc(0); // unfinished line left over from earlier chunks
    let lineStart = 0; // absolute byte offset of the first byte of `carry`
    const inspect = (lineBytes, offset) => {
        const isLocus = lineBytes.length >= LOCUS_MARKER.length &&
            lineBytes.toString('latin1', 0, LOCUS_MARKER.length) === LOCUS_MARKER;
        if (isLocus) {
            starts.push({ value: lineBytes.toString('utf8').split(/\s+/)[1], from: offset });
        }
    };
    for await (const chunk of fs_1.default.createReadStream(filePath)) {
        const data = carry.length > 0 ? buffer_1.Buffer.concat([carry, chunk]) : chunk;
        let pos = 0;
        let newlineAt = data.indexOf(NEWLINE_BYTE, pos);
        while (newlineAt !== -1) {
            inspect(data.slice(pos, newlineAt), lineStart);
            lineStart += newlineAt - pos + 1;
            pos = newlineAt + 1;
            newlineAt = data.indexOf(NEWLINE_BYTE, pos);
        }
        carry = data.slice(pos);
    }
    if (carry.length > 0) {
        // Last line without a trailing newline.
        inspect(carry, lineStart);
        lineStart += carry.length;
    }
    return { starts, totalBytes: lineStart };
};

/*
 * Shell equivalent of the index built below:
 * strings -a -t d human.1.rna.gbff | grep LOCUS | awk '{print $1"\t"$3}' > human.1.rna.gbff.index
 */
/**
 * Builds a byte-offset index of the records in a GenBank flat file.
 *
 * Each index line is `<locus>\t<from>\t<to>`, where [from, to) is the byte range
 * of the record. The index is written to a temporary file and then renamed, so
 * an interrupted run does not leave a partial index behind, and an existing
 * index is replaced rather than appended to.
 *
 * @param {string} filePath - GenBank flat file to index.
 * @param {string} [indexPath] - Destination; defaults to `filePath + '.jsi'`.
 * @returns {Promise<Array<{filePath: string, value: string, from: number, to: number}>>}
 *     One entry per record, in file order (empty when no LOCUS line exists).
 */
const makeGbffIndex = async (filePath, indexPath) => {
    const target = indexPath || filePath + '.jsi';
    const { starts, totalBytes } = await scanLocusOffsets(filePath);
    // A record ends where the next one starts; the last one ends at end of file.
    const entries = starts.map(({ value, from }, i) => ({
        filePath,
        value,
        from,
        to: i + 1 < starts.length ? starts[i + 1].from : totalBytes
    }));
    const body = entries.map(({ value, from, to }) => [value, from, to].join('\t')).join('\n');
    const tmpPath = `${target}.${process.pid}.tmp`;
    try {
        await fs_1.default.promises.writeFile(tmpPath, body);
        await fs_1.default.promises.rename(tmpPath, target);
    } catch (err) {
        // Best-effort cleanup; the original failure is the one worth reporting.
        await fs_1.default.promises.unlink(tmpPath).catch(() => undefined);
        throw err;
    }
    return entries;
};

/**
 * Looks up the byte range of a record in an index written by makeGbffIndex.
 *
 * @param {string} indexPath - Index file.
 * @param {string} acc - Locus name to find (first column of the index).
 * @returns {Promise<{from: number, to: number} | undefined>} Range of the first
 *     matching entry, or undefined when the locus is not indexed.
 */
const getOffset = async (indexPath, acc) => {
    for await (const row of line$(indexPath)) {
        const [value, from, to] = row.split('\t');
        if (value === acc) {
            return { from: Number(from), to: Number(to) };
        }
    }
    return undefined;
};

/**
 * Fetches and parses one GenBank record by locus name.
 *
 * @param {string} accession - Locus name as it appears on the LOCUS line.
 * @param {string | string[]} dbPath - GenBank flat file(s), searched in order.
 * @param {string | string[]} [indexPath] - Index file(s), one per entry of
 *     `dbPath` and in the same order. When omitted, `<dbPath>.jsi` is used and
 *     built on demand.
 * @returns {Promise<object | undefined>} First record returned by
 *     genbank-parser for the matching range, or undefined when not found.
 * @throws {Error} When `indexPath` and `dbPath` differ in length.
 */
const getFromAcc = async (accession, dbPath, indexPath) => {
    const dbPaths = Array.isArray(dbPath) ? dbPath : [dbPath];
    let indexPaths;
    if (!indexPath) {
        indexPaths = [];
        for (const p of dbPaths) {
            const iP = p + '.jsi';
            if (!fs_1.default.existsSync(iP)) {
                console.log('Writing index: ' + iP);
                await makeGbffIndex(p, iP);
            }
            indexPaths.push(iP);
        }
    } else {
        indexPaths = Array.isArray(indexPath) ? indexPath : [indexPath];
        if (indexPaths.length !== dbPaths.length) {
            throw new Error('getFromAcc: indexPath and dbPath must contain the same number of entries');
        }
    }
    for (const [i, iP] of indexPaths.entries()) {
        const range = await getOffset(iP, accession);
        if (range) {
            // Read from the database paired with this index rather than from a
            // path guessed out of the index file name.
            const txt = await readOffset(dbPaths[i], range.from, range.to);
            return (0, genbank_parser_1.default)(txt)[0];
        }
    }
    return undefined;
};
exports.getFromAcc = getFromAcc;

/**
 * Converts a raw table field to a number when it is numeric.
 * Empty fields stay empty strings; `Number('')` is 0, which would otherwise
 * make a missing value indistinguishable from zero.
 */
const coerceField = (raw) => {
    if (raw.trim() === '') {
        return raw;
    }
    const asNumber = Number(raw);
    return Number.isFinite(asNumber) ? asNumber : raw;
};

/**
 * Selects rows of a tab-separated table.
 *
 * The table is streamed line by line instead of being loaded into memory.
 *
 * @param {object} selector - Column/value pairs; a row is kept when every pair
 *     matches with strict equality (after numeric coercion of the row).
 * @param {string} tablePath - Tab-separated table file.
 * @param {string[]} headerTablePath - Column names, in column order.
 * @returns {Promise<object[]>} Matching rows keyed by column name.
 */
const getPos = async (selector, tablePath, headerTablePath) => {
    const wanted = Object.entries(selector);
    const results = [];
    for await (const line of line$(tablePath)) {
        if (line === '') {
            continue;
        }
        const row = {};
        line.split('\t').forEach((raw, i) => {
            const column = headerTablePath[i];
            if (column !== undefined) {
                row[column] = coerceField(raw);
            }
        });
        if (wanted.every(([key, expected]) => row[key] === expected)) {
            results.push(row);
        }
    }
    return results;
};

/**
 * Collects the RefSeqGene accessions recorded for a gene symbol.
 *
 * The symbol is compared for exact equality with the symbol column. It used to
 * be applied as a regular expression to the whole line, so e.g. "TP53" also
 * matched "TP53BP1" and merged unrelated accessions.
 *
 * @param {string} symbol - Gene symbol.
 * @param {string} LRGPath - Path to the LRG_RefSeqGene file.
 * @returns {Promise<{GeneID: string, GeneAcc: string, TranscriptsAcc: string[], ProteinAcc: string[]}>}
 *     Empty strings/arrays when the symbol is not listed. GeneID and GeneAcc
 *     come from the last matching row.
 */
const getIndex = async (symbol, LRGPath) => {
    const text = (await fs_1.default.promises.readFile(LRGPath)).toString();
    const transcripts = new Set();
    const proteins = new Set();
    let GeneID = '';
    let GeneAcc = '';
    for (const line of text.split(/\r?\n/)) {
        if (line === '' || line.startsWith('#')) {
            continue;
        }
        const cols = line.split('\t');
        if (cols[LRG_COLUMN.symbol] !== symbol) {
            continue;
        }
        GeneID = cols[LRG_COLUMN.geneId] || '';
        GeneAcc = cols[LRG_COLUMN.geneAcc] || '';
        if (cols[LRG_COLUMN.transcriptAcc]) {
            transcripts.add(cols[LRG_COLUMN.transcriptAcc]);
        }
        if (cols[LRG_COLUMN.proteinAcc]) {
            proteins.add(cols[LRG_COLUMN.proteinAcc]);
        }
    }
    return { GeneID, GeneAcc, TranscriptsAcc: [...transcripts], ProteinAcc: [...proteins] };
};

/**
 * Writes a pretty-printed JSON dump under `test/`, creating the directory first.
 * Nothing is written for `undefined`: JSON.stringify(undefined) is not a string
 * and writeFile would reject it.
 */
const dumpJson = async (fileName, data) => {
    if (data === undefined) {
        return;
    }
    await fs_1.default.promises.mkdir('test', { recursive: true });
    await fs_1.default.promises.writeFile('test/' + fileName, JSON.stringify(data, null, 4));
};

/**
 * Gathers the feature-table rows for a gene symbol and attaches the parsed
 * GenBank record to the `gene` and `mRNA` rows.
 *
 * Side effect: each record found is also dumped to `test/test-gene.json` or
 * `test/test-rna-<row index>.json`.
 * The biological-region lookup (human.biological_region.gbff.gz) is disabled.
 *
 * @param {string} symbol - Gene symbol, matched exactly.
 * @param {string} LRGPath - LRG_RefSeqGene file
 *     (ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene).
 * @param {string} tablePath - Decompressed feature table
 *     (GCF_000001405.39_GRCh38.p13_feature_table.txt.gz).
 * @param {string | string[]} geneDBPath - RefSeqGene GenBank file(s)
 *     (refseqgene.[1-7].genomic.gbff.gz).
 * @param {string | string[]} rnaDBPath - RNA GenBank file(s)
 *     (human.[1-10].rna.gbff.gz).
 * @returns {Promise<object[]>} Feature rows; `gene` and `mRNA` rows carry a
 *     `data` property (undefined when the record was not found), and `gene`
 *     rows have `product_accession` replaced by the RefSeqGene accession.
 */
const getSymbol = async (symbol, LRGPath, tablePath, geneDBPath, rnaDBPath) => {
    const geneIndex = await getIndex(symbol, LRGPath);
    const allFeatures = await getPos({ symbol }, tablePath, FEATURE_TABLE_COLUMNS);
    // LOCUS names carry no version suffix, so strip ".N"; skip the lookup when
    // there is no accession to search for.
    const fetchRecord = (accession, dbPath) => {
        const locus = typeof accession === 'string' ? accession.split('.')[0] : '';
        return locus === '' ? Promise.resolve(undefined) : getFromAcc(locus, dbPath);
    };
    for (const [index, row] of allFeatures.entries()) {
        switch (row.feature) {
            case 'gene': {
                row.product_accession = geneIndex.GeneAcc;
                const record = await fetchRecord(geneIndex.GeneAcc, geneDBPath);
                await dumpJson('test-gene.json', record);
                row.data = record;
                break;
            }
            case 'mRNA': {
                const record = await fetchRecord(row.product_accession, rnaDBPath);
                await dumpJson('test-rna-' + index + '.json', record);
                row.data = record;
                break;
            }
            default:
                break;
        }
    }
    return allFeatures;
};
exports.getSymbol = getSymbol;