"use strict"; // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.xml.gz var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __asyncValues = (this && this.__asyncValues) || function (o) { if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined."); var m = o[Symbol.asyncIterator], i; return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i); function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; } function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); } }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.getInteractionsFromEntry = exports.getEntryFromGeneName = exports.getEnrty = exports.readOffset = exports.makeIndex = void 0; const fs_1 = __importDefault(require("fs")); const readline_1 = __importDefault(require("readline")); const fast_xml_parser_1 = require("fast-xml-parser"); const line$ = (path) => readline_1.default.createInterface({ input: fs_1.default.createReadStream(path), crlfDelay: Infinity }); const makeIndex = (filePath, indexPath) => __awaiter(void 0, void 0, void 0, function* () { var e_1, _a; indexPath = indexPath || filePath + '.jsi'; let byteAcc = 0; const fromSel = new RegExp("^"); const valSel = new RegExp(''); let tmp = { values: [] }; try { for (var _b = __asyncValues(line$(filePath)), _c; _c = yield _b.next(), !_c.done;) { const line = _c.value; if (fromSel.test(line)) tmp['from'] = byteAcc; byteAcc += (line.length + 1); if (valSel.test(line)) tmp['values'].push(line.match("(.*?)")[1]); // 'uck if (toSel.test(line)) { yield fs_1.default.promises.appendFile(indexPath, tmp.values.join(';') + '\t' + tmp.from + '\t' + byteAcc + '\n'); tmp = { values: [] }; } } } catch (e_1_1) { e_1 = { error: e_1_1 }; } finally { try { if (_c && !_c.done && (_a = _b.return)) yield _a.call(_b); } finally { if (e_1) throw e_1.error; } } }); exports.makeIndex = makeIndex; const readOffset = (path, from, to) => { return new Promise((resolve, reject) => __awaiter(void 0, void 0, void 0, function* () { const size = to - from; const buffer = Buffer.alloc(size); let filehandle; try { filehandle = yield fs_1.default.promises.open(path, 'r+'); yield filehandle.read(buffer, 0, buffer.length, from); } finally { if (filehandle) { yield filehandle.close(); resolve(buffer.toString()); } } })); }; exports.readOffset = readOffset; const getEntryOffset = (dbPath, accession) => __awaiter(void 0, void 0, void 0, function* () { var e_2, _d; const indexPath = dbPath + '.jsi'; if (!fs_1.default.existsSync(indexPath)) yield makeIndex(dbPath); const lineSel = new RegExp(accession); try { for (var _e = __asyncValues(line$(indexPath)), _f; _f = yield _e.next(), !_f.done;) { const line = _f.value; if (lineSel.test(line)) return [Number(line.split('\t')[1]), Number(line.split('\t')[2])]; } } catch (e_2_1) { e_2 = { error: e_2_1 }; } finally { try { if (_f && !_f.done && (_d = _e.return)) yield _d.call(_e); } finally { if (e_2) throw e_2.error; } } return [0, 0]; }); const getEntry = (dbPath, accession) => __awaiter(void 0, void 0, void 0, function* () { const parser = new fast_xml_parser_1.XMLParser({ ignoreAttributes: false, alwaysCreateTextNode: false, attributeNamePrefix: "", textNodeName: "value", allowBooleanAttributes: true, }); const offsets = yield getEntryOffset(dbPath, accession); return parser.parse(yield readOffset(dbPath, offsets[0], offsets[1])); }); exports.getEnrty = getEntry; const getEntryFromGeneName = (idmappingPath, dbPath, geneName) => __awaiter(void 0, void 0, void 0, function* () { const accessions = yield getAccessFromGene(idmappingPath, geneName); return yield getEntry(dbPath, accessions[0]); // seems to be always the first with entry }); exports.getEntryFromGeneName = getEntryFromGeneName; const getAccessFromGene = (idmappingPath, geneName) => __awaiter(void 0, void 0, void 0, function* () { var e_3, _g; const sel = new RegExp('Gene_Name\t' + geneName); let accessions = []; try { for (var _h = __asyncValues(line$(idmappingPath)), _j; _j = yield _h.next(), !_j.done;) { const line = _j.value; if (sel.test(line)) accessions.push(line.split('\t')[0]); } } catch (e_3_1) { e_3 = { error: e_3_1 }; } finally { try { if (_j && !_j.done && (_g = _h.return)) yield _g.call(_h); } finally { if (e_3) throw e_3.error; } } return accessions; }); const getInteractionsFromEntry = (json) => __awaiter(void 0, void 0, void 0, function* () { const blaskList = ['DNA', 'PHOSPHOSERINE', 'MOTIFS', 'INFECTION', 'PROTEIN', 'PROTEINS', 'GAMMA-SECRETASE', 'CALCIUM', 'MICROBIAL', 'VIRUS', 'HEPATITIS', 'HERPES', 'SIMPLEX', 'RELATED', 'AND', 'CLATHRIN', 'WORTMANNIN']; const uniprotIDs = Array.isArray(json.entry.accession) ? json.entry.accession : [json.entry.accession]; // geneName const gnT = Array.isArray(json.entry.gene.name) ? json.entry.gene.name : [json.entry.gene.name]; const geneName = gnT.filter((e) => e.type === 'primary').map((e) => e.value)[0]; // Interactants const interactants = json.entry.comment .filter((e) => (e === null || e === void 0 ? void 0 : e.type) === 'interaction') .flatMap((e) => ({ type: 'interactant', fromProductId: e.interactant[0].id, toProductId: e.interactant[1].id, to: e.interactant[1].label, nExperiments: Number(e.experiments) })); const regExp = new RegExp('INTERACTION WITH |Interacts with |complex with ', 'i'); const geneRegExp = new RegExp(/[A-Z]{1}[A-Z|0-9]{2,}$/); // uniprot_comment_text_value const commentInteractsWith = json.entry.comment .filter((e) => { var _a; return (_a = e === null || e === void 0 ? void 0 : e.text) === null || _a === void 0 ? void 0 : _a.value; }) .filter((e) => regExp.test(e.text.value)) .map((e) => ({ to: e.text.value .split(/\.|;/) .flatMap((ee) => ee.replace(/ *\([^)]*\) */g, ' ')) .filter((ee) => regExp.test(ee)) .flatMap((ee) => ee.trim().split(regExp)) .flatMap((ee) => ee.split(/,| and | /)) .filter((_) => _) .filter((ee) => geneRegExp.test(ee)) .filter((ee) => !blaskList.includes(ee) && ee !== geneName) .map((ee) => ee.trim()), text: e.text.value, //evidences: e.text.evidence.split(' ')//.map((ee:string)=> json.entry.reference.filter((eee:any)=> eee.key === ee)) // Doesnt work with ref key })) .flatMap((e) => e.to.flatMap((ee) => ({ type: 'uniprot_comment_text_value', to: ee, text: e.text }))); // uniprot_reference_scope const referenceInteract = json.entry.reference .map((e) => (Object.assign(Object.assign({}, e), { scope: Array.isArray(e.scope) ? e.scope : [e.scope] }))) .filter((e) => regExp.test(e.scope.join(''))) .map((e) => (Object.assign({ to: e.scope //.split(/\.|;/) .flatMap((ee) => regExp.test(ee) ? [ee] : []) .filter((_) => _) .flatMap((ee) => ee.replace(/ *\([^)]*\) */g, ' ')) .filter((ee) => regExp.test(ee)) .flatMap((ee) => ee.trim().split(regExp)[1]) .flatMap((ee) => ee.split(/,| and | /i)) .filter((_) => _) .filter((ee) => geneRegExp.test(ee)) .filter((ee) => !blaskList.includes(ee) && ee !== geneName) .map((ee) => ee.trim()) }, e))) .flatMap((e) => e.to.flatMap((ee) => ({ type: 'reference_scope', to: ee, scope: e.scope, //citation: e.citation }))); // Group const byTo = {}; [...interactants, ...referenceInteract, ...commentInteractsWith] //.map((e:any)=> byTo[e.to] = byTo[e.to] ? [e, ...byTo[e.to]] : [e] ) .map((e) => byTo[e.to] = byTo[e.to] ? Object.assign(Object.assign({}, e), byTo[e.to]) : Object.assign({}, e)); const results = Object.keys(byTo).map((e) => { var _a; (_a = byTo[e]) === null || _a === void 0 ? true : delete _a.to; return { from: geneName, to: e, data: byTo[e] }; }) .filter((e) => !blaskList.includes(e.to) && e.to !== geneName); yield fs_1.default.promises.writeFile('test/tmp.json', JSON.stringify(results.map((e) => e.to), null, 4)); return results; }); exports.getInteractionsFromEntry = getInteractionsFromEntry;