index.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. "use strict";
  2. // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.xml.gz
  /**
   * TypeScript down-level helper that drives a generator as if it were an
   * `async` function: every `yield`ed value is awaited and its settled result is
   * fed back into the generator through `next` / `throw`.
   *
   * @param {*} thisArg - `this` binding for the generator function.
   * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to the generator function.
   * @param {PromiseConstructor} [P] - Promise implementation; defaults to the global `Promise`.
   * @param {GeneratorFunction} generator - Generator function holding the "async" body.
   * @returns {Promise<*>} Resolves with the generator's return value, rejects with
   *   whatever it throws.
   */
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap a yielded value in a P instance unless it already is one.
    function adopt(value) {
      return value instanceof P ? value : new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
      // Resume the generator with a fulfilled value.
      function fulfilled(value) {
        try {
          step(generator.next(value));
        } catch (e) {
          reject(e);
        }
      }
      // Resume the generator by throwing the rejection reason into it.
      function rejected(value) {
        try {
          step(generator["throw"](value));
        } catch (e) {
          reject(e);
        }
      }
      // Either finish with the return value or wait for the next yielded value.
      function step(result) {
        if (result.done) {
          resolve(result.value);
        } else {
          adopt(result.value).then(fulfilled, rejected);
        }
      }
      step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
  };
  /**
   * TypeScript down-level helper behind `for await (... of o)`.
   *
   * If `o` has a `Symbol.asyncIterator` method, the iterator it returns is used
   * directly. Otherwise the synchronous iterator of `o` is wrapped so that
   * `next` / `throw` / `return` (whichever the sync iterator has) return promises
   * and every produced `value` is awaited before being handed out.
   *
   * @param {*} o - An async iterable or a sync iterable.
   * @returns {AsyncIterator<*>} Async iterator over the values of `o`.
   * @throws {TypeError} When the runtime has no `Symbol.asyncIterator`.
   */
  var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
    var m = o[Symbol.asyncIterator], i;

    // Resolve the (possibly promised) value, then emit an iterator result.
    function settle(resolve, reject, d, v) {
      Promise.resolve(v).then(function (v) { resolve({ value: v, done: d }); }, reject);
    }
    // Expose sync iterator method `n` on the wrapper as a promise-returning method.
    function verb(n) {
      i[n] = o[n] && function (v) {
        return new Promise(function (resolve, reject) {
          v = o[n](v);
          settle(resolve, reject, v.done, v.value);
        });
      };
    }

    if (m) {
      return m.call(o);
    }
    // `__values` is another TypeScript helper; use it only if the file defines it.
    o = typeof __values === "function" ? __values(o) : o[Symbol.iterator]();
    i = {};
    verb("next");
    verb("throw");
    verb("return");
    i[Symbol.asyncIterator] = function () { return this; };
    return i;
  };
  /**
   * TypeScript interop helper for default imports of CommonJS modules.
   *
   * @param {*} mod - Value returned by `require`.
   * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise
   *   `{ default: mod }` so that `.default` always yields the module.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
      return mod;
    }
    return { "default": mod };
  };
  22. Object.defineProperty(exports, "__esModule", { value: true });
  23. exports.getInteractionsFromEntry = exports.getEntryFromGeneName = exports.getEnrty = exports.readOffset = exports.makeIndex = void 0;
  24. const fast_xml_parser_1 = require("fast-xml-parser");
  25. const fs_1 = __importDefault(require("fs"));
  26. const readline_1 = __importDefault(require("readline"));
  /**
   * Open `path` and expose it as a readline interface, which can be consumed
   * line by line with async iteration.
   *
   * @param {string} path - File to read.
   * @returns {import('readline').Interface} Line reader over the file.
   */
  const line$ = (path) => {
    const input = fs_1.default.createReadStream(path);
    // crlfDelay: Infinity makes "\r\n" always count as a single line break.
    return readline_1.default.createInterface({ input, crlfDelay: Infinity });
  };
  /**
   * Build a tab-separated offset index for an XML file made of `<entry>` blocks.
   *
   * One row is appended to the index per entry:
   * `ACC1;ACC2;...<TAB>from<TAB>to<LF>`, where `from` is the byte offset of the
   * line starting with `<entry` and `to` is the byte offset just past the line
   * starting with `</entry>`.
   *
   * Offsets are counted in UTF-8 bytes (not string length) so they stay valid as
   * file positions when the file contains non-ASCII characters.
   *
   * NOTE(review): each line is assumed to end in a single "\n" byte; a CRLF file
   * would drift by one byte per line — confirm inputs are LF-terminated.
   * NOTE(review): rows are appended, so an existing index file is extended rather
   * than replaced.
   *
   * @param {string} filePath - XML file to index.
   * @param {string} [indexPath] - Index file to append to; defaults to `filePath + '.jsi'`.
   * @returns {Promise<void>} Settles once the whole file has been scanned.
   */
  const makeIndex = async (filePath, indexPath) => {
    const target = indexPath || filePath + '.jsi';
    const entryOpen = /^<entry/;
    const entryClose = /^<\/entry>/;
    const accessionTag = /<accession>(.*?)<\/accession>/;

    let byteAcc = 0;
    let current = { values: [] };

    for await (const line of line$(filePath)) {
      if (entryOpen.test(line)) {
        current.from = byteAcc;
      }
      // +1 accounts for the line break that readline stripped.
      byteAcc += Buffer.byteLength(line, 'utf8') + 1;

      // Lines without a complete <accession>…</accession> pair are skipped.
      const accession = accessionTag.exec(line);
      if (accession) {
        current.values.push(accession[1]);
      }

      if (entryClose.test(line)) {
        const row = `${current.values.join(';')}\t${current.from}\t${byteAcc}\n`;
        await fs_1.default.promises.appendFile(target, row);
        current = { values: [] };
      }
    }
  };
  61. exports.makeIndex = makeIndex;
  /**
   * Read the byte range `[from, to)` of a file and decode it as UTF-8.
   *
   * The file is opened read-only, the handle is always closed, and any failure
   * (missing file, read error, invalid range) rejects the returned promise
   * instead of leaving it pending. If the file ends before `to`, only the bytes
   * actually read are decoded.
   *
   * @param {string} path - File to read from.
   * @param {number} from - Byte offset of the first byte to read.
   * @param {number} to - Byte offset one past the last byte to read.
   * @returns {Promise<string>} The decoded slice.
   */
  const readOffset = async (path, from, to) => {
    const buffer = Buffer.alloc(to - from);
    const filehandle = await fs_1.default.promises.open(path, 'r');
    try {
      const { bytesRead } = await filehandle.read(buffer, 0, buffer.length, from);
      // Decode only what was read so a short read does not yield NUL padding.
      return buffer.toString('utf8', 0, bytesRead);
    } finally {
      await filehandle.close();
    }
  };
  79. exports.readOffset = readOffset;
  /**
   * Look up the byte range of the entry carrying `accession` in the `.jsi` index
   * that sits next to `dbPath`, building the index first when it does not exist.
   *
   * The accession must equal one of the `;`-separated accessions in the first
   * column of an index row; it is compared literally, not as a pattern, so it
   * cannot match part of another accession or the offset columns.
   *
   * @param {string} dbPath - Path of the XML database file.
   * @param {string} accession - Accession to look for.
   * @returns {Promise<[number, number]>} `[from, to]` of the first matching row,
   *   or `[0, 0]` when no row matches.
   */
  const getEntryOffset = async (dbPath, accession) => {
    const indexPath = dbPath + '.jsi';
    if (!fs_1.default.existsSync(indexPath)) {
      await makeIndex(dbPath);
    }
    for await (const line of line$(indexPath)) {
      const [accessions = '', from, to] = line.split('\t');
      if (accessions.split(';').includes(accession)) {
        return [Number(from), Number(to)];
      }
    }
    return [0, 0];
  };
  /**
   * Load the entry with the given accession from the XML database and parse it.
   *
   * The entry's byte range comes from `getEntryOffset`, the raw XML from
   * `readOffset`. Parser settings: attributes are kept without a name prefix,
   * element text is stored under `value`, and text nodes are only created when
   * needed (`alwaysCreateTextNode: false`).
   *
   * NOTE(review): an unknown accession yields the range `[0, 0]`, so an empty
   * string is parsed — callers should verify the result has an `entry`.
   *
   * @param {string} dbPath - Path of the XML database file.
   * @param {string} accession - Accession of the entry to load.
   * @returns {Promise<object>} Result of `XMLParser#parse` for the entry's XML.
   */
  const getEntry = async (dbPath, accession) => {
    const parser = new fast_xml_parser_1.XMLParser({
      ignoreAttributes: false,
      alwaysCreateTextNode: false,
      attributeNamePrefix: "",
      textNodeName: "value",
      allowBooleanAttributes: true,
    });
    const [from, to] = await getEntryOffset(dbPath, accession);
    const xml = await readOffset(dbPath, from, to);
    return parser.parse(xml);
  };
  // `getEnrty` is the historical (misspelled) export name; keep it for existing
  // callers and also expose the correctly spelled alias.
  exports.getEnrty = getEntry;
  exports.getEntry = getEntry;
  /**
   * Resolve a gene name to its parsed database entry.
   *
   * The gene name is mapped to accessions via the id-mapping file; the first
   * accession is used (per the original author's observation it is the one that
   * has an entry).
   *
   * @param {string} idmappingPath - Path of the id-mapping file.
   * @param {string} dbPath - Path of the XML database file.
   * @param {string} geneName - Gene name to look up.
   * @returns {Promise<object>} The parsed entry, as returned by `getEntry`.
   * @throws {Error} When the id-mapping file has no accession for `geneName`
   *   (rather than looking up an undefined accession).
   */
  const getEntryFromGeneName = async (idmappingPath, dbPath, geneName) => {
    const [firstAccession] = await getAccessFromGene(idmappingPath, geneName);
    if (firstAccession === undefined) {
      throw new Error(`No accession found for gene name "${geneName}" in ${idmappingPath}`);
    }
    return getEntry(dbPath, firstAccession);
  };
  118. exports.getEntryFromGeneName = getEntryFromGeneName;
  /**
   * Collect the accessions that the id-mapping file associates with a gene name.
   *
   * A row matches when one of its tab-separated columns is exactly `Gene_Name`
   * and the column right after it is exactly `geneName`. The comparison is
   * literal, so `TP53` does not also pick up `TP53BP1`. The accession is the
   * row's first column.
   *
   * @param {string} idmappingPath - Path of the tab-separated id-mapping file.
   * @param {string} geneName - Gene name to look up (case-sensitive).
   * @returns {Promise<string[]>} Matching accessions in file order; empty when none.
   */
  const getAccessFromGene = async (idmappingPath, geneName) => {
    const accessions = [];
    for await (const line of line$(idmappingPath)) {
      const columns = line.split('\t');
      const typeIndex = columns.indexOf('Gene_Name');
      if (typeIndex !== -1 && columns[typeIndex + 1] === geneName) {
        accessions.push(columns[0]);
      }
    }
    return accessions;
  };
  /**
   * Derive the interaction partners of a parsed entry from three places:
   *
   *  1. `comment` items of type `interaction` (partner = second interactant's label),
   *  2. free text of comments that contain an interaction phrase,
   *  3. reference `scope` strings that contain an interaction phrase.
   *
   * Text sources are tokenised; a token is kept when it ends in a gene-like run
   * (an upper-case letter followed by at least two upper-case letters/digits),
   * is not black-listed and is not the entry's own gene name. Hits for the same
   * partner are merged into one record; when sources share a key, the value from
   * the earliest source (interactants, then references, then comment text) wins.
   *
   * NOTE(review): when the entry has no gene name, `from` falls back to the
   * `recommendedName` node itself (not a string) — confirm that is intended.
   * NOTE(review): only comments whose `text` has a `value` property are scanned;
   * a `text` that is a plain string is skipped — confirm against parser output.
   *
   * @param {{entry: object}} json - Parsed entry (object with an `entry` property).
   * @returns {Promise<Array<{from: *, to: string, data: object}>>} One record per
   *   partner; `data` is the merged hit without its `to` key.
   */
  const getInteractionsFromEntry = async (json) => {
    const blackList = new Set(['DNA', 'PHOSPHOSERINE', 'MOTIFS', 'INFECTION', 'PROTEIN', 'PROTEINS', 'GAMMA-SECRETASE', 'CALCIUM',
      'MICROBIAL', 'VIRUS', 'HEPATITIS', 'HERPES', 'SIMPLEX', 'RELATED', 'AND', 'CLATHRIN', 'WORTMANNIN',
      'NUCLEOSOME', 'undefined', 'INTEGRINS', 'UBIQUITIN', 'MAGNESIUM']);
    const interactionPhrase = new RegExp('INTERACTION WITH |Interacts with |complex with ', 'i');
    // Unanchored at the start on purpose (as before): only the token's tail must look like a gene.
    const geneLike = /[A-Z][A-Z0-9]{2,}$/;
    const parenthetical = / *\([^)]*\) */g;
    const asArray = (value) => (Array.isArray(value) ? value : [value]);

    // Own gene name: primary name of the first gene, else the recommended protein name node.
    const firstGene = Array.isArray(json.entry.gene) ? json.entry.gene[0] : json.entry.gene;
    let geneName = '';
    if (firstGene != null && firstGene.name) {
      geneName = asArray(firstGene.name)
        .filter((name) => name.type === 'primary')
        .map((name) => name.value)[0];
    } else if (json.entry.protein != null && json.entry.protein.recommendedName) {
      const recommended = json.entry.protein.recommendedName;
      geneName = Array.isArray(recommended) ? recommended[0] : recommended;
    }

    // Shared tail of both text pipelines: split fragments into tokens and keep partner candidates.
    const toPartnerTokens = (fragments, separator) => fragments
      .flatMap((fragment) => fragment.split(separator))
      .filter(Boolean)
      .filter((token) => geneLike.test(token))
      .filter((token) => !blackList.has(token) && token !== geneName)
      .map((token) => token.trim());

    // 1. Structured interaction comments.
    const comments = asArray(json.entry.comment);
    const interactants = comments
      .filter((comment) => comment != null && comment.type === 'interaction')
      .map((comment) => ({
        type: 'interactant',
        fromProductId: comment.interactant[0].id,
        toProductId: comment.interactant[1].id,
        to: comment.interactant[1].label,
        nExperiments: Number(comment.experiments),
      }));

    // 2. Free-text comments mentioning an interaction.
    const commentInteractsWith = comments
      .filter((comment) => comment != null && comment.text != null && comment.text.value)
      .filter((comment) => interactionPhrase.test(comment.text.value))
      .flatMap((comment) => {
        const text = comment.text.value;
        const fragments = text
          .split(/\.|;/)
          .map((sentence) => sentence.replace(parenthetical, ' '))
          .filter((sentence) => interactionPhrase.test(sentence))
          .flatMap((sentence) => sentence.trim().split(interactionPhrase));
        return toPartnerTokens(fragments, /,| and | /).map((partner) => ({
          type: 'uniprot_comment_text_value',
          to: partner,
          text,
        }));
      });

    // 3. Reference scopes mentioning an interaction.
    const referenceInteract = asArray(json.entry.reference)
      .map((reference) => ({ ...reference, scope: asArray(reference == null ? undefined : reference.scope) }))
      .filter((reference) => interactionPhrase.test(reference.scope.join('')))
      .flatMap((reference) => {
        const fragments = reference.scope
          .filter((scope) => interactionPhrase.test(scope))
          .map((scope) => scope.replace(parenthetical, ' '))
          .filter((scope) => interactionPhrase.test(scope))
          .flatMap((scope) => {
            // Only what follows the first phrase is considered; nothing may follow it
            // (e.g. the partner was entirely parenthetical), in which case skip the scope.
            const afterPhrase = scope.trim().split(interactionPhrase)[1];
            return afterPhrase === undefined ? [] : [afterPhrase];
          });
        return toPartnerTokens(fragments, /,| and | /i).map((partner) => ({
          type: 'reference_scope',
          to: partner,
          scope: reference.scope,
          citation: reference.citation,
        }));
      });

    // Group by partner. A Map keyed by the stringified partner keeps arbitrary labels
    // (including prototype property names) safe; a missing label groups under 'undefined',
    // which the black list removes below.
    const byTo = new Map();
    for (const hit of [...interactants, ...referenceInteract, ...commentInteractsWith]) {
      const key = String(hit.to);
      const earlier = byTo.get(key);
      byTo.set(key, earlier ? { ...hit, ...earlier } : { ...hit });
    }

    return [...byTo]
      .map(([to, merged]) => {
        const { to: _dropped, ...data } = merged;
        return { from: geneName, to, data };
      })
      .filter((record) => !blackList.has(record.to) && record.to !== geneName);
  };
  230. exports.getInteractionsFromEntry = getInteractionsFromEntry;