"use strict";
// Helpers for querying a Gene Ontology annotation file (tab-separated) and the
// matching ontology file (stanza-based "key: value" text).
//
// Files these helpers were written against:
// http://geneontology.org/docs/guide-go-evidence-codes/
// http://wiki.geneontology.org/index.php/Category:Evidence_Codes
// http://current.geneontology.org/ontology/go-basic.obo
// http://current.geneontology.org/annotations/goa_human.gaf.gz
//
// The module uses native `async`/`await` and `for await`: iterating a readline
// interface asynchronously already requires a runtime that supports both.
Object.defineProperty(exports, "__esModule", { value: true });
exports.getAllSymbols = exports.getN = exports.getGOterms = exports.getSymbol = void 0;

const fs = require("fs");
const readline = require("readline");
const stats = require("@stdlib/stats");

// Names given, by position, to the tab-separated fields of an annotation line.
// Fields beyond the 15th all land under the key "undefined" (last one wins).
const headerGOA = [
  'database',
  'ID',
  'Symbol',
  'Qualifier',
  'GO_Term',
  'Evidence',
  'Evidence_Code',
  'With',
  'From',
  'Name',
  'Alternative_symbols',
  'Class',
  'Taxon',
  'Date',
  'Origin'
];

/**
 * Open `path` as a line-by-line readline interface.
 * `crlfDelay: Infinity` makes "\r\n" always count as a single line break.
 *
 * @param {string} path - File to read.
 * @returns {readline.Interface} Async-iterable interface yielding one line at a time.
 */
const line$ = (path) =>
  readline.createInterface({
    input: fs.createReadStream(path),
    crlfDelay: Infinity
  });

/**
 * Collect every annotation line containing `\t<symbol>\t` and attach the
 * ontology record of its GO term.
 *
 * NOTE(review): `symbol` is interpolated into a regular expression unescaped,
 * so regex metacharacters in it are interpreted as a pattern.
 * NOTE(review): empty fields are dropped BEFORE column names are assigned, so
 * an empty column shifts every later value one name to the left;
 * `getAllSymbols` does not drop them. Kept as-is - confirm which is intended.
 *
 * @param {string} symbol - Value to look for between two tabs.
 * @param {string} goaPath - Path of the annotation file.
 * @param {string} oboPath - Path of the ontology file.
 * @returns {Promise<Object[]>} One record per matching line, keyed by
 *   `headerGOA`; fields containing "|" become arrays; `GO_Term` is replaced by
 *   the ontology record whose `id` equals it (or `undefined` when none).
 */
const getSymbol = async (symbol, goaPath, oboPath) => {
  const tester = new RegExp('\t' + symbol + '\t');
  const results = [];

  for await (const line of line$(goaPath)) {
    if (!tester.test(line)) {
      continue;
    }
    const record = {};
    line
      .split('\t')
      .filter((field) => field)
      .forEach((field, i) => {
        record[headerGOA[i]] = field.includes('|') ? field.split('|') : field;
      });
    results.push(record);
  }

  const cacheTerms = await getGOterms(results.map((record) => record.GO_Term), oboPath);

  // First record per id wins, mirroring a "first match" lookup.
  const recordById = new Map();
  for (const term of cacheTerms) {
    if (!recordById.has(term.id)) {
      recordById.set(term.id, term);
    }
  }
  for (const record of results) {
    record.GO_Term = recordById.get(record.GO_Term);
  }
  return results;
};
exports.getSymbol = getSymbol;

/**
 * Extract "key: value" records from the ontology file. Capture starts at any
 * line matching `id: <term>` for one of the requested terms and ends at the
 * next blank line (or at end of file).
 *
 * NOTE(review): the match is an unanchored, unescaped regular expression, so a
 * line such as `alt_id: <term>` also starts a capture (yielding a record that
 * begins mid-stanza). Kept as-is - confirm whether that is intended.
 * NOTE(review): a key repeated inside one stanza keeps only its last value.
 *
 * @param {string|string[]} terms - One term or a list of terms.
 * @param {string} oboPath - Path of the ontology file.
 * @returns {Promise<Object[]>} Captured records in file order.
 */
const getGOterms = async (terms, oboPath) => {
  const termList = Array.isArray(terms) ? terms : [terms];
  if (termList.length === 0) {
    // Nothing can match, so do not read the file at all.
    return [];
  }

  const testerList = termList.map((term) => new RegExp('id: ' + term));
  const results = [];
  let capturing = false;
  let current = {};

  for await (const line of line$(oboPath)) {
    if (testerList.some((rx) => rx.test(line))) {
      capturing = true;
    }
    if (!capturing) {
      continue;
    }
    if (line === '') {
      results.push(current);
      current = {};
      capturing = false;
      continue;
    }
    // Split at the FIRST ": " only, so values that themselves contain ": "
    // are kept whole instead of being cut at the second separator.
    const cut = line.indexOf(': ');
    if (cut === -1) {
      current[line] = undefined;
    } else {
      current[line.slice(0, cut)] = line.slice(cut + 2);
    }
  }

  // A stanza still open at end of file (no trailing blank line) is kept too.
  if (capturing) {
    results.push(current);
  }
  return results;
};
exports.getGOterms = getGOterms;

/**
 * For each term, list the distinct third-column values (gene symbols) of the
 * annotation lines containing `\t<term>\t`, merged with the term's first
 * ontology record.
 *
 * NOTE(review): the ontology file is re-read once per term (sequentially).
 * NOTE(review): `term` is interpolated into a regular expression unescaped.
 *
 * @param {string|string[]} terms - One term or a list of terms.
 * @param {*} qualifiers - Accepted for interface compatibility; not used to
 *   filter anything.
 * @param {string} goaPath - Path of the annotation file.
 * @param {string} oboPath - Path of the ontology file.
 * @returns {Promise<Object[]>} One object per term, in input order:
 *   `{ nTotal, genes, ...ontologyRecord }`.
 */
const getN = async (terms, qualifiers, goaPath, oboPath) => {
  const termList = Array.isArray(terms) ? terms : [terms];

  const entries = [];
  for (const term of termList) {
    const [stanza] = await getGOterms(term, oboPath);
    entries.push({
      test: new RegExp('\t' + term + '\t'),
      seen: new Set(),
      record: { nTotal: 0, genes: [], ...stanza }
    });
  }

  for await (const line of line$(goaPath)) {
    for (const entry of entries) {
      if (entry.test.test(line)) {
        // A Set keeps first-seen order and removes duplicates in one step.
        entry.seen.add(line.split('\t')[2]);
      }
    }
  }

  return entries.map(({ seen, record }) => {
    if (seen.size > 0) {
      record.nTotal = seen.size;
      record.genes = [...seen];
    }
    return record;
  });
};
exports.getN = getN;

/**
 * For a set of gene symbols, find every GO term annotated to at least one of
 * them with the given qualifier, and report per term how many of the symbols
 * carry it together with a chi-square p-value.
 *
 * Duplicate symbols are ignored: each symbol is counted once in
 * `observedGenes`, `n`, `prop` and the contingency table.
 *
 * NOTE(review): `symbols` and `qualifier` are interpolated into a regular
 * expression unescaped.
 * NOTE(review): results are ordered by DESCENDING p-value - confirm intent.
 *
 * @param {string|string[]} symbols - One symbol or a list of symbols.
 * @param {string} qualifier - Value expected in the field right after the symbol.
 * @param {string} goaPath - Path of the annotation file.
 * @param {string} oboPath - Path of the ontology file.
 * @returns {Promise<Object[]>} Per term:
 *   `{ n, prop, chi2test, observedGenes, ...resultOfGetN }`.
 */
const getAllSymbols = async (symbols, qualifier, goaPath, oboPath) => {
  const uniqueSymbols = [...new Set(Array.isArray(symbols) ? symbols : [symbols])];
  const testerList = uniqueSymbols.map(
    (symbol) => new RegExp('\t' + symbol + '\t' + qualifier + '\t')
  );

  const allGoa = [];
  const allGenes = new Set();

  for await (const line of line$(goaPath)) {
    const fields = line.split('\t');

    if (testerList.some((rx) => rx.test(line))) {
      const record = {};
      fields.forEach((field, i) => {
        record[headerGOA[i]] = field;
      });
      allGoa.push(record);
    }

    // Lines starting with "!" are comments in the annotation format, and lines
    // without a third column carry no symbol: neither is a gene, so neither
    // may count towards the background total.
    if (!line.startsWith('!') && fields[2] !== undefined) {
      allGenes.add(fields[2]);
    }
  }

  const nAllGenes = allGenes.size;
  // NOTE(review): looks like leftover debug output; kept to preserve behavior.
  console.log(nAllGenes);

  const goTerms = [...new Set(allGoa.map((record) => record.GO_Term))];
  const allIDs = await getN(goTerms, qualifier, goaPath, oboPath);
  const chi2test = stats.chi2test;

  return allIDs
    .map((entry) => {
      const annotated = new Set(entry.genes);
      const observedGenes = uniqueSymbols.filter((symbol) => annotated.has(symbol));
      const n = observedGenes.length;
      const notObserved = Math.max(0, entry.genes.length - n);
      return {
        n,
        prop: n / entry.genes.length,
        chi2test: chi2test([
          [n, notObserved],
          [uniqueSymbols.length, nAllGenes]
        ]).pValue,
        observedGenes,
        ...entry
      };
    })
    .sort((a, b) => b.chi2test - a.chi2test);
};
exports.getAllSymbols = getAllSymbols;