|
|
@@ -24,7 +24,6 @@ exports.getInteractionsFromEntry = exports.getEntryFromGeneName = exports.getEnr
|
|
|
const fs_1 = __importDefault(require("fs"));
|
|
|
const readline_1 = __importDefault(require("readline"));
|
|
|
const fast_xml_parser_1 = require("fast-xml-parser");
|
|
|
-const jsonata_1 = __importDefault(require("jsonata"));
|
|
|
const line$ = (path) => readline_1.default.createInterface({
|
|
|
input: fs_1.default.createReadStream(path),
|
|
|
crlfDelay: Infinity
|
|
|
@@ -100,7 +99,7 @@ const getEntryOffset = (dbPath, accession) => __awaiter(void 0, void 0, void 0,
|
|
|
}
|
|
|
return [0, 0];
|
|
|
});
|
|
|
-const getEnrty = (dbPath, accession) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
+const getEntry = (dbPath, accession) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
const parser = new fast_xml_parser_1.XMLParser({
|
|
|
ignoreAttributes: false,
|
|
|
alwaysCreateTextNode: false,
|
|
|
@@ -111,8 +110,13 @@ const getEnrty = (dbPath, accession) => __awaiter(void 0, void 0, void 0, functi
|
|
|
const offsets = yield getEntryOffset(dbPath, accession);
|
|
|
return parser.parse(yield readOffset(dbPath, offsets[0], offsets[1]));
|
|
|
});
|
|
|
-exports.getEnrty = getEnrty;
|
|
|
+exports.getEnrty = getEntry;
|
|
|
/**
 * Resolves a gene name to its parsed entry.
 *
 * Looks up the accessions recorded for `geneName` via `getAccessFromGene`
 * and loads the entry of the first one via `getEntry`.
 *
 * @param idmappingPath Path handed to `getAccessFromGene`.
 * @param dbPath Path handed to `getEntry`.
 * @param geneName Gene name to look up.
 * @returns Promise of whatever `getEntry` yields for the first accession.
 */
const getEntryFromGeneName = (idmappingPath, dbPath, geneName) => __awaiter(void 0, void 0, void 0, function* () {
    // Only the first accession is used: it appears to always be the one that has an entry.
    const [firstAccession] = yield getAccessFromGene(idmappingPath, geneName);
    const entry = yield getEntry(dbPath, firstAccession);
    return entry;
});
|
|
|
+exports.getEntryFromGeneName = getEntryFromGeneName;
|
|
|
+const getAccessFromGene = (idmappingPath, geneName) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
var e_3, _g;
|
|
|
const sel = new RegExp('Gene_Name\t' + geneName);
|
|
|
let accessions = [];
|
|
|
@@ -130,43 +134,88 @@ const getEntryFromGeneName = (idmappingPath, dbPath, geneName) => __awaiter(void
|
|
|
}
|
|
|
finally { if (e_3) throw e_3.error; }
|
|
|
}
|
|
|
- return yield getEnrty(dbPath, accessions[0]);
|
|
|
+ return accessions;
|
|
|
});
|
|
|
-exports.getEntryFromGeneName = getEntryFromGeneName;
|
|
|
/**
 * Collects the interaction partners of a parsed UniProt entry.
 *
 * Partners are gathered from three places and merged per partner name:
 *   1. `comment` nodes of type "interaction" (structured interactant pairs),
 *   2. `reference` scopes containing "INTERACTION WITH ...",
 *   3. free-text `comment` values containing "Interacts with ..." or
 *      "complex with ...".
 * When the same partner is found by several sources, the fields of the
 * earliest source (in the order above) win and later sources only add
 * fields that are still missing.
 *
 * The function performs no I/O.
 *
 * @param {object} json Parsed entry document. Only `json.entry.gene`,
 *   `json.entry.comment` and `json.entry.reference` are read; each may be
 *   absent, a single node, or an array of nodes.
 * @returns {Promise<Array<{from: (string|undefined), to: string, data: object}>>}
 *   One record per partner: `from` is the entry's primary gene name
 *   (undefined when the entry has none), `to` the partner name and `data`
 *   the merged source fields without their `to` key.
 *   The promise rejects with a TypeError when `json` or `json.entry` is missing.
 */
const getInteractionsFromEntry = (json) => Promise.resolve().then(() => {
    // Upper-case words that match the gene-symbol pattern but are not gene names.
    const blackList = ['DNA', 'PHOSPHOSERINE', 'MOTIFS', 'INFECTION', 'PROTEIN', 'PROTEINS', 'GAMMA-SECRETASE', 'CALCIUM',
        'MICROBIAL', 'VIRUS', 'HEPATITIS', 'HERPES', 'SIMPLEX', 'RELATED', 'AND', 'CLATHRIN', 'WORTMANNIN'];
    const interactionPhrase = new RegExp('INTERACTION WITH |Interacts with |complex with ', 'i');
    // A token counts as a gene symbol when it ENDS with a capital followed by
    // at least two capitals/digits (only the end of the token is anchored).
    const geneSymbol = /[A-Z][A-Z0-9]{2,}$/;
    const parenthetical = / *\([^)]*\) */g;

    // A repeated child is expected as an array and a single child as a bare
    // node (assumed parser behaviour - confirm against the XMLParser options);
    // a missing child is undefined. Normalise all three to an array.
    const toArray = (node) => {
        if (node === undefined || node === null) {
            return [];
        }
        return Array.isArray(node) ? node : [node];
    };

    const entry = json.entry;
    const comments = toArray(entry.comment);

    // Primary gene name of the entry itself; used as `from` and excluded from partners.
    const primaryName = toArray(entry.gene)
        .flatMap((gene) => toArray(gene && gene.name))
        .find((name) => name && name.type === 'primary');
    const geneName = primaryName ? primaryName.value : undefined;

    const isPartner = (token) => geneSymbol.test(token) && !blackList.includes(token) && token !== geneName;
    // Splits text fragments into tokens and keeps those that look like partner gene symbols.
    const partnersIn = (fragments, separator) => fragments
        .flatMap((fragment) => fragment.split(separator))
        .filter((token) => token && isPartner(token))
        .map((token) => token.trim());

    // 1. Structured interactant pairs.
    const interactants = [];
    for (const comment of comments) {
        if (!comment || comment.type !== 'interaction') {
            continue;
        }
        const pair = toArray(comment.interactant);
        // Without both interactants and a partner label there is no name to group by.
        if (!pair[0] || !pair[1] || pair[1].label === undefined || pair[1].label === null || pair[1].label === '') {
            continue;
        }
        interactants.push({
            type: 'interactant',
            fromProductId: pair[0].id,
            toProductId: pair[1].id,
            to: pair[1].label,
            nExperiments: Number(comment.experiments)
        });
    }

    // 2. Reference scopes such as "INTERACTION WITH A AND B".
    const referenceInteract = toArray(entry.reference).flatMap((reference) => {
        const scope = toArray(reference && reference.scope);
        const fragments = scope
            .filter((item) => typeof item === 'string' && interactionPhrase.test(item))
            .map((item) => item.replace(parenthetical, ' '))
            .filter((item) => interactionPhrase.test(item))
            // Keep only what follows the first phrase; it is undefined when
            // trimming removed the phrase's trailing space (nothing followed it).
            .map((item) => item.trim().split(interactionPhrase)[1])
            .filter((item) => item !== undefined);
        return partnersIn(fragments, /,| and | /i).map((to) => ({
            type: 'reference_scope',
            to,
            scope
        }));
    });

    // 3. Free-text comments such as "Interacts with A (via ...) and B."
    const commentInteractsWith = comments.flatMap((comment) => {
        // The text node is either { value, ...attributes } or a plain string.
        const textNode = comment ? comment.text : undefined;
        const text = typeof textNode === 'string' ? textNode : (textNode ? textNode.value : undefined);
        if (typeof text !== 'string' || !interactionPhrase.test(text)) {
            return [];
        }
        const fragments = text
            .split(/\.|;/)
            .map((sentence) => sentence.replace(parenthetical, ' '))
            .filter((sentence) => interactionPhrase.test(sentence))
            .flatMap((sentence) => sentence.trim().split(interactionPhrase));
        return partnersIn(fragments, /,| and | /).map((to) => ({
            type: 'uniprot_comment_text_value',
            to,
            text
        }));
    });

    // Group by partner name. A prototype-less dictionary keeps inherited keys
    // (e.g. "constructor") from being mistaken for existing groups.
    const byTo = Object.create(null);
    for (const item of [...interactants, ...referenceInteract, ...commentInteractsWith]) {
        // Fields already collected for this partner take precedence over the new item.
        byTo[item.to] = Object.assign({}, item, byTo[item.to]);
    }

    return Object.keys(byTo)
        .filter((to) => !blackList.includes(to) && to !== geneName)
        .map((to) => {
            const data = byTo[to];
            delete data.to; // the partner name is reported once, at the top level
            return {
                from: geneName,
                to,
                data
            };
        });
});
|
|
|
exports.getInteractionsFromEntry = getInteractionsFromEntry;
|