Procházet zdrojové kódy

getInteractionsFromEntry

Thomas před 3 roky
rodič
revize
ce6d66dd2f
7 změnil soubory, kde provedl 93 přidání a 10 odebrání
  1. 2 0
      .gitignore
  2. 33 1
      index.js
  3. 30 1
      index.ts
  4. 2 1
      package.json
  5. 8 3
      test.js
  6. 13 4
      test.ts
  7. 5 0
      yarn.lock

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+test/
+node_modules

+ 33 - 1
index.js

@@ -20,10 +20,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.getEnrty = exports.readOffset = exports.makeIndex = void 0;
+exports.getInteractionsFromEntry = exports.getEntryFromGeneName = exports.getEnrty = exports.readOffset = exports.makeIndex = void 0;
 const fs_1 = __importDefault(require("fs"));
 const readline_1 = __importDefault(require("readline"));
 const fast_xml_parser_1 = require("fast-xml-parser");
+const jsonata_1 = __importDefault(require("jsonata"));
 const line$ = (path) => readline_1.default.createInterface({
     input: fs_1.default.createReadStream(path),
     crlfDelay: Infinity
@@ -111,3 +112,34 @@ const getEnrty = (dbPath, accession) => __awaiter(void 0, void 0, void 0, functi
     return parser.parse(yield readOffset(dbPath, offsets[0], offsets[1]));
 });
 exports.getEnrty = getEnrty;
+/**
+ * Looks up a gene name in an idmapping file and returns the parsed entry of the
+ * first accession mapped to it.
+ *
+ * Every line is split on tabs into accession, id type and id. A line matches only
+ * when the id type is exactly `Gene_Name` and the id equals `geneName` exactly, so
+ * a name such as `CITED` does not match `CITED2`, and regex metacharacters in the
+ * name are treated literally.
+ *
+ * NOTE: this file is compiled output; port the same change to the TypeScript
+ * source before regenerating it with tsc.
+ *
+ * @param idmappingPath path of the tab-separated idmapping file
+ * @param dbPath path of the database file handed to getEnrty
+ * @param geneName gene name to look up (exact, case-sensitive)
+ * @returns whatever getEnrty resolves to for the first matching accession
+ * @throws Error when no line maps `geneName` to an accession
+ */
+const getEntryFromGeneName = (idmappingPath, dbPath, geneName) => __awaiter(void 0, void 0, void 0, function* () {
+    var e_3, _g;
+    let firstAccession;
+    try {
+        // The whole file is still consumed so the reader created by line$ ends normally.
+        for (var _h = __asyncValues(line$(idmappingPath)), _j; _j = yield _h.next(), !_j.done;) {
+            const line = _j.value;
+            if (firstAccession !== undefined)
+                continue;
+            const [accession, idType, id] = line.split('\t');
+            if (idType === 'Gene_Name' && id === geneName)
+                firstAccession = accession;
+        }
+    }
+    catch (e_3_1) { e_3 = { error: e_3_1 }; }
+    finally {
+        try {
+            if (_j && !_j.done && (_g = _h.return)) yield _g.call(_h);
+        }
+        finally { if (e_3) throw e_3.error; }
+    }
+    // Fail with a clear message instead of passing undefined on to getEnrty.
+    if (firstAccession === undefined)
+        throw new Error(`No accession found for gene name "${geneName}" in ${idmappingPath}`);
+    return yield getEnrty(dbPath, firstAccession);
+});
+exports.getEntryFromGeneName = getEntryFromGeneName;
+/**
+ * Collects the names of interaction partners mentioned in a parsed entry.
+ *
+ * Two places are read:
+ *  - the `label` of every interactant under comments whose type is "interaction",
+ *    skipping labels that are one of the entry's own accessions;
+ *  - the text following "INTERACTION WITH " in each reference scope string.
+ *
+ * NOTE: this file is compiled output; port the same change to the TypeScript
+ * source before regenerating it with tsc.
+ *
+ * @param json parsed entry object; `json.entry` must exist
+ * @returns de-duplicated, non-empty partner strings (scope-derived ones first)
+ */
+const getInteractionsFromEntry = (json) => __awaiter(void 0, void 0, void 0, function* () {
+    // jsonata yields undefined for "no match" and a bare value for a single match,
+    // so every query result is normalised to an array before it is iterated.
+    const toArray = (value) => value == null ? [] : Array.isArray(value) ? value : [value];
+    const marker = /INTERACTION WITH /i;
+    // A single accession arrives as a plain string; a Set gives exact-match lookups.
+    const ownAccessions = new Set(toArray(json.entry.accession));
+    // Yielding a non-promise is harmless, and keeps this working if evaluate() returns a promise.
+    const interactants = toArray(yield (0, jsonata_1.default)(`entry.comment[type="interaction"].interactant`).evaluate(json));
+    const genes_interactant = interactants
+        .map((e) => e.label)
+        .filter((label) => !ownAccessions.has(label));
+    const references = toArray(yield (0, jsonata_1.default)(`entry.reference[scope ~> /INTERACTION WITH/i ]`).evaluate(json));
+    const genes_scope_inter = [];
+    for (const reference of references) {
+        // Each scope string is handled on its own so several matching scopes are not
+        // glued together into one bogus partner name.
+        for (const scope of toArray(reference.scope)) {
+            const text = String(scope);
+            const found = marker.exec(text);
+            if (found)
+                genes_scope_inter.push(text.substring(found.index + found[0].length).trim());
+        }
+    }
+    return [...new Set([...genes_scope_inter, ...genes_interactant])].filter(e => e);
+});
+exports.getInteractionsFromEntry = getInteractionsFromEntry;

+ 30 - 1
index.ts

@@ -3,6 +3,7 @@
 import fs from 'fs'
 import readline from 'readline'
 import { XMLParser } from 'fast-xml-parser'
+import jsonata from 'jsonata'
 
 const line$ = (path: string) => readline.createInterface({
     input: fs.createReadStream(path),
@@ -71,4 +72,32 @@ const getEnrty = async (dbPath:string, accession:string) => {
     return parser.parse(await readOffset(dbPath, offsets[0], offsets[1]))
 }
 
-export { makeIndex, readOffset, getEnrty }
+/**
+ * Looks up a gene name in an idmapping file and returns the parsed entry of the
+ * first accession mapped to it.
+ *
+ * Every line is split on tabs into accession, id type and id. A line matches only
+ * when the id type is exactly `Gene_Name` and the id equals `geneName` exactly, so
+ * a name such as `CITED` does not match `CITED2`, and regex metacharacters in the
+ * name are treated literally.
+ *
+ * @param idmappingPath path of the tab-separated idmapping file
+ * @param dbPath path of the database file handed to getEnrty
+ * @param geneName gene name to look up (exact, case-sensitive)
+ * @returns whatever getEnrty resolves to for the first matching accession
+ * @throws Error when no line maps `geneName` to an accession
+ */
+const getEntryFromGeneName = async (idmappingPath: string, dbPath:string, geneName:string) => {
+    let firstAccession: string | undefined
+    // The whole file is still consumed so the reader created by line$ ends normally.
+    for await (const line of line$(idmappingPath)) {
+        if (firstAccession !== undefined) continue
+        const [accession, idType, id] = line.split('\t')
+        if (idType === 'Gene_Name' && id === geneName) firstAccession = accession
+    }
+    // Fail with a clear message instead of passing undefined on to getEnrty.
+    if (firstAccession === undefined) {
+        throw new Error(`No accession found for gene name "${geneName}" in ${idmappingPath}`)
+    }
+    return await getEnrty(dbPath, firstAccession)
+}
+
+/**
+ * Collects the names of interaction partners mentioned in a parsed entry.
+ *
+ * Two places are read:
+ *  - the `label` of every interactant under comments whose type is "interaction",
+ *    skipping labels that are one of the entry's own accessions;
+ *  - the text following "INTERACTION WITH " in each reference scope string.
+ *
+ * @param json parsed entry object; `json.entry` must exist
+ * @returns de-duplicated, non-empty partner strings (scope-derived ones first)
+ */
+const getInteractionsFromEntry = async (json:any) => {
+    // jsonata yields undefined for "no match" and a bare value for a single match,
+    // so every query result is normalised to an array before it is iterated.
+    const toArray = (value:any): any[] => value == null ? [] : Array.isArray(value) ? value : [value]
+    const marker = /INTERACTION WITH /i
+
+    // A single accession arrives as a plain string; a Set gives exact-match lookups.
+    const ownAccessions = new Set(toArray(json.entry.accession))
+
+    // Awaiting a non-promise is harmless, and keeps this working if evaluate() returns a promise.
+    const interactants = toArray(await jsonata(`entry.comment[type="interaction"].interactant`).evaluate(json))
+    const genes_interactant = interactants
+        .map((e:any) => e.label)
+        .filter((label:any) => !ownAccessions.has(label))
+
+    const references = toArray(await jsonata(`entry.reference[scope ~> /INTERACTION WITH/i ]`).evaluate(json))
+    const genes_scope_inter: string[] = []
+    for (const reference of references) {
+        // Each scope string is handled on its own so several matching scopes are not
+        // glued together into one bogus partner name.
+        for (const scope of toArray(reference.scope)) {
+            const text = String(scope)
+            const found = marker.exec(text)
+            if (found) genes_scope_inter.push(text.substring(found.index + found[0].length).trim())
+        }
+    }
+
+    return [...new Set([...genes_scope_inter, ...genes_interactant])].filter(e => e)
+}
+
+export { makeIndex, readOffset, getEnrty, getEntryFromGeneName, getInteractionsFromEntry }

+ 2 - 1
package.json

@@ -15,6 +15,7 @@
     "typescript": "^4.6.2"
   },
   "dependencies": {
-    "fast-xml-parser": "^4.0.6"
+    "fast-xml-parser": "^4.0.6",
+    "jsonata": "^1.8.6"
   }
 }

+ 8 - 3
test.js

@@ -15,10 +15,15 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const _1 = require(".");
 const fs_1 = __importDefault(require("fs"));
 (() => __awaiter(void 0, void 0, void 0, function* () {
+    // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/
+    // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz
+    const idmappingPath = '/home/thomas/NGS/ref/UNIPROT/HUMAN_9606_idmapping.dat';
     const uniprotDB = '/home/thomas/NGS/ref/UNIPROT/uniprot_sprot_human.xml';
     // await makeIndex(uniprotDB)
     // const r =  await readOffset(uniprotDB, 118933, 255352)
-    const r = yield (0, _1.getEnrty)(uniprotDB, 'P46531');
-    console.log(r);
-    yield fs_1.default.promises.writeFile('test/test.json', JSON.stringify(r, null, 4));
+    const uniprotID = 'Q92754';
+    const r = yield (0, _1.getEnrty)(uniprotDB, uniprotID);
+    yield fs_1.default.promises.writeFile('test/test-Q5T4S7.json', JSON.stringify(r, null, 4));
+    console.log(yield (0, _1.getInteractionsFromEntry)(r));
+    //await getEntryFromGeneName(idmappingPath, uniprotDB, 'CITED2')
 }))();

+ 13 - 4
test.ts

@@ -1,10 +1,19 @@
-import { makeIndex, readOffset, getEnrty } from ".";
+import { makeIndex, readOffset, getEnrty, getEntryFromGeneName, getInteractionsFromEntry } from ".";
 import fs from 'fs'
+import jsonata from 'jsonata'
 (async()=>{
+    // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/
+    // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz
+    const idmappingPath = '/home/thomas/NGS/ref/UNIPROT/HUMAN_9606_idmapping.dat'
     const uniprotDB = '/home/thomas/NGS/ref/UNIPROT/uniprot_sprot_human.xml'
     // await makeIndex(uniprotDB)
     // const r =  await readOffset(uniprotDB, 118933, 255352)
-    const r = await getEnrty(uniprotDB, 'P46531')
-    console.log(r);
-    await fs.promises.writeFile('test/test.json', JSON.stringify(r, null, 4))
+    const uniprotID = 'Q92754'
+    const r = await getEnrty(uniprotDB, uniprotID)
+    
+    await fs.promises.writeFile('test/test-Q5T4S7.json', JSON.stringify(r, null, 4))
+    
+    console.log(await getInteractionsFromEntry(r))
+
+    //await getEntryFromGeneName(idmappingPath, uniprotDB, 'CITED2')
 })()

+ 5 - 0
yarn.lock

@@ -14,6 +14,11 @@ fast-xml-parser@^4.0.6:
   dependencies:
     strnum "^1.0.5"
 
+jsonata@^1.8.6:
+  version "1.8.6"
+  resolved "https://registry.yarnpkg.com/jsonata/-/jsonata-1.8.6.tgz#e5f0e6ace870a34bac881a182ca2b31227122791"
+  integrity sha512-ZH2TPYdNP2JecOl/HvrH47Xc+9imibEMQ4YqKy/F/FrM+2a6vfbGxeCX23dB9Fr6uvGwv+ghf1KxWB3iZk09wA==
+
 strnum@^1.0.5:
   version "1.0.5"
   resolved "https://registry.yarnpkg.com/strnum/-/strnum-1.0.5.tgz#5c4e829fe15ad4ff0d20c3db5ac97b73c9b072db"