Thomas 4 년 전
부모
커밋
f1613091d1
4개의 변경된 파일, 144개의 추가 및 42개의 삭제
  1. 54 8
      index.js
  2. 72 14
      index.ts
  3. 7 11
      test.js
  4. 11 9
      test.ts

+ 54 - 8
index.js

@@ -20,7 +20,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.getInteractionsFromEntry = exports.getEntryFromGeneName = exports.getEnrty = exports.readOffset = exports.makeIndex = void 0;
+exports.findDistance = exports.getInteractionsFromEntry = exports.getEntryFromGeneName = exports.getEnrty = exports.readOffset = exports.makeIndex = void 0;
 const fs_1 = __importDefault(require("fs"));
 const readline_1 = __importDefault(require("readline"));
 const fast_xml_parser_1 = require("fast-xml-parser");
@@ -134,12 +134,58 @@ const getEntryFromGeneName = (idmappingPath, dbPath, geneName) => __awaiter(void
 });
 exports.getEntryFromGeneName = getEntryFromGeneName;
 const getInteractionsFromEntry = (json) => __awaiter(void 0, void 0, void 0, function* () {
-    const uniprotIDs = json.entry.accession;
-    const genes_interactant = [...new Set((0, jsonata_1.default)(`entry.comment[type="interaction"].interactant`).evaluate(json)
-            .filter((e) => !uniprotIDs.includes(e.label))
-            .map((e) => e.label))];
-    const scope_inter = (0, jsonata_1.default)(`entry.reference[scope ~> /INTERACTION WITH/i ]`).evaluate(json).map((e) => (Object.assign({ txt: Array.isArray(e.scope) ? e.scope.filter((ee) => ee.match(/INTERACTION\ WITH/)).join() : e.scope }, e))).map((e) => (Object.assign({ interaction: e.txt.substring(e.txt.indexOf("INTERACTION WITH ") + "INTERACTION WITH ".length, e.txt.length) }, e)));
-    const genes_scope_inter = [...new Set(scope_inter.map((e) => e.interaction))];
-    return [...new Set([...genes_scope_inter, ...genes_interactant])].filter(e => e);
+    // Collects candidate interaction-partner names for one parsed entry (`json.entry`)
+    // from three places: labels of "interaction" comment interactants, reference scopes
+    // containing "INTERACTION WITH", and "Interacts" lines of "subunit" comments.
+    // Resolves to an array of tokens matching /[A-Z][A-Z0-9]{2,}/.
+    // Each query result is normalised with Array.isArray because a query that matches a
+    // single item may yield that bare item instead of a one-element array.
+    const uniprotIDs = Array.isArray(json.entry.accession) ? json.entry.accession : [json.entry.accession];
+    // Comment interactant
+    const res_inter = (0, jsonata_1.default)(`entry.comment[type="interaction"].interactant`).evaluate(json);
+    let genes_interactant = [];
+    if (res_inter) {
+        // A lone interactant would otherwise reach .filter() as a plain object and throw.
+        const interactants = Array.isArray(res_inter) ? res_inter : [res_inter];
+        genes_interactant = [...new Set(interactants
+                .filter((e) => !uniprotIDs.includes(e.label))
+                .map((e) => e.label)
+                .filter((e) => e))];
+    }
+    // Reference scope = INTERACTION WITH
+    const scope_inter = (0, jsonata_1.default)(`entry.reference[scope ~> /INTERACTION WITH/i ]`).evaluate(json);
+    let genes_scope_inter = [];
+    if (scope_inter) {
+        const comment_scope_inters = Array.isArray(scope_inter) ? scope_inter : [scope_inter];
+        // txt: the scope text mentioning the interaction; interaction: everything after "INTERACTION WITH ".
+        const comment_scope_inters_genes = comment_scope_inters.map((e) => (Object.assign({ txt: Array.isArray(e.scope) ? e.scope.filter((ee) => ee.match(/INTERACTION\ WITH/)).join() : e.scope }, e))).map((e) => (Object.assign({ interaction: e.txt.substring(e.txt.indexOf("INTERACTION WITH ") + "INTERACTION WITH ".length, e.txt.length) }, e)));
+        // Scopes may list several partners separated by "; " or " AND ".
+        genes_scope_inter = [...new Set(comment_scope_inters_genes.map((e) => e.interaction))].flatMap((e) => e.split(/; | AND /));
+    }
+    // Comment subunit
+    const comment_subunit = (0, jsonata_1.default)(`entry.comment[type="subunit"].text.value`).evaluate(json);
+    let comment_subunits_genes = [];
+    if (comment_subunit) {
+        const comment_subunits = Array.isArray(comment_subunit) ? comment_subunit : [comment_subunit];
+        comment_subunits_genes = comment_subunits
+            // Drop parenthesised asides, then look at each line separately.
+            .flatMap((e) => e.replace(/ *\([^)]*\) */g, '').split(/\n/))
+            .filter((e) => /Interacts/.test(e))
+            // match() yields null when nothing matches; the typeof filter below removes it.
+            .flatMap((e) => e.match(/[A-Z][A-Z0-9]{2,}/g));
+    }
+    let res = [...new Set([...genes_scope_inter, ...genes_interactant, ...comment_subunits_genes])].sort().filter((e) => typeof e === 'string').filter(_ => _);
+    console.log(res);
+    // Reduce every remaining string to its gene-like tokens.
+    if (res.length > 0)
+        res = res.flatMap((e) => e.match(/[A-Z][A-Z0-9]{2,}/g)).filter(_ => _);
+    return res;
 });
 exports.getInteractionsFromEntry = getInteractionsFromEntry;
+/**
+ * Breadth-first search over the interaction lists returned by
+ * getInteractionsFromEntry, starting at geneNameA and looking for geneNameB.
+ *
+ * @param idmappingPath passed through to getEntryFromGeneName
+ * @param dbPath passed through to getEntryFromGeneName
+ * @param geneNameA gene name the search starts from
+ * @param geneNameB gene name being searched for
+ * @param maxDistance largest number of interaction steps to explore (default 6)
+ * @returns the number of steps (1..maxDistance) at which geneNameB first appears,
+ *          or maxDistance + 1 when it was not reached within maxDistance steps
+ */
+const findDistance = (idmappingPath, dbPath, geneNameA, geneNameB, maxDistance = 6) => __awaiter(void 0, void 0, void 0, function* () {
+    // Names already queued once; prevents re-reading the same entry and looping on cycles.
+    const visited = new Set([geneNameA]);
+    // All names at the current depth; one loop iteration expands exactly one level.
+    let frontier = [geneNameA];
+    for (let depth = 0; depth < maxDistance && frontier.length > 0; depth++) {
+        const next = [];
+        for (const gene of frontier) {
+            console.log(depth, gene);
+            const entry = yield getEntryFromGeneName(idmappingPath, dbPath, gene);
+            // getInteractionsFromEntry dereferences entry.entry, so skip names with no usable entry.
+            if (!entry || !entry.entry)
+                continue;
+            const partners = yield getInteractionsFromEntry(entry);
+            if (partners.includes(geneNameB))
+                return depth + 1;
+            for (const partner of partners) {
+                if (!visited.has(partner)) {
+                    visited.add(partner);
+                    next.push(partner);
+                }
+            }
+        }
+        frontier = next;
+    }
+    return maxDistance + 1;
+});
+exports.findDistance = findDistance;

+ 72 - 14
index.ts

@@ -82,22 +82,80 @@ const getEntryFromGeneName = async (idmappingPath: string, dbPath:string, geneNa
 }
 
 const getInteractionsFromEntry = async (json:any) => {
-    const uniprotIDs = json.entry.accession
+    // Collects candidate interaction-partner names for one parsed entry (`json.entry`)
+    // from three places: labels of "interaction" comment interactants, reference scopes
+    // containing "INTERACTION WITH", and "Interacts" lines of "subunit" comments.
+    // Resolves to an array of tokens matching /[A-Z][A-Z0-9]{2,}/.
+    // Each query result is normalised with Array.isArray because a query that matches a
+    // single item may yield that bare item instead of a one-element array.
+    const uniprotIDs = Array.isArray(json.entry.accession) ? json.entry.accession : [json.entry.accession]
     
-    const genes_interactant = [...new Set(
-        jsonata(`entry.comment[type="interaction"].interactant`).evaluate(json)
-        .filter((e:any) => !uniprotIDs.includes(e.label))
-        .map((e:any) => e.label)
-    )]
+    // Comment interactant
+    const res_inter = jsonata(`entry.comment[type="interaction"].interactant`).evaluate(json)
+    let genes_interactant:any[] = []
+    if (res_inter) {
+        // A lone interactant would otherwise reach .filter() as a plain object and throw.
+        const interactants = Array.isArray(res_inter) ? res_inter : [res_inter]
+        genes_interactant = [...new Set(
+            interactants
+            .filter((e:any) => !uniprotIDs.includes(e.label))
+            .map((e:any) => e.label)
+            .filter((e:any) => e)
+        )]
+    }
+    
+    // Reference scope = INTERACTION WITH
+    const scope_inter = jsonata(`entry.reference[scope ~> /INTERACTION WITH/i ]`).evaluate(json)
+    let genes_scope_inter = []
+    if (scope_inter) {
+        const comment_scope_inters = Array.isArray(scope_inter) ? scope_inter : [scope_inter]
+        // txt: the scope text mentioning the interaction; interaction: everything after "INTERACTION WITH ".
+        const comment_scope_inters_genes = comment_scope_inters.map((e:any) => ({
+            txt: Array.isArray(e.scope) ? e.scope.filter((ee:any) => ee.match(/INTERACTION\ WITH/)).join() : e.scope,
+            ...e
+        })).map((e:any) => ({
+            interaction: e.txt.substring(e.txt.indexOf("INTERACTION WITH ") + "INTERACTION WITH ".length, e.txt.length),
+            ...e
+        }))
+        // Scopes may list several partners separated by "; " or " AND ".
+        genes_scope_inter = [...new Set(comment_scope_inters_genes.map((e:any) => e.interaction))].flatMap((e:any) => e.split(/; | AND /))
+    }
+    
+    // Comment subunit
+    const comment_subunit = jsonata(`entry.comment[type="subunit"].text.value`).evaluate(json)
+    let comment_subunits_genes = []
+    if (comment_subunit) {
+        const comment_subunits = Array.isArray(comment_subunit) ? comment_subunit : [comment_subunit]
+        comment_subunits_genes = comment_subunits
+        // Drop parenthesised asides, then look at each line separately.
+        .flatMap((e:any) => e.replace(/ *\([^)]*\) */g, '').split(/\n/))
+        .filter((e:any) => /Interacts/.test(e))
+        // match() yields null when nothing matches; the typeof filter below removes it.
+        .flatMap((e:any) => e.match(/[A-Z][A-Z0-9]{2,}/g))
+    }
+
+    let res = [...new Set([...genes_scope_inter, ...genes_interactant, ...comment_subunits_genes])].sort().filter((e:any) => typeof e === 'string').filter(_=>_)
+
+    console.log(res);
+    
+    // Reduce every remaining string to its gene-like tokens.
+    if(res.length > 0) res = res.flatMap((e:any) => e.match(/[A-Z][A-Z0-9]{2,}/g)).filter(_=>_)
     
-    const scope_inter = jsonata(`entry.reference[scope ~> /INTERACTION WITH/i ]`).evaluate(json).map((e:any) => ({
-        txt: Array.isArray(e.scope) ? e.scope.filter((ee:any) => ee.match(/INTERACTION\ WITH/)).join() : e.scope,
-        ...e
-    })).map((e:any) => ({
-        interaction: e.txt.substring(e.txt.indexOf("INTERACTION WITH ") + "INTERACTION WITH ".length, e.txt.length),...e}))
-    const genes_scope_inter = [...new Set(scope_inter.map((e:any) => e.interaction))]
-
-    return [...new Set([...genes_scope_inter, ...genes_interactant])].filter(e => e)
+    return res
 }
 
+// const findDistance = async (idmappingPath: string, dbPath:string, geneNameA:string, geneNameB:string, maxDistance = 6) => {
+//     let rounds = [[geneNameA]]
+
+//     let tree = {[geneNameA]: {}} as {[key:string]:any}
+//     let run = true
+
+//     let a = tree
+//     Object.keys(a).map((gene) => )
+
+//     let nIter = 0
+//     while(nIter <= maxDistance && run) {
+//         for (const gA of rounds[nIter]) {
+//             console.log(nIter,gA);
+            
+//             const tmp = await getInteractionsFromEntry(await getEntryFromGeneName(idmappingPath, dbPath, gA))
+//             if (tmp.includes(geneNameB)) { run = false; break }
+//             rounds.push(tmp)
+//         }
+//         nIter++
+//     }
+
+//     //console.log(rounds);
+    
+    
+//     return nIter
+// }
+
 export { makeIndex, readOffset, getEnrty, getEntryFromGeneName, getInteractionsFromEntry }

+ 7 - 11
test.js

@@ -8,22 +8,18 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
         step((generator = generator.apply(thisArg, _arguments || [])).next());
     });
 };
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 const _1 = require(".");
-const fs_1 = __importDefault(require("fs"));
 (() => __awaiter(void 0, void 0, void 0, function* () {
     // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/
     // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz
     const idmappingPath = '/home/thomas/NGS/ref/UNIPROT/HUMAN_9606_idmapping.dat';
     const uniprotDB = '/home/thomas/NGS/ref/UNIPROT/uniprot_sprot_human.xml';
-    // await makeIndex(uniprotDB)
-    // const r =  await readOffset(uniprotDB, 118933, 255352)
-    const uniprotID = 'Q92754';
-    const r = yield (0, _1.getEnrty)(uniprotDB, uniprotID);
-    yield fs_1.default.promises.writeFile('test/test-Q5T4S7.json', JSON.stringify(r, null, 4));
-    console.log(yield (0, _1.getInteractionsFromEntry)(r));
-    //await getEntryFromGeneName(idmappingPath, uniprotDB, 'CITED2')
+    // const n = await getEntryFromGeneName(idmappingPath, uniprotDB, 'TTC23L')
+    // await fs.promises.writeFile('test/test-CITED2.json', JSON.stringify(n, null, 4))
+    // console.log(await getInteractionsFromEntry(n))
+    // const tmp = await getInteractionsFromEntry(await getEntryFromGeneName(idmappingPath, uniprotDB, 'UBQLN1'))
+    // console.log(tmp);
+    const dist = yield (0, _1.findDistance)(idmappingPath, uniprotDB, 'TTC23L', 'HOOK2');
+    console.log('dist', dist);
 }))();

+ 11 - 9
test.ts

@@ -1,4 +1,4 @@
-import { makeIndex, readOffset, getEnrty, getEntryFromGeneName, getInteractionsFromEntry } from ".";
+import { makeIndex, readOffset, getEnrty, getEntryFromGeneName, getInteractionsFromEntry,findDistance } from ".";
 import fs from 'fs'
 import jsonata from 'jsonata'
 (async()=>{
@@ -6,14 +6,16 @@ import jsonata from 'jsonata'
     // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz
     const idmappingPath = '/home/thomas/NGS/ref/UNIPROT/HUMAN_9606_idmapping.dat'
     const uniprotDB = '/home/thomas/NGS/ref/UNIPROT/uniprot_sprot_human.xml'
-    // await makeIndex(uniprotDB)
-    // const r =  await readOffset(uniprotDB, 118933, 255352)
-    const uniprotID = 'Q92754'
-    const r = await getEnrty(uniprotDB, uniprotID)
+
+    // const n = await getEntryFromGeneName(idmappingPath, uniprotDB, 'TTC23L')
+    // await fs.promises.writeFile('test/test-CITED2.json', JSON.stringify(n, null, 4))
+    // console.log(await getInteractionsFromEntry(n))
+
+    // const tmp = await getInteractionsFromEntry(await getEntryFromGeneName(idmappingPath, uniprotDB, 'UBQLN1'))
+    // console.log(tmp);
+    
+    const dist = await findDistance(idmappingPath, uniprotDB, 'TTC23L','HSPA8')
+    console.log('dist',dist);
     
-    await fs.promises.writeFile('test/test-Q5T4S7.json', JSON.stringify(r, null, 4))
     
-    console.log(await getInteractionsFromEntry(r))
-
-    //await getEntryFromGeneName(idmappingPath, uniprotDB, 'CITED2')
 })()