Thomas 3 ani în urmă
părinte
comite
2bdfd5f4b1
4 fișiere modificate, cu 157 adăugiri și 13 ștergeri
  1. 83 9
      GoToNeo4j.js
  2. 72 2
      GoToNeo4j.ts
  3. 1 1
      index.js
  4. 1 1
      index.ts

+ 83 - 9
GoToNeo4j.js

@@ -61,12 +61,26 @@ const templateGTnode = (args) => {
 const templateEdge = (from, to) => { // Builds a Cypher statement that matches two GoTerm nodes by id and links them with an is_a relationship; in this diff the removed line used CREATE and the added line uses MERGE.
     return `MATCH (from:GoTerm {id: '${from}'})
     MATCH (to:GoTerm {id: '${to}'})
-    CREATE (from)-[rel:is_a]->(to)`;
+    MERGE (from)-[rel:is_a]->(to)`;
 };
-const insert = (oboPath) => __awaiter(void 0, void 0, void 0, function* () {
+const templateSymbol = (args) => { // Builds a Cypher CREATE statement for a Symbol node whose properties are the key/value pairs of args; returns the statement as a string.
+    let tmp = 'CREATE (sy:Symbol {';
+    Object.keys(args).forEach((dd, ii) => {
+        const delim = ii === 0 ? '' : ', '; // no separator before the first property
+        tmp += `${delim}${dd}: "${args[dd]}"`; // NOTE(review): the value is interpolated without escaping, so a value containing a double quote would break the statement
+    });
+    tmp += `})`; // close the property map and the node pattern
+    return tmp;
+};
+const templateEdgeGoa = (fromSymbol, toGoTerm, relName, args) => { // Builds a Cypher statement that matches a Symbol node by name and a GoTerm node by id, then MERGEs a relationship of type relName (with the properties in args) from the symbol to the term.
+    const tmpArgs = Object.keys(args).reduce((p, c) => p += `${c}: "${args[c]}", `, "").slice(0, -2); // render args as a comma-separated property list; slice(0, -2) drops the trailing comma and space
+    return `MATCH (from:Symbol {name: '${fromSymbol}'})
+    MATCH (to:GoTerm {id: '${toGoTerm}'})
+    MERGE (from)-[rel:${relName} {${tmpArgs}}]->(to)`;
+};
+const readObo = (oboPath) => __awaiter(void 0, void 0, void 0, function* () {
     var e_1, _a;
     let delim = false;
-    const results = [];
     const vertexes = [];
     const edges = [];
     let result = {};
@@ -85,7 +99,6 @@ const insert = (oboPath) => __awaiter(void 0, void 0, void 0, function* () {
                     }
                 }
                 result = {};
-                results.push(result);
             }
             else if (delim)
                 result[line.split(': ')[0]] = line.split(': ')[1].replace("\"", "");
@@ -100,6 +113,53 @@ const insert = (oboPath) => __awaiter(void 0, void 0, void 0, function* () {
     }
     return { vertexes, edges };
 });
+const readGoa = (goaPath) => __awaiter(void 0, void 0, void 0, function* () { // Reads the file at goaPath line by line and resolves to { vertexes, edges }: Cypher strings creating one Symbol node per distinct symbol and one relationship per processed line.
+    var e_2, _d;
+    const header = [ // names assigned to the tab-separated fields by position
+        'database', 'ID', 'Symbol', 'Qualifier',
+        'GO_Term', 'Evidence', 'Evidence_Code',
+        'With', 'From', 'Name', 'Alternative_symbols',
+        'Class', 'Taxon', 'Date', 'Origin'
+    ];
+    const vertexes = [];
+    const edges = [];
+    const allSymbols = {}; // keys are the symbols that already produced a vertex statement
+    try {
+        for (var _e = __asyncValues(line$(goaPath)), _f; _f = yield _e.next(), !_f.done;) {
+            const line = _f.value;
+            if (/^[^!]/.test(line)) { // only non-empty lines whose first character is not an exclamation mark are processed
+                const obj = line.split('\t').filter((e) => e).reduce((p, c, i) => (Object.assign(Object.assign({}, p), { [header[i]]: /*separator.test(c) ? c.split('|') :*/ c })), {}); // NOTE(review): empty fields are dropped before the positional mapping, so an empty column shifts every later value onto an earlier header name - confirm this is intended
+                if (typeof allSymbols[obj.Symbol] === 'undefined') { // first time this symbol is seen
+                    vertexes.push(templateSymbol({
+                        name: obj.Symbol,
+                        class: obj.Class,
+                        fullName: obj.Name,
+                        alternativeName: obj.Alternative_symbols,
+                        taxon: obj.Taxon,
+                        goaID: obj.ID,
+                        goaDB: obj.Origin
+                    }));
+                    allSymbols[obj.Symbol] = ''; // mark as seen; only the presence of the key is checked above
+                }
+                edges.push(templateEdgeGoa(obj.Symbol, obj.GO_Term, obj.Qualifier.replace("|", "_or_"), { // NOTE(review): replace with a string pattern substitutes only the first pipe, and throws if Qualifier is undefined - confirm the input guarantees both
+                    goaEvidence: obj.Evidence,
+                    goaEvidenceCode: obj.Evidence_Code,
+                    goaWith: obj.With,
+                    goaFrom: obj.From,
+                    goaDate: obj.Date
+                }));
+            }
+        }
+    }
+    catch (e_2_1) { e_2 = { error: e_2_1 }; } // remember the error so it can be rethrown after iterator cleanup
+    finally {
+        try {
+            if (_f && !_f.done && (_d = _e.return)) yield _d.call(_e); // the loop ended before the iterator was exhausted: call its return method if it has one
+        }
+        finally { if (e_2) throw e_2.error; } // rethrow the error captured above, if any
+    }
+    return { vertexes, edges };
+});
 const runNeo = (session, sql) => {
     return new Promise((resolve, reject) => {
         try {
@@ -112,16 +172,30 @@ const runNeo = (session, sql) => {
 };
 (() => __awaiter(void 0, void 0, void 0, function* () {
     const oboPath = '/home/thomas/NGS/ref/GO/go-basic.obo';
+    const goaPath = '/home/thomas/NGS/ref/GO/goa_human.gaf';
     var driver = neo4j.driver('neo4j://localhost', neo4j.auth.basic('neo4j', '123456'));
     var session = driver.session();
-    console.log(session);
-    const rr = yield insert(oboPath);
-    // const rr = await getGOterms('GO:0008150', oboPath)
-    console.log(rr.edges[0]);
+    // OBO
+    /*
+    const rr = await readObo(oboPath)
     for (const v of rr.vertexes) {
-        yield runNeo(session, v);
+        console.log('Inserting OBO vertexes...');
+        await runNeo(session, v)
     }
     for (const e of rr.edges) {
+        console.log('Inserting OBO edges...');
+        await runNeo(session, e)
+    }
+    */
+    // GOA
+    const goaAll = yield readGoa(goaPath);
+    console.log(goaAll.edges[0]);
+    // for (const v of goaAll.vertexes) {
+    //     console.log('Inserting GOA vertexes...');
+    //     await runNeo(session, v)
+    // }
+    for (const e of goaAll.edges) {
+        // console.log('Inserting GOA edges...');
         yield runNeo(session, e);
     }
     yield driver.close();

+ 72 - 2
GoToNeo4j.ts

@@ -2,6 +2,7 @@
 import fs from 'fs'
 import readline from 'readline'
 import * as neo4j from 'neo4j-driver'
+import { nextTick } from 'process'
 
 const line$ = (path: string) => readline.createInterface({
     input: fs.createReadStream(path),
@@ -21,7 +22,24 @@ const templateGTnode = (args:any) => {
 const templateEdge = (from:string, to:string) => { // Builds a Cypher statement that matches two GoTerm nodes by id and links them with an is_a relationship; in this diff the removed line used CREATE and the added line uses MERGE.
     return `MATCH (from:GoTerm {id: '${from}'})
     MATCH (to:GoTerm {id: '${to}'})
-    CREATE (from)-[rel:is_a]->(to)`
+    MERGE (from)-[rel:is_a]->(to)`
+}
+
+const templateSymbol = (args:any) => { // Builds a Cypher CREATE statement for a Symbol node whose properties are the key/value pairs of args; returns the statement as a string.
+    let tmp = 'CREATE (sy:Symbol {'
+    Object.keys(args).forEach((dd:any,ii:number) => {
+        const delim = ii === 0 ? '' : ', ' // no separator before the first property
+        tmp += `${delim}${dd}: "${args[dd]}"` // NOTE(review): the value is interpolated without escaping, so a value containing a double quote would break the statement
+    })
+    tmp += `})` // close the property map and the node pattern
+    return tmp
+}
+
+const templateEdgeGoa = (fromSymbol:string, toGoTerm:string, relName:string, args:any) => { // Builds a Cypher statement that matches a Symbol node by name and a GoTerm node by id, then MERGEs a relationship of type relName (with the properties in args) from the symbol to the term.
+    const tmpArgs = Object.keys(args).reduce((p,c)=> p += `${c}: "${args[c]}", `, "").slice(0, -2) // render args as a comma-separated property list; slice(0, -2) drops the trailing comma and space
+    return `MATCH (from:Symbol {name: '${fromSymbol}'})
+    MATCH (to:GoTerm {id: '${toGoTerm}'})
+    MERGE (from)-[rel:${relName} {${tmpArgs}}]->(to)`
 }
 
 const readObo = async (oboPath:string) => {
@@ -47,6 +65,44 @@ const readObo = async (oboPath:string) => {
     return {vertexes, edges}
 }
 
+const readGoa = async (goaPath:string) => { // Reads the file at goaPath line by line and resolves to {vertexes, edges}: Cypher strings creating one Symbol node per distinct symbol and one relationship per processed line.
+    const header = [ // names assigned to the tab-separated fields by position
+        'database', 'ID', 'Symbol', 'Qualifier', 
+        'GO_Term', 'Evidence', 'Evidence_Code', 
+        'With', 'From','Name', 'Alternative_symbols', 
+        'Class', 'Taxon', 'Date', 'Origin'
+    ]
+    
+    const vertexes = []
+    const edges = []
+    const allSymbols = {} as any // keys are the symbols that already produced a vertex statement
+    for await (const line of line$(goaPath)) {
+        if(/^[^!]/.test(line)) { // only non-empty lines whose first character is not an exclamation mark are processed
+            const obj:any = line.split('\t').filter((e:any)=>e).reduce((p,c,i) => ({...p, [header[i]] : /*separator.test(c) ? c.split('|') :*/ c}), {}) // NOTE(review): empty fields are dropped before the positional mapping, so an empty column shifts every later value onto an earlier header name - confirm this is intended
+            if (typeof allSymbols[obj.Symbol] === 'undefined') { // first time this symbol is seen
+                vertexes.push(templateSymbol({
+                    name: obj.Symbol,
+                    class: obj.Class,
+                    fullName: obj.Name, 
+                    alternativeName: obj.Alternative_symbols,
+                    taxon: obj.Taxon,
+                    goaID: obj.ID, 
+                    goaDB: obj.Origin
+                }))
+                allSymbols[obj.Symbol] = '' // mark as seen; only the presence of the key is checked above
+            }
+            edges.push(templateEdgeGoa(obj.Symbol, obj.GO_Term, obj.Qualifier.replace("|","_or_"), { // NOTE(review): replace with a string pattern substitutes only the first pipe, and throws if Qualifier is undefined - confirm the input guarantees both
+                goaEvidence: obj.Evidence,
+                goaEvidenceCode: obj.Evidence_Code,
+                goaWith: obj.With,
+                goaFrom: obj.From,
+                goaDate: obj.Date
+            }))
+        }
+    }
+    return {vertexes, edges}
+}
+
 const runNeo = (session:any, sql:string) => {
     return new Promise<void>((resolve, reject) => {
         try {
@@ -59,6 +115,8 @@ const runNeo = (session:any, sql:string) => {
 
 (async()=>{
     const oboPath   = '/home/thomas/NGS/ref/GO/go-basic.obo'
+    const goaPath   = '/home/thomas/NGS/ref/GO/goa_human.gaf'
+
     var driver = neo4j.driver(
         'neo4j://localhost',
         neo4j.auth.basic('neo4j', '123456')
@@ -77,6 +135,18 @@ const runNeo = (session:any, sql:string) => {
         await runNeo(session, e)
     }
     */
-   
+
+    // GOA
+    const goaAll = await readGoa(goaPath)
+    console.log(goaAll.edges[0]);
+    
+    // for (const v of goaAll.vertexes) {
+    //     console.log('Inserting GOA vertexes...');
+    //     await runNeo(session, v)
+    // }
+    for (const e of goaAll.edges) {
+        // console.log('Inserting GOA edges...');
+        await runNeo(session, e)
+    }
     await driver.close()
 })()

+ 1 - 1
index.js

@@ -47,7 +47,7 @@ const getSymbol = (symbol, goaPath, oboPath) => __awaiter(void 0, void 0, void 0
         for (var _c = __asyncValues(line$(goaPath)), _d; _d = yield _c.next(), !_d.done;) {
             const line = _d.value;
             if (tester.test(line))
-                results.push(line.split('\t').reduce((p, c, i) => (Object.assign(Object.assign({}, p), { [header[i]]: separator.test(c) ? c.split('|') : c })), {}));
+                results.push(line.split('\t').filter((e) => e).reduce((p, c, i) => (Object.assign(Object.assign({}, p), { [header[i]]: separator.test(c) ? c.split('|') : c })), {}));
         }
     }
     catch (e_1_1) { e_1 = { error: e_1_1 }; }

+ 1 - 1
index.ts

@@ -23,7 +23,7 @@ const getSymbol = async (symbol:string, goaPath:string, oboPath:string) => {
     const separator = new RegExp('\\|')
     const results = [] as Array<{[key:string]:any}>
     for await (const line of line$(goaPath)) {
-        if(tester.test(line)) results.push(line.split('\t').reduce((p,c,i) => ({...p, [header[i]] : separator.test(c) ? c.split('|') : c}), {}))
+        if(tester.test(line)) results.push(line.split('\t').filter((e:any)=>e).reduce((p,c,i) => ({...p, [header[i]] : separator.test(c) ? c.split('|') : c}), {}))
     }
 
     const subTerms = results.map(e => e?.GO_Term)