| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- import fs from 'fs'
- import readline from 'readline'
- import * as neo4j from 'neo4j-driver'
- import { nextTick } from 'process'
/**
 * Opens `path` for reading and returns a readline interface over it.
 *
 * The interface can be consumed with `for await` or via its `line` event.
 * `crlfDelay: Infinity` makes a `\r\n` pair always count as a single line break.
 */
const line$ = (path: string) => {
  const input = fs.createReadStream(path)
  return readline.createInterface({ input, crlfDelay: Infinity })
}
/**
 * Builds a Cypher `CREATE` statement for a `GoTerm` node.
 *
 * Every own enumerable key of `args` becomes a node property whose value is a
 * double-quoted Cypher string literal. Keys are emitted verbatim, so they must
 * already be valid Cypher identifiers.
 *
 * @param args - Property name → value map for the node.
 * @returns The `CREATE (gt:GoTerm {...})` statement.
 */
const templateGTnode = (args: Record<string, unknown>) => {
  const props = Object.keys(args).map((key) => {
    const raw = args[key]
    // Non-string values used to throw on `.replace`; coerce them instead.
    const text = typeof raw === 'string' ? raw : String(raw ?? '')
    const safe = text
      // Double quotes are dropped (as before) so they cannot end the literal.
      .replace(/"/g, '')
      // A backslash starts an escape sequence inside a Cypher string literal;
      // double it so the stored value keeps the backslash itself.
      .replace(/\\/g, '\\\\')
    return `${key}: "${safe}"`
  })
  return `CREATE (gt:GoTerm {${props.join(', ')}})`
}
/**
 * Builds the Cypher statement that links two existing `GoTerm` nodes with an
 * `is_a` relationship (`MERGE`, so re-running it does not duplicate the edge).
 *
 * @param from - `id` property of the child term.
 * @param to - `id` property of the parent term.
 * @returns A three-line `MATCH … MATCH … MERGE` statement.
 */
const templateEdge = (from: string, to: string) => {
  // Escape backslashes and single quotes so an id can never terminate the
  // single-quoted Cypher literal it is embedded in.
  const quote = (id: string) =>
    `'${String(id).replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`

  return [
    `MATCH (from:GoTerm {id: ${quote(from)}})`,
    `MATCH (to:GoTerm {id: ${quote(to)}})`,
    'MERGE (from)-[rel:is_a]->(to)'
  ].join('\n')
}
/**
 * Builds a Cypher `CREATE` statement for a `Symbol` node.
 *
 * Every key of `args` whose value is not `null`/`undefined` becomes a node
 * property holding a double-quoted Cypher string literal. Keys are emitted
 * verbatim, so they must already be valid Cypher identifiers.
 *
 * @param args - Property name → value map for the node.
 * @returns The `CREATE (sy:Symbol {...})` statement.
 */
const templateSymbol = (args: Record<string, unknown>) => {
  // Backslash first, then the quote, so the escapes we add are not re-escaped.
  const escape = (value: unknown) =>
    String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"')

  const props = Object.keys(args)
    // A missing value used to be stored as the literal text "undefined".
    .filter((key) => args[key] != null)
    .map((key) => `${key}: "${escape(args[key])}"`)

  return `CREATE (sy:Symbol {${props.join(', ')}})`
}
/**
 * Builds the Cypher statement that links an existing `Symbol` node to an
 * existing `GoTerm` node (`MERGE`, so identical edges are not duplicated).
 *
 * @param fromSymbol - `name` property of the `Symbol` node.
 * @param toGoTerm - `id` property of the `GoTerm` node.
 * @param relName - Relationship type; emitted backtick-quoted so names that
 *   are not plain identifiers (e.g. `NOT-involved_in`) are still valid Cypher.
 * @param args - Relationship properties; `null`/`undefined` values are skipped.
 * @returns A three-line `MATCH … MATCH … MERGE` statement.
 */
const templateEdgeGoa = (fromSymbol: string, toGoTerm: string, relName: string, args: any) => {
  const escapeBackslashes = (value: unknown) => String(value).replace(/\\/g, '\\\\')
  const singleQuoted = (value: unknown) => `'${escapeBackslashes(value).replace(/'/g, "\\'")}'`
  const doubleQuoted = (value: unknown) => `"${escapeBackslashes(value).replace(/"/g, '\\"')}"`

  // Inside a backtick-quoted name a literal backtick is written doubled.
  const relType = '`' + String(relName).replace(/`/g, '``') + '`'

  const props = Object.keys(args)
    .filter((key) => args[key] != null)
    .map((key) => `${key}: ${doubleQuoted(args[key])}`)
    .join(', ')

  return [
    `MATCH (from:Symbol {name: ${singleQuoted(fromSymbol)}})`,
    `MATCH (to:GoTerm {id: ${singleQuoted(toGoTerm)}})`,
    `MERGE (from)-[rel:${relType} {${props}}]->(to)`
  ].join('\n')
}
/**
 * Reads an OBO file and turns every `[Term]` stanza into Cypher statements.
 *
 * For each term one `CREATE` statement is produced (via `templateGTnode`) whose
 * properties are the stanza's `tag: value` lines with double quotes removed.
 * When a tag repeats inside a stanza the last value wins for the node
 * property. Every `is_a` line whose value starts with a GO id additionally
 * yields one `is_a` edge statement (via `templateEdge`).
 *
 * Stanzas of any other type (e.g. `[Typedef]`) and the file header are ignored.
 *
 * @param oboPath - Path of the OBO file to read.
 * @returns Node statements in `vertexes` and relationship statements in `edges`.
 */
const readObo = async (oboPath: string): Promise<{ vertexes: string[]; edges: string[] }> => {
  const goIdPattern = /^GO:[0-9]+/
  const stanzaHeader = /^\[.*\]$/

  const vertexes: string[] = []
  const edges: string[] = []

  let inTerm = false
  let term: { [key: string]: string } = {}
  let parents: string[] = []

  // Emits the statements for the term collected so far and resets the state.
  const flushTerm = () => {
    vertexes.push(templateGTnode(term))
    if (term.id) {
      for (const parent of parents) {
        edges.push(templateEdge(term.id, parent))
      }
    }
    term = {}
    parents = []
    inTerm = false
  }

  for await (const line of line$(oboPath)) {
    if (stanzaHeader.test(line)) {
      // A new stanza header also terminates a term that was not followed by a
      // blank line.
      if (inTerm) flushTerm()
      inTerm = line === '[Term]'
      continue
    }
    if (!inTerm) continue
    if (line === '') {
      flushTerm()
      continue
    }

    // Split on the FIRST ": " only, so values that themselves contain ": "
    // are kept whole; lines without a separator are skipped instead of
    // throwing.
    const sep = line.indexOf(': ')
    if (sep === -1) continue
    const key = line.slice(0, sep)
    const value = line.slice(sep + 2).replace(/"/g, '')
    term[key] = value

    // A term may list several parents; remember each one instead of only the
    // last `is_a` line.
    if (key === 'is_a') {
      const match = goIdPattern.exec(value)
      if (match) parents.push(match[0])
    }
  }

  // The final stanza has no trailing blank line when the file ends right
  // after it.
  if (inTerm) flushTerm()

  return { vertexes, edges }
}
/**
 * Reads a tab-separated GOA annotation file and turns it into Cypher
 * statements.
 *
 * Lines starting with `!` and empty lines are skipped. The first 15 columns of
 * every other line are mapped positionally onto `header`. The first time a
 * symbol is seen a `Symbol` node statement is produced (via `templateSymbol`);
 * every line produces one relationship statement (via `templateEdgeGoa`) whose
 * type is the qualifier with `|` replaced by `-`.
 *
 * @param goaPath - Path of the annotation file to read.
 * @returns Node statements in `vertexes` and relationship statements in `edges`.
 */
const readGoa = async (goaPath: string): Promise<{ vertexes: string[]; edges: string[] }> => {
  // NOTE(review): names are positional labels for columns 1-15. In the GAF 2.x
  // specification column 8 is "With (or) From" and column 9 is "Aspect", so
  // 'From' presumably holds the aspect - confirm before relying on it.
  const header = [
    'database', 'ID', 'Symbol', 'Qualifier',
    'GO_Term', 'Evidence', 'Evidence_Code',
    'With', 'From', 'Name', 'Alternative_symbols',
    'Class', 'Taxon', 'Date', 'Origin'
  ]

  const vertexes: string[] = []
  const edges: string[] = []
  // A Set (not a plain object) so symbols such as "constructor" cannot collide
  // with inherited object properties.
  const seenSymbols = new Set<string>()
  let skipped = 0

  for await (const line of line$(goaPath)) {
    if (line === '' || line.startsWith('!')) continue

    // Empty columns must be kept: dropping them would shift every following
    // value under the wrong header name.
    const columns = line.split('\t')
    const obj: { [key: string]: string } = {}
    header.forEach((name, i) => {
      obj[name] = columns[i] ?? ''
    })

    // Without these three values no valid relationship statement can be built.
    if (!obj.Symbol || !obj.GO_Term || !obj.Qualifier) {
      skipped += 1
      continue
    }

    if (!seenSymbols.has(obj.Symbol)) {
      seenSymbols.add(obj.Symbol)
      vertexes.push(templateSymbol({
        name: obj.Symbol,
        class: obj.Class,
        fullName: obj.Name,
        alternativeName: obj.Alternative_symbols,
        taxon: obj.Taxon,
        goaID: obj.ID,
        goaDB: obj.Origin
      }))
    }

    edges.push(templateEdgeGoa(obj.Symbol, obj.GO_Term, obj.Qualifier.replace(/\|/g, '-'), {
      goaEvidence: obj.Evidence,
      goaEvidenceCode: obj.Evidence_Code,
      goaWith: obj.With,
      goaFrom: obj.From,
      goaDate: obj.Date
    }))
  }

  if (skipped > 0) {
    console.warn(`readGoa: skipped ${skipped} line(s) without symbol, GO term or qualifier`)
  }

  return { vertexes, edges }
}
/**
 * Runs one statement on the given session.
 *
 * The returned promise settles exactly like `session.run(sql)`: it resolves
 * with whatever that call resolves with and rejects with its rejection reason.
 * Because this is an async function, an exception thrown synchronously by
 * `session.run` is turned into a rejection as well.
 *
 * @param session - Object exposing `run(statement)` that returns a thenable.
 * @param sql - Statement text passed to `session.run` unchanged.
 * @returns A promise that settles when the statement has finished; the
 *   declared `void` is kept for existing callers, which ignore the result.
 */
const runNeo = async (session: any, sql: string): Promise<void> => {
  // `session.run` already returns a thenable, so it is returned directly
  // instead of being wrapped in a hand-written `new Promise`.
  return session.run(sql)
}
/**
 * Script entry point: reads the GOA annotation file and runs every generated
 * relationship statement against the local Neo4j instance, one at a time.
 *
 * The OBO import and the GOA node import are currently disabled (kept below
 * as comments). The session and driver are always closed, and a failure is
 * logged and reported through a non-zero exit code.
 */
void (async (): Promise<void> => {
  const goaPath = '/home/thomas/NGS/ref/GO/goa_human.gaf'

  // NOTE(review): connection URL and credentials are hard-coded; consider
  // reading them from the environment before sharing this script.
  const driver = neo4j.driver(
    'neo4j://localhost',
    neo4j.auth.basic('neo4j', '123456')
  )
  const session = driver.session()

  try {
    // OBO
    /*
    const oboPath = '/home/thomas/NGS/ref/GO/go-basic.obo'
    const rr = await readObo(oboPath)
    for (const v of rr.vertexes) {
      console.log('Inserting OBO vertexes...');
      await runNeo(session, v)
    }
    for (const e of rr.edges) {
      console.log('Inserting OBO edges...');
      await runNeo(session, e)
    }
    */

    // GOA
    const goaAll = await readGoa(goaPath)
    // Preview of the first generated statement.
    console.log(goaAll.edges[0])

    // for (const v of goaAll.vertexes) {
    //   console.log('Inserting GOA vertexes...');
    //   await runNeo(session, v)
    // }
    for (const e of goaAll.edges) {
      // console.log('Inserting GOA edges...');
      await runNeo(session, e)
    }
  } catch (error) {
    console.error(error)
    process.exitCode = 1
  } finally {
    // Release the session first, then the driver, even when the import failed.
    try {
      await session.close()
    } finally {
      await driver.close()
    }
  }
})()
|