// GoToNeo4j.ts
// Loads Gene Ontology terms (OBO) and GOA annotations (GAF) into Neo4j by
// generating Cypher statements and running them one at a time.
  1. import fs from 'fs'
  2. import readline from 'readline'
  3. import * as neo4j from 'neo4j-driver'
  4. import { nextTick } from 'process'
  5. const line$ = (path: string) => readline.createInterface({
  6. input: fs.createReadStream(path),
  7. crlfDelay: Infinity
  8. })
  9. const templateGTnode = (args:any) => {
  10. let tmp = 'CREATE (gt:GoTerm {'
  11. Object.keys(args).forEach((dd:any,ii:number) => {
  12. const delim = ii === 0 ? '' : ', '
  13. tmp += `${delim}${dd}: "${args[dd].replace(/"/g,'')}"`
  14. })
  15. tmp += '})'
  16. return tmp
  17. }
  18. const templateEdge = (from:string, to:string) => {
  19. return `MATCH (from:GoTerm {id: '${from}'})
  20. MATCH (to:GoTerm {id: '${to}'})
  21. MERGE (from)-[rel:is_a]->(to)`
  22. }
  23. const templateSymbol = (args:any) => {
  24. let tmp = 'CREATE (sy:Symbol {'
  25. Object.keys(args).forEach((dd:any,ii:number) => {
  26. const delim = ii === 0 ? '' : ', '
  27. tmp += `${delim}${dd}: "${args[dd]}"`
  28. })
  29. tmp += `})`
  30. return tmp
  31. }
  32. const templateEdgeGoa = (fromSymbol:string, toGoTerm:string, relName:string, args:any) => {
  33. const tmpArgs = Object.keys(args).reduce((p,c)=> p += `${c}: "${args[c]}", `, "").slice(0, -2)
  34. return `MATCH (from:Symbol {name: '${fromSymbol}'})
  35. MATCH (to:GoTerm {id: '${toGoTerm}'})
  36. MERGE (from)-[rel:${relName} {${tmpArgs}}]->(to)`
  37. }
  38. const readObo = async (oboPath:string) => {
  39. let delim = false
  40. const vertexes = []
  41. const edges = []
  42. let result = {} as {[key:string]: any}
  43. for await (const line of line$(oboPath)) {
  44. if (line === '[Term]') {
  45. delim = true
  46. } else if (line === '' && delim) {
  47. delim = false
  48. vertexes.push(templateGTnode(result))
  49. if(result?.is_a) {
  50. if (/^GO:[0-9]*/.test(result.is_a)) {
  51. edges.push(templateEdge(result.id, result.is_a.match(/^GO:[0-9]*/)[0]))
  52. }
  53. }
  54. result = {}
  55. }
  56. else if (delim) result[line.split(': ')[0]] = line.split(': ')[1].replace("\"","")
  57. }
  58. return {vertexes, edges}
  59. }
  60. const readGoa = async (goaPath:string) => {
  61. const header = [
  62. 'database', 'ID', 'Symbol', 'Qualifier',
  63. 'GO_Term', 'Evidence', 'Evidence_Code',
  64. 'With', 'From','Name', 'Alternative_symbols',
  65. 'Class', 'Taxon', 'Date', 'Origin'
  66. ]
  67. const vertexes = []
  68. const edges = []
  69. const allSymbols = {} as any
  70. for await (const line of line$(goaPath)) {
  71. if(/^[^!]/.test(line)) {
  72. const obj:any = line.split('\t').filter((e:any)=>e).reduce((p,c,i) => ({...p, [header[i]] : /*separator.test(c) ? c.split('|') :*/ c}), {})
  73. if (typeof allSymbols[obj.Symbol] === 'undefined') {
  74. vertexes.push(templateSymbol({
  75. name: obj.Symbol,
  76. class: obj.Class,
  77. fullName: obj.Name,
  78. alternativeName: obj.Alternative_symbols,
  79. taxon: obj.Taxon,
  80. goaID: obj.ID,
  81. goaDB: obj.Origin
  82. }))
  83. allSymbols[obj.Symbol] = ''
  84. }
  85. edges.push(templateEdgeGoa(obj.Symbol, obj.GO_Term, obj.Qualifier.replace("|","-"), {
  86. goaEvidence: obj.Evidence,
  87. goaEvidenceCode: obj.Evidence_Code,
  88. goaWith: obj.With,
  89. goaFrom: obj.From,
  90. goaDate: obj.Date
  91. }))
  92. }
  93. }
  94. return {vertexes, edges}
  95. }
  96. const runNeo = (session:any, sql:string) => {
  97. return new Promise<void>((resolve, reject) => {
  98. try {
  99. session.run(sql).then((result:any) => resolve(result)).catch((err:any) => reject(err))
  100. } catch (error) {
  101. reject(error)
  102. }
  103. })
  104. }
  105. (async()=>{
  106. const oboPath = '/home/thomas/NGS/ref/GO/go-basic.obo'
  107. const goaPath = '/home/thomas/NGS/ref/GO/goa_human.gaf'
  108. var driver = neo4j.driver(
  109. 'neo4j://localhost',
  110. neo4j.auth.basic('neo4j', '123456')
  111. )
  112. var session = driver.session()
  113. // OBO
  114. /*
  115. const rr = await readObo(oboPath)
  116. for (const v of rr.vertexes) {
  117. console.log('Inserting OBO vertexes...');
  118. await runNeo(session, v)
  119. }
  120. for (const e of rr.edges) {
  121. console.log('Inserting OBO edges...');
  122. await runNeo(session, e)
  123. }
  124. */
  125. // GOA
  126. const goaAll = await readGoa(goaPath)
  127. console.log(goaAll.edges[0]);
  128. // for (const v of goaAll.vertexes) {
  129. // console.log('Inserting GOA vertexes...');
  130. // await runNeo(session, v)
  131. // }
  132. for (const e of goaAll.edges) {
  133. // console.log('Inserting GOA edges...');
  134. await runNeo(session, e)
  135. }
  136. await driver.close()
  137. })()