  // index.ts (1.9 KB)
  1. import fs from 'fs'
  2. import readline from 'readline'
  /**
   * Opens the file at `path` as a stream and wraps it in a readline interface,
   * so callers can consume it one line at a time with `for await`.
   *
   * `crlfDelay: Infinity` makes readline treat every `\r\n` pair as a single
   * line break, however the two characters happen to be split across chunks.
   *
   * @param path Location of the text file to read.
   * @returns The readline interface bound to the file's read stream.
   */
  const line$ = (path: string): readline.Interface => {
    const input = fs.createReadStream(path)
    return readline.createInterface({ input, crlfDelay: Infinity })
  }
  // Reference material (Gene Ontology):
  //   Evidence code guide:     http://geneontology.org/docs/guide-go-evidence-codes/
  //   Evidence code wiki:      http://wiki.geneontology.org/index.php/Category:Evidence_Codes
  //   Ontology file (OBO):     http://current.geneontology.org/ontology/go-basic.obo
  //   Human annotations (GAF): http://current.geneontology.org/annotations/goa_human.gaf.gz
  /**
   * Collects the rows of an annotation file that mention `symbol` and replaces
   * each row's `GO_Term` id with the matching record returned by `getGOterms`.
   *
   * A row is selected when the text `\t<symbol>\t` occurs in it, i.e. `symbol`
   * fills a whole tab-delimited field that has a neighbour on both sides. The
   * comparison is a literal substring test, so characters such as `.`, `(` or
   * `+` in `symbol` are matched as-is instead of being read as a pattern.
   *
   * @param symbol  Exact field text to look for.
   * @param goaPath Path of the tab-separated annotation file, read line by line.
   * @param oboPath Path of the ontology file, passed through to `getGOterms`.
   * @returns One object per selected row, keyed by the names in `header`.
   *          Fields containing `|` become string arrays. `GO_Term` holds the
   *          first record whose `id` equals the row's term, or `undefined`
   *          when no such record was returned.
   */
  const getSymbol = async (
    symbol: string,
    goaPath: string,
    oboPath: string
  ): Promise<Array<{ [key: string]: any }>> => {
    // Names applied positionally to the tab-separated fields of each row.
    // NOTE(review): kept unchanged because callers read these keys, but the
    // labels do not look like the GAF column list ('With'/'From' is a single
    // column there and 'Aspect' is missing) - confirm against the GAF spec.
    const header = [
      'database', 'ID', 'Symbol', 'Qualifier',
      'GO_Term', 'Evidence', 'Evidence_Code',
      'With', 'From', 'Name', 'Alternative_symbols',
      'Class', 'Taxon', 'Date', 'Origin'
    ]
    const needle = '\t' + symbol + '\t'
    const results: Array<{ [key: string]: any }> = []

    for await (const line of line$(goaPath)) {
      if (!line.includes(needle)) continue
      const row: { [key: string]: any } = {}
      line.split('\t').forEach((field: string, i: number) => {
        const key = header[i]
        // Fields past the last named column have no key; skip them instead
        // of piling them up under the literal key "undefined".
        if (key === undefined) return
        row[key] = field.includes('|') ? field.split('|') : field
      })
      results.push(row)
    }

    // Only plain string ids can match an ontology record; duplicates add nothing.
    const wantedIds = [
      ...new Set(
        results
          .map(row => row.GO_Term)
          .filter((term): term is string => typeof term === 'string')
      )
    ]
    const cacheTerms: Array<{ [key: string]: any }> = await getGOterms(wantedIds, oboPath)

    // Index by id once; the first record for an id wins.
    const termById = new Map<unknown, { [key: string]: any }>()
    for (const term of cacheTerms) {
      if (!termById.has(term.id)) termById.set(term.id, term)
    }
    for (const row of results) {
      row.GO_Term = termById.get(row.GO_Term)
    }
    return results
  }
  /**
   * Scans an ontology file line by line and returns one record for every
   * stanza whose `id: ` line names one of the requested terms.
   *
   * Capturing starts on a line that is exactly `id: <term>` and stops at the
   * next empty line (or at end of file). Each captured line is split at its
   * first `": "` into a tag and a value, so values that themselves contain
   * `": "` are kept whole. When a tag occurs more than once within a stanza,
   * the last value is the one that remains in the record.
   *
   * @param terms   Term ids to look up; duplicates are harmless.
   * @param oboPath Path of the ontology file to scan.
   * @returns The captured records, in file order.
   */
  const getGOterms = async (
    terms: string[],
    oboPath: string
  ): Promise<Array<{ [key: string]: any }>> => {
    const idTag = 'id: '
    const separator = ': '
    const wanted = new Set(terms)
    const results: Array<{ [key: string]: any }> = []
    let result: { [key: string]: any } = {}
    // True while the current line belongs to a requested stanza.
    let delim = false

    for await (const line of line$(oboPath)) {
      // Exact match on the whole id line: `alt_id: <term>` and ids that merely
      // start with a requested term must not open a capture.
      if (line.startsWith(idTag) && wanted.has(line.slice(idTag.length))) delim = true

      if (line === '' && delim) {
        delim = false
        results.push(result)
        result = {}
      }

      if (delim) {
        const cut = line.indexOf(separator)
        if (cut === -1) result[line] = undefined
        else result[line.slice(0, cut)] = line.slice(cut + separator.length)
      }
    }

    // The file may end without a blank line after the last stanza.
    if (delim) results.push(result)
    return results
  }
  51. export { getSymbol, getGOterms }