| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- import fs from 'fs'
- import readline from 'readline'
/**
 * Opens the file at `path` as a read stream and wraps it in a readline
 * interface, which callers in this file consume with `for await` to get
 * one line at a time.
 *
 * @param path Location of the text file to read.
 * @returns The readline interface created over the file stream.
 */
const line$ = (path: string) => {
  const input = fs.createReadStream(path)
  // crlfDelay: Infinity makes readline treat "\r\n" as a single line break.
  return readline.createInterface({ input, crlfDelay: Infinity })
}
- // http://geneontology.org/docs/guide-go-evidence-codes/
- // http://wiki.geneontology.org/index.php/Category:Evidence_Codes
- // http://current.geneontology.org/ontology/go-basic.obo
- // http://current.geneontology.org/annotations/goa_human.gaf.gz
/**
 * Collects every row of a tab-separated annotation file whose `Symbol`
 * column equals `symbol`, then replaces each row's `GO_Term` identifier with
 * the matching term record read from the ontology file.
 *
 * @param symbol  Compared verbatim (no pattern matching) against the third
 *                tab-separated column of each line.
 * @param goaPath Path of the annotation file; it is read line by line as
 *                plain text.
 * @param oboPath Path of the ontology file handed to `getGOterms`.
 * @returns One object per matching row, keyed by the column names in
 *          `header`. A field containing `|` is stored as an array of its
 *          parts. `GO_Term` holds the term record whose `id` equals the row's
 *          identifier, or `undefined` when no such record was found.
 */
const getSymbol = async (
  symbol: string,
  goaPath: string,
  oboPath: string
): Promise<Array<{ [key: string]: any }>> => {
  const header = [
    'database', 'ID', 'Symbol', 'Qualifier',
    'GO_Term', 'Evidence', 'Evidence_Code',
    'With', 'From', 'Name', 'Alternative_symbols',
    'Class', 'Taxon', 'Date', 'Origin'
  ]
  const symbolColumn = header.indexOf('Symbol')
  const results: Array<{ [key: string]: any }> = []

  for await (const line of line$(goaPath)) {
    const fields = line.split('\t')
    // Exact comparison on the Symbol column only: the symbol is never
    // interpreted as a regular expression and cannot match another column.
    if (fields[symbolColumn] !== symbol) continue

    const row: { [key: string]: any } = {}
    fields.forEach((field, i) => {
      const key = header[i]
      // Columns beyond the known header have no name; skip them instead of
      // storing them under the key "undefined".
      if (key === undefined) return
      row[key] = field.includes('|') ? field.split('|') : field
    })
    results.push(row)
  }

  // Nothing matched: no need to scan the ontology file at all.
  if (results.length === 0) return results

  // Look each distinct identifier up once.
  const wantedIds = Array.from(
    new Set(
      results
        .map(row => row.GO_Term)
        .filter((id): id is string => typeof id === 'string')
    )
  )
  const terms = await getGOterms(wantedIds, oboPath)

  // Index the records by id; the first record seen for an id wins.
  const termById = new Map<string, { [key: string]: any }>()
  for (const term of terms) {
    if (typeof term.id === 'string' && !termById.has(term.id)) {
      termById.set(term.id, term)
    }
  }

  for (const row of results) {
    row.GO_Term =
      typeof row.GO_Term === 'string' ? termById.get(row.GO_Term) : undefined
  }
  return results
}
/**
 * Scans the ontology file once and returns the stanza of every requested
 * term as a flat tag/value object.
 *
 * A stanza is selected when one of its lines is exactly `id: <term>`
 * (trailing whitespace ignored). Collection starts on that line and stops at
 * the next empty line or at end of file.
 *
 * @param terms   Identifiers to look up; duplicates are harmless. An empty
 *                list returns `[]` without opening the file.
 * @param oboPath Path of the ontology file, read line by line as plain text.
 * @returns One object per selected stanza, in file order. Each line
 *          `tag: value` becomes `{ tag: value }`, split at the first `': '`
 *          only, so values containing `': '` are kept whole. When a tag
 *          occurs more than once in a stanza, the last occurrence wins.
 */
const getGOterms = async (
  terms: string[],
  oboPath: string
): Promise<Array<{ [key: string]: any }>> => {
  const results: Array<{ [key: string]: any }> = []
  // Whole-line lookup: cannot match `alt_id: <term>` lines, treats the term
  // literally, and costs one set lookup per line instead of one regex test
  // per term.
  const wantedIdLines = new Set(terms.map(term => 'id: ' + term))
  if (wantedIdLines.size === 0) return results

  let collecting = false
  let stanza: { [key: string]: any } = {}
  for await (const line of line$(oboPath)) {
    if (wantedIdLines.has(line.trimEnd())) collecting = true
    if (!collecting) continue

    if (line === '') {
      // An empty line closes the stanza being collected.
      results.push(stanza)
      stanza = {}
      collecting = false
      continue
    }

    const cut = line.indexOf(': ')
    if (cut === -1) {
      // No separator: keep the line as a key with no value.
      stanza[line] = undefined
    } else {
      stanza[line.slice(0, cut)] = line.slice(cut + 2)
    }
  }

  // The file may end without a trailing empty line; keep the open stanza.
  if (collecting) results.push(stanza)
  return results
}
- export { getSymbol, getGOterms }
|