import fs from 'fs'
import readline from 'readline'

/**
 * Opens `path` as a read stream and returns a readline interface over it,
 * which can be consumed line by line with `for await`.
 */
const line$ = (path: string) =>
  readline.createInterface({
    input: fs.createReadStream(path),
    // Always treat "\r\n" as a single line break.
    crlfDelay: Infinity,
  })

// Reference material:
// http://geneontology.org/docs/guide-go-evidence-codes/
// http://wiki.geneontology.org/index.php/Category:Evidence_Codes
// http://current.geneontology.org/ontology/go-basic.obo
// http://current.geneontology.org/annotations/goa_human.gaf.gz

/**
 * Output key for each tab-separated column of an annotation row, by position.
 *
 * The first 15 names are this module's historical keys and are kept unchanged
 * so existing callers keep working.
 * NOTE(review): per the GAF 2.x column layout, column 8 is the combined
 * "With (or) From" field and column 9 is the Aspect, so the `From` key
 * appears to carry the aspect value - confirm before renaming.
 * The last two names cover GAF columns 16 and 17, which previously collapsed
 * into a single key literally called "undefined".
 */
const GAF_COLUMNS = [
  'database',
  'ID',
  'Symbol',
  'Qualifier',
  'GO_Term',
  'Evidence',
  'Evidence_Code',
  'With',
  'From',
  'Name',
  'Alternative_symbols',
  'Class',
  'Taxon',
  'Date',
  'Origin',
  'Annotation_Extension',
  'Gene_Product_Form_ID',
]

/**
 * Collects every annotation row of a tab-separated annotation file that
 * contains `symbol` as a complete field, and resolves each row's GO id
 * against an OBO ontology file.
 *
 * @param symbol  Field value to look for. It is matched literally (no regex
 *                interpretation) and must be surrounded by tabs, so it can
 *                match in any column except the first and the last.
 * @param goaPath Path to the (uncompressed) annotation file.
 * @param oboPath Path to the OBO ontology file.
 * @returns One object per matching row, keyed by `GAF_COLUMNS`. Fields that
 *          contain `|` are split into string arrays. `GO_Term` is replaced by
 *          the parsed OBO stanza for that id, or `undefined` when the id is
 *          not found in the OBO file.
 */
const getSymbol = async (symbol: string, goaPath: string, oboPath: string) => {
  // Literal substring test: a RegExp built from `symbol` would treat
  // characters such as "." or "(" as pattern syntax.
  const needle = '\t' + symbol + '\t'
  const results: Array<{ [key: string]: any }> = []

  for await (const line of line$(goaPath)) {
    // Lines starting with "!" are header/comment lines, not annotations.
    if (line.startsWith('!') || !line.includes(needle)) continue

    const record: { [key: string]: any } = {}
    line.split('\t').forEach((field, i) => {
      // Fallback name keeps unexpected extra columns distinguishable.
      const key = GAF_COLUMNS[i] ?? `Column_${i + 1}`
      record[key] = field.includes('|') ? field.split('|') : field
    })
    results.push(record)
  }

  const cacheTerms = await getGOterms(results.map(e => e.GO_Term), oboPath)

  // Index stanzas by id once instead of scanning the list for every row.
  const termById = new Map<string, { [key: string]: any }>()
  for (const term of cacheTerms) {
    if (!termById.has(term.id)) termById.set(term.id, term)
  }
  for (const record of results) {
    record.GO_Term = termById.get(record.GO_Term)
  }

  return results
}

/**
 * Extracts the stanzas of an OBO file whose `id:` tag is one of `terms`.
 *
 * A stanza is captured from its `id: ` line up to the next blank line (or the
 * end of the file). Each captured line is split at its first ": " into a
 * key/value pair; when a tag occurs several times in one stanza only the last
 * value is kept.
 *
 * @param terms   Ids to look up (e.g. "GO:0008150"). Duplicates, empty
 *                strings and non-string entries are ignored.
 * @param oboPath Path to the OBO ontology file.
 * @returns The matching stanzas in file order, as plain string-keyed objects.
 *          Returns an empty array without reading the file when no usable id
 *          is supplied.
 */
const getGOterms = async (terms: string[], oboPath: string) => {
  const wanted = new Set(terms.filter(t => typeof t === 'string' && t !== ''))
  const results: Array<{ [key: string]: any }> = []
  if (wanted.size === 0) return results

  let capturing = false
  let current: { [key: string]: any } = {}

  for await (const line of line$(oboPath)) {
    // Only a line that *starts* with "id: " opens a stanza; tags such as
    // "alt_id: " merely contain that text and must not trigger a capture.
    if (!capturing && line.startsWith('id: ')) {
      capturing = wanted.has(line.slice(4).trim())
    }
    if (!capturing) continue

    if (line.trim() === '') {
      // A blank line terminates the stanza.
      results.push(current)
      current = {}
      capturing = false
      continue
    }

    // Split on the first ": " only, so values containing ": " stay intact.
    const cut = line.indexOf(': ')
    if (cut === -1) current[line] = undefined
    else current[line.slice(0, cut)] = line.slice(cut + 2)
  }

  // The file may end without a trailing blank line after the last stanza.
  if (capturing) results.push(current)

  return results
}

export { getSymbol, getGOterms }