| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- import fs from 'fs'
- import readline from 'readline'
- import {chi2test} from '@stdlib/stats'
/**
 * Opens the file at `path` as a read stream and wraps it in a readline
 * interface, so callers can consume the file one line at a time
 * (for example with `for await`).
 *
 * `crlfDelay: Infinity` makes a `\r` followed by `\n` always count as a
 * single line break.
 */
const line$ = (path: string) => {
  const input = fs.createReadStream(path)
  return readline.createInterface({ input, crlfDelay: Infinity })
}
- // http://geneontology.org/docs/guide-go-evidence-codes/
- // http://wiki.geneontology.org/index.php/Category:Evidence_Codes
- // http://current.geneontology.org/ontology/go-basic.obo
- // http://current.geneontology.org/annotations/goa_human.gaf.gz
/**
 * Collects every annotation line of `goaPath` whose symbol column (third
 * tab-separated field) equals `symbol`, and expands each record's GO term
 * with the matching stanza from the OBO file.
 *
 * @param symbol  Gene symbol to look up; compared literally and
 *                case-sensitively against the symbol column only.
 * @param goaPath Path to the tab-separated annotation file.
 * @param oboPath Path to the OBO ontology file handed to `getGOterms`.
 * @returns One object per matching line, keyed by column name. Empty columns
 *          are omitted, values containing `|` are split into arrays, and
 *          `GO_Term` is replaced by the stanza returned by `getGOterms`
 *          (or `undefined` when no stanza has that id).
 */
const getSymbol = async (symbol:string, goaPath:string, oboPath:string) => {
  const header = [
    'database', 'ID', 'Symbol', 'Qualifier',
    'GO_Term', 'Evidence', 'Evidence_Code',
    'With', 'From','Name', 'Alternative_symbols',
    'Class', 'Taxon', 'Date', 'Origin'
  ]
  const symbolColumn = header.indexOf('Symbol')
  const results = [] as Array<{[key:string]:any}>

  for await (const line of line$(goaPath)) {
    const columns: string[] = line.split('\t')
    // Compare the symbol column directly: no regex escaping issues and no
    // accidental hits on other columns that happen to hold the same text.
    if (columns[symbolColumn] !== symbol) continue

    const record: {[key:string]: any} = {}
    for (let i = 0; i < columns.length; i++) {
      const key = header[i]
      const value = columns[i]
      // Empty columns are skipped *in place* so later columns keep their own
      // name; columns beyond the known header have no name and are ignored.
      if (key === undefined || value === undefined || value === '') continue
      record[key] = value.includes('|') ? value.split('|') : value
    }
    results.push(record)
  }

  const goIds = new Set<string>()
  for (const record of results) {
    if (typeof record.GO_Term === 'string') goIds.add(record.GO_Term)
  }
  // Nothing matched: no need to scan the ontology file at all.
  if (goIds.size === 0) return results

  // Index the stanzas by id once; the first stanza seen for an id wins.
  const stanzaById = new Map<string, {[key:string]: any}>()
  for (const stanza of await getGOterms([...goIds], oboPath)) {
    if (typeof stanza?.id === 'string' && !stanzaById.has(stanza.id)) {
      stanzaById.set(stanza.id, stanza)
    }
  }
  for (const record of results) {
    record.GO_Term = stanzaById.get(record.GO_Term)
  }
  return results
}
/**
 * Extracts the stanzas of the requested term ids from an OBO file.
 *
 * A stanza is captured from its `id: <term>` line up to the next blank line
 * (or end of file). Each captured line is stored as `tag -> value`, split at
 * the first `": "`. A tag that occurs several times in one stanza keeps only
 * its last value.
 *
 * @param terms   A term id or a list of term ids, matched exactly against the
 *                text after `id: `.
 * @param oboPath Path to the OBO file.
 * @returns The captured stanzas, in file order.
 */
const getGOterms = async (
  terms: string[] | string,
  oboPath: string
) => {
  const wanted = new Set(Array.isArray(terms) ? terms : [terms])
  const results: Array<{[key:string]: any}> = []
  let current: {[key:string]: any} | undefined

  for await (const line of line$(oboPath)) {
    if (current === undefined) {
      // Only a line that *starts* with "id: " opens a stanza, so "alt_id: ..."
      // lines of other terms no longer trigger a partial capture.
      if (!line.startsWith('id: ') || !wanted.has(line.slice(4).trim())) continue
      current = {}
    } else if (line.trim() === '') {
      // A blank line terminates the stanza being captured.
      results.push(current)
      current = undefined
      continue
    }
    // Split at the first ": " only, so values containing ": " stay intact.
    const separatorAt = line.indexOf(': ')
    if (separatorAt === -1) continue
    current[line.slice(0, separatorAt)] = line.slice(separatorAt + 2)
  }
  // The file may end without a trailing blank line after the last stanza.
  if (current !== undefined) results.push(current)
  return results
}
/**
 * For each requested GO term, collects the distinct gene symbols annotated
 * with it in the annotation file and merges in the term's OBO stanza.
 *
 * The OBO file and the annotation file are each read exactly once, whatever
 * the number of terms.
 *
 * @param terms      A term id or a list of term ids.
 * @param qualifiers Accepted for interface compatibility; it is not used to
 *                   filter annotation lines.
 * @param goaPath    Path to the tab-separated annotation file. The symbol is
 *                   read from the third column and the GO term from the
 *                   fifth.
 * @param oboPath    Path to the OBO ontology file handed to `getGOterms`.
 * @returns One entry per input term, in input order:
 *          `{ nTotal, genes, ...stanza }`, where `genes` lists the distinct
 *          symbols in order of first appearance and `nTotal` is its length.
 */
const getN = async (
  terms: string | string[],
  qualifiers: string | string[],
  goaPath: string,
  oboPath: string
  // `any[]` keeps the loosely typed result callers already receive.
): Promise<any[]> => {
  void qualifiers
  const SYMBOL_COLUMN = 2
  const GO_TERM_COLUMN = 4

  const termList = Array.isArray(terms) ? terms : [terms]
  if (termList.length === 0) return []

  const genesByTerm = new Map<string, Set<string>>()
  for (const term of termList) genesByTerm.set(term, new Set<string>())

  // One pass over the ontology for all terms; index stanzas by their id.
  const stanzaById = new Map<string, {[key:string]: any}>()
  for (const stanza of await getGOterms(termList, oboPath)) {
    if (typeof stanza?.id === 'string' && !stanzaById.has(stanza.id)) {
      stanzaById.set(stanza.id, stanza)
    }
  }

  // One pass over the annotations. Only the GO-term column is consulted, so
  // a GO id that merely appears in another column is not counted.
  for await (const line of line$(goaPath)) {
    const columns: string[] = line.split('\t')
    const term = columns[GO_TERM_COLUMN]
    const symbol = columns[SYMBOL_COLUMN]
    if (term === undefined || symbol === undefined) continue
    genesByTerm.get(term)?.add(symbol)
  }

  return termList.map(term => {
    const genes = [...(genesByTerm.get(term) ?? [])]
    return { nTotal: genes.length, genes, ...stanzaById.get(term) }
  })
}
// Names given, by position, to the tab-separated columns of an annotation
// line. Columns beyond the end of this list have no name.
// NOTE(review): presumably follows the GAF column order — confirm against
// the GAF specification.
const headerGOA = [
  'database',            // column 1
  'ID',                  // column 2
  'Symbol',              // column 3
  'Qualifier',           // column 4
  'GO_Term',             // column 5
  'Evidence',            // column 6
  'Evidence_Code',       // column 7
  'With',                // column 8
  'From',                // column 9
  'Name',                // column 10
  'Alternative_symbols', // column 11
  'Class',               // column 12
  'Taxon',               // column 13
  'Date',                // column 14
  'Origin',              // column 15
]
/**
 * Finds the GO terms annotated (with the given qualifier) to any of the
 * supplied symbols and scores each term against the whole annotation file.
 *
 * Steps:
 *  1. Scan `goaPath` once, collecting every distinct symbol (the background)
 *     and the GO terms of lines whose symbol is in `symbols` and whose
 *     qualifier column equals `qualifier` exactly.
 *  2. Ask `getN` for the genes annotated with each of those terms.
 *  3. For each term report how many of the input symbols it covers, plus a
 *     chi-square p-value.
 *
 * @param symbols   Gene symbols of interest; duplicates are counted once.
 * @param qualifier Qualifier to match literally against the qualifier column.
 * @param goaPath   Path to the tab-separated annotation file.
 * @param oboPath   Path to the OBO ontology file (forwarded to `getN`).
 * @returns One entry per term,
 *          `{ n, prop, chi2test, observedGenes, ...termEntry }`,
 *          sorted by descending p-value.
 */
const getAllSymbols = async (
  symbols: string[],
  qualifier: string,
  goaPath: string,
  oboPath: string
) => {
  const symbolColumn = headerGOA.indexOf('Symbol')
  const qualifierColumn = headerGOA.indexOf('Qualifier')
  const goTermColumn = headerGOA.indexOf('GO_Term')

  // De-duplicate so a repeated symbol cannot inflate the counts below.
  const wantedSymbols = [...new Set(Array.isArray(symbols) ? symbols : [symbols])]
  const wanted = new Set(wantedSymbols)

  const backgroundGenes = new Set<string>()
  const annotatedTerms = new Set<string>()
  for await (const line of line$(goaPath)) {
    // Comment/header lines start with "!" and carry no annotation.
    if (line.startsWith('!')) continue
    const columns: string[] = line.split('\t')
    const symbol = columns[symbolColumn]
    if (symbol === undefined) continue
    backgroundGenes.add(symbol)

    // Plain string comparison: symbols and qualifiers such as "NOT|enables"
    // are matched literally instead of being interpreted as a regex.
    const term = columns[goTermColumn]
    if (term !== undefined && wanted.has(symbol) && columns[qualifierColumn] === qualifier) {
      annotatedTerms.add(term)
    }
  }
  const nAllGenes = backgroundGenes.size

  const allIDs = await getN([...annotatedTerms], qualifier, goaPath, oboPath)

  return allIDs.map((e:any) => {
    const termGenes = new Set<string>(e.genes)
    const observedGenes = wantedSymbols.filter(s => termGenes.has(s))
    const remaining = Math.max(0, e.genes.length - observedGenes.length)
    return {
      n: observedGenes.length,
      prop: observedGenes.length/e.genes.length,
      // NOTE(review): the table is [[observed, other term genes],
      // [input size, background size]]; its rows overlap, so this is not a
      // standard 2x2 independence table — confirm this is the intended test.
      chi2test: chi2test(
        [[observedGenes.length, remaining],
         [wantedSymbols.length, nAllGenes]]
      ).pValue,
      observedGenes,
      ...e
    }
  }).sort((a:any, b:any) => b.chi2test - a.chi2test)
}
- export { getSymbol, getGOterms, getN, getAllSymbols }
|