| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.xml.gz
- import fs from 'fs'
- import readline from 'readline'
- import { XMLParser } from 'fast-xml-parser'
- import jsonata from 'jsonata'
/**
 * Open `path` as a line-by-line reader.
 *
 * The returned readline interface is async-iterable and yields each line
 * without its terminator; `crlfDelay: Infinity` makes a `\r\n` pair count as
 * one line break.
 */
const line$ = (path: string) => {
  const input = fs.createReadStream(path)
  return readline.createInterface({ input, crlfDelay: Infinity })
}
/**
 * Build a byte-offset index of the `<entry>` elements in an XML file.
 *
 * Each index row is `ACC1;ACC2;...\t<from>\t<to>\n`, where `from` is the byte
 * offset of the line starting with `<entry` and `to` is the offset just past
 * the line starting with `</entry>`. Rows are appended to the index file, so
 * an existing index is extended rather than replaced.
 *
 * @param filePath  XML file to index.
 * @param indexPath Index file to append to; defaults to `filePath + '.jsi'`.
 */
const makeIndex = async (filePath: string, indexPath?: string) => {
  const target = indexPath || filePath + '.jsi'
  const entryStart = /^<entry/
  const entryEnd = /^<\/entry>/
  const accessionSel = /<accession>(.*?)<\/accession>/
  // Rows are written in batches to avoid one open/write/close cycle per entry.
  const flushEvery = 1000

  let byteAcc = 0
  let from: number | undefined
  let accessions: string[] = []
  let pending: string[] = []

  const flush = async () => {
    if (pending.length === 0) return
    await fs.promises.appendFile(target, pending.join(''))
    pending = []
  }

  for await (const line of line$(filePath)) {
    if (entryStart.test(line)) from = byteAcc
    // Offsets are consumed as byte positions, so count encoded bytes rather
    // than UTF-16 code units. The +1 assumes a single-byte "\n" terminator.
    byteAcc += Buffer.byteLength(line) + 1

    const hit = accessionSel.exec(line)
    if (hit) accessions.push(hit[1])

    if (entryEnd.test(line)) {
      // Skip a closing tag that was never opened instead of writing "undefined".
      if (from !== undefined) {
        pending.push(accessions.join(';') + '\t' + from + '\t' + byteAcc + '\n')
      }
      from = undefined
      accessions = []
      if (pending.length >= flushEvery) await flush()
    }
  }
  await flush()
}
/**
 * Read the byte range `[from, to)` of a file and decode it as UTF-8.
 *
 * @param path File to read.
 * @param from Offset of the first byte to read.
 * @param to   Offset just past the last byte to read.
 * @returns The decoded text; shorter than `to - from` bytes when the file ends
 *          before `to`.
 * @throws RangeError when `to - from` is negative or not an integer.
 * @throws Any error raised while opening or reading the file (the returned
 *         promise rejects instead of hanging or resolving with blank data).
 */
const readOffset = async (path: string, from: number, to: number): Promise<string> => {
  const size = to - from
  if (!Number.isInteger(size) || size < 0) {
    throw new RangeError(`Invalid byte range [${from}, ${to})`)
  }
  const buffer = Buffer.alloc(size)
  // Read-only access is enough; 'r+' would fail on files that are not writable.
  const filehandle = await fs.promises.open(path, 'r')
  try {
    let filled = 0
    // A single read may return fewer bytes than requested, so keep going until
    // the range is filled or the end of the file is reached.
    while (filled < size) {
      const { bytesRead } = await filehandle.read(buffer, filled, size - filled, from + filled)
      if (bytesRead === 0) break
      filled += bytesRead
    }
    // Decode only what was actually read so short reads carry no NUL padding.
    return buffer.toString('utf8', 0, filled)
  } finally {
    await filehandle.close()
  }
}
/**
 * Look up the byte range of an entry in the `.jsi` index next to `dbPath`,
 * building the index first when the index file does not exist.
 *
 * The accession is compared exactly against the `;`-separated accession list
 * in the first column of each index row. It is not treated as a pattern, so
 * it cannot match part of another accession or the offset columns.
 *
 * @param dbPath    XML database file; the index is read from `dbPath + '.jsi'`.
 * @param accession Accession to look up.
 * @returns `[from, to]` byte offsets, or `[0, 0]` when no row lists the accession.
 */
const getEntryOffset = async (dbPath: string, accession: string): Promise<number[]> => {
  const indexPath = dbPath + '.jsi'
  if (!fs.existsSync(indexPath)) await makeIndex(dbPath)

  for await (const line of line$(indexPath)) {
    const [accessions = '', from, to] = line.split('\t')
    if (accessions.split(';').includes(accession)) {
      return [Number(from), Number(to)]
    }
  }
  return [0, 0]
}
/**
 * Fetch one entry from the XML database and return it parsed into an object.
 *
 * The entry's byte range comes from `getEntryOffset`; that slice of the file
 * is read with `readOffset` and handed to the XML parser. Attributes are kept
 * under their bare names (no prefix) and text content that sits next to
 * attributes is stored under the key `value`.
 *
 * @param dbPath    XML database file.
 * @param accession Accession of the entry to load.
 */
const getEnrty = async (dbPath: string, accession: string) => {
  const parserOptions = {
    ignoreAttributes: false,
    alwaysCreateTextNode: false,
    attributeNamePrefix: "",
    textNodeName: "value",
    allowBooleanAttributes: true,
  }
  const xmlParser = new XMLParser(parserOptions)

  const [from, to] = await getEntryOffset(dbPath, accession)
  const xml = await readOffset(dbPath, from, to)
  return xmlParser.parse(xml)
}
/**
 * Resolve a gene name to an accession through a tab-separated id-mapping file
 * and load the corresponding entry.
 *
 * A line matches when a `Gene_Name` column is immediately followed by a column
 * equal to `geneName`. The comparison is exact, so `TP53` does not match
 * `TP53BP1` and regex metacharacters in the name are harmless. The accession
 * is taken from the first column of the first matching line, and scanning
 * stops there.
 *
 * @param idmappingPath Tab-separated mapping file (presumably UniProt
 *                      `idmapping.dat`-style rows; confirm against the caller).
 * @param dbPath        XML database file passed on to `getEnrty`.
 * @param geneName      Gene name to look up.
 * @throws Error when no line maps `geneName`. The lookup previously fell
 *         through with an undefined accession and returned an unrelated entry.
 */
const getEntryFromGeneName = async (idmappingPath: string, dbPath: string, geneName: string) => {
  let accession: string | undefined

  for await (const line of line$(idmappingPath)) {
    const cols = line.split('\t')
    const matches = cols.some((col, i) => col === 'Gene_Name' && cols[i + 1] === geneName)
    if (matches) {
      accession = cols[0]
      break
    }
  }

  if (accession === undefined) {
    throw new Error(`No Gene_Name mapping found for "${geneName}" in ${idmappingPath}`)
  }
  return await getEnrty(dbPath, accession)
}
/**
 * Collect gene-like names of interaction partners mentioned in a parsed entry.
 *
 * Three places are searched:
 *  1. `comment[type="interaction"].interactant` labels, minus the entry's own accessions;
 *  2. `reference` scopes containing "INTERACTION WITH", using the text after that marker;
 *  3. `comment[type="subunit"]` text lines containing "Interacts", with
 *     parenthesised remarks removed.
 *
 * The candidates are merged, de-duplicated and sorted. Upper-case tokens
 * matching `/[A-Z]{2,}[A-Z0-9]{1,}/` are then extracted from them and
 * de-duplicated again, keeping first-seen order.
 *
 * @param json Parsed entry object as produced by the XML parser (`{ entry: ... }`).
 * @returns Unique tokens; empty when nothing is found.
 */
const getInteractionsFromEntry = async (json: any) => {
  // JSONata returns undefined for no match and unwraps single-element
  // sequences, so every query result is normalised to an array here.
  const toArray = (value: any): any[] => {
    if (value === undefined || value === null) return []
    return Array.isArray(value) ? value : [value]
  }
  // `evaluate` is awaited because it returns a Promise in JSONata 2.x;
  // awaiting a plain value (1.x) is harmless.
  const query = async (expression: string): Promise<any[]> =>
    toArray(await jsonata(expression).evaluate(json))

  const uniprotIDs = toArray(json?.entry?.accession)

  // Comment interactant
  const interactants = await query(`entry.comment[type="interaction"].interactant`)
  const genesInteractant = interactants
    .map((e: any) => e?.label)
    .filter((label: any) => label && !uniprotIDs.includes(label))

  // Reference scope = INTERACTION WITH
  const references = await query(`entry.reference[scope ~> /INTERACTION WITH/i ]`)
  const marker = /INTERACTION WITH /i
  const genesScopeInter = references
    // Each scope string is handled on its own so that a second
    // "INTERACTION WITH" scope does not leak the marker words into the result.
    .flatMap((ref: any) => toArray(ref?.scope))
    .filter((scope: any): scope is string => typeof scope === 'string')
    .flatMap((scope: string) => {
      const hit = marker.exec(scope)
      return hit ? scope.slice(hit.index + hit[0].length).split(/; | AND /) : []
    })

  // Comment subunit
  const subunitTexts = await query(`entry.comment[type="subunit"].text.value`)
  const genesSubunit = subunitTexts
    .filter((text: any): text is string => typeof text === 'string')
    .flatMap((text: string) => text.replace(/ *\([^)]*\) */g, '').split(/\n/))
    .filter((line: string) => /Interacts/.test(line))
    .flatMap((line: string) => line.match(/[A-Z][A-Z0-9]{2,}/g) ?? [])

  const candidates = [...new Set([...genesScopeInter, ...genesInteractant, ...genesSubunit])]
    .filter((e: any): e is string => typeof e === 'string' && e.length > 0)
    .sort()

  // Token extraction can yield the same name from different candidates,
  // so de-duplicate once more.
  const tokens = candidates.flatMap((name) => name.match(/[A-Z]{2,}[A-Z0-9]{1,}/g) ?? [])
  return [...new Set(tokens)]
}
- // const findDistance = async (idmappingPath: string, dbPath:string, geneNameA:string, geneNameB:string, maxDistance = 6) => {
- // let rounds = [[geneNameA]]
- // let tree = {[geneNameA]: {}} as {[key:string]:any}
- // let run = true
- // let a = tree
- // Object.keys(a).map((gene) => )
- // let nIter = 0
- // while(nIter <= maxDistance && run) {
- // for (const gA of rounds[nIter]) {
- // console.log(nIter,gA);
-
- // const tmp = await getInteractionsFromEntry(await getEntryFromGeneName(idmappingPath, dbPath, gA))
- // if (tmp.includes(geneNameB)) { run = false; break }
- // rounds.push(tmp)
- // }
- // nIter++
- // }
- // //console.log(rounds);
-
-
- // return nIter
- // }
- export { makeIndex, readOffset, getEnrty, getEntryFromGeneName, getInteractionsFromEntry }
|