'use strict'

import fs from "fs"
import path from "path"
import Piscina from "piscina"

/**
 * API key sent with every E-utilities request.
 *
 * Can be overridden with the `NCBI_API_KEY` environment variable; an empty
 * value is treated as unset. The literal fallback keeps the script working
 * exactly as before.
 * NOTE(review): a key committed to source control should be rotated and
 * supplied through the environment only.
 */
const NCBI_API_KEY = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607'

/** E-utilities endpoint used by the `search*Template` builders. */
const ESEARCH_ENDPOINT = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'

/** E-utilities endpoint used by the `NCBI*Template` builders. */
const EFETCH_ENDPOINT = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'

/**
 * Builds the efetch request for one record of the `gene` database.
 *
 * `query` is a JSONata expression (wrapped in a one-element array, as in the
 * original) that reshapes the record into Locus / Update_Date / Location /
 * Summary / Genomic_Position / Gene_Ontology / Transcript / Products /
 * Biblio_PMID.
 *
 * @param id Gene UID, as returned by `searchGeneTemplate`'s `First_ID`.
 */
const NCBIGeneTemplate = (id:string) => {
  return {
    db: 'gene',
    id,
    api_key: NCBI_API_KEY,
    retmode: 'xml',
    endpoint: EFETCH_ENDPOINT,
    query: [
      // Fix: "Day" previously read Date_std_month (copy-paste of the line above).
      // Fix: the RefSeq heading literal under "Transcript" was split by a line
      // break; it now matches the same literal used under "Products".
      `{
        "Locus": **.Gene_ref_locus,
        "Update_Date": **.Gene_track_update_date.Date.Date_std.Date_std.{
          "Year": Date_std_year,
          "Month": Date_std_month,
          "Day": Date_std_day
        },
        "Location": **.Gene_ref_maploc,
        "Summary": **.Entrezgene_summary,
        "Genomic_Position": **.Entrezgene_locus.Gene_commentary[Gene_commentary_type.value='genomic'][0].{
          "Accession": Gene_commentary_accession,
          "Positions": $.{
            "from": Gene_commentary_seqs.**.Seq_interval_from,
            "to": Gene_commentary_seqs.**.Seq_interval_to,
            "strand": Gene_commentary_seqs.**.Na_strand.value
          }
        },
        "Gene_Ontology": **.Entrezgene_properties.Gene_commentary[Gene_commentary_heading='GeneOntology'].Gene_commentary_comment.{
          "Functions": Gene_commentary[Gene_commentary_label='Function'].Gene_commentary_comment.Gene_commentary.{
            "ID": Gene_commentary_source.Other_source.**.Object_id_id,
            "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
            "Anchor": Gene_commentary_source.Other_source.Other_source_anchor
          },
          "Processes": Gene_commentary[Gene_commentary_label='Process'].Gene_commentary_comment.Gene_commentary.{
            "ID": Gene_commentary_source.Other_source.**.Object_id_id,
            "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
            "Anchor": Gene_commentary_source.Other_source.Other_source_anchor
          },
          "Components": Gene_commentary[Gene_commentary_label='Component'].Gene_commentary_comment.Gene_commentary.{
            "ID": Gene_commentary_source.Other_source.**.Object_id_id,
            "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
            "Anchor": Gene_commentary_source.Other_source.Other_source_anchor
          }
        },
        "Transcript": {
          "Accession": **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_accession,
          "Exon_Count": **.Entrezgene_properties.Gene_commentary[Gene_commentary_label='Exon count'].Gene_commentary_text
        },
        "Products": **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_products.Gene_commentary[Gene_commentary_heading='Product'][0].{
          "Accession": Gene_commentary_accession,
          "Domains": Gene_commentary_comment.Gene_commentary[Gene_commentary_heading='Conserved Domains'].Gene_commentary_comment.Gene_commentary.{
            "DB": Gene_commentary_source.**.Dbtag_db,
            "ID": Gene_commentary_source.**.Object_id_id,
            "Anchor": Gene_commentary_source.**.Other_source_anchor,
            "Location": Gene_commentary_comment.**.Gene_commentary_text
          }
        },
        "Biblio_PMID": **.PubMedId
      }`
    ]
  }
}

/**
 * Builds the esearch request that looks a human gene up by name in `gene`.
 * The query keeps only the first returned Id, as `First_ID`.
 *
 * @param name Gene name, e.g. `'ZFP36L2'`.
 */
const searchGeneTemplate = (name:string) => {
  return {
    term: name + '[Gene Name]+AND+Human[Organism]',
    db: 'gene',
    api_key: NCBI_API_KEY,
    endpoint: ESEARCH_ENDPOINT,
    query: `{"First_ID": **.Id[0]}`
  }
}

/**
 * Builds the esearch request that looks an accession up in `nuccore`.
 * The query keeps only the first returned Id, as `First_ID`.
 *
 * @param accession Accession to search for.
 */
const searchTranscriptTemplate = (accession:string) => {
  return {
    term: accession + '+AND+Human[Organism]',
    db: 'nuccore',
    api_key: NCBI_API_KEY,
    endpoint: ESEARCH_ENDPOINT,
    query: `{"First_ID": **.Id[0]}`
  }
}

/**
 * Builds the esearch request that looks an accession up in `protein`.
 * The query keeps only the first returned Id, as `First_ID`.
 *
 * @param accession Accession to search for, e.g. `'NP_008818'`.
 */
const searchProtTemplate = (accession:string) => {
  return {
    term: accession + '+AND+Human[Organism]',
    db: 'protein',
    api_key: NCBI_API_KEY,
    endpoint: ESEARCH_ENDPOINT,
    query: `{"First_ID": **.Id[0]}`
  }
}

/**
 * Builds the esearch request for the `snp` database.
 * Unlike the other search templates, the query keeps every returned Id.
 *
 * @param accession Search term (the demo passes a gene name).
 */
const searchSNPTemplate = (accession:string) => {
  return {
    term: accession + '+AND+Human[Organism]',
    db: 'snp',
    api_key: NCBI_API_KEY,
    endpoint: ESEARCH_ENDPOINT,
    query: `**.Id`
  }
}

/**
 * Builds the efetch request for one `nuccore` record.
 * The query extracts the header fields, one `{key, location, value}` entry per
 * feature (the source of `value` depends on the feature key), and the
 * upper-cased sequence.
 *
 * @param id nuccore UID, as returned by `searchTranscriptTemplate`'s `First_ID`.
 */
const NCBITranscriptTemplate = (id:string) => {
  return {
    db: 'nuccore',
    id,
    retmode: 'xml',
    api_key: NCBI_API_KEY,
    endpoint: EFETCH_ENDPOINT,
    query: `{
      "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
      "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
      "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
      "Length": GBSet[0].GBSeq[0].GBSeq_length,
      "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
      "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
      "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
      "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
        "key": GBFeature_key,
        "location": GBFeature_location,
        "value": GBFeature_key in "gene" ? GBFeature_quals.GBQualifier[GBQualifier_name='gene'][0].GBQualifier_value
          : GBFeature_key in "exon" ? GBFeature_quals.GBQualifier[GBQualifier_name='inference'][0].GBQualifier_value
          : GBFeature_key in "CDS" ? {
              "Codon_Start": GBFeature_quals.GBQualifier[GBQualifier_name='codon_start'][0].GBQualifier_value,
              "Protein_Id": GBFeature_quals.GBQualifier[GBQualifier_name='protein_id'][0].GBQualifier_value,
              "Translation": GBFeature_quals.GBQualifier[GBQualifier_name='translation'][0].GBQualifier_value
            }
          : GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
      },
      "Sequence": $uppercase(GBSet[0].GBSeq[0].GBSeq_sequence)
    }`
  }
}

/**
 * Builds the efetch request for one `protein` record.
 * Same layout as the transcript query, plus `Calculated_Mol_Wt` taken from the
 * `Protein` feature; `Region` and `Site` features get their name/type
 * concatenated with their note.
 *
 * @param id protein UID, as returned by `searchProtTemplate`'s `First_ID`.
 */
const NCBIProteinTemplate = (id:string) => {
  return {
    db: 'protein',
    id,
    retmode: 'xml',
    api_key: NCBI_API_KEY,
    endpoint: EFETCH_ENDPOINT,
    query: `{
      "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
      "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
      "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
      "Length": GBSet[0].GBSeq[0].GBSeq_length,
      "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
      "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
      "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
      "Calculated_Mol_Wt": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature[GBFeature_key='Protein'][0].GBFeature_quals.GBQualifier[GBQualifier_name='calculated_mol_wt'][0].GBQualifier_value,
      "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
        "key": GBFeature_key,
        "location": GBFeature_location,
        "value": GBFeature_key in "Region" ? GBFeature_quals.GBQualifier[GBQualifier_name='region_name'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
          : GBFeature_key in "Site" ? GBFeature_quals.GBQualifier[GBQualifier_name='site_type'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
          : GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
      },
      "Sequence": $uppercase(GBSet[0].GBSeq[0].GBSeq_sequence)
    }`
  }
}

/**
 * Builds the efetch request for one `snp` record.
 * The query maps every `DocumentSummary` to ID / Accession / Position (the
 * part of CHRPOS after the colon, as a number) / Classe / MAF / Update_Date.
 *
 * @param id snp UID, as returned by `searchSNPTemplate`.
 */
const NCBISNPTemplate = (id:string) => {
  return {
    db: 'snp',
    id,
    retmode: 'xml',
    api_key: NCBI_API_KEY,
    endpoint: EFETCH_ENDPOINT,
    query: `**.DocumentSummary.{
      "ID": SNP_ID,
      "Accession": ACC,
      "Position": $number($split(CHRPOS, ":")[1]),
      "Classe": FXN_CLASS,
      "MAF": GLOBAL_MAFS.MAF.{
        "Study": STUDY,
        "Frequency": FREQ
      },
      "Update_Date": UPDATEDATE
    }`
  }
}

/**
 * Worker pool that executes the request templates above (at most 4 threads).
 * NOTE(review): the worker source is not part of this file; presumably it
 * performs the HTTP call and evaluates `query` against the response, returning
 * `{ value }` — confirm against ./workers/esearch.js.
 */
const getEsearch = new Piscina({
  filename: path.resolve(__dirname, './workers/esearch.js'),
  maxThreads: 4
})

/**
 * Runs one request template on the pool and returns the worker's `value`.
 * Replaces the former `Promise.all([template].map(run))[0].value` pattern,
 * which wrapped a single request in an array for no benefit.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the worker's result shape is not declared in this file
const runQuery = async (request: object): Promise<any> => {
  const outcome = await getEsearch.run(request)
  return outcome.value
}

/**
 * Runs a `{"First_ID": …}` search and returns the ID as a string.
 *
 * @param request Search template to execute.
 * @param what Human-readable description of the lookup, used in the error.
 * @throws Error when the search produced no ID. Previously the string
 *         "undefined" was silently forwarded to efetch.
 */
const resolveFirstId = async (request: object, what: string): Promise<string> => {
  const found = await runQuery(request)
  const id = found?.First_ID
  if (id === undefined || id === null) {
    throw new Error(`NCBI esearch returned no ID for ${what}`)
  }
  return String(id)
}

/**
 * Fetches the nuccore record for an accession (search, then fetch).
 *
 * @param accession Accession to look up.
 * @returns The object produced by `NCBITranscriptTemplate`'s query.
 * @throws Error when the accession cannot be resolved to an ID.
 */
const getTranscript = async (accession:string) => {
  const id = await resolveFirstId(searchTranscriptTemplate(accession), `transcript "${accession}"`)
  return runQuery(NCBITranscriptTemplate(id))
}

/**
 * Fetches the protein record for an accession (search, then fetch).
 *
 * @param accession Accession to look up.
 * @returns The object produced by `NCBIProteinTemplate`'s query.
 * @throws Error when the accession cannot be resolved to an ID.
 */
const getProtein = async (accession:string) => {
  const id = await resolveFirstId(searchProtTemplate(accession), `protein "${accession}"`)
  return runQuery(NCBIProteinTemplate(id))
}

/**
 * Fetches a gene by name and attaches its transcript and protein records as
 * `Transcript.nuccore` and `Products.protein`.
 *
 * @param name Gene name, e.g. `'ZFP36L2'`.
 * @returns The object produced by `NCBIGeneTemplate`'s query, enriched in place.
 * @throws Error when the gene cannot be resolved or its record is empty.
 */
const getGene = async (name:string) => {
  const id = await resolveFirstId(searchGeneTemplate(name), `gene "${name}"`)
  const result = await runQuery(NCBIGeneTemplate(id))
  if (result === undefined || result === null) {
    throw new Error(`NCBI efetch returned no gene record for "${name}" (id ${id})`)
  }

  // A record without a transcript or product accession is returned without
  // the corresponding enrichment instead of searching for "undefined".
  const transcriptAccession = result.Transcript?.Accession
  if (transcriptAccession) {
    result.Transcript.nuccore = await getTranscript(transcriptAccession)
  }

  const proteinAccession = result.Products?.Accession
  if (proteinAccession) {
    result.Products.protein = await getProtein(proteinAccession)
  }

  return result
}

/**
 * Fetches the summaries of every SNP matching a search term.
 *
 * @param name Search term passed to `searchSNPTemplate`.
 * @returns Flat array of SNP summaries; empty when the search finds nothing.
 */
const getSNPs = async (name:string) => {
  const found = await runQuery(searchSNPTemplate(name))
  // The `**.Id` query yields nothing for zero matches and a bare value for a
  // single match, so normalise to an array before mapping.
  const ids: unknown[] = found === undefined || found === null
    ? []
    : Array.isArray(found) ? found : [found]
  const summaries = await Promise.all(ids.map(id => runQuery(NCBISNPTemplate(String(id)))))
  return summaries.flatMap(summary => summary)
}

/** Demo entry point: fetches one gene, logs it and writes it to test.json. */
const main = async (): Promise<void> => {
  // https://www.ncbi.nlm.nih.gov/books/NBK25499/
  // const r = await getSNP('1667092841')
  // const r = await getProtein('NP_008818')
  // const r = await getSNPs('ZFP36L2')
  const r = await getGene('ZFP36L2')
  console.log(r)
  await fs.promises.writeFile('test.json', JSON.stringify(r))
}

// Report failures explicitly instead of leaving an unhandled rejection.
main().catch((err: unknown) => {
  console.error(err)
  process.exitCode = 1
})

/// https://rest.ensembl.org/lookup/symbol/homo_sapiens/ZFP36L2?expand=1;content-type=application/json