'use strict';

// ---------------------------------------------------------------------------
// TypeScript emit helpers.
// `__awaiter` is no longer used by the code below (it uses native
// async/await); it is kept in case code outside this section references it.
// ---------------------------------------------------------------------------
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
  function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
  return new (P || (P = Promise))(function (resolve, reject) {
    function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
    function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
    function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
    step((generator = generator.apply(thisArg, _arguments || [])).next());
  });
};
var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const piscina_1 = __importDefault(require("piscina"));

// ---------------------------------------------------------------------------
// Shared request settings
// ---------------------------------------------------------------------------

/** NCBI E-utilities search endpoint (returns matching record IDs). */
const ESEARCH_ENDPOINT = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi';

/** NCBI E-utilities fetch endpoint (returns full records). */
const EFETCH_ENDPOINT = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';

// SECURITY NOTE(review): an API key is committed in source. It is kept as a
// fallback so existing runs behave exactly as before, but it should be rotated
// and supplied through the NCBI_API_KEY environment variable instead.
// `||` (not `??`) is deliberate: an empty environment value also falls back.
const NCBI_API_KEY = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';

// The `query` strings below look like JSONata expressions that the worker
// applies to the parsed response (assumption: the worker source is not visible
// here — confirm in ./workers/esearch.js). They are laid out on several lines
// for readability; whitespace outside quoted strings is not significant in
// JSONata.

/** Query used by the single-result searches: keeps only the first matched ID. */
const FIRST_ID_QUERY = `{"First_ID": **.Id[0]}`;

/**
 * Extraction query for an Entrez Gene record.
 * Fix: "Day" previously read `Date_std_month` (copy-paste of the line above),
 * so the day of the update date always repeated the month.
 */
const GENE_QUERY = `{
  "Locus" : **.Gene_ref_locus,
  "Update_Date" : **.Gene_track_update_date.Date.Date_std.Date_std.{
    "Year": Date_std_year,
    "Month": Date_std_month,
    "Day": Date_std_day
  },
  "Location" : **.Gene_ref_maploc,
  "Summary" : **.Entrezgene_summary,
  "Genomic_Position": **.Entrezgene_locus.Gene_commentary[Gene_commentary_type.value='genomic'][0].{
    "Accession": Gene_commentary_accession,
    "Positions": $.{
      "from" : Gene_commentary_seqs.**.Seq_interval_from,
      "to" : Gene_commentary_seqs.**.Seq_interval_to,
      "strand": Gene_commentary_seqs.**.Na_strand.value
    }
  },
  "Gene_Ontology" : **.Entrezgene_properties.Gene_commentary[Gene_commentary_heading='GeneOntology'].Gene_commentary_comment.{
    "Functions" : Gene_commentary[Gene_commentary_label='Function'].Gene_commentary_comment.Gene_commentary.{
      "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
      "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
      "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
    },
    "Processes" : Gene_commentary[Gene_commentary_label='Process'].Gene_commentary_comment.Gene_commentary.{
      "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
      "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
      "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
    },
    "Components" : Gene_commentary[Gene_commentary_label='Component'].Gene_commentary_comment.Gene_commentary.{
      "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
      "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
      "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
    }
  },
  "Transcript": {
    "Accession" : **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_accession,
    "Exon_Count": **.Entrezgene_properties.Gene_commentary[Gene_commentary_label='Exon count'].Gene_commentary_text
  },
  "Products": **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_products.Gene_commentary[Gene_commentary_heading='Product'][0].{
    "Accession": Gene_commentary_accession,
    "Domains" : Gene_commentary_comment.Gene_commentary[Gene_commentary_heading='Conserved Domains'].Gene_commentary_comment.Gene_commentary.{
      "DB" : Gene_commentary_source.**.Dbtag_db,
      "ID" : Gene_commentary_source.**.Object_id_id,
      "Anchor" : Gene_commentary_source.**.Other_source_anchor,
      "Location": Gene_commentary_comment.**.Gene_commentary_text
    }
  },
  "Biblio_PMID": **.PubMedId
}`;

/** Extraction query for a nuccore (GenBank XML) transcript record. */
const TRANSCRIPT_QUERY = `{
  "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
  "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
  "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
  "Length": GBSet[0].GBSeq[0].GBSeq_length,
  "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
  "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
  "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
  "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
    "key": GBFeature_key,
    "location": GBFeature_location,
    "value": GBFeature_key in "gene" ? GBFeature_quals.GBQualifier[GBQualifier_name='gene'][0].GBQualifier_value
      : GBFeature_key in "exon" ? GBFeature_quals.GBQualifier[GBQualifier_name='inference'][0].GBQualifier_value
      : GBFeature_key in "CDS" ? {
        "Codon_Start": GBFeature_quals.GBQualifier[GBQualifier_name='codon_start'][0].GBQualifier_value,
        "Protein_Id": GBFeature_quals.GBQualifier[GBQualifier_name='protein_id'][0].GBQualifier_value,
        "Translation": GBFeature_quals.GBQualifier[GBQualifier_name='translation'][0].GBQualifier_value
      }
      : GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
  },
  "Sequence": $uppercase(GBSet[0].GBSeq[0].GBSeq_sequence)
}`;

/** Extraction query for a protein (GenBank XML) record. */
const PROTEIN_QUERY = `{
  "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
  "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
  "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
  "Length": GBSet[0].GBSeq[0].GBSeq_length,
  "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
  "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
  "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
  "Calculated_Mol_Wt": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature[GBFeature_key='Protein'][0].GBFeature_quals.GBQualifier[GBQualifier_name='calculated_mol_wt'][0].GBQualifier_value,
  "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
    "key": GBFeature_key,
    "location": GBFeature_location,
    "value": GBFeature_key in "Region" ? GBFeature_quals.GBQualifier[GBQualifier_name='region_name'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
      : GBFeature_key in "Site" ? GBFeature_quals.GBQualifier[GBQualifier_name='site_type'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
      : GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
  },
  "Sequence": $uppercase(GBSet[0].GBSeq[0].GBSeq_sequence)
}`;

/** Extraction query for dbSNP document summaries. */
const SNP_QUERY = `**.DocumentSummary.{
  "ID": SNP_ID,
  "Accession": ACC,
  "Position": $number($split(CHRPOS, ":")[1]),
  "Classe": FXN_CLASS,
  "MAF": GLOBAL_MAFS.MAF.{
    "Study": STUDY,
    "Frequency": FREQ
  },
  "Update_Date": UPDATEDATE
}`;

// ---------------------------------------------------------------------------
// Task descriptors handed to the worker pool
// ---------------------------------------------------------------------------

/**
 * Build the efetch task for one Entrez Gene record.
 * @param {string} id - Gene ID, as returned by the gene search.
 * @returns {object} Task descriptor; `query` is a one-element array here.
 */
const NCBIGeneTemplate = (id) => ({
  db: 'gene',
  id,
  api_key: NCBI_API_KEY,
  retmode: 'xml',
  endpoint: EFETCH_ENDPOINT,
  query: [GENE_QUERY],
});

/**
 * Build the esearch task that looks up a human gene by name.
 * @param {string} name - Gene name, e.g. 'ZFP36L2'.
 * @returns {object} Task descriptor whose query keeps the first matched ID.
 */
const searchGeneTemplate = (name) => ({
  term: `${name}[Gene Name]+AND+Human[Organism]`,
  db: 'gene',
  api_key: NCBI_API_KEY,
  endpoint: ESEARCH_ENDPOINT,
  query: FIRST_ID_QUERY,
});

/**
 * Build the esearch task that looks up a human transcript in nuccore.
 * @param {string} accession - Transcript accession.
 * @returns {object} Task descriptor whose query keeps the first matched ID.
 */
const searchTranscriptTemplate = (accession) => ({
  term: `${accession}+AND+Human[Organism]`,
  db: 'nuccore',
  api_key: NCBI_API_KEY,
  endpoint: ESEARCH_ENDPOINT,
  query: FIRST_ID_QUERY,
});

/**
 * Build the esearch task that looks up a human protein.
 * @param {string} accession - Protein accession, e.g. 'NP_008818'.
 * @returns {object} Task descriptor whose query keeps the first matched ID.
 */
const searchProtTemplate = (accession) => ({
  term: `${accession}+AND+Human[Organism]`,
  db: 'protein',
  api_key: NCBI_API_KEY,
  endpoint: ESEARCH_ENDPOINT,
  query: FIRST_ID_QUERY,
});

/**
 * Build the esearch task that looks up human SNPs.
 * @param {string} accession - Search term (getSNPs passes a gene name).
 * @returns {object} Task descriptor whose query keeps every matched ID.
 */
const searchSNPTemplate = (accession) => ({
  term: `${accession}+AND+Human[Organism]`,
  db: 'snp',
  api_key: NCBI_API_KEY,
  endpoint: ESEARCH_ENDPOINT,
  query: `**.Id`,
});

/**
 * Build the efetch task for one nuccore transcript record.
 * @param {string} id - nuccore ID, as returned by the transcript search.
 * @returns {object} Task descriptor.
 */
const NCBITranscriptTemplate = (id) => ({
  db: 'nuccore',
  id,
  retmode: 'xml',
  api_key: NCBI_API_KEY,
  endpoint: EFETCH_ENDPOINT,
  query: TRANSCRIPT_QUERY,
});

/**
 * Build the efetch task for one protein record.
 * @param {string} id - Protein ID, as returned by the protein search.
 * @returns {object} Task descriptor.
 */
const NCBIProteinTemplate = (id) => ({
  db: 'protein',
  id,
  retmode: 'xml',
  api_key: NCBI_API_KEY,
  endpoint: EFETCH_ENDPOINT,
  query: PROTEIN_QUERY,
});

/**
 * Build the efetch task for one dbSNP record.
 * @param {string} id - SNP ID, as returned by the SNP search.
 * @returns {object} Task descriptor.
 */
const NCBISNPTemplate = (id) => ({
  db: 'snp',
  id,
  retmode: 'xml',
  api_key: NCBI_API_KEY,
  endpoint: EFETCH_ENDPOINT,
  query: SNP_QUERY,
});

// ---------------------------------------------------------------------------
// Worker pool and request helpers
// ---------------------------------------------------------------------------

/** Pool (at most 4 threads) running ./workers/esearch.js for every task. */
const getEsearch = new piscina_1.default({
  filename: path_1.default.resolve(__dirname, './workers/esearch.js'),
  maxThreads: 4,
});

/**
 * Run one task on the pool and unwrap the `value` field of the worker result.
 * @param {object} task - Descriptor built by one of the *Template functions.
 * @returns {Promise<*>} The worker result's `value`.
 */
const runTask = async (task) => {
  const outcome = await getEsearch.run(task);
  return outcome.value;
};

/**
 * Run a first-ID search and return the ID as a string.
 * @param {object} searchTask - Descriptor built with FIRST_ID_QUERY.
 * @param {string} description - What was searched for; used in the error text.
 * @returns {Promise<string>} The first matching ID.
 * @throws {Error} When the search yields no ID. Previously the literal string
 *   'undefined' was sent on to efetch in that case.
 */
const findFirstId = async (searchTask, description) => {
  const found = await runTask(searchTask);
  const id = found == null ? undefined : found.First_ID;
  if (id == null) {
    throw new Error(`NCBI esearch returned no ID for ${description}`);
  }
  return String(id);
};

// ---------------------------------------------------------------------------
// Public lookups
// ---------------------------------------------------------------------------

/**
 * Fetch a gene record and attach its reference transcript and protein.
 * @param {string} name - Gene name, e.g. 'ZFP36L2'.
 * @returns {Promise<object>} Extracted gene record, with the transcript stored
 *   on `Transcript.nuccore` and the protein on `Products.protein`.
 * @throws {Error} When the gene, transcript or protein search finds no ID.
 */
const getGene = async (name) => {
  const id = await findFirstId(searchGeneTemplate(name), `gene "${name}"`);
  const result = await runTask(NCBIGeneTemplate(id));
  // The two lookups depend only on the gene record, not on each other,
  // so they run concurrently.
  const [transcript, protein] = await Promise.all([
    getTranscript(result.Transcript.Accession),
    getProtein(result.Products.Accession),
  ]);
  result.Transcript.nuccore = transcript;
  result.Products.protein = protein;
  return result;
};

/**
 * Fetch one transcript record from nuccore.
 * @param {string} accession - Transcript accession.
 * @returns {Promise<object>} Extracted transcript record.
 * @throws {Error} When the search finds no ID.
 */
const getTranscript = async (accession) => {
  const id = await findFirstId(searchTranscriptTemplate(accession), `transcript "${accession}"`);
  return runTask(NCBITranscriptTemplate(id));
};

/**
 * Fetch one protein record.
 * @param {string} accession - Protein accession, e.g. 'NP_008818'.
 * @returns {Promise<object>} Extracted protein record.
 * @throws {Error} When the search finds no ID.
 */
const getProtein = async (accession) => {
  const id = await findFirstId(searchProtTemplate(accession), `protein "${accession}"`);
  return runTask(NCBIProteinTemplate(id));
};

/**
 * Fetch the dbSNP summaries matching a search term.
 * @param {string} name - Search term, e.g. a gene name such as 'ZFP36L2'.
 * @returns {Promise<Array>} Flattened list of extracted SNP summaries; empty
 *   when the search matches nothing.
 */
const getSNPs = async (name) => {
  const found = await runTask(searchSNPTemplate(name));
  // The `**.Id` query may yield nothing or a bare value instead of an array
  // when there are zero or one hits (presumed JSONata behaviour — confirm in
  // the worker), so normalize before mapping.
  let ids;
  if (found == null) {
    ids = [];
  } else if (Array.isArray(found)) {
    ids = found;
  } else {
    ids = [found];
  }
  const fetched = await Promise.all(
    ids.map((id) => getEsearch.run(NCBISNPTemplate(String(id)))),
  );
  return fetched.flatMap((e) => e.value);
};

// ---------------------------------------------------------------------------
// Script entry point
// ---------------------------------------------------------------------------

// E-utilities reference: https://www.ncbi.nlm.nih.gov/books/NBK25499/
// Other lookups available here: getProtein('NP_008818'), getSNPs('ZFP36L2').
(async () => {
  const r = await getGene('ZFP36L2');
  console.log(r);
  await fs_1.default.promises.writeFile('test.json', JSON.stringify(r));
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});