| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226 |
- 'use strict';
/**
 * TypeScript down-level helper that drives a generator as if it were an
 * async function.
 *
 * @param {*} thisArg - `this` value the generator function is applied with.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments forwarded to the generator function.
 * @param {PromiseConstructor|undefined} P - Promise implementation; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator function whose `yield`s stand in for `await`s.
 * @returns {Promise<*>} Settles with the generator's return value, or rejects with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so every yielded value can be chained with `.then`.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => { resolve(value); });
    };
    return new P((resolve, reject) => {
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        // Resume the generator with the settled value, or throw the rejection into it.
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            } catch (err) {
                reject(err);
            }
        };
        const onRejected = (reason) => {
            try {
                step(generator["throw"](reason));
            } catch (err) {
                reject(err);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
/**
 * TypeScript interop helper: returns ES-module namespaces unchanged and wraps
 * anything else as `{ default: value }`.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
- Object.defineProperty(exports, "__esModule", { value: true });
- const fs_1 = __importDefault(require("fs"));
- const path_1 = __importDefault(require("path"));
- const piscina_1 = __importDefault(require("piscina"));
/**
 * Builds the EFetch request descriptor for one record of the NCBI `gene` database.
 *
 * The `query` entry is a JSONata expression that reshapes the fetched record
 * into a compact object (locus, update date, location, summary, genomic
 * position, gene ontology, transcript, products, bibliography).
 *
 * NOTE(review): the API key is hard-coded here; consider loading it from
 * configuration or the environment instead of committing it.
 *
 * @param {string} id - Gene UID to fetch.
 * @returns {{db: string, id: string, api_key: string, retmode: string, endpoint: string, query: string[]}}
 *   Request descriptor; `query` is a one-element array.
 */
const NCBIGeneTemplate = (id) => {
    // "Day" previously read Date_std_month (copy-paste slip), so the day of the
    // update date always repeated the month; it now reads Date_std_day.
    const geneQuery = `{ "Locus" : **.Gene_ref_locus,
        "Update_Date" : **.Gene_track_update_date.Date.Date_std.Date_std.{"Year": Date_std_year, "Month": Date_std_month, "Day": Date_std_day},
        "Location" : **.Gene_ref_maploc,
        "Summary" : **.Entrezgene_summary,
        "Genomic_Position": **.Entrezgene_locus.Gene_commentary[Gene_commentary_type.value='genomic'][0].{
            "Accession": Gene_commentary_accession,
            "Positions": $.{
                "from" : Gene_commentary_seqs.**.Seq_interval_from,
                "to" : Gene_commentary_seqs.**.Seq_interval_to,
                "strand": Gene_commentary_seqs.**.Na_strand.value
            }
        },
        "Gene_Ontology" : **.Entrezgene_properties.Gene_commentary[Gene_commentary_heading='GeneOntology'].Gene_commentary_comment.{
            "Functions" : Gene_commentary[Gene_commentary_label='Function'].Gene_commentary_comment.Gene_commentary.{
                "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
                "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
                "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
            },
            "Processes" : Gene_commentary[Gene_commentary_label='Process'].Gene_commentary_comment.Gene_commentary.{
                "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
                "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
                "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
            },
            "Components" : Gene_commentary[Gene_commentary_label='Component'].Gene_commentary_comment.Gene_commentary.{
                "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
                "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
                "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
            }
        },
        "Transcript": {
            "Accession" : **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_accession,
            "Exon_Count": **.Entrezgene_properties.Gene_commentary[Gene_commentary_label='Exon count'].Gene_commentary_text
        },
        "Products": **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_products.Gene_commentary[Gene_commentary_heading='Product'][0].{
            "Accession": Gene_commentary_accession,
            "Domains" : Gene_commentary_comment.Gene_commentary[Gene_commentary_heading='Conserved Domains'].Gene_commentary_comment.Gene_commentary.{
                "DB" : Gene_commentary_source.**.Dbtag_db,
                "ID" : Gene_commentary_source.**.Object_id_id,
                "Anchor" : Gene_commentary_source.**.Other_source_anchor,
                "Location": Gene_commentary_comment.**.Gene_commentary_text
            }
        },
        "Biblio_PMID": **.PubMedId
    }`;
    return {
        db: 'gene',
        id,
        api_key: '47796c7650360571735f00f510315f871607',
        retmode: 'xml',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        query: [geneQuery]
    };
};
/**
 * Builds the ESearch request descriptor that looks a human gene up by name in
 * the `gene` database and keeps only the first matching ID.
 *
 * NOTE(review): the API key is hard-coded here.
 *
 * @param {string} name - Gene name to search for.
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}} Request descriptor.
 */
const searchGeneTemplate = (name) => {
    const term = `${name}[Gene Name]+AND+Human[Organism]`;
    return {
        term,
        db: 'gene',
        api_key: '47796c7650360571735f00f510315f871607',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `{"First_ID": **.Id[0]}`
    };
};
/**
 * Builds the ESearch request descriptor that resolves a human transcript
 * accession in the `nuccore` database, keeping only the first matching ID.
 *
 * NOTE(review): the API key is hard-coded here.
 *
 * @param {string} accession - Transcript accession to search for.
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}} Request descriptor.
 */
const searchTranscriptTemplate = (accession) => {
    const term = `${accession}+AND+Human[Organism]`;
    return {
        term,
        db: 'nuccore',
        api_key: '47796c7650360571735f00f510315f871607',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `{"First_ID": **.Id[0]}`
    };
};
/**
 * Builds the ESearch request descriptor that resolves a human protein
 * accession in the `protein` database, keeping only the first matching ID.
 *
 * NOTE(review): the API key is hard-coded here.
 *
 * @param {string} accession - Protein accession to search for.
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}} Request descriptor.
 */
const searchProtTemplate = (accession) => {
    const term = `${accession}+AND+Human[Organism]`;
    return {
        term,
        db: 'protein',
        api_key: '47796c7650360571735f00f510315f871607',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `{"First_ID": **.Id[0]}`
    };
};
/**
 * Builds the ESearch request descriptor that searches the `snp` database for
 * human entries matching a term. Unlike the other search templates, the query
 * selects every matching ID rather than only the first.
 *
 * NOTE(review): the API key is hard-coded here.
 *
 * @param {string} accession - Search term (the caller in this file passes a gene name).
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}} Request descriptor.
 */
const searchSNPTemplate = (accession) => {
    const term = `${accession}+AND+Human[Organism]`;
    return {
        term,
        db: 'snp',
        api_key: '47796c7650360571735f00f510315f871607',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `**.Id`
    };
};
/**
 * Builds the EFetch request descriptor for one `nuccore` (transcript) record.
 *
 * The JSONata `query` keeps the header fields of the first GBSeq entry, maps
 * each feature to `{key, location, value}` (the value depends on the feature
 * key: gene, exon, CDS, or the feature's note otherwise) and upper-cases the
 * sequence.
 *
 * NOTE(review): the API key is hard-coded here.
 *
 * @param {string} id - nuccore UID to fetch.
 * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}} Request descriptor.
 */
const NCBITranscriptTemplate = (id) => {
    const transcriptQuery = `{
        "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
        "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
        "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
        "Length": GBSet[0].GBSeq[0].GBSeq_length,
        "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
        "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
        "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
        "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
            "key": GBFeature_key,
            "location": GBFeature_location,
            "value":
                GBFeature_key in "gene" ? GBFeature_quals.GBQualifier[GBQualifier_name='gene'][0].GBQualifier_value :
                GBFeature_key in "exon" ? GBFeature_quals.GBQualifier[GBQualifier_name='inference'][0].GBQualifier_value :
                GBFeature_key in "CDS" ? {
                    "Codon_Start": GBFeature_quals.GBQualifier[GBQualifier_name='codon_start'][0].GBQualifier_value,
                    "Protein_Id": GBFeature_quals.GBQualifier[GBQualifier_name='protein_id'][0].GBQualifier_value,
                    "Translation": GBFeature_quals.GBQualifier[GBQualifier_name='translation'][0].GBQualifier_value
                } :
                GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
        },
        "Sequence": $uppercase(GBSet[0].GBSeq[0].GBSeq_sequence)
    }`;
    return {
        db: 'nuccore',
        id,
        retmode: 'xml',
        api_key: '47796c7650360571735f00f510315f871607',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        query: transcriptQuery
    };
};
/**
 * Builds the EFetch request descriptor for one `protein` record.
 *
 * The JSONata `query` keeps the header fields of the first GBSeq entry, the
 * calculated molecular weight of the first `Protein` feature, a
 * `{key, location, value}` entry per feature (Region and Site values are
 * prefixed with their name/type, others use the note) and the upper-cased
 * sequence.
 *
 * NOTE(review): the API key is hard-coded here.
 *
 * @param {string} id - protein UID to fetch.
 * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}} Request descriptor.
 */
const NCBIProteinTemplate = (id) => {
    const proteinQuery = `{
        "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
        "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
        "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
        "Length": GBSet[0].GBSeq[0].GBSeq_length,
        "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
        "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
        "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
        "Calculated_Mol_Wt": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature[GBFeature_key='Protein'][0].GBFeature_quals.GBQualifier[GBQualifier_name='calculated_mol_wt'][0].GBQualifier_value,
        "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
            "key": GBFeature_key,
            "location": GBFeature_location,
            "value":
                GBFeature_key in "Region" ? GBFeature_quals.GBQualifier[GBQualifier_name='region_name'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value :
                GBFeature_key in "Site" ? GBFeature_quals.GBQualifier[GBQualifier_name='site_type'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value : GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
        },
        "Sequence": $uppercase(GBSet[0].GBSeq[0].GBSeq_sequence)
    }`;
    return {
        db: 'protein',
        id,
        retmode: 'xml',
        api_key: '47796c7650360571735f00f510315f871607',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        query: proteinQuery
    };
};
/**
 * Builds the EFetch request descriptor for one `snp` record.
 *
 * The JSONata `query` maps every DocumentSummary to its ID, accession,
 * numeric position (the part of CHRPOS after the colon), function class,
 * per-study minor allele frequencies and update date.
 *
 * NOTE(review): the API key is hard-coded here.
 *
 * @param {string} id - SNP UID to fetch.
 * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}} Request descriptor.
 */
const NCBISNPTemplate = (id) => {
    const snpQuery = `**.DocumentSummary.{
        "ID": SNP_ID,
        "Accession": ACC,
        "Position": $number($split(CHRPOS, ":")[1]),
        "Classe": FXN_CLASS,
        "MAF": GLOBAL_MAFS.MAF.{
            "Study": STUDY,
            "Frequency": FREQ
        },
        "Update_Date": UPDATEDATE
    }`;
    return {
        db: 'snp',
        id,
        retmode: 'xml',
        api_key: '47796c7650360571735f00f510315f871607',
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        query: snpQuery
    };
};
// Piscina pool (at most 4 threads) that runs ./workers/esearch.js, resolved
// relative to this file. Every request descriptor in this module is submitted
// through `getEsearch.run(...)`.
const getEsearch = new piscina_1.default({ filename: path_1.default.resolve(__dirname, './workers/esearch.js'), maxThreads: 4 });
/**
 * Fetches the NCBI gene record for a human gene name and enriches it with the
 * reference transcript (`Transcript.nuccore`) and its protein product
 * (`Products.protein`).
 *
 * Changes from the previous version:
 * - a search with no hit resolves to `undefined` instead of fetching the
 *   literal id "undefined";
 * - enrichment is skipped when the record has no transcript/product accession
 *   instead of dereferencing a missing object;
 * - the transcript lookup goes through `getTranscript`, which performs the
 *   same search + fetch that used to be duplicated inline;
 * - the two independent lookups run concurrently.
 *
 * @param {string} name - Gene name, e.g. 'ZFP36L2'.
 * @returns {Promise<object|undefined>} The enriched gene record, or `undefined`
 *   when the search returns no gene ID.
 */
const getGene = async (name) => {
    const search = await getEsearch.run(searchGeneTemplate(name));
    const id = search.value.First_ID;
    if (id == null) {
        return undefined;
    }
    const fetched = await getEsearch.run(NCBIGeneTemplate(String(id)));
    const result = fetched.value;
    const transcriptAccession = result.Transcript && result.Transcript.Accession;
    const productAccession = result.Products && result.Products.Accession;
    // The transcript and protein lookups do not depend on each other.
    const [nuccore, protein] = await Promise.all([
        transcriptAccession ? getTranscript(transcriptAccession) : undefined,
        productAccession ? getProtein(productAccession) : undefined
    ]);
    if (transcriptAccession) {
        result.Transcript.nuccore = nuccore;
    }
    if (productAccession) {
        result.Products.protein = protein;
    }
    return result;
};
/**
 * Resolves a transcript accession to its nuccore ID and fetches the record.
 *
 * When the search yields no ID the function resolves to `undefined` rather
 * than issuing a fetch for the literal id "undefined", as the previous
 * version did.
 *
 * @param {string} accession - Transcript accession.
 * @returns {Promise<object|undefined>} The value produced by the fetch worker,
 *   or `undefined` when the accession is not found.
 */
const getTranscript = async (accession) => {
    const search = await getEsearch.run(searchTranscriptTemplate(accession));
    const id = search.value.First_ID;
    if (id == null) {
        return undefined;
    }
    const fetched = await getEsearch.run(NCBITranscriptTemplate(String(id)));
    return fetched.value;
};
/**
 * Resolves a protein accession to its protein-database ID and fetches the record.
 *
 * When the search yields no ID the function resolves to `undefined` rather
 * than issuing a fetch for the literal id "undefined", as the previous
 * version did.
 *
 * @param {string} accession - Protein accession, e.g. 'NP_008818'.
 * @returns {Promise<object|undefined>} The value produced by the fetch worker,
 *   or `undefined` when the accession is not found.
 */
const getProtein = async (accession) => {
    const search = await getEsearch.run(searchProtTemplate(accession));
    const id = search.value.First_ID;
    if (id == null) {
        return undefined;
    }
    const fetched = await getEsearch.run(NCBIProteinTemplate(String(id)));
    return fetched.value;
};
/**
 * Searches the `snp` database for a term and fetches the summary of every hit.
 *
 * The search query (`**.Id`) is a JSONata path: it evaluates to nothing when
 * there is no hit and to a bare value (not an array) when there is exactly
 * one. The previous version called `.map` on that result directly and threw
 * in both cases; the IDs are now normalised to an array first.
 *
 * @param {string} name - Search term (a gene name such as 'ZFP36L2').
 * @returns {Promise<Array<*>>} Flattened list of the values produced by the
 *   fetch worker, one fetch per ID; empty when nothing matches.
 */
const getSNPs = async (name) => {
    const search = await getEsearch.run(searchSNPTemplate(name));
    const found = search.value;
    let ids;
    if (found == null) {
        ids = [];
    } else if (Array.isArray(found)) {
        ids = found;
    } else {
        ids = [found];
    }
    const fetched = await Promise.all(ids.map((id) => getEsearch.run(NCBISNPTemplate(String(id)))));
    return fetched.flatMap((entry) => entry.value);
};
// Script entry point: fetch the ZFP36L2 gene record, print it and dump it to
// test.json. The other lookups in this file (e.g. getProtein('NP_008818'),
// getSNPs('ZFP36L2')) can be tried the same way.
// E-utilities reference: https://www.ncbi.nlm.nih.gov/books/NBK25499/
(async () => {
    const r = await getGene('ZFP36L2');
    console.log(r);
    await fs_1.default.promises.writeFile('test.json', JSON.stringify(r));
})().catch((err) => {
    // Without this handler a failed request or write surfaced only as an
    // unhandled promise rejection; report it and mark the run as failed.
    console.error(err);
    process.exitCode = 1;
});
|