|
|
@@ -19,6 +19,7 @@ const NCBIGeneTemplate = (id) => {
|
|
|
return {
|
|
|
db: 'gene',
|
|
|
id,
|
|
|
+ api_key: '47796c7650360571735f00f510315f871607',
|
|
|
retmode: 'xml',
|
|
|
endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
|
|
|
query: [
|
|
|
@@ -52,10 +53,10 @@ const NCBIGeneTemplate = (id) => {
|
|
|
}
|
|
|
},
|
|
|
"Transcript": {
|
|
|
- "Accession" : **.'Entrezgene_comments'.'Gene_commentary'[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_accession,
|
|
|
- "Exon_Count": **.'Entrezgene_properties'.'Gene_commentary'[Gene_commentary_label='Exon count'].Gene_commentary_text
|
|
|
+ "Accession" : **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_accession,
|
|
|
+ "Exon_Count": **.Entrezgene_properties.Gene_commentary[Gene_commentary_label='Exon count'].Gene_commentary_text
|
|
|
},
|
|
|
- "Products": **.'Entrezgene_comments'.'Gene_commentary'[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_products.Gene_commentary[Gene_commentary_heading='Product'][0].{
|
|
|
+ "Products": **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_products.Gene_commentary[Gene_commentary_heading='Product'][0].{
|
|
|
"Accession": Gene_commentary_accession,
|
|
|
"Domains" : Gene_commentary_comment.Gene_commentary[Gene_commentary_heading='Conserved Domains'].Gene_commentary_comment.Gene_commentary.{
|
|
|
"DB" : Gene_commentary_source.**.Dbtag_db,
|
|
|
@@ -71,23 +72,154 @@ const NCBIGeneTemplate = (id) => {
|
|
|
};
|
|
|
/**
 * Build the request descriptor used to search the NCBI `gene` database for a
 * human gene by name.
 *
 * @param {string} name - Gene name; concatenated verbatim in front of the
 *   `[Gene Name]+AND+Human[Organism]` filter.
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
 *   Descriptor handed to `getEsearch.run`. `query` exposes a matched ID as
 *   `First_ID` (looks like a JSONata expression evaluated by the worker —
 *   confirm against ./workers/esearch.js).
 */
const searchGeneTemplate = (name) => {
    // FIXME: a credential is committed here. NCBI_API_KEY takes precedence so
    // deployments can supply their own key; the literal is kept only so that
    // behaviour is unchanged when the variable is unset. Rotate the key and
    // drop the fallback. `||` is deliberate: an empty variable is not a key.
    const apiKey = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';
    return {
        term: name + '[Gene Name]+AND+Human[Organism]',
        db: 'gene',
        api_key: apiKey,
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `{"First_ID": **.Id[0]}`
    };
};
|
|
|
/**
 * Build the request descriptor used to search the NCBI `nuccore` database for
 * a human sequence by accession.
 *
 * @param {string} accession - Accession; concatenated verbatim in front of the
 *   `+AND+Human[Organism]` filter.
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
 *   Descriptor handed to `getEsearch.run`. `query` exposes a matched ID as
 *   `First_ID` (looks like a JSONata expression evaluated by the worker —
 *   confirm against ./workers/esearch.js).
 */
const searchTranscriptTemplate = (accession) => {
    // FIXME: a credential is committed here. NCBI_API_KEY takes precedence so
    // deployments can supply their own key; the literal is kept only so that
    // behaviour is unchanged when the variable is unset. Rotate the key and
    // drop the fallback. `||` is deliberate: an empty variable is not a key.
    const apiKey = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';
    return {
        term: accession + '+AND+Human[Organism]',
        db: 'nuccore',
        api_key: apiKey,
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `{"First_ID": **.Id[0]}`
    };
};
|
|
|
/**
 * Build the request descriptor used to search the NCBI `protein` database for
 * a human protein by accession.
 *
 * @param {string} accession - Accession; concatenated verbatim in front of the
 *   `+AND+Human[Organism]` filter.
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
 *   Descriptor handed to `getEsearch.run`. `query` exposes a matched ID as
 *   `First_ID` (looks like a JSONata expression evaluated by the worker —
 *   confirm against ./workers/esearch.js).
 */
const searchProtTemplate = (accession) => {
    // FIXME: a credential is committed here. NCBI_API_KEY takes precedence so
    // deployments can supply their own key; the literal is kept only so that
    // behaviour is unchanged when the variable is unset. Rotate the key and
    // drop the fallback. `||` is deliberate: an empty variable is not a key.
    const apiKey = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';
    return {
        term: accession + '+AND+Human[Organism]',
        db: 'protein',
        api_key: apiKey,
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `{"First_ID": **.Id[0]}`
    };
};
|
|
|
/**
 * Build the request descriptor used to search the NCBI `snp` database.
 *
 * Unlike the other search templates in this file, the `query` here selects
 * every `Id` rather than wrapping one in a `First_ID` object.
 *
 * @param {string} accession - Search text; concatenated verbatim in front of
 *   the `+AND+Human[Organism]` filter.
 * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
 *   Descriptor handed to `getEsearch.run`.
 */
const searchSNPTemplate = (accession) => {
    // FIXME: a credential is committed here. NCBI_API_KEY takes precedence so
    // deployments can supply their own key; the literal is kept only so that
    // behaviour is unchanged when the variable is unset. Rotate the key and
    // drop the fallback. `||` is deliberate: an empty variable is not a key.
    const apiKey = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';
    return {
        term: accession + '+AND+Human[Organism]',
        db: 'snp',
        api_key: apiKey,
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
        query: `**.Id`
    };
};
|
|
|
/**
 * Build the request descriptor used to fetch one `nuccore` record as XML and
 * reshape it.
 *
 * The `query` reads the first `GBSeq` of the first `GBSet` and emits its
 * header fields, the sequence, and one `{key, location, value}` entry per
 * feature. `value` depends on the feature key: the `gene` qualifier for
 * "gene", the `inference` qualifier for "exon", a
 * `{Codon_Start, Protein_Id, Translation}` object for "CDS", and the `note`
 * qualifier otherwise. (Looks like a JSONata expression evaluated by the
 * worker — confirm against ./workers/esearch.js.)
 *
 * @param {string} id - Record ID, passed through unchanged as `id`.
 * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}}
 *   Descriptor handed to `getEsearch.run`.
 */
const NCBITranscriptTemplate = (id) => {
    // FIXME: a credential is committed here. NCBI_API_KEY takes precedence so
    // deployments can supply their own key; the literal is kept only so that
    // behaviour is unchanged when the variable is unset. Rotate the key and
    // drop the fallback. `||` is deliberate: an empty variable is not a key.
    const apiKey = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';
    return {
        db: 'nuccore',
        id,
        retmode: 'xml',
        api_key: apiKey,
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        query: `{
            "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
            "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
            "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
            "Length": GBSet[0].GBSeq[0].GBSeq_length,
            "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
            "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
            "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
            "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
                "key": GBFeature_key,
                "location": GBFeature_location,
                "value":
                    GBFeature_key in "gene" ? GBFeature_quals.GBQualifier[GBQualifier_name='gene'][0].GBQualifier_value :
                    GBFeature_key in "exon" ? GBFeature_quals.GBQualifier[GBQualifier_name='inference'][0].GBQualifier_value :
                    GBFeature_key in "CDS" ? {
                        "Codon_Start": GBFeature_quals.GBQualifier[GBQualifier_name='codon_start'][0].GBQualifier_value,
                        "Protein_Id": GBFeature_quals.GBQualifier[GBQualifier_name='protein_id'][0].GBQualifier_value,
                        "Translation": GBFeature_quals.GBQualifier[GBQualifier_name='translation'][0].GBQualifier_value
                    } :
                    GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
            },
            "Sequence": GBSet[0].GBSeq[0].GBSeq_sequence
        }`
    };
};
|
|
|
/**
 * Build the request descriptor used to fetch one `protein` record as XML and
 * reshape it.
 *
 * The `query` reads the first `GBSeq` of the first `GBSet` and emits its
 * header fields, the `calculated_mol_wt` qualifier of the first "Protein"
 * feature, the sequence, and one `{key, location, value}` entry per feature.
 * `value` is `region_name` + ' ' + `note` for "Region" features,
 * `site_type` + ' ' + `note` for "Site" features, and `note` alone otherwise.
 * (Looks like a JSONata expression evaluated by the worker — confirm against
 * ./workers/esearch.js.)
 *
 * @param {string} id - Record ID, passed through unchanged as `id`.
 * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}}
 *   Descriptor handed to `getEsearch.run`.
 */
const NCBIProteinTemplate = (id) => {
    // FIXME: a credential is committed here. NCBI_API_KEY takes precedence so
    // deployments can supply their own key; the literal is kept only so that
    // behaviour is unchanged when the variable is unset. Rotate the key and
    // drop the fallback. `||` is deliberate: an empty variable is not a key.
    const apiKey = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';
    return {
        db: 'protein',
        id,
        retmode: 'xml',
        api_key: apiKey,
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        query: `{
            "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
            "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
            "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
            "Length": GBSet[0].GBSeq[0].GBSeq_length,
            "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
            "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
            "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
            "Calculated_Mol_Wt": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature[GBFeature_key='Protein'][0].GBFeature_quals.GBQualifier[GBQualifier_name='calculated_mol_wt'][0].GBQualifier_value,
            "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
                "key": GBFeature_key,
                "location": GBFeature_location,
                "value":
                    GBFeature_key in "Region" ? GBFeature_quals.GBQualifier[GBQualifier_name='region_name'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value :
                    GBFeature_key in "Site" ? GBFeature_quals.GBQualifier[GBQualifier_name='site_type'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value : GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
            },
            "Sequence": GBSet[0].GBSeq[0].GBSeq_sequence
        }`
    };
};
|
|
|
/**
 * Build the request descriptor used to fetch one `snp` record as XML and
 * reshape it.
 *
 * For each `DocumentSummary` the `query` emits the SNP ID, accession, the
 * numeric part of `CHRPOS` after the ":" separator, the function class, the
 * per-study minor allele frequencies and the update date. (Looks like a
 * JSONata expression evaluated by the worker — confirm against
 * ./workers/esearch.js.)
 *
 * @param {string} id - Record ID, passed through unchanged as `id`.
 * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}}
 *   Descriptor handed to `getEsearch.run`.
 */
const NCBISNPTemplate = (id) => {
    // FIXME: a credential is committed here. NCBI_API_KEY takes precedence so
    // deployments can supply their own key; the literal is kept only so that
    // behaviour is unchanged when the variable is unset. Rotate the key and
    // drop the fallback. `||` is deliberate: an empty variable is not a key.
    const apiKey = process.env.NCBI_API_KEY || '47796c7650360571735f00f510315f871607';
    return {
        db: 'snp',
        id,
        retmode: 'xml',
        api_key: apiKey,
        endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
        query: `**.DocumentSummary.{
            "ID": SNP_ID,
            "Accession": ACC ,
            "Position": $number($split(CHRPOS, ":")[1]),
            "Classe": FXN_CLASS,
            "MAF": GLOBAL_MAFS.MAF.{
                "Study": STUDY,
                "Frequency": FREQ
            },
            "Update_Date": UPDATEDATE
        }`
    };
};
|
|
|
// Piscina pool, capped at four threads, whose `run` executes the request
// descriptors built by the template functions via ./workers/esearch.js.
const getEsearch = new piscina_1.default({
    maxThreads: 4,
    filename: path_1.default.resolve(__dirname, './workers/esearch.js'),
});
|
|
|
/**
 * Look up a human gene by name and enrich it with its transcript and protein
 * records.
 *
 * Steps: search for the gene ID, fetch the gene record, then attach the
 * nucleotide record under `Transcript.nuccore` and the protein record under
 * `Products.protein`, each only when the gene record carries the matching
 * accession.
 *
 * @param {string} name - Gene name passed to `searchGeneTemplate`.
 * @returns {Promise<object|undefined>} The enriched gene record, or
 *   `undefined` when the search yields no ID.
 */
const getGene = (name) => __awaiter(void 0, void 0, void 0, function* () {
    const found = (yield getEsearch.run(searchGeneTemplate(name))).value;
    const id = found == null ? undefined : found.First_ID;
    if (id == null) {
        // Without this guard String(undefined) would be sent as the gene ID.
        return undefined;
    }
    const result = (yield getEsearch.run(NCBIGeneTemplate(String(id)))).value;
    if (result == null) {
        return result;
    }
    // Enrichment is skipped, rather than crashing or searching for the text
    // "undefined", when the gene record lacks the corresponding accession.
    if (result.Transcript != null && result.Transcript.Accession != null) {
        result.Transcript.nuccore = yield getTranscript(result.Transcript.Accession);
    }
    if (result.Products != null && result.Products.Accession != null) {
        result.Products.protein = yield getProtein(result.Products.Accession);
    }
    return result;
});
|
|
|
/**
 * Look up a nucleotide record by accession: search for its ID, then fetch
 * and reshape the record.
 *
 * @param {string} accession - Accession passed to `searchTranscriptTemplate`.
 * @returns {Promise<object|undefined>} The value produced by the fetch step,
 *   or `undefined` when the search yields no ID.
 */
const getTranscript = (accession) => __awaiter(void 0, void 0, void 0, function* () {
    const found = (yield getEsearch.run(searchTranscriptTemplate(accession))).value;
    const id = found == null ? undefined : found.First_ID;
    if (id == null) {
        // Without this guard String(undefined) would be sent as the record ID.
        return undefined;
    }
    return (yield getEsearch.run(NCBITranscriptTemplate(String(id)))).value;
});
|
|
|
/**
 * Look up a protein record by accession: search for its ID, then fetch and
 * reshape the record.
 *
 * @param {string} accession - Accession passed to `searchProtTemplate`.
 * @returns {Promise<object|undefined>} The value produced by the fetch step,
 *   or `undefined` when the search yields no ID.
 */
const getProtein = (accession) => __awaiter(void 0, void 0, void 0, function* () {
    const found = (yield getEsearch.run(searchProtTemplate(accession))).value;
    const id = found == null ? undefined : found.First_ID;
    if (id == null) {
        // Without this guard String(undefined) would be sent as the record ID.
        return undefined;
    }
    return (yield getEsearch.run(NCBIProteinTemplate(String(id)))).value;
});
|
|
|
/**
 * Search the `snp` database and fetch every matching record.
 *
 * @param {string} name - Search text passed to `searchSNPTemplate`.
 * @returns {Promise<Array>} The values produced by the per-ID fetches,
 *   flattened one level; an empty array when the search matches nothing.
 */
const getSNPs = (name) => __awaiter(void 0, void 0, void 0, function* () {
    const found = (yield getEsearch.run(searchSNPTemplate(name))).value;
    // The search query (`**.Id`) may come back as a bare value for a single
    // match and as nothing at all for no match, so normalise to an array
    // before mapping over it.
    const ids = found == null ? [] : [].concat(found);
    const responses = yield Promise.all(ids.map((id) => getEsearch.run(NCBISNPTemplate(String(id)))));
    return responses.flatMap((response) => response.value);
});
|
|
|
// Ad-hoc run on module load: fetch the SNPs for ZFP36L2, print them and dump
// them to test.json. Swap in getGene / getTranscript / getProtein here to
// exercise the other lookups.
(() => __awaiter(void 0, void 0, void 0, function* () {
    // https://www.ncbi.nlm.nih.gov/books/NBK25499/
    const r = yield getSNPs('ZFP36L2');
    console.log(r);
    yield fs_1.default.promises.writeFile('test.json', JSON.stringify(r));
}))().catch((err) => {
    // Report the failure and signal it through the exit status instead of
    // leaving the promise rejection unhandled.
    console.error(err);
    process.exitCode = 1;
});
|