index.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. 'use strict';
  /**
   * Generator-based async driver: runs `generator` as a coroutine and returns a
   * promise for its final `return` value.
   *
   * Every yielded value is wrapped in the promise implementation (unless it is
   * already an instance of it); once settled, the outcome is fed back into the
   * generator through `next()` on fulfilment or `throw()` on rejection. Anything
   * thrown while stepping the generator rejects the returned promise.
   * A pre-existing `this.__awaiter` takes precedence over this definition.
   *
   * @param {*} thisArg - `this` binding used when invoking the generator function.
   * @param {ArrayLike<*>} [_arguments] - Arguments for the generator function (none when falsy).
   * @param {PromiseConstructor} [P] - Promise implementation; the global `Promise` when falsy.
   * @param {GeneratorFunction} generator - Generator function whose `yield`s act as awaits.
   * @returns {Promise<*>} Settles with the generator's return value or its first uncaught error.
   */
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseImpl = P || Promise;

    // Leave instances of the chosen implementation untouched; wrap everything else.
    const toPromise = (value) => {
      if (value instanceof PromiseImpl) {
        return value;
      }
      return new PromiseImpl((resolve) => { resolve(value); });
    };

    return new PromiseImpl((resolve, reject) => {
      const iterator = generator.apply(thisArg, _arguments || []);

      const advance = (result) => {
        if (result.done) {
          resolve(result.value);
          return;
        }
        toPromise(result.value).then(onFulfilled, onRejected);
      };
      const onFulfilled = (value) => {
        try { advance(iterator.next(value)); } catch (err) { reject(err); }
      };
      const onRejected = (reason) => {
        try { advance(iterator.throw(reason)); } catch (err) { reject(err); }
      };

      advance(iterator.next());
    });
  };
  /**
   * Normalises a required module so that `.default` can always be read from it.
   *
   * A truthy module flagged with `__esModule` is returned unchanged; any other
   * value is wrapped as `{ default: value }`.
   * A pre-existing `this.__importDefault` takes precedence over this definition.
   *
   * @param {*} mod - Value returned by `require`.
   * @returns {*} `mod` itself, or an object exposing it under `default`.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
      return mod;
    }
    return { "default": mod };
  };
  14. Object.defineProperty(exports, "__esModule", { value: true });
  15. const fs_1 = __importDefault(require("fs"));
  16. const path_1 = __importDefault(require("path"));
  17. const piscina_1 = __importDefault(require("piscina"));
  /**
   * Builds the EFetch request descriptor for one record of the NCBI `gene`
   * database, together with the query expression that projects the XML
   * response into a compact gene summary.
   *
   * The returned object is what this file hands to `getEsearch.run()`. Unlike
   * the other templates in this file, `query` is a one-element array here.
   *
   * Projected keys: Locus, Update_Date ({Year, Month, Day}), Location, Summary,
   * Genomic_Position, Gene_Ontology (Functions / Processes / Components),
   * Transcript, Products and Biblio_PMID.
   *
   * @param {string} id - Gene identifier placed in the `id` request field.
   * @returns {{db: string, id: string, api_key: string, retmode: string, endpoint: string, query: string[]}}
   */
  const NCBIGeneTemplate = (id) => {
    return {
      db: 'gene',
      id,
      // NOTE(review): the API key is committed in source; consider loading it from configuration.
      api_key: '47796c7650360571735f00f510315f871607',
      retmode: 'xml',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query: [
        // "Day" reads Date_std_day; it previously read Date_std_month, so the
        // day of the update date always mirrored the month.
        `{ "Locus" : **.Gene_ref_locus,
          "Update_Date" : **.Gene_track_update_date.Date.Date_std.Date_std.{"Year": Date_std_year, "Month": Date_std_month, "Day": Date_std_day},
          "Location" : **.Gene_ref_maploc,
          "Summary" : **.Entrezgene_summary,
          "Genomic_Position": **.Entrezgene_locus.Gene_commentary[Gene_commentary_type.value='genomic'][0].{
            "Accession": Gene_commentary_accession,
            "Positions": $.{
              "from" : Gene_commentary_seqs.**.Seq_interval_from,
              "to" : Gene_commentary_seqs.**.Seq_interval_to,
              "strand": Gene_commentary_seqs.**.Na_strand.value
            }
          },
          "Gene_Ontology" : **.Entrezgene_properties.Gene_commentary[Gene_commentary_heading='GeneOntology'].Gene_commentary_comment.{
            "Functions" : Gene_commentary[Gene_commentary_label='Function'].Gene_commentary_comment.Gene_commentary.{
              "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
              "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
              "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
            },
            "Processes" : Gene_commentary[Gene_commentary_label='Process'].Gene_commentary_comment.Gene_commentary.{
              "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
              "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
              "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
            },
            "Components" : Gene_commentary[Gene_commentary_label='Component'].Gene_commentary_comment.Gene_commentary.{
              "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
              "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
              "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
            }
          },
          "Transcript": {
            "Accession" : **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_accession,
            "Exon_Count": **.Entrezgene_properties.Gene_commentary[Gene_commentary_label='Exon count'].Gene_commentary_text
          },
          "Products": **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_products.Gene_commentary[Gene_commentary_heading='Product'][0].{
            "Accession": Gene_commentary_accession,
            "Domains" : Gene_commentary_comment.Gene_commentary[Gene_commentary_heading='Conserved Domains'].Gene_commentary_comment.Gene_commentary.{
              "DB" : Gene_commentary_source.**.Dbtag_db,
              "ID" : Gene_commentary_source.**.Object_id_id,
              "Anchor" : Gene_commentary_source.**.Other_source_anchor,
              "Location": Gene_commentary_comment.**.Gene_commentary_text
            }
          },
          "Biblio_PMID": **.PubMedId
        }`
      ]
    };
  };
  /**
   * Builds the ESearch request descriptor that looks a gene up by name in the
   * NCBI `gene` database, restricted to the Human organism.
   *
   * The attached query keeps only the first matching id, exposed as `First_ID`.
   *
   * @param {string} name - Gene name, prepended verbatim to the search term.
   * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
   */
  const searchGeneTemplate = (name) => {
    // '+' separators are kept exactly as written; presumably they stand for
    // URL-encoded spaces -- verify against the worker that builds the request.
    const humanGeneTerm = name + '[Gene Name]+AND+Human[Organism]';
    return {
      term: humanGeneTerm,
      db: 'gene',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
      query: `{"First_ID": **.Id[0]}`,
    };
  };
  /**
   * Builds the ESearch request descriptor that resolves an accession to an id
   * in the NCBI `nuccore` database, restricted to the Human organism.
   *
   * The attached query keeps only the first matching id, exposed as `First_ID`.
   *
   * @param {string} accession - Accession, prepended verbatim to the search term.
   * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
   */
  const searchTranscriptTemplate = (accession) => {
    const humanTerm = accession + '+AND+Human[Organism]';
    return {
      term: humanTerm,
      db: 'nuccore',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
      query: `{"First_ID": **.Id[0]}`,
    };
  };
  /**
   * Builds the ESearch request descriptor that resolves an accession to an id
   * in the NCBI `protein` database, restricted to the Human organism.
   *
   * The attached query keeps only the first matching id, exposed as `First_ID`.
   *
   * @param {string} accession - Accession, prepended verbatim to the search term.
   * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
   */
  const searchProtTemplate = (accession) => {
    const humanTerm = accession + '+AND+Human[Organism]';
    return {
      term: humanTerm,
      db: 'protein',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
      query: `{"First_ID": **.Id[0]}`,
    };
  };
  /**
   * Builds the ESearch request descriptor that searches the NCBI `snp`
   * database for a term, restricted to the Human organism.
   *
   * Unlike the other search templates in this file, the attached query selects
   * every `Id` node (`**.Id`) rather than only the first one.
   *
   * @param {string} accession - Search text, prepended verbatim to the search term.
   * @returns {{term: string, db: string, api_key: string, endpoint: string, query: string}}
   */
  const searchSNPTemplate = (accession) => {
    const humanTerm = accession + '+AND+Human[Organism]';
    return {
      term: humanTerm,
      db: 'snp',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
      query: `**.Id`,
    };
  };
  /**
   * Builds the EFetch request descriptor for one record of the NCBI `nuccore`
   * database, together with the query expression that projects the XML
   * response into a transcript summary.
   *
   * Projected keys: Accession_version, Update_Date, Molecular_Type, Length,
   * Topology, Definition, Comment, Features (key / location / value) and the
   * upper-cased Sequence. A feature's `value` depends on its key: the `gene`
   * qualifier for "gene", the `inference` qualifier for "exon", an object with
   * Codon_Start / Protein_Id / Translation for "CDS", and the `note` qualifier
   * for anything else.
   *
   * @param {string} id - Record identifier placed in the `id` request field.
   * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}}
   */
  const NCBITranscriptTemplate = (id) => {
    // Every projection reads from the first GBSeq of the first GBSet.
    const seq = 'GBSet[0].GBSeq[0]';
    // Path to the value of the first qualifier with the given name on the current feature.
    const qual = (qualifierName) => `GBFeature_quals.GBQualifier[GBQualifier_name='${qualifierName}'][0].GBQualifier_value`;

    const query = `{
      "Accession_version": ${seq}.GBSeq_accession_version,
      "Update_Date": ${seq}.GBSeq_update_date,
      "Molecular_Type": ${seq}.GBSeq_moltype,
      "Length": ${seq}.GBSeq_length,
      "Topology": ${seq}.GBSeq_topology,
      "Definition": ${seq}.GBSeq_definition,
      "Comment": ${seq}.GBSeq_comment,
      "Features": ${seq}.GBSeq_feature_table.GBFeature.{
        "key": GBFeature_key,
        "location": GBFeature_location,
        "value":
          GBFeature_key in "gene" ? ${qual('gene')} :
          GBFeature_key in "exon" ? ${qual('inference')} :
          GBFeature_key in "CDS" ? {
            "Codon_Start": ${qual('codon_start')},
            "Protein_Id": ${qual('protein_id')},
            "Translation": ${qual('translation')}
          } :
          ${qual('note')}
      },
      "Sequence": $uppercase(${seq}.GBSeq_sequence)
    }`;

    return {
      db: 'nuccore',
      id,
      retmode: 'xml',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query,
    };
  };
  /**
   * Builds the EFetch request descriptor for one record of the NCBI `protein`
   * database, together with the query expression that projects the XML
   * response into a protein summary.
   *
   * Projected keys: Accession_version, Update_Date, Molecular_Type, Length,
   * Topology, Definition, Comment, Calculated_Mol_Wt (from the first "Protein"
   * feature), Features (key / location / value) and the upper-cased Sequence.
   * A feature's `value` is "<region_name> <note>" for "Region" features,
   * "<site_type> <note>" for "Site" features, and the `note` qualifier otherwise.
   *
   * @param {string} id - Record identifier placed in the `id` request field.
   * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}}
   */
  const NCBIProteinTemplate = (id) => {
    // Every projection reads from the first GBSeq of the first GBSet.
    const seq = 'GBSet[0].GBSeq[0]';
    // Path to the value of the first qualifier with the given name on the current feature.
    const qual = (qualifierName) => `GBFeature_quals.GBQualifier[GBQualifier_name='${qualifierName}'][0].GBQualifier_value`;

    const query = `{
      "Accession_version": ${seq}.GBSeq_accession_version,
      "Update_Date": ${seq}.GBSeq_update_date,
      "Molecular_Type": ${seq}.GBSeq_moltype,
      "Length": ${seq}.GBSeq_length,
      "Topology": ${seq}.GBSeq_topology,
      "Definition": ${seq}.GBSeq_definition,
      "Comment": ${seq}.GBSeq_comment,
      "Calculated_Mol_Wt": ${seq}.GBSeq_feature_table.GBFeature[GBFeature_key='Protein'][0].${qual('calculated_mol_wt')},
      "Features": ${seq}.GBSeq_feature_table.GBFeature.{
        "key": GBFeature_key,
        "location": GBFeature_location,
        "value":
          GBFeature_key in "Region" ? ${qual('region_name')} & ' ' & ${qual('note')} :
          GBFeature_key in "Site" ? ${qual('site_type')} & ' ' & ${qual('note')} : ${qual('note')}
      },
      "Sequence": $uppercase(${seq}.GBSeq_sequence)
    }`;

    return {
      db: 'protein',
      id,
      retmode: 'xml',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query,
    };
  };
  /**
   * Builds the EFetch request descriptor for one record of the NCBI `snp`
   * database, together with the query expression that projects each
   * `DocumentSummary` of the response.
   *
   * Projected keys: ID, Accession, Position (numeric part of CHRPOS after the
   * ':'), Classe, MAF (Study / Frequency pairs) and Update_Date.
   *
   * @param {string} id - SNP identifier placed in the `id` request field.
   * @returns {{db: string, id: string, retmode: string, api_key: string, endpoint: string, query: string}}
   */
  const NCBISNPTemplate = (id) => {
    const summaryProjection = `**.DocumentSummary.{
      "ID": SNP_ID,
      "Accession": ACC,
      "Position": $number($split(CHRPOS, ":")[1]),
      "Classe": FXN_CLASS,
      "MAF": GLOBAL_MAFS.MAF.{
        "Study": STUDY,
        "Frequency": FREQ
      },
      "Update_Date": UPDATEDATE
    }`;

    return {
      db: 'snp',
      id,
      retmode: 'xml',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query: summaryProjection,
    };
  };
  // Worker pool shared by every lookup in this file: each request descriptor
  // produced by the *Template builders is submitted through `getEsearch.run()`.
  // The pool executes ./workers/esearch.js (resolved next to this file) on at
  // most four threads.
  const esearchWorkerFile = path_1.default.resolve(__dirname, './workers/esearch.js');
  const getEsearch = new piscina_1.default({ filename: esearchWorkerFile, maxThreads: 4 });
  /**
   * Fetches the summary of a human gene by name and enriches it with the
   * records of its reference transcript and protein product.
   *
   * Steps: search the `gene` database for the name, fetch the gene record for
   * the first hit, then fetch the transcript named by `Transcript.Accession`
   * and the protein named by `Products.Accession`. The two follow-up fetches
   * do not depend on each other, so they run concurrently. Their results are
   * attached as `Transcript.nuccore` and `Products.protein`.
   *
   * @param {string} name - Gene name, e.g. 'ZFP36L2'.
   * @returns {Promise<object>} The projected gene record, mutated to carry the enrichment.
   * @throws {Error} When the gene search yields no id (instead of fetching the literal id "undefined").
   * @throws {TypeError} When the gene record lacks `Transcript` or `Products` (unchanged behaviour).
   */
  const getGene = async (name) => {
    const search = await getEsearch.run(searchGeneTemplate(name));
    const id = search.value.First_ID;
    if (id == null) {
      throw new Error(`No NCBI gene record found for "${name}"`);
    }

    const fetched = await getEsearch.run(NCBIGeneTemplate(String(id)));
    const result = fetched.value;

    const transcriptAccession = result.Transcript.Accession;
    const productAccession = result.Products.Accession;

    // Reuse the dedicated lookups rather than repeating the search + fetch pair inline.
    const [nuccore, protein] = await Promise.all([
      getTranscript(transcriptAccession),
      getProtein(productAccession),
    ]);
    result.Transcript.nuccore = nuccore;
    result.Products.protein = protein;
    return result;
  };
  /**
   * Fetches the projected `nuccore` record for a transcript accession.
   *
   * The accession is first resolved to an id through a search request; the
   * record for that id is then fetched and its projected value returned.
   *
   * @param {string} accession - Transcript accession to look up.
   * @returns {Promise<object>} The `value` produced by the fetch request.
   * @throws {Error} When the search yields no id (instead of fetching the literal id "undefined").
   */
  const getTranscript = async (accession) => {
    const search = await getEsearch.run(searchTranscriptTemplate(accession));
    const id = search.value.First_ID;
    if (id == null) {
      throw new Error(`No NCBI nuccore record found for accession "${accession}"`);
    }
    const fetched = await getEsearch.run(NCBITranscriptTemplate(String(id)));
    return fetched.value;
  };
  /**
   * Fetches the projected `protein` record for a protein accession.
   *
   * The accession is first resolved to an id through a search request; the
   * record for that id is then fetched and its projected value returned.
   *
   * @param {string} accession - Protein accession to look up, e.g. 'NP_008818'.
   * @returns {Promise<object>} The `value` produced by the fetch request.
   * @throws {Error} When the search yields no id (instead of fetching the literal id "undefined").
   */
  const getProtein = async (accession) => {
    const search = await getEsearch.run(searchProtTemplate(accession));
    const id = search.value.First_ID;
    if (id == null) {
      throw new Error(`No NCBI protein record found for accession "${accession}"`);
    }
    const fetched = await getEsearch.run(NCBIProteinTemplate(String(id)));
    return fetched.value;
  };
  /**
   * Fetches the projected SNP summaries for every id returned by a search of
   * the `snp` database.
   *
   * One fetch request is issued per id; the per-request values are flattened
   * into a single array.
   *
   * @param {string} name - Search text (e.g. a gene name such as 'ZFP36L2').
   * @returns {Promise<Array>} Flattened summaries; empty when the search finds nothing.
   */
  const getSNPs = async (name) => {
    const search = await getEsearch.run(searchSNPTemplate(name));
    const found = search.value;

    // The search query is `**.Id`. Assuming the worker evaluates it with
    // JSONata (not visible here -- confirm), a single match comes back as a
    // bare value and no match as undefined, neither of which has `.map`.
    let ids;
    if (found == null) {
      ids = [];
    } else if (Array.isArray(found)) {
      ids = found;
    } else {
      ids = [found];
    }

    const responses = await Promise.all(
      ids.map((id) => getEsearch.run(NCBISNPTemplate(String(id))))
    );
    return responses.flatMap((response) => response.value);
  };
  // Script entry point: fetches the ZFP36L2 gene record, prints it, and writes
  // it to test.json in the current working directory.
  // A failure anywhere in the chain is logged and reported through a non-zero
  // exit code instead of being left as an unhandled promise rejection.
  (async () => {
    // https://www.ncbi.nlm.nih.gov/books/NBK25499/
    // Alternative lookups kept from the original for manual experiments:
    // const r = await getSNP('1667092841')
    // const r = await getProtein('NP_008818')
    // const r = await getSNPs('ZFP36L2')
    const r = await getGene('ZFP36L2');
    console.log(r);
    await fs_1.default.promises.writeFile('test.json', JSON.stringify(r));
  })().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });