index.ts 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. 'use strict'
  2. import fs from "fs"
  3. import path from "path"
  4. import Piscina from "piscina"
  /**
   * Builds the request descriptor for fetching one record from the NCBI
   * `gene` database via the E-utilities `efetch` endpoint (XML output).
   *
   * `query` is an array holding a single path expression that reshapes the
   * returned document into an object with the keys `Locus`, `Update_Date`,
   * `Location`, `Summary`, `Genomic_Position`, `Gene_Ontology`, `Transcript`,
   * `Products` and `Biblio_PMID`.
   * NOTE(review): the expression syntax looks like JSONata and is presumably
   * evaluated by the esearch worker against the parsed XML — confirm there.
   *
   * @param id - Record identifier copied verbatim into the `id` field.
   * @returns A plain object with `db`, `id`, `api_key`, `retmode`, `endpoint`
   *          and `query`.
   */
  const NCBIGeneTemplate = (id: string) => {
    // NOTE(review): the API key is committed in source; consider loading it
    // from configuration instead.
    return {
      db: 'gene',
      id,
      api_key: '47796c7650360571735f00f510315f871607',
      retmode: 'xml',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query: [
        // "Day" must read Date_std_day; it previously duplicated Date_std_month,
        // so the reported day was always equal to the month.
        `{ "Locus" : **.Gene_ref_locus,
        "Update_Date" : **.Gene_track_update_date.Date.Date_std.Date_std.{"Year": Date_std_year, "Month": Date_std_month, "Day": Date_std_day},
        "Location" : **.Gene_ref_maploc,
        "Summary" : **.Entrezgene_summary,
        "Genomic_Position": **.Entrezgene_locus.Gene_commentary[Gene_commentary_type.value='genomic'][0].{
          "Accession": Gene_commentary_accession,
          "Positions": $.{
            "from" : Gene_commentary_seqs.**.Seq_interval_from,
            "to" : Gene_commentary_seqs.**.Seq_interval_to,
            "strand": Gene_commentary_seqs.**.Na_strand.value
          }
        },
        "Gene_Ontology" : **.Entrezgene_properties.Gene_commentary[Gene_commentary_heading='GeneOntology'].Gene_commentary_comment.{
          "Functions" : Gene_commentary[Gene_commentary_label='Function'].Gene_commentary_comment.Gene_commentary.{
            "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
            "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
            "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
          },
          "Processes" : Gene_commentary[Gene_commentary_label='Process'].Gene_commentary_comment.Gene_commentary.{
            "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
            "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
            "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
          },
          "Components" : Gene_commentary[Gene_commentary_label='Component'].Gene_commentary_comment.Gene_commentary.{
            "ID" : Gene_commentary_source.Other_source.**.Object_id_id,
            "Pre_Text": Gene_commentary_source.Other_source.Other_source_pre_text,
            "Anchor" : Gene_commentary_source.Other_source.Other_source_anchor
          }
        },
        "Transcript": {
          "Accession" : **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_accession,
          "Exon_Count": **.Entrezgene_properties.Gene_commentary[Gene_commentary_label='Exon count'].Gene_commentary_text
        },
        "Products": **.Entrezgene_comments.Gene_commentary[Gene_commentary_heading='NCBI Reference Sequences (RefSeq)'].**.Gene_commentary[Gene_commentary_heading='mRNA Sequence'][0].Gene_commentary_products.Gene_commentary[Gene_commentary_heading='Product'][0].{
          "Accession": Gene_commentary_accession,
          "Domains" : Gene_commentary_comment.Gene_commentary[Gene_commentary_heading='Conserved Domains'].Gene_commentary_comment.Gene_commentary.{
            "DB" : Gene_commentary_source.**.Dbtag_db,
            "ID" : Gene_commentary_source.**.Object_id_id,
            "Anchor" : Gene_commentary_source.**.Other_source_anchor,
            "Location": Gene_commentary_comment.**.Gene_commentary_text
          }
        },
        "Biblio_PMID": **.PubMedId
        }`
      ]
    }
  }
  /**
   * Builds the `esearch` request descriptor that looks a gene up by name in
   * the `gene` database, restricted to the Human organism filter.
   *
   * @param name - Gene name; it is concatenated unmodified in front of the
   *               `[Gene Name]+AND+Human[Organism]` suffix.
   * @returns A plain object with `term`, `db`, `api_key`, `endpoint` and a
   *          `query` expression that exposes the first `Id` as `First_ID`.
   */
  const searchGeneTemplate = (name: string) => {
    const term = `${name}[Gene Name]+AND+Human[Organism]`
    const descriptor = {
      term,
      db: 'gene',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
      query: '{"First_ID": **.Id[0]}'
    }
    return descriptor
  }
  /**
   * Builds the `esearch` request descriptor that resolves a transcript
   * accession in the `nuccore` database, restricted to the Human organism
   * filter.
   *
   * @param accession - Accession text; `+AND+Human[Organism]` is appended to it.
   * @returns A plain object with `term`, `db`, `api_key`, `endpoint` and a
   *          `query` expression that exposes the first `Id` as `First_ID`.
   */
  const searchTranscriptTemplate = (accession: string) => ({
    term: `${accession}+AND+Human[Organism]`,
    db: 'nuccore',
    api_key: '47796c7650360571735f00f510315f871607',
    endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
    query: '{"First_ID": **.Id[0]}'
  })
  /**
   * Builds the `esearch` request descriptor that resolves a protein accession
   * in the `protein` database, restricted to the Human organism filter.
   *
   * @param accession - Accession text; `+AND+Human[Organism]` is appended to it.
   * @returns A plain object with `term`, `db`, `api_key`, `endpoint` and a
   *          `query` expression that exposes the first `Id` as `First_ID`.
   */
  const searchProtTemplate = (accession: string) => {
    const humanOnly = '+AND+Human[Organism]'
    return {
      term: accession + humanOnly,
      db: 'protein',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
      query: '{"First_ID": **.Id[0]}'
    }
  }
  /**
   * Builds the `esearch` request descriptor that searches the `snp` database
   * for a term, restricted to the Human organism filter.
   *
   * Unlike the other search templates, the `query` expression selects every
   * `Id` (`**.Id`) rather than only the first one.
   *
   * @param accession - Search text; `+AND+Human[Organism]` is appended to it.
   * @returns A plain object with `term`, `db`, `api_key`, `endpoint` and `query`.
   */
  const searchSNPTemplate = (accession: string) => {
    const term = [accession, 'Human[Organism]'].join('+AND+')
    return {
      term,
      db: 'snp',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi',
      query: '**.Id'
    }
  }
  /**
   * Builds the `efetch` request descriptor for one `nuccore` record (XML).
   *
   * The `query` expression reads the first `GBSeq` of the first `GBSet` and
   * yields the accession/version, update date, molecule type, length,
   * topology, definition, comment, sequence and a `Features` list. For each
   * feature the `value` depends on `GBFeature_key`: the `gene` qualifier for
   * "gene", the `inference` qualifier for "exon", an object with
   * `Codon_Start`/`Protein_Id`/`Translation` for "CDS", and the `note`
   * qualifier otherwise.
   *
   * @param id - Record identifier copied verbatim into the `id` field.
   * @returns A plain object with `db`, `id`, `retmode`, `api_key`, `endpoint`
   *          and `query`.
   */
  const NCBITranscriptTemplate = (id: string) => {
    const query = `{
      "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
      "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
      "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
      "Length": GBSet[0].GBSeq[0].GBSeq_length,
      "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
      "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
      "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
      "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
        "key": GBFeature_key,
        "location": GBFeature_location,
        "value":
          GBFeature_key in "gene" ? GBFeature_quals.GBQualifier[GBQualifier_name='gene'][0].GBQualifier_value :
          GBFeature_key in "exon" ? GBFeature_quals.GBQualifier[GBQualifier_name='inference'][0].GBQualifier_value :
          GBFeature_key in "CDS" ? {
            "Codon_Start": GBFeature_quals.GBQualifier[GBQualifier_name='codon_start'][0].GBQualifier_value,
            "Protein_Id": GBFeature_quals.GBQualifier[GBQualifier_name='protein_id'][0].GBQualifier_value,
            "Translation": GBFeature_quals.GBQualifier[GBQualifier_name='translation'][0].GBQualifier_value
          } :
          GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
      },
      "Sequence": GBSet[0].GBSeq[0].GBSeq_sequence
      }`
    return {
      db: 'nuccore',
      id,
      retmode: 'xml',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query
    }
  }
  /**
   * Builds the `efetch` request descriptor for one `protein` record (XML).
   *
   * The `query` expression reads the first `GBSeq` of the first `GBSet` and
   * yields the accession/version, update date, molecule type, length,
   * topology, definition, comment, the `calculated_mol_wt` qualifier of the
   * first "Protein" feature, the sequence and a `Features` list. For "Region"
   * and "Site" features the `value` joins the `region_name` / `site_type`
   * qualifier and the `note` qualifier with a space; other features use the
   * `note` qualifier alone.
   *
   * @param id - Record identifier copied verbatim into the `id` field.
   * @returns A plain object with `db`, `id`, `retmode`, `api_key`, `endpoint`
   *          and `query`.
   */
  const NCBIProteinTemplate = (id: string) => {
    const query = `{
      "Accession_version": GBSet[0].GBSeq[0].GBSeq_accession_version,
      "Update_Date": GBSet[0].GBSeq[0].GBSeq_update_date,
      "Molecular_Type": GBSet[0].GBSeq[0].GBSeq_moltype,
      "Length": GBSet[0].GBSeq[0].GBSeq_length,
      "Topology": GBSet[0].GBSeq[0].GBSeq_topology,
      "Definition": GBSet[0].GBSeq[0].GBSeq_definition,
      "Comment": GBSet[0].GBSeq[0].GBSeq_comment,
      "Calculated_Mol_Wt": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature[GBFeature_key='Protein'][0].GBFeature_quals.GBQualifier[GBQualifier_name='calculated_mol_wt'][0].GBQualifier_value,
      "Features": GBSet[0].GBSeq[0].GBSeq_feature_table.GBFeature.{
        "key": GBFeature_key,
        "location": GBFeature_location,
        "value":
          GBFeature_key in "Region" ? GBFeature_quals.GBQualifier[GBQualifier_name='region_name'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value :
          GBFeature_key in "Site" ? GBFeature_quals.GBQualifier[GBQualifier_name='site_type'][0].GBQualifier_value & ' ' & GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value : GBFeature_quals.GBQualifier[GBQualifier_name='note'][0].GBQualifier_value
      },
      "Sequence": GBSet[0].GBSeq[0].GBSeq_sequence
      }`
    return {
      db: 'protein',
      id,
      retmode: 'xml',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query
    }
  }
  /**
   * Builds the `efetch` request descriptor for one `snp` record (XML).
   *
   * The `query` expression maps every `DocumentSummary` to an object with
   * `ID`, `Accession`, `Position` (the part of `CHRPOS` after the first ":",
   * converted with `$number`), `Classe`, a `MAF` list of study/frequency
   * pairs and `Update_Date`.
   *
   * @param id - Record identifier copied verbatim into the `id` field.
   * @returns A plain object with `db`, `id`, `retmode`, `api_key`, `endpoint`
   *          and `query`.
   */
  const NCBISNPTemplate = (id: string) => {
    const query = `**.DocumentSummary.{
      "ID": SNP_ID,
      "Accession": ACC ,
      "Position": $number($split(CHRPOS, ":")[1]),
      "Classe": FXN_CLASS,
      "MAF": GLOBAL_MAFS.MAF.{
        "Study": STUDY,
        "Frequency": FREQ
      },
      "Update_Date": UPDATEDATE
      }`
    return {
      db: 'snp',
      id,
      retmode: 'xml',
      api_key: '47796c7650360571735f00f510315f871607',
      endpoint: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi',
      query
    }
  }
  // Piscina worker pool, capped at four threads, that runs the
  // `workers/esearch.js` script located next to this module. The request
  // helpers in this file hand their template objects to it via `run`.
  const getEsearch = new Piscina({
    maxThreads: 4,
    filename: path.join(__dirname, 'workers', 'esearch.js')
  })
  /**
   * Fetches the gene record for a gene name and attaches the related
   * transcript and protein records.
   *
   * Steps: search the gene ID, fetch the gene record, then look up the
   * transcript named by `Transcript.Accession` and the protein named by
   * `Products.Accession`. The two lookups are independent and run
   * concurrently. Their results are stored on the gene record as
   * `Transcript.nuccore` and `Products.protein`.
   *
   * @param name - Gene name passed to the gene search template.
   * @returns The gene record (the worker result's `value`) with the transcript
   *          and protein records attached.
   * @throws Error when the search yields no ID, or the gene record lacks a
   *         transcript or product accession. These cases previously surfaced
   *         as an opaque TypeError or as a request for the literal id
   *         "undefined".
   */
  const getGene = async (name: string) => {
    const search = await getEsearch.run(searchGeneTemplate(name))
    const id = search?.value?.First_ID
    if (id == null) {
      throw new Error(`NCBI gene search returned no ID for "${name}"`)
    }

    const fetched = await getEsearch.run(NCBIGeneTemplate(String(id)))
    const result = fetched?.value
    const transcriptAccession = result?.Transcript?.Accession
    if (transcriptAccession == null) {
      throw new Error(`NCBI gene record ${String(id)} ("${name}") has no transcript accession`)
    }
    const productAccession = result?.Products?.Accession
    if (productAccession == null) {
      throw new Error(`NCBI gene record ${String(id)} ("${name}") has no product accession`)
    }

    // Neither lookup depends on the other, so they can run side by side.
    const [nuccore, protein] = await Promise.all([
      getTranscript(transcriptAccession),
      getProtein(productAccession)
    ])
    result.Transcript.nuccore = nuccore
    result.Products.protein = protein
    return result
  }
  /**
   * Resolves a transcript accession to its `nuccore` ID and fetches the
   * corresponding record.
   *
   * @param accession - Accession passed to the transcript search template.
   * @returns The `value` of the worker's fetch result.
   * @throws Error when the search yields no ID. Previously the literal string
   *         "undefined" was sent as the record id in that case.
   */
  const getTranscript = async (accession: string) => {
    const search = await getEsearch.run(searchTranscriptTemplate(accession))
    const id = search?.value?.First_ID
    if (id == null) {
      throw new Error(`NCBI nuccore search returned no ID for "${accession}"`)
    }
    const fetched = await getEsearch.run(NCBITranscriptTemplate(String(id)))
    return fetched.value
  }
  /**
   * Resolves a protein accession to its `protein` database ID and fetches the
   * corresponding record.
   *
   * @param accession - Accession passed to the protein search template.
   * @returns The `value` of the worker's fetch result.
   * @throws Error when the search yields no ID. Previously the literal string
   *         "undefined" was sent as the record id in that case.
   */
  const getProtein = async (accession: string) => {
    const search = await getEsearch.run(searchProtTemplate(accession))
    const id = search?.value?.First_ID
    if (id == null) {
      throw new Error(`NCBI protein search returned no ID for "${accession}"`)
    }
    const fetched = await getEsearch.run(NCBIProteinTemplate(String(id)))
    return fetched.value
  }
  /**
   * Searches the `snp` database for a term and fetches the summary of every
   * ID the search returned.
   *
   * @param name - Search text passed to the SNP search template.
   * @returns The fetched values flattened into one array; empty when the
   *          search matched nothing.
   */
  const getSNPs = async (name: string) => {
    const search = await getEsearch.run(searchSNPTemplate(name))
    const found: unknown = search?.value
    // The search value is not guaranteed to be an array: it is missing when
    // nothing matched, and may be a bare scalar for a single match
    // (NOTE(review): JSONata collapses one-element results this way — confirm
    // against the worker). Normalise so `.map` below never throws.
    const ids: unknown[] = found == null ? [] : Array.isArray(found) ? found : [found]
    const records = await Promise.all(
      ids.map(id => getEsearch.run(NCBISNPTemplate(String(id))))
    )
    return records.flatMap(record => record.value)
  }
  // Ad-hoc entry point: runs a sample SNP lookup, prints the result and writes
  // it to test.json in the current working directory.
  // The leading semicolon keeps this parenthesised expression from being
  // parsed as a call on whatever statement precedes it in this
  // semicolon-free file.
  ;(async () => {
    // https://www.ncbi.nlm.nih.gov/books/NBK25499/
    // const r = await getSNP('1667092841')
    // const r = await getProtein('NP_008818')
    const r = await getSNPs('ZFP36L2')
    console.log(r)
    await fs.promises.writeFile('test.json', JSON.stringify(r))
  })().catch((err: unknown) => {
    // Without a handler a failed lookup or write became an unhandled promise
    // rejection; report it and mark the process as failed instead.
    console.error(err)
    process.exitCode = 1
  })