index.ts 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. import { spawn } from 'child_process';
  2. import { getSymbol, getFromAcc } from "gbffparser"
  3. import { asyncBwaMem, makeReference } from 'aligner'
  4. import fs from 'fs'
  5. import os from 'os'
  6. import path from 'path';
  7. const async_exec = (prog: string, args: string[], onData: Function) => {
  8. return new Promise((resolve, reject) => {
  9. const child = spawn(prog, args, {shell: true})
  10. child.stdout.on('data', data => onData(data.toString().trim()))
  11. // child.stderr.on('data', data => console.log(data.toString().trim()))
  12. child.on('error', err => reject(err))
  13. child.on('exit', code => resolve(code))
  14. })
  15. }
  16. const openSam = async (
  17. filePaths: string | string[],
  18. restraintTo ?: string,
  19. count?: boolean
  20. ) => {
  21. let accum = ''
  22. let jsonLines: any[] = []
  23. filePaths = Array.isArray(filePaths) ? filePaths : [filePaths]
  24. for (let filePath of filePaths) {
  25. let args = ['view']
  26. if (filePath.match(/\.sam$/)) {
  27. if(restraintTo) {
  28. if(!fs.existsSync(filePath.split(/\.sam$/)[0] + '.bam')) {
  29. const tmpBam = path.join(os.tmpdir(), Math.random() + '.bam')
  30. await async_exec('sambamba', ['view', '-S', filePath, '-f', 'bam', '>', tmpBam], () => {})
  31. filePath = filePath.split(/\.sam$/)[0] + '.bam'
  32. await async_exec('sambamba', ['sort', tmpBam, '-o', filePath], () => {})
  33. await fs.promises.unlink(tmpBam)
  34. } else {
  35. filePath = filePath.split(/\.sam$/)[0] + '.bam'
  36. }
  37. } else {
  38. args.push('-S')
  39. }
  40. }
  41. if(!count) {
  42. args = [...args, '-f', 'json']
  43. } else {
  44. //if(restraintTo) {
  45. args.push('-c')
  46. //}
  47. }
  48. args.push(filePath)
  49. if(restraintTo) args.push(restraintTo)
  50. const threads = os.cpus().length - 2 > 0 ? os.cpus().length - 2 : 1
  51. args.push('-t')
  52. args.push(String(threads))
  53. console.log(['sambamba', ...args].join(' '));
  54. await async_exec('sambamba', args, (m: string) => {
  55. accum+=m
  56. accum = accum.replace('}{','}\n{')
  57. if(accum.match('\n')) {
  58. accum.split('\n').map((e,i,a) => {
  59. if (i !== a.length -1) {
  60. try {
  61. jsonLines.push(JSON.parse(e))
  62. } catch (error) {
  63. console.log(error);
  64. }
  65. } else {
  66. accum = e
  67. }
  68. })
  69. }
  70. })
  71. if(accum !== '') {
  72. accum = accum.replace('}{','}\n{')
  73. accum.split('\n').map((e,i,a) => {
  74. try {
  75. jsonLines.push(JSON.parse(e))
  76. } catch (error) {
  77. console.log(error);
  78. }
  79. })
  80. }
  81. }
  82. return jsonLines
  83. }
  84. const extractReads = (
  85. reads: string[],
  86. fastqPaths: string | string[],
  87. ) => {
  88. fastqPaths = Array.isArray(fastqPaths) ? fastqPaths : [fastqPaths]
  89. }
  90. const analysisTranscript = async (
  91. accession: string,
  92. properBam:string,
  93. splittersSam: string,
  94. disordantsSam: string,
  95. rnaDBPath: string[]
  96. ) => {
  97. const accessionWoVersion = accession.split(/\.[0-9]{1,3}/)[0]
  98. const accJson = await getFromAcc(accessionWoVersion, rnaDBPath)
  99. let json: any = {
  100. sequence : accJson?.sequence || '',
  101. version: accJson?.version,
  102. exons: accJson?.features.filter(entry => entry.type === 'exon').map((exon,i) => ({
  103. n: i + 1,
  104. start: exon.start,
  105. end: exon.end,
  106. strand: exon.strand,
  107. sequence: accJson.sequence.substring(exon.start-1,exon.end),
  108. counts: {},
  109. })) || [],
  110. counts: {},
  111. altTranscripts: {}
  112. }
  113. json.counts.all = await openSam(properBam, accession, true)
  114. json.counts.splitters = await openSam(splittersSam, accession, true)
  115. json.counts.discordants = await openSam(disordantsSam, accession, true)
  116. for (let index = 0; index < json.exons.length; index++) {
  117. const exon = json.exons[index];
  118. json.exons[index].counts.all = await openSam(properBam, accession + ':' + exon.start + '-' + exon.end, true)
  119. json.exons[index].counts.splitters = await openSam(splittersSam, accession + ':' + exon.start + '-' + exon.end, true)
  120. json.exons[index].counts.discordants = await openSam(disordantsSam, accession + ':' + exon.start + '-' + exon.end, true)
  121. }
  122. if(typeof json.exons !== 'undefined') {
  123. const samJSON = await openSam([
  124. 'test/bwa_mem_splitters_on_human_NM.sam',
  125. 'test/bwa_mem_discordants_on_human_NM.sam'],
  126. accession)
  127. const byRead = {} as {[key:string]: number[]}
  128. samJSON.map((entry:any) => ({
  129. qname: entry.qname.split('_')[0],
  130. pos: entry.pos,
  131. exon: json.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
  132. }))
  133. .map((entry:any) => {
  134. if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
  135. byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
  136. })
  137. const byAltern = {} as {[key:string]: string[]}
  138. Object.keys(byRead).map(qname => {
  139. const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
  140. .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
  141. for (const bridge of bridges) {
  142. if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
  143. byAltern[bridge].push(qname)
  144. }
  145. })
  146. json.altTranscripts = Object.keys(byAltern)
  147. .map(bridge => ({bridge, reads: byAltern[bridge]}))
  148. .sort((a,b) => b.reads.length - a.reads.length)
  149. }
  150. return json
  151. }
  152. export { analysisTranscript, openSam }
  153. /*(async()=>{
  154. // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
  155. // ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
  156. // 'TEST', 'TEST', 'test/', console.log)
  157. const symbol = 'NOTCH1'
  158. const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene'
  159. const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt'
  160. const rnaDBPath = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff')
  161. const geneDBPath = [1,2,3,4,5,6,7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff')
  162. const geneInfo = await getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath)
  163. await fs.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry:any) => entry.feature === 'mRNA'), null, 4))
  164. const transcripts = geneInfo.filter((entry:any) => entry.feature === 'mRNA')
  165. .map((entry:any) => ({...entry, sequence: entry.data.sequence, data:entry.data.features.filter((feature:any) => feature.type === 'exon')}))
  166. .map((entry:any) => ({
  167. accession: entry.product_accession,
  168. genomic_accession: entry.genomic_accession,
  169. start: entry.start,
  170. end: entry.end,
  171. sequence: entry.sequence,
  172. exons: [...entry.data.map((d:any) => ({start: d.start, end: d.end}))]
  173. }))
  174. .map((entry:any) => ({...entry, exons: entry.exons.map((exon:any) => ({...exon, sequence: entry.sequence.substring(exon.start-1,exon.end)}))}))
  175. await fs.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4))
  176. for (let index = 0; index < transcripts.length; index++) {
  177. const transcript = transcripts[index]
  178. transcripts[index].count = {
  179. all : await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
  180. splitters : await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
  181. discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
  182. }
  183. const samJSON = await openSam([
  184. 'test/bwa_mem_splitters_on_human_NM.sam',
  185. 'test/bwa_mem_discordants_on_human_NM.sam'],
  186. transcript.accession)
  187. const byRead = {} as {[key:string]: number[]}
  188. samJSON.map((entry:any) => ({
  189. qname: entry.qname.split('_')[0],
  190. pos: entry.pos,
  191. exon: transcript.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
  192. }))
  193. .map((entry:any) => {
  194. if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
  195. byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
  196. })
  197. const byAltern = {} as {[key:string]: string[]}
  198. Object.keys(byRead).map(qname => {
  199. const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
  200. .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
  201. for (const bridge of bridges) {
  202. if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
  203. byAltern[bridge].push(qname)
  204. }
  205. })
  206. transcripts[index].altTranscripts = Object.keys(byAltern)
  207. .map(bridge => ({bridge, reads: byAltern[bridge]}))
  208. .sort((a,b) => b.reads.length - a.reads.length)
  209. await fs.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4))
  210. }
  211. })()
  212. */