// index.ts (4.8 KB)
  1. import { spawn } from 'child_process';
  2. import { cpus } from 'os';
  3. import fs from 'fs'
  4. import path from 'path';
  /**
   * Runs `prog` with `args` through a shell and streams its output to `onData`.
   *
   * Every stdout and stderr chunk is converted to a string, trimmed and passed
   * to `onData` as it arrives. The command is spawned with `shell: true`, so
   * `args` may contain shell syntax such as pipes and redirections. Nothing is
   * escaped here: callers must not pass untrusted input.
   *
   * @param prog - Executable (or shell command) to launch.
   * @param args - Arguments appended to `prog` on the shell command line.
   * @param onData - Callback invoked with each trimmed stdout/stderr chunk.
   * @returns A promise resolving to the exit code reported by the child
   *          process (`null` when no numeric code is reported). It rejects
   *          when the child emits an `error` event.
   */
  const async_exec = (
    prog: string,
    args: string[],
    onData: (message: string) => void,
  ): Promise<number | null> =>
    new Promise<number | null>((resolve, reject) => {
      const child = spawn(prog, args, { shell: true });
      const forward = (chunk: { toString(): string }): void => {
        onData(chunk.toString().trim());
      };

      child.stdout.on('data', forward);
      child.stderr.on('data', forward);
      child.on('error', reject);
      // Resolve on 'close' rather than 'exit': 'exit' can fire while the stdio
      // streams are still open, so output could reach `onData` after the
      // promise had already settled. 'close' fires once the streams have ended.
      child.on('close', (code) => resolve(code));
    });
  /** Flags controlling which files an alignment run produces. */
  interface BwaMemOptions {
    /** Write discordant read pairs to a SAM file (paired-end input only). Defaults to `true`. */
    output_discordant?: boolean;
    /** Write split reads to a SAM file. Defaults to `true`. */
    output_splitted?: boolean;
    /** Write unmapped reads to a FASTQ file. Defaults to `true`. */
    output_unmapped?: boolean;
    /** Discard the alignment itself (BAM is sent to `/dev/null`, no sorting is done). */
    remove_mapped?: boolean;
  }

  /** Paths of the files produced by a run; a key is present only when that file was requested. */
  interface BwaMemOutputs {
    bamSorted?: string;
    discordantFile?: string;
    splitterFile?: string;
    unmappedFile?: string;
  }

  /**
   * Aligns reads against a reference with `bwa mem`, optionally pipes the
   * alignment through `samblaster` to extract discordant / split / unmapped
   * reads, converts the result to BAM with `samtools view` and finally sorts it
   * with `sambamba sort`.
   *
   * The whole pipeline is handed to a shell as a single command line, so none
   * of the path or name arguments are escaped: do not pass untrusted input.
   *
   * @param refPath - Path to the reference passed to `bwa mem`; its base name
   *                  (without extension) is used in all output file names.
   * @param reads - A single string for single-end input, or an array
   *                `[R1, R2]` for paired-end input where each element is a
   *                path or an array of paths (joined with spaces).
   * @param runName - Value for the `SM` field of the read group.
   * @param libName - Value for the `LB` field of the read group.
   * @param outputDir - Directory in which all output files are created.
   * @param onData - Receives prefixed progress/output messages from the tools.
   * @param options - Output flags; unspecified flags use the defaults
   *                  documented on {@link BwaMemOptions}.
   * @returns The paths of the files that were requested.
   * @throws Rejects when paired-end input lacks R1 or R2, when a tool cannot
   *         be spawned, or when removing the unsorted BAM fails.
   */
  const asyncBwaMem = async (
    refPath: string,
    reads: string | Array<string> | Array<Array<string>>,
    runName: string,
    libName: string,
    outputDir: string,
    onData: (message: string) => void,
    options?: BwaMemOptions,
  ): Promise<BwaMemOutputs> => {
    // Merge into a fresh object instead of reassigning the caller's parameter.
    const settings: BwaMemOptions = {
      output_discordant: true,
      output_splitted: true,
      output_unmapped: true,
      ...options,
    };

    const refName = path.parse(refPath).name;
    const bwa = 'bwa';
    const samblaster = 'samblaster';
    const samtools = 'samtools';
    const sambamba = 'sambamba';

    // Builds the quoted `'< cat files'` / `'< zcat files'` argument for one mate.
    // Compression is detected from the end of the joined string, i.e. from the
    // last file only.
    const toReaderArg = (files: string | string[]): string => {
      const joined = Array.isArray(files) ? files.join(' ') : files;
      const reader = joined.endsWith('gz') ? 'zcat' : 'cat';
      return `'< ${reader} ${joined}'`;
    };

    const isPairedEnd = Array.isArray(reads);
    let readsIn: string;
    if (Array.isArray(reads)) {
      console.log('Assuming paired end reads');
      const [R1, R2] = reads;
      if (R1 === undefined || R2 === undefined) {
        // Previously this surfaced as an opaque TypeError on `undefined.slice`.
        throw new Error('Paired-end input requires both R1 and R2 reads');
      }
      readsIn = toReaderArg(R1) + ' ' + toReaderArg(R2);
    } else {
      readsIn = reads;
    }

    const bamSorted = path.join(outputDir, `bwa_mem_properly_on_${refName}.sorted.bam`);
    const removeMapped = Boolean(settings.remove_mapped);
    // When the alignment is not wanted the BAM stream is discarded and no
    // sorted BAM is reported.
    const bam = removeMapped
      ? '/dev/null'
      : path.join(outputDir, `bwa_mem_properly_on_${refName}.bam`);
    const outputs: BwaMemOutputs = removeMapped ? {} : { bamSorted };

    const threads = String(cpus().length);

    let samblasterCmd: string[] = [];
    // NOTE(review): `output_unmapped` is only honoured when discordant or
    // splitter output is also enabled — confirm whether that is intended.
    if (settings.output_discordant || settings.output_splitted) {
      console.log('Using samblaster');
      // https://github.com/GregoryFaust/samblaster
      samblasterCmd = [
        '|',
        samblaster,
        '--addMateTags',
        '-a', // Accept duplicate marks already in input file
        '-e', // Exclude reads marked as duplicates from discordant, splitter, and/or unmapped
      ];

      if (settings.output_discordant) {
        if (!isPairedEnd) {
          console.log('Discordant reads can be found only in paired reads, skipping');
        } else {
          const discordantFile = path.join(outputDir, `bwa_mem_discordants_on_${refName}.sam`);
          console.log('Discordant reads file path: ', discordantFile);
          samblasterCmd.push('-d', discordantFile);
          outputs.discordantFile = discordantFile;
        }
      }

      if (!isPairedEnd) {
        samblasterCmd.push('--ignoreUnmated');
      }

      if (settings.output_splitted) {
        const splitterFile = path.join(outputDir, `bwa_mem_splitters_on_${refName}.sam`);
        console.log('Splitted reads file path: ', splitterFile);
        samblasterCmd.push('-s', splitterFile);
        outputs.splitterFile = splitterFile;
      }

      if (settings.output_unmapped) {
        const unmappedFile = path.join(outputDir, `bwa_mem_unmapped_on_${refName}.fq`);
        console.log('Unmapped reads file path: ', unmappedFile);
        samblasterCmd.push('-u', unmappedFile);
        outputs.unmappedFile = unmappedFile;
      }
    }

    const code = await async_exec(
      bwa,
      [
        'mem',
        '-t', threads,
        '-R', `"@RG\\tPL:Illumina\\tID:${Date.now()}\\tSM:${runName}\\tLB:${libName}"`,
        refPath,
        readsIn,
        ...samblasterCmd,
        '|',
        samtools,
        'view',
        '-Sb',
        '-',
        '>',
        bam,
      ],
      (message: string) => onData('[BWA-MEM] ' + message),
    );
    onData(`[BWA-MEM][EXIT CODE] ${String(code)}`);

    if (outputs.bamSorted) {
      // NOTE(review): no output path is passed to sambamba; this relies on it
      // writing `<input>.sorted.bam` next to the input — confirm.
      const sortCode = await async_exec(
        sambamba,
        ['sort', '-t', threads, bam],
        (message: string) => onData('[SAMBAMBA-SORT] ' + message),
      );
      onData(`[SAMBAMBA-SORT][EXIT CODE] ${String(sortCode)}`);

      if (sortCode === 0) {
        fs.unlinkSync(bam);
      } else {
        // Do not delete the only copy of the alignment when sorting failed.
        onData('[SAMBAMBA-SORT] sort did not exit cleanly, keeping unsorted BAM: ' + bam);
      }
    }

    return outputs;
  };
  132. export { asyncBwaMem }