index.ts 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. import { spawn } from 'child_process';
  2. import { cpus } from 'os';
  3. import fs from 'fs'
  4. import path from 'path';
  5. const async_exec = (prog: string, args: string[], onData: Function) => {
  6. return new Promise((resolve, reject) => {
  7. const child = spawn(prog, args, {shell: true})
  8. child.stdout.on('data', data => onData(data.toString().trim()))
  9. child.stderr.on('data', data => onData(data.toString().trim()))
  10. child.on('error', err => reject(err))
  11. child.on('exit', code => resolve(code))
  12. })
  13. }
  14. const asyncBwaMem = (
  15. refPath : string,
  16. reads : string | Array<string> | Array<Array<string>>,
  17. // R1 : string | Array<string>,
  18. // R2 : string | Array<string>,
  19. runName : string,
  20. libName : string,
  21. outputDir : string,
  22. onData : Function,
  23. options? : any,
  24. ) => {
  25. return new Promise<string[]>(async (resolve, reject) => {
  26. try {
  27. const defaultOptions = {
  28. output_discordant: true,
  29. output_splitted: true,
  30. output_unmapped: true
  31. }
  32. if (typeof options === 'undefined') {
  33. options = defaultOptions
  34. } else {
  35. options = {...defaultOptions, ...options}
  36. }
  37. const refName = path.parse(refPath).name
  38. const bwa = 'bwa'
  39. const samblaster = 'samblaster'
  40. const samtools = 'samtools'
  41. const sambamba = 'sambamba'
  42. let readsIn: string
  43. let isPairedEnd = false
  44. if (Array.isArray(reads) ) {
  45. isPairedEnd = true
  46. console.log('Assuming paired end reads');
  47. const [R1, R2] = reads
  48. const R1_arr = Array.isArray(R1) ? R1.join(' ') : R1
  49. const R2_arr = Array.isArray(R2) ? R2.join(' ') : R2
  50. const R1_kitty = R1_arr.slice(-2) === 'gz' ? 'zcat' : 'cat'
  51. const R2_kitty = R2_arr.slice(-2) === 'gz' ? 'zcat' : 'cat'
  52. const R1_in = `'< ${R1_kitty} ${R1_arr}'`
  53. const R2_in = `'< ${R2_kitty} ${R2_arr}'`
  54. readsIn = R1_in + ' ' + R2_in
  55. } else {
  56. readsIn = reads
  57. }
  58. let bam = path.join(outputDir, `bwa_mem_properly_on_${refName}.bam`)
  59. let bamSorted = path.join(outputDir, `bwa_mem_properly_on_${refName}.sorted.bam`)
  60. let retObj: any = { bamSorted }
  61. if(options?.remove_mapped) {
  62. bam = '/dev/null'
  63. delete retObj.bamSorted
  64. }
  65. const threads = String(cpus().length)
  66. let samblasterCmd: Array<string> = []
  67. // https://github.com/GregoryFaust/samblaster
  68. samblasterCmd = ['|', samblaster,
  69. '--addMateTags',
  70. '-a', // Accept duplicate marks already in input file
  71. '-e', // Exclude reads marked as duplicates from discordant, splitter, and/or unmapped
  72. ]
  73. if (options?.output_discordant || options?.output_splitted) {
  74. console.log('Using samblaster');
  75. if(options?.output_discordant) {
  76. if(!isPairedEnd) {
  77. console.log('Discordant reads can be found only in paired reads, skipping')
  78. } else {
  79. const discordantFile = path.join(outputDir, `bwa_mem_discordants_on_${refName}.sam`)
  80. console.log('Discordant reads file path: ', discordantFile);
  81. samblasterCmd = [...samblasterCmd, '-d', discordantFile]
  82. retObj = {...retObj, discordantFile}
  83. }
  84. }
  85. if (!isPairedEnd) {
  86. samblasterCmd = [...samblasterCmd, '--ignoreUnmated']
  87. }
  88. if(options?.output_splitted) {
  89. const splitterFile = path.join(outputDir, `bwa_mem_splitters_on_${refName}.sam`)
  90. console.log('Splitted reads file path: ', splitterFile);
  91. samblasterCmd = [...samblasterCmd, '-s', splitterFile]
  92. retObj = {...retObj, splitterFile}
  93. }
  94. }
  95. if(options?.output_unmapped) {
  96. const unmappedFile = path.join(outputDir, `bwa_mem_unmapped_on_${refName}.fq`)
  97. console.log('Unmapped reads file path: ', unmappedFile);
  98. samblasterCmd = [...samblasterCmd, '-u', unmappedFile]
  99. retObj = {...retObj, unmappedFile}
  100. }
  101. if(!fs.existsSync(refPath+'.amb')) {
  102. await async_exec(bwa, ['index', refPath], (message: string) => onData('[BWA-INDEX] ' + message))
  103. }
  104. console.log(options, samblasterCmd);
  105. const code = await async_exec(
  106. bwa, ['mem',
  107. '-t', threads,
  108. '-R', `"@RG\\tPL:Illumina\\tID:${+(new Date)}\\tSM:${runName}\\tLB:${libName}"`,
  109. refPath,
  110. readsIn,
  111. ...samblasterCmd,
  112. '|',
  113. samtools,
  114. 'view',
  115. '-Sb',
  116. '-',
  117. '>',
  118. bam
  119. ], (message: string) => onData('[BWA-MEM] ' + message))
  120. onData('[BWA-MEM][EXIT CODE] ' + code)
  121. if(retObj.bamSorted) {
  122. const code_sort = await async_exec(
  123. sambamba, ['sort',
  124. '-t', threads,
  125. bam
  126. ], (message: string) => onData('[SAMBAMBA-SORT] ' + message))
  127. onData('[SAMBAMBA-SORT][EXIT CODE] ' + code_sort)
  128. fs.unlinkSync(bam)
  129. }
  130. resolve(retObj)
  131. } catch (err) {
  132. reject(err)
  133. }
  134. })
  135. }
  136. export { asyncBwaMem }