index.ts 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. import { spawn } from 'child_process';
  2. import { cpus } from 'os';
  3. import fs from 'fs'
  4. import path from 'path';
  /**
   * Runs `prog` with `args` through a shell and streams its output to `onData`.
   *
   * Every chunk emitted on stdout or stderr is converted to a string, trimmed
   * and handed to `onData` (both streams share the same callback).
   *
   * The promise settles on the child's `'close'` event rather than `'exit'`:
   * `'exit'` can fire while the stdio pipes still hold unread data, which would
   * let the promise resolve before the last chunks have reached `onData`.
   *
   * @param prog - Program to run; because `shell: true` is used, the program and
   *   its arguments are interpreted by the shell (pipes and redirections work,
   *   and nothing is escaped).
   * @param args - Arguments appended to `prog`.
   * @param onData - Receives each trimmed output chunk.
   * @returns The exit code reported by the child (`null` when it was terminated
   *   by a signal).
   * @throws Rejects with the error emitted by the child process `'error'` event.
   */
  const async_exec = (
    prog: string,
    args: string[],
    onData: (message: string) => void,
  ): Promise<number | null> =>
    new Promise<number | null>((resolve, reject) => {
      const child = spawn(prog, args, { shell: true });
      const forward = (chunk: { toString(): string }): void => {
        onData(chunk.toString().trim());
      };

      child.stdout.on('data', forward);
      child.stderr.on('data', forward);
      child.on('error', (err) => reject(err));
      // 'close' is emitted once the process has ended AND its stdio streams are closed.
      child.on('close', (code) => resolve(code));
    });
  /**
   * "Inverse replace": walks `string` one UTF-16 code unit at a time, keeping
   * the units that match `regex` and substituting `by` for every unit that
   * does not.
   *
   * @param regex - Pattern describing the characters that are allowed to stay.
   * @param string - Text to filter.
   * @param by - Replacement for every non-matching unit (defaults to `_`).
   * @returns The filtered text.
   */
  const invReplace = (regex: RegExp, string: string, by = '_') => {
    let kept = '';
    for (let i = 0; i < string.length; i += 1) {
      const unit = string.charAt(i);
      kept += unit.match(regex) ? unit : by;
    }
    return kept;
  };
  /**
   * Writes a single-record FASTA file.
   *
   * The header is `>` followed by `sequenceName` with every character outside
   * the allowed set replaced by `_` (via `invReplace`). The sequence is wrapped
   * into lines of at most `lineN` characters.
   *
   * An empty `sequence` produces a header-only file; previously the literal
   * text `undefined` was written as the sequence body because `match()` returns
   * `null` when nothing matches.
   *
   * @param sequenceName - Record name, sanitised before being written.
   * @param sequence - Sequence text to wrap and write.
   * @param filePath - Destination file (overwritten by `fs.promises.writeFile`).
   * @param lineN - Maximum line length of the sequence body (defaults to 80).
   * @returns Resolves to `true` once the file has been written.
   * @throws Rejects with the value `false` on any failure; the underlying error
   *   is only logged with `console.log`. This contract is kept as-is for
   *   existing callers.
   */
  const writeSequence = async (
    sequenceName: string,
    sequence: string,
    filePath: string,
    lineN = 80,
  ): Promise<boolean> => {
    try {
      const lineChunker = new RegExp('.{1,' + lineN + '}', 'g');
      const regex_sam_restriction: RegExp = /[>0-9A-Za-z!#$%&+\./:;?@^_|~-]|[\n\t]/g;
      const nSeqName = invReplace(regex_sam_restriction, sequenceName);
      // match() yields null when there is nothing to chunk (empty sequence).
      const wrappedLines = sequence.match(lineChunker) ?? [];
      await fs.promises.writeFile(filePath, '>' + nSeqName + '\n' + wrappedLines.join('\n'));
      return true;
    } catch (error) {
      console.log(error);
      return Promise.reject<boolean>(false);
    }
  };
  /**
   * Writes `sequence` to `filePath` as FASTA (see `writeSequence`) and then runs
   * `bwa index` on that file.
   *
   * Output of `bwa index` is printed with `console.log`. The previous callback,
   * `() => console.log`, only returned the function without calling it, so the
   * tool's output was silently discarded.
   *
   * @param sequenceName - Record name for the FASTA header.
   * @param sequence - Sequence text.
   * @param filePath - Where the FASTA file is written; also the file that gets indexed.
   * @param lineN - Maximum FASTA line length (defaults to 80).
   * @throws Propagates rejections from `writeSequence` and `async_exec`.
   */
  const makeReference = async (sequenceName: string, sequence: string, filePath: string, lineN = 80) => {
    const written = await writeSequence(sequenceName, sequence, filePath, lineN);
    if (!written) {
      return;
    }
    await async_exec('bwa', ['index', filePath], (message: string) => console.log(message));
  };
  /**
   * Aligns reads against a reference by running
   * `bwa mem ... | samblaster ... | samtools view -Sb - > <bam>` through
   * `async_exec`, then sorts the BAM with `sambamba sort`.
   *
   * Steps, in order:
   *  1. Merge `options` over the defaults (`output_discordant`,
   *     `output_splitted`, `output_unmapped` all `true`).
   *  2. Build the reads argument (see `reads`).
   *  3. Build the samblaster arguments and the map of output paths.
   *  4. Run `bwa index` first when `<refPath>.amb` does not exist.
   *  5. Run the alignment pipeline, report its exit code through `onData`.
   *  6. Unless `options.remove_mapped` is set, run `sambamba sort` and delete the
   *     unsorted BAM. The unsorted BAM is now deleted only when the sort exits
   *     with code 0; previously it was removed even after a failed sort, which
   *     destroyed the only copy of the alignment.
   *
   * NOTE(review): all arguments are passed to a shell by `async_exec`
   * (`shell: true`) without quoting, so paths containing spaces or shell
   * metacharacters will not work as intended.
   *
   * NOTE(review): the declared return type is `Promise<string[]>`, but the value
   * actually resolved is an object keyed by output kind (`bamSorted`,
   * `discordantFile`, `splitterFile`, `unmappedFile`). The signature is left
   * unchanged for compatibility; consider correcting the type together with the
   * callers.
   *
   * @param refPath - Reference file; its base name without extension is used in
   *   every output file name.
   * @param reads - A string is handed to bwa unchanged (single-end). Any array is
   *   treated as paired-end `[R1, R2]`; each mate may be one path or a list of
   *   paths, which are joined with spaces and read through `zcat` when the joined
   *   string ends in `gz`, otherwise through `cat`.
   * @param runName - Written to the `SM` field of the `@RG` read group.
   * @param libName - Written to the `LB` field of the `@RG` read group.
   * @param outputDir - Directory that receives all output files.
   * @param onData - Called with tool output prefixed by `[BWA-INDEX] `,
   *   `[BWA-MEM] ` or `[SAMBAMBA-SORT] `, and with the exit codes.
   * @param options - Optional flags: `output_discordant`, `output_splitted`,
   *   `output_unmapped` (all default `true`) and `remove_mapped` (sends the BAM to
   *   `/dev/null` and skips sorting).
   * @returns The map of produced file paths described above.
   * @throws Rejects with any error raised while building or running the commands,
   *   including a paired-end `reads` array with fewer than two entries.
   */
  const asyncBwaMem = async (
    refPath : string,
    reads : string | Array<string> | Array<Array<string>>,
    runName : string,
    libName : string,
    outputDir : string,
    onData : Function,
    options? : any,
  ): Promise<string[]> => {
    // Caller-supplied flags win over the defaults; the parameter itself is not reassigned.
    const opts = {
      output_discordant: true,
      output_splitted: true,
      output_unmapped: true,
      ...options,
    };

    const refName = path.parse(refPath).name;
    const inOutputDir = (fileName: string): string => path.join(outputDir, fileName);

    const bwa = 'bwa';
    const samblaster = 'samblaster';
    const samtools = 'samtools';
    const sambamba = 'sambamba';

    // --- reads argument -----------------------------------------------------
    const isPairedEnd = Array.isArray(reads);
    let readsIn: string;
    if (Array.isArray(reads)) {
      console.log('Assuming paired end reads');
      const [R1, R2] = reads;
      if (R1 === undefined || R2 === undefined) {
        throw new Error('Paired-end reads must be provided as [R1, R2]');
      }
      // Wraps one mate as a quoted `'< cat|zcat files...'` argument for bwa.
      const asPipedInput = (mate: string | string[]): string => {
        const files = Array.isArray(mate) ? mate.join(' ') : mate;
        const reader = files.endsWith('gz') ? 'zcat' : 'cat';
        return `'< ${reader} ${files}'`;
      };
      readsIn = asPipedInput(R1) + ' ' + asPipedInput(R2);
    } else {
      readsIn = reads;
    }

    // --- output locations ---------------------------------------------------
    const keepMapped = !opts.remove_mapped;
    const bamSorted = inOutputDir(`bwa_mem_properly_on_${refName}.sorted.bam`);
    const bam = keepMapped ? inOutputDir(`bwa_mem_properly_on_${refName}.bam`) : '/dev/null';
    // Typed loosely on purpose: see the NOTE(review) about the declared return type.
    // presumably bamSorted matches the file name `sambamba sort` produces by default — confirm.
    const outputs: any = keepMapped ? { bamSorted } : {};

    const threads = String(cpus().length);

    // --- samblaster arguments (https://github.com/GregoryFaust/samblaster) ---
    const samblasterCmd: Array<string> = ['|', samblaster,
      '--addMateTags',
      '-a', // Accept duplicate marks already in input file
      '-e', // Exclude reads marked as duplicates from discordant, splitter, and/or unmapped
    ];
    if (opts.output_discordant || opts.output_splitted) {
      console.log('Using samblaster');
      if (opts.output_discordant) {
        if (isPairedEnd) {
          const discordantFile = inOutputDir(`bwa_mem_discordants_on_${refName}.sam`);
          console.log('Discordant reads file path: ', discordantFile);
          samblasterCmd.push('-d', discordantFile);
          outputs.discordantFile = discordantFile;
        } else {
          console.log('Discordant reads can be found only in paired reads, skipping');
        }
      }
      if (!isPairedEnd) {
        samblasterCmd.push('--ignoreUnmated');
      }
      if (opts.output_splitted) {
        const splitterFile = inOutputDir(`bwa_mem_splitters_on_${refName}.sam`);
        console.log('Splitted reads file path: ', splitterFile);
        samblasterCmd.push('-s', splitterFile);
        outputs.splitterFile = splitterFile;
      }
    }
    if (opts.output_unmapped) {
      const unmappedFile = inOutputDir(`bwa_mem_unmapped_on_${refName}.fq`);
      console.log('Unmapped reads file path: ', unmappedFile);
      samblasterCmd.push('-u', unmappedFile);
      outputs.unmappedFile = unmappedFile;
    }

    // --- index the reference when no .amb file sits next to it ---------------
    if (!fs.existsSync(refPath + '.amb')) {
      await async_exec(bwa, ['index', refPath], (message: string) => onData('[BWA-INDEX] ' + message));
    }

    console.log(opts, samblasterCmd);

    // --- alignment pipeline ---------------------------------------------------
    const readGroup = `"@RG\\tPL:Illumina\\tID:${Date.now()}\\tSM:${runName}\\tLB:${libName}"`;
    const code = await async_exec(
      bwa,
      [
        'mem',
        '-t', threads,
        '-R', readGroup,
        refPath,
        readsIn,
        ...samblasterCmd,
        '|', samtools, 'view', '-Sb', '-',
        '>', bam,
      ],
      (message: string) => onData('[BWA-MEM] ' + message),
    );
    onData(`[BWA-MEM][EXIT CODE] ${code}`);

    // --- sort, then drop the unsorted intermediate ----------------------------
    if (keepMapped) {
      const code_sort = await async_exec(
        sambamba,
        ['sort', '-t', threads, bam],
        (message: string) => onData('[SAMBAMBA-SORT] ' + message),
      );
      onData(`[SAMBAMBA-SORT][EXIT CODE] ${code_sort}`);
      if (code_sort === 0) {
        fs.unlinkSync(bam);
      } else {
        // Keep the unsorted BAM: after a failed sort it is the only copy of the alignment.
        onData('[SAMBAMBA-SORT] sort did not exit cleanly, keeping unsorted BAM: ' + bam);
      }
    }

    return outputs;
  };
  154. export { asyncBwaMem, writeSequence, makeReference }