index.ts 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. import { spawn } from 'child_process';
  2. import fs from 'fs'
  3. /* (c) Thomas Steimlé 2022
  4. * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
  5. * require os : cat, awk, sort, uniq
  6. *
  7. */
  /**
   * Run a command through the system shell and stream its output to callbacks.
   *
   * `prog` and `args` are passed to `spawn` with `shell: true`, so the shell
   * interprets the resulting command line. That is what allows a caller to pass
   * `'|'` as an argument to build a pipeline. Nothing is quoted or escaped here:
   * never pass untrusted text.
   *
   * @param prog   Program (first word of the shell command line) to run.
   * @param args   Arguments appended after `prog`.
   * @param onData Called with every chunk read from the child's stdout, exactly
   *               as emitted by the stream (not decoded, not trimmed).
   * @param onErr  Called with every stderr chunk converted to a trimmed string.
   * @returns A promise resolved with the value reported by the child's `'close'`
   *          event (its exit code, or `null` when there is none). The promise
   *          is rejected when the child emits `'error'`.
   */
  const async_exec = (
    prog: string,
    args: string[],
    onData: Function,
    onErr: Function
  ): Promise<number | null> => {
    return new Promise<number | null>((resolve, reject) => {
      const child = spawn(prog, args, { shell: true });

      child.stdout.on('data', (data) => onData(data));
      child.stderr.on('data', (data) => onErr(data.toString().trim()));
      child.on('error', (err) => reject(err));

      // Resolve on 'close', not 'exit': 'exit' may fire while the stdio streams
      // are still open, so output chunks could reach `onData` after the caller's
      // `await` has already returned. 'close' is emitted once the process has
      // ended AND its stdio streams have been closed.
      child.on('close', (code) => resolve(code));
    });
  };
  /** One alignment start taken from the SAM stream: the read name and its coordinate. */
  interface ClusterSamHit {
    rname: string;
    position: number;
  }

  /**
   * Parse one `read<TAB>contig<TAB>position[<TAB>XA]` line as printed by the awk step.
   *
   * @returns The contig and the hit, or `undefined` for a blank/truncated line or
   *          a position that is not a finite number.
   */
  const parseClusterSamLine = (
    line: string
  ): { contig: string; hit: ClusterSamHit } | undefined => {
    const [rname, contig, rawPosition] = line.split('\t');
    if (rname === undefined || contig === undefined || rawPosition === undefined) {
      return undefined;
    }
    const position = Number(rawPosition);
    if (!Number.isFinite(position)) return undefined;
    return { contig, hit: { rname, position } };
  };

  /**
   * Split the hits of one contig into positional clusters.
   *
   * Hits are visited by ascending position; a hit opens a new cluster when its
   * distance to the previous hit is strictly greater than `threshold`, otherwise
   * it extends the current cluster.
   *
   * @returns `ranges[i]` is the `"start-end"` span of cluster `i`; `members` lists
   *          every hit (in visiting order) with the index of its cluster.
   */
  const clusterContigHits = (
    hits: ClusterSamHit[],
    threshold: number
  ): { ranges: string[]; members: { rname: string; cluster: number }[] } => {
    // Sort a copy so the caller's array is left untouched.
    const sorted = [...hits].sort((a, b) => a.position - b.position);
    const spans: { start: number; end: number }[] = [];
    const members: { rname: string; cluster: number }[] = [];

    for (const hit of sorted) {
      const current = spans[spans.length - 1];
      if (current === undefined || hit.position - current.end > threshold) {
        spans.push({ start: hit.position, end: hit.position });
      } else {
        // Positions are ascending, so the newest hit is always the cluster maximum.
        current.end = hit.position;
      }
      members.push({ rname: hit.rname, cluster: spans.length - 1 });
    }

    return { ranges: spans.map((span) => span.start + '-' + span.end), members };
  };

  /**
   * Cluster the alignment positions found in one or more SAM files and group the
   * reads by the set of clusters they hit.
   *
   * The files are streamed through `cat | awk | sort | uniq` (see the command
   * below): header lines are skipped, and for every record the primary
   * `(read, contig, position)` is emitted plus, when an `XA:Z:` tag is present,
   * the first alternative listed in it. Positions of each contig are then
   * clustered (gap greater than `threshold` starts a new cluster). Every read is
   * labelled with all the clusters it belongs to, formatted as
   * `contig:start-end(index)` and joined with `<>`, and reads sharing the same
   * label are grouped together.
   *
   * NOTE(review): the paths are interpolated into a shell command line without
   * quoting, so paths containing spaces or shell metacharacters will not work as
   * plain file names, and untrusted input must never be passed.
   *
   * @param input_sam SAM file path, or several paths (joined with spaces for `cat`).
   * @param threshold Maximum distance between consecutive positions of one cluster.
   * @param minReads  Groups with fewer reads than this are dropped.
   * @param blackList Optional contig names to ignore.
   * @returns A promise of an array of `{ clusterName, rnames }` objects sorted by
   *          decreasing number of reads. The promise rejects if running the
   *          pipeline or processing its output throws.
   */
  const clusterSam = async (
    input_sam: string | Array<string>,
    threshold: number,
    minReads : number,
    blackList ?: string[]
  ): Promise<any> => {
    const inputSam = Array.isArray(input_sam) ? input_sam.join(' ') : input_sam;
    const excluded = new Set<string>(blackList);

    // Dictionaries are prototype-less so that any contig/read name is a safe key
    // while Object.keys() ordering stays the same as with plain objects.
    const byContigs: Record<string, ClusterSamHit[]> = Object.create(null);

    const addLine = (line: string): void => {
      const parsed = parseClusterSamLine(line);
      if (parsed === undefined || excluded.has(parsed.contig)) return;
      const bucket = byContigs[parsed.contig];
      if (bucket === undefined) {
        byContigs[parsed.contig] = [parsed.hit];
      } else {
        bucket.push(parsed.hit);
      }
    };

    // stdout arrives in arbitrary chunks: keep the trailing partial line until
    // the next chunk completes it.
    let pending = '';
    await async_exec('cat', [
      inputSam,
      '|',
      'awk', '\'$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}\'', //skip header
      '|',
      'sort',
      '|',
      'uniq'
    ], (chunk: { toString(): string }) => {
      const lines = (pending + chunk.toString()).split('\n');
      pending = lines.pop() ?? '';
      lines.forEach((line) => addLine(line));
    }, console.log);
    // Output that does not end with a newline still carries a last record.
    if (pending !== '') addLine(pending);

    // posAll[contig][i] is the "start-end" span of cluster i on that contig;
    // byReads[read] lists the distinct "i@contig" labels of the clusters it hits.
    const posAll: Record<string, string[]> = Object.create(null);
    const byReads: Record<string, string[]> = Object.create(null);
    for (const name of Object.keys(byContigs)) {
      const { ranges, members } = clusterContigHits(byContigs[name], threshold);
      posAll[name] = ranges;
      for (const { rname, cluster } of members) {
        const label = cluster + '@' + name;
        const labels = byReads[rname];
        if (labels === undefined) {
          byReads[rname] = [label];
        } else if (!labels.includes(label)) {
          labels.push(label);
        }
      }
    }

    // Group reads by the (sorted) combination of clusters they belong to.
    const byClusters: Record<string, string[]> = Object.create(null);
    for (const rname of Object.keys(byReads)) {
      const clusterName = [...byReads[rname]]
        .sort()
        .map((label) => {
          // The index is all digits, so the first '@' is the separator even when
          // the contig name itself contains '@'.
          const at = label.indexOf('@');
          const index = label.slice(0, at);
          const contig = label.slice(at + 1);
          return contig + ':' + posAll[contig][Number(index)] + '(' + index + ')';
        })
        .join('<>');
      const reads = byClusters[clusterName];
      if (reads === undefined) {
        byClusters[clusterName] = [rname];
      } else {
        reads.push(rname);
      }
    }

    return Object.keys(byClusters)
      // Written as a negated "<" so a non-numeric minReads keeps every group,
      // exactly as the comparison `length < minReads` would.
      .filter((clusterName) => !(byClusters[clusterName].length < minReads))
      .map((clusterName) => ({ clusterName, rnames: byClusters[clusterName] }))
      .sort((a, b) => b.rnames.length - a.rnames.length);
  };
  152. export { clusterSam }
  153. /*(async () => {
  154. const bl = (await fs.promises.readFile('/home/thomas/Documents/Programmes/ttest/blackListRNA.txt')).toString().split('\n')
  155. console.log(bl);
  156. console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_human_rna.sam', 333, 10, bl));
  157. })()*/