index.js 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. "use strict";
  /**
   * Generator-based async driver (the helper TypeScript emits for `async`
   * functions when targeting runtimes without native async/await).
   *
   * @param {*} thisArg - `this` value used when invoking the generator function.
   * @param {Array|undefined} _arguments - Arguments forwarded to the generator function.
   * @param {Function|undefined} P - Promise constructor to use; defaults to the global `Promise`.
   * @param {Function} generator - Generator function whose yielded values are awaited.
   * @returns {Promise} Settles with the generator's return value, or rejects with
   *   whatever it throws.
   */
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
      P = Promise;
    }
    // Leave instances of P untouched; wrap anything else so `.then` is available.
    const toPromise = (value) => {
      if (value instanceof P) {
        return value;
      }
      return new P((resolve) => {
        resolve(value);
      });
    };
    return new P((resolve, reject) => {
      // Either finish with the generator's return value or wait on the yielded one.
      const settle = (result) => {
        if (result.done) {
          resolve(result.value);
          return;
        }
        toPromise(result.value).then(onFulfilled, onRejected);
      };
      // Builds a continuation that resumes the generator via `next` or `throw`;
      // anything thrown while resuming rejects the outer promise.
      const resumeWith = (method) => (value) => {
        try {
          settle(generator[method](value));
        }
        catch (err) {
          reject(err);
        }
      };
      const onFulfilled = resumeWith("next");
      const onRejected = resumeWith("throw");
      generator = generator.apply(thisArg, _arguments || []);
      settle(generator.next());
    });
  };
  11. Object.defineProperty(exports, "__esModule", { value: true });
  12. exports.clusterSam = void 0;
  13. const child_process_1 = require("child_process");
  14. /* (c) Thomas Steimlé 2022
  15. * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
  * Requires the following OS tools to be available on PATH: cat, awk, sort, uniq
  17. *
  18. */
  /**
   * Runs a command line through the system shell and streams its output to callbacks.
   *
   * The program name and arguments are joined with single spaces into one shell
   * command, so shell operators such as `|` may be passed as arguments.
   * NOTE(review): nothing is escaped before reaching the shell - callers must
   * not pass untrusted text in `prog` or `args`.
   *
   * @param {string} prog - Program (first word of the command line).
   * @param {string[]} args - Remaining words of the command line.
   * @param {function(Buffer): void} onData - Called with each raw stdout chunk.
   * @param {function(string): void} onErr - Called with each stderr chunk, decoded and trimmed.
   * @returns {Promise<number|null>} Resolves with the exit code once the process has
   *   ended and its stdio streams are closed; rejects if the process cannot be spawned.
   */
  const async_exec = (prog, args, onData, onErr) => {
    return new Promise((resolve, reject) => {
      // Build the command string ourselves: this is what spawn does internally
      // when `shell` is set, and it avoids handing an args array to a shell
      // spawn (deprecated in recent Node.js releases).
      const commandLine = [prog, ...(args || [])].join(' ');
      const child = child_process_1.spawn(commandLine, { shell: true });
      child.stdout.on('data', (data) => onData(data));
      child.stderr.on('data', (data) => onErr(data.toString().trim()));
      child.on('error', (err) => reject(err));
      // 'close' (not 'exit'): 'exit' can fire while stdout still holds
      // undelivered data, which would let callers continue with partial output.
      child.on('close', (code) => resolve(code));
    });
  };
  /**
   * Splits one line of the awk output into its first three tab-separated fields.
   * Missing fields fall back to '' / position 0.
   *
   * @param {string} line - One output line: read name, contig, position[, "XA"].
   * @returns {{rname: string, contig: string, position: number}}
   */
  const parseSamHit = (line) => {
    const [rname = '', contig = '', rawPosition = '0'] = line.split('\t');
    return { rname, contig, position: Number(rawPosition) };
  };

  /**
   * Groups the hits of a single contig into positional clusters.
   *
   * Hits are visited in ascending position order; a new cluster starts whenever
   * the gap to the previous hit is strictly greater than `threshold`.
   *
   * @param {Array<{rname: string, position: number}>} hits - Hits on one contig.
   * @param {number} threshold - Largest gap allowed inside a cluster.
   * @returns {{ranges: Object<string, string>, members: Array<{rname: string, cluster: number}>}}
   *   `ranges` maps each cluster index to its "min-max" label (a one-hit cluster is
   *   "pos-pos"); `members` gives the cluster index of every hit, in sorted order.
   */
  const clusterHitsOnContig = (hits, threshold) => {
    const ranges = Object.create(null);
    const members = [];
    let cluster = -1;
    let start = 0;
    let end = 0;
    hits
      .slice()
      .sort((a, b) => a.position - b.position)
      .forEach((hit) => {
        if (cluster === -1 || hit.position - end > threshold) {
          cluster += 1;
          start = hit.position;
        }
        // Input is sorted, so the newest hit is always the cluster maximum.
        end = hit.position;
        ranges[cluster] = `${start}-${end}`;
        members.push({ rname: hit.rname, cluster });
      });
    return { ranges, members };
  };

  /**
   * Clusters the reads of one or more SAM files by where they align.
   *
   * A shell pipeline (`cat | awk | sort | uniq`) extracts, for every alignment
   * line, the read name (column 1), contig (column 3) and position (column 4),
   * plus the first alternative hit of an `XA:Z:` tag when present. Hits are
   * grouped per contig into positional clusters, and reads sharing exactly the
   * same set of clusters are reported together.
   *
   * NOTE(review): the input path(s) are inserted into the shell command
   * unquoted, so paths containing spaces or shell metacharacters are not
   * supported and must not come from untrusted input.
   *
   * @param {string|string[]} input_sam - SAM file path, or several paths.
   * @param {number} threshold - Largest gap between neighbouring positions that
   *   still belong to the same cluster.
   * @param {number} minReads - Groups with fewer reads than this are dropped.
   * @param {string[]} [blackList] - Contig names to ignore.
   * @returns {Promise<Array<{clusterName: string, rnames: string[]}>>} Groups sorted
   *   by decreasing read count. `clusterName` joins "contig:min-max(index)" labels
   *   with "<>". Rejects if the pipeline cannot be spawned.
   */
  const clusterSam = (input_sam, threshold, minReads, blackList) => {
    const inputSam = Array.isArray(input_sam) ? input_sam.join(' ') : input_sam;
    // Constant-time lookup for array black lists; any other truthy value keeps
    // going through its own `includes`, exactly as before.
    const blackSet = Array.isArray(blackList) ? new Set(blackList) : null;
    const isBlackListed = (contig) => {
      if (blackSet) {
        return blackSet.has(contig);
      }
      return blackList ? blackList.includes(contig) : false;
    };
    // Prototype-less dictionaries: keys come from file content.
    const byContigs = Object.create(null);
    const addLine = (line) => {
      if (line === '') {
        return; // blank lines carry no hit
      }
      const hit = parseSamHit(line);
      if (isBlackListed(hit.contig)) {
        return;
      }
      if (byContigs[hit.contig] === undefined) {
        byContigs[hit.contig] = [];
      }
      byContigs[hit.contig].push({ rname: hit.rname, position: hit.position });
    };
    // Holds the trailing partial line of a chunk until the next chunk arrives.
    let lineAcc = '';
    return async_exec('cat', [
      inputSam,
      '|',
      'awk', '\'$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}\'',
      '|',
      'sort',
      '|',
      'uniq'
    ], (chunk) => {
      const lines = (lineAcc + chunk).split(/\n/);
      lineAcc = lines.pop();
      lines.forEach(addLine);
    }, console.log).then(() => {
      // Flush a final line that was not newline-terminated.
      addLine(lineAcc);
      lineAcc = '';
      const posAll = Object.create(null); // contig -> { clusterIndex: "min-max" }
      const byReads = Object.create(null); // read name -> Set of "index@contig"
      Object.keys(byContigs).forEach((contig) => {
        const clustered = clusterHitsOnContig(byContigs[contig], threshold);
        posAll[contig] = clustered.ranges;
        clustered.members.forEach((member) => {
          if (byReads[member.rname] === undefined) {
            byReads[member.rname] = new Set();
          }
          byReads[member.rname].add(`${member.cluster}@${contig}`);
        });
      });
      const byClusters = Object.create(null); // group label -> read names
      Object.keys(byReads).forEach((rname) => {
        const groupName = [...byReads[rname]].sort().map((key) => {
          // Split on the first '@' only: the index never contains one, a contig name may.
          const at = key.indexOf('@');
          const cluster = key.slice(0, at);
          const contig = key.slice(at + 1);
          return `${contig}:${posAll[contig][cluster]}(${cluster})`;
        }).join('<>');
        if (byClusters[groupName] === undefined) {
          byClusters[groupName] = [];
        }
        byClusters[groupName].push(rname);
      });
      return Object.keys(byClusters)
        .filter((clusterName) => !(byClusters[clusterName].length < minReads))
        .map((clusterName) => ({ clusterName, rnames: byClusters[clusterName] }))
        .sort((a, b) => b.rnames.length - a.rnames.length);
    });
  };
  136. exports.clusterSam = clusterSam;
  137. /*(async () => {
  138. const bl = (await fs.promises.readFile('/home/thomas/Documents/Programmes/ttest/blackListRNA.txt')).toString().split('\n')
  139. console.log(bl);
  140. console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_human_rna.sam', 333, 10, bl));
  141. })()*/