"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.clusterSam = void 0;
const child_process_1 = require("child_process");
/* (c) Thomas Steimlé 2022
 *
 * Shell equivalent of the extraction step performed below:
 * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
 *
 * Requires on the host OS: cat, awk, sort, uniq
 */

/**
 * awk program (already wrapped in single quotes for the shell) that skips
 * lines starting with '@' and prints `field1 <TAB> field3 <TAB> field4` for
 * every other line. When the line carries an `XA:Z:` tag, a second row is
 * printed for the first entry of that tag, with the leading character of its
 * second comma-separated value removed and a fourth column `XA`.
 */
const EXTRACT_AWK_PROGRAM = '\'$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}\'';

/**
 * Run `prog` with `args` through a shell and stream its output to callbacks.
 *
 * NOTE(review): because `shell: true` is used, `prog` and `args` are joined
 * into one shell command line without any escaping. Paths containing spaces
 * or shell metacharacters are interpreted by the shell.
 *
 * @param {string} prog - Program to start.
 * @param {string[]} args - Arguments (may contain shell operators such as '|').
 * @param {(chunk: string) => void} onData - Called with each decoded stdout chunk.
 * @param {(message: string) => void} onErr - Called with each trimmed stderr chunk.
 * @returns {Promise<number|null>} Resolves with the exit code once the process
 *   has ended AND its stdio streams are closed; rejects if the process cannot
 *   be spawned or if `onData` throws.
 */
const async_exec = (prog, args, onData, onErr) => {
    return new Promise((resolve, reject) => {
        const child = child_process_1.spawn(prog, args, { shell: true });
        // Decode at the stream level so that a multi-byte character split
        // across two chunks is not corrupted.
        child.stdout.setEncoding('utf8');
        child.stdout.on('data', (data) => {
            try {
                onData(data);
            }
            catch (err) {
                reject(err);
            }
        });
        child.stderr.on('data', (data) => onErr(data.toString().trim()));
        child.on('error', (err) => reject(err));
        // 'close' (not 'exit'): 'exit' may fire while stdout still holds
        // unread data, which would let the caller continue too early.
        child.on('close', (code) => resolve(code));
    });
};

/**
 * Parse one `rname <TAB> contig <TAB> position[...]` row and append it to the
 * per-contig table. Rows without a contig name or with a non-numeric position
 * are ignored.
 *
 * @param {Object<string, Array<{rname: string, position: number}>>} byContigs
 * @param {string} line
 */
const addAlignmentLine = (byContigs, line) => {
    const fields = line.split(/\t/);
    if (fields.length < 3 || fields[1] === '') {
        return;
    }
    const position = Number(fields[2]);
    if (!Number.isFinite(position)) {
        return;
    }
    const contig = fields[1];
    if (!byContigs[contig]) {
        byContigs[contig] = [];
    }
    byContigs[contig].push({ rname: fields[0], position });
};

/**
 * Split the hits of every contig into clusters: hits are sorted by position
 * and a new cluster starts whenever the gap to the previous hit exceeds
 * `threshold`.
 *
 * @param {Object<string, Array<{rname: string, position: number}>>} byContigs
 * @param {number} threshold
 * @returns {{byReads: Object<string, string[]>, posAll: Object<string, Object<string, string>>}}
 *   `byReads` maps a read name to its distinct `cluster@contig` labels;
 *   `posAll[contig][cluster]` is the `first-last` position range of a cluster.
 */
const assignClusters = (byContigs, threshold) => {
    // Null-prototype objects: keys come from input data, and plain-object key
    // ordering is what the result ordering is derived from.
    const byReads = Object.create(null);
    const posAll = Object.create(null);
    Object.keys(byContigs).forEach((name) => {
        const hits = byContigs[name].sort((a, b) => a.position - b.position);
        const ranges = Object.create(null);
        posAll[name] = ranges;
        let cluster = 0;
        let firstPos = hits[0].position;
        hits.forEach((hit, i) => {
            if (i > 0 && Math.abs(hit.position - hits[i - 1].position) > threshold) {
                // Close the current cluster and open the next one.
                ranges[String(cluster)] = firstPos + '-' + hits[i - 1].position;
                cluster = cluster + 1;
                firstPos = hit.position;
            }
            const label = cluster + '@' + name;
            const labels = byReads[hit.rname];
            if (!labels) {
                byReads[hit.rname] = [label];
            }
            else if (!labels.includes(label)) {
                labels.push(label);
            }
        });
        // The last (or only) cluster is never followed by a gap, so it has to
        // be closed explicitly; otherwise its range would be missing.
        ranges[String(cluster)] = firstPos + '-' + hits[hits.length - 1].position;
    });
    return { byReads, posAll };
};

/**
 * Group reads by the exact set of clusters they hit.
 *
 * @param {Object<string, string[]>} byReads
 * @param {Object<string, Object<string, string>>} posAll
 * @param {number} minReads - Groups with fewer reads than this are dropped.
 * @returns {Array<{clusterName: string, rnames: string[]}>} Sorted by
 *   decreasing number of reads.
 */
const groupReadsByClusters = (byReads, posAll, minReads) => {
    const byClusters = Object.create(null);
    Object.keys(byReads).forEach((rname) => {
        const clusterName = byReads[rname]
            .sort()
            .map((label) => {
            // The cluster index never contains '@', so the first '@' is the
            // separator even when the contig name itself contains one.
            const at = label.indexOf('@');
            const cluster = label.slice(0, at);
            const contig = label.slice(at + 1);
            return contig + ':' + posAll[contig][cluster] + '(' + cluster + ')';
        })
            .join('<--->');
        // Read names are unique keys of byReads, so a plain push is enough.
        if (byClusters[clusterName]) {
            byClusters[clusterName].push(rname);
        }
        else {
            byClusters[clusterName] = [rname];
        }
    });
    return Object.keys(byClusters)
        // Written as !(a < b) so that an undefined minReads keeps everything.
        .filter((clusterName) => !(byClusters[clusterName].length < minReads))
        .map((clusterName) => ({ clusterName, rnames: byClusters[clusterName] }))
        .sort((a, b) => b.rnames.length - a.rnames.length);
};

/**
 * Cluster the alignment positions found in one or several SAM files and
 * group reads by the combination of clusters they belong to.
 *
 * The files are piped through `cat | awk | sort | uniq` (these programs must
 * be available on the host).
 *
 * NOTE(review): the path(s) are inserted unescaped into a shell command line.
 *
 * @param {string|string[]} input_sam - Path, or array of paths joined with a space.
 * @param {number} threshold - Maximum gap between two consecutive positions of
 *   the same contig for them to stay in the same cluster.
 * @param {number} minReads - Minimum number of reads a group must contain.
 * @returns {Promise<Array<{clusterName: string, rnames: string[]}>>} Groups
 *   sorted by decreasing read count. `clusterName` is a `<--->`-joined list of
 *   `contig:first-last(clusterIndex)`. The promise rejects if the pipeline
 *   cannot be started or if processing fails.
 */
const clusterSam = (input_sam, threshold, minReads) => {
    const inputSam = Array.isArray(input_sam) ? input_sam.join(' ') : input_sam;
    const byContigs = Object.create(null);
    let lineAcc = '';
    const onChunk = (chunk) => {
        // A chunk may end in the middle of a line: keep the tail for later.
        const lines = (lineAcc + chunk).split(/\n/);
        lineAcc = lines.pop();
        lines.forEach((line) => addAlignmentLine(byContigs, line));
    };
    return async_exec('cat', [
        inputSam,
        '|', 'awk', EXTRACT_AWK_PROGRAM,
        '|', 'sort',
        '|', 'uniq'
    ], onChunk, console.log).then(() => {
        // Flush a final line that was not newline-terminated.
        if (lineAcc !== '') {
            addAlignmentLine(byContigs, lineAcc);
            lineAcc = '';
        }
        const clustered = assignClusters(byContigs, threshold);
        return groupReadsByClusters(clustered.byReads, clustered.posAll, minReads);
    });
};
exports.clusterSam = clusterSam;
/* Usage example:
(async () => {
    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_HG38_Viral.sam', 333, 55));
})()
*/