"use strict";
/**
 * Drives a generator function as if it were an async function (this has the
 * shape of the helper the TypeScript compiler emits for `async`/`await`).
 *
 * Every yielded value is adopted into a promise; the generator is resumed with
 * the fulfilled value, or has the rejection reason thrown into it.
 *
 * @param {*} thisArg - `this` value used when invoking `generator`.
 * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to `generator`.
 * @param {PromiseConstructor} [P] - Promise implementation; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator function to run to completion.
 * @returns {Promise<*>} Settles with the generator's return value, or rejects
 *   with any error that escapes it.
 */
// `var` plus the `this.__awaiter` lookup lets an already-installed helper be reused.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap plain values so that `.then` can always be called on them.
    function adopt(value) {
        return value instanceof P ? value : new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with a fulfilled value.
        function fulfilled(value) {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        }
        // Throw a rejection reason into the generator so its try/catch can see it.
        function rejected(value) {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        }
        // Finish when the generator is done, otherwise wait for the yielded value.
        function step(result) {
            if (result.done) {
                resolve(result.value);
            }
            else {
                adopt(result.value).then(fulfilled, rejected);
            }
        }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// CommonJS module prelude: flag the exports object as a transpiled ES module
// and declare the `clusterSam` export slot before it is assigned further down.
Object.defineProperty(exports, "__esModule", { value: true });
exports.clusterSam = void 0;
const child_process_1 = require("child_process");
/* (c) Thomas Steimlé 2022
 * Shell one-liner corresponding to the awk extraction step used below:
 * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
 * Requires the following OS tools: cat, awk, sort, uniq.
 *
 */
/**
 * Run a command through the system shell and stream its output to callbacks.
 *
 * The process is spawned with `shell: true`, so shell operators such as `|`
 * may be passed as plain entries of `args`. Nothing is escaped here: only
 * trusted text may be passed in.
 *
 * @param {string} prog - Command to run.
 * @param {string[]} args - Arguments / shell tokens appended to `prog`.
 * @param {(chunk: *) => void} onData - Called with every raw stdout chunk.
 * @param {(message: string) => void} onErr - Called with every stderr chunk,
 *   converted to a string and trimmed.
 * @returns {Promise<number|null>} Resolves with the exit code reported by the
 *   child process; rejects if the process emits an `'error'` event.
 */
const async_exec = (prog, args, onData, onErr) => {
    return new Promise((resolve, reject) => {
        const child = (0, child_process_1.spawn)(prog, args, { shell: true });
        child.stdout.on('data', (chunk) => onData(chunk));
        child.stderr.on('data', (chunk) => onErr(chunk.toString().trim()));
        child.on('error', (err) => reject(err));
        // Resolve on 'close' rather than 'exit': at 'exit' the stdio streams may
        // still be open, so the tail of stdout could arrive after the caller
        // has already moved on.
        child.on('close', (code) => resolve(code));
    });
};
/**
 * Parse one `read<TAB>contig<TAB>position[...]` line produced by the awk step.
 * Missing fields fall back to an empty contig name and position 0.
 */
const parseAlignmentLine = (line) => {
    const fields = line.split('\t');
    return {
        contig: fields.length > 1 ? fields[1] : '',
        hit: {
            rname: fields[0],
            position: fields.length > 2 ? Number(fields[2]) : 0,
        },
    };
};
/**
 * Group the hits of a single contig into clusters. Hits are visited by
 * ascending position; a new cluster starts whenever the gap to the previous
 * hit is greater than `threshold`.
 */
const clusterContigHits = (hits, threshold) => {
    const clusters = [];
    [...hits]
        .sort((a, b) => a.position - b.position)
        .forEach((hit) => {
        const current = clusters[clusters.length - 1];
        if (current === undefined || hit.position - current.end > threshold) {
            clusters.push({ start: hit.position, end: hit.position, reads: [hit.rname] });
        }
        else {
            current.end = hit.position;
            current.reads.push(hit.rname);
        }
    });
    return clusters;
};
/**
 * Cluster the alignments of one or more SAM files by position and group the
 * reads by the set of clusters they hit.
 *
 * A shell pipeline (`cat | awk | sort | uniq`) extracts, for every non-header
 * line, the 1st, 3rd and 4th columns (read name, contig, position) and, when
 * an `XA:Z:` tag is present, the first alternative listed in it (its position
 * is taken without its first character — presumably the strand sign). Per
 * contig, hits further apart than `threshold` fall in different clusters.
 * Each read is then labelled with the sorted list of clusters it belongs to,
 * formatted as `contig:start-end(index)` and joined with `<>`.
 *
 * The paths are interpolated into a shell command without escaping, so they
 * must come from a trusted source. The exit status of the pipeline is not
 * checked.
 *
 * @param {string|string[]} input_sam - SAM file path, or list of paths
 *   (joined with spaces on the command line).
 * @param {number} threshold - Largest gap between consecutive positions that
 *   still belong to the same cluster.
 * @param {number} minReads - Cluster groups with fewer reads are dropped.
 * @param {string[]} [blackList] - Contig names to ignore.
 * @returns {Promise<Array<{clusterName: string, rnames: string[]}>>} Groups
 *   ordered by decreasing number of reads. Rejects if the pipeline cannot be
 *   run or the clustering throws.
 */
const clusterSam = (input_sam, threshold, minReads, blackList) => {
    const inputSam = Array.isArray(input_sam) ? input_sam.join(' ') : input_sam;
    const excluded = new Set(blackList || []);
    // Prototype-less dictionaries: keys come from file content.
    const byContigs = Object.create(null);
    let lineAcc = '';
    const addLine = (line) => {
        if (line === '') {
            return;
        }
        const { contig, hit } = parseAlignmentLine(line);
        if (excluded.has(contig)) {
            return;
        }
        if (byContigs[contig] === undefined) {
            byContigs[contig] = [];
        }
        byContigs[contig].push(hit);
    };
    const onChunk = (chunk) => {
        // A chunk may end in the middle of a line: keep the tail for the next one.
        const lines = (lineAcc + chunk).split('\n');
        lineAcc = lines.pop();
        lines.forEach(addLine);
    };
    // Returning the chain (instead of wrapping it in `new Promise`) lets every
    // failure reach the caller rather than leaving the promise pending.
    return async_exec('cat', [
        inputSam,
        '|',
        'awk', '\'$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}\'',
        '|',
        'sort',
        '|',
        'uniq'
    ], onChunk, console.log).then(() => {
        // Flush a last line that was not newline-terminated.
        addLine(lineAcc);
        lineAcc = '';
        const labelByKey = Object.create(null);
        const keysByRead = Object.create(null);
        Object.keys(byContigs).forEach((contig) => {
            clusterContigHits(byContigs[contig], threshold).forEach((cluster, index) => {
                const key = index + '@' + contig;
                labelByKey[key] = `${contig}:${cluster.start}-${cluster.end}(${index})`;
                cluster.reads.forEach((rname) => {
                    if (keysByRead[rname] === undefined) {
                        keysByRead[rname] = new Set();
                    }
                    keysByRead[rname].add(key);
                });
            });
        });
        const byClusters = Object.create(null);
        Object.keys(keysByRead).forEach((rname) => {
            const clusterName = [...keysByRead[rname]]
                .sort()
                .map((key) => labelByKey[key])
                .join('<>');
            if (byClusters[clusterName] === undefined) {
                byClusters[clusterName] = [];
            }
            byClusters[clusterName].push(rname);
        });
        return Object.keys(byClusters)
            // Written as a negated `<` so that an undefined `minReads` keeps everything.
            .filter((clusterName) => !(byClusters[clusterName].length < minReads))
            .map((clusterName) => ({ clusterName, rnames: byClusters[clusterName] }))
            .sort((a, b) => b.rnames.length - a.rnames.length);
    });
};
// Public API of this module.
exports.clusterSam = clusterSam;
/* Usage example (note: `fs` is not required in this file):
(async () => {
    const bl = (await fs.promises.readFile('/home/thomas/Documents/Programmes/ttest/blackListRNA.txt')).toString().split('\n')
    console.log(bl);
    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_human_rna.sam', 333, 10, bl));
})()*/