| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- import { spawn } from 'child_process';
- /* (c) Thomas Steimlé 2022
- * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
- * require os : cat, awk, sort, uniq
- *
- */
- const async_exec = (prog: string, args: string[], onData: Function, onErr: Function) => {
- return new Promise((resolve, reject) => {
- const child = spawn(prog, args, {shell: true})
- child.stdout.on('data', data => onData(data/*.toString().trim()*/))
- child.stderr.on('data', data => onErr(data.toString().trim()))
- child.on('error', err => reject(err))
- child.on('exit', code => resolve(code))
- })
- }
- const clusterSam = (
- input_sam: string | Array<string>,
- threshold: number,
- minReads : number
- ) => {
- return new Promise<any>( async (resolve, _reject) => {
- let inputSam: string = Array.isArray(input_sam) ? input_sam.join(' ') : input_sam
- let lineAcc: string = ''
- interface position {
- rname: string;
- position: number;
- }
- interface byContigs {
- [key: string]: position[]
- }
- let byContigs: byContigs = {}
- await async_exec('cat', [
- inputSam,
- '|',
- 'awk', '\'$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}\'', //skip header
- '|',
- 'sort',
- '|',
- 'uniq'
- ], (m: string) => {
- let tmpSeq: string[] = (lineAcc + m).split(/\n/)
- lineAcc = tmpSeq.pop() ! // 'uck typescript
- tmpSeq.map(e => {
- let tmpName: string = ''
- let tmpPos: position = {rname: '', position: 0}
- e.split(/\t/).map((el, i) => {
- switch (i) {
- case 0:
- tmpPos['rname'] = el
- break;
- case 1:
- tmpName = el
- break;
- case 2:
- tmpPos['position'] = Number(el)
- break;
- default:
- break;
- }
- })
- if (Array.isArray(byContigs[tmpName])) {
- byContigs[tmpName].push(tmpPos)
- } else {
- byContigs[tmpName] = [tmpPos]
- }
- })
- }, console.log)
- interface byReads {
- [key: string]: string[]
- }
- let byReads: byReads = {}
- interface posObj {
- [key: string]: string
- }
- interface posAll {
- [key: string]: posObj
- }
- let posAll: posAll = {}
- Object
- .keys(byContigs)
- .map(name => {
- let cluster = 0
- let firstPos = 0
- byContigs[name]
- .sort((a, b) => a.position - b.position)
- .map((e, i, a) => {
- if(i === 0) {
- if(typeof posAll[name] === 'undefined') posAll[name] = {}
- firstPos = e.position
- }
- if (a.length === 1) {
- posAll[name][String(cluster)] = String(firstPos)
- }
- if (Math.abs(e.position - a[i-1]?.position) > threshold) {
- posAll[name][String(cluster)] = firstPos + '-' + a[i-1]?.position
- cluster = cluster + 1
- firstPos = e.position
- }
- // cluster = Math.abs(e.position - a[i-1]?.position) > threshold ? cluster + 1 : cluster
- const clutserName = cluster + '@' + name
- byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [... new Set([...byReads[e.rname], clutserName])] : [clutserName]
- })
- })
- interface byClusters {
- [key: string]: string[]
- }
- let byClusters: byClusters = {}
-
- Object.keys(byReads).map(rname => {
- const tmpClusterName = byReads[rname].sort().map(e => {
- const splited = e.split(/@/)
- return splited[1] + ':' + posAll[splited[1]][splited[0]] + '(' + splited[0] + ')'
- }).join('<--->')
- byClusters[tmpClusterName] = Array.isArray(byClusters[tmpClusterName]) ? [... new Set([...byClusters[tmpClusterName], rname])] : [rname]
- })
- Object.keys(byClusters).map(e => byClusters[e].length < minReads ? delete byClusters[e] : null);
- resolve((Object.keys(byClusters).map(clusterName => ({clusterName, rnames: byClusters[clusterName]})).sort((a:any,b:any) => b.rnames.length - a.rnames.length)) )
- })
- }
- export { clusterSam }
- /*
- (async () => {
- console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_HG38_Viral.sam', 333, 55));
- })()
- */
|