|
@@ -1,5 +1,5 @@
|
|
|
import { spawn } from 'child_process';
|
|
import { spawn } from 'child_process';
|
|
|
-
|
|
|
|
|
|
|
+import fs from 'fs'
|
|
|
/* (c) Thomas Steimlé 2022
|
|
/* (c) Thomas Steimlé 2022
|
|
|
* cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
|
|
* cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
|
|
|
* require os : cat, awk, sort, uniq
|
|
* require os : cat, awk, sort, uniq
|
|
@@ -97,24 +97,49 @@ const clusterSam = (
|
|
|
let cluster = 0
|
|
let cluster = 0
|
|
|
let firstPos = 0
|
|
let firstPos = 0
|
|
|
byContigs[name]
|
|
byContigs[name]
|
|
|
|
|
+ .filter(a => a)
|
|
|
.sort((a, b) => a.position - b.position)
|
|
.sort((a, b) => a.position - b.position)
|
|
|
- .map((e, i, a) => {
|
|
|
|
|
|
|
+ /*.map((e, i, a) => {
|
|
|
if(i === 0) {
|
|
if(i === 0) {
|
|
|
- if(typeof posAll[name] === 'undefined') posAll[name] = {}
|
|
|
|
|
|
|
+ if(typeof posAll[name] === 'undefined') posAll[name] = {'0': ''}
|
|
|
firstPos = e.position
|
|
firstPos = e.position
|
|
|
}
|
|
}
|
|
|
if (a.length === 1) {
|
|
if (a.length === 1) {
|
|
|
- posAll[name][String(cluster)] = String(firstPos)
|
|
|
|
|
|
|
+ posAll[name][String(Object.keys(posAll[name]).length - 1)] = String(firstPos)
|
|
|
}
|
|
}
|
|
|
if (Math.abs(e.position - a[i-1]?.position) > threshold) {
|
|
if (Math.abs(e.position - a[i-1]?.position) > threshold) {
|
|
|
- posAll[name][String(cluster)] = firstPos + '-' + a[i-1]?.position
|
|
|
|
|
|
|
+ posAll[name][String(Object.keys(posAll[name]).length - 1)] = firstPos + '-' + a[i-1]?.position
|
|
|
cluster = cluster + 1
|
|
cluster = cluster + 1
|
|
|
firstPos = e.position
|
|
firstPos = e.position
|
|
|
}
|
|
}
|
|
|
|
|
+ if(i === (a.length - 1)) {
|
|
|
|
|
+ posAll[name][String(Object.keys(posAll[name]).length - 1)] = firstPos + '-' + e.position
|
|
|
|
|
+ }
|
|
|
// cluster = Math.abs(e.position - a[i-1]?.position) > threshold ? cluster + 1 : cluster
|
|
// cluster = Math.abs(e.position - a[i-1]?.position) > threshold ? cluster + 1 : cluster
|
|
|
- const clutserName = cluster + '@' + name
|
|
|
|
|
|
|
+ const clutserName = String(Object.keys(posAll[name]).length - 1) + '@' + name
|
|
|
byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [... new Set([...byReads[e.rname], clutserName])] : [clutserName]
|
|
byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [... new Set([...byReads[e.rname], clutserName])] : [clutserName]
|
|
|
- })
|
|
|
|
|
|
|
+ })*/
|
|
|
|
|
+ .reduce((p,c,i,a) => {
|
|
|
|
|
+ const currentCluster = (Object.keys(p).length - 1)
|
|
|
|
|
+ let tmp = p
|
|
|
|
|
+ if(p[String(currentCluster)].length > 0 && c.position - Math.max(...p[String(currentCluster)]) > threshold) {
|
|
|
|
|
+ tmp = {...p,
|
|
|
|
|
+ [String(currentCluster)]: Math.min(...p[String(currentCluster)]) + '-' + Math.max(...p[String(currentCluster)]),
|
|
|
|
|
+ [String(currentCluster + 1)]: [c.position]
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ tmp = {...p, [String(currentCluster)]: [...p[String(currentCluster)], c.position]}
|
|
|
|
|
+ }
|
|
|
|
|
+ if(i === (a.length-1)) {
|
|
|
|
|
+ tmp = {...p,
|
|
|
|
|
+ [String(currentCluster)]: Math.min(...p[String(currentCluster)]) + '-' + c.position,
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ posAll[name] = tmp
|
|
|
|
|
+ const clutserName = String(Object.keys(posAll[name]).length - 1) + '@' + name
|
|
|
|
|
+ byReads[c.rname] = Array.isArray(byReads[c.rname]) ? [... new Set([...byReads[c.rname], clutserName])] : [clutserName]
|
|
|
|
|
+ return tmp
|
|
|
|
|
+ }, {'0': [] } as {[key: string]: any})
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
interface byClusters {
|
|
interface byClusters {
|
|
@@ -126,7 +151,7 @@ const clusterSam = (
|
|
|
const tmpClusterName = byReads[rname].sort().map(e => {
|
|
const tmpClusterName = byReads[rname].sort().map(e => {
|
|
|
const splited = e.split(/@/)
|
|
const splited = e.split(/@/)
|
|
|
return splited[1] + ':' + posAll[splited[1]][splited[0]] + '(' + splited[0] + ')'
|
|
return splited[1] + ':' + posAll[splited[1]][splited[0]] + '(' + splited[0] + ')'
|
|
|
- }).join('<--->')
|
|
|
|
|
|
|
+ }).join('<>')
|
|
|
byClusters[tmpClusterName] = Array.isArray(byClusters[tmpClusterName]) ? [... new Set([...byClusters[tmpClusterName], rname])] : [rname]
|
|
byClusters[tmpClusterName] = Array.isArray(byClusters[tmpClusterName]) ? [... new Set([...byClusters[tmpClusterName], rname])] : [rname]
|
|
|
})
|
|
})
|
|
|
|
|
|
|
@@ -137,9 +162,10 @@ const clusterSam = (
|
|
|
|
|
|
|
|
export { clusterSam }
|
|
export { clusterSam }
|
|
|
|
|
|
|
|
-/*
|
|
|
|
|
-(async () => {
|
|
|
|
|
- console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_HG38_Viral.sam', 333, 55,
|
|
|
|
|
- ['NR_145819.1', 'NR_145822.1']));
|
|
|
|
|
-})()
|
|
|
|
|
-*/
|
|
|
|
|
|
|
+
|
|
|
|
|
+/*(async () => {
|
|
|
|
|
+ const bl = (await fs.promises.readFile('/home/thomas/Documents/Programmes/ttest/blackListRNA.txt')).toString().split('\n')
|
|
|
|
|
+ console.log(bl);
|
|
|
|
|
+
|
|
|
|
|
+ console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_human_rna.sam', 333, 10, bl));
|
|
|
|
|
+})()*/
|