Thomas 3 yıl önce
ebeveyn
işleme
89ea026160
2 değiştirilmiş dosya ile 82 ekleme ve 37 silme
  1. 42 23
      index.js
  2. 40 14
      index.ts

+ 42 - 23
index.js

@@ -81,33 +81,52 @@ const clusterSam = (input_sam, threshold, minReads, blackList) => {
             let cluster = 0;
             let firstPos = 0;
             byContigs[name]
+                .filter(a => a)
                 .sort((a, b) => a.position - b.position)
-                .map((e, i, a) => {
-                var _a, _b;
-                if (i === 0) {
-                    if (typeof posAll[name] === 'undefined')
-                        posAll[name] = {};
-                    firstPos = e.position;
+                /*.map((e, i, a) => {
+                    if(i === 0) {
+                        if(typeof posAll[name] === 'undefined') posAll[name] = {'0': ''}
+                        firstPos = e.position
+                    }
+                    if (a.length === 1) {
+                        posAll[name][String(Object.keys(posAll[name]).length - 1)] = String(firstPos)
+                    }
+                    if (Math.abs(e.position - a[i-1]?.position) > threshold) {
+                        posAll[name][String(Object.keys(posAll[name]).length - 1)] = firstPos + '-' + a[i-1]?.position
+                        cluster = cluster + 1
+                        firstPos = e.position
+                    }
+                    if(i === (a.length - 1)) {
+                        posAll[name][String(Object.keys(posAll[name]).length - 1)] = firstPos + '-' + e.position
+                    }
+                    // cluster = Math.abs(e.position - a[i-1]?.position) > threshold ? cluster + 1 : cluster
+                    const clutserName = String(Object.keys(posAll[name]).length - 1) + '@' + name
+                    byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [... new Set([...byReads[e.rname], clutserName])] : [clutserName]
+                })*/
+                .reduce((p, c, i, a) => {
+                const currentCluster = (Object.keys(p).length - 1);
+                let tmp = p;
+                if (p[String(currentCluster)].length > 0 && c.position - Math.max(...p[String(currentCluster)]) > threshold) {
+                    tmp = Object.assign(Object.assign({}, p), { [String(currentCluster)]: Math.min(...p[String(currentCluster)]) + '-' + Math.max(...p[String(currentCluster)]), [String(currentCluster + 1)]: [c.position] });
                 }
-                if (a.length === 1) {
-                    posAll[name][String(cluster)] = String(firstPos);
+                else {
+                    tmp = Object.assign(Object.assign({}, p), { [String(currentCluster)]: [...p[String(currentCluster)], c.position] });
                 }
-                if (Math.abs(e.position - ((_a = a[i - 1]) === null || _a === void 0 ? void 0 : _a.position)) > threshold) {
-                    posAll[name][String(cluster)] = firstPos + '-' + ((_b = a[i - 1]) === null || _b === void 0 ? void 0 : _b.position);
-                    cluster = cluster + 1;
-                    firstPos = e.position;
+                if (i === (a.length - 1)) {
+                    tmp = Object.assign(Object.assign({}, p), { [String(currentCluster)]: Math.min(...p[String(currentCluster)]) + '-' + c.position });
                 }
-                // cluster = Math.abs(e.position - a[i-1]?.position) > threshold ? cluster + 1 : cluster
-                const clutserName = cluster + '@' + name;
-                byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [...new Set([...byReads[e.rname], clutserName])] : [clutserName];
-            });
+                posAll[name] = tmp;
+                const clutserName = String(Object.keys(posAll[name]).length - 1) + '@' + name;
+                byReads[c.rname] = Array.isArray(byReads[c.rname]) ? [...new Set([...byReads[c.rname], clutserName])] : [clutserName];
+                return tmp;
+            }, { '0': [] });
         });
         let byClusters = {};
         Object.keys(byReads).map(rname => {
             const tmpClusterName = byReads[rname].sort().map(e => {
                 const splited = e.split(/@/);
                 return splited[1] + ':' + posAll[splited[1]][splited[0]] + '(' + splited[0] + ')';
-            }).join('<--->');
+            }).join('<>');
             byClusters[tmpClusterName] = Array.isArray(byClusters[tmpClusterName]) ? [...new Set([...byClusters[tmpClusterName], rname])] : [rname];
         });
         Object.keys(byClusters).map(e => byClusters[e].length < minReads ? delete byClusters[e] : null);
@@ -115,9 +134,9 @@ const clusterSam = (input_sam, threshold, minReads, blackList) => {
     }));
 };
 exports.clusterSam = clusterSam;
-/*
-(async () => {
-    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_HG38_Viral.sam', 333, 55,
-    ['NR_145819.1', 'NR_145822.1']));
-})()
-*/ 
+/*(async () => {
+    const bl = (await fs.promises.readFile('/home/thomas/Documents/Programmes/ttest/blackListRNA.txt')).toString().split('\n')
+    console.log(bl);
+
+    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_human_rna.sam', 333, 10, bl));
+})()*/

+ 40 - 14
index.ts

@@ -1,5 +1,5 @@
 import { spawn } from 'child_process';
-
+import fs from 'fs'
 /* (c) Thomas Steimlé 2022 
  * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
  * require os : cat, awk, sort, uniq
@@ -97,24 +97,49 @@ const clusterSam = (
                     let cluster = 0
                     let firstPos = 0
                     byContigs[name]
+                    .filter(a => a)
                     .sort((a, b) => a.position - b.position)
-                    .map((e, i, a) => {
+                    /*.map((e, i, a) => {
                         if(i === 0) {
-                            if(typeof posAll[name] === 'undefined') posAll[name] = {}
+                            if(typeof posAll[name] === 'undefined') posAll[name] = {'0': ''}
                             firstPos = e.position
                         }
                         if (a.length === 1) {
-                            posAll[name][String(cluster)] = String(firstPos)
+                            posAll[name][String(Object.keys(posAll[name]).length - 1)] = String(firstPos)
                         }
                         if (Math.abs(e.position - a[i-1]?.position) > threshold) {
-                            posAll[name][String(cluster)] = firstPos + '-' + a[i-1]?.position
+                            posAll[name][String(Object.keys(posAll[name]).length - 1)] = firstPos + '-' + a[i-1]?.position
                             cluster = cluster + 1
                             firstPos = e.position
                         }
+                        if(i === (a.length - 1)) {
+                            posAll[name][String(Object.keys(posAll[name]).length - 1)] = firstPos + '-' + e.position
+                        }
                         // cluster = Math.abs(e.position - a[i-1]?.position) > threshold ? cluster + 1 : cluster
-                        const clutserName = cluster + '@' + name
+                        const clutserName = String(Object.keys(posAll[name]).length - 1) + '@' + name
                         byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [... new Set([...byReads[e.rname], clutserName])] : [clutserName]
-                    })
+                    })*/
+                    .reduce((p,c,i,a) => {
+                        const currentCluster = (Object.keys(p).length - 1)
+                        let tmp = p
+                        if(p[String(currentCluster)].length > 0 && c.position - Math.max(...p[String(currentCluster)]) > threshold) {
+                            tmp = {...p, 
+                                [String(currentCluster)]: Math.min(...p[String(currentCluster)]) + '-' + Math.max(...p[String(currentCluster)]),
+                                [String(currentCluster + 1)]: [c.position]
+                            }
+                        } else {
+                            tmp = {...p, [String(currentCluster)]: [...p[String(currentCluster)], c.position]}
+                        }
+                        if(i === (a.length-1)) {
+                            tmp = {...p, 
+                                [String(currentCluster)]: Math.min(...p[String(currentCluster)]) + '-' + c.position,
+                            }
+                        }
+                        posAll[name] = tmp
+                        const clutserName = String(Object.keys(posAll[name]).length - 1) + '@' + name
+                        byReads[c.rname] = Array.isArray(byReads[c.rname]) ? [... new Set([...byReads[c.rname], clutserName])] : [clutserName]
+                        return tmp
+                    }, {'0': [] } as {[key: string]: any})
                 })
 
             interface byClusters {
@@ -126,7 +151,7 @@ const clusterSam = (
                 const tmpClusterName = byReads[rname].sort().map(e => {
                     const splited = e.split(/@/)
                     return splited[1] + ':' + posAll[splited[1]][splited[0]] + '(' + splited[0] + ')'
-                }).join('<--->')
+                }).join('<>')
                 byClusters[tmpClusterName] = Array.isArray(byClusters[tmpClusterName]) ? [... new Set([...byClusters[tmpClusterName], rname])] : [rname]
             })
 
@@ -137,9 +162,10 @@ const clusterSam = (
 
 export { clusterSam }
 
-/*
-(async () => {
-    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_HG38_Viral.sam', 333, 55, 
-    ['NR_145819.1', 'NR_145822.1']));
-})()
-*/
+
+/*(async () => {
+    const bl = (await fs.promises.readFile('/home/thomas/Documents/Programmes/ttest/blackListRNA.txt')).toString().split('\n')
+    console.log(bl);
+
+    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_human_rna.sam', 333, 10, bl));
+})()*/