Thomas 7 сар өмнө
parent
commit
8fed952f37

+ 9 - 9
src/collection/bam.rs

@@ -28,7 +28,7 @@ pub struct WGSBam {
     pub modified: DateTime<Utc>,
     pub bam_stats: WGSBamStats,
     // pub cramino: Option<CraminoRes>,
-    pub composition: Vec<(String, f64)>, // acquisition id
+    pub composition: Vec<(String, String, f64)>, // acquisition id, fn
 }
 
 // #[derive(Debug, PartialEq, Clone, Deserialize, Serialize)]
@@ -162,7 +162,7 @@ impl BamCollection {
     pub fn by_acquisition_id(&self) -> HashMap<String, Vec<&WGSBam>> {
         let mut acq: HashMap<String, Vec<&WGSBam>> = HashMap::new();
         for bam in self.bams.iter() {
-            for (acq_id, _) in bam.composition.iter() {
+            for (acq_id, _, _) in bam.composition.iter() {
                 if let Some(entry) = acq.get_mut(acq_id) {
                     entry.push(bam);
                 } else {
@@ -229,22 +229,22 @@ pub fn load_bam_collection(result_dir: &str) -> BamCollection {
     BamCollection { bams }
 }
 
-pub fn bam_compo(file_path: &str, sample_size: usize) -> anyhow::Result<Vec<(String, f64)>> {
+pub fn bam_compo(file_path: &str, sample_size: usize) -> anyhow::Result<Vec<(String, String, f64)>> {
     let mut bam = rust_htslib::bam::Reader::from_path(file_path)?;
 
-    let mut rgs: HashMap<String, u64> = HashMap::new();
+    let mut rgs: HashMap<( String, String ), u64> = HashMap::new();
     for result in bam.records().filter_map(Result::ok).take(sample_size) {
-        if let rust_htslib::bam::record::Aux::String(s) = result.aux(b"RG")? {
-            *rgs.entry(s.to_string()).or_default() += 1;
+        if let ( rust_htslib::bam::record::Aux::String(s),  rust_htslib::bam::record::Aux::String(b)) = ( result.aux(b"RG")?, result.aux(b"fn")? ) {
+            *rgs.entry(( s.to_string(), b.to_string())).or_default() += 1;
         }
     }
 
     Ok(rgs
         .into_iter()
-        .map(|(rg, n)| (rg.to_string(), n as f64 * 100.0 / sample_size as f64))
-        .map(|(rg, p)| {
+        .map(|(k, n)| (k.0.to_string(), k.1.to_string(), n as f64 * 100.0 / sample_size as f64))
+        .map(|(rg, f, p)| {
             let (a, _) = rg.split_once('_').unwrap();
-            (a.to_string(), p)
+            (a.to_string(), f, p)
         })
         .collect())
 }

+ 2 - 2
src/commands/dorado.rs

@@ -195,11 +195,11 @@ impl Dorado {
     pub fn merge_bam(&self, bam: &Path) -> anyhow::Result<()> {
         let composition_a: Vec<String> = bam_compo(bam.to_string_lossy().as_ref(), 20000)?
             .iter()
-            .map(|(i, _)| i.clone())
+            .map(|(i, _, _)| i.clone())
             .collect();
         let composition_b: Vec<String> = bam_compo(&self.bam, 20000)?
             .iter()
-            .map(|(i, _)| i.clone())
+            .map(|(i, _, _)| i.clone())
             .collect();
         let n_id = composition_a
             .iter()