|
|
@@ -245,14 +245,14 @@ pub fn load_bam_collection(result_dir: &str) -> BamCollection {
|
|
|
/// # Returns
|
|
|
///
|
|
|
/// Returns a vector of tuples, each containing:
|
|
|
-/// - `String`: the prefix of the `RG` tag (up to the first underscore, or full tag if no underscore)
|
|
|
-/// - `String`: the prefix of the `fn` tag (similarly processed)
|
|
|
-/// - `f64`: percentage of records with this `(RG, fn)` prefix pair, relative to total successfully processed records
|
|
|
+/// - `String`: the <runid> (up to the first underscore of the `RG` tag, or full tag if no underscore)
|
|
|
+/// - `String`: the <flowcell-id> (up to the first underscore of the `fn` tag, or full tag if no underscore)
|
|
|
+/// - `f64`: percentage of records with this `(runid, flowcell-id)` prefix pair, relative to total successfully processed records
|
|
|
///
|
|
|
/// # Errors
|
|
|
///
|
|
|
/// Returns an error if the BAM file cannot be opened or read.
|
|
|
-/// Records missing the `RG` or `fn` tag (or with wrong type) are silently skipped.
|
|
|
+/// Any record in the sampled set does not have both required tags, or the tags are not strings.
|
|
|
///
|
|
|
/// # Examples
|
|
|
///
|
|
|
@@ -279,6 +279,7 @@ pub fn load_bam_collection(result_dir: &str) -> BamCollection {
|
|
|
/// # See also
|
|
|
///
|
|
|
/// - [SAM/BAM tag conventions](https://samtools.github.io/hts-specs/SAMv1.pdf)
|
|
|
+/// - [Dorado SAM specifications](https://github.com/nanoporetech/dorado/blob/release-v1.0/documentation/SAM.md)
|
|
|
///
|
|
|
pub fn bam_composition(
|
|
|
file_path: &str,
|
|
|
@@ -289,21 +290,23 @@ pub fn bam_composition(
|
|
|
|
|
|
let mut bam = Reader::from_path(file_path)?;
|
|
|
let mut rgs: HashMap<(String, String), u64> = HashMap::new();
|
|
|
- let mut total = 0u64;
|
|
|
|
|
|
- for rec in bam.records().filter_map(Result::ok).take(sample_size) {
|
|
|
+ let mut processed = 0u64;
|
|
|
+ for (i, rec) in bam.records().filter_map(Result::ok).take(sample_size).enumerate() {
|
|
|
let rg = match rec.aux(b"RG") {
|
|
|
Ok(Aux::String(s)) => s,
|
|
|
- _ => continue,
|
|
|
+ Ok(_) => return Err(anyhow::anyhow!("Record {}: RG tag is not a string", i + 1)),
|
|
|
+ Err(_) => return Err(anyhow::anyhow!("Record {}: RG tag missing", i + 1)),
|
|
|
};
|
|
|
let fn_tag = match rec.aux(b"fn") {
|
|
|
Ok(Aux::String(b)) => b,
|
|
|
- _ => continue,
|
|
|
+ Ok(_) => return Err(anyhow::anyhow!("Record {}: fn tag is not a string", i + 1)),
|
|
|
+ Err(_) => return Err(anyhow::anyhow!("Record {}: fn tag missing", i + 1)),
|
|
|
};
|
|
|
let rg_prefix = rg.split_once('_').map(|(a, _)| a).unwrap_or(rg);
|
|
|
let fn_prefix = fn_tag.split_once('_').map(|(b, _)| b).unwrap_or(fn_tag);
|
|
|
*rgs.entry((rg_prefix.to_string(), fn_prefix.to_string())).or_default() += 1;
|
|
|
- total += 1;
|
|
|
+ processed += 1;
|
|
|
}
|
|
|
|
|
|
let results: Vec<_> = rgs
|
|
|
@@ -311,7 +314,7 @@ pub fn bam_composition(
|
|
|
.map(|((rg, fn_tag), n)| (
|
|
|
rg,
|
|
|
fn_tag,
|
|
|
- n as f64 * 100.0 / total as f64
|
|
|
+ n as f64 * 100.0 / processed as f64
|
|
|
))
|
|
|
.collect();
|
|
|
|