#let cr_colors = ( dark_grey: rgb("#333333"), beige: rgb("#fdf0d5"), light_grey: rgb("#eeeeee"), dark_red: rgb("#780000"), red: rgb("#c1121f"), blue: rgb("#669bbc"), dark_blue: rgb("#003049"), green: rgb("#29bf12"), ) #import "@preview/fletcher:0.5.1" as fletcher: diagram, node, edge #import "@preview/metro:0.3.0": * #import "@preview/cetz:0.2.2" #import "@preview/badgery:0.1.1": * #import "@preview/cmarker:0.1.1" #set page( paper: "a4", footer: locate(loc => [ #set text(10pt) #let today = datetime.today() #if loc.page() != 1 { align(right, counter(page).display("1 / 1", both: true)) } #align(center, [Dr. Thomas Steimlé --- #today.display("[day] [month repr:long] [year]")]) ]), ) #show heading: set text(font: "Futura") #show heading.where(level: 1): it => [ #set align(center) #set text(fill: cr_colors.dark_blue) #it.body #v(18pt) ] #show image: set text(font: "FreeSans") #set text(size: 16pt, fill: cr_colors.dark_blue) #let contigs = ( "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX", "chrY", ) #let parseCustomDate(dateString) = { let parts = dateString.split("T") let datePart = parts.at(0).replace("-", "/") let timePart = parts.at(1).split(":") let hour = timePart.at(0) let minute = timePart.at(1) return datePart + " " + hour + "h" + minute } #let formatString(input) = { let words = input.split("_") let capitalizedWords = words.map(word => { if word.len() > 0 { upper(word.first()) + word.slice(1) } else { word } }) capitalizedWords.join(" ") } #let si-fmt(val, precision: 1, sep: "\u{202F}", binary: false) = { let factor = if binary { 1024 } else { 1000 } let gt1_suffixes = ("k", "M", "G", "T", "P", "E", "Z", "Y") let lt1_suffixes = ("m", "μ", "n", "p", "f", "a", "z", "y") let scale = "" let unit = "" if type(val) == content { if val.has("text") { val = val.text } else if val.has("children") { val = val.children.map(content => content.text).join() } else { panic(val.children.map(content => content.text).join()) } } // if val contains a unit, split it off if type(val) == str { unit = val.find(regex("(\D+)$")) val = float(val.split(unit).at(0)) } if calc.abs(val) > 1 { for suffix in gt1_suffixes { if calc.abs(val) < factor { break } val /= factor scale += " " + suffix } } else if val != 0 and calc.abs(val) < 1 { for suffix in lt1_suffixes { if calc.abs(val) > 1 { break } val *= factor scale += " " + suffix } } let formatted = str(calc.round(val, digits: precision)) formatted + sep + scale.split().at(-1, default: "") + unit } #let reportCoverage(prefix) = { image(prefix + "_global.svg", width: 100%) for contig in contigs { heading(level: 4, contig) let path = prefix + "_" + contig image(path + "_chromosome.svg") let data = json(path + "_stats.json") grid( columns: (1fr, 2fr), gutter: 3pt, align(left + horizon)[ #set text(size: 12pt) #table( stroke: none, columns: (auto, 1fr), gutter: 3pt, [Mean], [#calc.round( data.mean, digits: 2, )], [Standard dev.], [#calc.round( data.std_dev, digits: 2, )], ..data.breaks_values.map(r => ( [#r.at(0)], [#calc.round(r.at(1) * 100, digits: 1)%], )).flatten(), ) ], align(right, image(path + "_distrib.svg", width: 100%)), ) parbreak() } } #let reportBam(path) = { let data = json(path) table( gutter: 3pt, stroke: none, columns: (auto, 1fr), ..for (key, value) in ( data ) { if key != "cramino" and key != "composition" and key != "path" and key != "modified" { ([ #formatString(key) ], [ #value ]) } else if key == "modified" { ([ Modified Date (UTC) ], [ #parseCustomDate(value) ]) } else if key == "composition" { ( [ Run(s) ], [ #for (i, v) in value.enumerate() { if i > 0 [ \ ] [#v.at(0).slice(0, 5): #calc.round(v.at(1), digits: 0)%] } ], ) } else if key == "cramino" { for (k, v) in value { if k == "normalized_read_count_per_chromosome" { } else if k != "path" and k != "checksum" and k != "creation_time" and k != "file_name" { let k = formatString(k) let v = if type(v) == "integer" { si-fmt(v) } else { v } ([ #k ], [ #v ]) } else { () } }.flatten() } else { () } }.flatten(), ) } #let formatedReadCount(path) = { let data = json(path) let data = data.cramino.normalized_read_count_per_chromosome let res = () for contig in contigs { res.push(data.at(contig)) } res.push(data.at("chrM")) return res } #let printReadCount(diag_path, mrd_path) = { let index = 14 let c = contigs c.push("chrM") let diag = formatedReadCount(diag_path) let mrd = formatedReadCount(mrd_path) c.insert(0, "") diag.insert(0, "diag") mrd.insert(0, "mrd") let arrays1 = (c.slice(0, index), diag.slice(0, index), mrd.slice(0, index)) table( columns: arrays1.at(0).len(), ..arrays1 .map(arr => arr.map(item => [#item])) .flatten(), ) let arrays2 = (c.slice(index), diag.slice(index), mrd.slice(index)) arrays2.at(0).insert(0, "") arrays2.at(1).insert(0, "diag") arrays2.at(2).insert(0, "mrd") table( columns: arrays2.at(0).len(), ..arrays2 .map(arr => arr.map(item => [#item])) .flatten(), ) } #let variantsFlow(path) = { import fletcher.shapes: diamond, parallelogram, chevron let data = json(path) set text(8pt) diagram( spacing: (8pt, 25pt), node-fill: gradient.radial( cr_colors.light_grey, cr_colors.blue, radius: 300%, ), node-stroke: cr_colors.dark_blue + 1pt, edge-stroke: 1pt, mark-scale: 70%, node-inset: 8pt, node( (0.2, 0), [Variants MRD: #num(data.vcf_stats.n_tumoral_init)], corner-radius: 2pt, extrude: (0, 3), name: , ), node( (1.8, 0), [Variants Diag: #num(data.vcf_stats.n_constit_init)], corner-radius: 2pt, extrude: (0, 3), name: , ), node( (1, 1), align(center)[Variant in MRD ?], shape: diamond, name: , ), edge(, "s", , "-|>"), edge(, "s", , "-|>"), edge(, , "-|>", [Yes], label-pos: 0.8), node( (0.25, 2), [MRD variant depth \ < 4 ?], shape: diamond, name: , ), edge(, , "-|>"), node( (0, 3), [Low MRD depth: #num(data.vcf_stats.n_low_mrd_depth)], shape: parallelogram, name: , ), edge(, , "-|>", [No], label-pos: 0.8), node( (1.85, 2), [To BAM filters: #num(data.bam_stats.n_lasting)], shape: chevron, extrude: (-3, 0), name: , stroke: cr_colors.green, ), edge(, , "-|>"), node((1.5, 3), [VAF = 100% ?], shape: diamond, name: ), edge(, , "-|>", [Yes], label-pos: 0.5, bend: -80deg), edge(, , "-|>", [No], label-pos: 0.6), node( (1.5, 4), [$#sym.chi^2$ VAF MRD vs Diag ?], shape: diamond, name: , ), edge(, , "-|>", label-pos: 0.8), node( (1, 5), [Constit: #num(data.vcf_stats.n_constit)], shape: parallelogram, name: , ), edge(, , "-|>", [p < 0.01], label-pos: 0.8), node( (2, 5), [LOH: #num(data.vcf_stats.n_loh)], shape: parallelogram, name: , ), ) } #let bamFilter(path) = { import fletcher.shapes: diamond, parallelogram, hexagon let data = json(path) set text(8pt) diagram( spacing: (8pt, 25pt), node-fill: gradient.radial( cr_colors.light_grey, cr_colors.blue, radius: 300%, ), node-inset: 8pt, node-stroke: cr_colors.dark_blue + 1pt, mark-scale: 70%, edge-stroke: 1pt, node( (0.75, 0), [Variants not in MRD VCF: #num(data.bam_stats.n_lasting)], corner-radius: 2pt, extrude: (0, 3), name: , ), edge(, , "-|>"), node((0.75, 1), [MRD alignement depth ?], shape: diamond, name: ), edge(, , "-|>", [< 4]), node( (0, 2), [Low MRD depth: #num(data.bam_stats.n_low_mrd_depth)], shape: parallelogram, name: , ), edge(, , "-|>"), node( (0.75, 3), [Alt. base seen in MRD pileup ?], shape: diamond, name: , ), edge(, , "-|>", [Yes]), node( (0, 4), [Constit: #num(data.bam_stats.n_constit)], shape: parallelogram, name: , ), edge(, , "-|>", [No]), node( (1.1, 4), [Sequence #sym.plus.minus 20nt \ diversity ?], shape: diamond, name: , ), edge(, , "-|>", [entropy < 1.8]), node( (0.25, 5), [Low diversity, artefact: #num(data.bam_stats.n_low_diversity)], shape: parallelogram, name: , ), edge(, , "-|>"), node( (1.75, 5), [Somatic: #num(data.bam_stats.n_somatic)], shape: hexagon, extrude: (-3, 0), name: , stroke: cr_colors.green, ), ) } #let barCallers(path) = { import cetz.draw: * import cetz.chart let json_data = json(path).variants_stats let data = json_data.find(item => item.name == "callers_cat") let chart_data = data.counts.pairs().sorted(key: x => -x.at(1)) set text(11pt) cetz.canvas( length: 80%, { set-style(axes: ( bottom: (tick: (label: (angle: 45deg, anchor: "north-east"))), )) chart.columnchart( chart_data, size: (1, 0.5), ) }, ) } #let truncate(text, max-length) = { if text.len() <= max-length { text } else { text.slice(0, max-length - 3) + "..." } } // // #let add_newlines(text, n) = { // // let result = "" // // let chars = text.clusters() // // for (i, char) in chars.enumerate() { // // result += char // // if calc.rem((i + 1), n == 0 and i < chars.len() - 1 { // // result += "\n" // // } // // } // // result // // } // // #let break_long_words(text, max_length: 20, hyphen: "") = { // let words = text.split(" ") // let result = () // // for word in words { // if word.len() <= max_length { // result.push(word) // } else { // let segments = () // let current_segment = "" // for char in word.clusters() { // if current_segment.len() + 1 > max_length { // segments.push(current_segment + hyphen) // current_segment = "" // } // current_segment += char // } // if current_segment != "" { // segments.push(current_segment) // } // result += segments // } // } // // result.join(" ") // } // #let format_sequence(text, max_length: 40, hyphen: [#linebreak()]) = { let words = text.split(" ") let result = () // result.push("\n") for word in words { if word.len() <= max_length { result.push(word) } else { let segments = () let current_segment = "" for char in word.clusters() { if current_segment.len() + 1 > max_length { segments.push(current_segment + hyphen) current_segment = "" } current_segment += char } if current_segment != "" { segments.push(current_segment) } result += segments } } result.push("") let sequence = result.join(" ") box(width: 100%, par(leading: 0.2em, sequence)) } #let dna(sequence, line_length: 60) = { let formatted = sequence.clusters().map(c => { if c == "A" { text(fill: red)[A] } else if c == "T" { text(fill: green)[T] } else if c == "C" { text(fill: blue)[C] } else if c == "G" { text(fill: orange)[G] } else { c } }) let lines = formatted.chunks(line_length).map(line => line.join()) let n_lines = lines.len() let lines = lines.join("\n") if n_lines > 1 { parbreak() } align( left, box( fill: luma(240), inset: (x: 0.5em, y: 0.5em), radius: 4pt, align( left, text( font: "Fira Code", size: 10pt, lines, ), ), ), ) if n_lines > 1 { parbreak() } } #let to-string(content) = { if content.has("text") { content.text } else if content.has("children") { content.children.map(to-string).join("") } else if content.has("body") { to-string(content.body) } else if content == [ ] { " " } } #let format-number(num) = { let s = str(num).split("").filter(item => item != "") let result = "" let len = s.len() for (i, char) in s.enumerate() { result += char if (i < len - 1 and calc.rem((len - i - 1), 3) == 0) { result += "," } } result } #let format_json(json_data) = { let format_value(value) = { if value == none { "" } else if type(value) == "string" { if value != "." { value.replace(";", ", ").replace("=", ": ") } else { "" } } else if type(value) == "array" { let items = value.map(v => format_value(v)) "[" + items.join(", ") + "]" } else if type(value) == "dictionary" { "{" + format_json(value) + "}" } else { str(value) } } if type(json_data) == "dictionary" { let result = () for (key, value) in json_data { let formatted_value = format_value(value) if formatted_value != "" { if key == "svinsseq" { formatted_value = dna(formatted_value) } result.push(upper(key) + ": " + formatted_value) } } result.join(", ") } else { format_value(json_data) } } #let card(d) = { set text(12pt) let position_fmt = format-number(d.position) let title_bg_color = rgb("#f9fafb00") let grid_content = () let callers_data = json.decode(d.callers_data) // Title let alt = d.alternative // TODO: add that in pandora_lib_variants if d.callers == "Nanomonsv" and alt == "" { alt = d.reference + callers_data.at(0).info.Nanomonsv.svinsseq } let title = d.contig + ":" + position_fmt + " " + d.reference + sym .quote .angle .r .single + truncate(alt, 30) grid_content.push( grid.cell( fill: cr_colors.light_grey, align: center, block(width: 100%, title), ), ) // Consequences if d.consequence != none { let consequences = d.consequence.replace(",", ", ").replace( "_", " ", ) + " " + emph(strong(d.gene)) grid_content.push( grid.cell(fill: cr_colors.light_grey, align: center, consequences), ) } // hgvs_c if d.hgvs_c != none { grid_content.push( grid.cell(fill: rgb("#fef08a"), align: center, truncate(d.hgvs_c, 50)), ) } // hgvs_c if d.hgvs_p != none { grid_content.push( grid.cell(fill: rgb("#fecaca"), align: center, truncate(d.hgvs_p, 50)), ) } // Content let content = () content.push( badge-red("VAF: " + str(calc.round(d.m_vaf * 100, digits: 2)) + "%"), ) // content.push(" ") if d.cosmic_n != none { content.push(badge-red("Cosmic: " + str(d.cosmic_n))) } if d.gnomad_af != none { content.push(badge-blue("GnomAD: " + str(d.gnomad_af))) } let callers_contents = () for caller_data in callers_data { let caller = "" for (k, v) in caller_data.format { caller = k } callers_contents.push(underline(caller) + ":") if caller_data.qual != none { callers_contents.push([ Qual: #caller_data.qual, ]) } callers_contents.push([ #( format_json(caller_data.format.at(caller)), format_json(caller_data.info.at(caller)), ).filter(v => v != "").join(", ") ]) } content.push( grid( columns: 1, inset: 0.5em, ..callers_contents ), ) grid_content.push(grid.cell(fill: white, content.join(" "))) block( breakable: false, width: 100%, grid( columns: 1, inset: 0.5em, stroke: cr_colors.dark_grey, ..grid_content ), ) } #let variants(path, interpretation: "PATHO") = { let data = json(path) let patho = data.filter(d => d.interpretation == interpretation) for var in patho { card(var) } } #set heading(numbering: (..numbers) => { if numbers.pos().len() >= 2 and numbers.pos().len() <= 3 { numbering("1.1", ..numbers.pos().slice(1)) } }) #set list(marker: [---]) #heading(level: 1, outlined: false)[Whole Genome Sequencing Report] #outline(title: "Table of Contents", depth: 3) #pagebreak() == Interprétation #v(0.5cm) #let scoped-content = { show heading: it => { set text(font: "FreeSans", size: 14pt) align(left, it) v(5pt) } cmarker.render( read(sys.inputs.base + "/diag/report/" + sys.inputs.id + "_conclusion.md"), h1-level: 4, ) } #scoped-content #pagebreak() == Sample identity #sys.inputs.id == Alignement #grid( columns: (1fr, 1fr), gutter: 3pt, [ ==== Diagnostic sample #set text(size: 11pt) #reportBam(sys.inputs.base + "/diag/" + sys.inputs.id + "_diag_hs1_info.json") ], [ ==== MRD sample #set text(size: 11pt) #reportBam(sys.inputs.base + "/mrd/" + sys.inputs.id + "_mrd_hs1_info.json") #set footnote(numbering: n => { " " }) #footnote[Values computed by #link("https://github.com/wdecoster/cramino")[cramino] v0.14.5 ] ], ) #pagebreak() === Normalized read count by chromosome #[ #set text(size: 10pt) #printReadCount( sys.inputs.base + "/diag/" + sys.inputs.id + "_diag_hs1_info.json", sys.inputs.base + "/mrd/" + sys.inputs.id + "_mrd_hs1_info.json", ) ] == Variants === Variants calling #pagebreak() ==== VCF filters #pad( top: 0.8cm, align( center, scale( x: 100%, y: 100%, reflow: true, variantsFlow(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_variants_stats.json"), ), ), ) ==== BAM filters #pad( top: 0.8cm, align( center, scale( x: 100%, y: 100%, reflow: true, bamFilter(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_variants_stats.json"), ), ), ) #pagebreak() === Somatic variants ==== Callers #v(0.5cm) #image(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_barcharts_callers.svg") ==== Consequences (VEP) #v(0.5cm) #image(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_barcharts_consequences.svg") ==== NCBI features #v(0.5cm) #image(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_barcharts_ncbi.svg") #pagebreak() === Selected Variants ==== Classification - Pathogenic: experimentally proved that the variant participate in the oncogenic process. - Likely pathogenic: gene or variant that could be linked to the oncogenic process in bibliography. - Unknown significance: somatic variant without more information. ==== Pathogenics #variants( sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_annot_variants.json", interpretation: "PATHO", ) ==== Likely Pathogenics #variants( sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_annot_variants.json", interpretation: "PROBPATHO", ) ==== Variants of Unknown Significance #variants( sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_annot_variants.json", interpretation: "US", ) == Coverage by chromosome === Proportion at given depth by chromosome #reportCoverage(sys.inputs.base + "/diag/report/data/scan/" + sys.inputs.id) #set footnote(numbering: n => { " " }) #footnote[Values computed by Pandora development version] == Method === Sample preparation and sequencing + DNA sampling and collection in EDTA tubes. + Buffy coat: pooling of multiple EDTA tubes then centrifugation (1200 rpm 10 minutes). + DNA extraction according to Maxwell® Promega RSC Buffy Coat DNA Kit. + Nanodrop DNA quantification. + DNA shearing: 3µg of DNA mechanically sheared by Covaris g-TUBE (8000 rpm, 1 minute). + DNA size qualification aiming a median of 10 kb determined by TapeStation. + Libary was constructed following the Oxford Nanopore Technologies Ligation Sequencing Kit V14 (SQK-LSK114) protocol with 1.5 µg as input DNA. + Qubit quantification + After evaluation of flowcell (rev 10) for pore availability (> 6000 available pores). Two distinct barcoded libraries were pooled for each flowcell. + sequencing run was initiated and controlled using MinKNOW software (sequencing 80 hours, data output format: Raw pod5 files). === Bioinformatic analysis + Orchestration and global analysis realized by in-house software (source code is accessible at Github). + Basecalling and alignment: dorado v0.8.2 with parameters "sup,5mC_5hmC --trim all" with alignment on hs1 genome (T2T chm13v2.0). + Variant calling was realized with ClairS v0.4.0, DeepVariant v1.6.1, DeepSomatic v1.7.0, Nanomonsv v0.7.2. + Variants filtering and merging done with in-house software. + Annotation: ensembl-VEP 112 with gene features defined by RefSeq Liftoff v5.1. SNP from gnomAD_4-2022_10 and Cosmic v99. + Interpretation and report generation performed on a local web service also published in open source.