#let cr_colors = (
  dark_grey: rgb("#333333"),
  beige: rgb("#fdf0d5"),
  light_grey: rgb("#eeeeee"),
  dark_red: rgb("#780000"),
  red: rgb("#c1121f"),
  blue: rgb("#669bbc"),
  dark_blue: rgb("#003049"),
  green: rgb("#29bf12"),
)

#import "@preview/fletcher:0.5.1" as fletcher: diagram, node, edge
#import "@preview/metro:0.3.0": *
#import "@preview/cetz:0.2.2"
#import "@preview/badgery:0.1.1": *
#import "@preview/cmarker:0.1.1"

#set page(
  paper: "a4",
  footer: locate(loc => [
    #set text(10pt)
    #let today = datetime.today()
    #if loc.page() != 1 {
      align(right, counter(page).display("1 / 1", both: true))
    }
    #align(center, [Dr. Thomas Steimlé --- #today.display("[day] [month repr:long] [year]")])
  ]),
)

#show heading: set text(font: "Futura")
#show heading.where(level: 1): it => [
  #set align(center)
  #set text(fill: cr_colors.dark_blue)
  #it.body
  #v(18pt)
]

#show image: set text(font: "FreeSans")

#set text(size: 16pt, fill: cr_colors.dark_blue)

#let contigs = (
  "chr1",
  "chr2",
  "chr3",
  "chr4",
  "chr5",
  "chr6",
  "chr7",
  "chr8",
  "chr9",
  "chr10",
  "chr11",
  "chr12",
  "chr13",
  "chr14",
  "chr15",
  "chr16",
  "chr17",
  "chr18",
  "chr19",
  "chr20",
  "chr21",
  "chr22",
  "chrX",
  "chrY",
)

#let parseCustomDate(dateString) = {
  let parts = dateString.split("T")
  let datePart = parts.at(0).replace("-", "/")
  let timePart = parts.at(1).split(":")
  let hour = timePart.at(0)
  let minute = timePart.at(1)

  return datePart + " " + hour + "h" + minute
}

#let formatString(input) = {
  let words = input.split("_")
  let capitalizedWords = words.map(word => {
    if word.len() > 0 {
      upper(word.first()) + word.slice(1)
    } else {
      word
    }
  })
  capitalizedWords.join(" ")
}
#let si-fmt(val, precision: 1, sep: "\u{202F}", binary: false) = {
  let factor = if binary {
    1024
  } else {
    1000
  }
  let gt1_suffixes = ("k", "M", "G", "T", "P", "E", "Z", "Y")
  let lt1_suffixes = ("m", "μ", "n", "p", "f", "a", "z", "y")
  let scale = ""
  let unit = ""

  if type(val) == content {
    if val.has("text") {
      val = val.text
    } else if val.has("children") {
      val = val.children.map(content => content.text).join()
    } else {
      panic(val.children.map(content => content.text).join())
    }
  }
  // if val contains a unit, split it off
  if type(val) == str {
    unit = val.find(regex("(\D+)$"))
    val = float(val.split(unit).at(0))
  }

  if calc.abs(val) > 1 {
    for suffix in gt1_suffixes {
      if calc.abs(val) < factor {
        break
      }
      val /= factor
      scale += " " + suffix
    }
  } else if val != 0 and calc.abs(val) < 1 {
    for suffix in lt1_suffixes {
      if calc.abs(val) > 1 {
        break
      }
      val *= factor
      scale += " " + suffix
    }
  }

  let formatted = str(calc.round(val, digits: precision))

  formatted + sep + scale.split().at(-1, default: "") + unit
}

#let reportCoverage(prefix) = {
  image(prefix + "_global.svg", width: 100%)
  for contig in contigs {
    heading(level: 4, contig)
    let path = prefix + "_" + contig
    image(path + "_chromosome.svg")
    let data = json(path + "_stats.json")
    grid(
      columns: (1fr, 2fr),
      gutter: 3pt,
      align(left + horizon)[
        #set text(size: 12pt)
        #table(
          stroke: none, columns: (auto, 1fr), gutter: 3pt, [Mean], [#calc.round(
              data.mean,
              digits: 2,
            )], [Standard dev.], [#calc.round(
              data.std_dev,
              digits: 2,
            )], ..data.breaks_values.map(r => (
            [#r.at(0)],
            [#calc.round(r.at(1) * 100, digits: 1)%],
          )).flatten(),
        )
      ],
      align(right, image(path + "_distrib.svg", width: 100%)),
    )

    parbreak()
  }
}

#let reportBam(path) = {
  let data = json(path)
  table(
    gutter: 3pt, stroke: none, columns: (auto, 1fr), ..for (key, value) in (
      data
    ) {
      if key != "cramino" and key != "composition" and key != "path" and key != "modified" {
        ([ #formatString(key) ], [ #value ])
      } else if key == "modified" {
        ([ Modified Date (UTC) ], [ #parseCustomDate(value) ])
      } else if key == "composition" {
        (
          [ Run(s) ],
          [
            #for (i, v) in value.enumerate() {
              if i > 0 [ \ ]
              [#v.at(0).slice(0, 5): #calc.round(v.at(1), digits: 0)%]
            }
          ],
        )
      } else if key == "cramino" {
        for (k, v) in value {
          if k == "normalized_read_count_per_chromosome" { } else if k != "path" and k != "checksum" and k != "creation_time" and k != "file_name" {
            let k = formatString(k)
            let v = if type(v) == "integer" {
              si-fmt(v)
            } else {
              v
            }
            ([ #k ], [ #v ])
          } else {
            ()
          }
        }.flatten()
      } else {
        ()
      }
    }.flatten(),
  )
}

#let formatedReadCount(path) = {
  let data = json(path)
  let data = data.cramino.normalized_read_count_per_chromosome
  let res = ()
  for contig in contigs {
    res.push(data.at(contig))
  }
  res.push(data.at("chrM"))
  return res
}

#let printReadCount(diag_path, mrd_path) = {
  let index = 14
  let c = contigs
  c.push("chrM")
  let diag = formatedReadCount(diag_path)
  let mrd = formatedReadCount(mrd_path)
  c.insert(0, "")
  diag.insert(0, "diag")
  mrd.insert(0, "mrd")
  let arrays1 = (c.slice(0, index), diag.slice(0, index), mrd.slice(0, index))
  table(
    columns: arrays1.at(0).len(), ..arrays1
      .map(arr => arr.map(item => [#item]))
      .flatten(),
  )

  let arrays2 = (c.slice(index), diag.slice(index), mrd.slice(index))
  arrays2.at(0).insert(0, "")
  arrays2.at(1).insert(0, "diag")
  arrays2.at(2).insert(0, "mrd")

  table(
    columns: arrays2.at(0).len(), ..arrays2
      .map(arr => arr.map(item => [#item]))
      .flatten(),
  )
}

#let variantsFlow(path) = {
  import fletcher.shapes: diamond, parallelogram, chevron
  let data = json(path)
  set text(8pt)
  diagram(
    spacing: (8pt, 25pt),
    node-fill: gradient.radial(
      cr_colors.light_grey,
      cr_colors.blue,
      radius: 300%,
    ),
    node-stroke: cr_colors.dark_blue + 1pt,
    edge-stroke: 1pt,
    mark-scale: 70%,
    node-inset: 8pt,
    node(
      (0.2, 0),
      [Variants MRD: #num(data.vcf_stats.n_tumoral_init)],
      corner-radius: 2pt,
      extrude: (0, 3),
      name: <input_mrd>,
    ),
    node(
      (1.8, 0),
      [Variants Diag: #num(data.vcf_stats.n_constit_init)],
      corner-radius: 2pt,
      extrude: (0, 3),
      name: <input_diag>,
    ),
    node(
      (1, 1),
      align(center)[Variant in MRD ?],
      shape: diamond,
      name: <is_in_mrd>,
    ),
    edge(<input_mrd>, "s", <is_in_mrd>, "-|>"),
    edge(<input_diag>, "s", <is_in_mrd>, "-|>"),
    edge(<is_in_mrd>, <is_low_mrd>, "-|>", [Yes], label-pos: 0.8),
    node(
      (0.25, 2),
      [MRD variant depth \ < 4 ?],
      shape: diamond,
      name: <is_low_mrd>,
    ),
    edge(<is_low_mrd>, <low_mrd>, "-|>"),
    node(
      (0, 3),
      [Low MRD depth: #num(data.vcf_stats.n_low_mrd_depth)],
      shape: parallelogram,
      name: <low_mrd>,
    ),
    edge(<is_in_mrd>, <next>, "-|>", [No], label-pos: 0.8),
    node(
      (1.85, 2),
      [To BAM filters: #num(data.bam_stats.n_lasting)],
      shape: chevron,
      extrude: (-3, 0),
      name: <next>,
      stroke: cr_colors.green,
    ),
    edge(<is_low_mrd>, <homo>, "-|>"),
    node((1.5, 3), [VAF = 100% ?], shape: diamond, name: <homo>),
    edge(<homo>, <constit>, "-|>", [Yes], label-pos: 0.5, bend: -80deg),
    edge(<homo>, <chi>, "-|>", [No], label-pos: 0.6),
    node(
      (1.5, 4),
      [$#sym.chi^2$ VAF MRD vs Diag ?],
      shape: diamond,
      name: <chi>,
    ),
    edge(<chi>, <constit>, "-|>", label-pos: 0.8),
    node(
      (1, 5),
      [Constit: #num(data.vcf_stats.n_constit)],
      shape: parallelogram,
      name: <constit>,
    ),
    edge(<chi>, <loh>, "-|>", [p < 0.01], label-pos: 0.8),
    node(
      (2, 5),
      [LOH: #num(data.vcf_stats.n_loh)],
      shape: parallelogram,
      name: <loh>,
    ),
  )
}

#let bamFilter(path) = {
  import fletcher.shapes: diamond, parallelogram, hexagon
  let data = json(path)
  set text(8pt)

  diagram(
    spacing: (8pt, 25pt),
    node-fill: gradient.radial(
      cr_colors.light_grey,
      cr_colors.blue,
      radius: 300%,
    ),
    node-inset: 8pt,
    node-stroke: cr_colors.dark_blue + 1pt,
    mark-scale: 70%,
    edge-stroke: 1pt,
    node(
      (0.75, 0),
      [Variants not in MRD VCF: #num(data.bam_stats.n_lasting)],
      corner-radius: 2pt,
      extrude: (0, 3),
      name: <input_mrd>,
    ),
    edge(<input_mrd>, <depth>, "-|>"),
    node((0.75, 1), [MRD alignement depth ?], shape: diamond, name: <depth>),
    edge(<depth>, <low_depth>, "-|>", [< 4]),
    node(
      (0, 2),
      [Low MRD depth: #num(data.bam_stats.n_low_mrd_depth)],
      shape: parallelogram,
      name: <low_depth>,
    ),
    edge(<depth>, <seen>, "-|>"),
    node(
      (0.75, 3),
      [Alt. base seen in MRD pileup ?],
      shape: diamond,
      name: <seen>,
    ),
    edge(<seen>, <constit>, "-|>", [Yes]),
    node(
      (0, 4),
      [Constit: #num(data.bam_stats.n_constit)],
      shape: parallelogram,
      name: <constit>,
    ),
    edge(<seen>, <is_div>, "-|>", [No]),
    node(
      (1.1, 4),
      [Sequence #sym.plus.minus 20nt \ diversity ?],
      shape: diamond,
      name: <is_div>,
    ),
    edge(<is_div>, <low_div>, "-|>", [entropy < 1.8]),
    node(
      (0.25, 5),
      [Low diversity, artefact: #num(data.bam_stats.n_low_diversity)],
      shape: parallelogram,
      name: <low_div>,
    ),
    edge(<is_div>, <somatic>, "-|>"),
    node(
      (1.75, 5),
      [Somatic: #num(data.bam_stats.n_somatic)],
      shape: hexagon,
      extrude: (-3, 0),
      name: <somatic>,
      stroke: cr_colors.green,
    ),
  )
}

#let barCallers(path) = {
  import cetz.draw: *
  import cetz.chart

  let json_data = json(path).variants_stats
  let data = json_data.find(item => item.name == "callers_cat")
  let chart_data = data.counts.pairs().sorted(key: x => -x.at(1))

  set text(11pt)
  cetz.canvas(
    length: 80%,
    {
      set-style(axes: (
        bottom: (tick: (label: (angle: 45deg, anchor: "north-east"))),
      ))
      chart.columnchart(
        chart_data,
        size: (1, 0.5),
      )
    },
  )
}
#let truncate(text, max-length) = {
  if text.len() <= max-length {
    text
  } else {
    text.slice(0, max-length - 3) + "..."
  }
}

// // #let add_newlines(text, n) = {
// //   let result = ""
// //   let chars = text.clusters()
// //   for (i, char) in chars.enumerate() {
// //     result += char
// //     if calc.rem((i + 1), n == 0 and i < chars.len() - 1 {
// //       result += "\n"
// //     }
// //   }
// //   result
// // }
//
// #let break_long_words(text, max_length: 20, hyphen: "") = {
//   let words = text.split(" ")
//   let result = ()
//
//   for word in words {
//     if word.len() <= max_length {
//       result.push(word)
//     } else {
//       let segments = ()
//       let current_segment = ""
//       for char in word.clusters() {
//         if current_segment.len() + 1 > max_length {
//           segments.push(current_segment + hyphen)
//           current_segment = ""
//         }
//         current_segment += char
//       }
//       if current_segment != "" {
//         segments.push(current_segment)
//       }
//       result += segments
//     }
//   }
//
//   result.join(" ")
// }
//
#let format_sequence(text, max_length: 40, hyphen: [#linebreak()]) = {
  let words = text.split(" ")
  let result = ()
  // result.push("\n")

  for word in words {
    if word.len() <= max_length {
      result.push(word)
    } else {
      let segments = ()
      let current_segment = ""
      for char in word.clusters() {
        if current_segment.len() + 1 > max_length {
          segments.push(current_segment + hyphen)
          current_segment = ""
        }
        current_segment += char
      }
      if current_segment != "" {
        segments.push(current_segment)
      }
      result += segments
    }
  }
  result.push("")
  let sequence = result.join(" ")

  box(width: 100%, par(leading: 0.2em, sequence))
}

#let dna(sequence, line_length: 60) = {
  let formatted = sequence.clusters().map(c => {
    if c == "A" {
      text(fill: red)[A]
    } else if c == "T" {
      text(fill: green)[T]
    } else if c == "C" {
      text(fill: blue)[C]
    } else if c == "G" {
      text(fill: orange)[G]
    } else {
      c
    }
  })

  let lines = formatted.chunks(line_length).map(line => line.join())
  let n_lines = lines.len()

  let lines = lines.join("\n")

  if n_lines > 1 {
    parbreak()
  }
  align(
    left,
    box(
      fill: luma(240),
      inset: (x: 0.5em, y: 0.5em),
      radius: 4pt,
      align(
        left,
        text(
          font: "Fira Code",
          size: 10pt,
          lines,
        ),
      ),
    ),
  )
  if n_lines > 1 {
    parbreak()
  }
}

#let to-string(content) = {
  if content.has("text") {
    content.text
  } else if content.has("children") {
    content.children.map(to-string).join("")
  } else if content.has("body") {
    to-string(content.body)
  } else if content == [ ] {
    " "
  }
}

#let format-number(num) = {
  let s = str(num).split("").filter(item => item != "")
  let result = ""
  let len = s.len()
  for (i, char) in s.enumerate() {
    result += char
    if (i < len - 1 and calc.rem((len - i - 1), 3) == 0) {
      result += ","
    }
  }
  result
}

#let format_json(json_data) = {
  let format_value(value) = {
    if value == none {
      ""
    } else if type(value) == "string" {
      if value != "." {
        value.replace(";", ", ").replace("=", ": ")
      } else {
        ""
      }
    } else if type(value) == "array" {
      let items = value.map(v => format_value(v))
      "[" + items.join(", ") + "]"
    } else if type(value) == "dictionary" {
      "{" + format_json(value) + "}"
    } else {
      str(value)
    }
  }

  if type(json_data) == "dictionary" {
    let result = ()
    for (key, value) in json_data {
      let formatted_value = format_value(value)
      if formatted_value != "" {
        if key == "svinsseq" {
          formatted_value = dna(formatted_value)
        }
        result.push(upper(key) + ": " + formatted_value)
      }
    }
    result.join(", ")
  } else {
    format_value(json_data)
  }
}

#let card(d) = {
  set text(12pt)
  let position_fmt = format-number(d.position)
  let title_bg_color = rgb("#f9fafb00")

  let grid_content = ()

  let callers_data = json.decode(d.callers_data)

  // Title
  let alt = d.alternative

  // TODO: add that in pandora_lib_variants
  if d.callers == "Nanomonsv" and alt == "<INS>" {
    alt = d.reference + callers_data.at(0).info.Nanomonsv.svinsseq
  }

  let title = d.contig + ":" + position_fmt + " " + d.reference + sym
    .quote
    .angle
    .r
    .single + truncate(alt, 30)

  grid_content.push(
    grid.cell(
      fill: cr_colors.light_grey,
      align: center,
      block(width: 100%, title),
    ),
  )

  // Consequences
  if d.consequence != none {
    let consequences = d.consequence.replace(",", ", ").replace(
      "_",
      " ",
    ) + " " + emph(strong(d.gene))
    grid_content.push(
      grid.cell(fill: cr_colors.light_grey, align: center, consequences),
    )
  }

  // hgvs_c
  if d.hgvs_c != none {
    grid_content.push(
      grid.cell(fill: rgb("#fef08a"), align: center, truncate(d.hgvs_c, 50)),
    )
  }

  // hgvs_c
  if d.hgvs_p != none {
    grid_content.push(
      grid.cell(fill: rgb("#fecaca"), align: center, truncate(d.hgvs_p, 50)),
    )
  }

  // Content
  let content = ()
  content.push(
    badge-red("VAF: " + str(calc.round(d.m_vaf * 100, digits: 2)) + "%"),
  )
  // content.push(" ")

  if d.cosmic_n != none {
    content.push(badge-red("Cosmic: " + str(d.cosmic_n)))
  }

  if d.gnomad_af != none {
    content.push(badge-blue("GnomAD: " + str(d.gnomad_af)))
  }

  let callers_contents = ()
  for caller_data in callers_data {
    let caller = ""
    for (k, v) in caller_data.format {
      caller = k
    }
    callers_contents.push(underline(caller) + ":")
    if caller_data.qual != none {
      callers_contents.push([
        Qual: #caller_data.qual,
      ])
    }

    callers_contents.push([
      #(
        format_json(caller_data.format.at(caller)),
        format_json(caller_data.info.at(caller)),
      ).filter(v => v != "").join(", ")
    ])
  }

  content.push(
    grid(
      columns: 1,
      inset: 0.5em,
      ..callers_contents
    ),
  )

  grid_content.push(grid.cell(fill: white, content.join(" ")))


  block(
    breakable: false,
    width: 100%,
    grid(
      columns: 1,
      inset: 0.5em,
      stroke: cr_colors.dark_grey,
      ..grid_content
    ),
  )
}

#let variants(path, interpretation: "PATHO") = {
  let data = json(path)
  let patho = data.filter(d => d.interpretation == interpretation)
  for var in patho {
    card(var)
  }
}

#set heading(numbering: (..numbers) => {
  if numbers.pos().len() >= 2 and numbers.pos().len() <= 3 {
    numbering("1.1", ..numbers.pos().slice(1))
  }
})
#set list(marker: [---])
#heading(level: 1, outlined: false)[Whole Genome Sequencing Report]

#outline(title: "Table of Contents", depth: 3)
#pagebreak()

== Interprétation
#v(0.5cm)

#let scoped-content = {

  show heading: it => {
    set text(font: "FreeSans", size: 14pt)
    align(left, it)
    v(5pt)
  }

  cmarker.render(
    read(sys.inputs.base + "/diag/report/" + sys.inputs.id + "_conclusion.md"), h1-level: 4,
  )
}

#scoped-content

#pagebreak()

== Sample identity
#sys.inputs.id

== Alignement
#grid(
  columns: (1fr, 1fr),
  gutter: 3pt,
  [
    ==== Diagnostic sample
    #set text(size: 11pt)
    #reportBam(sys.inputs.base + "/diag/" + sys.inputs.id + "_diag_hs1_info.json")
  ],
  [
    ==== MRD sample
    #set text(size: 11pt)
    #reportBam(sys.inputs.base + "/mrd/" + sys.inputs.id + "_mrd_hs1_info.json")
    #set footnote(numbering: n => {
      " "
    })
    #footnote[Values computed by #link("https://github.com/wdecoster/cramino")[cramino] v0.14.5
    ]
  ],
)

#pagebreak()
=== Normalized read count by chromosome
#[
  #set text(size: 10pt)
  #printReadCount(
    sys.inputs.base + "/diag/" + sys.inputs.id + "_diag_hs1_info.json",
    sys.inputs.base + "/mrd/" + sys.inputs.id + "_mrd_hs1_info.json",
  )
]

== Variants
=== Variants calling
#pagebreak()
==== VCF filters
#pad(
  top: 0.8cm,
  align(
    center,
    scale(
      x: 100%,
      y: 100%,
      reflow: true,
      variantsFlow(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_variants_stats.json"),
    ),
  ),
)
==== BAM filters
#pad(
  top: 0.8cm,
  align(
    center,
    scale(
      x: 100%,
      y: 100%,
      reflow: true,
      bamFilter(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_variants_stats.json"),
    ),
  ),
)
#pagebreak()

=== Somatic variants
==== Callers
#v(0.5cm)
#image(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_barcharts_callers.svg")
==== Consequences (VEP)
#v(0.5cm)
#image(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_barcharts_consequences.svg")
==== NCBI features
#v(0.5cm)
#image(sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_barcharts_ncbi.svg")
#pagebreak()

=== Selected Variants
==== Classification 
- Pathogenic: experimentally proved that the variant participate in the oncogenic process.
- Likely pathogenic: gene or variant that could be linked to the oncogenic process in bibliography.
- Unknown significance: somatic variant without more information.

==== Pathogenics
#variants(
  sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_annot_variants.json",
  interpretation: "PATHO",
)

==== Likely Pathogenics
#variants(
  sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_annot_variants.json",
  interpretation: "PROBPATHO",
)

==== Variants of Unknown Significance
#variants(
  sys.inputs.base + "/diag/report/data/" + sys.inputs.id + "_annot_variants.json",
  interpretation: "US",
)

== Coverage by chromosome
=== Proportion at given depth by chromosome
#reportCoverage(sys.inputs.base + "/diag/report/data/scan/" + sys.inputs.id)
#set footnote(numbering: n => {
  " "
})
#footnote[Values computed by Pandora development version]

== Method
=== Sample preparation and sequencing
+ DNA sampling and collection in EDTA tubes.
+ Buffy coat: pooling of multiple EDTA tubes then centrifugation (1200 rpm 10 minutes).
+ DNA extraction according to Maxwell® Promega RSC Buffy Coat DNA Kit.
+ Nanodrop DNA quantification.
+ DNA shearing: 3µg of DNA mechanically sheared by Covaris g-TUBE (8000 rpm, 1 minute).
+ DNA size qualification aiming a median of 10 kb determined by TapeStation.
+ Libary was constructed following the Oxford Nanopore Technologies Ligation Sequencing Kit V14 (SQK-LSK114) protocol with 1.5 µg as input DNA.
+ Qubit quantification
+ After evaluation of flowcell (rev 10) for pore availability (> 6000 available pores). Two distinct barcoded libraries were pooled for each flowcell.
+ sequencing run was initiated and controlled using MinKNOW software (sequencing 80 hours, data output format: Raw pod5 files).

=== Bioinformatic analysis
+ Orchestration and global analysis realized by in-house software (source code is accessible at Github).
+ Basecalling and alignment: dorado v0.8.2 with parameters "sup,5mC_5hmC --trim all" with alignment on hs1 genome (T2T chm13v2.0).
+ Variant calling was realized with ClairS v0.4.0, DeepVariant v1.6.1, DeepSomatic v1.7.0, Nanomonsv v0.7.2.
+ Variants filtering and merging done with in-house software.
+ Annotation: ensembl-VEP 112 with gene features defined by RefSeq Liftoff v5.1. SNP from gnomAD_4-2022_10 and Cosmic v99.
+ Interpretation and report generation performed on a local web service also published in open source.