Browse Source

Add Windows htslib patch setup

STEIMLE Thomas 2 weeks ago
parent
commit
0a2145264f
4 changed files with 118 additions and 4 deletions
  1. 1 0
      .gitignore
  2. 0 4
      Cargo.lock
  3. 4 0
      Cargo.toml
  4. 113 0
      setup-patches.ps1

+ 1 - 0
.gitignore

@@ -1 +1,2 @@
 /target
+/patches/

+ 0 - 4
Cargo.lock

@@ -456,8 +456,6 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 [[package]]
 name = "hts-sys"
 version = "2.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e38d7f1c121cd22aa214cb4dadd4277dc5447391eac518b899b29ba6356fbbb2"
 dependencies = [
  "bindgen",
  "bzip2-sys",
@@ -862,8 +860,6 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
 [[package]]
 name = "rust-htslib"
 version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f22161678c3d72e6434c5f3383325dbf88c3cacce665f0c7b4b077fc6e957ba9"
 dependencies = [
  "bio-types",
  "byteorder",

+ 4 - 0
Cargo.toml

@@ -25,3 +25,7 @@ thiserror = "1.0.63"
 seq_io = "0.3.2"
 # rust-spoa = "0.2.4"
 
+[patch.crates-io]
+hts-sys = { path = "patches/hts-sys" }
+rust-htslib = { path = "patches/rust-htslib" }
+

+ 113 - 0
setup-patches.ps1

@@ -0,0 +1,113 @@
+param()
+$ErrorActionPreference = "Stop"
+
+# Honor CARGO_HOME and keep one match: several index.crates.io-* directories can coexist.
+$cargoHome = if ($env:CARGO_HOME) { $env:CARGO_HOME } else { "$env:USERPROFILE\.cargo" }
+$found = Resolve-Path "$cargoHome\registry\src\index.crates.io-*\hts-sys-2.2.0" -ErrorAction SilentlyContinue | Select-Object -First 1
+if (-not $found) {
+    Write-Error "hts-sys 2.2.0 not in cargo registry. Run 'cargo fetch' first."; exit 1
+}
+
+$dest = Join-Path $PSScriptRoot "patches\hts-sys"
+if (Test-Path $dest) { Remove-Item $dest -Recurse -Force }
+Copy-Item $found.Path $dest -Recurse
+Write-Host "Copied hts-sys 2.2.0 -> patches\hts-sys"
+
+$buildRs = Join-Path $dest "build.rs"
+$text = [System.IO.File]::ReadAllText($buildRs) -replace "`r`n", "`n"
+
+# Fix 1: guard HAVE_DRAND48. drand48/srand48 are absent from MinGW libc.
+$old1 = "    let mut config_lines = vec![`n        `"/* Default config.h generated by build.rs */`",`n        `"#define HAVE_DRAND48 1`",`n    ];"
+$new1 = "    let mut config_lines = vec![`n        `"/* Default config.h generated by build.rs */`",`n    ];`n    if target_os != `"windows`" {`n        config_lines.push(`"#define HAVE_DRAND48 1`");`n    }"
+$result = $text.Replace($old1, $new1)
+if ($result -eq $text) { Write-Warning "Fix 1 pattern not matched - verify hts-sys version" }
+else { Write-Host "Fix 1 applied (HAVE_DRAND48 guard)"; $text = $result }
+
+# Fix 2: run version.sh through bash. Windows CreateProcess cannot execute .sh files.
+$old2 = "        let version = std::process::Command::new(out.join(`"htslib`").join(`"version.sh`"))`n            .output()`n            .expect(`"failed to execute process`");`n        let version_str = std::str::from_utf8(&version.stdout).unwrap().trim();"
+$new2 = "        let version_str = std::process::Command::new(`"bash`")`n            .arg(out.join(`"htslib`").join(`"version.sh`"))`n            .output()`n            .map(|o| std::str::from_utf8(&o.stdout).unwrap_or(`"1.19.1`").trim().to_string())`n            .unwrap_or_else(|_| `"1.19.1`".to_string());"
+$result = $text.Replace($old2, $new2)
+if ($result -eq $text) { Write-Warning "Fix 2 pattern not matched - verify hts-sys version" }
+else { Write-Host "Fix 2 applied (version.sh via bash)"; $text = $result }
+
+# Fix 3: 64-bit file offsets on Windows MinGW.
+# Both the C compiler and bindgen must see the same define so types match.
+$old3 = "    if want_static {`n        cfg.warnings(false).static_flag(true).pic(true);`n    } else {`n        cfg.warnings(false).static_flag(false).pic(true);`n    }"
+$new3 = "    if want_static {`n        cfg.warnings(false).static_flag(true).pic(true);`n    } else {`n        cfg.warnings(false).static_flag(false).pic(true);`n    }`n`n    // Fix 3: 64-bit file offsets on Windows MinGW.`n    // Without this, off_t = i32 and seeks wrap at 2 GB.`n    if target_os == `"windows`" {`n        cfg.define(`"_FILE_OFFSET_BITS`", `"64`");`n    }"
+$result = $text.Replace($old3, $new3)
+if ($result -eq $text) { Write-Warning "Fix 3a pattern not matched - verify hts-sys version" }
+else { Write-Host "Fix 3a applied (_FILE_OFFSET_BITS=64 cc define)"; $text = $result }
+
+$old4 = "        bindgen::Builder::default()`n            .header(`"wrapper.h`")`n            .generate_comments(false)`n            .blocklist_function(`"strtold`")`n            .blocklist_type(`"max_align_t`")`n            .generate()"
+$new4 = "        let mut bindgen_builder = bindgen::Builder::default()`n            .header(`"wrapper.h`")`n            .generate_comments(false)`n            .blocklist_function(`"strtold`")`n            .blocklist_type(`"max_align_t`");`n        if target_os == `"windows`" {`n            bindgen_builder = bindgen_builder.clang_arg(`"-D_FILE_OFFSET_BITS=64`");`n        }`n        bindgen_builder`n            .generate()"
+$result = $text.Replace($old4, $new4)
+if ($result -eq $text) { Write-Warning "Fix 3b pattern not matched - verify hts-sys version" }
+else { Write-Host "Fix 3b applied (_FILE_OFFSET_BITS=64 bindgen)"; $text = $result }
+
+# Fix 4: link MinGW regex libraries (static) and libcurl (via its DLL import lib) on Windows.
+# hts_expr.c needs POSIX regex; systre.c needs TRE; hfile_libcurl.c needs libcurl.
+# libregex depends on libintl/gettext, which depends on libiconv.
+$old5 = "    cfg.file(`"wrapper.c`");`n    cfg.compile(`"hts`");"
+$new5 = "    cfg.file(`"wrapper.c`");`n    cfg.compile(`"hts`");`n`n    // hts_expr.c uses POSIX regex (regcomp/regexec/regfree) -- provided by libregex (gnurx).`n    // systre.c uses the TRE regex API (tre_regexec/tre_regerror) -- provided by libtre.`n    // Both live in the MinGW lib dir; locate it via gcc -print-file-name.`n    // Link statically so the exe has no runtime dependency on .dll files.`n    if target_os == `"windows`" {`n        let compiler = cfg.get_compiler();`n        let mingw_lib = std::process::Command::new(compiler.path())`n            .arg(`"-print-file-name=libtre.a`")`n            .output()`n            .ok()`n            .and_then(|o| String::from_utf8(o.stdout).ok())`n            .map(|s| s.trim().to_string())`n            .filter(|s| s != `"libtre.a`")`n            .and_then(|s| std::path::PathBuf::from(s).parent().map(|p| p.to_path_buf()));`n        if let Some(dir) = mingw_lib {`n            println!(`"cargo:rustc-link-search=native={}`", dir.display());`n        }`n        println!(`"cargo:rustc-link-lib=dylib:+verbatim=libcurl.dll.a`"); // hfile_libcurl.c: curl_easy/curl_multi APIs`n        println!(`"cargo:rustc-link-lib=static=tre`");    // systre.c: tre_regexec/tre_regerror`n        println!(`"cargo:rustc-link-lib=static=regex`");  // hts_expr.c: regcomp/regexec/regfree`n        println!(`"cargo:rustc-link-lib=static=intl`");   // libregex dep: gettext`n        println!(`"cargo:rustc-link-lib=static=iconv`");  // libintl dep: iconv`n    }"
+$result = $text.Replace($old5, $new5)
+if ($result -eq $text) { Write-Warning "Fix 4 pattern not matched - verify hts-sys version" }
+else { Write-Host "Fix 4 applied (static-link regex/tre/intl/iconv on Windows)"; $text = $result }
+
+[System.IO.File]::WriteAllText($buildRs, $text, (New-Object System.Text.UTF8Encoding $false))
+
+$cargoToml = Join-Path $dest "Cargo.toml"
+$manifest = [System.IO.File]::ReadAllText($cargoToml) -replace "`r`n", "`n"
+$oldManifest = "[target.'cfg(all(unix, not(target_os = `"macos`")))'.dependencies.openssl-sys]`nversion = `"0.9.56`"`noptional = true"
+$newManifest = "[target.'cfg(any(windows, all(unix, not(target_os = `"macos`"))))'.dependencies.openssl-sys]`nversion = `"0.9.56`"`noptional = true"
+$result = $manifest.Replace($oldManifest, $newManifest)
+if ($result -eq $manifest) { Write-Warning "Manifest OpenSSL target patch not matched - verify hts-sys version" }
+else {
+    Write-Host "Manifest patch applied (openssl-sys enabled on Windows)"
+    [System.IO.File]::WriteAllText($cargoToml, $result, (New-Object System.Text.UTF8Encoding $false))
+}
+
+Write-Host "patches\hts-sys is ready."
+
+$rhlCargoHome = if ($env:CARGO_HOME) { $env:CARGO_HOME } else { "$env:USERPROFILE\.cargo" }  # honor a relocated cargo home
+$rhl = Resolve-Path "$rhlCargoHome\registry\src\index.crates.io-*\rust-htslib-1.0.0" -ErrorAction SilentlyContinue | Select-Object -First 1  # several index dirs can coexist
+if (-not $rhl) {
+    Write-Warning "rust-htslib 1.0.0 not in cargo registry. Run 'cargo fetch' first."
+} else {
+    $rhlDest = Join-Path $PSScriptRoot "patches\rust-htslib"
+    if (Test-Path $rhlDest) { Remove-Item $rhlDest -Recurse -Force }
+    Copy-Item $rhl.Path $rhlDest -Recurse
+    Write-Host "Copied rust-htslib 1.0.0 -> patches\rust-htslib"
+
+    $bamMod = Join-Path $rhlDest "src\bam\mod.rs"
+    $rhlText = [System.IO.File]::ReadAllText($bamMod) -replace "`r`n", "`n"
+
+    # Fix A: cast offset to hts_sys::off_t instead of libc::off_t.
+    # Fixes A and B warn when nothing was replaced (pattern absent) or an unpatched occurrence
+    # remains; a partial replacement is still kept in $rhlText.
+    $fixedA = $rhlText.Replace("offset as libc::off_t,", "offset as hts_sys::off_t,")
+    if (($fixedA -ceq $rhlText) -or ($fixedA -match "offset as libc::off_t")) { Write-Warning "rust-htslib fix A not applied - pattern not matched" }
+    else { Write-Host "rust-htslib fix A applied (libc::off_t -> hts_sys::off_t)" }
+    $rhlText = $fixedA
+
+    # Fix B: load the index through sam_index_load3 with flags=0 instead of sam_index_load.
+    $oldIdxLoad = "        let idx = unsafe { htslib::sam_index_load(htsfile, c_str.as_ptr()) };"
+    $newIdxLoad = "        // flags=0 omits HTS_IDX_SAVE_REMOTE so remote .bai files are not cached locally.`n        let idx = unsafe { htslib::sam_index_load3(htsfile, c_str.as_ptr(), std::ptr::null(), 0) };"
+    $fixedB = $rhlText.Replace($oldIdxLoad, $newIdxLoad)
+    if (($fixedB -ceq $rhlText) -or ($fixedB -match "sam_index_load\(htsfile")) { Write-Warning "rust-htslib fix B not applied - pattern not matched" }
+    else { Write-Host "rust-htslib fix B applied (sam_index_load3 no-cache)" }
+    $rhlText = $fixedB
+
+    $fromUrl = "    pub fn from_url(url: &Url) -> Result<Self> {`n        Self::new(url.as_str().as_bytes())`n    }"
+    $fromUrlAndIndex = "    pub fn from_url(url: &Url) -> Result<Self> {`n        Self::new(url.as_str().as_bytes())`n    }`n`n    /// Open a remote BAM via URL with a pre-downloaded local index file.`n    /// Using a local index bypasses htslib's remote-index caching entirely.`n    pub fn from_url_and_index<P: AsRef<Path>>(url: &Url, index_path: P) -> Result<Self> {`n        Self::new_with_index_path(`n            url.as_str().as_bytes(),`n            &path_as_bytes(index_path, true)?,`n        )`n    }"
+    $hits = [regex]::Matches($rhlText, [regex]::Escape($fromUrl))  # one exact-match search both counts and locates
+    if ($hits.Count -ge 2) {
+        $idx = $hits[1].Index  # second from_url occurrence; assumed to be the one in IndexedReader
+        $rhlText = $rhlText.Substring(0, $idx) + $fromUrlAndIndex + $rhlText.Substring($idx + $fromUrl.Length)
+        Write-Host "rust-htslib fix C applied (IndexedReader::from_url_and_index)"
+    } else {
+        Write-Warning "rust-htslib fix C not applied - could not locate second from_url in IndexedReader"
+    }
+
+    [System.IO.File]::WriteAllText($bamMod, $rhlText, (New-Object System.Text.UTF8Encoding $false))
+    Write-Host "patches\rust-htslib is ready."
+}