feat: add wasm rigging

Signed-off-by: Xe Iaso <me@xeiaso.net>
Xe Iaso committed 2025-09-23 03:34:29 +00:00
parent ec90a8b87d
commit 908f85db91
22 changed files with 1339 additions and 5 deletions

.devcontainer/devcontainer.json

@@ -10,7 +10,11 @@
"postStartCommand": "bash ./.devcontainer/poststart.sh",
"features": {
"ghcr.io/xe/devcontainer-features/ko:1.1.0": {},
"ghcr.io/devcontainers/features/github-cli:1": {}
"ghcr.io/devcontainers/features/github-cli:1": {},
"ghcr.io/devcontainers/features/rust:1": {
"version": "latest",
"targets": "wasm32-unknown-unknown"
}
},
"initializeCommand": "mkdir -p ${localEnv:HOME}${localEnv:USERPROFILE}/.local/share/atuin",
"customizations": {


@@ -20,7 +20,11 @@ services:
dockerfile: .devcontainer/Dockerfile
volumes:
- ../:/workspace/anubis:cached
- cargo-target:/workspace/anubis/target:cached
environment:
VALKEY_URL: redis://valkey:6379/0
#entrypoint: ["/usr/bin/sleep", "infinity"]
user: vscode
volumes:
cargo-target:

.devcontainer/poststart.sh

@@ -5,5 +5,6 @@ pwd
npm ci &
go mod download &
go install ./utils/cmd/... &
cargo fetch &
wait

.gitignore vendored

@@ -21,4 +21,8 @@ node_modules
# how does this get here
doc/VERSION
web/static/locales/*.json
web/static/locales/*.json
# Rust
target/*
*.wasm

Cargo.lock generated Normal file

@@ -0,0 +1,250 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "anubis"
version = "0.1.0"
dependencies = [
"wee_alloc",
]
[[package]]
name = "argon2"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072"
dependencies = [
"base64ct",
"blake2",
"cpufeatures",
"password-hash",
]
[[package]]
name = "argon2id"
version = "0.1.0"
dependencies = [
"anubis",
"argon2",
]
[[package]]
name = "base64ct"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"
[[package]]
name = "blake2"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe"
dependencies = [
"digest 0.10.7",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "block-buffer"
version = "0.11.0-rc.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9ef36a6fcdb072aa548f3da057640ec10859eb4e91ddf526ee648d50c76a949"
dependencies = [
"hybrid-array",
]
[[package]]
name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "cfg-if"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]]
name = "const-oid"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dabb6555f92fb9ee4140454eb5dcd14c7960e1225c6d1a6cc361f032947713e"
[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "crypto-common"
version = "0.2.0-rc.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8235645834fbc6832939736ce2f2d08192652269e11010a6240f61b908a1c6"
dependencies = [
"hybrid-array",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer 0.10.4",
"crypto-common 0.1.6",
"subtle",
]
[[package]]
name = "digest"
version = "0.11.0-rc.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6749b668519cd7149ee3d11286a442a8a8bdc3a9d529605f579777bfccc5a4bc"
dependencies = [
"block-buffer 0.11.0-rc.5",
"const-oid",
"crypto-common 0.2.0-rc.4",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "hybrid-array"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a09fa0190457fce307a699c050054974f81b6975b7a017f1e784eb7d9c2d4bc"
dependencies = [
"typenum",
]
[[package]]
name = "libc"
version = "0.2.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
[[package]]
name = "memory_units"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8452105ba047068f40ff7093dd1d9da90898e63dd61736462e9cdda6a90ad3c3"
[[package]]
name = "password-hash"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166"
dependencies = [
"base64ct",
"rand_core",
"subtle",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
[[package]]
name = "sha2"
version = "0.11.0-rc.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1e3878ab0f98e35b2df35fe53201d088299b41a6bb63e3e34dada2ac4abd924"
dependencies = [
"cfg-if 1.0.3",
"cpufeatures",
"digest 0.11.0-rc.2",
]
[[package]]
name = "sha256"
version = "0.1.0"
dependencies = [
"anubis",
"sha2",
]
[[package]]
name = "subtle"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "typenum"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wee_alloc"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb3b5a6b2bb17cb6ad44a2e68a43e8d2722c997da10e928665c72ec6c0a0b8e"
dependencies = [
"cfg-if 0.1.10",
"libc",
"memory_units",
"winapi",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

Cargo.toml Normal file

@@ -0,0 +1,10 @@
[workspace]
resolver = "2"
members = ["wasm/anubis", "wasm/pow/*"]
[profile.release]
#strip = true
opt-level = "s"
lto = "thin"
codegen-units = 1
panic = "abort"

Makefile

@@ -1,17 +1,22 @@
VERSION= $(shell cat ./VERSION)
GO?= go
NPM?= npm
CARGO?= cargo
.PHONY: build assets deps lint prebaked-build test
.PHONY: build assets assets-wasm deps lint prebaked-build test
all: build
deps:
$(NPM) ci
$(GO) mod download
$(CARGO) fetch
assets-wasm:
bash ./scripts/build_wasm.sh
assets: PATH:=$(PWD)/node_modules/.bin:$(PATH)
assets: deps
assets: deps assets-wasm
$(GO) generate ./...
./web/build.sh
./xess/build.sh

go.mod

@@ -25,6 +25,7 @@ require (
github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a
github.com/shirou/gopsutil/v4 v4.25.6
github.com/testcontainers/testcontainers-go v0.38.0
github.com/tetratelabs/wazero v1.9.0
go.etcd.io/bbolt v1.4.2
golang.org/x/net v0.42.0
golang.org/x/text v0.27.0

go.sum

@@ -395,6 +395,8 @@ github.com/suzuki-shunsuke/urfave-cli-help-all v0.0.4 h1:YGHgrVjGTYHY98II6zijXUH
github.com/suzuki-shunsuke/urfave-cli-help-all v0.0.4/go.mod h1:sSi6xaUaHfaqu32ECLeyE7NTMv+ZM5dW0JikhllaalY=
github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
github.com/tklauser/go-sysconf v0.3.15 h1:VE89k0criAymJ/Os65CSn1IXaol+1wrsFHEB8Ol49K4=
github.com/tklauser/go-sysconf v0.3.15/go.mod h1:Dmjwr6tYFIseJw7a3dRLJfsHAMXZ3nEnL/aZY+0IuI4=
github.com/tklauser/numcpus v0.10.0 h1:18njr6LDBk1zuna922MgdjQuJFjrdppsZG60sHGfjso=

package.json

@@ -8,7 +8,8 @@
"test:integration": "npm run assets && go test -v ./internal/test",
"test:integration:podman": "npm run assets && go test -v ./internal/test --playwright-runner=podman",
"test:integration:docker": "npm run assets && go test -v ./internal/test --playwright-runner=docker",
"assets": "go generate ./... && ./web/build.sh && ./xess/build.sh",
"assets:wasm": "bash ./scripts/build_wasm.sh",
"assets": "go generate ./... && ./web/build.sh && ./xess/build.sh && npm run assets:wasm",
"build": "npm run assets && go build -o ./var/anubis ./cmd/anubis",
"dev": "npm run assets && go run ./cmd/anubis --use-remote-address --target http://localhost:3000",
"container": "npm run assets && go run ./cmd/containerbuild",

scripts/build_wasm.sh Executable file

@@ -0,0 +1,15 @@
#!/usr/bin/env bash
set -euo pipefail
mkdir -p ./web/static/wasm/{simd128,baseline}
cargo clean
# With simd128
RUSTFLAGS='-C target-feature=+simd128' cargo build --release --target wasm32-unknown-unknown
cp -vf ./target/wasm32-unknown-unknown/release/*.wasm ./web/static/wasm/simd128
cargo clean
# Without simd128
cargo build --release --target wasm32-unknown-unknown
cp -vf ./target/wasm32-unknown-unknown/release/*.wasm ./web/static/wasm/baseline

wasm/anubis/Cargo.toml Normal file

@@ -0,0 +1,7 @@
[package]
name = "anubis"
version = "0.1.0"
edition = "2024"
[dependencies]
wee_alloc = "0.4"

wasm/anubis/src/lib.rs Normal file

@@ -0,0 +1,60 @@
use std::sync::{LazyLock, Mutex};
extern crate wee_alloc;
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
#[cfg(target_arch = "wasm32")]
mod hostimport {
use crate::{DATA_BUFFER, DATA_LENGTH};
#[link(wasm_import_module = "anubis")]
unsafe extern "C" {
/// The runtime expects this function to be defined. It is called whenever the Anubis check
/// worker processes about 1024 hashes. This can be a no-op if you want.
fn anubis_update_nonce(nonce: u32);
}
/// Safe wrapper to `anubis_update_nonce`.
pub fn update_nonce(nonce: u32) {
unsafe {
anubis_update_nonce(nonce);
}
}
#[unsafe(no_mangle)]
pub extern "C" fn data_ptr() -> *const u8 {
let challenge = &DATA_BUFFER;
challenge.as_ptr()
}
#[unsafe(no_mangle)]
pub extern "C" fn set_data_length(len: u32) {
let mut data_length = DATA_LENGTH.lock().unwrap();
*data_length = len as usize;
}
}
#[cfg(not(target_arch = "wasm32"))]
mod hostimport {
pub fn update_nonce(_nonce: u32) {
// This is intentionally blank
}
}
/// The data buffer is a bit weird in that it doesn't have an explicit length as it can
/// and will change depending on the challenge input that was sent by the server.
/// However, it can only fit 4096 bytes of data (one amd64 machine page). This is
/// slightly overkill for the purposes of an Anubis check, but it's fine to assume
/// that the browser can afford this much RAM usage.
///
/// Callers should fetch the base data pointer, write up to 4096 bytes, and then
/// call `set_data_length` with the number of bytes they have written.
///
/// This is also functionally a write-only buffer, so it doesn't really matter that
/// the length of this buffer isn't exposed.
pub static DATA_BUFFER: LazyLock<[u8; 4096]> = LazyLock::new(|| [0; 4096]);
pub static DATA_LENGTH: LazyLock<Mutex<usize>> = LazyLock::new(|| Mutex::new(0));
pub use hostimport::update_nonce;
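The host side of this buffer protocol is implemented in wasm/wasm.go further down. As a quick reference, here is a minimal wazero sketch of the documented flow (fetch data_ptr, write up to 4096 bytes, then call set_data_length). It is not part of this commit; the package and function names are illustrative and it assumes an already-instantiated api.Module built from one of the proof-of-work modules.

package example // illustrative only, not part of this commit

import (
	"context"
	"fmt"

	"github.com/tetratelabs/wazero/api"
)

// writeChallenge copies a challenge into the module's 4096-byte data buffer
// and records its length, mirroring the protocol documented above.
func writeChallenge(ctx context.Context, mod api.Module, challenge []byte) error {
	if len(challenge) > 4096 {
		return fmt.Errorf("challenge is %d bytes, the data buffer holds at most 4096", len(challenge))
	}
	res, err := mod.ExportedFunction("data_ptr").Call(ctx)
	if err != nil {
		return fmt.Errorf("can't get data pointer: %w", err)
	}
	if !mod.Memory().Write(uint32(res[0]), challenge) {
		return fmt.Errorf("write to data buffer is out of range")
	}
	_, err = mod.ExportedFunction("set_data_length").Call(ctx, uint64(len(challenge)))
	return err
}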

wasm/pow/argon2id/Cargo.toml Normal file

@@ -0,0 +1,21 @@
[package]
name = "argon2id"
version = "0.1.0"
edition = "2024"
[lib]
crate-type = ["cdylib"]
[dependencies]
argon2 = "0.5"
anubis = { path = "../../anubis" }
[lints.clippy]
nursery = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }
unwrap_used = "warn"
uninlined_format_args = "allow"
missing_panics_doc = "allow"
missing_errors_doc = "allow"
cognitive_complexity = "allow"

wasm/pow/argon2id/src/lib.rs Normal file

@@ -0,0 +1,176 @@
use anubis::{DATA_BUFFER, DATA_LENGTH, update_nonce};
use argon2::Argon2;
use std::boxed::Box;
use std::sync::{LazyLock, Mutex};
/// Argon2id outputs here are 32 bytes (256 bits). These are stored in static buffers due to the
/// fact that you cannot easily pass data from host space to WebAssembly space.
pub static RESULT_HASH: LazyLock<Mutex<[u8; 32]>> = LazyLock::new(|| Mutex::new([0; 32]));
pub static VERIFICATION_HASH: LazyLock<Box<Mutex<[u8; 32]>>> =
LazyLock::new(|| Box::new(Mutex::new([0; 32])));
/// Core validation function. Compare each bit in the hash by progressively masking bits until
/// some are found to not be matching.
///
/// There are probably more clever ways to do this, likely involving lookup tables or something
/// really fun like that. However in my testing this lets us get up to 200 kilohashes per second
/// on my Ryzen 7950x3D, up from about 50 kilohashes per second in JavaScript.
fn validate(hash: &[u8], difficulty: u32) -> bool {
let mut remaining = difficulty;
for &byte in hash {
// If we're out of bits to check, exit. This is all good.
if remaining == 0 {
break;
}
// If there are more than 8 bits remaining, the entire byte should be a
// zero. This fast-path compares the byte to 0 and if it matches, subtract
// 8 bits.
if remaining >= 8 {
if byte != 0 {
return false;
}
remaining -= 8;
} else {
// Otherwise mask off individual bits and check against them.
let mask = 0xFF << (8 - remaining);
if (byte & mask) != 0 {
return false;
}
remaining = 0;
}
}
true
}
/// Computes hash for given nonce.
///
/// This differs from the JavaScript implementations by constructing the hash differently. In
/// JavaScript implementations, the SHA-256 input is the result of appending the nonce as an
/// integer to the hex-formatted challenge, eg:
///
/// sha256(`${challenge}${nonce}`);
///
/// This **does work**, however I think that this can be done a bit better by operating on the
/// challenge bytes _directly_ and treating the nonce as a salt.
///
/// The nonce is also randomly encoded in either big or little endian depending on the last
/// byte of the data buffer in an effort to make it more annoying to automate with GPUs.
fn compute_hash(nonce: u32) -> [u8; 32] {
let data = &DATA_BUFFER;
let data_len = *DATA_LENGTH.lock().unwrap();
let use_le = data[data_len - 1] >= 128;
let mut result = [0u8; 32];
let nonce = nonce as u64;
let data_slice = &data[..data_len];
let nonce = if use_le {
nonce.to_le_bytes()
} else {
nonce.to_be_bytes()
};
let argon2 = Argon2::default();
argon2
.hash_password_into(&data_slice, &nonce, &mut result)
.unwrap();
result
}
/// This function is the main entrypoint for the Anubis proof of work implementation.
///
/// This expects `DATA_BUFFER` to be pre-populated with the challenge value as "raw bytes".
/// The definition of what goes in the data buffer is an exercise for the implementor, but
/// for SHA-256 we store the hash as "raw bytes". The data buffer is intentionally oversized
/// so that the challenge value can be expanded in the future.
///
/// `difficulty` is the number of leading bits that must match `0` in order for the
/// challenge to be successfully passed. This will be validated by the server.
///
/// `initial_nonce` is the initial value of the nonce (number used once). This nonce will be
/// appended to the challenge value in order to find a hash matching the specified
/// difficulty.
///
/// `iterand` (noun form of iterate) is the amount that the nonce should be increased by
/// every iteration of the proof of work loop. This will vary by how many threads are
/// running the proof-of-work check, and also functions as a thread ID. This prevents
/// wasting CPU time retrying a hash+nonce pair that likely won't work.
#[unsafe(no_mangle)]
pub extern "C" fn anubis_work(difficulty: u32, initial_nonce: u32, iterand: u32) -> u32 {
let mut nonce = initial_nonce;
loop {
let hash = compute_hash(nonce);
if validate(&hash, difficulty) {
// If the challenge worked, copy the bytes into `RESULT_HASH` so the runtime
// can pick it up.
let mut challenge = RESULT_HASH.lock().unwrap();
challenge.copy_from_slice(&hash);
return nonce;
}
let old_nonce = nonce;
nonce = nonce.wrapping_add(iterand);
// send a progress update every 1024 iterations. since each thread checks
// separate values, one simple way to do this is by bit masking the
// nonce for multiples of 1024. unfortunately, if the number of threads
// is not prime, only some of the threads will be sending the status
// update and they will get behind the others. this is slightly more
// complicated but ensures an even distribution between threads.
if nonce > old_nonce + 1023 && (nonce >> 10) % iterand == initial_nonce {
update_nonce(nonce);
}
}
}
/// This function is called by the server in order to validate a proof-of-work challenge.
/// This expects `DATA_BUFFER` to be set to the challenge value and `VERIFICATION_HASH` to
/// be set to the "raw bytes" of the SHA-256 hash that the client calculated.
///
/// If everything is good, it returns true. Otherwise, it returns false.
///
/// XXX(Xe): this could probably return an error code for what step fails, but this is fine
/// for now.
#[unsafe(no_mangle)]
pub extern "C" fn anubis_validate(nonce: u32, difficulty: u32) -> bool {
let computed = compute_hash(nonce);
let valid = validate(&computed, difficulty);
if !valid {
return false;
}
let verification = VERIFICATION_HASH.lock().unwrap();
computed == *verification
}
// These functions exist to give pointers and lengths to the runtime around the Anubis
// checks. This allows JavaScript and Go to safely manipulate the memory layout that Rust
// has statically allocated at compile time without having to assume how the Rust compiler
// is going to lay it out.
#[unsafe(no_mangle)]
pub extern "C" fn result_hash_ptr() -> *const u8 {
let challenge = RESULT_HASH.lock().unwrap();
challenge.as_ptr()
}
#[unsafe(no_mangle)]
pub extern "C" fn result_hash_size() -> usize {
RESULT_HASH.lock().unwrap().len()
}
#[unsafe(no_mangle)]
pub extern "C" fn verification_hash_ptr() -> *const u8 {
let verification = VERIFICATION_HASH.lock().unwrap();
verification.as_ptr()
}
#[unsafe(no_mangle)]
pub extern "C" fn verification_hash_size() -> usize {
VERIFICATION_HASH.lock().unwrap().len()
}

wasm/pow/sha256/Cargo.toml Normal file

@@ -0,0 +1,21 @@
[package]
name = "sha256"
version = "0.1.0"
edition = "2024"
[lib]
crate-type = ["cdylib"]
[dependencies]
sha2 = "0.11.0-pre.5"
anubis = { path = "../../anubis" }
[lints.clippy]
nursery = { level = "warn", priority = -1 }
pedantic = { level = "warn", priority = -1 }
unwrap_used = "warn"
uninlined_format_args = "allow"
missing_panics_doc = "allow"
missing_errors_doc = "allow"
cognitive_complexity = "allow"

wasm/pow/sha256/run.html Normal file

@@ -0,0 +1 @@
<script src="run.js" type="module"></script>

wasm/pow/sha256/run.js Normal file

@@ -0,0 +1,105 @@
// Load and instantiate the .wasm file
const response = await fetch("sha256.wasm");
const importObject = {
anubis: {
anubis_update_nonce: (nonce) => {
console.log(`Received nonce update: ${nonce}`);
// Your logic here
}
}
};
const module = await WebAssembly.compileStreaming(response);
const instance = await WebAssembly.instantiate(module, importObject);
// Get exports
const {
anubis_work,
anubis_validate,
data_ptr,
result_hash_ptr,
result_hash_size,
verification_hash_ptr,
verification_hash_size,
set_data_length,
memory
} = instance.exports;
console.log(instance.exports);
function uint8ArrayToHex(arr) {
return Array.from(arr)
.map((c) => c.toString(16).padStart(2, "0"))
.join("");
}
function hexToUint8Array(hexString) {
// Remove whitespace and optional '0x' prefix
hexString = hexString.replace(/\s+/g, '').replace(/^0x/, '');
// Check for valid length
if (hexString.length % 2 !== 0) {
throw new Error('Invalid hex string length');
}
// Check for valid characters
if (!/^[0-9a-fA-F]+$/.test(hexString)) {
throw new Error('Invalid hex characters');
}
// Convert to Uint8Array
const byteArray = new Uint8Array(hexString.length / 2);
for (let i = 0; i < byteArray.length; i++) {
const byteValue = parseInt(hexString.substr(i * 2, 2), 16);
byteArray[i] = byteValue;
}
return byteArray;
}
// Write data to buffer
function writeToBuffer(data) {
if (data.length > 4096) throw new Error("Data exceeds buffer size"); // DATA_BUFFER is 4096 bytes
// Get pointer and create view
const offset = data_ptr();
const buffer = new Uint8Array(memory.buffer, offset, data.length);
// Copy data
buffer.set(data);
// Set data length
set_data_length(data.length);
}
function readFromChallenge() {
const offset = result_hash_ptr();
const buffer = new Uint8Array(memory.buffer, offset, result_hash_size());
return buffer;
}
// Example usage:
const data = hexToUint8Array("98ea6e4f216f2fb4b69fff9b3a44842c38686ca685f3f55dc48c5d3fb1107be4");
writeToBuffer(data);
// Call work function
const t0 = Date.now();
const nonce = anubis_work(16, 0, 1);
const t1 = Date.now();
console.log(`Done! Took ${t1 - t0}ms, ${nonce} iterations`);
const challengeBuffer = readFromChallenge();
{
const buffer = new Uint8Array(memory.buffer, verification_hash_ptr(), verification_hash_size());
buffer.set(challengeBuffer);
}
// Validate
const isValid = anubis_validate(nonce, 16) === 1;
console.log(isValid);
console.log(uint8ArrayToHex(readFromChallenge()));

wasm/pow/sha256/src/lib.rs Normal file

@@ -0,0 +1,171 @@
use anubis::{DATA_BUFFER, DATA_LENGTH, update_nonce};
use sha2::{Digest, Sha256};
use std::boxed::Box;
use std::sync::{LazyLock, Mutex};
/// SHA-256 hashes are 32 bytes (256 bits). These are stored in static buffers due to the
/// fact that you cannot easily pass data from host space to WebAssembly space.
pub static RESULT_HASH: LazyLock<Box<Mutex<[u8; 32]>>> =
LazyLock::new(|| Box::new(Mutex::new([0; 32])));
pub static VERIFICATION_HASH: LazyLock<Box<Mutex<[u8; 32]>>> =
LazyLock::new(|| Box::new(Mutex::new([0; 32])));
/// Core validation function. Compare each bit in the hash by progressively masking bits until
/// some are found to not be matching.
///
/// There are probably more clever ways to do this, likely involving lookup tables or something
/// really fun like that. However in my testing this lets us get up to 200 kilohashes per second
/// on my Ryzen 7950x3D, up from about 50 kilohashes per second in JavaScript.
fn validate(hash: &[u8], difficulty: u32) -> bool {
let mut remaining = difficulty;
for &byte in hash {
// If we're out of bits to check, exit. This is all good.
if remaining == 0 {
break;
}
// If there are more than 8 bits remaining, the entire byte should be a
// zero. This fast-path compares the byte to 0 and if it matches, subtract
// 8 bits.
if remaining >= 8 {
if byte != 0 {
return false;
}
remaining -= 8;
} else {
// Otherwise mask off individual bits and check against them.
let mask = 0xFF << (8 - remaining);
if (byte & mask) != 0 {
return false;
}
remaining = 0;
}
}
true
}
/// Computes hash for given nonce.
///
/// This differs from the JavaScript implementations by constructing the hash differently. In
/// JavaScript implementations, the SHA-256 input is the result of appending the nonce as an
/// integer to the hex-formatted challenge, eg:
///
/// sha256(`${challenge}${nonce}`);
///
/// This **does work**, however I think that this can be done a bit better by operating on the
/// challenge bytes _directly_ and treating the nonce as a salt.
///
/// The nonce is also randomly encoded in either big or little endian depending on the last
/// byte of the data buffer in an effort to make it more annoying to automate with GPUs.
fn compute_hash(nonce: u32) -> [u8; 32] {
let data = &DATA_BUFFER;
let data_len = *DATA_LENGTH.lock().unwrap();
let use_le = data[data_len - 1] >= 128;
let data_slice = &data[..data_len];
let mut hasher = Sha256::new();
hasher.update(data_slice);
hasher.update(if use_le {
nonce.to_le_bytes()
} else {
nonce.to_be_bytes()
});
hasher.finalize().into()
}
/// This function is the main entrypoint for the Anubis proof of work implementation.
///
/// This expects `DATA_BUFFER` to be pre-populated with the challenge value as "raw bytes".
/// The definition of what goes in the data buffer is an exercise for the implementor, but
/// for SHA-256 we store the hash as "raw bytes". The data buffer is intentionally oversized
/// so that the challenge value can be expanded in the future.
///
/// `difficulty` is the number of leading bits that must match `0` in order for the
/// challenge to be successfully passed. This will be validated by the server.
///
/// `initial_nonce` is the initial value of the nonce (number used once). This nonce will be
/// appended to the challenge value in order to find a hash matching the specified
/// difficulty.
///
/// `iterand` (noun form of iterate) is the amount that the nonce should be increased by
/// every iteration of the proof of work loop. This will vary by how many threads are
/// running the proof-of-work check, and also functions as a thread ID. This prevents
/// wasting CPU time retrying a hash+nonce pair that likely won't work.
#[unsafe(no_mangle)]
pub extern "C" fn anubis_work(difficulty: u32, initial_nonce: u32, iterand: u32) -> u32 {
let mut nonce = initial_nonce;
loop {
let hash = compute_hash(nonce);
if validate(&hash, difficulty) {
// If the challenge worked, copy the bytes into `RESULT_HASH` so the runtime
// can pick it up.
let mut challenge = RESULT_HASH.lock().unwrap();
challenge.copy_from_slice(&hash);
return nonce;
}
let old_nonce = nonce;
nonce = nonce.wrapping_add(iterand);
// send a progress update every 1024 iterations. since each thread checks
// separate values, one simple way to do this is by bit masking the
// nonce for multiples of 1024. unfortunately, if the number of threads
// is not prime, only some of the threads will be sending the status
// update and they will get behind the others. this is slightly more
// complicated but ensures an even distribution between threads.
if nonce > old_nonce | 1023 && (nonce >> 10) % iterand == initial_nonce {
update_nonce(nonce);
}
}
}
/// This function is called by the server in order to validate a proof-of-work challenge.
/// This expects `DATA_BUFFER` to be set to the challenge value and `VERIFICATION_HASH` to
/// be set to the "raw bytes" of the SHA-256 hash that the client calculated.
///
/// If everything is good, it returns true. Otherwise, it returns false.
///
/// XXX(Xe): this could probably return an error code for what step fails, but this is fine
/// for now.
#[unsafe(no_mangle)]
pub extern "C" fn anubis_validate(nonce: u32, difficulty: u32) -> bool {
let computed = compute_hash(nonce);
let valid = validate(&computed, difficulty);
if !valid {
return false;
}
let verification = VERIFICATION_HASH.lock().unwrap();
computed == *verification
}
// These functions exist to give pointers and lengths to the runtime around the Anubis
// checks. This allows JavaScript and Go to safely manipulate the memory layout that Rust
// has statically allocated at compile time without having to assume how the Rust compiler
// is going to lay it out.
#[unsafe(no_mangle)]
pub extern "C" fn result_hash_ptr() -> *const u8 {
let challenge = RESULT_HASH.lock().unwrap();
challenge.as_ptr()
}
#[unsafe(no_mangle)]
pub extern "C" fn result_hash_size() -> usize {
RESULT_HASH.lock().unwrap().len()
}
#[unsafe(no_mangle)]
pub extern "C" fn verification_hash_ptr() -> *const u8 {
let verification = VERIFICATION_HASH.lock().unwrap();
verification.as_ptr()
}
#[unsafe(no_mangle)]
pub extern "C" fn verification_hash_size() -> usize {
VERIFICATION_HASH.lock().unwrap().len()
}
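For reference, the hash construction documented above for the SHA-256 module (raw challenge bytes followed by the nonce, whose byte order is picked by the last challenge byte) can also be reproduced outside of WebAssembly. A minimal Go sketch follows; the name computeHash is illustrative and the in-wasm compute_hash above remains the authoritative version.

package example // illustrative only, not part of this commit

import (
	"crypto/sha256"
	"encoding/binary"
)

// computeHash mirrors compute_hash() above for the sha256 module: hash the raw
// challenge bytes, then the 4-byte nonce, little-endian when the last challenge
// byte is >= 128 and big-endian otherwise.
func computeHash(challenge []byte, nonce uint32) [32]byte {
	useLE := challenge[len(challenge)-1] >= 128
	var nonceBytes [4]byte
	if useLE {
		binary.LittleEndian.PutUint32(nonceBytes[:], nonce)
	} else {
		binary.BigEndian.PutUint32(nonceBytes[:], nonce)
	}
	h := sha256.New()
	h.Write(challenge)
	h.Write(nonceBytes[:])
	var out [32]byte
	copy(out[:], h.Sum(nil))
	return out
}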

wasm/wasm.go Normal file

@@ -0,0 +1,299 @@
package wasm
import (
"context"
"errors"
"fmt"
"io"
"math"
"os"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/api"
)
func UpdateNonce(uint32) {}
var (
validationTime = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "anubis_wasm_validation_time",
Help: "The time taken for the validation function to run per checker (nanoseconds)",
Buckets: prometheus.ExponentialBucketsRange(1, math.Pow(2, 31), 32),
}, []string{"fname"})
validationCount = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "anubis_wasm_validation",
Help: "The number of times the validation logic has been run and its success rate",
}, []string{"fname", "success"})
)
type Runner struct {
r wazero.Runtime
code wazero.CompiledModule
fname string
}
func NewRunner(ctx context.Context, fname string, fin io.ReadCloser) (*Runner, error) {
data, err := io.ReadAll(fin)
if err != nil {
return nil, fmt.Errorf("wasm: can't read from fin: %w", err)
}
r := wazero.NewRuntime(ctx)
_, err = r.NewHostModuleBuilder("anubis").
NewFunctionBuilder().
WithFunc(func(context.Context, uint32) {}).
Export("anubis_update_nonce").
Instantiate(ctx)
if err != nil {
return nil, fmt.Errorf("wasm: can't export anubis_update_nonce: %w", err)
}
code, err := r.CompileModule(ctx, data)
if err != nil {
return nil, fmt.Errorf("wasm: can't compile module: %w", err)
}
result := &Runner{
r: r,
code: code,
fname: fname,
}
return result, nil
}
func (r *Runner) checkExports(module api.Module) error {
funcs := []string{
"anubis_work",
"anubis_validate",
"data_ptr",
"set_data_length",
"result_hash_ptr",
"result_hash_size",
"verification_hash_ptr",
"verification_hash_size",
}
var errs []error
for _, fun := range funcs {
if module.ExportedFunction(fun) == nil {
errs = append(errs, fmt.Errorf("function %s is not defined", fun))
}
}
if len(errs) != 0 {
return errors.Join(errs...)
}
return nil
}
func (r *Runner) anubisWork(ctx context.Context, module api.Module, difficulty, initialNonce, iterand uint32) (uint32, error) {
results, err := module.ExportedFunction("anubis_work").Call(ctx, uint64(difficulty), uint64(initialNonce), uint64(iterand))
if err != nil {
return 0, err
}
return uint32(results[0]), nil
}
func (r *Runner) anubisValidate(ctx context.Context, module api.Module, nonce, difficulty uint32) (bool, error) {
results, err := module.ExportedFunction("anubis_validate").Call(ctx, uint64(nonce), uint64(difficulty))
if err != nil {
return false, err
}
// Rust booleans are 1 if true
return results[0] == 1, nil
}
func (r *Runner) dataPtr(ctx context.Context, module api.Module) (uint32, error) {
results, err := module.ExportedFunction("data_ptr").Call(ctx)
if err != nil {
return 0, err
}
return uint32(results[0]), nil
}
func (r *Runner) setDataLength(ctx context.Context, module api.Module, length uint32) error {
_, err := module.ExportedFunction("set_data_length").Call(ctx, uint64(length))
return err
}
func (r *Runner) resultHashPtr(ctx context.Context, module api.Module) (uint32, error) {
results, err := module.ExportedFunction("result_hash_ptr").Call(ctx)
if err != nil {
return 0, err
}
return uint32(results[0]), nil
}
func (r *Runner) resultHashSize(ctx context.Context, module api.Module) (uint32, error) {
results, err := module.ExportedFunction("result_hash_size").Call(ctx)
if err != nil {
return 0, err
}
return uint32(results[0]), nil
}
func (r *Runner) verificationHashPtr(ctx context.Context, module api.Module) (uint32, error) {
results, err := module.ExportedFunction("verification_hash_ptr").Call(ctx)
if err != nil {
return 0, err
}
return uint32(results[0]), nil
}
func (r *Runner) verificationHashSize(ctx context.Context, module api.Module) (uint32, error) {
results, err := module.ExportedFunction("verification_hash_size").Call(ctx)
if err != nil {
return 0, err
}
return uint32(results[0]), nil
}
func (r *Runner) writeData(ctx context.Context, module api.Module, data []byte) error {
if len(data) > 4096 {
return os.ErrInvalid
}
length := uint32(len(data))
dataPtr, err := r.dataPtr(ctx, module)
if err != nil {
return fmt.Errorf("can't read data pointer: %w", err)
}
if !module.Memory().Write(dataPtr, data) {
return fmt.Errorf("[unexpected] can't write memory, is data out of range??")
}
if err := r.setDataLength(ctx, module, length); err != nil {
return fmt.Errorf("can't set data length: %w", err)
}
return nil
}
func (r *Runner) readResult(ctx context.Context, module api.Module) ([]byte, error) {
length, err := r.resultHashSize(ctx, module)
if err != nil {
return nil, fmt.Errorf("can't get result hash size: %w", err)
}
ptr, err := r.resultHashPtr(ctx, module)
if err != nil {
return nil, fmt.Errorf("can't get result hash pointer: %w", err)
}
buf, ok := module.Memory().Read(ptr, length)
if !ok {
return nil, fmt.Errorf("[unexpected] can't read from memory, is something out of range??")
}
return buf, nil
}
func (r *Runner) run(ctx context.Context, data []byte, difficulty, initialNonce, iterand uint32) (uint32, []byte, api.Module, error) {
mod, err := r.r.InstantiateModule(ctx, r.code, wazero.NewModuleConfig().WithName(r.fname))
if err != nil {
return 0, nil, nil, fmt.Errorf("can't instantiate module: %w", err)
}
if err := r.checkExports(mod); err != nil {
return 0, nil, nil, err
}
if err := r.writeData(ctx, mod, data); err != nil {
return 0, nil, nil, err
}
nonce, err := r.anubisWork(ctx, mod, difficulty, initialNonce, iterand)
if err != nil {
return 0, nil, nil, fmt.Errorf("can't run work function: %w", err)
}
hash, err := r.readResult(ctx, mod)
if err != nil {
return 0, nil, nil, fmt.Errorf("can't read result: %w", err)
}
return nonce, hash, mod, nil
}
func (r *Runner) Run(ctx context.Context, data []byte, difficulty, initialNonce, iterand uint32) (uint32, []byte, error) {
nonce, hash, _, err := r.run(ctx, data, difficulty, initialNonce, iterand)
if err != nil {
return 0, nil, fmt.Errorf("can't run %s: %w", r.fname, err)
}
return nonce, hash, nil
}
func (r *Runner) verify(ctx context.Context, data, verify []byte, nonce, difficulty uint32) (bool, api.Module, error) {
mod, err := r.r.InstantiateModule(ctx, r.code, wazero.NewModuleConfig().WithName(r.fname))
if err != nil {
return false, nil, fmt.Errorf("can't instantiate module: %w", err)
}
if err := r.checkExports(mod); err != nil {
return false, nil, err
}
if err := r.writeData(ctx, mod, data); err != nil {
return false, nil, err
}
if err := r.writeVerification(ctx, mod, verify); err != nil {
return false, nil, err
}
ok, err := r.anubisValidate(ctx, mod, nonce, difficulty)
if err != nil {
return false, nil, fmt.Errorf("can't validate hash %x from challenge %x, nonce %d and difficulty %d: %w", verify, data, nonce, difficulty, err)
}
return ok, mod, nil
}
func (r *Runner) Verify(ctx context.Context, data, verify []byte, nonce, difficulty uint32) (bool, error) {
t0 := time.Now()
ok, _, err := r.verify(ctx, data, verify, nonce, difficulty)
validationTime.WithLabelValues(r.fname).Observe(float64(time.Since(t0)))
validationCount.WithLabelValues(r.fname, strconv.FormatBool(ok)).Inc()
return ok, err
}
func (r *Runner) writeVerification(ctx context.Context, module api.Module, data []byte) error {
length, err := r.verificationHashSize(ctx, module)
if err != nil {
return fmt.Errorf("can't get verification hash size: %v", err)
}
if length != uint32(len(data)) {
return fmt.Errorf("data is too big, want %d bytes, got: %d", length, len(data))
}
ptr, err := r.verificationHashPtr(ctx, module)
if err != nil {
return fmt.Errorf("can't get verification hash pointer: %v", err)
}
if !module.Memory().Write(ptr, data) {
return fmt.Errorf("[unexpected] can't write memory, is data out of range??")
}
return nil
}
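Putting the Runner together end to end, a minimal usage sketch looks like the following. It assumes the package is importable as github.com/TecharoHQ/anubis/wasm and that a sha256.wasm artifact exists under the path produced by scripts/build_wasm.sh; the function name, path, and difficulty are illustrative.

package example // illustrative only, not part of this commit

import (
	"context"
	"crypto/sha256"
	"fmt"
	"os"

	"github.com/TecharoHQ/anubis/wasm" // assumed import path for this new package
)

func solveAndVerify(ctx context.Context) error {
	// Path layout comes from scripts/build_wasm.sh; adjust as needed.
	fin, err := os.Open("web/static/wasm/baseline/sha256.wasm")
	if err != nil {
		return err
	}
	defer fin.Close()

	runner, err := wasm.NewRunner(ctx, "sha256.wasm", fin)
	if err != nil {
		return err
	}

	challenge := sha256.Sum256([]byte("example challenge"))

	// difficulty 16, starting nonce 0, a single "thread" (iterand 1)
	nonce, hash, err := runner.Run(ctx, challenge[:], 16, 0, 1)
	if err != nil {
		return err
	}

	ok, err := runner.Verify(ctx, challenge[:], hash, nonce, 16)
	if err != nil {
		return err
	}
	fmt.Printf("nonce=%d hash=%x valid=%v\n", nonce, hash, ok)
	return nil
}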

wasm/wasm_test.go Normal file

@@ -0,0 +1,174 @@
package wasm
import (
"context"
"crypto/sha256"
"fmt"
"io/fs"
"testing"
"time"
"github.com/TecharoHQ/anubis/web"
)
func abiTest(t testing.TB, kind, fname string, difficulty uint32) {
fin, err := web.Static.Open("static/wasm/" + kind + "/" + fname)
if err != nil {
t.Fatal(err)
}
defer fin.Close()
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
t.Cleanup(cancel)
runner, err := NewRunner(ctx, fname, fin)
if err != nil {
t.Fatal(err)
}
h := sha256.New()
fmt.Fprint(h, t.Name())
data := h.Sum(nil)
nonce, hash, mod, err := runner.run(ctx, data, difficulty, 0, 1)
if err != nil {
t.Fatal(err)
}
if err := runner.writeVerification(ctx, mod, hash); err != nil {
t.Fatalf("can't write verification: %v", err)
}
ok, err := runner.anubisValidate(ctx, mod, nonce, difficulty)
if err != nil {
t.Fatalf("can't run validation: %v", err)
}
if !ok {
t.Error("validation failed")
}
t.Logf("used %d pages of wasm memory (%d bytes)", mod.Memory().Size()/65536, mod.Memory().Size())
}
func TestAlgos(t *testing.T) {
fnames, err := fs.ReadDir(web.Static, "static/wasm/baseline")
if err != nil {
t.Fatal(err)
}
for _, kind := range []string{"baseline", "simd128"} {
for _, fname := range fnames {
fname := fname
t.Run(kind+"/"+fname.Name(), func(t *testing.T) {
abiTest(t, kind, fname.Name(), 4)
})
}
}
}
func bench(b *testing.B, kind, fname string, difficulties []uint32) {
b.Helper()
fin, err := web.Static.Open("static/wasm/" + kind + "/" + fname)
if err != nil {
b.Fatal(err)
}
defer fin.Close()
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
b.Cleanup(cancel)
runner, err := NewRunner(ctx, fname, fin)
if err != nil {
b.Fatal(err)
}
h := sha256.New()
fmt.Fprint(h, "This is an example value that exists only to test the system.")
data := h.Sum(nil)
_, _, mod, err := runner.run(ctx, data, 0, 0, 1)
if err != nil {
b.Fatal(err)
}
for _, difficulty := range difficulties {
b.Run(fmt.Sprintf("difficulty/%d", difficulty), func(b *testing.B) {
for b.Loop() {
difficulty := difficulty
_, err := runner.anubisWork(ctx, mod, difficulty, 0, 1)
if err != nil {
b.Fatalf("can't do test work run: %v", err)
}
}
})
}
}
func BenchmarkSHA256(b *testing.B) {
for _, kind := range []string{"baseline", "simd128"} {
b.Run(kind, func(b *testing.B) {
bench(b, kind, "sha256.wasm", []uint32{4, 6, 8, 10, 12, 14, 16})
})
}
}
func BenchmarkArgon2ID(b *testing.B) {
for _, kind := range []string{"baseline", "simd128"} {
b.Run(kind, func(b *testing.B) {
bench(b, kind, "argon2id.wasm", []uint32{4, 6, 8})
})
}
}
func BenchmarkValidate(b *testing.B) {
fnames, err := fs.ReadDir(web.Static, "static/wasm/simd128")
if err != nil {
b.Fatal(err)
}
h := sha256.New()
fmt.Fprint(h, "This is an example value that exists only to test the system.")
data := h.Sum(nil)
for _, fname := range fnames {
fname := fname.Name()
difficulty := uint32(1)
switch fname {
case "sha256.wasm":
difficulty = 16
}
b.Run(fname, func(b *testing.B) {
fin, err := web.Static.Open("static/wasm/simd128/" + fname)
if err != nil {
b.Fatal(err)
}
defer fin.Close()
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
b.Cleanup(cancel)
runner, err := NewRunner(ctx, fname, fin)
if err != nil {
b.Fatal(err)
}
nonce, hash, mod, err := runner.run(ctx, data, difficulty, 0, 1)
if err != nil {
b.Fatal(err)
}
if err := runner.writeVerification(ctx, mod, hash); err != nil {
b.Fatalf("can't write verification: %v", err)
}
for b.Loop() {
_, err := runner.anubisValidate(ctx, mod, nonce, difficulty)
if err != nil {
b.Fatalf("can't run validation: %v", err)
}
}
})
}
}

web/static/wasm/.gitignore vendored Normal file

@@ -0,0 +1,2 @@
*
!.gitignore