diff --git a/Cargo.lock b/Cargo.lock index 30043153..ee8ad48e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,45 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "anubis" +version = "0.1.0" + +[[package]] +name = "argon2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" +dependencies = [ + "base64ct", + "blake2", + "cpufeatures", + "password-hash", +] + +[[package]] +name = "argon2id" +version = "0.1.0" +dependencies = [ + "anubis", + "argon2", +] + +[[package]] +name = "base64ct" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e25b6adfb930f02d1981565a6e5d9c547ac15a96606256d3b59040e5cd4ca3" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -44,6 +83,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", + "subtle", ] [[package]] @@ -62,6 +102,23 @@ version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +[[package]] +name = "password-hash" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" +dependencies = [ + "base64ct", + "rand_core", + "subtle", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + [[package]] name = "sha2" version = "0.10.8" @@ -77,9 +134,16 @@ dependencies = [ name = "sha256" 
version = "0.1.0" dependencies = [ + "anubis", "sha2", ] +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "typenum" version = "1.18.0" diff --git a/Cargo.toml b/Cargo.toml index 9e5a7ac4..ae9e7a09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,10 @@ [workspace] resolver = "2" -members = ["wasm/pow/*"] +members = ["wasm/anubis", "wasm/pow/*"] [profile.release] strip = true opt-level = "s" lto = "thin" codegen-units = 1 +panic = "abort" diff --git a/package.json b/package.json index 74c8f7de..c45a4494 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "test": "npm run assets && go test ./...", "test:integration": "npm run assets && go test -v ./internal/test", "assets:frontend": "go generate ./... && ./web/build.sh && ./xess/build.sh", - "assets:wasm": "cargo build --release --target wasm32-unknown-unknown && cp -vf ./target/wasm32-unknown-unknown/release/*.wasm ./web/static/wasm", + "assets:wasm": "cargo build --release --target wasm32-unknown-unknown && sh -c 'cp -vf ./target/wasm32-unknown-unknown/release/*.wasm ./web/static/wasm'", "assets": "npm run assets:frontend && npm run assets:wasm", "build": "npm run assets && go build -o ./var/anubis ./cmd/anubis", "dev": "npm run assets && go run ./cmd/anubis --use-remote-address", diff --git a/wasm/anubis/Cargo.toml b/wasm/anubis/Cargo.toml new file mode 100644 index 00000000..30f1747a --- /dev/null +++ b/wasm/anubis/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "anubis" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/wasm/anubis/src/lib.rs b/wasm/anubis/src/lib.rs new file mode 100644 index 00000000..25a61b8f --- /dev/null +++ b/wasm/anubis/src/lib.rs @@ -0,0 +1,25 @@ +#[cfg(target_arch = "wasm32")] +mod hostimport { + #[link(wasm_import_module = "anubis")] + unsafe extern "C" { + /// The runtime expects this function to 
be defined. It is called whenever the Anubis check + /// worker processes about 1024 hashes. This can be a no-op if you want. + fn anubis_update_nonce(nonce: u32); + } + + /// Safe wrapper to `anubis_update_nonce`. + pub fn update_nonce(nonce: u32) { + unsafe { + anubis_update_nonce(nonce); + } + } +} + +#[cfg(not(target_arch = "wasm32"))] +mod hostimport { + pub fn update_nonce(_nonce: u32) { + // This is intentionally blank + } +} + +pub use hostimport::update_nonce; diff --git a/wasm/pow/argon2id/Cargo.toml b/wasm/pow/argon2id/Cargo.toml new file mode 100644 index 00000000..b96b2933 --- /dev/null +++ b/wasm/pow/argon2id/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "argon2id" +version = "0.1.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +argon2 = "0.5" + +anubis = { path = "../../anubis" } + +[lints.clippy] +nursery = { level = "warn", priority = -1 } +pedantic = { level = "warn", priority = -1 } +unwrap_used = "warn" +uninlined_format_args = "allow" +missing_panics_doc = "allow" +missing_errors_doc = "allow" +cognitive_complexity = "allow" diff --git a/wasm/pow/argon2id/src/lib.rs b/wasm/pow/argon2id/src/lib.rs new file mode 100644 index 00000000..7466a5ce --- /dev/null +++ b/wasm/pow/argon2id/src/lib.rs @@ -0,0 +1,203 @@ +use anubis::update_nonce; +use argon2::Argon2; +use std::boxed::Box; +use std::sync::{LazyLock, Mutex}; + +/// The data buffer is a bit weird in that it doesn't have an explicit length as it can +/// and will change depending on the challenge input that was sent by the server. +/// However, it can only fit 4096 bytes of data (one amd64 machine page). This is +/// slightly overkill for the purposes of an Anubis check, but it's fine to assume +/// that the browser can afford this much ram usage. 
+/// +/// Callers should fetch the base data pointer, write up to 4096 bytes, and then +/// `set_data_length` the number of bytes they have written +/// +/// This is also functionally a write-only buffer, so it doesn't really matter that +/// the length of this buffer isn't exposed. +pub static DATA_BUFFER: LazyLock> = LazyLock::new(|| Box::new([0; 4096])); + +pub static DATA_LENGTH: LazyLock> = LazyLock::new(|| Mutex::new(0)); + +/// SHA-256 hashes are 32 bytes (256 bits). These are stored in static buffers due to the +/// fact that you cannot easily pass data from host space to WebAssembly space. +pub static RESULT_HASH: LazyLock> = LazyLock::new(|| Mutex::new([0; 32])); + +pub static VERIFICATION_HASH: LazyLock>> = + LazyLock::new(|| Box::new(Mutex::new([0; 32]))); + +/// Core validation function. Compare each bit in the hash by progressively masking bits until +/// some are found to not be matching. +/// +/// There are probably more clever ways to do this, likely involving lookup tables or something +/// really fun like that. However in my testing this lets us get up to 200 kilohashes per second +/// on my Ryzen 7950x3D, up from about 50 kilohashes per second in JavaScript. +fn validate(hash: &[u8], difficulty: u32) -> bool { + let mut remaining = difficulty; + for &byte in hash { + // If we're out of bits to check, exit. This is all good. + if remaining == 0 { + break; + } + + // If there are more than 8 bits remaining, the entire byte should be a + // zero. This fast-path compares the byte to 0 and if it matches, subtract + // 8 bits. + if remaining >= 8 { + if byte != 0 { + return false; + } + remaining -= 8; + } else { + // Otherwise mask off individual bits and check against them. + let mask = 0xFF << (8 - remaining); + if (byte & mask) != 0 { + return false; + } + remaining = 0; + } + } + true +} + +/// Computes hash for given nonce. +/// +/// This differs from the JavaScript implementations by constructing the hash differently. 
In +/// JavaScript implementations, the SHA-256 input is the result of appending the nonce as an +/// integer to the hex-formatted challenge, eg: +/// +/// sha256(`${challenge}${nonce}`); +/// +/// This **does work**, however I think that this can be done a bit better by operating on the +/// challenge bytes _directly_ and treating the nonce as a salt. +/// +/// The nonce is also randomly encoded in either big or little endian depending on the last +/// byte of the data buffer in an effort to make it more annoying to automate with GPUs. +fn compute_hash(nonce: u32) -> [u8; 32] { + let data = &DATA_BUFFER; + let data_len = *DATA_LENGTH.lock().unwrap(); + let use_le = data[data_len - 1] >= 128; + let mut result = [0u8; 32]; + + let nonce = nonce as u64; + + let data_slice = &data[..data_len]; + + let nonce = if use_le { + nonce.to_le_bytes() + } else { + nonce.to_be_bytes() + }; + + let argon2 = Argon2::default(); + argon2 + .hash_password_into(&data_slice, &nonce, &mut result) + .unwrap(); + result +} + +/// This function is the main entrypoint for the Anubis proof of work implementation. +/// +/// This expects `DATA_BUFFER` to be pre-populated with the challenge value as "raw bytes". +/// The definition of what goes in the data buffer is an exercise for the implementor, but +/// for SHA-256 we store the hash as "raw bytes". The data buffer is intentionally oversized +/// so that the challenge value can be expanded in the future. +/// +/// `difficulty` is the number of leading bits that must match `0` in order for the +/// challenge to be successfully passed. This will be validated by the server. +/// +/// `initial_nonce` is the initial value of the nonce (number used once). This nonce will be +/// appended to the challenge value in order to find a hash matching the specified +/// difficulty. +/// +/// `iterand` (noun form of iterate) is the amount that the nonce should be increased by +/// every iteration of the proof of work loop. 
This will vary by how many threads are +/// running the proof-of-work check, and also functions as a thread ID. This prevents +/// wasting CPU time retrying a hash+nonce pair that likely won't work. +#[unsafe(no_mangle)] +pub extern "C" fn anubis_work(difficulty: u32, initial_nonce: u32, iterand: u32) -> u32 { + let mut nonce = initial_nonce; + + loop { + let hash = compute_hash(nonce); + + if validate(&hash, difficulty) { + // If the challenge worked, copy the bytes into `RESULT_HASH` so the runtime + // can pick it up. + let mut challenge = RESULT_HASH.lock().unwrap(); + challenge.copy_from_slice(&hash); + return nonce; + } + + let old_nonce = nonce; + nonce = nonce.wrapping_add(iterand); + + // send a progress update every 1024 iterations. since each thread checks + // separate values, one simple way to do this is by bit masking the + // nonce for multiples of 1024. unfortunately, if the number of threads + // is not prime, only some of the threads will be sending the status + // update and they will get behind the others. this is slightly more + // complicated but ensures an even distribution between threads. + if nonce > old_nonce + 1023 && (nonce >> 10) % iterand == initial_nonce { + update_nonce(nonce); + } + } +} + +/// This function is called by the server in order to validate a proof-of-work challenge. +/// This expects `DATA_BUFFER` to be set to the challenge value and `VERIFICATION_HASH` to +/// be set to the "raw bytes" of the SHA-256 hash that the client calculated. +/// +/// If everything is good, it returns true. Otherwise, it returns false. +/// +/// XXX(Xe): this could probably return an error code for what step fails, but this is fine +/// for now. 
+#[unsafe(no_mangle)] +pub extern "C" fn anubis_validate(nonce: u32, difficulty: u32) -> bool { + let computed = compute_hash(nonce); + let valid = validate(&computed, difficulty); + if !valid { + return false; + } + + let verification = VERIFICATION_HASH.lock().unwrap(); + computed == *verification +} + +// These functions exist to give pointers and lengths to the runtime around the Anubis +// checks, this allows JavaScript and Go to safely manipulate the memory layout that Rust +// has statically allocated at compile time without having to assume how the Rust compiler +// is going to lay it out. + +#[unsafe(no_mangle)] +pub extern "C" fn result_hash_ptr() -> *const u8 { + let challenge = RESULT_HASH.lock().unwrap(); + challenge.as_ptr() +} + +#[unsafe(no_mangle)] +pub extern "C" fn result_hash_size() -> usize { + RESULT_HASH.lock().unwrap().len() +} + +#[unsafe(no_mangle)] +pub extern "C" fn verification_hash_ptr() -> *const u8 { + let verification = VERIFICATION_HASH.lock().unwrap(); + verification.as_ptr() +} + +#[unsafe(no_mangle)] +pub extern "C" fn verification_hash_size() -> usize { + VERIFICATION_HASH.lock().unwrap().len() +} + +#[unsafe(no_mangle)] +pub extern "C" fn data_ptr() -> *const u8 { + let challenge = &DATA_BUFFER; + challenge.as_ptr() +} + +#[unsafe(no_mangle)] +pub extern "C" fn set_data_length(len: u32) { + let mut data_length = DATA_LENGTH.lock().unwrap(); + *data_length = len as usize; +} diff --git a/wasm/pow/sha256/Cargo.toml b/wasm/pow/sha256/Cargo.toml index 35a12f3f..7e60576e 100644 --- a/wasm/pow/sha256/Cargo.toml +++ b/wasm/pow/sha256/Cargo.toml @@ -9,6 +9,8 @@ crate-type = ["cdylib"] [dependencies] sha2 = "0.10" +anubis = { path = "../../anubis" } + [lints.clippy] nursery = { level = "warn", priority = -1 } pedantic = { level = "warn", priority = -1 } diff --git a/wasm/pow/sha256/src/lib.rs b/wasm/pow/sha256/src/lib.rs index c61f26c8..649c0977 100644 --- a/wasm/pow/sha256/src/lib.rs +++ b/wasm/pow/sha256/src/lib.rs @@ -1,3 +1,4 @@ 
+use anubis::update_nonce; use sha2::{Digest, Sha256}; use std::boxed::Box; use std::sync::{LazyLock, Mutex}; @@ -13,32 +14,18 @@ use std::sync::{LazyLock, Mutex}; /// /// This is also functionally a write-only buffer, so it doesn't really matter that /// the length of this buffer isn't exposed. -static DATA_BUFFER: LazyLock> = LazyLock::new(|| Box::new([0; 4096])); +pub static DATA_BUFFER: LazyLock> = LazyLock::new(|| Box::new([0; 4096])); -static DATA_LENGTH: LazyLock> = LazyLock::new(|| Mutex::new(0)); +pub static DATA_LENGTH: LazyLock> = LazyLock::new(|| Mutex::new(0)); /// SHA-256 hashes are 32 bytes (256 bits). These are stored in static buffers due to the /// fact that you cannot easily pass data from host space to WebAssembly space. -static RESULT_HASH: LazyLock>> = +pub static RESULT_HASH: LazyLock>> = LazyLock::new(|| Box::new(Mutex::new([0; 32]))); -static VERIFICATION_HASH: LazyLock>> = +pub static VERIFICATION_HASH: LazyLock>> = LazyLock::new(|| Box::new(Mutex::new([0; 32]))); -#[link(wasm_import_module = "anubis")] -unsafe extern "C" { - /// The runtime expects this function to be defined. It is called whenever the Anubis check - /// worker processes about 1024 hashes. This can be a no-op if you want. - fn anubis_update_nonce(nonce: u32); -} - -/// Safe wrapper to `anubis_update_nonce`. -fn update_nonce(nonce: u32) { - unsafe { - anubis_update_nonce(nonce); - } -} - /// Core validation function. Compare each bit in the hash by progressively masking bits until /// some are found to not be matching. 
/// diff --git a/wasm/wasm_test.go b/wasm/wasm_test.go index c64dc7c9..4caa0b61 100644 --- a/wasm/wasm_test.go +++ b/wasm/wasm_test.go @@ -11,10 +11,10 @@ import ( "github.com/TecharoHQ/anubis/web" ) -func TestSHA256(t *testing.T) { +func TestArgon2ID(t *testing.T) { const difficulty = 4 // one nibble, intentionally easy for testing - fin, err := web.Static.Open("static/wasm/sha256.wasm") + fin, err := web.Static.Open("static/wasm/argon2id.wasm") if err != nil { t.Fatal(err) } @@ -22,13 +22,13 @@ func TestSHA256(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), time.Minute) t.Cleanup(cancel) - runner, err := NewRunner(ctx, "sha256.wasm", fin) + runner, err := NewRunner(ctx, "argon2id.wasm", fin) if err != nil { t.Fatal(err) } h := sha256.New() - fmt.Fprint(h, os.Args[0]) + fmt.Fprint(h, t.Name()) data := h.Sum(nil) if n, err := runner.WriteData(ctx, data); err != nil { @@ -63,6 +63,64 @@ func TestSHA256(t *testing.T) { if !ok { t.Error("validation failed") } + + t.Logf("used %d pages of wasm memory (%d bytes)", runner.module.Memory().Size()/65536, runner.module.Memory().Size()) +} + +func TestSHA256(t *testing.T) { + const difficulty = 4 // one nibble, intentionally easy for testing + + fin, err := web.Static.Open("static/wasm/sha256.wasm") + if err != nil { + t.Fatal(err) + } + defer fin.Close() + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + t.Cleanup(cancel) + + runner, err := NewRunner(ctx, "sha256.wasm", fin) + if err != nil { + t.Fatal(err) + } + + h := sha256.New() + fmt.Fprint(h, t.Name()) + data := h.Sum(nil) + + if n, err := runner.WriteData(ctx, data); err != nil { + t.Fatalf("can't write data: %v", err) + } else { + t.Logf("wrote %d bytes to data segment", n) + } + + t0 := time.Now() + nonce, err := runner.anubisWork(ctx, difficulty, 0, 1) + if err != nil { + t.Fatalf("can't do test work run: %v", err) + } + t.Logf("got nonce %d in %s", nonce, time.Since(t0)) + + hash, err := runner.ReadResult(ctx) + if 
err != nil { + t.Fatalf("can't read result: %v", err) + } + + t.Logf("got hash %x", hash) + + if err := runner.WriteVerification(ctx, hash); err != nil { + t.Fatalf("can't write verification: %v", err) + } + + ok, err := runner.anubisValidate(ctx, nonce, difficulty) + if err != nil { + t.Fatalf("can't run validation: %v", err) + } + + if !ok { + t.Error("validation failed") + } + + t.Logf("used %d pages of wasm memory (%d bytes)", runner.module.Memory().Size()/65536, runner.module.Memory().Size()) } func BenchmarkSHA256(b *testing.B) { @@ -98,3 +156,37 @@ } } } + +func BenchmarkArgon2ID(b *testing.B) { + const difficulty = 4 // one nibble, intentionally easy for testing + + fin, err := web.Static.Open("static/wasm/argon2id.wasm") + if err != nil { + b.Fatal(err) + } + defer fin.Close() + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + b.Cleanup(cancel) + + runner, err := NewRunner(ctx, "argon2id.wasm", fin) + if err != nil { + b.Fatal(err) + } + + h := sha256.New() + fmt.Fprint(h, os.Args[0]) + data := h.Sum(nil) + + if n, err := runner.WriteData(ctx, data); err != nil { + b.Fatalf("can't write data: %v", err) + } else { + b.Logf("wrote %d bytes to data segment", n) + } + + for b.Loop() { + _, err := runner.anubisWork(ctx, difficulty, 0, 1) + if err != nil { + b.Fatalf("can't do test work run: %v", err) + } + } +}