From 89a24958a171c5227aded3ae8ea4916b1e3478e6 Mon Sep 17 00:00:00 2001 From: blackshirt Date: Mon, 15 Sep 2025 23:20:43 +0700 Subject: [PATCH] x.crypto.chacha20: improves the internals of chacha20, add a bench (#25311) --- vlib/x/crypto/chacha20/bench/bench.v | 57 ++++ vlib/x/crypto/chacha20/chacha.v | 464 +++------------------------ vlib/x/crypto/chacha20/chacha_test.v | 347 ++++++++++---------- vlib/x/crypto/chacha20/stream.v | 438 +++++++++++++++++++++++++ vlib/x/crypto/chacha20/stream_test.v | 83 +++++ vlib/x/crypto/chacha20/xchacha.v | 72 +++-- 6 files changed, 835 insertions(+), 626 deletions(-) create mode 100644 vlib/x/crypto/chacha20/bench/bench.v create mode 100644 vlib/x/crypto/chacha20/stream.v create mode 100644 vlib/x/crypto/chacha20/stream_test.v diff --git a/vlib/x/crypto/chacha20/bench/bench.v b/vlib/x/crypto/chacha20/bench/bench.v new file mode 100644 index 0000000000..ed9e994ac0 --- /dev/null +++ b/vlib/x/crypto/chacha20/bench/bench.v @@ -0,0 +1,57 @@ +// This is a benchmark for`x.crypto.chacha20` encryption and decryption +// +// Current output on my tests +// +// Chacha20 Encryption +// ----------- +// Iterations: 10000 Total Duration: 76.045ms ns/op: 7604 B/op: 4 allocs/op: 2 +// +// ChaCha20 Decryption +// ----------- +// Iterations: 10000 Total Duration: 71.275ms ns/op: 7127 B/op: 11 allocs/op: 14 +// +// After the patch +// Chacha20 Encryption +// ----------- +// Iterations: 10000 Total Duration: 46.833ms ns/op: 4683 B/op: 11 allocs/op: 11 +// +// ChaCha20 Decryption +// ----------- +// Iterations: 10000 Total Duration: 48.242ms ns/op: 4824 B/op: 3 allocs/op: 4 +// +import x.benchmark +import encoding.hex +import x.crypto.chacha20 + +// randomly generated key and nonce, 32-bytes of key, 12-bytes of nonce +const key = hex.decode('9d9603f4fc460e273b80795ea50eab5873c04f589226c7d591b5336feb32fcba')! +const nonce = hex.decode('9a3c83e4236ea9a2c4e482da')! 
+ +const plaintext = 'ChaCha20 encrypt decrypt benchmarking message'.bytes() + +// expected ciphertext +const ciphertext = hex.decode('dbddb264e4c478d96805b2d557649232b4b3f37c51035464d12e3675e5e36ce6f6822b49dd6494ccd5213a89c9')! + +fn bench_chacha20_encrypt() ! { + _ := chacha20.encrypt(key, nonce, plaintext)! +} + +fn bench_chacha20_decrypt() ! { + _ := chacha20.decrypt(key, nonce, ciphertext)! +} + +fn main() { + cf := benchmark.BenchmarkDefaults{ + n: 10000 + } + println('Chacha20 Encryption') + println('-----------') + mut b0 := benchmark.setup(bench_chacha20_encrypt, cf)! + b0.run() + + println('') + println('ChaCha20 Decryption') + println('-----------') + mut b1 := benchmark.setup(bench_chacha20_decrypt, cf)! + b1.run() +} diff --git a/vlib/x/crypto/chacha20/chacha.v b/vlib/x/crypto/chacha20/chacha.v index 4ca550cb09..b017feafae 100644 --- a/vlib/x/crypto/chacha20/chacha.v +++ b/vlib/x/crypto/chacha20/chacha.v @@ -5,9 +5,7 @@ // Chacha20 symmetric key stream cipher encryption based on RFC 8439 module chacha20 -import math.bits import crypto.internal.subtle -import encoding.binary // The size of ChaCha20 key, ie 256 bits size, in bytes pub const key_size = 32 @@ -34,35 +32,35 @@ enum CipherMode { original } +// encrypt encrypts plaintext bytes with a ChaCha20 cipher instance created from the provided key and nonce. +// It is a thin wrapper around two supported nonce sizes: ChaCha20 with a 96-bit nonce +// and XChaCha20 with a 192-bit nonce. Internally, encrypt starts with a counter value of 0. +// If you want more control, use a Cipher instance and set up the counter yourself. +pub fn encrypt(key []u8, nonce []u8, plaintext []u8) ![]u8 { + mut stream := new_stream(key, nonce)! + mut dst := []u8{len: plaintext.len} + stream.keystream_full(mut dst, plaintext) + return dst +} + +// decrypt does the reverse of the encrypt operation by decrypting ciphertext with a ChaCha20 cipher +// instance with the provided key and nonce.
+pub fn decrypt(key []u8, nonce []u8, ciphertext []u8) ![]u8 { + mut stream := new_stream(key, nonce)! + mut dst := []u8{len: ciphertext.len} + stream.keystream_full(mut dst, ciphertext) + return dst +} + // Cipher represents ChaCha20 stream cipher instances. +@[noinit] pub struct Cipher { - // The mode of ChaCha20 cipher, set on cipher's creation. - mode CipherMode = .standard + Stream mut: - // The internal's of ChaCha20 states contains 512 bits (64 bytes), contains of - // 4 words (16 bytes) of ChaCha20 constants, - // 8 words (32 bytes) of ChaCha20 keys, - // 4 words (16 bytes) of raw nonces, with internal counter, support for 32 and 64 bit counters. - key [8]u32 - nonce [4]u32 - - // Flag that tells whether this cipher was an extended XChaCha20 standard variant. - // only make sense when mode == .standard - extended bool - // internal buffer for storing key stream results block []u8 = []u8{len: block_size} // The last length of leftover unprocessed keystream from internal buffer length int - - // Additional fields, follows the go version. Its mainly used to optimize - // standard IETF ciphers operations by pre-chache some quarter_round step. - // vfmt off - precomp bool - p1 u32 p5 u32 p9 u32 p13 u32 - p2 u32 p6 u32 p10 u32 p14 u32 - p3 u32 p7 u32 p11 u32 p15 u32 - // vfmt on } // new_cipher creates a new ChaCha20 stream cipher with the given 32 bytes key @@ -71,57 +69,11 @@ mut: // with support for 64-bit counter, use 8 bytes length nonce's instead // If 24 bytes of nonce was provided, the XChaCha20 construction will be used. // It returns new ChaCha20 cipher instance or an error if key or nonce have any other length. -@[direct_array_access] pub fn new_cipher(key []u8, nonce []u8) !&Cipher { - if key.len != key_size { - return error('Bad key size provided') + stream := new_stream(key, nonce)! 
+ return &Cipher{ + Stream: stream } - mut mode := CipherMode.standard - mut extended := false - match nonce.len { - nonce_size {} - x_nonce_size { - extended = true - } - orig_nonce_size { - mode = .original - } - else { - return error('Unsupported nonce size') - } - } - mut c := &Cipher{ - mode: mode - extended: extended - } - // we dont need reset on new cipher instance - c.do_rekey(key, nonce)! - - return c -} - -// encrypt encrypts plaintext bytes with ChaCha20 cipher instance with provided key and nonce. -// It was a thin wrapper around two supported nonce size, ChaCha20 with 96 bits -// and XChaCha20 with 192 bits nonce. Internally, encrypt start with 0's counter value. -// If you want more control, use Cipher instance and setup the counter by your self. -pub fn encrypt(key []u8, nonce []u8, plaintext []u8) ![]u8 { - mut c := new_cipher(key, nonce)! - mut out := []u8{len: plaintext.len} - - c.encrypt(mut out, plaintext) - unsafe { c.reset() } - return out -} - -// decrypt does reverse of encrypt operation by decrypting ciphertext with ChaCha20 cipher -// instance with provided key and nonce. -pub fn decrypt(key []u8, nonce []u8, ciphertext []u8) ![]u8 { - mut c := new_cipher(key, nonce)! 
- mut out := []u8{len: ciphertext.len} - - c.encrypt(mut out, ciphertext) - unsafe { c.reset() } - return out } // xor_key_stream xors each byte in the given slice in the src with a byte from the @@ -140,6 +92,12 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { mut idx := 0 mut src_len := src.len + // check for counter overflow + num_blocks := (u64(src_len) + block_size - 1) / block_size + if c.Stream.check_ctr(num_blocks) { + panic('chacha20: internal counter overflow') + } + dst = unsafe { dst[..src_len] } if subtle.inexact_overlap(dst, src) { @@ -162,22 +120,14 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { idx += kstream.len src_len -= kstream.len } - if src_len == 0 { - return - } - - // check for counter overflow - num_blocks := (u64(src_len) + block_size - 1) / block_size - if c.check_for_ctr_overflow(num_blocks) { - panic('chacha20: internal counter overflow') - } // take the most full bytes of multiples block_size from the src, // build the keystream from the cipher's state and stores the result // into dst full := src_len - src_len % block_size if full > 0 { - c.chacha20_block_generic(mut dst[idx..idx + full], src[idx..idx + full]) + src_block := unsafe { src[idx..idx + full] } + c.Stream.keystream_with_blocksize(mut dst[idx..idx + full], src_block) } idx += full src_len -= full @@ -185,14 +135,13 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { // If we have a partial block, pad it for chacha20_block_generic, and // keep the leftover keystream for the next invocation. 
if src_len > 0 { - // Make sure, internal buffer cleared with the new one - // or the old garbaged data from previous call still there - // See https://github.com/vlang/v/issues/24043 - c.block = []u8{len: block_size} + // Make sure, internal buffer cleared or the old garbaged data from previous call still there + // See the issue at https://github.com/vlang/v/issues/24043 + unsafe { c.block.reset() } // = []u8{len: block_size} // copy the last src block to internal buffer, and performs // chacha20_block_generic on this buffer, and stores into remaining dst _ := copy(mut c.block, src[idx..]) - c.chacha20_block_generic(mut c.block, c.block) + c.Stream.keystream_with_blocksize(mut c.block, c.block) n := copy(mut dst[idx..], c.block) // the length of remaining bytes of unprocessed keystream c.length = block_size - n @@ -210,183 +159,13 @@ pub fn (mut c Cipher) encrypt(mut dst []u8, src []u8) { return } if dst.len < src.len { - panic('chacha20/chacha: dst buffer is to small') + panic('chacha20: dst buffer is to small') } if subtle.inexact_overlap(dst, src) { panic('chacha20: invalid buffer overlap') } - nr_blocks := src.len / block_size - for i := 0; i < nr_blocks; i++ { - // get current src block to be xor-ed - block := unsafe { src[i * block_size..(i + 1) * block_size] } - // build keystream, xor-ed with the block and stores into dst - c.chacha20_block_generic(mut dst[i * block_size..(i + 1) * block_size], block) - } - // process for partial block - if src.len % block_size != 0 { - // get the remaining last partial block - block := unsafe { src[nr_blocks * block_size..] 
} - // pad it into block_size, and then performs chacha20_block_generic - // on this src_block - mut src_block := []u8{len: block_size} - _ := copy(mut src_block, block) - c.chacha20_block_generic(mut src_block, src_block) - - // copy the src_block key stream result into desired dst - n := copy(mut dst[nr_blocks * block_size..], src_block) - assert n == block.len - } -} - -// chacha20_block_generic generates a generic ChaCha20 keystream. -// This is main building block for ChaCha20 keystream generator. -// This routine was intended to work only for msg source with multiples of block_size in size. -@[direct_array_access] -fn (mut c Cipher) chacha20_block_generic(mut dst []u8, src []u8) { - // ChaCha20 keystream generator was relatively easy to understand. - // Its contains steps: - // - Loads current ChaCha20 into temporary state, used for later. - // - Performs quarter_round function on this state and returns some new state. - // - Adds back the new state with the old state. - // - Performs xor-ing between src bytes (loaded as little endian number) with result from previous step. - // - Serializes, in little endian form, this xor-ed state into destination buffer. 
- // - // Makes sure its works for size of multiple of block_size - if dst.len != src.len || dst.len % block_size != 0 { - panic('chacha20: internal error: wrong dst and/or src length') - } - // check for counter overflow - num_blocks := u64((src.len + block_size - 1) / block_size) - if c.check_for_ctr_overflow(num_blocks) { - panic('chacha20: internal counter overflow') - } - - // initializes ChaCha20 state - // 0:cccccccc 1:cccccccc 2:cccccccc 3:cccccccc - // 4:kkkkkkkk 5:kkkkkkkk 6:kkkkkkkk 7:kkkkkkkk - // 8:kkkkkkkk 9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk - // 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn - // - // where c=constant k=key b=blockcounter n=nonce - c0, c1, c2, c3 := cc0, cc1, cc2, cc3 - c4, c5, c6, c7 := c.key[0], c.key[1], c.key[2], c.key[3] - c8, c9, c10, c11 := c.key[4], c.key[5], c.key[6], c.key[7] - - // internal cipher's counter - mut c12 := c.nonce[0] - mut c13 := c.nonce[1] - - c14, c15 := c.nonce[2], c.nonce[3] - - // copy current cipher's states into temporary states - mut x0, mut x1, mut x2, mut x3 := c0, c1, c2, c3 - mut x4, mut x5, mut x6, mut x7 := c4, c5, c6, c7 - mut x8, mut x9, mut x10, mut x11 := c8, c9, c10, c11 - mut x12, mut x13, mut x14, mut x15 := c12, c13, c14, c15 - - // this only for standard mode - if c.mode == .standard { - // precomputes three first column rounds that do not depend on counter - if !c.precomp { - c.p1, c.p5, c.p9, c.p13 = quarter_round(c1, c5, c9, c13) - c.p2, c.p6, c.p10, c.p14 = quarter_round(c2, c6, c10, c14) - c.p3, c.p7, c.p11, c.p15 = quarter_round(c3, c7, c11, c15) - c.precomp = true - } - } - - mut idx := 0 - mut src_len := src.len - for src_len >= block_size { - if c.mode == .standard { - // this for standard mode - // remaining first column round - fcr0, fcr4, fcr8, fcr12 := quarter_round(c0, c4, c8, c12) - - // The second diagonal round. 
- x0, x5, x10, x15 = quarter_round(fcr0, c.p5, c.p10, c.p15) - x1, x6, x11, x12 = quarter_round(c.p1, c.p6, c.p11, fcr12) - x2, x7, x8, x13 = quarter_round(c.p2, c.p7, fcr8, c.p13) - x3, x4, x9, x14 = quarter_round(c.p3, fcr4, c.p9, c.p14) - } - - // The remaining rounds, for standard its already precomputed, - // for original, its use full quarter round - n := if c.mode == .standard { 9 } else { 10 } - for i := 0; i < n; i++ { - // Column round. - // 0 | 1 | 2 | 3 - // 4 | 5 | 6 | 7 - // 8 | 9 | 10 | 11 - // 12 | 13 | 14 | 15 - x0, x4, x8, x12 = quarter_round(x0, x4, x8, x12) - x1, x5, x9, x13 = quarter_round(x1, x5, x9, x13) - x2, x6, x10, x14 = quarter_round(x2, x6, x10, x14) - x3, x7, x11, x15 = quarter_round(x3, x7, x11, x15) - - // Diagonal round. - // 0 \ 1 \ 2 \ 3 - // 5 \ 6 \ 7 \ 4 - // 10 \ 11 \ 8 \ 9 - // 15 \ 12 \ 13 \ 14 - x0, x5, x10, x15 = quarter_round(x0, x5, x10, x15) - x1, x6, x11, x12 = quarter_round(x1, x6, x11, x12) - x2, x7, x8, x13 = quarter_round(x2, x7, x8, x13) - x3, x4, x9, x14 = quarter_round(x3, x4, x9, x14) - } - - // add back keystream result to initial state, xor-ing with the src and stores into dst - binary.little_endian_put_u32(mut dst[idx + 0..idx + 4], binary.little_endian_u32(src[idx + 0.. - idx + 4]) ^ (x0 + c0)) - binary.little_endian_put_u32(mut dst[idx + 4..idx + 8], binary.little_endian_u32(src[idx + 4.. 
- idx + 8]) ^ (x1 + c1)) - binary.little_endian_put_u32(mut dst[idx + 8..idx + 12], binary.little_endian_u32(src[idx + - 8..idx + 12]) ^ (x2 + c2)) - binary.little_endian_put_u32(mut dst[idx + 12..idx + 16], binary.little_endian_u32(src[ - idx + 12..idx + 16]) ^ (x3 + c3)) - binary.little_endian_put_u32(mut dst[idx + 16..idx + 20], binary.little_endian_u32(src[ - idx + 16..idx + 20]) ^ (x4 + c4)) - binary.little_endian_put_u32(mut dst[idx + 20..idx + 24], binary.little_endian_u32(src[ - idx + 20..idx + 24]) ^ (x5 + c5)) - binary.little_endian_put_u32(mut dst[idx + 24..idx + 28], binary.little_endian_u32(src[ - idx + 24..idx + 28]) ^ (x6 + c6)) - binary.little_endian_put_u32(mut dst[idx + 28..idx + 32], binary.little_endian_u32(src[ - idx + 28..idx + 32]) ^ (x7 + c7)) - binary.little_endian_put_u32(mut dst[idx + 32..idx + 36], binary.little_endian_u32(src[ - idx + 32..idx + 36]) ^ (x8 + c8)) - binary.little_endian_put_u32(mut dst[idx + 36..idx + 40], binary.little_endian_u32(src[ - idx + 36..idx + 40]) ^ (x9 + c9)) - binary.little_endian_put_u32(mut dst[idx + 40..idx + 44], binary.little_endian_u32(src[ - idx + 40..idx + 44]) ^ (x10 + c10)) - binary.little_endian_put_u32(mut dst[idx + 44..idx + 48], binary.little_endian_u32(src[ - idx + 44..idx + 48]) ^ (x11 + c11)) - binary.little_endian_put_u32(mut dst[idx + 48..idx + 52], binary.little_endian_u32(src[ - idx + 48..idx + 52]) ^ (x12 + c12)) - binary.little_endian_put_u32(mut dst[idx + 52..idx + 56], binary.little_endian_u32(src[ - idx + 52..idx + 56]) ^ (x13 + c13)) - binary.little_endian_put_u32(mut dst[idx + 56..idx + 60], binary.little_endian_u32(src[ - idx + 56..idx + 60]) ^ (x14 + c14)) - binary.little_endian_put_u32(mut dst[idx + 60..idx + 64], binary.little_endian_u32(src[ - idx + 60..idx + 64]) ^ (x15 + c15)) - - // Updates internal counter - // - // Its safe to update internal counter, its already checked before. 
- if c.mode == .original { - mut curr_ctr := u64(c13) << 32 | u64(c12) - curr_ctr += 1 - // stores back the counter - c.nonce[0] = u32(curr_ctr) - c.nonce[1] = u32(curr_ctr >> 32) - } else { - c12 += 1 - c.nonce[0] = c12 - } - - idx += block_size - src_len -= block_size - } + c.Stream.keystream_full(mut dst, src) } // free the resources taken by the Cipher `c`. Dont use cipher after .free call @@ -403,171 +182,30 @@ pub fn (mut c Cipher) free() { // reset quickly sets all Cipher's fields to default value @[unsafe] pub fn (mut c Cipher) reset() { + c.Stream.reset() unsafe { - _ := vmemset(&c.key, 0, 32) - _ := vmemset(&c.nonce, 0, 16) c.block.reset() } c.length = 0 - c.precomp = false - - c.p1, c.p5, c.p9, c.p13 = u32(0), u32(0), u32(0), u32(0) - c.p2, c.p6, c.p10, c.p14 = u32(0), u32(0), u32(0), u32(0) - c.p3, c.p7, c.p11, c.p15 = u32(0), u32(0), u32(0), u32(0) } // set_counter sets Cipher's counter -@[direct_array_access; inline] pub fn (mut c Cipher) set_counter(ctr u64) { - match c.mode { - .original { - c.nonce[0] = u32(ctr) - c.nonce[1] = u32(ctr >> 32) - } - .standard { - // check for ctr value that may exceed the counter limit - if ctr > max_32bit_counter { - panic('set_counter: counter value exceed the limit ') - } - c.nonce[0] = u32(ctr) - } - } + c.Stream.set_ctr(ctr) } // rekey resets internal Cipher's state and reinitializes state with the provided key and nonce pub fn (mut c Cipher) rekey(key []u8, nonce []u8) ! { unsafe { c.reset() } - // this routine was publicly accessible to user, so we add a check here - // to ensure the supplied key and nonce has the correct size. 
- if key.len != key_size { - return error('Bad key size provided for rekey') - } - // For the standard cipher, allowed nonce size was nonce_size or x_nonce_size - if c.mode == .standard { - if nonce.len != x_nonce_size && nonce.len != nonce_size { - return error('Bad nonce size for standard cipher, use 12 or 24 bytes length nonce') - } - if c.extended && nonce.len != x_nonce_size { - return error('Bad nonce size provided for extended variant cipher') - } - } - // in the original variant, nonce should be orig_nonce_size length (8 bytes) - if c.mode == .original && nonce.len != orig_nonce_size { - return error('Bad nonce size provided for original mode') - } - c.do_rekey(key, nonce)! + stream := new_stream(key, nonce)! + c.Stream = stream } -// do_rekey reinitializes ChaCha20 instance with the provided key and nonce. -@[direct_array_access] -fn (mut c Cipher) do_rekey(key []u8, nonce []u8) ! { - mut nonces := nonce.clone() - mut keys := key.clone() - - // Its now awares of the new flag, mode and extended - // If this cipher was standard mode with extended flag, derives a new key and nonce - // for later setup operation - if c.mode == .standard && c.extended { - keys, nonces = derive_xchacha20_key_nonce(key, nonce)! - } - - // Its shared the same cipher key setup on the both of mode. 
- c.key[0] = binary.little_endian_u32(keys[0..4]) - c.key[1] = binary.little_endian_u32(keys[4..8]) - c.key[2] = binary.little_endian_u32(keys[8..12]) - c.key[3] = binary.little_endian_u32(keys[12..16]) - c.key[4] = binary.little_endian_u32(keys[16..20]) - c.key[5] = binary.little_endian_u32(keys[20..24]) - c.key[6] = binary.little_endian_u32(keys[24..28]) - c.key[7] = binary.little_endian_u32(keys[28..32]) - - // first counter value - c.nonce[0] = 0 - if c.mode == .standard { - c.nonce[1] = binary.little_endian_u32(nonces[0..4]) - c.nonce[2] = binary.little_endian_u32(nonces[4..8]) - c.nonce[3] = binary.little_endian_u32(nonces[8..12]) - } else { - // original mode - // second of 64-bit counter value - c.nonce[1] = 0 - - // nonce size on original mode was 64 bits - c.nonce[2] = binary.little_endian_u32(nonces[0..4]) - c.nonce[3] = binary.little_endian_u32(nonces[4..8]) - } -} - -// Helper and core function for ChaCha20 +// Helpers // -// quarter_round is the basic operation of the ChaCha algorithm. It operates -// on four 32-bit unsigned integers, by performing AXR (add, xor, rotate) -// operation on this quartet u32 numbers. 
-fn quarter_round(a u32, b u32, c u32, d u32) (u32, u32, u32, u32) { - // The operation is as follows (in C-like notation): - // where `<<<=` denotes bits rotate left operation - // a += b; d ^= a; d <<<= 16; - // c += d; b ^= c; b <<<= 12; - // a += b; d ^= a; d <<<= 8; - // c += d; b ^= c; b <<<= 7; - - mut ax := a - mut bx := b - mut cx := c - mut dx := d - - ax += bx - dx ^= ax - dx = bits.rotate_left_32(dx, 16) - - cx += dx - bx ^= cx - bx = bits.rotate_left_32(bx, 12) - - ax += bx - dx ^= ax - dx = bits.rotate_left_32(dx, 8) - - cx += dx - bx ^= cx - bx = bits.rotate_left_32(bx, 7) - - return ax, bx, cx, dx -} - -// Cipher's counter handling routine -// -// We define counter limit to simplify the access -const max_64bit_counter = max_u64 -const max_32bit_counter = u64(max_u32) - -// load_ctr loads underlying cipher's counter as u64 value. -@[direct_array_access; inline] -fn (c Cipher) load_ctr() u64 { - match c.mode { - // In the original mode, counter was 64-bit size - // stored on c.nonce[0], and c.nonce[1] - .original { - return u64(c.nonce[1]) << 32 | u64(c.nonce[0]) - } - .standard { - // in standard mode, counter was 32-bit value, stored on c.nonce[0] - return u64(c.nonce[0]) - } - } -} - -// max_ctr_value returns maximum value of cipher's counter. -@[inline] -fn (c Cipher) max_ctr_value() u64 { - match c.mode { - .original { return max_64bit_counter } - .standard { return max_32bit_counter } - } -} // derive_xchacha20_key_nonce derives a new key and nonces for extended -// variant of standard mode. Its separated for simplify the access. +// variant of Standard IETF ChaCha20 variant. Its separated for simplify the access. 
@[direct_array_access; inline] fn derive_xchacha20_key_nonce(key []u8, nonce []u8) !([]u8, []u8) { // Its only for x_nonce_size @@ -584,15 +222,3 @@ fn derive_xchacha20_key_nonce(key []u8, nonce []u8) !([]u8, []u8) { return new_key, new_nonce } - -@[direct_array_access; inline] -fn (c Cipher) check_for_ctr_overflow(add_value u64) bool { - // check for counter overflow - ctr := c.load_ctr() - sum := ctr + add_value - max := c.max_ctr_value() - if sum < ctr || sum < add_value || sum > max { - return true - } - return false -} diff --git a/vlib/x/crypto/chacha20/chacha_test.v b/vlib/x/crypto/chacha20/chacha_test.v index a8c6af9783..843e544ca0 100644 --- a/vlib/x/crypto/chacha20/chacha_test.v +++ b/vlib/x/crypto/chacha20/chacha_test.v @@ -1,7 +1,177 @@ -module chacha20 - -import rand import encoding.hex +import x.crypto.chacha20 + +fn test_chacha20_block_function() ! { + for val in blocks_testcases { + key_bytes := hex.decode(val.key)! + nonce_bytes := hex.decode(val.nonce)! + mut cs := chacha20.new_cipher(key_bytes, nonce_bytes)! + cs.set_counter(val.counter) + mut block := []u8{len: chacha20.block_size} + cs.xor_key_stream(mut block, block) + exp_bytes := hex.decode(val.output)! + + assert block == exp_bytes + } +} + +fn test_chacha20_simple_block_function() ! { + key := '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f' + key_bytes := hex.decode(key)! + + nonce := '000000090000004a00000000' + nonce_bytes := hex.decode(nonce)! + + mut block := []u8{len: chacha20.block_size} + mut cs := chacha20.new_cipher(key_bytes, nonce_bytes)! + cs.set_counter(1) + cs.xor_key_stream(mut block, block) + + expected_raw_bytes := '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e' + exp_bytes := hex.decode(expected_raw_bytes)! 
+ + assert block == exp_bytes +} + +// test poly1305 key generator as specified in https://datatracker.ietf.org/doc/html/rfc8439#section-2.6.2 +fn test_chacha20_onetime_poly1305_key_generation() ! { + for i, v in otk_cases { + key := hex.decode(v.key)! + nonce := hex.decode(v.nonce)! + + otk := hex.decode(v.otk)! + mut c := chacha20.new_cipher(key, nonce)! + mut out := []u8{len: chacha20.key_size} + c.xor_key_stream(mut out, out) + + assert out == otk + } +} + +fn test_xor_key_stream_consecutive() { + // See https://github.com/vlang/v/issues/23977 + key := [u8(64), 116, 63, 11, 221, 199, 187, 110, 217, 68, 0, 50, 65, 79, 24, 10, 124, 174, + 66, 2, 172, 153, 237, 145, 244, 41, 131, 84, 247, 42, 73, 131] + nonce := [u8(86), 124, 222, 94, 253, 187, 151, 219, 17, 83, 118, 255] + encoded_data_one := [u8(201), 199, 66, 226] + decoded_data_one := [u8(0), 0, 0, 9] + encoded_data_two := [u8(82), 189, 125, 3, 24, 185, 183, 240, 29, 223, 17, 241, 103, 69, 45, + 101] + decoded_data_two := [u8(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + + mut c := chacha20.new_cipher(key, nonce)! 
+ mut dst := []u8{len: encoded_data_one.len} + c.xor_key_stream(mut dst, encoded_data_one) + assert dst == decoded_data_one + + // consecutive call + dst = []u8{len: encoded_data_two.len} + c.xor_key_stream(mut dst, encoded_data_two) + assert dst == decoded_data_two + + // additional data + msg := 'billy the kid'.bytes() + mut dst2 := []u8{len: msg.len} + c.xor_key_stream(mut dst2, msg) + // the go version produces: [40 17 78 116 255 224 2 52 92 151 103 107 138] + assert dst2 == [u8(40), 17, 78, 116, 255, 224, 2, 52, 92, 151, 103, 107, 138] +} + +// See https://github.com/vlang/v/issues/24043 +fn test_for_more_consecutive_xor_key_stream() { + key := [u8(225), 2, 1, 178, 238, 127, 187, 188, 27, 237, 18, 62, 181, 65, 67, 152, 13, 247, + 147, 148, 101, 220, 185, 120, 234, 58, 144, 173, 3, 218, 193, 130] + nonce := [u8(153), 221, 244, 134, 99, 135, 243, 247, 169, 121, 69, 54] + + mut cipher := chacha20.new_cipher(key, nonce)! + for i := 0; i < encoded_data.len; i++ { + p := encoded_data[i] + e := expected_data[i] + mut dst := []u8{len: p.len} + cipher.xor_key_stream(mut dst, p) + assert dst == e + } +} + +fn test_chacha20_cipher_plain_encrypt() ! { + // work with xorkeystream_testcases without explicitly set the counter + for c in xorkeystream_testcases { + key_bytes := hex.decode(c.key)! + nonce_bytes := hex.decode(c.nonce)! + plaintext_bytes := hex.decode(c.input)! + + output := chacha20.encrypt(key_bytes, nonce_bytes, plaintext_bytes)! + expected := hex.decode(c.output)! + assert output == expected + } +} + +fn test_chacha20_cipher_plain_decrypt() ! { + // work with xorkeystream_testcases without explicitly set the counter + for c in xorkeystream_testcases { + key_bytes := hex.decode(c.key)! + nonce_bytes := hex.decode(c.nonce)! + input_bytes := hex.decode(c.input)! + output_bytes := hex.decode(c.output)! + input := chacha20.decrypt(key_bytes, nonce_bytes, output_bytes)! + + assert input == input_bytes + } +} + +fn test_chacha20_cipher_encrypt_with_xor_keystream() ! 
{ + for c in encryption_test_cases { + key_bytes := hex.decode(c.key)! + nonce_bytes := hex.decode(c.nonce)! + plaintext_bytes := hex.decode(c.plaintext)! + + mut cs := chacha20.new_cipher(key_bytes, nonce_bytes)! + cs.set_counter(c.counter) + + mut output := []u8{len: plaintext_bytes.len} + cs.encrypt(mut output, plaintext_bytes) + + expected := hex.decode(c.output)! + assert output == expected + } +} + +fn test_chacha20_cipher_decrypt_with_xor_keystream() ! { + for c in encryption_test_cases { + key_bytes := hex.decode(c.key)! + nonce_bytes := hex.decode(c.nonce)! + + ciphertext := hex.decode(c.output)! + mut cs := chacha20.new_cipher(key_bytes, nonce_bytes)! + cs.set_counter(c.counter) + + mut output := []u8{len: ciphertext.len} + cs.encrypt(mut output, ciphertext) + + expected_decrypted_message := hex.decode(c.plaintext)! + assert output == expected_decrypted_message + } +} + +fn test_chacha20_no_overlap_xor_key_stream() ! { + for i, t in xorkeystream_testcases { + key := hex.decode(t.key)! + nonce := hex.decode(t.nonce)! + mut cs := chacha20.new_cipher(key, nonce)! + + input := hex.decode(t.input)! + mut output := []u8{len: input.len} + cs.xor_key_stream(mut output, input) + got := hex.encode(output) + + // for decryption, we can not use cs.xor_key_stream directly on output bytes + // internally, Cipher stream has updates the counter, thats differ from encryption phase + // you can use Cipher instance by rekey with the key and nonce + cs.rekey(key, nonce)! 
+ cs.xor_key_stream(mut output, output) + assert output == input + } +} const encoded_data = [ [u8(231), 121, 9, 28], @@ -40,69 +210,6 @@ const expected_data = [ [u8(164), 169, 216, 98, 61, 175, 20, 175], ] -// See https://github.com/vlang/v/issues/24043 -fn test_for_more_consecutive_xor_key_stream() { - key := [u8(225), 2, 1, 178, 238, 127, 187, 188, 27, 237, 18, 62, 181, 65, 67, 152, 13, 247, - 147, 148, 101, 220, 185, 120, 234, 58, 144, 173, 3, 218, 193, 130] - nonce := [u8(153), 221, 244, 134, 99, 135, 243, 247, 169, 121, 69, 54] - - mut cipher := new_cipher(key, nonce)! - for i := 0; i < encoded_data.len; i++ { - p := encoded_data[i] - e := expected_data[i] - mut dst := []u8{len: p.len} - cipher.xor_key_stream(mut dst, p) - assert dst == e - } -} - -fn test_xor_key_stream_consecutive() { - // See https://github.com/vlang/v/issues/23977 - key := [u8(64), 116, 63, 11, 221, 199, 187, 110, 217, 68, 0, 50, 65, 79, 24, 10, 124, 174, - 66, 2, 172, 153, 237, 145, 244, 41, 131, 84, 247, 42, 73, 131] - nonce := [u8(86), 124, 222, 94, 253, 187, 151, 219, 17, 83, 118, 255] - encoded_data_one := [u8(201), 199, 66, 226] - decoded_data_one := [u8(0), 0, 0, 9] - encoded_data_two := [u8(82), 189, 125, 3, 24, 185, 183, 240, 29, 223, 17, 241, 103, 69, 45, - 101] - decoded_data_two := [u8(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - - mut c := new_cipher(key, nonce)! - mut dst := []u8{len: encoded_data_one.len} - c.xor_key_stream(mut dst, encoded_data_one) - assert dst == decoded_data_one - - // consecutive call - dst = []u8{len: encoded_data_two.len} - c.xor_key_stream(mut dst, encoded_data_two) - assert dst == decoded_data_two - - // additional data - msg := 'billy the kid'.bytes() - mut dst2 := []u8{len: msg.len} - c.xor_key_stream(mut dst2, msg) - // the go version produces: [40 17 78 116 255 224 2 52 92 151 103 107 138] - assert dst2 == [u8(40), 17, 78, 116, 255, 224, 2, 52, 92, 151, 103, 107, 138] -} - -fn test_chacha20_cipher_reset() ! 
{ - mut key := []u8{len: 32} - mut nonce := []u8{len: 12} - rand.read(mut key) - rand.read(mut nonce) - - mut c := new_cipher(key, nonce)! - unsafe { c.reset() } - - for i, _ in c.key { - assert c.key[i] == u32(0) - } - - for i, _ in c.nonce { - assert c.nonce[i] == u32(0) - } -} - struct BlockCase { key string nonce string @@ -110,80 +217,6 @@ struct BlockCase { output string } -fn test_chacha20_no_overlap_xor_key_stream() ! { - for i, t in xorkeystream_testcases { - key := hex.decode(t.key)! - nonce := hex.decode(t.nonce)! - mut cs := new_cipher(key, nonce)! - - input := hex.decode(t.input)! - mut output := []u8{len: input.len} - cs.xor_key_stream(mut output, input) - got := hex.encode(output) - - // for decryption, we can not use cs.xor_key_stream directly on output bytes - // internally, Cipher stream has updates the counter, thats differ from encryption phase - // you can use Cipher instance by set Cipher counter - plaintext := decrypt(key, nonce, output)! - assert plaintext == input - } -} - -fn test_chacha20_block_function() ! { - for val in blocks_testcases { - key_bytes := hex.decode(val.key)! - nonce_bytes := hex.decode(val.nonce)! - mut cs := new_cipher(key_bytes, nonce_bytes)! - cs.set_counter(val.counter) - mut block := []u8{len: block_size} - cs.chacha20_block_generic(mut block, block) - exp_bytes := hex.decode(val.output)! - - assert block == exp_bytes - } -} - -fn test_chacha20_simple_block_function() ! { - key := '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f' - key_bytes := hex.decode(key)! - - nonce := '000000090000004a00000000' - nonce_bytes := hex.decode(nonce)! - - mut block := []u8{len: block_size} - mut cs := new_cipher(key_bytes, nonce_bytes)! - cs.set_counter(u32(1)) - cs.chacha20_block_generic(mut block, block) - - expected_raw_bytes := '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e' - exp_bytes := hex.decode(expected_raw_bytes)! 
- - assert block == exp_bytes -} - -fn test_chacha20_quarter_round() { - a, b, c, d := quarter_round(0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567) - assert a == 0xea2a92f4 - assert b == 0xcb1cf8ce - assert c == 0x4581472e - assert d == 0x5881c4bb -} - -// test poly1305 key generator as specified in https://datatracker.ietf.org/doc/html/rfc8439#section-2.6.2 -fn test_chacha20_onetime_poly1305_key_generation() ! { - for i, v in otk_cases { - key := hex.decode(v.key)! - nonce := hex.decode(v.nonce)! - - otk := hex.decode(v.otk)! - mut c := new_cipher(key, nonce)! - mut out := []u8{len: key_size} - c.xor_key_stream(mut out, out) - - assert out == otk - } -} - struct PolyOtk { key string nonce string @@ -271,40 +304,6 @@ struct EncryptionCase { output string } -fn test_chacha20_cipher_encrypt() ! { - for c in encryption_test_cases { - key_bytes := hex.decode(c.key)! - nonce_bytes := hex.decode(c.nonce)! - plaintext_bytes := hex.decode(c.plaintext)! - - mut cs := new_cipher(key_bytes, nonce_bytes)! - cs.set_counter(c.counter) - - mut output := []u8{len: plaintext_bytes.len} - cs.xor_key_stream(mut output, plaintext_bytes) - - expected := hex.decode(c.output)! - assert output == expected - } -} - -fn test_chacha20_cipher_decrypt() ! { - for c in encryption_test_cases { - key_bytes := hex.decode(c.key)! - nonce_bytes := hex.decode(c.nonce)! - - ciphertext := hex.decode(c.output)! - mut cs := new_cipher(key_bytes, nonce_bytes)! - cs.set_counter(c.counter) - - mut output := []u8{len: ciphertext.len} - cs.xor_key_stream(mut output, ciphertext) - - expected_decrypted_message := hex.decode(c.plaintext)! - assert output == expected_decrypted_message - } -} - const encryption_test_cases = [ // core test EncryptionCase{ diff --git a/vlib/x/crypto/chacha20/stream.v b/vlib/x/crypto/chacha20/stream.v new file mode 100644 index 0000000000..6a429cfdef --- /dev/null +++ b/vlib/x/crypto/chacha20/stream.v @@ -0,0 +1,438 @@ +// Copyright (c) 2024 blackshirt. 
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+//
+module chacha20
+
+import math.bits
+import encoding.binary
+
+// max_64bit_counter is the maximum value of the 64-bit internal counter of the original ChaCha20 variant.
+const max_64bit_counter = max_u64
+// max_32bit_counter is the maximum value of the 32-bit internal counter of the standard IETF ChaCha20 variant.
+const max_32bit_counter = u64(max_u32)
+
+// default number of chacha20 double rounds (one column round plus one diagonal round each)
+const default_qround_nr = 10
+
+// Stream is a ChaCha20 keystream generator with an internal block counter.
+@[noinit]
+struct Stream {
+mut:
+	// The mode (variant) of this ChaCha20 stream
+	// Standard IETF variant or original (from DJ Bernstein) variant, set on creation.
+	mode CipherMode = .standard
+	// Flag that tells whether this stream was an extended XChaCha20 standard variant.
+	// It only makes sense when mode == .standard
+	extended bool
+	// underlying stream's key, as eight little-endian words
+	key [8]u32
+	// underlying stream's nonce with internal counter.
+	// standard: nonce[0] is the counter, nonce[1..4] the nonce words
+	// original: nonce[0..2] is the counter (low word first), nonce[2..4] the nonce words
+	nonce [4]u32
+
+	// precomp tells whether the counter-independent values below were already computed
+	precomp bool
+	// Cached result of the first column round on columns 1, 2 and 3 of the state;
+	// pN holds state word N after that round. Only used in standard mode, where
+	// these columns do not contain the counter.
+	// vfmt off
+	p1 u32 p5 u32 p9 u32 p13 u32
+	p2 u32 p6 u32 p10 u32 p14 u32
+	p3 u32 p7 u32 p11 u32 p15 u32
+	// vfmt on
+}
+
+// new_stream creates a new chacha20 stream. The supported nonce size is 8, 12 or 24 bytes.
+// It returns an error when the key is not key_size bytes or the nonce size is unsupported.
+// The internal counter of the new stream starts at zero.
+@[direct_array_access; inline]
+fn new_stream(key []u8, nonce []u8) !Stream {
+	if key.len != key_size {
+		return error('Bad key size provided')
+	}
+	// setup for default value
+	mut mode := CipherMode.standard
+	mut extended := false
+
+	// Based on the nonce.len supplied, it determines the variant (mode) and extended form of
+	// the new chacha20 stream intended to create.
+	match nonce.len {
+		nonce_size {}
+		x_nonce_size {
+			extended = true
+		}
+		orig_nonce_size {
+			mode = .original
+		}
+		else {
+			return error('new_stream: unsupported nonce size')
+		}
+	}
+	// if this is an extended chacha20 construct, derive a new key and nonce
+	new_key, new_nonce := if extended {
+		xkey, xnonce := derive_xchacha20_key_nonce(key, nonce)!
+		xkey, xnonce
+	} else {
+		// otherwise, use provided key and nonce
+		key, nonce
+	}
+	// Build a new stream and setup the key
+	mut b := Stream{
+		mode:     mode
+		extended: extended
+	}
+	// store the key
+	b.key[0] = binary.little_endian_u32(new_key[0..4])
+	b.key[1] = binary.little_endian_u32(new_key[4..8])
+	b.key[2] = binary.little_endian_u32(new_key[8..12])
+	b.key[3] = binary.little_endian_u32(new_key[12..16])
+	b.key[4] = binary.little_endian_u32(new_key[16..20])
+	b.key[5] = binary.little_endian_u32(new_key[20..24])
+	b.key[6] = binary.little_endian_u32(new_key[24..28])
+	b.key[7] = binary.little_endian_u32(new_key[28..32])
+
+	// store the nonce
+	if b.mode == .standard {
+		// in the standard IETF variant, the first word is used as the internal counter
+		b.nonce[0] = 0
+		b.nonce[1] = binary.little_endian_u32(new_nonce[0..4])
+		b.nonce[2] = binary.little_endian_u32(new_nonce[4..8])
+		b.nonce[3] = binary.little_endian_u32(new_nonce[8..12])
+	} else {
+		// in the original variant, the first two words serve as the 64-bit counter value
+		b.nonce[0] = 0
+		b.nonce[1] = 0
+
+		b.nonce[2] = binary.little_endian_u32(new_nonce[0..4])
+		b.nonce[3] = binary.little_endian_u32(new_nonce[4..8])
+	}
+	return b
+}
+
+// reset wipes the key, the nonce (together with the counter stored in it) and the
+// cached precomputed round values of the stream. The mode is left untouched.
+@[unsafe]
+fn (mut s Stream) reset() {
+	s.extended = false
+	// The cached words are derived from the key and nonce, so they must not
+	// outlive them; clearing the flag forces a recomputation on next use.
+	s.precomp = false
+	s.p1 = 0
+	s.p5 = 0
+	s.p9 = 0
+	s.p13 = 0
+	s.p2 = 0
+	s.p6 = 0
+	s.p10 = 0
+	s.p14 = 0
+	s.p3 = 0
+	s.p7 = 0
+	s.p11 = 0
+	s.p15 = 0
+	unsafe {
+		_ := vmemset(&s.key, 0, 32)
+		_ := vmemset(&s.nonce, 0, 16)
+	}
+}
+
+// new_curr_state creates a new State from current stream
+@[direct_array_access]
+fn (s Stream) new_curr_state() State {
+	// initializes ChaCha20 state
+	// 0:cccccccc 1:cccccccc 2:cccccccc 3:cccccccc
+	// 4:kkkkkkkk 5:kkkkkkkk 6:kkkkkkkk 7:kkkkkkkk
+	// 8:kkkkkkkk 9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk
+	// 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn
+	//
+	// where c=constant k=key b=blockcounter n=nonce
+	mut state := State{}
+	// load chacha20 constant into state
+	state[0] = cc0
+	state[1] = cc1
+	state[2] = cc2
+	state[3] = cc3
+	// load key into state
+	for i, k in s.key {
+		state[i + 4] = k
+	}
+	// load nonce (including the counter words) into state
+	for j, v in s.nonce {
+		state[j + 12] = v
+	}
+	return state
+}
+
+// keystream_full xors the whole of src with the keystream and stores the result into dst.
+// A trailing partial block is zero-padded to block_size, processed as a full block and
+// truncated on output, so it consumes one counter value like any other block.
+// It panics when processing src would overflow the internal counter.
+@[direct_array_access]
+fn (mut s Stream) keystream_full(mut dst []u8, src []u8) {
+	// number of full blocks to be processed
+	nr_blocks := src.len / block_size
+	has_partial := src.len % block_size != 0
+	// check for counter overflow up front, counting the trailing partial block too,
+	// so that nothing is written into dst when the counter range is insufficient
+	total_blocks := if has_partial { nr_blocks + 1 } else { nr_blocks }
+	if s.check_ctr(u64(total_blocks)) {
+		panic('chacha20: internal counter overflow')
+	}
+	// process for full block_size-d msg
+	for i := 0; i < nr_blocks; i++ {
+		block := unsafe { src[i * block_size..(i + 1) * block_size] }
+		// process with block_size keystream
+		s.keystream_with_blocksize(mut dst[i * block_size..(i + 1) * block_size], block)
+	}
+
+	// process for remaining partial block
+	if has_partial {
+		last_block := unsafe { src[nr_blocks * block_size..] }
+		// pad to align with block_size
+		mut last_bytes := []u8{len: block_size}
+		_ := copy(mut last_bytes, last_block)
+
+		// process the padded last block
+		s.keystream_with_blocksize(mut last_bytes, last_bytes)
+		_ := copy(mut dst[nr_blocks * block_size..], last_bytes)
+	}
+}
+
+// keystream_with_blocksize xors src, whose length must be a multiple of block_size, with
+// the keystream and stores the result, serialized in little-endian form, into dst.
+// dst must have the same length as src; it panics otherwise.
+// The internal counter is increased by one for every block processed.
+@[direct_array_access]
+fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) {
+	// The ChaCha20 keystream generator is relatively easy to understand.
+	// For every block it performs these steps:
+	// - loads the current ChaCha20 state (key, counter, nonce) into a temporary state.
+	// - performs the quarter round function on a working copy of this state.
+	// - adds the initial state back to the working copy.
+	// - xors the src bytes (loaded as little endian numbers) with the result of the previous step.
+	// - serializes, in little endian form, the xor-ed words into the destination buffer.
+	//
+	// Make sure it only works on sizes that are a multiple of block_size
+	if dst.len != src.len || dst.len % block_size != 0 {
+		panic('chacha20: internal error: wrong dst and/or src length')
+	}
+
+	// Number of double rounds run by the generic loop below.
+	// For the standard variant the first double round is assembled from the
+	// (partly cached) column rounds, so only nine remain;
+	// the original variant runs the full number.
+	n := if s.mode == .standard { 9 } else { default_qround_nr }
+
+	// The state depends on the counter, so it has to be rebuilt for every block.
+	for idx := 0; idx < src.len; idx += block_size {
+		// load state from current stream
+		st := s.new_curr_state()
+		// working copy of the state
+		mut st_c := clone_state(st)
+
+		if s.mode == .standard {
+			// precompute, once, the three first column rounds that do not depend on the counter
+			if !s.precomp {
+				mut pcr1 := Quartet{st[1], st[5], st[9], st[13]}
+				mut pcr2 := Quartet{st[2], st[6], st[10], st[14]}
+				mut pcr3 := Quartet{st[3], st[7], st[11], st[15]}
+
+				qround_on_quartet(mut pcr1)
+				qround_on_quartet(mut pcr2)
+				qround_on_quartet(mut pcr3)
+
+				s.p1 = pcr1.e0
+				s.p5 = pcr1.e1
+				s.p9 = pcr1.e2
+				s.p13 = pcr1.e3
+
+				s.p2 = pcr2.e0
+				s.p6 = pcr2.e1
+				s.p10 = pcr2.e2
+				s.p14 = pcr2.e3
+
+				s.p3 = pcr3.e0
+				s.p7 = pcr3.e1
+				s.p11 = pcr3.e2
+				s.p15 = pcr3.e3
+
+				s.precomp = true
+			}
+			// remaining first column round; column 0 contains the counter (word 12)
+			mut fcr := Quartet{st[0], st[4], st[8], st[12]}
+			qround_on_quartet(mut fcr)
+
+			// First diagonal round, fed with the words produced by the first column round.
+			qround_on_state_with_quartet(mut st_c, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10,
+				15)
+			qround_on_state_with_quartet(mut st_c, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11,
+				12)
+			qround_on_state_with_quartet(mut st_c, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8,
+				13)
+			qround_on_state_with_quartet(mut st_c, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9,
+				14)
+		}
+
+		// perform the remaining chacha20 double rounds
+		for i := 0; i < n; i++ {
+			// Column-round
+			// 0 | 1 | 2 | 3
+			// 4 | 5 | 6 | 7
+			// 8 | 9 | 10 | 11
+			// 12 | 13 | 14 | 15
+			qround_on_state(mut st_c, 0, 4, 8, 12) // 0
+			qround_on_state(mut st_c, 1, 5, 9, 13) // 1
+			qround_on_state(mut st_c, 2, 6, 10, 14) // 2
+			qround_on_state(mut st_c, 3, 7, 11, 15) // 3
+
+			// Diagonal round.
+			// 0 \ 1 \ 2 \ 3
+			// 5 \ 6 \ 7 \ 4
+			// 10 \ 11 \ 8 \ 9
+			// 15 \ 12 \ 13 \ 14
+			qround_on_state(mut st_c, 0, 5, 10, 15)
+			qround_on_state(mut st_c, 1, 6, 11, 12)
+			qround_on_state(mut st_c, 2, 7, 8, 13)
+			qround_on_state(mut st_c, 3, 4, 9, 14)
+		}
+
+		// add the initial state back to the working state, xor with src and store into dst
+		for i := 0; i < 16; i++ {
+			src_block := unsafe { src[idx + (i * 4)..idx + (i + 1) * 4] }
+			binary.little_endian_put_u32(mut dst[idx + (i * 4)..idx + (i + 1) * 4], binary.little_endian_u32(src_block) ^ (
+				st_c[i] + st[i]))
+		}
+
+		// increases Stream's internal counter
+		s.inc_ctr()
+	}
+}
+
+// Handling of Stream's internal counter
+//
+
+// ctr returns a current Stream's counter as u64 value.
+@[direct_array_access; inline]
+fn (b Stream) ctr() u64 {
+	match b.mode {
+		// In the original mode, the counter is a 64-bit value
+		// stored in b.nonce[0] (low word) and b.nonce[1] (high word)
+		.original {
+			return u64(b.nonce[1]) << 32 | u64(b.nonce[0])
+		}
+		.standard {
+			// in standard mode, the counter is a 32-bit value stored in b.nonce[0]
+			return u64(b.nonce[0])
+		}
+	}
+}
+
+// set_ctr sets Stream's counter to the absolute value ctr.
+// It panics when ctr does not fit into the counter of this stream's variant.
+@[direct_array_access; inline]
+fn (mut b Stream) set_ctr(ctr u64) {
+	// ctr is an absolute value, so only its own range has to be validated here.
+	// Validating it with check_ctr would test `current counter + ctr` instead and
+	// reject valid values as soon as the current counter is non-zero.
+	match b.mode {
+		.original {
+			// every u64 value is a valid 64-bit counter
+			b.nonce[0] = u32(ctr)
+			b.nonce[1] = u32(ctr >> 32)
+		}
+		.standard {
+			// check for ctr value that may exceed the counter limit
+			if ctr > max_32bit_counter {
+				panic('set_ctr: counter value exceed the limit ')
+			}
+			b.nonce[0] = u32(ctr)
+		}
+	}
+}
+
+// inc_ctr increases internal counter by one.
+// It panics when the counter is already at the maximum value of this stream's variant.
+@[inline]
+fn (mut b Stream) inc_ctr() {
+	// check_ctr(1) reports both a wrap-around of the u64 sum and
+	// a sum above the limit of the variant
+	if b.check_ctr(1) {
+		panic('chacha20: internal counter overflow')
+	}
+	b.set_ctr(b.ctr() + 1)
+}
+
+// check_ctr checks for counter overflow when added by value.
+// It returns true on counter overflow.
+@[inline]
+fn (b Stream) check_ctr(value u64) bool {
+	current := b.ctr()
+	total := current + value
+	// the unsigned addition wrapped around: the sum is below one of its operands
+	if total < current || total < value {
+		return true
+	}
+	// no wrap-around, so it only overflows when the sum exceeds the variant's limit
+	return total > b.max_ctr()
+}
+
+// max_ctr returns the largest counter value allowed for this stream's variant
+@[inline]
+fn (b Stream) max_ctr() u64 {
+	if b.mode == .original {
+		return max_64bit_counter
+	}
+	return max_32bit_counter
+}
+
+// State holds the sixteen 32-bit words (64 bytes) of a running chacha20 state.
+type State = [16]u32
+
+// reset_state overwrites all 64 bytes of the state with zero.
+@[direct_array_access; inline; unsafe]
+fn reset_state(mut s State) {
+	unsafe {
+		_ := vmemset(&s, 0, 64)
+	}
+}
+
+// clone_state returns a word-by-word copy of s.
+@[direct_array_access; inline]
+fn clone_state(s State) State {
+	mut dup := State{}
+	for idx in 0 .. 16 {
+		dup[idx] = s[idx]
+	}
+	return dup
+}
+
+// qround_on_state_with_quartet first stores q0, q1, q2, q3 into the state words
+// at index a, b, c, d and then performs the quarter round on those four words.
+@[direct_array_access]
+fn qround_on_state_with_quartet(mut s State, q0 u32, q1 u32, q2 u32, q3 u32, a int, b int, c int, d int) {
+	s[a] = q0
+	s[b] = q1
+	s[c] = q2
+	s[d] = q3
+	qround_on_state(mut s, a, b, c, d)
+}
+
+// qround_on_state runs the chacha20 quarter round, in place, on the four state
+// words selected by the index a, b, c, d.
+@[direct_array_access]
+fn qround_on_state(mut s State, a int, b int, c int, d int) {
+	// step 1: a += b; d = (d ^ a) <<< 16
+	s[a] += s[b]
+	s[d] = bits.rotate_left_32(s[d] ^ s[a], 16)
+
+	// step 2: c += d; b = (b ^ c) <<< 12
+	s[c] += s[d]
+	s[b] = bits.rotate_left_32(s[b] ^ s[c], 12)
+
+	// step 3: a += b; d = (d ^ a) <<< 8
+	s[a] += s[b]
+	s[d] = bits.rotate_left_32(s[d] ^ s[a], 8)
+
+	// step 4: c += d; b = (b ^ c) <<< 7
+	s[c] += s[d]
+	s[b] = bits.rotate_left_32(s[b] ^ s[c], 7)
+}
+
+// quartet of u32 values.
+struct Quartet { +mut: + e0 u32 + e1 u32 + e2 u32 + e3 u32 +} + +// chacha20 quarter round run on Quartet and stored into res +fn qround_on_quartet(mut q Quartet) { + // a += b; d ^= a; d <<<= 16; + q.e0 += q.e1 + q.e3 ^= q.e0 + q.e3 = bits.rotate_left_32(q.e3, 16) + + // c += d; b ^= c; b <<<= 12; + q.e2 += q.e3 + q.e1 ^= q.e2 + q.e1 = bits.rotate_left_32(q.e1, 12) + + // a += b; d ^= a; d <<< 8; + q.e0 += q.e1 + q.e3 ^= q.e0 + q.e3 = bits.rotate_left_32(q.e3, 8) + + // c += d; b ^= c; b <<<= 7; + q.e2 += q.e3 + q.e1 ^= q.e2 + q.e1 = bits.rotate_left_32(q.e1, 7) +} diff --git a/vlib/x/crypto/chacha20/stream_test.v b/vlib/x/crypto/chacha20/stream_test.v new file mode 100644 index 0000000000..a0bf74bd92 --- /dev/null +++ b/vlib/x/crypto/chacha20/stream_test.v @@ -0,0 +1,83 @@ +module chacha20 + +import encoding.hex + +fn test_qround_on_state() { + mut s := State{} + s[0] = 0x11111111 + s[1] = 0x01020304 + s[2] = 0x9b8d6f43 + s[3] = 0x01234567 + + qround_on_state(mut s, 0, 1, 2, 3) + assert s[0] == 0xea2a92f4 + assert s[1] == 0xcb1cf8ce + assert s[2] == 0x4581472e + assert s[3] == 0x5881c4bb +} + +fn test_state_of_chacha20_block_simple() ! { + key := '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f' + key_bytes := hex.decode(key)! + + nonce := '000000090000004a00000000' + nonce_bytes := hex.decode(nonce)! + + mut stream := new_stream(key_bytes, nonce_bytes)! + + mut block := []u8{len: block_size} + stream.set_ctr(1) + stream.keystream_with_blocksize(mut block, block) + + expected_raw_bytes := '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e' + exp_bytes := hex.decode(expected_raw_bytes)! + + assert block == exp_bytes +} + +fn test_keystream_with_blocksize() ! { + for val in blocks_testcases { + key := hex.decode(val.key)! + nonce := hex.decode(val.nonce)! + + mut stream := new_stream(key, nonce)! 
+ stream.set_ctr(val.counter) + + mut block := []u8{len: block_size} + stream.keystream_with_blocksize(mut block, block) + exp_bytes := hex.decode(val.output)! + + assert block == exp_bytes + } +} + +struct BlockCase { + key string + nonce string + counter u32 + output string +} + +const blocks_testcases = [ + // section 2.3.4 https://datatracker.ietf.org/doc/html/rfc8439#section-2.3.2 + BlockCase{ + key: '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f' + nonce: '000000090000004a00000000' + counter: u32(1) + output: '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e' + }, + // https://datatracker.ietf.org/doc/html/rfc8439#appendix-A.1.1 + BlockCase{ + key: '0000000000000000000000000000000000000000000000000000000000000000' + nonce: '000000000000000000000000' + counter: u32(0) + output: '76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586' + }, + // #appendix-A.1.2 + BlockCase{ + key: '0000000000000000000000000000000000000000000000000000000000000000' + nonce: '000000000000000000000000' + counter: u32(1) + output: '9f07e7be5551387a98ba977c732d080dcb0f29a048e3656912c6533e32ee7aed29b721769ce64e43d57133b074d839d531ed1f28510afb45ace10a1f4b794d6f' + }, +] diff --git a/vlib/x/crypto/chacha20/xchacha.v b/vlib/x/crypto/chacha20/xchacha.v index 8cb5816d4d..c27943fcd8 100644 --- a/vlib/x/crypto/chacha20/xchacha.v +++ b/vlib/x/crypto/chacha20/xchacha.v @@ -22,55 +22,61 @@ fn xchacha20(key []u8, nonce []u8) ![]u8 { return error('xchacha: Bad nonce size') } // initializes ChaCha20 state - mut x0 := cc0 - mut x1 := cc1 - mut x2 := cc2 - mut x3 := cc3 + mut x := State{} + x[0] = cc0 + x[1] = cc1 + x[2] = cc2 + x[3] = cc3 - mut x4 := binary.little_endian_u32(key[0..4]) - mut x5 := binary.little_endian_u32(key[4..8]) - mut x6 := binary.little_endian_u32(key[8..12]) - mut x7 := binary.little_endian_u32(key[12..16]) 
+ x[4] = binary.little_endian_u32(key[0..4]) + x[5] = binary.little_endian_u32(key[4..8]) + x[6] = binary.little_endian_u32(key[8..12]) + x[7] = binary.little_endian_u32(key[12..16]) - mut x8 := binary.little_endian_u32(key[16..20]) - mut x9 := binary.little_endian_u32(key[20..24]) - mut x10 := binary.little_endian_u32(key[24..28]) - mut x11 := binary.little_endian_u32(key[28..32]) + x[8] = binary.little_endian_u32(key[16..20]) + x[9] = binary.little_endian_u32(key[20..24]) + x[10] = binary.little_endian_u32(key[24..28]) + x[11] = binary.little_endian_u32(key[28..32]) // we have no counter - mut x12 := binary.little_endian_u32(nonce[0..4]) - mut x13 := binary.little_endian_u32(nonce[4..8]) - mut x14 := binary.little_endian_u32(nonce[8..12]) - mut x15 := binary.little_endian_u32(nonce[12..16]) + x[12] = binary.little_endian_u32(nonce[0..4]) + x[13] = binary.little_endian_u32(nonce[4..8]) + x[14] = binary.little_endian_u32(nonce[8..12]) + x[15] = binary.little_endian_u32(nonce[12..16]) // After initialization, proceed through the ChaCha20 rounds as usual. for i := 0; i < 10; i++ { // Diagonal round. - x0, x4, x8, x12 = quarter_round(x0, x4, x8, x12) - x1, x5, x9, x13 = quarter_round(x1, x5, x9, x13) - x2, x6, x10, x14 = quarter_round(x2, x6, x10, x14) - x3, x7, x11, x15 = quarter_round(x3, x7, x11, x15) + qround_on_state(mut x, 0, 4, 8, 12) // 0 + qround_on_state(mut x, 1, 5, 9, 13) // 1 + qround_on_state(mut x, 2, 6, 10, 14) // 2 + qround_on_state(mut x, 3, 7, 11, 15) // 3 - // Column round. - x0, x5, x10, x15 = quarter_round(x0, x5, x10, x15) - x1, x6, x11, x12 = quarter_round(x1, x6, x11, x12) - x2, x7, x8, x13 = quarter_round(x2, x7, x8, x13) - x3, x4, x9, x14 = quarter_round(x3, x4, x9, x14) + // quarter diagonal round + // Diagonal round. 
+ // 0 \ 1 \ 2 \ 3 + // 5 \ 6 \ 7 \ 4 + // 10 \ 11 \ 8 \ 9 + // 15 \ 12 \ 13 \ 14 + qround_on_state(mut x, 0, 5, 10, 15) + qround_on_state(mut x, 1, 6, 11, 12) + qround_on_state(mut x, 2, 7, 8, 13) + qround_on_state(mut x, 3, 4, 9, 14) } // Once the 20 ChaCha rounds have been completed, the first 128 bits (16 bytes) and // last 128 bits (16 bytes) of the ChaCha state (both little-endian) are // concatenated, and this 256-bit (32 bytes) subkey is returned. mut out := []u8{len: 32} - binary.little_endian_put_u32(mut out[0..4], x0) - binary.little_endian_put_u32(mut out[4..8], x1) - binary.little_endian_put_u32(mut out[8..12], x2) - binary.little_endian_put_u32(mut out[12..16], x3) + binary.little_endian_put_u32(mut out[0..4], x[0]) + binary.little_endian_put_u32(mut out[4..8], x[1]) + binary.little_endian_put_u32(mut out[8..12], x[2]) + binary.little_endian_put_u32(mut out[12..16], x[3]) - binary.little_endian_put_u32(mut out[16..20], x12) - binary.little_endian_put_u32(mut out[20..24], x13) - binary.little_endian_put_u32(mut out[24..28], x14) - binary.little_endian_put_u32(mut out[28..32], x15) + binary.little_endian_put_u32(mut out[16..20], x[12]) + binary.little_endian_put_u32(mut out[20..24], x[13]) + binary.little_endian_put_u32(mut out[24..28], x[14]) + binary.little_endian_put_u32(mut out[28..32], x[15]) return out }