From 5fd2278df4d2a5a6767a8ff2aa4f4a84e508a8d2 Mon Sep 17 00:00:00 2001 From: blackshirt Date: Fri, 19 Sep 2025 18:25:24 +0700 Subject: [PATCH] x.crypto.chacha20: fix internal counter handling (#25334) --- vlib/x/crypto/chacha20/chacha.v | 32 +-- vlib/x/crypto/chacha20/chacha_64bitctr_test.v | 4 +- vlib/x/crypto/chacha20/chacha_test.v | 4 +- vlib/x/crypto/chacha20/stream.v | 182 ++++++++++-------- vlib/x/crypto/chacha20/stream_test.v | 35 +++- vlib/x/crypto/chacha20/xchacha_test.v | 4 +- .../chacha20poly1305/chacha20poly1305.v | 8 +- 7 files changed, 162 insertions(+), 107 deletions(-) diff --git a/vlib/x/crypto/chacha20/chacha.v b/vlib/x/crypto/chacha20/chacha.v index fdf43f3e2e..dc6130a7d8 100644 --- a/vlib/x/crypto/chacha20/chacha.v +++ b/vlib/x/crypto/chacha20/chacha.v @@ -39,7 +39,7 @@ enum CipherMode { pub fn encrypt(key []u8, nonce []u8, plaintext []u8) ![]u8 { mut stream := new_stream(key, nonce)! mut dst := []u8{len: plaintext.len} - stream.keystream_full(mut dst, plaintext) + stream.keystream_full(mut dst, plaintext)! unsafe { stream.reset() } return dst } @@ -49,7 +49,7 @@ pub fn encrypt(key []u8, nonce []u8, plaintext []u8) ![]u8 { pub fn decrypt(key []u8, nonce []u8, ciphertext []u8) ![]u8 { mut stream := new_stream(key, nonce)! mut dst := []u8{len: ciphertext.len} - stream.keystream_full(mut dst, ciphertext) + stream.keystream_full(mut dst, ciphertext)! unsafe { stream.reset() } return dst } @@ -94,14 +94,8 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { mut idx := 0 mut src_len := src.len - // check for counter overflow - num_blocks := (u64(src_len) + block_size - 1) / block_size - if c.Stream.check_ctr(num_blocks) { - panic('chacha20: internal counter overflow') - } dst = unsafe { dst[..src_len] } - if subtle.inexact_overlap(dst, src) { panic('chacha20: invalid buffer overlap') } @@ -129,21 +123,27 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { full := src_len - src_len % block_size if full > 0 { src_block := unsafe { src[idx..idx + full] } - c.Stream.keystream_with_blocksize(mut dst[idx..idx + full], src_block) + c.Stream.keystream_with_blocksize(mut dst[idx..idx + full], src_block) or { + c.Stream.overflow = true + panic('chacha20: xor_key_stream leads to counter overflow') + } } idx += full src_len -= full - // If we have a partial block, pad it for chacha20_block_generic, and + // If we have a partial block, pad it for keystream_with_blocksize, and // keep the leftover keystream for the next invocation. if src_len > 0 { // Make sure, internal buffer cleared or the old garbaged data from previous call still there // See the issue at https://github.com/vlang/v/issues/24043 unsafe { c.block.reset() } // = []u8{len: block_size} // copy the last src block to internal buffer, and performs - // chacha20_block_generic on this buffer, and stores into remaining dst + // keystream_with_blocksize on this buffer, and stores into remaining dst _ := copy(mut c.block, src[idx..]) - c.Stream.keystream_with_blocksize(mut c.block, c.block) + c.Stream.keystream_with_blocksize(mut c.block, c.block) or { + c.Stream.overflow = true + panic('chacha20: xor_key_stream leads to counter overflow') + } n := copy(mut dst[idx..], c.block) // the length of remaining bytes of unprocessed keystream c.length = block_size - n @@ -156,18 +156,18 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { // Its added to allow `chacha20poly1305` modules to work without key stream fashion. // TODO: integrates it with the rest @[direct_array_access] -pub fn (mut c Cipher) encrypt(mut dst []u8, src []u8) { +pub fn (mut c Cipher) encrypt(mut dst []u8, src []u8) ! { if src.len == 0 { return } if dst.len < src.len { - panic('chacha20: dst buffer is to small') + return error('chacha20: dst buffer is to small') } if subtle.inexact_overlap(dst, src) { - panic('chacha20: invalid buffer overlap') + return error('chacha20: invalid buffer overlap') } - c.Stream.keystream_full(mut dst, src) + c.Stream.keystream_full(mut dst, src)! } // free the resources taken by the Cipher `c`. Dont use cipher after .free call diff --git a/vlib/x/crypto/chacha20/chacha_64bitctr_test.v b/vlib/x/crypto/chacha20/chacha_64bitctr_test.v index 0e58cabd5e..cf5a735aed 100644 --- a/vlib/x/crypto/chacha20/chacha_64bitctr_test.v +++ b/vlib/x/crypto/chacha20/chacha_64bitctr_test.v @@ -97,13 +97,13 @@ fn test_chacha20_encrypt_with_64bit_counter() ! { mut c := new_cipher(key, nonce)! mut dst := []u8{len: plaintext.len} - c.encrypt(mut dst, plaintext) + c.encrypt(mut dst, plaintext)! assert dst == ciphertext // decrypts the ciphertext back // we need rekey the ciphers, because internal states has changed from previous invocations. c.rekey(key, nonce)! - c.encrypt(mut dst, ciphertext) + c.encrypt(mut dst, ciphertext)! assert dst == plaintext } } diff --git a/vlib/x/crypto/chacha20/chacha_test.v b/vlib/x/crypto/chacha20/chacha_test.v index 843e544ca0..d2d965695f 100644 --- a/vlib/x/crypto/chacha20/chacha_test.v +++ b/vlib/x/crypto/chacha20/chacha_test.v @@ -129,7 +129,7 @@ fn test_chacha20_cipher_encrypt_with_xor_keystream() ! { cs.set_counter(c.counter) mut output := []u8{len: plaintext_bytes.len} - cs.encrypt(mut output, plaintext_bytes) + cs.encrypt(mut output, plaintext_bytes)! expected := hex.decode(c.output)! assert output == expected @@ -146,7 +146,7 @@ fn test_chacha20_cipher_decrypt_with_xor_keystream() ! { cs.set_counter(c.counter) mut output := []u8{len: ciphertext.len} - cs.encrypt(mut output, ciphertext) + cs.encrypt(mut output, ciphertext)! expected_decrypted_message := hex.decode(c.plaintext)! assert output == expected_decrypted_message diff --git a/vlib/x/crypto/chacha20/stream.v b/vlib/x/crypto/chacha20/stream.v index 26008a839d..955ddb3d4a 100644 --- a/vlib/x/crypto/chacha20/stream.v +++ b/vlib/x/crypto/chacha20/stream.v @@ -15,20 +15,23 @@ const max_32bit_counter = u64(max_u32) // default chacha20 quarter round number const default_qround_nr = 10 -// ChaCha20 stream with internal counter +// Stream is an internal structure where main ChaCha20 algorithm operates on. @[noinit] struct Stream { mut: + // underlying stream's key + key [8]u32 + // underlying stream's nonce with internal counter + nonce [4]u32 + // The mode (variant) of this ChaCha20 stream // Standard IETF variant or original (from DJ Bernstein) variant, set on creation. mode CipherMode = .standard // Flag that tells whether this stream was an extended XChaCha20 standard variant. // only make sense when mode == .standard extended bool - // underlying stream's key - key [8]u32 - // underlying stream's nonce with internal counter - nonce [4]u32 + // Flag tells whether this stream has reached the counter limit + overflow bool // counter-independent precomputed values precomp bool @@ -64,7 +67,7 @@ fn new_stream(key []u8, nonce []u8) !Stream { } } // if this an extended chacha20 construct, derives a new key and nonce - new_key, new_nonce := if extended { + new_key, new_nonce := if mode == .standard && extended { xkey, xnonce := derive_xchacha20_key_nonce(key, nonce)! xkey, xnonce } else { @@ -107,8 +110,6 @@ fn new_stream(key []u8, nonce []u8) !Stream { // reset resets internal stream @[unsafe] fn (mut s Stream) reset() { - s.mode = .standard - s.extended = false unsafe { _ := vmemset(&s.key, 0, 32) _ := vmemset(&s.nonce, 0, 16) @@ -148,18 +149,22 @@ fn (s Stream) new_curr_state() State { // keystream_full process with full size of src being processed @[direct_array_access] -fn (mut s Stream) keystream_full(mut dst []u8, src []u8) { +fn (mut s Stream) keystream_full(mut dst []u8, src []u8) ! { + if s.overflow { + return error('chacha20: keystream_full counter has reached the limit') + } // number of block to be processed nr_blocks := src.len / block_size // check for counter overflow if s.check_ctr(u64(nr_blocks)) { - panic('chacha20: internal counter overflow') + s.overflow = true + return error('chacha20: internal counter overflow') } // process for full block_size-d msg for i := 0; i < nr_blocks; i++ { block := unsafe { src[i * block_size..(i + 1) * block_size] } // process with block_size keystream - s.keystream_with_blocksize(mut dst[i * block_size..(i + 1) * block_size], block) + s.keystream_with_blocksize(mut dst[i * block_size..(i + 1) * block_size], block)! } // process for remaining partial block @@ -170,7 +175,7 @@ fn (mut s Stream) keystream_full(mut dst []u8, src []u8) { _ := copy(mut last_bytes, last_block) // process the padded last block - s.keystream_with_blocksize(mut last_bytes, last_bytes) + s.keystream_with_blocksize(mut last_bytes, last_bytes)! _ := copy(mut dst[nr_blocks * block_size..], last_bytes) } } @@ -178,7 +183,7 @@ fn (mut s Stream) keystream_full(mut dst []u8, src []u8) { // keystream_with_blocksize produces stream from src bytes that aligns with block_size, // serialized in little-endian form and stored into dst buffer. @[direct_array_access] -fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) { +fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) ! { // ChaCha20 keystream generator was relatively easy to understand. // Its contains steps: // - loads current ChaCha20 into temporary state, used for later. @@ -189,64 +194,54 @@ fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) { // // Makes sure its works for size of multiple of block_size if dst.len != src.len || dst.len % block_size != 0 { - panic('chacha20: internal error: wrong dst and/or src length') + return error('chacha20: internal error: wrong dst and/or src length') + } + // check if this stream has reached the counter limit + if s.overflow { + return error('chacha20: internal counter has reached the limit, please rekey') + } + // check for counter overflow when processing number of blocks + num_blocks := (u64(src.len) + block_size - 1) / block_size + if s.check_ctr(num_blocks) { + s.overflow = true + return error('chacha20.check_ctr: internal counter overflow') } // load state from current stream - st := s.new_curr_state() + mut st := s.new_curr_state() // clone the state mut st_c := clone_state(st) - // cache counter-independent precomputed values - if s.mode == .standard { - // first column round - mut fcr := Quartet{st[0], st[4], st[8], st[12]} - // precomputes three first column rounds that do not depend on counter - if !s.precomp { - mut pcr1 := Quartet{st[1], st[5], st[9], st[13]} - mut pcr2 := Quartet{st[2], st[6], st[10], st[14]} - mut pcr3 := Quartet{st[3], st[7], st[11], st[15]} - - qround_on_quartet(mut pcr1) - qround_on_quartet(mut pcr2) - qround_on_quartet(mut pcr3) - - s.p1 = pcr1.e0 - s.p5 = pcr1.e1 - s.p9 = pcr1.e2 - s.p13 = pcr1.e3 - - s.p2 = pcr2.e0 - s.p6 = pcr2.e1 - s.p10 = pcr2.e2 - s.p14 = pcr2.e3 - - s.p3 = pcr3.e0 - s.p7 = pcr3.e1 - s.p11 = pcr3.e2 - s.p15 = pcr3.e3 - - s.precomp = true - } - // remaining first column round - qround_on_quartet(mut fcr) - - // First diagonal round. - qround_on_state_with_quartet(mut st_c, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10, 15) - qround_on_state_with_quartet(mut st_c, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11, 12) - qround_on_state_with_quartet(mut st_c, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8, 13) - qround_on_state_with_quartet(mut st_c, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9, 14) + // precomputes cache counter-independent values + if s.mode == .standard && !s.precomp { + s.precomp(st) } mut idx := 0 mut src_len := src.len for src_len >= block_size { + if s.mode == .standard { + // remaining first column round + mut fcr := Quartet{st[0], st[4], st[8], st[12]} + qround_on_quartet(mut fcr) + + // First diagonal round. + qround_on_state_with_quartet(mut st_c, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10, + 15) + qround_on_state_with_quartet(mut st_c, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11, + 12) + qround_on_state_with_quartet(mut st_c, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8, + 13) + qround_on_state_with_quartet(mut st_c, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9, + 14) + } // The remaining rounds // // For standard variant, the first column-round was already precomputed, // For original variant, its use full quarter round number. - n := if s.mode == .standard { 9 } else { default_qround_nr } + // perform chacha20 quarter round n-times + n := if s.mode == .standard { 9 } else { default_qround_nr } for i := 0; i < n; i++ { // Column-round // 0 | 1 | 2 | 3 @@ -272,12 +267,33 @@ fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) { // add back keystream result to initial state, xor-ing with the src and stores into dst for i := 0; i < 16; i++ { src_block := unsafe { src[idx + (i * 4)..idx + (i + 1) * 4] } - binary.little_endian_put_u32(mut dst[idx + (i * 4)..idx + (i + 1) * 4], binary.little_endian_u32(src_block) ^ ( - st_c[i] + st[i])) + add_xored := binary.little_endian_u32(src_block) ^ (st_c[i] + st[i]) + binary.little_endian_put_u32(mut dst[idx + (i * 4)..idx + (i + 1) * 4], add_xored) } // increases Stream's internal counter - s.inc_ctr() + if s.mode == .original { + st[12] += 1 + // first counter reset ? + if st[12] == 0 { + // increase second counter, if reset, mark as an overflow and return error + st[13] += 1 + if st[13] == 0 { + s.overflow = true + return error('chacha20.keystream_with_blocksize: 64-bit counter reached') + } + } + // store the counter + s.nonce[0] = st[12] + s.nonce[1] = st[13] + } else { + st[12] += 1 + if st[12] == 0 { + s.overflow = true + return error('chacha20.keystream_with_blocksize: overflow 32-bit counter') + } + s.nonce[0] = st[12] + } // updates index idx += block_size @@ -285,6 +301,35 @@ fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) { } } +// precomp does quarter round on counter-independent quartet values on running state st. +@[direct_array_access; inline] +fn (mut s Stream) precomp(st State) { + mut pcr1 := Quartet{st[1], st[5], st[9], st[13]} + mut pcr2 := Quartet{st[2], st[6], st[10], st[14]} + mut pcr3 := Quartet{st[3], st[7], st[11], st[15]} + + qround_on_quartet(mut pcr1) + qround_on_quartet(mut pcr2) + qround_on_quartet(mut pcr3) + + s.p1 = pcr1.e0 + s.p5 = pcr1.e1 + s.p9 = pcr1.e2 + s.p13 = pcr1.e3 + + s.p2 = pcr2.e0 + s.p6 = pcr2.e1 + s.p10 = pcr2.e2 + s.p14 = pcr2.e3 + + s.p3 = pcr3.e0 + s.p7 = pcr3.e1 + s.p11 = pcr3.e2 + s.p15 = pcr3.e3 + + s.precomp = true +} + // Handling of Stream's internal counter // @@ -307,11 +352,6 @@ fn (b Stream) ctr() u64 { // set_ctr sets Stream's counter @[direct_array_access; inline] fn (mut b Stream) set_ctr(ctr u64) { - // if this set counter would overflow internal counter - // we do panic instead - if b.check_ctr(ctr) { - panic('set_ctr: invalid check, maybe would overflow') - } match b.mode { .original { b.nonce[0] = u32(ctr) @@ -327,15 +367,6 @@ fn (mut b Stream) set_ctr(ctr u64) { } } -// inc_ctr increases internal counter by one. -@[inline] -fn (mut b Stream) inc_ctr() { - mut curr_ctr := b.ctr() - curr_ctr += 1 - - b.set_ctr(curr_ctr) -} - // check_ctr checks for counter overflow when added by value. // It returns true on counter overflow. @[inline] @@ -361,13 +392,6 @@ fn (b Stream) max_ctr() u64 { // State represents the running 64-bytes of chacha20 stream, type State = [16]u32 -@[direct_array_access; inline; unsafe] -fn reset_state(mut s State) { - unsafe { - _ := vmemset(&s, 0, 64) - } -} - @[direct_array_access; inline] fn clone_state(s State) State { mut sc := State{} diff --git a/vlib/x/crypto/chacha20/stream_test.v b/vlib/x/crypto/chacha20/stream_test.v index a0bf74bd92..ebdc8a4c09 100644 --- a/vlib/x/crypto/chacha20/stream_test.v +++ b/vlib/x/crypto/chacha20/stream_test.v @@ -1,7 +1,38 @@ module chacha20 +import rand import encoding.hex +// Test for Stream counter handling. +// See the discussion at [here](https://discord.com/channels/592103645835821068/592114487759470596/1417900997090607215) +fn test_stream_counter_handling() ! { + // creates a original mode of the cipher with 64-bit counter + mut ctx := new_cipher(rand.bytes(32)!, rand.bytes(8)!)! + // set the cipher's counter near the maximum of 64-bit counter + ctr := max_u64 - 2 + ctx.set_counter(ctr) + + // by setting internal counter into near of max 64-bit counter, + // it need a message with minimum length of 2*block_size bytes to reach the limit. + // let's build this message with 2 * block_size bytes in size + msg0 := []u8{len: 2 * block_size} + mut dst := []u8{len: msg0.len} + ctx.xor_key_stream(mut dst, msg0) + // at this step, the counter has reached the maximum_64bit_counter, but still not overflow + assert ctx.Stream.overflow == false + assert ctx.Stream.ctr() == max_64bit_counter + + // after above process the counter should have at the maximum limit + // we use keystream_with_blocksize to test this counter handling, because + // xor_key_stream would panic on counter reset + msg1 := []u8{len: block_size} + ctx.Stream.keystream_with_blocksize(mut dst[..block_size], msg1) or { + assert ctx.Stream.overflow == true + assert err == error('chacha20.check_ctr: internal counter overflow') + return + } +} + fn test_qround_on_state() { mut s := State{} s[0] = 0x11111111 @@ -27,7 +58,7 @@ fn test_state_of_chacha20_block_simple() ! { mut block := []u8{len: block_size} stream.set_ctr(1) - stream.keystream_with_blocksize(mut block, block) + stream.keystream_with_blocksize(mut block, block)! expected_raw_bytes := '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e' exp_bytes := hex.decode(expected_raw_bytes)! @@ -44,7 +75,7 @@ fn test_keystream_with_blocksize() ! { stream.set_ctr(val.counter) mut block := []u8{len: block_size} - stream.keystream_with_blocksize(mut block, block) + stream.keystream_with_blocksize(mut block, block)! exp_bytes := hex.decode(val.output)! assert block == exp_bytes diff --git a/vlib/x/crypto/chacha20/xchacha_test.v b/vlib/x/crypto/chacha20/xchacha_test.v index c20e04b782..e258cd4f4a 100644 --- a/vlib/x/crypto/chacha20/xchacha_test.v +++ b/vlib/x/crypto/chacha20/xchacha_test.v @@ -38,7 +38,7 @@ fn test_xchacha20_encrypt_vector_test_a321() ! { nonce_bytes := hex.decode(nonce)! ciphertext_bytes := hex.decode(ciphertext)! - encrypted_message := encrypt(key_bytes, nonce_bytes, plaintext_bytes) or { return } + encrypted_message := encrypt(key_bytes, nonce_bytes, plaintext_bytes)! assert encrypted_message == ciphertext_bytes } @@ -63,7 +63,7 @@ fn test_xchach20_encrypt_vector_test_a322() ! { c.set_counter(counter) mut encrypted_message := []u8{len: plaintext_bytes.len} - c.encrypt(mut encrypted_message, plaintext_bytes) + c.encrypt(mut encrypted_message, plaintext_bytes)! assert encrypted_message == ciphertext_bytes } diff --git a/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v b/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v index 718c3416f3..5fdef192ef 100644 --- a/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v +++ b/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v @@ -127,13 +127,13 @@ fn (c Chacha20Poly1305) encrypt_generic(plaintext []u8, nonce []u8, ad []u8) ![] // see https://datatracker.ietf.org/doc/html/rfc8439#section-2.6 mut polykey := []u8{len: key_size} mut s := chacha20.new_cipher(c.key, nonce)! - s.encrypt(mut polykey, polykey) + s.encrypt(mut polykey, polykey)! // Next, the ChaCha20 encryption function is called to encrypt the plaintext, // using the same key and nonce, and with the initial ChaCha20 counter set to 1. mut ciphertext := []u8{len: plaintext.len} s.set_counter(1) - s.encrypt(mut ciphertext, plaintext) + s.encrypt(mut ciphertext, plaintext)! // Finally, the Poly1305 function is called with the generated Poly1305 one-time key // calculated above, and a message constructed as described in @@ -177,7 +177,7 @@ fn (c Chacha20Poly1305) decrypt_generic(ciphertext []u8, nonce []u8, ad []u8) ![ // generates poly1305 one-time key for later calculation mut polykey := []u8{len: key_size} mut s := chacha20.new_cipher(c.key, nonce)! - s.encrypt(mut polykey, polykey) + s.encrypt(mut polykey, polykey)! // Remember, ciphertext is concatenation of associated cipher output plus tag (mac) bytes encrypted := ciphertext[0..ciphertext.len - c.overhead()] @@ -186,7 +186,7 @@ fn (c Chacha20Poly1305) decrypt_generic(ciphertext []u8, nonce []u8, ad []u8) ![ mut plaintext := []u8{len: encrypted.len} s.set_counter(1) // doing reverse encrypt on cipher output part produces plaintext - s.encrypt(mut plaintext, encrypted) + s.encrypt(mut plaintext, encrypted)! // authenticated messages part mut constructed_msg := []u8{}