From a199a6ea2eca96e6e19f9e90b1c3333a27aeac51 Mon Sep 17 00:00:00 2001 From: blackshirt Date: Sun, 23 Mar 2025 23:49:43 +0700 Subject: [PATCH] x.crypto.chacha20: fix consecutive calls of `xor_key_stream`, add tests (fix #23977) (#24003) --- vlib/x/crypto/chacha20/chacha.v | 337 ++++++++++-------- vlib/x/crypto/chacha20/chacha_test.v | 38 +- .../chacha20poly1305/chacha20poly1305.v | 8 +- 3 files changed, 236 insertions(+), 147 deletions(-) diff --git a/vlib/x/crypto/chacha20/chacha.v b/vlib/x/crypto/chacha20/chacha.v index 68f1bb3d58..3c2dee4835 100644 --- a/vlib/x/crypto/chacha20/chacha.v +++ b/vlib/x/crypto/chacha20/chacha.v @@ -6,7 +6,6 @@ module chacha20 import math.bits -import crypto.cipher import crypto.internal.subtle import encoding.binary @@ -19,8 +18,6 @@ pub const x_nonce_size = 24 // internal block size ChaCha20 operates on, in bytes const block_size = 64 -// vfmt off - // four constants of ChaCha20 state. const cc0 = u32(0x61707865) // expa const cc1 = u32(0x3320646e) // nd 3 @@ -37,14 +34,16 @@ mut: counter u32 overflow bool // internal buffer for storing key stream results - block []u8 = []u8{len: chacha20.block_size} + block []u8 = []u8{len: block_size} + length int // additional fields, follow the go version + // vfmt off precomp bool p1 u32 p5 u32 p9 u32 p13 u32 p2 u32 p6 u32 p10 u32 p14 u32 p3 u32 p7 u32 p11 u32 p15 u32 + // vfmt on } -// vfmt on // new_cipher creates a new ChaCha20 stream cipher with the given 32 bytes key, a 12 or 24 bytes nonce. // If 24 bytes of nonce was provided, the XChaCha20 construction will be used. @@ -73,7 +72,7 @@ pub fn decrypt(key []u8, nonce []u8, ciphertext []u8) ![]u8 { // xor_key_stream xors each byte in the given slice in the src with a byte from the // cipher's key stream. It fulfills `cipher.Stream` interface. It encrypts the plaintext message -// in src and stores the ciphertext result in dst in a single run of encryption. +// in src and stores the ciphertext result in dst in a key stream fashion. // You must never use the same (key, nonce) pair more than once for encryption. // This would void any confidentiality guarantees for the messages encrypted with the same nonce and key. @[direct_array_access] @@ -88,34 +87,204 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { panic('chacha20: invalid buffer overlap') } - // ChaCha20's encryption mechanism is a relatively simple operation. - // for every block_sized block from src bytes, build ChaCha20 keystream block, - // then xor each byte in the block with keystresm block and then stores xor-ed bytes - // to the output buffer. If there are remaining (trailing) partial bytes, - // generate one more keystream block, xors keystream block with partial bytes - // and stores the result. - // - // Let's process for multiple blocks - // number of blocks the src bytes should be split into + mut idx := 0 + mut src_len := src.len + + // We adapt and ports the go version here + // First, drain any remaining key stream + if c.length != 0 { + // remaining keystream on internal buffer + mut kstream := c.block[block_size - c.length..] + if src_len < kstream.len { + kstream = unsafe { kstream[..src_len] } + } + _ = src[kstream.len - 1] // bounds check elimination hint + for i, b in kstream { + dst[idx + i] = src[idx + i] ^ b + } + // updates the idx for dst and src + c.length -= kstream.len + idx += kstream.len + src_len -= kstream.len + } + if src_len == 0 { + return + } + + // check for counter overflow + num_blocks := (u64(src_len) + block_size - 1) / block_size + if c.overflow || u64(c.counter) + num_blocks > max_u32 { + panic('chacha20: counter overflow') + } else if u64(c.counter) + num_blocks == max_u32 { + c.overflow = true + } + + // take the most full bytes of multiples block_size from the src, + // build the keystream from the cipher's state and stores the result + // into dst + full := src_len - src_len % block_size + if full > 0 { + c.chacha20_block_generic(mut dst[idx..idx + full], src[idx..idx + full]) + } + idx += full + src_len -= full + + // we dont support bufsize + if u64(c.counter) + 1 > max_u32 { + numblocks := (src_len + block_size - 1) / block_size + mut buf := c.block[block_size - numblocks * block_size..] + _ := copy(mut buf, src[idx..]) + c.chacha20_block_generic(mut buf, buf) + m := copy(mut dst[idx..], buf) + c.length = buf.len - m + return + } + // If we have a partial block, pad it for chacha20_block_generic, and + // keep the leftover keystream for the next invocation. + if src_len > 0 { + // copy the last src block to internal buffer, and performs + // chacha20_block_generic on this buffer, and stores into remaining dst + _ := copy(mut c.block, src[idx..]) + c.chacha20_block_generic(mut c.block, c.block) + n := copy(mut dst[idx..], c.block) + // the length of remaining bytes of unprocessed keystream + c.length = block_size - n + } +} + +// encrypt encrypts src and stores into dst buffer. It works like `xor_key_stream` except +// its ignore key streaming process by ignoring remaining key stream in the internal buffer, +// so, its works in one shot of fashion. +// Its added to allow `chacha20poly1305` modules to work without key stream fashion. +// TODO: integrates it with the rest +@[direct_array_access] +pub fn (mut c Cipher) encrypt(mut dst []u8, src []u8) { + if src.len == 0 { + return + } + if dst.len < src.len { + panic('chacha20/chacha: dst buffer is to small') + } + if subtle.inexact_overlap(dst, src) { + panic('chacha20: invalid buffer overlap') + } + nr_blocks := src.len / block_size for i := 0; i < nr_blocks; i++ { - // generate ciphers keystream block, stored in c.block - c.generic_key_stream() // get current src block to be xor-ed block := unsafe { src[i * block_size..(i + 1) * block_size] } - - // instead allocating output buffer for every block, we use dst buffer directly. - // xor current block of plaintext with keystream in c.block - n := cipher.xor_bytes(mut dst[i * block_size..(i + 1) * block_size], block, c.block) - assert n == c.block.len + // build keystream, xor-ed with the block and stores into dst + c.chacha20_block_generic(mut dst[i * block_size..(i + 1) * block_size], block) } // process for partial block if src.len % block_size != 0 { - c.generic_key_stream() // get the remaining last partial block block := unsafe { src[nr_blocks * block_size..] } - // xor block with keystream - _ := cipher.xor_bytes(mut dst[nr_blocks * block_size..], block, c.block) + // pad it into block_size, and then performs chacha20_block_generic + // on this src_block + mut src_block := []u8{len: block_size} + _ := copy(mut src_block, block) + c.chacha20_block_generic(mut src_block, src_block) + + // copy the src_block key stream result into desired dst + n := copy(mut dst[nr_blocks * block_size..], src_block) + assert n == block.len + } +} + +// chacha20_block_generic generates ChaCha20 generic keystream +@[direct_array_access] +fn (mut c Cipher) chacha20_block_generic(mut dst []u8, src []u8) { + // Makes sure its works for size of multiple of block_size + if dst.len != src.len || dst.len % block_size != 0 { + panic('chacha20: internal error: wrong dst and/or src length') + } + // initializes ChaCha20 state + // 0:cccccccc 1:cccccccc 2:cccccccc 3:cccccccc + // 4:kkkkkkkk 5:kkkkkkkk 6:kkkkkkkk 7:kkkkkkkk + // 8:kkkkkkkk 9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk + // 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn + // + // where c=constant k=key b=blockcounter n=nonce + c0, c1, c2, c3 := cc0, cc1, cc2, cc3 + c4, c5, c6, c7 := c.key[0], c.key[1], c.key[2], c.key[3] + c8, c9, c10, c11 := c.key[4], c.key[5], c.key[6], c.key[7] + + _ := c.counter + c13, c14, c15 := c.nonce[0], c.nonce[1], c.nonce[2] + + // precomputes three first column rounds that do not depend on counter + if !c.precomp { + c.p1, c.p5, c.p9, c.p13 = quarter_round(c1, c5, c9, c13) + c.p2, c.p6, c.p10, c.p14 = quarter_round(c2, c6, c10, c14) + c.p3, c.p7, c.p11, c.p15 = quarter_round(c3, c7, c11, c15) + c.precomp = true + } + mut idx := 0 + mut src_len := src.len + for src_len >= block_size { + // remaining first column round + fcr0, fcr4, fcr8, fcr12 := quarter_round(c0, c4, c8, c.counter) + + // The second diagonal round. + mut x0, mut x5, mut x10, mut x15 := quarter_round(fcr0, c.p5, c.p10, c.p15) + mut x1, mut x6, mut x11, mut x12 := quarter_round(c.p1, c.p6, c.p11, fcr12) + mut x2, mut x7, mut x8, mut x13 := quarter_round(c.p2, c.p7, fcr8, c.p13) + mut x3, mut x4, mut x9, mut x14 := quarter_round(c.p3, fcr4, c.p9, c.p14) + + // The remaining 18 rounds. + for i := 0; i < 9; i++ { + // Column round. + x0, x4, x8, x12 = quarter_round(x0, x4, x8, x12) + x1, x5, x9, x13 = quarter_round(x1, x5, x9, x13) + x2, x6, x10, x14 = quarter_round(x2, x6, x10, x14) + x3, x7, x11, x15 = quarter_round(x3, x7, x11, x15) + + // Diagonal round. + x0, x5, x10, x15 = quarter_round(x0, x5, x10, x15) + x1, x6, x11, x12 = quarter_round(x1, x6, x11, x12) + x2, x7, x8, x13 = quarter_round(x2, x7, x8, x13) + x3, x4, x9, x14 = quarter_round(x3, x4, x9, x14) + } + + // add back keystream result to initial state, xor-ing with the src and stores into dst + binary.little_endian_put_u32(mut dst[idx + 0..idx + 4], binary.little_endian_u32(src[idx + 0.. + idx + 4]) ^ (x0 + c0)) + binary.little_endian_put_u32(mut dst[idx + 4..idx + 8], binary.little_endian_u32(src[idx + 4.. + idx + 8]) ^ (x1 + c1)) + binary.little_endian_put_u32(mut dst[idx + 8..idx + 12], binary.little_endian_u32(src[idx + + 8..idx + 12]) ^ (x2 + c2)) + binary.little_endian_put_u32(mut dst[idx + 12..idx + 16], binary.little_endian_u32(src[ + idx + 12..idx + 16]) ^ (x3 + c3)) + binary.little_endian_put_u32(mut dst[idx + 16..idx + 20], binary.little_endian_u32(src[ + idx + 16..idx + 20]) ^ (x4 + c4)) + binary.little_endian_put_u32(mut dst[idx + 20..idx + 24], binary.little_endian_u32(src[ + idx + 20..idx + 24]) ^ (x5 + c5)) + binary.little_endian_put_u32(mut dst[idx + 24..idx + 28], binary.little_endian_u32(src[ + idx + 24..idx + 28]) ^ (x6 + c6)) + binary.little_endian_put_u32(mut dst[idx + 28..idx + 32], binary.little_endian_u32(src[ + idx + 28..idx + 32]) ^ (x7 + c7)) + binary.little_endian_put_u32(mut dst[idx + 32..idx + 36], binary.little_endian_u32(src[ + idx + 32..idx + 36]) ^ (x8 + c8)) + binary.little_endian_put_u32(mut dst[idx + 36..idx + 40], binary.little_endian_u32(src[ + idx + 36..idx + 40]) ^ (x9 + c9)) + binary.little_endian_put_u32(mut dst[idx + 40..idx + 44], binary.little_endian_u32(src[ + idx + 40..idx + 44]) ^ (x10 + c10)) + binary.little_endian_put_u32(mut dst[idx + 44..idx + 48], binary.little_endian_u32(src[ + idx + 44..idx + 48]) ^ (x11 + c11)) + binary.little_endian_put_u32(mut dst[idx + 48..idx + 52], binary.little_endian_u32(src[ + idx + 48..idx + 52]) ^ (x12 + c.counter)) + binary.little_endian_put_u32(mut dst[idx + 52..idx + 56], binary.little_endian_u32(src[ + idx + 52..idx + 56]) ^ (x13 + c13)) + binary.little_endian_put_u32(mut dst[idx + 56..idx + 60], binary.little_endian_u32(src[ + idx + 56..idx + 60]) ^ (x14 + c14)) + binary.little_endian_put_u32(mut dst[idx + 60..idx + 64], binary.little_endian_u32(src[ + idx + 60..idx + 64]) ^ (x15 + c15)) + + c.counter += 1 + + idx += block_size + src_len -= block_size } } @@ -138,6 +307,7 @@ pub fn (mut c Cipher) reset() { _ := vmemset(&c.nonce, 0, 12) c.block.reset() } + c.length = 0 c.counter = u32(0) c.overflow = false c.precomp = false @@ -160,7 +330,7 @@ pub fn (mut c Cipher) reset() { // set_counter sets Cipher's counter pub fn (mut c Cipher) set_counter(ctr u32) { - if ctr >= max_u32 { + if u64(ctr) >= max_u32 { c.overflow = true } if c.overflow { @@ -220,119 +390,8 @@ fn (mut c Cipher) do_rekey(key []u8, nonce []u8) ! { c.nonce[2] = binary.little_endian_u32(nonces[8..12]) } -// chacha20_block transforms a ChaCha20 state by running -// multiple quarter rounds. -// see https://datatracker.ietf.org/doc/html/rfc8439#section-2.3 -@[direct_array_access] -fn (mut c Cipher) chacha20_block() { - // initializes ChaCha20 state - // 0:cccccccc 1:cccccccc 2:cccccccc 3:cccccccc - // 4:kkkkkkkk 5:kkkkkkkk 6:kkkkkkkk 7:kkkkkkkk - // 8:kkkkkkkk 9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk - // 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn - // - // where c=constant k=key b=blockcounter n=nonce - c0, c1, c2, c3 := cc0, cc1, cc2, cc3 - c4 := c.key[0] - c5 := c.key[1] - c6 := c.key[2] - c7 := c.key[3] - c8 := c.key[4] - c9 := c.key[5] - c10 := c.key[6] - c11 := c.key[7] - - _ := c.counter - c13 := c.nonce[0] - c14 := c.nonce[1] - c15 := c.nonce[2] - - // precomputes three first column rounds that do not depend on counter - if !c.precomp { - c.p1, c.p5, c.p9, c.p13 = quarter_round(c1, c5, c9, c13) - c.p2, c.p6, c.p10, c.p14 = quarter_round(c2, c6, c10, c14) - c.p3, c.p7, c.p11, c.p15 = quarter_round(c3, c7, c11, c15) - c.precomp = true - } - // remaining first column round - fcr0, fcr4, fcr8, fcr12 := quarter_round(c0, c4, c8, c.counter) - - // The second diagonal round. - mut x0, mut x5, mut x10, mut x15 := quarter_round(fcr0, c.p5, c.p10, c.p15) - mut x1, mut x6, mut x11, mut x12 := quarter_round(c.p1, c.p6, c.p11, fcr12) - mut x2, mut x7, mut x8, mut x13 := quarter_round(c.p2, c.p7, fcr8, c.p13) - mut x3, mut x4, mut x9, mut x14 := quarter_round(c.p3, fcr4, c.p9, c.p14) - - // The remaining 18 rounds. - for i := 0; i < 9; i++ { - // Column round. - x0, x4, x8, x12 = quarter_round(x0, x4, x8, x12) - x1, x5, x9, x13 = quarter_round(x1, x5, x9, x13) - x2, x6, x10, x14 = quarter_round(x2, x6, x10, x14) - x3, x7, x11, x15 = quarter_round(x3, x7, x11, x15) - - // Diagonal round. - x0, x5, x10, x15 = quarter_round(x0, x5, x10, x15) - x1, x6, x11, x12 = quarter_round(x1, x6, x11, x12) - x2, x7, x8, x13 = quarter_round(x2, x7, x8, x13) - x3, x4, x9, x14 = quarter_round(x3, x4, x9, x14) - } - - // add back to initial state and stores to dst - x0 += c0 - x1 += c1 - x2 += c2 - x3 += c3 - x4 += c4 - x5 += c5 - x6 += c6 - x7 += c7 - x8 += c8 - x9 += c9 - x10 += c10 - x11 += c11 - // x12 is Cipher.counter - x12 += c.counter - x13 += c13 - x14 += c14 - x15 += c15 - - binary.little_endian_put_u32(mut c.block[0..4], x0) - binary.little_endian_put_u32(mut c.block[4..8], x1) - binary.little_endian_put_u32(mut c.block[8..12], x2) - binary.little_endian_put_u32(mut c.block[12..16], x3) - binary.little_endian_put_u32(mut c.block[16..20], x4) - binary.little_endian_put_u32(mut c.block[20..24], x5) - binary.little_endian_put_u32(mut c.block[24..28], x6) - binary.little_endian_put_u32(mut c.block[28..32], x7) - binary.little_endian_put_u32(mut c.block[32..36], x8) - binary.little_endian_put_u32(mut c.block[36..40], x9) - binary.little_endian_put_u32(mut c.block[40..44], x10) - binary.little_endian_put_u32(mut c.block[44..48], x11) - binary.little_endian_put_u32(mut c.block[48..52], x12) - binary.little_endian_put_u32(mut c.block[52..56], x13) - binary.little_endian_put_u32(mut c.block[56..60], x14) - binary.little_endian_put_u32(mut c.block[60..64], x15) -} - -// generic_key_stream creates generic ChaCha20 keystream block and stores the result in Cipher.block -@[direct_array_access] -fn (mut c Cipher) generic_key_stream() { - // creates ChaCha20 block stream - c.chacha20_block() - // updates counter and checks for overflow - ctr := u64(c.counter) + u64(1) - if ctr >= max_u32 { - c.overflow = true - } - if c.overflow || ctr > max_u32 { - panic('counter overflow') - } - c.counter += 1 -} - // Helper and core function for ChaCha20 - +// // quarter_round is the basic operation of the ChaCha algorithm. It operates // on four 32-bit unsigned integers, by performing AXR (add, xor, rotate) // operation on this quartet u32 numbers. @@ -393,7 +452,7 @@ fn chacha20_encrypt_with_counter(key []u8, nonce []u8, ctr u32, plaintext []u8) c.set_counter(ctr) mut out := []u8{len: plaintext.len} - c.xor_key_stream(mut out, plaintext) + c.encrypt(mut out, plaintext) return out } diff --git a/vlib/x/crypto/chacha20/chacha_test.v b/vlib/x/crypto/chacha20/chacha_test.v index 318fdc10d8..de24a433ea 100644 --- a/vlib/x/crypto/chacha20/chacha_test.v +++ b/vlib/x/crypto/chacha20/chacha_test.v @@ -4,6 +4,35 @@ import crypto.cipher import rand import encoding.hex +fn test_xor_key_stream_consecutive() { + // See https://github.com/vlang/v/issues/23977 + key := [u8(64), 116, 63, 11, 221, 199, 187, 110, 217, 68, 0, 50, 65, 79, 24, 10, 124, 174, + 66, 2, 172, 153, 237, 145, 244, 41, 131, 84, 247, 42, 73, 131] + nonce := [u8(86), 124, 222, 94, 253, 187, 151, 219, 17, 83, 118, 255] + encoded_data_one := [u8(201), 199, 66, 226] + decoded_data_one := [u8(0), 0, 0, 9] + encoded_data_two := [u8(82), 189, 125, 3, 24, 185, 183, 240, 29, 223, 17, 241, 103, 69, 45, + 101] + decoded_data_two := [u8(0), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + + mut c := new_cipher(key, nonce)! + mut dst := []u8{len: encoded_data_one.len} + c.xor_key_stream(mut dst, encoded_data_one) + assert dst == decoded_data_one + + // consecutive call + dst = []u8{len: encoded_data_two.len} + c.xor_key_stream(mut dst, encoded_data_two) + assert dst == decoded_data_two + + // additional data + msg := 'billy the kid'.bytes() + mut dst2 := []u8{len: msg.len} + c.xor_key_stream(mut dst2, msg) + // the go version produces: [40 17 78 116 255 224 2 52 92 151 103 107 138] + assert dst2 == [u8(40), 17, 78, 116, 255, 224, 2, 52, 92, 151, 103, 107, 138] +} + struct StreamCipher { mut: cipher &cipher.Stream @@ -72,10 +101,11 @@ fn test_chacha20_block_function() ! { nonce_bytes := hex.decode(val.nonce)! mut cs := new_cipher(key_bytes, nonce_bytes)! cs.set_counter(val.counter) - cs.chacha20_block() + mut block := []u8{len: block_size} + cs.chacha20_block_generic(mut block, block) exp_bytes := hex.decode(val.output)! - assert cs.block == exp_bytes + assert block == exp_bytes } } @@ -89,12 +119,12 @@ fn test_chacha20_simple_block_function() ! { mut block := []u8{len: block_size} mut cs := new_cipher(key_bytes, nonce_bytes)! cs.set_counter(u32(1)) - cs.chacha20_block() + cs.chacha20_block_generic(mut block, block) expected_raw_bytes := '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e' exp_bytes := hex.decode(expected_raw_bytes)! - assert cs.block == exp_bytes + assert block == exp_bytes } fn test_chacha20_quarter_round() { diff --git a/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v b/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v index b056292a9a..718c3416f3 100644 --- a/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v +++ b/vlib/x/crypto/chacha20poly1305/chacha20poly1305.v @@ -127,13 +127,13 @@ fn (c Chacha20Poly1305) encrypt_generic(plaintext []u8, nonce []u8, ad []u8) ![] // see https://datatracker.ietf.org/doc/html/rfc8439#section-2.6 mut polykey := []u8{len: key_size} mut s := chacha20.new_cipher(c.key, nonce)! - s.xor_key_stream(mut polykey, polykey) + s.encrypt(mut polykey, polykey) // Next, the ChaCha20 encryption function is called to encrypt the plaintext, // using the same key and nonce, and with the initial ChaCha20 counter set to 1. mut ciphertext := []u8{len: plaintext.len} s.set_counter(1) - s.xor_key_stream(mut ciphertext, plaintext) + s.encrypt(mut ciphertext, plaintext) // Finally, the Poly1305 function is called with the generated Poly1305 one-time key // calculated above, and a message constructed as described in @@ -177,7 +177,7 @@ fn (c Chacha20Poly1305) decrypt_generic(ciphertext []u8, nonce []u8, ad []u8) ![ // generates poly1305 one-time key for later calculation mut polykey := []u8{len: key_size} mut s := chacha20.new_cipher(c.key, nonce)! - s.xor_key_stream(mut polykey, polykey) + s.encrypt(mut polykey, polykey) // Remember, ciphertext is concatenation of associated cipher output plus tag (mac) bytes encrypted := ciphertext[0..ciphertext.len - c.overhead()] @@ -186,7 +186,7 @@ fn (c Chacha20Poly1305) decrypt_generic(ciphertext []u8, nonce []u8, ad []u8) ![ mut plaintext := []u8{len: encrypted.len} s.set_counter(1) // doing reverse encrypt on cipher output part produces plaintext - s.xor_key_stream(mut plaintext, encrypted) + s.encrypt(mut plaintext, encrypted) // authenticated messages part mut constructed_msg := []u8{}