From bf41714c87cfcfca21452ad83af9c8589c106ffc Mon Sep 17 00:00:00 2001 From: blackshirt Date: Sun, 21 Sep 2025 18:38:12 +0700 Subject: [PATCH] x.crypto.chacha20: makes 64-bit counter cipher work too (#25363) --- vlib/x/crypto/chacha20/bench/bench.v | 19 ++- vlib/x/crypto/chacha20/chacha.v | 80 +++++----- vlib/x/crypto/chacha20/stream.v | 228 +++++++++++++-------------- vlib/x/crypto/chacha20/stream_test.v | 14 +- 4 files changed, 171 insertions(+), 170 deletions(-) diff --git a/vlib/x/crypto/chacha20/bench/bench.v b/vlib/x/crypto/chacha20/bench/bench.v index ed9e994ac0..7245ac337a 100644 --- a/vlib/x/crypto/chacha20/bench/bench.v +++ b/vlib/x/crypto/chacha20/bench/bench.v @@ -19,6 +19,13 @@ // ----------- // Iterations: 10000 Total Duration: 48.242ms ns/op: 4824 B/op: 3 allocs/op: 4 // +// Chacha20 old xor_key_stream_backup +// ----------- +// Iterations: 10000 Total Duration: 53.430ms ns/op: 5343 B/op: 11 allocs/op: 12 +// ChaCha20 new xor_key_stream +// ----------- +// Iterations: 10000 Total Duration: 43.668ms ns/op: 4366 B/op: 0 allocs/op: 1 +// import x.benchmark import encoding.hex import x.crypto.chacha20 @@ -40,6 +47,12 @@ fn bench_chacha20_decrypt() ! { _ := chacha20.decrypt(key, nonce, ciphertext)! } +fn bench_chacha20_xor_key_stream() ! { + mut dst := []u8{len: plaintext.len} + mut cs := chacha20.new_cipher(key, nonce)! + cs.xor_key_stream(mut dst, plaintext) +} + fn main() { cf := benchmark.BenchmarkDefaults{ n: 10000 @@ -49,9 +62,13 @@ fn main() { mut b0 := benchmark.setup(bench_chacha20_encrypt, cf)! b0.run() - println('') println('ChaCha20 Decryption') println('-----------') mut b1 := benchmark.setup(bench_chacha20_decrypt, cf)! b1.run() + + println('ChaCha20 new xor_key_stream') + println('-----------') + mut b3 := benchmark.setup(bench_chacha20_xor_key_stream, cf)! 
+ b3.run() } diff --git a/vlib/x/crypto/chacha20/chacha.v b/vlib/x/crypto/chacha20/chacha.v index dc6130a7d8..12346ff50a 100644 --- a/vlib/x/crypto/chacha20/chacha.v +++ b/vlib/x/crypto/chacha20/chacha.v @@ -91,62 +91,60 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) { if dst.len < src.len { panic('chacha20/chacha: dst buffer is to small') } - - mut idx := 0 - mut src_len := src.len - - dst = unsafe { dst[..src_len] } + dst = unsafe { dst[..src.len] } if subtle.inexact_overlap(dst, src) { panic('chacha20: invalid buffer overlap') } + // index of position within src bytes + mut idx := 0 - // We adapt and ports the go version here - // First, drain any remaining key stream + // First, try to drain any remaining key stream from internal buffer if c.length != 0 { // remaining keystream on internal buffer mut kstream := c.block[block_size - c.length..] - if src_len < kstream.len { - kstream = unsafe { kstream[..src_len] } + if src.len < kstream.len { + kstream = unsafe { kstream[..src.len] } } + // xors every bytes in src with bytes from key stream and stored into dst for i, b in kstream { dst[idx + i] = src[idx + i] ^ b } - // updates the idx for dst and src + // updates position and internal buffer length. + // when c.length reaches the block_size, we reset it for future use. 
c.length -= kstream.len idx += kstream.len - src_len -= kstream.len - } - - // take the most full bytes of multiples block_size from the src, - // build the keystream from the cipher's state and stores the result - // into dst - full := src_len - src_len % block_size - if full > 0 { - src_block := unsafe { src[idx..idx + full] } - c.Stream.keystream_with_blocksize(mut dst[idx..idx + full], src_block) or { - c.Stream.overflow = true - panic('chacha20: xor_key_stream leads to counter overflow') + if c.length == block_size { + unsafe { c.block.reset() } + c.length = 0 } } - idx += full - src_len -= full + // process for remaining unprocessed src bytes + mut remains := unsafe { src[idx..] } + nr_blocks := remains.len / block_size - // If we have a partial block, pad it for keystream_with_blocksize, and - // keep the leftover keystream for the next invocation. - if src_len > 0 { - // Make sure, internal buffer cleared or the old garbaged data from previous call still there - // See the issue at https://github.com/vlang/v/issues/24043 - unsafe { c.block.reset() } // = []u8{len: block_size} - // copy the last src block to internal buffer, and performs - // keystream_with_blocksize on this buffer, and stores into remaining dst - _ := copy(mut c.block, src[idx..]) - c.Stream.keystream_with_blocksize(mut c.block, c.block) or { - c.Stream.overflow = true - panic('chacha20: xor_key_stream leads to counter overflow') + // process for full block_size-d message + for i := 0; i < nr_blocks; i++ { + // for every block_sized message, we generates 64-bytes block key stream + // and then xor-ing this block with generated key stream + block := unsafe { remains[i * block_size..(i + 1) * block_size] } + ks := c.keystream() or { panic(err) } + for j, b in ks { + dst[idx + j] = block[j] ^ b } - n := copy(mut dst[idx..], c.block) - // the length of remaining bytes of unprocessed keystream - c.length = block_size - n + // updates position + idx += block_size + } + + // process for remaining 
partial block + if remains.len % block_size != 0 { + last_block := unsafe { remains[nr_blocks * block_size..] } + // generates one 64-bytes keystream block + c.block = c.keystream() or { panic(err) } + for i, b in last_block { + dst[idx + i] = b ^ c.block[i] + } + c.length = block_size - last_block.len + idx += last_block.len } } @@ -181,7 +179,9 @@ pub fn (mut c Cipher) free() { } } -// reset quickly sets all Cipher's fields to default value +// reset quickly sets all Cipher's fields to default value. +// This method will be deprecated. +@[deprecated_after: '2025-11-30'] @[unsafe] pub fn (mut c Cipher) reset() { c.Stream.reset() diff --git a/vlib/x/crypto/chacha20/stream.v b/vlib/x/crypto/chacha20/stream.v index 955ddb3d4a..d4d628e447 100644 --- a/vlib/x/crypto/chacha20/stream.v +++ b/vlib/x/crypto/chacha20/stream.v @@ -160,148 +160,104 @@ fn (mut s Stream) keystream_full(mut dst []u8, src []u8) ! { s.overflow = true return error('chacha20: internal counter overflow') } + mut idx := 0 // process for full block_size-d msg for i := 0; i < nr_blocks; i++ { + // for every block_sized message, we generates 64-bytes block key stream + // and then xor-ing this block with generated key stream block := unsafe { src[i * block_size..(i + 1) * block_size] } - // process with block_size keystream - s.keystream_with_blocksize(mut dst[i * block_size..(i + 1) * block_size], block)! + ks := s.keystream()! + for j, b in ks { + dst[idx + j] = block[j] ^ b + } + // updates position + idx += block_size } // process for remaining partial block if src.len % block_size != 0 { last_block := unsafe { src[nr_blocks * block_size..] } - // pad to align with block_size - mut last_bytes := []u8{len: block_size} - _ := copy(mut last_bytes, last_block) - - // process the padded last block - s.keystream_with_blocksize(mut last_bytes, last_bytes)! 
- _ := copy(mut dst[nr_blocks * block_size..], last_bytes) + // generates one 64-byte keystream block and XORs the bytes + // in last_block with the key stream + ks := s.keystream()! + for i, b in last_block { + dst[idx + i] = b ^ ks[i] + } + idx += last_block.len } } -// keystream_with_blocksize produces stream from src bytes that aligns with block_size, -// serialized in little-endian form and stored into dst buffer. +// keystream generates and returns a 64-byte block of key stream and increases the internal counter. @[direct_array_access] -fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) ! { - // ChaCha20 keystream generator was relatively easy to understand. - // Its contains steps: - // - loads current ChaCha20 into temporary state, used for later. - // - performs quarter_round function on this state and returns some new state. - // - adding back the new state with the old state. - // - performs xor-ing between src bytes (loaded as little endian number) with result from previous step. - // - serializes, in little endian form, this xor-ed state into destination buffer. 
- // - // Makes sure its works for size of multiple of block_size - if dst.len != src.len || dst.len % block_size != 0 { - return error('chacha20: internal error: wrong dst and/or src length') - } - // check if this stream has reached the counter limit - if s.overflow { - return error('chacha20: internal counter has reached the limit, please rekey') - } - // check for counter overflow when processing number of blocks - num_blocks := (u64(src.len) + block_size - 1) / block_size - if s.check_ctr(num_blocks) { - s.overflow = true - return error('chacha20.check_ctr: internal counter overflow') - } - - // load state from current stream - mut st := s.new_curr_state() - // clone the state - mut st_c := clone_state(st) +fn (mut s Stream) keystream() ![]u8 { + // initializes current state and working state + mut awal := s.new_curr_state() + mut ws := clone_state(awal) // precomputes cache counter-independent values if s.mode == .standard && !s.precomp { - s.precomp(st) + s.precomp(awal) + } + // remaining first column round + if s.mode == .standard { + mut fcr := Quartet{awal[0], awal[4], awal[8], awal[12]} + qround_on_quartet(mut fcr) + + // First diagonal round. + qround_on_state_with_quartet(mut ws, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10, 15) + qround_on_state_with_quartet(mut ws, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11, 12) + qround_on_state_with_quartet(mut ws, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8, 13) + qround_on_state_with_quartet(mut ws, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9, 14) } - mut idx := 0 - mut src_len := src.len - for src_len >= block_size { - if s.mode == .standard { - // remaining first column round - mut fcr := Quartet{st[0], st[4], st[8], st[12]} - qround_on_quartet(mut fcr) + // The remaining quarter rounds + // + // For standard variant, the first column-round was already precomputed, + // For original variant, its use full quarter round number. 
+ // + // perform chacha20 quarter round n-times + n := if s.mode == .standard { 9 } else { default_qround_nr } + for i := 0; i < n; i++ { + // Column-round + // 0 | 1 | 2 | 3 + // 4 | 5 | 6 | 7 + // 8 | 9 | 10 | 11 + // 12 | 13 | 14 | 15 + qround_on_state(mut ws, 0, 4, 8, 12) // 0 + qround_on_state(mut ws, 1, 5, 9, 13) // 1 + qround_on_state(mut ws, 2, 6, 10, 14) // 2 + qround_on_state(mut ws, 3, 7, 11, 15) // 3 - // First diagonal round. - qround_on_state_with_quartet(mut st_c, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10, - 15) - qround_on_state_with_quartet(mut st_c, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11, - 12) - qround_on_state_with_quartet(mut st_c, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8, - 13) - qround_on_state_with_quartet(mut st_c, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9, - 14) - } - // The remaining rounds - // - // For standard variant, the first column-round was already precomputed, - // For original variant, its use full quarter round number. - - // perform chacha20 quarter round n-times - n := if s.mode == .standard { 9 } else { default_qround_nr } - for i := 0; i < n; i++ { - // Column-round - // 0 | 1 | 2 | 3 - // 4 | 5 | 6 | 7 - // 8 | 9 | 10 | 11 - // 12 | 13 | 14 | 15 - qround_on_state(mut st_c, 0, 4, 8, 12) // 0 - qround_on_state(mut st_c, 1, 5, 9, 13) // 1 - qround_on_state(mut st_c, 2, 6, 10, 14) // 2 - qround_on_state(mut st_c, 3, 7, 11, 15) // 3 - - // Diagonal round. 
- // 0 \ 1 \ 2 \ 3 - // 5 \ 6 \ 7 \ 4 - // 10 \ 11 \ 8 \ 9 - // 15 \ 12 \ 13 \ 14 - qround_on_state(mut st_c, 0, 5, 10, 15) - qround_on_state(mut st_c, 1, 6, 11, 12) - qround_on_state(mut st_c, 2, 7, 8, 13) - qround_on_state(mut st_c, 3, 4, 9, 14) - } - - // add back keystream result to initial state, xor-ing with the src and stores into dst - for i := 0; i < 16; i++ { - src_block := unsafe { src[idx + (i * 4)..idx + (i + 1) * 4] } - add_xored := binary.little_endian_u32(src_block) ^ (st_c[i] + st[i]) - binary.little_endian_put_u32(mut dst[idx + (i * 4)..idx + (i + 1) * 4], add_xored) - } - - // increases Stream's internal counter - if s.mode == .original { - st[12] += 1 - // first counter reset ? - if st[12] == 0 { - // increase second counter, if reset, mark as an overflow and return error - st[13] += 1 - if st[13] == 0 { - s.overflow = true - return error('chacha20.keystream_with_blocksize: 64-bit counter reached') - } - } - // store the counter - s.nonce[0] = st[12] - s.nonce[1] = st[13] - } else { - st[12] += 1 - if st[12] == 0 { - s.overflow = true - return error('chacha20.keystream_with_blocksize: overflow 32-bit counter') - } - s.nonce[0] = st[12] - } - - // updates index - idx += block_size - src_len -= block_size + // Diagonal round. + // 0 \ 1 \ 2 \ 3 + // 5 \ 6 \ 7 \ 4 + // 10 \ 11 \ 8 \ 9 + // 15 \ 12 \ 13 \ 14 + qround_on_state(mut ws, 0, 5, 10, 15) + qround_on_state(mut ws, 1, 6, 11, 12) + qround_on_state(mut ws, 2, 7, 8, 13) + qround_on_state(mut ws, 3, 4, 9, 14) } + // Add the initial state values to the working state values. + // We do not perform the xor-ing here; it is done in xor_key_stream and/or keystream_full. + for i, _ in ws { + ws[i] += awal[i] + } + // increases stream internal counter + s.inc_ctr(mut awal)! 
+ + // serializes current working state in little-endian form + mut block := []u8{len: block_size} + for i, v in ws { + block[i * 4] = u8(v) + block[i * 4 + 1] = u8(v >> 8) + block[i * 4 + 2] = u8(v >> 16) + block[i * 4 + 3] = u8(v >> 24) + } + return block } -// precomp does quarter round on counter-independent quartet values on running state st. +// precomp performs quarter round on counter-independent quartet values on running state st. @[direct_array_access; inline] fn (mut s Stream) precomp(st State) { mut pcr1 := Quartet{st[1], st[5], st[9], st[13]} @@ -349,7 +305,7 @@ fn (b Stream) ctr() u64 { } } -// set_ctr sets Stream's counter +// set_ctr sets stream internal counter @[direct_array_access; inline] fn (mut b Stream) set_ctr(ctr u64) { match b.mode { @@ -380,6 +336,34 @@ fn (b Stream) check_ctr(value u64) bool { return false } +// inc_ctr increases stream counter by one from the current state st +@[direct_array_access] +fn (mut s Stream) inc_ctr(mut st State) ! { + // updates internal counter + if s.mode == .original { + st[12] += 1 + // first counter reset ? + if st[12] == 0 { + // increase second counter, if reset, mark as an overflow and return error + st[13] += 1 + if st[13] == 0 { + s.overflow = true + return error('chacha20.keystream: 64-bit counter reached') + } + } + // store the counter + s.nonce[0] = st[12] + s.nonce[1] = st[13] + } else { + st[12] += 1 + if st[12] == 0 { + s.overflow = true + return error('chacha20.keystream: overflow 32-bit counter') + } + s.nonce[0] = st[12] + } +} + // max_ctr returns maximum counter value of this stream variant @[inline] fn (b Stream) max_ctr() u64 { diff --git a/vlib/x/crypto/chacha20/stream_test.v b/vlib/x/crypto/chacha20/stream_test.v index ebdc8a4c09..f497e7f3ca 100644 --- a/vlib/x/crypto/chacha20/stream_test.v +++ b/vlib/x/crypto/chacha20/stream_test.v @@ -22,13 +22,13 @@ fn test_stream_counter_handling() ! 
{ assert ctx.Stream.overflow == false assert ctx.Stream.ctr() == max_64bit_counter - // after above process the counter should have at the maximum limit - // we use keystream_with_blocksize to test this counter handling, because + // after above process, the counter should reach the maximum limit + // we use keystream_full to test this counter handling, because // xor_key_stream would panic on counter reset msg1 := []u8{len: block_size} - ctx.Stream.keystream_with_blocksize(mut dst[..block_size], msg1) or { + ctx.Stream.keystream_full(mut dst[..block_size], msg1) or { assert ctx.Stream.overflow == true - assert err == error('chacha20.check_ctr: internal counter overflow') + assert err == error('chacha20: internal counter overflow') return } } @@ -58,7 +58,7 @@ fn test_state_of_chacha20_block_simple() ! { mut block := []u8{len: block_size} stream.set_ctr(1) - stream.keystream_with_blocksize(mut block, block)! + stream.keystream_full(mut block, block)! expected_raw_bytes := '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e' exp_bytes := hex.decode(expected_raw_bytes)! @@ -66,7 +66,7 @@ fn test_state_of_chacha20_block_simple() ! { assert block == exp_bytes } -fn test_keystream_with_blocksize() ! { +fn test_keystream_encryption() ! { for val in blocks_testcases { key := hex.decode(val.key)! nonce := hex.decode(val.nonce)! @@ -75,7 +75,7 @@ fn test_keystream_with_blocksize() ! { stream.set_ctr(val.counter) mut block := []u8{len: block_size} - stream.keystream_with_blocksize(mut block, block)! + stream.keystream_full(mut block, block)! exp_bytes := hex.decode(val.output)! assert block == exp_bytes