x.crypto.chacha20: makes 64-bit counter cipher work too (#25363)

This commit is contained in:
blackshirt 2025-09-21 18:38:12 +07:00 committed by GitHub
parent cd152169e8
commit bf41714c87
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 171 additions and 170 deletions

View File

@ -19,6 +19,13 @@
// -----------
// Iterations: 10000 Total Duration: 48.242ms ns/op: 4824 B/op: 3 allocs/op: 4
//
// Chacha20 old xor_key_stream_backup
// -----------
// Iterations: 10000 Total Duration: 53.430ms ns/op: 5343 B/op: 11 allocs/op: 12
// ChaCha20 new xor_key_stream
// -----------
// Iterations: 10000 Total Duration: 43.668ms ns/op: 4366 B/op: 0 allocs/op: 1
//
import x.benchmark
import encoding.hex
import x.crypto.chacha20
@ -40,6 +47,12 @@ fn bench_chacha20_decrypt() ! {
_ := chacha20.decrypt(key, nonce, ciphertext)!
}
// bench_chacha20_xor_key_stream is the benchmark body for Cipher.xor_key_stream.
// It creates a fresh cipher from `key` and `nonce`, then runs xor_key_stream over
// `plaintext` into an output buffer of the same length.
// NOTE(review): `key`, `nonce` and `plaintext` are presumably module-level values
// declared elsewhere in this file; confirm.
fn bench_chacha20_xor_key_stream() ! {
	mut cipher := chacha20.new_cipher(key, nonce)!
	mut out := []u8{len: plaintext.len}
	cipher.xor_key_stream(mut out, plaintext)
}
fn main() {
cf := benchmark.BenchmarkDefaults{
n: 10000
@ -49,9 +62,13 @@ fn main() {
mut b0 := benchmark.setup(bench_chacha20_encrypt, cf)!
b0.run()
println('')
println('ChaCha20 Decryption')
println('-----------')
mut b1 := benchmark.setup(bench_chacha20_decrypt, cf)!
b1.run()
println('ChaCha20 new xor_key_stream')
println('-----------')
mut b3 := benchmark.setup(bench_chacha20_xor_key_stream, cf)!
b3.run()
}

View File

@ -91,62 +91,60 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) {
if dst.len < src.len {
panic('chacha20/chacha: dst buffer is to small')
}
mut idx := 0
mut src_len := src.len
dst = unsafe { dst[..src_len] }
dst = unsafe { dst[..src.len] }
if subtle.inexact_overlap(dst, src) {
panic('chacha20: invalid buffer overlap')
}
// index of position within src bytes
mut idx := 0
// We adapt and port the Go version here
// First, drain any remaining key stream
// First, try to drain any remaining key stream from internal buffer
if c.length != 0 {
// remaining keystream on internal buffer
mut kstream := c.block[block_size - c.length..]
if src_len < kstream.len {
kstream = unsafe { kstream[..src_len] }
if src.len < kstream.len {
kstream = unsafe { kstream[..src.len] }
}
// xors every bytes in src with bytes from key stream and stored into dst
for i, b in kstream {
dst[idx + i] = src[idx + i] ^ b
}
// updates the idx for dst and src
// updates position and internal buffer length.
// when c.length reaches the block_size, we reset it for future use.
c.length -= kstream.len
idx += kstream.len
src_len -= kstream.len
}
// take the most full bytes of multiples block_size from the src,
// build the keystream from the cipher's state and stores the result
// into dst
full := src_len - src_len % block_size
if full > 0 {
src_block := unsafe { src[idx..idx + full] }
c.Stream.keystream_with_blocksize(mut dst[idx..idx + full], src_block) or {
c.Stream.overflow = true
panic('chacha20: xor_key_stream leads to counter overflow')
if c.length == block_size {
unsafe { c.block.reset() }
c.length = 0
}
}
idx += full
src_len -= full
// process for remaining unprocessed src bytes
mut remains := unsafe { src[idx..] }
nr_blocks := remains.len / block_size
// If we have a partial block, pad it for keystream_with_blocksize, and
// keep the leftover keystream for the next invocation.
if src_len > 0 {
// Make sure, internal buffer cleared or the old garbaged data from previous call still there
// See the issue at https://github.com/vlang/v/issues/24043
unsafe { c.block.reset() } // = []u8{len: block_size}
// copy the last src block to internal buffer, and performs
// keystream_with_blocksize on this buffer, and stores into remaining dst
_ := copy(mut c.block, src[idx..])
c.Stream.keystream_with_blocksize(mut c.block, c.block) or {
c.Stream.overflow = true
panic('chacha20: xor_key_stream leads to counter overflow')
// process for full block_size-d message
for i := 0; i < nr_blocks; i++ {
// for every block_size-d message, we generate a 64-byte key stream block
// and then XOR the message block with the generated key stream
block := unsafe { remains[i * block_size..(i + 1) * block_size] }
ks := c.keystream() or { panic(err) }
for j, b in ks {
dst[idx + j] = block[j] ^ b
}
n := copy(mut dst[idx..], c.block)
// the length of remaining bytes of unprocessed keystream
c.length = block_size - n
// updates position
idx += block_size
}
// process for remaining partial block
if remains.len % block_size != 0 {
last_block := unsafe { remains[nr_blocks * block_size..] }
// generates one 64-bytes keystream block
c.block = c.keystream() or { panic(err) }
for i, b in last_block {
dst[idx + i] = b ^ c.block[i]
}
c.length = block_size - last_block.len
idx += last_block.len
}
}
@ -181,7 +179,9 @@ pub fn (mut c Cipher) free() {
}
}
// reset quickly sets all Cipher's fields to default value
// reset quickly sets all Cipher's fields to default value.
// This method will be deprecated.
@[deprecated_after: '2025-11-30']
@[unsafe]
pub fn (mut c Cipher) reset() {
c.Stream.reset()

View File

@ -160,148 +160,104 @@ fn (mut s Stream) keystream_full(mut dst []u8, src []u8) ! {
s.overflow = true
return error('chacha20: internal counter overflow')
}
mut idx := 0
// process for full block_size-d msg
for i := 0; i < nr_blocks; i++ {
// for every block_size-d message, we generate a 64-byte key stream block
// and then XOR the message block with the generated key stream
block := unsafe { src[i * block_size..(i + 1) * block_size] }
// process with block_size keystream
s.keystream_with_blocksize(mut dst[i * block_size..(i + 1) * block_size], block)!
ks := s.keystream()!
for j, b in ks {
dst[idx + j] = block[j] ^ b
}
// updates position
idx += block_size
}
// process for remaining partial block
if src.len % block_size != 0 {
last_block := unsafe { src[nr_blocks * block_size..] }
// pad to align with block_size
mut last_bytes := []u8{len: block_size}
_ := copy(mut last_bytes, last_block)
// process the padded last block
s.keystream_with_blocksize(mut last_bytes, last_bytes)!
_ := copy(mut dst[nr_blocks * block_size..], last_bytes)
// generates one 64-bytes keystream block, and xor-ing bytes
// in last_block with the key stream
ks := s.keystream()!
for i, b in last_block {
dst[idx + i] = b ^ ks[i]
}
idx += last_block.len
}
}
// keystream_with_blocksize produces stream from src bytes that aligns with block_size,
// serialized in little-endian form and stored into dst buffer.
// keystream generates and returns a 64-byte block of key stream and increases the internal counter.
@[direct_array_access]
fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) ! {
// ChaCha20 keystream generator was relatively easy to understand.
// Its contains steps:
// - loads current ChaCha20 into temporary state, used for later.
// - performs quarter_round function on this state and returns some new state.
// - adding back the new state with the old state.
// - performs xor-ing between src bytes (loaded as little endian number) with result from previous step.
// - serializes, in little endian form, this xor-ed state into destination buffer.
//
// Makes sure its works for size of multiple of block_size
if dst.len != src.len || dst.len % block_size != 0 {
return error('chacha20: internal error: wrong dst and/or src length')
}
// check if this stream has reached the counter limit
if s.overflow {
return error('chacha20: internal counter has reached the limit, please rekey')
}
// check for counter overflow when processing number of blocks
num_blocks := (u64(src.len) + block_size - 1) / block_size
if s.check_ctr(num_blocks) {
s.overflow = true
return error('chacha20.check_ctr: internal counter overflow')
}
// load state from current stream
mut st := s.new_curr_state()
// clone the state
mut st_c := clone_state(st)
fn (mut s Stream) keystream() ![]u8 {
// initializes current state and working state
mut awal := s.new_curr_state()
mut ws := clone_state(awal)
// precomputes cache counter-independent values
if s.mode == .standard && !s.precomp {
s.precomp(st)
s.precomp(awal)
}
// remaining first column round
if s.mode == .standard {
mut fcr := Quartet{awal[0], awal[4], awal[8], awal[12]}
qround_on_quartet(mut fcr)
// First diagonal round.
qround_on_state_with_quartet(mut ws, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10, 15)
qround_on_state_with_quartet(mut ws, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11, 12)
qround_on_state_with_quartet(mut ws, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8, 13)
qround_on_state_with_quartet(mut ws, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9, 14)
}
mut idx := 0
mut src_len := src.len
for src_len >= block_size {
if s.mode == .standard {
// remaining first column round
mut fcr := Quartet{st[0], st[4], st[8], st[12]}
qround_on_quartet(mut fcr)
// The remaining quarter rounds
//
// For standard variant, the first column-round was already precomputed,
// For original variant, its use full quarter round number.
//
// perform chacha20 quarter round n-times
n := if s.mode == .standard { 9 } else { default_qround_nr }
for i := 0; i < n; i++ {
// Column-round
// 0 | 1 | 2 | 3
// 4 | 5 | 6 | 7
// 8 | 9 | 10 | 11
// 12 | 13 | 14 | 15
qround_on_state(mut ws, 0, 4, 8, 12) // 0
qround_on_state(mut ws, 1, 5, 9, 13) // 1
qround_on_state(mut ws, 2, 6, 10, 14) // 2
qround_on_state(mut ws, 3, 7, 11, 15) // 3
// First diagonal round.
qround_on_state_with_quartet(mut st_c, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10,
15)
qround_on_state_with_quartet(mut st_c, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11,
12)
qround_on_state_with_quartet(mut st_c, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8,
13)
qround_on_state_with_quartet(mut st_c, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9,
14)
}
// The remaining rounds
//
// For standard variant, the first column-round was already precomputed,
// For original variant, its use full quarter round number.
// perform chacha20 quarter round n-times
n := if s.mode == .standard { 9 } else { default_qround_nr }
for i := 0; i < n; i++ {
// Column-round
// 0 | 1 | 2 | 3
// 4 | 5 | 6 | 7
// 8 | 9 | 10 | 11
// 12 | 13 | 14 | 15
qround_on_state(mut st_c, 0, 4, 8, 12) // 0
qround_on_state(mut st_c, 1, 5, 9, 13) // 1
qround_on_state(mut st_c, 2, 6, 10, 14) // 2
qround_on_state(mut st_c, 3, 7, 11, 15) // 3
// Diagonal round.
// 0 \ 1 \ 2 \ 3
// 5 \ 6 \ 7 \ 4
// 10 \ 11 \ 8 \ 9
// 15 \ 12 \ 13 \ 14
qround_on_state(mut st_c, 0, 5, 10, 15)
qround_on_state(mut st_c, 1, 6, 11, 12)
qround_on_state(mut st_c, 2, 7, 8, 13)
qround_on_state(mut st_c, 3, 4, 9, 14)
}
// add back keystream result to initial state, xor-ing with the src and stores into dst
for i := 0; i < 16; i++ {
src_block := unsafe { src[idx + (i * 4)..idx + (i + 1) * 4] }
add_xored := binary.little_endian_u32(src_block) ^ (st_c[i] + st[i])
binary.little_endian_put_u32(mut dst[idx + (i * 4)..idx + (i + 1) * 4], add_xored)
}
// increases Stream's internal counter
if s.mode == .original {
st[12] += 1
// first counter reset ?
if st[12] == 0 {
// increase second counter, if reset, mark as an overflow and return error
st[13] += 1
if st[13] == 0 {
s.overflow = true
return error('chacha20.keystream_with_blocksize: 64-bit counter reached')
}
}
// store the counter
s.nonce[0] = st[12]
s.nonce[1] = st[13]
} else {
st[12] += 1
if st[12] == 0 {
s.overflow = true
return error('chacha20.keystream_with_blocksize: overflow 32-bit counter')
}
s.nonce[0] = st[12]
}
// updates index
idx += block_size
src_len -= block_size
// Diagonal round.
// 0 \ 1 \ 2 \ 3
// 5 \ 6 \ 7 \ 4
// 10 \ 11 \ 8 \ 9
// 15 \ 12 \ 13 \ 14
qround_on_state(mut ws, 0, 5, 10, 15)
qround_on_state(mut ws, 1, 6, 11, 12)
qround_on_state(mut ws, 2, 7, 8, 13)
qround_on_state(mut ws, 3, 4, 9, 14)
}
// Add the initial state values to the working state values.
// We don't perform the XOR here; it is done in xor_key_stream and/or keystream_full.
for i, _ in ws {
ws[i] += awal[i]
}
// increases stream internal counter
s.inc_ctr(mut awal)!
// serializes current working state in little-endian form
mut block := []u8{len: block_size}
for i, v in ws {
block[i * 4] = u8(v)
block[i * 4 + 1] = u8(v >> 8)
block[i * 4 + 2] = u8(v >> 16)
block[i * 4 + 3] = u8(v >> 24)
}
return block
}
// precomp does quarter round on counter-independent quartet values on running state st.
// precomp performs quarter round on counter-independent quartet values on running state st.
@[direct_array_access; inline]
fn (mut s Stream) precomp(st State) {
mut pcr1 := Quartet{st[1], st[5], st[9], st[13]}
@ -349,7 +305,7 @@ fn (b Stream) ctr() u64 {
}
}
// set_ctr sets Stream's counter
// set_ctr sets stream internal counter
@[direct_array_access; inline]
fn (mut b Stream) set_ctr(ctr u64) {
match b.mode {
@ -380,6 +336,34 @@ fn (b Stream) check_ctr(value u64) bool {
return false
}
// inc_ctr bumps the counter held in state `st` by one and copies the new value
// back into the stream's nonce words.
//
// Element 12 of `st` is always incremented. In `.original` mode, when that element
// becomes zero, element 13 is incremented as well; both are then stored into
// s.nonce[0] and s.nonce[1]. In every other mode only element 12 is used and stored
// into s.nonce[0].
//
// When the last counter element of the active mode becomes zero after the increment,
// s.overflow is set to true and an error is returned without updating s.nonce.
@[direct_array_access]
fn (mut s Stream) inc_ctr(mut st State) ! {
	// both variants start by advancing the first counter element
	st[12] += 1
	wrapped := st[12] == 0
	if s.mode != .original {
		// single counter element: reaching zero again means it is exhausted
		if wrapped {
			s.overflow = true
			return error('chacha20.keystream: overflow 32-bit counter')
		}
		s.nonce[0] = st[12]
	} else {
		if wrapped {
			// carry into the second counter element; if that one also
			// comes back to zero, the whole counter is exhausted
			st[13] += 1
			if st[13] == 0 {
				s.overflow = true
				return error('chacha20.keystream: 64-bit counter reached')
			}
		}
		// persist both counter elements
		s.nonce[0] = st[12]
		s.nonce[1] = st[13]
	}
}
// max_ctr returns maximum counter value of this stream variant
@[inline]
fn (b Stream) max_ctr() u64 {

View File

@ -22,13 +22,13 @@ fn test_stream_counter_handling() ! {
assert ctx.Stream.overflow == false
assert ctx.Stream.ctr() == max_64bit_counter
// after above process the counter should have at the maximum limit
// we use keystream_with_blocksize to test this counter handling, because
// after above process, the counter should reach the maximum limit
// we use keystream_full to test this counter handling, because
// xor_key_stream would panic on counter reset
msg1 := []u8{len: block_size}
ctx.Stream.keystream_with_blocksize(mut dst[..block_size], msg1) or {
ctx.Stream.keystream_full(mut dst[..block_size], msg1) or {
assert ctx.Stream.overflow == true
assert err == error('chacha20.check_ctr: internal counter overflow')
assert err == error('chacha20: internal counter overflow')
return
}
}
@ -58,7 +58,7 @@ fn test_state_of_chacha20_block_simple() ! {
mut block := []u8{len: block_size}
stream.set_ctr(1)
stream.keystream_with_blocksize(mut block, block)!
stream.keystream_full(mut block, block)!
expected_raw_bytes := '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e'
exp_bytes := hex.decode(expected_raw_bytes)!
@ -66,7 +66,7 @@ fn test_state_of_chacha20_block_simple() ! {
assert block == exp_bytes
}
fn test_keystream_with_blocksize() ! {
fn test_keystream_encryption() ! {
for val in blocks_testcases {
key := hex.decode(val.key)!
nonce := hex.decode(val.nonce)!
@ -75,7 +75,7 @@ fn test_keystream_with_blocksize() ! {
stream.set_ctr(val.counter)
mut block := []u8{len: block_size}
stream.keystream_with_blocksize(mut block, block)!
stream.keystream_full(mut block, block)!
exp_bytes := hex.decode(val.output)!
assert block == exp_bytes