x.crypto.chacha20: improves the internals of chacha20, add a bench (#25311)

This commit is contained in:
blackshirt 2025-09-15 23:20:43 +07:00 committed by GitHub
parent 5d98162960
commit 89a24958a1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 835 additions and 626 deletions

View File

@ -0,0 +1,57 @@
// This is a benchmark for `x.crypto.chacha20` encryption and decryption
//
// Output on my machine before the patch
//
// Chacha20 Encryption
// -----------
// Iterations: 10000 Total Duration: 76.045ms ns/op: 7604 B/op: 4 allocs/op: 2
//
// ChaCha20 Decryption
// -----------
// Iterations: 10000 Total Duration: 71.275ms ns/op: 7127 B/op: 11 allocs/op: 14
//
// After the patch
// Chacha20 Encryption
// -----------
// Iterations: 10000 Total Duration: 46.833ms ns/op: 4683 B/op: 11 allocs/op: 11
//
// ChaCha20 Decryption
// -----------
// Iterations: 10000 Total Duration: 48.242ms ns/op: 4824 B/op: 3 allocs/op: 4
//
import x.benchmark
import encoding.hex
import x.crypto.chacha20
// Fixed benchmark inputs, hex-decoded once at program start:
// a randomly generated 32-byte key and a 12-byte nonce.
const key = hex.decode('9d9603f4fc460e273b80795ea50eab5873c04f589226c7d591b5336feb32fcba')!
const nonce = hex.decode('9a3c83e4236ea9a2c4e482da')!
// The message fed to the encryption benchmark.
const plaintext = 'ChaCha20 encrypt decrypt benchmarking message'.bytes()
// The expected ciphertext; it is the input of the decryption benchmark.
const ciphertext = hex.decode('dbddb264e4c478d96805b2d557649232b4b3f37c51035464d12e3675e5e36ce6f6822b49dd6494ccd5213a89c9')!
// bench_chacha20_encrypt is the benchmarked unit for encryption: one call to
// `chacha20.encrypt` with the fixed key, nonce and plaintext.
// The produced ciphertext is discarded; an error from `encrypt` is propagated.
fn bench_chacha20_encrypt() ! {
_ := chacha20.encrypt(key, nonce, plaintext)!
}
// bench_chacha20_decrypt is the benchmarked unit for decryption: one call to
// `chacha20.decrypt` with the fixed key, nonce and ciphertext.
// The recovered plaintext is discarded; an error from `decrypt` is propagated.
fn bench_chacha20_decrypt() ! {
_ := chacha20.decrypt(key, nonce, ciphertext)!
}
// main runs the encryption benchmark and then the decryption benchmark,
// each for 10000 iterations, printing a small heading before every run.
fn main() {
	config := benchmark.BenchmarkDefaults{
		n: 10000
	}

	// encryption pass
	println('Chacha20 Encryption')
	println('-----------')
	mut enc_bench := benchmark.setup(bench_chacha20_encrypt, config)!
	enc_bench.run()

	// blank separator line between the two reports
	println('')

	// decryption pass
	println('ChaCha20 Decryption')
	println('-----------')
	mut dec_bench := benchmark.setup(bench_chacha20_decrypt, config)!
	dec_bench.run()
}

View File

@ -5,9 +5,7 @@
// Chacha20 symmetric key stream cipher encryption based on RFC 8439 // Chacha20 symmetric key stream cipher encryption based on RFC 8439
module chacha20 module chacha20
import math.bits
import crypto.internal.subtle import crypto.internal.subtle
import encoding.binary
// The size of ChaCha20 key, ie 256 bits size, in bytes // The size of ChaCha20 key, ie 256 bits size, in bytes
pub const key_size = 32 pub const key_size = 32
@ -34,35 +32,35 @@ enum CipherMode {
original original
} }
// encrypt is a one-shot helper that encrypts `plaintext` with the ChaCha20
// key stream derived from `key` and `nonce`.
// It is a thin wrapper that supports, among others, the 96-bit nonce of
// ChaCha20 and the 192-bit nonce of XChaCha20, and it starts with a counter
// value of 0. Use a `Cipher` instance and set the counter yourself if you
// need more control.
// It returns a newly allocated buffer with the same length as `plaintext`,
// or the error reported while setting up the key stream.
pub fn encrypt(key []u8, nonce []u8, plaintext []u8) ![]u8 {
	mut ks := new_stream(key, nonce)!
	mut out := []u8{len: plaintext.len}
	ks.keystream_full(mut out, plaintext)
	return out
}
// decrypt reverses `encrypt`: it applies the ChaCha20 key stream derived from
// `key` and `nonce` to `ciphertext`.
// It returns a newly allocated buffer with the same length as `ciphertext`,
// or the error reported while setting up the key stream.
pub fn decrypt(key []u8, nonce []u8, ciphertext []u8) ![]u8 {
	mut ks := new_stream(key, nonce)!
	mut out := []u8{len: ciphertext.len}
	ks.keystream_full(mut out, ciphertext)
	return out
}
// Cipher represents ChaCha20 stream cipher instances. // Cipher represents ChaCha20 stream cipher instances.
@[noinit]
pub struct Cipher { pub struct Cipher {
// The mode of ChaCha20 cipher, set on cipher's creation. Stream
mode CipherMode = .standard
mut: mut:
// The internal's of ChaCha20 states contains 512 bits (64 bytes), contains of
// 4 words (16 bytes) of ChaCha20 constants,
// 8 words (32 bytes) of ChaCha20 keys,
// 4 words (16 bytes) of raw nonces, with internal counter, support for 32 and 64 bit counters.
key [8]u32
nonce [4]u32
// Flag that tells whether this cipher was an extended XChaCha20 standard variant.
// only make sense when mode == .standard
extended bool
// internal buffer for storing key stream results // internal buffer for storing key stream results
block []u8 = []u8{len: block_size} block []u8 = []u8{len: block_size}
// The last length of leftover unprocessed keystream from internal buffer // The last length of leftover unprocessed keystream from internal buffer
length int length int
// Additional fields, following the Go version. They are mainly used to optimize
// standard IETF cipher operations by pre-caching some quarter_round steps.
// vfmt off
precomp bool
p1 u32 p5 u32 p9 u32 p13 u32
p2 u32 p6 u32 p10 u32 p14 u32
p3 u32 p7 u32 p11 u32 p15 u32
// vfmt on
} }
// new_cipher creates a new ChaCha20 stream cipher with the given 32 bytes key // new_cipher creates a new ChaCha20 stream cipher with the given 32 bytes key
@ -71,57 +69,11 @@ mut:
// with support for 64-bit counter, use 8 bytes length nonce's instead // with support for 64-bit counter, use 8 bytes length nonce's instead
// If 24 bytes of nonce was provided, the XChaCha20 construction will be used. // If 24 bytes of nonce was provided, the XChaCha20 construction will be used.
// It returns new ChaCha20 cipher instance or an error if key or nonce have any other length. // It returns new ChaCha20 cipher instance or an error if key or nonce have any other length.
@[direct_array_access]
pub fn new_cipher(key []u8, nonce []u8) !&Cipher { pub fn new_cipher(key []u8, nonce []u8) !&Cipher {
if key.len != key_size { stream := new_stream(key, nonce)!
return error('Bad key size provided') return &Cipher{
Stream: stream
} }
mut mode := CipherMode.standard
mut extended := false
match nonce.len {
nonce_size {}
x_nonce_size {
extended = true
}
orig_nonce_size {
mode = .original
}
else {
return error('Unsupported nonce size')
}
}
mut c := &Cipher{
mode: mode
extended: extended
}
// we dont need reset on new cipher instance
c.do_rekey(key, nonce)!
return c
}
// encrypt is a one-shot helper that encrypts `plaintext` with a ChaCha20
// cipher created from `key` and `nonce`.
// It is a thin wrapper that supports, among others, the 96-bit nonce of
// ChaCha20 and the 192-bit nonce of XChaCha20, and it starts with a counter
// value of 0. Use a `Cipher` instance and set the counter yourself if you
// need more control.
// It returns a newly allocated buffer with the same length as `plaintext`,
// or the error reported by `new_cipher`.
pub fn encrypt(key []u8, nonce []u8, plaintext []u8) ![]u8 {
	mut cipher := new_cipher(key, nonce)!
	mut ciphertext := []u8{len: plaintext.len}
	cipher.encrypt(mut ciphertext, plaintext)
	// clear the cipher's fields before handing the result back
	unsafe { cipher.reset() }
	return ciphertext
}
// decrypt does reverse of encrypt operation by decrypting ciphertext with ChaCha20 cipher
// instance with provided key and nonce.
pub fn decrypt(key []u8, nonce []u8, ciphertext []u8) ![]u8 {
mut c := new_cipher(key, nonce)!
mut out := []u8{len: ciphertext.len}
c.encrypt(mut out, ciphertext)
unsafe { c.reset() }
return out
} }
// xor_key_stream xors each byte in the given slice in the src with a byte from the // xor_key_stream xors each byte in the given slice in the src with a byte from the
@ -140,6 +92,12 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) {
mut idx := 0 mut idx := 0
mut src_len := src.len mut src_len := src.len
// check for counter overflow
num_blocks := (u64(src_len) + block_size - 1) / block_size
if c.Stream.check_ctr(num_blocks) {
panic('chacha20: internal counter overflow')
}
dst = unsafe { dst[..src_len] } dst = unsafe { dst[..src_len] }
if subtle.inexact_overlap(dst, src) { if subtle.inexact_overlap(dst, src) {
@ -162,22 +120,14 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) {
idx += kstream.len idx += kstream.len
src_len -= kstream.len src_len -= kstream.len
} }
if src_len == 0 {
return
}
// check for counter overflow
num_blocks := (u64(src_len) + block_size - 1) / block_size
if c.check_for_ctr_overflow(num_blocks) {
panic('chacha20: internal counter overflow')
}
// take the most full bytes of multiples block_size from the src, // take the most full bytes of multiples block_size from the src,
// build the keystream from the cipher's state and stores the result // build the keystream from the cipher's state and stores the result
// into dst // into dst
full := src_len - src_len % block_size full := src_len - src_len % block_size
if full > 0 { if full > 0 {
c.chacha20_block_generic(mut dst[idx..idx + full], src[idx..idx + full]) src_block := unsafe { src[idx..idx + full] }
c.Stream.keystream_with_blocksize(mut dst[idx..idx + full], src_block)
} }
idx += full idx += full
src_len -= full src_len -= full
@ -185,14 +135,13 @@ pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) {
// If we have a partial block, pad it for chacha20_block_generic, and // If we have a partial block, pad it for chacha20_block_generic, and
// keep the leftover keystream for the next invocation. // keep the leftover keystream for the next invocation.
if src_len > 0 { if src_len > 0 {
// Make sure, internal buffer cleared with the new one // Make sure, internal buffer cleared or the old garbaged data from previous call still there
// or the old garbaged data from previous call still there // See the issue at https://github.com/vlang/v/issues/24043
// See https://github.com/vlang/v/issues/24043 unsafe { c.block.reset() } // = []u8{len: block_size}
c.block = []u8{len: block_size}
// copy the last src block to internal buffer, and performs // copy the last src block to internal buffer, and performs
// chacha20_block_generic on this buffer, and stores into remaining dst // chacha20_block_generic on this buffer, and stores into remaining dst
_ := copy(mut c.block, src[idx..]) _ := copy(mut c.block, src[idx..])
c.chacha20_block_generic(mut c.block, c.block) c.Stream.keystream_with_blocksize(mut c.block, c.block)
n := copy(mut dst[idx..], c.block) n := copy(mut dst[idx..], c.block)
// the length of remaining bytes of unprocessed keystream // the length of remaining bytes of unprocessed keystream
c.length = block_size - n c.length = block_size - n
@ -210,183 +159,13 @@ pub fn (mut c Cipher) encrypt(mut dst []u8, src []u8) {
return return
} }
if dst.len < src.len { if dst.len < src.len {
panic('chacha20/chacha: dst buffer is to small') panic('chacha20: dst buffer is to small')
} }
if subtle.inexact_overlap(dst, src) { if subtle.inexact_overlap(dst, src) {
panic('chacha20: invalid buffer overlap') panic('chacha20: invalid buffer overlap')
} }
nr_blocks := src.len / block_size c.Stream.keystream_full(mut dst, src)
for i := 0; i < nr_blocks; i++ {
// get current src block to be xor-ed
block := unsafe { src[i * block_size..(i + 1) * block_size] }
// build keystream, xor-ed with the block and stores into dst
c.chacha20_block_generic(mut dst[i * block_size..(i + 1) * block_size], block)
}
// process for partial block
if src.len % block_size != 0 {
// get the remaining last partial block
block := unsafe { src[nr_blocks * block_size..] }
// pad it into block_size, and then performs chacha20_block_generic
// on this src_block
mut src_block := []u8{len: block_size}
_ := copy(mut src_block, block)
c.chacha20_block_generic(mut src_block, src_block)
// copy the src_block key stream result into desired dst
n := copy(mut dst[nr_blocks * block_size..], src_block)
assert n == block.len
}
}
// chacha20_block_generic generates a generic ChaCha20 keystream.
// This is main building block for ChaCha20 keystream generator.
// This routine was intended to work only for msg source with multiples of block_size in size.
@[direct_array_access]
fn (mut c Cipher) chacha20_block_generic(mut dst []u8, src []u8) {
// ChaCha20 keystream generator was relatively easy to understand.
// Its contains steps:
// - Loads current ChaCha20 into temporary state, used for later.
// - Performs quarter_round function on this state and returns some new state.
// - Adds back the new state with the old state.
// - Performs xor-ing between src bytes (loaded as little endian number) with result from previous step.
// - Serializes, in little endian form, this xor-ed state into destination buffer.
//
// Makes sure its works for size of multiple of block_size
if dst.len != src.len || dst.len % block_size != 0 {
panic('chacha20: internal error: wrong dst and/or src length')
}
// check for counter overflow
num_blocks := u64((src.len + block_size - 1) / block_size)
if c.check_for_ctr_overflow(num_blocks) {
panic('chacha20: internal counter overflow')
}
// initializes ChaCha20 state
// 0:cccccccc 1:cccccccc 2:cccccccc 3:cccccccc
// 4:kkkkkkkk 5:kkkkkkkk 6:kkkkkkkk 7:kkkkkkkk
// 8:kkkkkkkk 9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk
// 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn
//
// where c=constant k=key b=blockcounter n=nonce
c0, c1, c2, c3 := cc0, cc1, cc2, cc3
c4, c5, c6, c7 := c.key[0], c.key[1], c.key[2], c.key[3]
c8, c9, c10, c11 := c.key[4], c.key[5], c.key[6], c.key[7]
// internal cipher's counter
mut c12 := c.nonce[0]
mut c13 := c.nonce[1]
c14, c15 := c.nonce[2], c.nonce[3]
// copy current cipher's states into temporary states
mut x0, mut x1, mut x2, mut x3 := c0, c1, c2, c3
mut x4, mut x5, mut x6, mut x7 := c4, c5, c6, c7
mut x8, mut x9, mut x10, mut x11 := c8, c9, c10, c11
mut x12, mut x13, mut x14, mut x15 := c12, c13, c14, c15
// this only for standard mode
if c.mode == .standard {
// precomputes three first column rounds that do not depend on counter
if !c.precomp {
c.p1, c.p5, c.p9, c.p13 = quarter_round(c1, c5, c9, c13)
c.p2, c.p6, c.p10, c.p14 = quarter_round(c2, c6, c10, c14)
c.p3, c.p7, c.p11, c.p15 = quarter_round(c3, c7, c11, c15)
c.precomp = true
}
}
mut idx := 0
mut src_len := src.len
for src_len >= block_size {
if c.mode == .standard {
// this for standard mode
// remaining first column round
fcr0, fcr4, fcr8, fcr12 := quarter_round(c0, c4, c8, c12)
// The second diagonal round.
x0, x5, x10, x15 = quarter_round(fcr0, c.p5, c.p10, c.p15)
x1, x6, x11, x12 = quarter_round(c.p1, c.p6, c.p11, fcr12)
x2, x7, x8, x13 = quarter_round(c.p2, c.p7, fcr8, c.p13)
x3, x4, x9, x14 = quarter_round(c.p3, fcr4, c.p9, c.p14)
}
// The remaining rounds, for standard its already precomputed,
// for original, its use full quarter round
n := if c.mode == .standard { 9 } else { 10 }
for i := 0; i < n; i++ {
// Column round.
// 0 | 1 | 2 | 3
// 4 | 5 | 6 | 7
// 8 | 9 | 10 | 11
// 12 | 13 | 14 | 15
x0, x4, x8, x12 = quarter_round(x0, x4, x8, x12)
x1, x5, x9, x13 = quarter_round(x1, x5, x9, x13)
x2, x6, x10, x14 = quarter_round(x2, x6, x10, x14)
x3, x7, x11, x15 = quarter_round(x3, x7, x11, x15)
// Diagonal round.
// 0 \ 1 \ 2 \ 3
// 5 \ 6 \ 7 \ 4
// 10 \ 11 \ 8 \ 9
// 15 \ 12 \ 13 \ 14
x0, x5, x10, x15 = quarter_round(x0, x5, x10, x15)
x1, x6, x11, x12 = quarter_round(x1, x6, x11, x12)
x2, x7, x8, x13 = quarter_round(x2, x7, x8, x13)
x3, x4, x9, x14 = quarter_round(x3, x4, x9, x14)
}
// add back keystream result to initial state, xor-ing with the src and stores into dst
binary.little_endian_put_u32(mut dst[idx + 0..idx + 4], binary.little_endian_u32(src[idx + 0..
idx + 4]) ^ (x0 + c0))
binary.little_endian_put_u32(mut dst[idx + 4..idx + 8], binary.little_endian_u32(src[idx + 4..
idx + 8]) ^ (x1 + c1))
binary.little_endian_put_u32(mut dst[idx + 8..idx + 12], binary.little_endian_u32(src[idx +
8..idx + 12]) ^ (x2 + c2))
binary.little_endian_put_u32(mut dst[idx + 12..idx + 16], binary.little_endian_u32(src[
idx + 12..idx + 16]) ^ (x3 + c3))
binary.little_endian_put_u32(mut dst[idx + 16..idx + 20], binary.little_endian_u32(src[
idx + 16..idx + 20]) ^ (x4 + c4))
binary.little_endian_put_u32(mut dst[idx + 20..idx + 24], binary.little_endian_u32(src[
idx + 20..idx + 24]) ^ (x5 + c5))
binary.little_endian_put_u32(mut dst[idx + 24..idx + 28], binary.little_endian_u32(src[
idx + 24..idx + 28]) ^ (x6 + c6))
binary.little_endian_put_u32(mut dst[idx + 28..idx + 32], binary.little_endian_u32(src[
idx + 28..idx + 32]) ^ (x7 + c7))
binary.little_endian_put_u32(mut dst[idx + 32..idx + 36], binary.little_endian_u32(src[
idx + 32..idx + 36]) ^ (x8 + c8))
binary.little_endian_put_u32(mut dst[idx + 36..idx + 40], binary.little_endian_u32(src[
idx + 36..idx + 40]) ^ (x9 + c9))
binary.little_endian_put_u32(mut dst[idx + 40..idx + 44], binary.little_endian_u32(src[
idx + 40..idx + 44]) ^ (x10 + c10))
binary.little_endian_put_u32(mut dst[idx + 44..idx + 48], binary.little_endian_u32(src[
idx + 44..idx + 48]) ^ (x11 + c11))
binary.little_endian_put_u32(mut dst[idx + 48..idx + 52], binary.little_endian_u32(src[
idx + 48..idx + 52]) ^ (x12 + c12))
binary.little_endian_put_u32(mut dst[idx + 52..idx + 56], binary.little_endian_u32(src[
idx + 52..idx + 56]) ^ (x13 + c13))
binary.little_endian_put_u32(mut dst[idx + 56..idx + 60], binary.little_endian_u32(src[
idx + 56..idx + 60]) ^ (x14 + c14))
binary.little_endian_put_u32(mut dst[idx + 60..idx + 64], binary.little_endian_u32(src[
idx + 60..idx + 64]) ^ (x15 + c15))
// Updates internal counter
//
// Its safe to update internal counter, its already checked before.
if c.mode == .original {
mut curr_ctr := u64(c13) << 32 | u64(c12)
curr_ctr += 1
// stores back the counter
c.nonce[0] = u32(curr_ctr)
c.nonce[1] = u32(curr_ctr >> 32)
} else {
c12 += 1
c.nonce[0] = c12
}
idx += block_size
src_len -= block_size
}
} }
// free the resources taken by the Cipher `c`. Dont use cipher after .free call // free the resources taken by the Cipher `c`. Dont use cipher after .free call
@ -403,171 +182,30 @@ pub fn (mut c Cipher) free() {
// reset quickly sets all Cipher's fields to default value // reset quickly sets all Cipher's fields to default value
@[unsafe] @[unsafe]
pub fn (mut c Cipher) reset() { pub fn (mut c Cipher) reset() {
c.Stream.reset()
unsafe { unsafe {
_ := vmemset(&c.key, 0, 32)
_ := vmemset(&c.nonce, 0, 16)
c.block.reset() c.block.reset()
} }
c.length = 0 c.length = 0
c.precomp = false
c.p1, c.p5, c.p9, c.p13 = u32(0), u32(0), u32(0), u32(0)
c.p2, c.p6, c.p10, c.p14 = u32(0), u32(0), u32(0), u32(0)
c.p3, c.p7, c.p11, c.p15 = u32(0), u32(0), u32(0), u32(0)
} }
// set_counter sets Cipher's counter // set_counter sets Cipher's counter
@[direct_array_access; inline]
pub fn (mut c Cipher) set_counter(ctr u64) { pub fn (mut c Cipher) set_counter(ctr u64) {
match c.mode { c.Stream.set_ctr(ctr)
.original {
c.nonce[0] = u32(ctr)
c.nonce[1] = u32(ctr >> 32)
}
.standard {
// check for ctr value that may exceed the counter limit
if ctr > max_32bit_counter {
panic('set_counter: counter value exceed the limit ')
}
c.nonce[0] = u32(ctr)
}
}
} }
// rekey resets internal Cipher's state and reinitializes state with the provided key and nonce // rekey resets internal Cipher's state and reinitializes state with the provided key and nonce
pub fn (mut c Cipher) rekey(key []u8, nonce []u8) ! { pub fn (mut c Cipher) rekey(key []u8, nonce []u8) ! {
unsafe { c.reset() } unsafe { c.reset() }
// this routine was publicly accessible to user, so we add a check here stream := new_stream(key, nonce)!
// to ensure the supplied key and nonce has the correct size. c.Stream = stream
if key.len != key_size {
return error('Bad key size provided for rekey')
}
// For the standard cipher, allowed nonce size was nonce_size or x_nonce_size
if c.mode == .standard {
if nonce.len != x_nonce_size && nonce.len != nonce_size {
return error('Bad nonce size for standard cipher, use 12 or 24 bytes length nonce')
}
if c.extended && nonce.len != x_nonce_size {
return error('Bad nonce size provided for extended variant cipher')
}
}
// in the original variant, nonce should be orig_nonce_size length (8 bytes)
if c.mode == .original && nonce.len != orig_nonce_size {
return error('Bad nonce size provided for original mode')
}
c.do_rekey(key, nonce)!
} }
// do_rekey reinitializes ChaCha20 instance with the provided key and nonce. // Helpers
// do_rekey loads `key` and `nonce` into the cipher state without validating
// their sizes; callers are expected to have checked them.
// For a standard-mode cipher with the `extended` flag set, the key and nonce
// are first replaced by the pair returned from `derive_xchacha20_key_nonce`.
// The counter word(s) are set to 0.
@[direct_array_access]
fn (mut c Cipher) do_rekey(key []u8, nonce []u8) ! {
	mut key_bytes := key.clone()
	mut nonce_bytes := nonce.clone()

	// the extended (XChaCha20) variant works on a derived key and nonce
	if c.mode == .standard && c.extended {
		key_bytes, nonce_bytes = derive_xchacha20_key_nonce(key, nonce)!
	}

	// both modes share the same key layout: eight little-endian 32-bit words
	for i in 0 .. 8 {
		c.key[i] = binary.little_endian_u32(key_bytes[i * 4..i * 4 + 4])
	}

	// the counter always starts at zero
	c.nonce[0] = 0
	match c.mode {
		.standard {
			// 32-bit counter followed by three nonce words
			c.nonce[1] = binary.little_endian_u32(nonce_bytes[0..4])
			c.nonce[2] = binary.little_endian_u32(nonce_bytes[4..8])
			c.nonce[3] = binary.little_endian_u32(nonce_bytes[8..12])
		}
		.original {
			// high half of the 64-bit counter, then the 64-bit nonce
			c.nonce[1] = 0
			c.nonce[2] = binary.little_endian_u32(nonce_bytes[0..4])
			c.nonce[3] = binary.little_endian_u32(nonce_bytes[4..8])
		}
	}
}
// Helper and core function for ChaCha20
// //
// quarter_round is the basic building block of the ChaCha algorithm.
// It mixes four 32-bit words with add / xor / rotate-left steps and returns
// the four updated words in the same order (a, b, c, d).
fn quarter_round(a u32, b u32, c u32, d u32) (u32, u32, u32, u32) {
	// Written out in C-like notation, with `<<<=` meaning rotate left:
	//   a += b; d ^= a; d <<<= 16;
	//   c += d; b ^= c; b <<<= 12;
	//   a += b; d ^= a; d <<<= 8;
	//   c += d; b ^= c; b <<<= 7;
	// Each step below produces a new immutable value instead of mutating.
	a1 := a + b
	d1 := bits.rotate_left_32(d ^ a1, 16)

	c1 := c + d1
	b1 := bits.rotate_left_32(b ^ c1, 12)

	a2 := a1 + b1
	d2 := bits.rotate_left_32(d1 ^ a2, 8)

	c2 := c1 + d2
	b2 := bits.rotate_left_32(b1 ^ c2, 7)

	return a2, b2, c2, d2
}
// Cipher's counter handling routine
//
// Named upper bounds for the block counter, defined to simplify access:
// `max_u64` for the 64-bit counter and `max_u32` (widened to u64) for the
// 32-bit counter.
const max_64bit_counter = max_u64
const max_32bit_counter = u64(max_u32)
// load_ctr returns the cipher's current block counter widened to u64.
// In standard mode the counter is the 32-bit word `c.nonce[0]`; in original
// mode it is 64 bits wide, with `c.nonce[0]` as the low half and `c.nonce[1]`
// as the high half.
@[direct_array_access; inline]
fn (c Cipher) load_ctr() u64 {
	low := u64(c.nonce[0])
	if c.mode == .standard {
		return low
	}
	// original mode: combine the high and the low word
	return (u64(c.nonce[1]) << 32) | low
}
// max_ctr_value returns the largest value the cipher's counter may hold:
// `max_64bit_counter` in original mode, `max_32bit_counter` in standard mode.
@[inline]
fn (c Cipher) max_ctr_value() u64 {
	return if c.mode == .original { max_64bit_counter } else { max_32bit_counter }
}
// derive_xchacha20_key_nonce derives a new key and nonces for extended // derive_xchacha20_key_nonce derives a new key and nonces for extended
// variant of standard mode. Its separated for simplify the access. // variant of Standard IETF ChaCha20 variant. Its separated for simplify the access.
@[direct_array_access; inline] @[direct_array_access; inline]
fn derive_xchacha20_key_nonce(key []u8, nonce []u8) !([]u8, []u8) { fn derive_xchacha20_key_nonce(key []u8, nonce []u8) !([]u8, []u8) {
// Its only for x_nonce_size // Its only for x_nonce_size
@ -584,15 +222,3 @@ fn derive_xchacha20_key_nonce(key []u8, nonce []u8) !([]u8, []u8) {
return new_key, new_nonce return new_key, new_nonce
} }
// check_for_ctr_overflow reports whether adding `add_value` to the current
// counter would overflow, i.e. either wrap around the u64 range or exceed
// the maximum counter value of the cipher's mode.
@[direct_array_access; inline]
fn (c Cipher) check_for_ctr_overflow(add_value u64) bool {
	current := c.load_ctr()
	limit := c.max_ctr_value()
	total := current + add_value
	// a wrapped u64 addition yields a sum smaller than one of its operands
	wrapped := total < current || total < add_value
	return wrapped || total > limit
}

View File

@ -1,7 +1,177 @@
module chacha20
import rand
import encoding.hex import encoding.hex
import x.crypto.chacha20
// test_chacha20_block_function checks, for every entry of `blocks_testcases`,
// that XOR-ing one all-zero block at the given counter yields the expected
// raw key stream block.
fn test_chacha20_block_function() ! {
	for tc in blocks_testcases {
		key := hex.decode(tc.key)!
		nonce := hex.decode(tc.nonce)!
		want := hex.decode(tc.output)!

		mut cipher := chacha20.new_cipher(key, nonce)!
		cipher.set_counter(tc.counter)

		// a zeroed block XOR-ed with the key stream is the key stream itself
		mut buf := []u8{len: chacha20.block_size}
		cipher.xor_key_stream(mut buf, buf)
		assert buf == want
	}
}
// test_chacha20_simple_block_function checks a single key stream block,
// generated at counter 1 for a fixed key and nonce, against its known value.
fn test_chacha20_simple_block_function() ! {
	key_bytes := hex.decode('000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f')!
	nonce_bytes := hex.decode('000000090000004a00000000')!
	want := hex.decode('10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e')!

	mut cipher := chacha20.new_cipher(key_bytes, nonce_bytes)!
	cipher.set_counter(1)

	// a zeroed block XOR-ed with the key stream is the key stream itself
	mut buf := []u8{len: chacha20.block_size}
	cipher.xor_key_stream(mut buf, buf)
	assert buf == want
}
// Tests the poly1305 one-time key generator as specified in
// https://datatracker.ietf.org/doc/html/rfc8439#section-2.6.2
// The one-time key is the first `key_size` bytes of the key stream.
fn test_chacha20_onetime_poly1305_key_generation() ! {
	for tc in otk_cases {
		key := hex.decode(tc.key)!
		nonce := hex.decode(tc.nonce)!
		want_otk := hex.decode(tc.otk)!

		mut cipher := chacha20.new_cipher(key, nonce)!
		mut got_otk := []u8{len: chacha20.key_size}
		cipher.xor_key_stream(mut got_otk, got_otk)
		assert got_otk == want_otk
	}
}
// test_xor_key_stream_consecutive checks that successive `xor_key_stream`
// calls on one cipher continue the same key stream across short inputs.
fn test_xor_key_stream_consecutive() {
	// See https://github.com/vlang/v/issues/23977
	key := [u8(64), 116, 63, 11, 221, 199, 187, 110, 217, 68, 0, 50, 65, 79, 24, 10, 124, 174,
		66, 2, 172, 153, 237, 145, 244, 41, 131, 84, 247, 42, 73, 131]
	nonce := [u8(86), 124, 222, 94, 253, 187, 151, 219, 17, 83, 118, 255]
	mut cipher := chacha20.new_cipher(key, nonce)!

	// first chunk: 4 bytes
	first_in := [u8(201), 199, 66, 226]
	first_want := [u8(0), 0, 0, 9]
	mut out := []u8{len: first_in.len}
	cipher.xor_key_stream(mut out, first_in)
	assert out == first_want

	// second, consecutive chunk: 16 bytes that must decode to all zeros
	second_in := [u8(82), 189, 125, 3, 24, 185, 183, 240, 29, 223, 17, 241, 103, 69, 45, 101]
	second_want := []u8{len: 16}
	out = []u8{len: second_in.len}
	cipher.xor_key_stream(mut out, second_in)
	assert out == second_want

	// additional data
	msg := 'billy the kid'.bytes()
	mut out3 := []u8{len: msg.len}
	cipher.xor_key_stream(mut out3, msg)
	// the go version produces: [40 17 78 116 255 224 2 52 92 151 103 107 138]
	assert out3 == [u8(40), 17, 78, 116, 255, 224, 2, 52, 92, 151, 103, 107, 138]
}
// Feeds every chunk of `encoded_data` through one cipher, in order, and checks
// each result against the entry of `expected_data` at the same index.
// See https://github.com/vlang/v/issues/24043
fn test_for_more_consecutive_xor_key_stream() {
	key := [u8(225), 2, 1, 178, 238, 127, 187, 188, 27, 237, 18, 62, 181, 65, 67, 152, 13, 247,
		147, 148, 101, 220, 185, 120, 234, 58, 144, 173, 3, 218, 193, 130]
	nonce := [u8(153), 221, 244, 134, 99, 135, 243, 247, 169, 121, 69, 54]
	mut cipher := chacha20.new_cipher(key, nonce)!

	for idx, chunk in encoded_data {
		want := expected_data[idx]
		mut got := []u8{len: chunk.len}
		cipher.xor_key_stream(mut got, chunk)
		assert got == want
	}
}
// test_chacha20_cipher_plain_encrypt runs the one-shot `chacha20.encrypt`
// over `xorkeystream_testcases`; no counter is set explicitly.
fn test_chacha20_cipher_plain_encrypt() ! {
	for tc in xorkeystream_testcases {
		key := hex.decode(tc.key)!
		nonce := hex.decode(tc.nonce)!
		plaintext := hex.decode(tc.input)!
		want := hex.decode(tc.output)!

		got := chacha20.encrypt(key, nonce, plaintext)!
		assert got == want
	}
}
// test_chacha20_cipher_plain_decrypt runs the one-shot `chacha20.decrypt`
// over `xorkeystream_testcases`; no counter is set explicitly.
fn test_chacha20_cipher_plain_decrypt() ! {
	for tc in xorkeystream_testcases {
		key := hex.decode(tc.key)!
		nonce := hex.decode(tc.nonce)!
		want_plain := hex.decode(tc.input)!
		ciphertext := hex.decode(tc.output)!

		got_plain := chacha20.decrypt(key, nonce, ciphertext)!
		assert got_plain == want_plain
	}
}
// test_chacha20_cipher_encrypt_with_xor_keystream encrypts every entry of
// `encryption_test_cases`, starting at the case's counter, through both public
// paths of `Cipher`: the one-shot `encrypt` and the streaming `xor_key_stream`.
// Both must produce the expected ciphertext. The streaming path is checked
// explicitly because the test name promises it.
fn test_chacha20_cipher_encrypt_with_xor_keystream() ! {
	for c in encryption_test_cases {
		key_bytes := hex.decode(c.key)!
		nonce_bytes := hex.decode(c.nonce)!
		plaintext_bytes := hex.decode(c.plaintext)!
		expected := hex.decode(c.output)!

		// one-shot path
		mut cs := chacha20.new_cipher(key_bytes, nonce_bytes)!
		cs.set_counter(c.counter)
		mut output := []u8{len: plaintext_bytes.len}
		cs.encrypt(mut output, plaintext_bytes)
		assert output == expected

		// streaming path, on a fresh cipher so the counter starts at the same value
		mut xs := chacha20.new_cipher(key_bytes, nonce_bytes)!
		xs.set_counter(c.counter)
		mut streamed := []u8{len: plaintext_bytes.len}
		xs.xor_key_stream(mut streamed, plaintext_bytes)
		assert streamed == expected
	}
}
// test_chacha20_cipher_decrypt_with_xor_keystream decrypts the ciphertext of
// every entry of `encryption_test_cases`, starting at the case's counter,
// through both public paths of `Cipher`: the one-shot `encrypt` and the
// streaming `xor_key_stream`. Both must recover the plaintext. The streaming
// path is checked explicitly because the test name promises it.
fn test_chacha20_cipher_decrypt_with_xor_keystream() ! {
	for c in encryption_test_cases {
		key_bytes := hex.decode(c.key)!
		nonce_bytes := hex.decode(c.nonce)!
		ciphertext := hex.decode(c.output)!
		expected_decrypted_message := hex.decode(c.plaintext)!

		// one-shot path
		mut cs := chacha20.new_cipher(key_bytes, nonce_bytes)!
		cs.set_counter(c.counter)
		mut output := []u8{len: ciphertext.len}
		cs.encrypt(mut output, ciphertext)
		assert output == expected_decrypted_message

		// streaming path, on a fresh cipher so the counter starts at the same value
		mut xs := chacha20.new_cipher(key_bytes, nonce_bytes)!
		xs.set_counter(c.counter)
		mut streamed := []u8{len: ciphertext.len}
		xs.xor_key_stream(mut streamed, ciphertext)
		assert streamed == expected_decrypted_message
	}
}
// test_chacha20_no_overlap_xor_key_stream encrypts every entry of
// `xorkeystream_testcases` with `xor_key_stream` into a separate output
// buffer, checks the ciphertext, then rekeys the cipher and decrypts the
// buffer in place to recover the input.
fn test_chacha20_no_overlap_xor_key_stream() ! {
	for t in xorkeystream_testcases {
		key := hex.decode(t.key)!
		nonce := hex.decode(t.nonce)!
		input := hex.decode(t.input)!
		expected := hex.decode(t.output)!

		mut cs := chacha20.new_cipher(key, nonce)!
		mut output := []u8{len: input.len}
		cs.xor_key_stream(mut output, input)
		// the ciphertext was previously computed but never verified
		assert output == expected

		// for decryption, we can not use cs.xor_key_stream directly on the output bytes:
		// the cipher has advanced its counter during the encryption phase.
		// Rekeying with the same key and nonce starts the key stream over.
		cs.rekey(key, nonce)!
		cs.xor_key_stream(mut output, output)
		assert output == input
	}
}
const encoded_data = [ const encoded_data = [
[u8(231), 121, 9, 28], [u8(231), 121, 9, 28],
@ -40,69 +210,6 @@ const expected_data = [
[u8(164), 169, 216, 98, 61, 175, 20, 175], [u8(164), 169, 216, 98, 61, 175, 20, 175],
] ]
// Runs each chunk of `encoded_data` through the same cipher, in order, and
// compares the result with the matching entry of `expected_data`.
// See https://github.com/vlang/v/issues/24043
fn test_for_more_consecutive_xor_key_stream() {
	key := [u8(225), 2, 1, 178, 238, 127, 187, 188, 27, 237, 18, 62, 181, 65, 67, 152, 13, 247,
		147, 148, 101, 220, 185, 120, 234, 58, 144, 173, 3, 218, 193, 130]
	nonce := [u8(153), 221, 244, 134, 99, 135, 243, 247, 169, 121, 69, 54]
	mut cipher := new_cipher(key, nonce)!

	for idx, chunk in encoded_data {
		want := expected_data[idx]
		mut got := []u8{len: chunk.len}
		cipher.xor_key_stream(mut got, chunk)
		assert got == want
	}
}
// test_xor_key_stream_consecutive checks that successive `xor_key_stream`
// calls on one cipher continue the same key stream across short inputs.
fn test_xor_key_stream_consecutive() {
	// See https://github.com/vlang/v/issues/23977
	key := [u8(64), 116, 63, 11, 221, 199, 187, 110, 217, 68, 0, 50, 65, 79, 24, 10, 124, 174,
		66, 2, 172, 153, 237, 145, 244, 41, 131, 84, 247, 42, 73, 131]
	nonce := [u8(86), 124, 222, 94, 253, 187, 151, 219, 17, 83, 118, 255]
	mut cipher := new_cipher(key, nonce)!

	// first chunk: 4 bytes
	first_in := [u8(201), 199, 66, 226]
	first_want := [u8(0), 0, 0, 9]
	mut out := []u8{len: first_in.len}
	cipher.xor_key_stream(mut out, first_in)
	assert out == first_want

	// second, consecutive chunk: 16 bytes that must decode to all zeros
	second_in := [u8(82), 189, 125, 3, 24, 185, 183, 240, 29, 223, 17, 241, 103, 69, 45, 101]
	second_want := []u8{len: 16}
	out = []u8{len: second_in.len}
	cipher.xor_key_stream(mut out, second_in)
	assert out == second_want

	// additional data
	msg := 'billy the kid'.bytes()
	mut out3 := []u8{len: msg.len}
	cipher.xor_key_stream(mut out3, msg)
	// the go version produces: [40 17 78 116 255 224 2 52 92 151 103 107 138]
	assert out3 == [u8(40), 17, 78, 116, 255, 224, 2, 52, 92, 151, 103, 107, 138]
}
// test_chacha20_cipher_reset creates a cipher from a random key and nonce,
// resets it, and checks that every key word and nonce word is zero afterwards.
fn test_chacha20_cipher_reset() ! {
	mut key := []u8{len: 32}
	mut nonce := []u8{len: 12}
	rand.read(mut key)
	rand.read(mut nonce)

	mut cipher := new_cipher(key, nonce)!
	unsafe { cipher.reset() }

	for word in cipher.key {
		assert word == u32(0)
	}
	for word in cipher.nonce {
		assert word == u32(0)
	}
}
struct BlockCase { struct BlockCase {
key string key string
nonce string nonce string
@ -110,80 +217,6 @@ struct BlockCase {
output string output string
} }
// test_chacha20_no_overlap_xor_key_stream encrypts every entry of
// `xorkeystream_testcases` with `xor_key_stream` into a separate output
// buffer, checks the ciphertext, then decrypts it with the one-shot `decrypt`
// to recover the input.
fn test_chacha20_no_overlap_xor_key_stream() ! {
	for t in xorkeystream_testcases {
		key := hex.decode(t.key)!
		nonce := hex.decode(t.nonce)!
		input := hex.decode(t.input)!
		expected := hex.decode(t.output)!

		mut cs := new_cipher(key, nonce)!
		mut output := []u8{len: input.len}
		cs.xor_key_stream(mut output, input)
		// the ciphertext was previously computed but never verified
		assert output == expected

		// for decryption, we can not use cs.xor_key_stream directly on the output bytes:
		// the cipher has advanced its counter during the encryption phase.
		// The one-shot decrypt starts from a fresh cipher instead.
		plaintext := decrypt(key, nonce, output)!
		assert plaintext == input
	}
}
// test_chacha20_block_function checks, for every entry of `blocks_testcases`,
// that `chacha20_block_generic` applied to one all-zero block at the given
// counter yields the expected raw key stream block.
fn test_chacha20_block_function() ! {
	for tc in blocks_testcases {
		key := hex.decode(tc.key)!
		nonce := hex.decode(tc.nonce)!
		want := hex.decode(tc.output)!

		mut cipher := new_cipher(key, nonce)!
		cipher.set_counter(tc.counter)

		// a zeroed block XOR-ed with the key stream is the key stream itself
		mut buf := []u8{len: block_size}
		cipher.chacha20_block_generic(mut buf, buf)
		assert buf == want
	}
}
// test_chacha20_simple_block_function checks a single block produced by the generic
// block function, with the counter set to 1, against a fixed expected output.
fn test_chacha20_simple_block_function() ! {
	key_bytes := hex.decode('000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f')!
	nonce_bytes := hex.decode('000000090000004a00000000')!
	// zero-filled block, processed in place
	mut buf := []u8{len: block_size}
	mut cipher := new_cipher(key_bytes, nonce_bytes)!
	cipher.set_counter(u32(1))
	cipher.chacha20_block_generic(mut buf, buf)
	want := hex.decode('10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e')!
	assert buf == want
}
// test_chacha20_quarter_round feeds one fixed quartet through quarter_round and checks
// each of the four returned words.
fn test_chacha20_quarter_round() {
	in_a := 0x11111111
	in_b := 0x01020304
	in_c := 0x9b8d6f43
	in_d := 0x01234567
	out_a, out_b, out_c, out_d := quarter_round(in_a, in_b, in_c, in_d)
	assert out_a == 0xea2a92f4
	assert out_b == 0xcb1cf8ce
	assert out_c == 0x4581472e
	assert out_d == 0x5881c4bb
}
// test poly1305 key generator as specified in https://datatracker.ietf.org/doc/html/rfc8439#section-2.6.2
//
// For every entry of otk_cases, the first key_size bytes of keystream produced by a fresh
// cipher (obtained by xor-ing a zero-filled buffer in place) must equal the expected
// one-time key.
fn test_chacha20_onetime_poly1305_key_generation() ! {
	for v in otk_cases {
		key := hex.decode(v.key)!
		nonce := hex.decode(v.nonce)!
		otk := hex.decode(v.otk)!
		mut c := new_cipher(key, nonce)!
		// xor-ing zeros with the keystream yields the raw keystream bytes
		mut out := []u8{len: key_size}
		c.xor_key_stream(mut out, out)
		assert out == otk
	}
}
struct PolyOtk { struct PolyOtk {
key string key string
nonce string nonce string
@ -271,40 +304,6 @@ struct EncryptionCase {
output string output string
} }
// test_chacha20_cipher_encrypt encrypts the plaintext of every entry in
// encryption_test_cases, starting from the counter of the test case, and compares the
// result with the expected ciphertext.
fn test_chacha20_cipher_encrypt() ! {
	for tc in encryption_test_cases {
		raw_key := hex.decode(tc.key)!
		raw_nonce := hex.decode(tc.nonce)!
		message := hex.decode(tc.plaintext)!
		mut cipher := new_cipher(raw_key, raw_nonce)!
		cipher.set_counter(tc.counter)
		mut encrypted := []u8{len: message.len}
		cipher.xor_key_stream(mut encrypted, message)
		want := hex.decode(tc.output)!
		assert encrypted == want
	}
}
// test_chacha20_cipher_decrypt is the inverse of the encryption test: the expected
// ciphertext of every entry in encryption_test_cases is run through xor_key_stream with
// the same key, nonce and counter, and must produce the original plaintext.
fn test_chacha20_cipher_decrypt() ! {
	for tc in encryption_test_cases {
		raw_key := hex.decode(tc.key)!
		raw_nonce := hex.decode(tc.nonce)!
		encrypted := hex.decode(tc.output)!
		mut cipher := new_cipher(raw_key, raw_nonce)!
		cipher.set_counter(tc.counter)
		mut decrypted := []u8{len: encrypted.len}
		cipher.xor_key_stream(mut decrypted, encrypted)
		want := hex.decode(tc.plaintext)!
		assert decrypted == want
	}
}
const encryption_test_cases = [ const encryption_test_cases = [
// core test // core test
EncryptionCase{ EncryptionCase{

View File

@ -0,0 +1,438 @@
// Copyright (c) 2024 blackshirt.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
//
module chacha20
import math.bits
import encoding.binary
// max_64bit_counter is a 64-bit maximum internal counter of original ChaCha20 variant.
const max_64bit_counter = max_u64
// max_32bit_counter is the maximum 32-bit internal counter of the standard IETF ChaCha20 variant.
const max_32bit_counter = u64(max_u32)
// default_qround_nr is the default number of ChaCha20 double rounds
// (each one is a column round followed by a diagonal round).
const default_qround_nr = 10
// Stream is a ChaCha20 keystream generator with an internal block counter.
// It stores the key and the counter/nonce words as 32-bit values, plus a small cache of
// counter-independent words that keystream_with_blocksize fills in for the standard mode.
@[noinit]
struct Stream {
mut:
// The mode (variant) of this ChaCha20 stream:
// the standard IETF variant or the original (D. J. Bernstein) variant, set on creation.
mode CipherMode = .standard
// Flag that tells whether this stream is an extended (XChaCha20) construct.
// It only makes sense when mode == .standard
extended bool
// underlying stream's key, as eight 32-bit words
key [8]u32
// underlying stream's counter and nonce words.
// In standard mode nonce[0] is the counter; in original mode nonce[0] and nonce[1]
// hold the low and high halves of the 64-bit counter.
nonce [4]u32
// true once the counter-independent values below have been computed
precomp bool
// Cached state words after the first column round for the three columns that do not
// contain the counter; pN is the value for state index N.
// vfmt off
p1 u32 p5 u32 p9 u32 p13 u32
p2 u32 p6 u32 p10 u32 p14 u32
p3 u32 p7 u32 p11 u32 p15 u32
// vfmt on
}
// new_stream creates a new chacha20 stream from a key of key_size bytes and a nonce.
// The supported nonce size is 8, 12 or 24 bytes; the nonce length selects the variant:
// nonce_size keeps the standard mode, x_nonce_size additionally marks the stream as
// extended (a new key and nonce are derived first), orig_nonce_size selects the original
// mode. An error is returned for a wrong key size or an unsupported nonce size.
@[direct_array_access; inline]
fn new_stream(key []u8, nonce []u8) !Stream {
	if key.len != key_size {
		return error('Bad key size provided')
	}
	// defaults, adjusted below from the nonce length
	mut mode := CipherMode.standard
	mut extended := false
	match nonce.len {
		nonce_size {}
		x_nonce_size { extended = true }
		orig_nonce_size { mode = .original }
		else { return error('new_stream: unsupported nonce size') }
	}
	// an extended construct works on a derived key and nonce,
	// every other variant uses the supplied values directly
	new_key, new_nonce := if extended {
		xkey, xnonce := derive_xchacha20_key_nonce(key, nonce)!
		xkey, xnonce
	} else {
		key, nonce
	}
	mut b := Stream{
		mode:     mode
		extended: extended
	}
	// the key is stored as eight little-endian 32-bit words
	for i in 0 .. 8 {
		b.key[i] = binary.little_endian_u32(new_key[i * 4..(i + 1) * 4])
	}
	// The leading words hold the counter (one word in standard mode, two words in the
	// original mode) and start at zero; the remaining words are filled from the nonce.
	ctr_words := if b.mode == .standard { 1 } else { 2 }
	for j in 0 .. ctr_words {
		b.nonce[j] = 0
	}
	for j in ctr_words .. 4 {
		off := (j - ctr_words) * 4
		b.nonce[j] = binary.little_endian_u32(new_nonce[off..off + 4])
	}
	return b
}
// reset wipes the secret material held by the stream: the key words, the counter/nonce
// words and the cached precomputed words. It also clears the extended flag.
// The mode of the stream is left untouched.
@[unsafe]
fn (mut s Stream) reset() {
	s.extended = false
	unsafe {
		// key is 8 x u32 = 32 bytes, nonce is 4 x u32 = 16 bytes
		_ := vmemset(&s.key, 0, 32)
		_ := vmemset(&s.nonce, 0, 16)
	}
	// The cached words are derived from the key and the nonce, so they must be wiped too
	// and the cache marked invalid; otherwise stale key-dependent values would survive
	// the reset and be reused by keystream_with_blocksize.
	s.precomp = false
	s.p1 = 0
	s.p5 = 0
	s.p9 = 0
	s.p13 = 0
	s.p2 = 0
	s.p6 = 0
	s.p10 = 0
	s.p14 = 0
	s.p3 = 0
	s.p7 = 0
	s.p11 = 0
	s.p15 = 0
}
// new_curr_state builds a State from the current content of the stream.
//
// Layout of the 16 words:
//  0:cccccccc  1:cccccccc  2:cccccccc  3:cccccccc
//  4:kkkkkkkk  5:kkkkkkkk  6:kkkkkkkk  7:kkkkkkkk
//  8:kkkkkkkk  9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk
// 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn
//
// where c=constant k=key b=blockcounter n=nonce
@[direct_array_access]
fn (s Stream) new_curr_state() State {
	mut state := State{}
	// words 0..3: the chacha20 constants
	state[0] = cc0
	state[1] = cc1
	state[2] = cc2
	state[3] = cc3
	// words 4..11: the key
	for i in 0 .. s.key.len {
		state[4 + i] = s.key[i]
	}
	// words 12..15: the counter and nonce words
	for j in 0 .. s.nonce.len {
		state[12 + j] = s.nonce[j]
	}
	return state
}
// keystream_full processes the whole src buffer: every full block_size-d chunk is run
// through keystream_with_blocksize directly into dst, and a trailing partial chunk is
// zero-padded to block_size, processed, and copied into the remaining part of dst.
// It panics when the internal counter can not cover all the blocks to be processed.
@[direct_array_access]
fn (mut s Stream) keystream_full(mut dst []u8, src []u8) {
	// number of full blocks to be processed
	nr_blocks := src.len / block_size
	has_partial := src.len % block_size != 0
	// A trailing partial block advances the counter as well, so it has to be part of
	// the overflow check; this way the failure is detected before dst is touched.
	total_blocks := if has_partial { nr_blocks + 1 } else { nr_blocks }
	if s.check_ctr(u64(total_blocks)) {
		panic('chacha20: internal counter overflow')
	}
	// process the full block_size-d part of the message
	for i := 0; i < nr_blocks; i++ {
		block := unsafe { src[i * block_size..(i + 1) * block_size] }
		s.keystream_with_blocksize(mut dst[i * block_size..(i + 1) * block_size], block)
	}
	// process the remaining partial block
	if has_partial {
		last_block := unsafe { src[nr_blocks * block_size..] }
		// pad to align with block_size
		mut last_bytes := []u8{len: block_size}
		_ := copy(mut last_bytes, last_block)
		// process the padded last block in place
		s.keystream_with_blocksize(mut last_bytes, last_bytes)
		// only the bytes that fit into the rest of dst are copied back
		_ := copy(mut dst[nr_blocks * block_size..], last_bytes)
	}
}
// keystream_with_blocksize xors src with the ChaCha20 keystream and stores the result,
// serialized in little-endian form, into dst. Both buffers must have the same length and
// that length must be a multiple of block_size, otherwise it panics.
// The internal counter is increased by one for every processed block.
//
// For every block the steps are:
// - load the current stream content into a state and keep a working copy of it.
// - perform the quarter rounds on the working copy.
// - add the initial state back to the working copy.
// - xor the src bytes (loaded as little-endian words) with the result of the previous step.
// - serialize, in little-endian form, the xor-ed words into the destination buffer.
@[direct_array_access]
fn (mut s Stream) keystream_with_blocksize(mut dst []u8, src []u8) {
	// makes sure it works on sizes that are a multiple of block_size
	if dst.len != src.len || dst.len % block_size != 0 {
		panic('chacha20: internal error: wrong dst and/or src length')
	}
	mut idx := 0
	mut src_len := src.len
	for src_len >= block_size {
		// The state depends on the counter, so it has to be rebuilt for every block;
		// building it once for the whole input would produce a wrong keystream from
		// the second block on.
		st := s.new_curr_state()
		mut st_c := clone_state(st)
		if s.mode == .standard {
			// first column round of the column that contains the counter
			mut fcr := Quartet{st[0], st[4], st[8], st[12]}
			// The other three columns of the first column round do not depend on the
			// counter: they are computed once and cached in the stream.
			if !s.precomp {
				mut pcr1 := Quartet{st[1], st[5], st[9], st[13]}
				mut pcr2 := Quartet{st[2], st[6], st[10], st[14]}
				mut pcr3 := Quartet{st[3], st[7], st[11], st[15]}
				qround_on_quartet(mut pcr1)
				qround_on_quartet(mut pcr2)
				qround_on_quartet(mut pcr3)
				s.p1 = pcr1.e0
				s.p5 = pcr1.e1
				s.p9 = pcr1.e2
				s.p13 = pcr1.e3
				s.p2 = pcr2.e0
				s.p6 = pcr2.e1
				s.p10 = pcr2.e2
				s.p14 = pcr2.e3
				s.p3 = pcr3.e0
				s.p7 = pcr3.e1
				s.p11 = pcr3.e2
				s.p15 = pcr3.e3
				s.precomp = true
			}
			// remaining (counter dependent) part of the first column round
			qround_on_quartet(mut fcr)
			// First diagonal round, fed with the results of the first column round.
			qround_on_state_with_quartet(mut st_c, fcr.e0, s.p5, s.p10, s.p15, 0, 5, 10, 15)
			qround_on_state_with_quartet(mut st_c, s.p1, s.p6, s.p11, fcr.e3, 1, 6, 11, 12)
			qround_on_state_with_quartet(mut st_c, s.p2, s.p7, fcr.e2, s.p13, 2, 7, 8, 13)
			qround_on_state_with_quartet(mut st_c, s.p3, fcr.e1, s.p9, s.p14, 3, 4, 9, 14)
		}
		// The remaining rounds.
		// For the standard variant the first column and diagonal round were already done
		// above, the original variant uses the full number of rounds.
		n := if s.mode == .standard { 9 } else { default_qround_nr }
		for i := 0; i < n; i++ {
			// Column round.
			//  0 |  1 |  2 |  3
			//  4 |  5 |  6 |  7
			//  8 |  9 | 10 | 11
			// 12 | 13 | 14 | 15
			qround_on_state(mut st_c, 0, 4, 8, 12)
			qround_on_state(mut st_c, 1, 5, 9, 13)
			qround_on_state(mut st_c, 2, 6, 10, 14)
			qround_on_state(mut st_c, 3, 7, 11, 15)
			// Diagonal round.
			//  0 \  1 \  2 \  3
			//  5 \  6 \  7 \  4
			// 10 \ 11 \  8 \  9
			// 15 \ 12 \ 13 \ 14
			qround_on_state(mut st_c, 0, 5, 10, 15)
			qround_on_state(mut st_c, 1, 6, 11, 12)
			qround_on_state(mut st_c, 2, 7, 8, 13)
			qround_on_state(mut st_c, 3, 4, 9, 14)
		}
		// add the initial state back, xor with the src words and store into dst
		for i := 0; i < 16; i++ {
			src_block := unsafe { src[idx + (i * 4)..idx + (i + 1) * 4] }
			ks_word := st_c[i] + st[i]
			binary.little_endian_put_u32(mut dst[idx + (i * 4)..idx + (i + 1) * 4], binary.little_endian_u32(src_block) ^ ks_word)
		}
		// increases Stream's internal counter
		s.inc_ctr()
		// moves on to the next block
		idx += block_size
		src_len -= block_size
	}
}
// Handling of Stream's internal counter
//
// ctr returns the current counter of the stream as an u64 value.
// In standard mode the counter is the 32-bit value stored in b.nonce[0]; in the original
// mode it is a 64-bit value with its low half in b.nonce[0] and its high half in b.nonce[1].
@[direct_array_access; inline]
fn (b Stream) ctr() u64 {
	low := u64(b.nonce[0])
	if b.mode == .standard {
		return low
	}
	// original mode: combine both halves into the 64-bit counter
	high := u64(b.nonce[1])
	return (high << 32) | low
}
// set_ctr sets the counter of the stream to the absolute value ctr.
// It panics when ctr exceeds the maximum counter value of the stream variant.
//
// The value is validated on its own: the current counter plays no role when a new
// absolute value is assigned, so it must not be added to ctr for the range check.
@[direct_array_access; inline]
fn (mut b Stream) set_ctr(ctr u64) {
	// check for a ctr value that exceeds the counter limit of this variant
	if ctr > b.max_ctr() {
		panic('set_ctr: counter value exceed the limit ')
	}
	match b.mode {
		.original {
			// 64-bit counter: low half in nonce[0], high half in nonce[1]
			b.nonce[0] = u32(ctr)
			b.nonce[1] = u32(ctr >> 32)
		}
		.standard {
			// 32-bit counter, guaranteed to fit by the check above
			b.nonce[0] = u32(ctr)
		}
	}
}
// inc_ctr increases the internal counter by one.
// It panics when the counter already holds the maximum value of the stream variant,
// instead of letting a 64-bit counter silently wrap around to zero (which would make the
// stream reuse keystream blocks).
@[inline]
fn (mut b Stream) inc_ctr() {
	if b.check_ctr(1) {
		panic('chacha20: internal counter overflow')
	}
	b.set_ctr(b.ctr() + 1)
}
// check_ctr tells whether adding value to the current counter would overflow.
// It returns true when the sum wraps around the u64 range or exceeds the maximum counter
// value of this stream variant, and false otherwise.
@[inline]
fn (b Stream) check_ctr(value u64) bool {
	current := b.ctr()
	total := current + value
	// a sum smaller than one of its operands means the u64 addition wrapped around
	wrapped := total < current || total < value
	return wrapped || total > b.max_ctr()
}
// max_ctr returns the maximum counter value of this stream variant:
// the 64-bit limit for the original mode and the 32-bit limit for the standard mode.
@[inline]
fn (b Stream) max_ctr() u64 {
	return match b.mode {
		.original { max_64bit_counter }
		.standard { max_32bit_counter }
	}
}
// State represents the running 64 bytes (sixteen 32-bit words) of the chacha20 stream.
type State = [16]u32
// reset_state wipes a State by overwriting it with zero bytes.
@[direct_array_access; inline; unsafe]
fn reset_state(mut s State) {
unsafe {
// a State is 16 x u32 = 64 bytes
_ := vmemset(&s, 0, 64)
}
}
// clone_state returns a new State holding a word-by-word copy of s.
@[direct_array_access; inline]
fn clone_state(s State) State {
	mut copied := State{}
	for idx in 0 .. s.len {
		copied[idx] = s[idx]
	}
	return copied
}
// qround_on_state_with_quartet first stores the values q0, q1, q2, q3 into the state at
// index a, b, c, d respectively, and then performs the chacha20 quarter round on those
// four positions of the state.
@[direct_array_access]
fn qround_on_state_with_quartet(mut s State, q0 u32, q1 u32, q2 u32, q3 u32, a int, b int, c int, d int) {
// load the supplied words into the selected positions
s[a] = q0
s[b] = q1
s[c] = q2
s[d] = q3
// run the quarter round in place on these positions
qround_on_state(mut s, a, b, c, d)
}
// qround_on_state performs the chacha20 quarter round in place on the four words of the
// state selected by the index a, b, c, d.
@[direct_array_access]
fn qround_on_state(mut s State, a int, b int, c int, d int) {
	// a += b; d ^= a; d <<<= 16;
	s[a] += s[b]
	s[d] = bits.rotate_left_32(s[d] ^ s[a], 16)
	// c += d; b ^= c; b <<<= 12;
	s[c] += s[d]
	s[b] = bits.rotate_left_32(s[b] ^ s[c], 12)
	// a += b; d ^= a; d <<<= 8;
	s[a] += s[b]
	s[d] = bits.rotate_left_32(s[d] ^ s[a], 8)
	// c += d; b ^= c; b <<<= 7;
	s[c] += s[d]
	s[b] = bits.rotate_left_32(s[b] ^ s[c], 7)
}
// Quartet is a group of four u32 values, used as the input and output of a single
// chacha20 quarter round (see qround_on_quartet).
struct Quartet {
mut:
// e0..e3 take the roles of a, b, c and d of the quarter round, in this order
e0 u32
e1 u32
e2 u32
e3 u32
}
// qround_on_quartet performs the chacha20 quarter round on a Quartet; the result is
// stored back into the same quartet, with e0, e1, e2, e3 acting as a, b, c, d.
fn qround_on_quartet(mut q Quartet) {
	mut a := q.e0
	mut b := q.e1
	mut c := q.e2
	mut d := q.e3
	// a += b; d ^= a; d <<<= 16;
	a += b
	d = bits.rotate_left_32(d ^ a, 16)
	// c += d; b ^= c; b <<<= 12;
	c += d
	b = bits.rotate_left_32(b ^ c, 12)
	// a += b; d ^= a; d <<<= 8;
	a += b
	d = bits.rotate_left_32(d ^ a, 8)
	// c += d; b ^= c; b <<<= 7;
	c += d
	b = bits.rotate_left_32(b ^ c, 7)
	// write the results back
	q.e0 = a
	q.e1 = b
	q.e2 = c
	q.e3 = d
}

View File

@ -0,0 +1,83 @@
module chacha20
import encoding.hex
// test_qround_on_state runs one quarter round on the first four words of an otherwise
// zeroed State and checks the four resulting words.
fn test_qround_on_state() {
	inputs := [u32(0x11111111), 0x01020304, 0x9b8d6f43, 0x01234567]
	mut st := State{}
	for idx, word in inputs {
		st[idx] = word
	}
	qround_on_state(mut st, 0, 1, 2, 3)
	assert st[0] == 0xea2a92f4
	assert st[1] == 0xcb1cf8ce
	assert st[2] == 0x4581472e
	assert st[3] == 0x5881c4bb
}
// test_state_of_chacha20_block_simple checks a single keystream block, produced from a
// zeroed buffer with the counter set to 1, against a fixed expected output.
fn test_state_of_chacha20_block_simple() ! {
	raw_key := hex.decode('000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f')!
	raw_nonce := hex.decode('000000090000004a00000000')!
	mut st := new_stream(raw_key, raw_nonce)!
	// zero-filled block, processed in place
	mut buf := []u8{len: block_size}
	st.set_ctr(1)
	st.keystream_with_blocksize(mut buf, buf)
	want := hex.decode('10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e')!
	assert buf == want
}
// test_keystream_with_blocksize produces one keystream block for every entry of
// blocks_testcases, starting from the counter of the test case, and compares it with the
// expected output.
fn test_keystream_with_blocksize() ! {
	for tc in blocks_testcases {
		raw_key := hex.decode(tc.key)!
		raw_nonce := hex.decode(tc.nonce)!
		mut st := new_stream(raw_key, raw_nonce)!
		st.set_ctr(tc.counter)
		// zero-filled block, processed in place
		mut buf := []u8{len: block_size}
		st.keystream_with_blocksize(mut buf, buf)
		want := hex.decode(tc.output)!
		assert buf == want
	}
}
// BlockCase is a single test vector for the chacha20 block function.
struct BlockCase {
// key, as a hex string
key string
// nonce, as a hex string
nonce string
// initial block counter
counter u32
// expected block output, as a hex string
output string
}
// blocks_testcases holds the block function test vectors used by the tests in this file.
const blocks_testcases = [
// section 2.3.2 https://datatracker.ietf.org/doc/html/rfc8439#section-2.3.2
BlockCase{
key: '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f'
nonce: '000000090000004a00000000'
counter: u32(1)
output: '10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e'
},
// https://datatracker.ietf.org/doc/html/rfc8439#appendix-A.1.1
BlockCase{
key: '0000000000000000000000000000000000000000000000000000000000000000'
nonce: '000000000000000000000000'
counter: u32(0)
output: '76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586'
},
// https://datatracker.ietf.org/doc/html/rfc8439#appendix-A.1.2
BlockCase{
key: '0000000000000000000000000000000000000000000000000000000000000000'
nonce: '000000000000000000000000'
counter: u32(1)
output: '9f07e7be5551387a98ba977c732d080dcb0f29a048e3656912c6533e32ee7aed29b721769ce64e43d57133b074d839d531ed1f28510afb45ace10a1f4b794d6f'
},
]

View File

@ -22,55 +22,61 @@ fn xchacha20(key []u8, nonce []u8) ![]u8 {
return error('xchacha: Bad nonce size') return error('xchacha: Bad nonce size')
} }
// initializes ChaCha20 state // initializes ChaCha20 state
mut x0 := cc0 mut x := State{}
mut x1 := cc1 x[0] = cc0
mut x2 := cc2 x[1] = cc1
mut x3 := cc3 x[2] = cc2
x[3] = cc3
mut x4 := binary.little_endian_u32(key[0..4]) x[4] = binary.little_endian_u32(key[0..4])
mut x5 := binary.little_endian_u32(key[4..8]) x[5] = binary.little_endian_u32(key[4..8])
mut x6 := binary.little_endian_u32(key[8..12]) x[6] = binary.little_endian_u32(key[8..12])
mut x7 := binary.little_endian_u32(key[12..16]) x[7] = binary.little_endian_u32(key[12..16])
mut x8 := binary.little_endian_u32(key[16..20]) x[8] = binary.little_endian_u32(key[16..20])
mut x9 := binary.little_endian_u32(key[20..24]) x[9] = binary.little_endian_u32(key[20..24])
mut x10 := binary.little_endian_u32(key[24..28]) x[10] = binary.little_endian_u32(key[24..28])
mut x11 := binary.little_endian_u32(key[28..32]) x[11] = binary.little_endian_u32(key[28..32])
// we have no counter // we have no counter
mut x12 := binary.little_endian_u32(nonce[0..4]) x[12] = binary.little_endian_u32(nonce[0..4])
mut x13 := binary.little_endian_u32(nonce[4..8]) x[13] = binary.little_endian_u32(nonce[4..8])
mut x14 := binary.little_endian_u32(nonce[8..12]) x[14] = binary.little_endian_u32(nonce[8..12])
mut x15 := binary.little_endian_u32(nonce[12..16]) x[15] = binary.little_endian_u32(nonce[12..16])
// After initialization, proceed through the ChaCha20 rounds as usual. // After initialization, proceed through the ChaCha20 rounds as usual.
for i := 0; i < 10; i++ { for i := 0; i < 10; i++ {
// Diagonal round. // Diagonal round.
x0, x4, x8, x12 = quarter_round(x0, x4, x8, x12) qround_on_state(mut x, 0, 4, 8, 12) // 0
x1, x5, x9, x13 = quarter_round(x1, x5, x9, x13) qround_on_state(mut x, 1, 5, 9, 13) // 1
x2, x6, x10, x14 = quarter_round(x2, x6, x10, x14) qround_on_state(mut x, 2, 6, 10, 14) // 2
x3, x7, x11, x15 = quarter_round(x3, x7, x11, x15) qround_on_state(mut x, 3, 7, 11, 15) // 3
// Column round. // quarter diagonal round
x0, x5, x10, x15 = quarter_round(x0, x5, x10, x15) // Diagonal round.
x1, x6, x11, x12 = quarter_round(x1, x6, x11, x12) // 0 \ 1 \ 2 \ 3
x2, x7, x8, x13 = quarter_round(x2, x7, x8, x13) // 5 \ 6 \ 7 \ 4
x3, x4, x9, x14 = quarter_round(x3, x4, x9, x14) // 10 \ 11 \ 8 \ 9
// 15 \ 12 \ 13 \ 14
qround_on_state(mut x, 0, 5, 10, 15)
qround_on_state(mut x, 1, 6, 11, 12)
qround_on_state(mut x, 2, 7, 8, 13)
qround_on_state(mut x, 3, 4, 9, 14)
} }
// Once the 20 ChaCha rounds have been completed, the first 128 bits (16 bytes) and // Once the 20 ChaCha rounds have been completed, the first 128 bits (16 bytes) and
// last 128 bits (16 bytes) of the ChaCha state (both little-endian) are // last 128 bits (16 bytes) of the ChaCha state (both little-endian) are
// concatenated, and this 256-bit (32 bytes) subkey is returned. // concatenated, and this 256-bit (32 bytes) subkey is returned.
mut out := []u8{len: 32} mut out := []u8{len: 32}
binary.little_endian_put_u32(mut out[0..4], x0) binary.little_endian_put_u32(mut out[0..4], x[0])
binary.little_endian_put_u32(mut out[4..8], x1) binary.little_endian_put_u32(mut out[4..8], x[1])
binary.little_endian_put_u32(mut out[8..12], x2) binary.little_endian_put_u32(mut out[8..12], x[2])
binary.little_endian_put_u32(mut out[12..16], x3) binary.little_endian_put_u32(mut out[12..16], x[3])
binary.little_endian_put_u32(mut out[16..20], x12) binary.little_endian_put_u32(mut out[16..20], x[12])
binary.little_endian_put_u32(mut out[20..24], x13) binary.little_endian_put_u32(mut out[20..24], x[13])
binary.little_endian_put_u32(mut out[24..28], x14) binary.little_endian_put_u32(mut out[24..28], x[14])
binary.little_endian_put_u32(mut out[28..32], x15) binary.little_endian_put_u32(mut out[28..32], x[15])
return out return out
} }