v/vlib/x/crypto/chacha20/chacha.v
2025-05-13 22:05:22 +03:00

599 lines
20 KiB
V

// Copyright (c) 2024 blackshirt.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
//
// Chacha20 symmetric key stream cipher encryption based on RFC 8439
module chacha20
import math.bits
import crypto.internal.subtle
import encoding.binary
// key_size is the size of a ChaCha20 key in bytes (256 bits).
pub const key_size = 32
// nonce_size is the size of the standard IETF ChaCha20 nonce in bytes (96 bits).
pub const nonce_size = 12
// x_nonce_size is the size of the extended (XChaCha20) nonce in bytes (192 bits).
pub const x_nonce_size = 24
// orig_nonce_size is the size of the original ChaCha20 nonce in bytes (64 bits).
pub const orig_nonce_size = 8
// block_size is the size, in bytes, of the internal block ChaCha20 operates on.
const block_size = 64
// The four constant words of the ChaCha20 state. Read as little-endian bytes,
// they spell the ASCII text "expand 32-byte k" (see the per-word notes below).
const cc0 = u32(0x61707865) // expa
const cc1 = u32(0x3320646e) // nd 3
const cc2 = u32(0x79622d32) // 2-by
const cc3 = u32(0x6b206574) // te k
// CipherMode enumerates the ChaCha20 variants supported by this module.
enum CipherMode {
// The standard IETF ChaCha20 (and XChaCha20), with a 32-bit internal counter.
standard
// The original ChaCha20, with a 64-bit internal counter.
original
}
// Cipher represents a ChaCha20 stream cipher instance.
pub struct Cipher {
// The mode of the ChaCha20 cipher; it is set when the cipher is created.
mode CipherMode = .standard
mut:
// The ChaCha20 state is 512 bits (64 bytes) wide and is made of:
// 4 words (16 bytes) of ChaCha20 constants (not stored here, see cc0..cc3),
// 8 words (32 bytes) of key material,
// 4 words (16 bytes) holding the internal counter followed by the raw nonce.
// The counter occupies one word (32-bit) or two words (64-bit) depending on `mode`.
key [8]u32
nonce [4]u32
// Flag that tells whether this cipher is the extended (XChaCha20) variant.
// It only makes sense when mode == .standard
extended bool
// Internal buffer used for storing key stream results.
block []u8 = []u8{len: block_size}
// The number of leftover, not yet consumed, key stream bytes at the end of `block`.
length int
// Additional fields, following the Go version. They are mainly used to optimize
// standard IETF cipher operations by pre-caching some quarter_round steps.
// `precomp` tells whether the cached p* values below are valid.
// vfmt off
precomp bool
p1 u32 p5 u32 p9 u32 p13 u32
p2 u32 p6 u32 p10 u32 p14 u32
p3 u32 p7 u32 p11 u32 p15 u32
// vfmt on
}
// new_cipher builds a ChaCha20 stream cipher from a 32-byte `key` and a `nonce`.
// The nonce length selects the variant:
//   - 12 bytes: standard IETF ChaCha20 (32-bit counter),
//   - 24 bytes: XChaCha20, the extended construction on top of the standard variant,
//   -  8 bytes: original ChaCha20 (64-bit counter).
// It returns the new cipher instance, or an error when the key or the nonce
// has any other length.
@[direct_array_access]
pub fn new_cipher(key []u8, nonce []u8) !&Cipher {
	if key.len != key_size {
		return error('Bad key size provided')
	}
	n := nonce.len
	if n != nonce_size && n != x_nonce_size && n != orig_nonce_size {
		return error('Unsupported nonce size')
	}
	// the 8-byte nonce is the only one that selects the original variant
	selected_mode := if n == orig_nonce_size { CipherMode.original } else { CipherMode.standard }
	mut cipher := &Cipher{
		mode:     selected_mode
		extended: n == x_nonce_size
	}
	// a freshly created instance is already zeroed, so no reset is needed here
	cipher.do_rekey(key, nonce)!
	return cipher
}
// encrypt encrypts `plaintext` with a fresh ChaCha20 cipher created from `key` and `nonce`
// and returns the ciphertext. The variant is selected by the nonce length, exactly as in
// `new_cipher` (8, 12 or 24 bytes). The internal counter starts at 0.
// If you want more control (for example a different starting counter), create a Cipher
// instance yourself and set it up manually.
pub fn encrypt(key []u8, nonce []u8, plaintext []u8) ![]u8 {
	mut cipher := new_cipher(key, nonce)!
	// wipe key material from the temporary cipher once we are done with it
	defer {
		unsafe { cipher.reset() }
	}
	mut ciphertext := []u8{len: plaintext.len}
	cipher.encrypt(mut ciphertext, plaintext)
	return ciphertext
}
// decrypt reverses `encrypt`: it decrypts `ciphertext` with a fresh ChaCha20 cipher created
// from `key` and `nonce` and returns the plaintext.
// ChaCha20 is a XOR stream cipher, so decryption is the very same operation as encryption.
pub fn decrypt(key []u8, nonce []u8, ciphertext []u8) ![]u8 {
	return encrypt(key, nonce, ciphertext)!
}
// xor_key_stream XORs every byte of `src` with the next byte of the cipher's key stream and
// writes the result into `dst`. It fulfills the `cipher.Stream` interface.
// Unused key stream bytes of the last generated block are kept inside the cipher and are
// consumed first by the next call, so a message may be processed in arbitrary sized pieces.
// It panics when `dst` is shorter than `src`, when the buffers overlap inexactly, or when
// the internal counter would overflow.
// You must never use the same (key, nonce) pair more than once for encryption.
// This would void any confidentiality guarantees for the messages encrypted with the same nonce and key.
@[direct_array_access]
pub fn (mut c Cipher) xor_key_stream(mut dst []u8, src []u8) {
	if src.len == 0 {
		return
	}
	if dst.len < src.len {
		panic('chacha20/chacha: dst buffer is to small')
	}
	mut pos := 0
	mut remaining := src.len
	// only the first src.len bytes of dst are ever touched
	dst = unsafe { dst[..remaining] }
	if subtle.inexact_overlap(dst, src) {
		panic('chacha20: invalid buffer overlap')
	}
	// This follows the Go implementation.
	// Step 1: use up key stream bytes left over from a previous call.
	if c.length != 0 {
		mut leftover := c.block[block_size - c.length..]
		if remaining < leftover.len {
			leftover = unsafe { leftover[..remaining] }
		}
		// pos is still 0 here, so plain indices are enough
		for i, ks_byte in leftover {
			dst[i] = src[i] ^ ks_byte
		}
		used := leftover.len
		c.length -= used
		pos += used
		remaining -= used
	}
	if remaining == 0 {
		return
	}
	// Step 2: make sure the counter can serve all blocks that are still needed.
	blocks_needed := (u64(remaining) + block_size - 1) / block_size
	if c.check_for_ctr_overflow(blocks_needed) {
		panic('chacha20: internal counter overflow')
	}
	// Step 3: process the largest multiple of block_size directly from src into dst.
	whole := remaining - remaining % block_size
	if whole > 0 {
		c.chacha20_block_generic(mut dst[pos..pos + whole], src[pos..pos + whole])
	}
	pos += whole
	remaining -= whole
	// Step 4: a trailing partial block is padded inside the internal buffer; the unused
	// part of the generated key stream stays there for the next invocation.
	if remaining > 0 {
		// Start from a clean buffer, otherwise stale data of a previous call would be
		// mixed in. See https://github.com/vlang/v/issues/24043
		c.block = []u8{len: block_size}
		_ := copy(mut c.block, src[pos..])
		c.chacha20_block_generic(mut c.block, c.block)
		written := copy(mut dst[pos..], c.block)
		// number of key stream bytes that are still unused in the internal buffer
		c.length = block_size - written
	}
}
// encrypt encrypts `src` and stores the result into the first `src.len` bytes of `dst`.
// It works like `xor_key_stream`, except that it ignores (and does not update) the leftover
// key stream kept in the internal buffer, so it works in a one-shot fashion: every call
// starts at a fresh block of the current counter.
// It was added to allow the `chacha20poly1305` module to work without key streaming.
// It panics when `dst` is shorter than `src`, when the buffers overlap inexactly, or when
// the internal counter would overflow.
// Bytes of `dst` beyond `src.len` are never written.
// TODO: integrates it with the rest
@[direct_array_access]
pub fn (mut c Cipher) encrypt(mut dst []u8, src []u8) {
	if src.len == 0 {
		return
	}
	if dst.len < src.len {
		panic('chacha20/chacha: dst buffer is to small')
	}
	if subtle.inexact_overlap(dst, src) {
		panic('chacha20: invalid buffer overlap')
	}
	nr_blocks := src.len / block_size
	for i := 0; i < nr_blocks; i++ {
		// get current src block to be xor-ed
		block := unsafe { src[i * block_size..(i + 1) * block_size] }
		// build keystream, xor-ed with the block and stores into dst
		c.chacha20_block_generic(mut dst[i * block_size..(i + 1) * block_size], block)
	}
	// process the trailing partial block, if any
	tail_start := nr_blocks * block_size
	if tail_start < src.len {
		tail := unsafe { src[tail_start..] }
		// pad it to block_size, and then perform chacha20_block_generic in place
		mut padded := []u8{len: block_size}
		_ := copy(mut padded, tail)
		c.chacha20_block_generic(mut padded, padded)
		// Copy back only as many bytes as the partial block holds. The rest of `padded`
		// is raw key stream (zero padding xor key stream) and must not leak into a `dst`
		// that happens to be longer than `src`.
		n := copy(mut dst[tail_start..src.len], padded)
		assert n == tail.len
	}
}
// chacha20_block_generic generates ChaCha20 key stream blocks, XORs them with `src` and
// stores the result into `dst`. This is the main building block of the key stream generator.
// `dst` and `src` must have the same length, and that length must be a multiple of
// block_size; otherwise it panics. It also panics when the internal counter would overflow.
// The internal counter is advanced by one for every processed block.
@[direct_array_access]
fn (mut c Cipher) chacha20_block_generic(mut dst []u8, src []u8) {
	// For every block, the generator performs these steps:
	// - Loads the current ChaCha20 state into a temporary working state.
	// - Performs the quarter_round based rounds on the working state.
	// - Adds the initial state back to the working state.
	// - XORs the src bytes (loaded as little endian words) with the result of the previous step.
	// - Serializes, in little endian form, the XORed words into the destination buffer.
	//
	// Make sure it only works on sizes that are a multiple of block_size
	if dst.len != src.len || dst.len % block_size != 0 {
		panic('chacha20: internal error: wrong dst and/or src length')
	}
	// check for counter overflow
	num_blocks := u64((src.len + block_size - 1) / block_size)
	if c.check_for_ctr_overflow(num_blocks) {
		panic('chacha20: internal counter overflow')
	}
	// initializes ChaCha20 state
	// 0:cccccccc 1:cccccccc 2:cccccccc 3:cccccccc
	// 4:kkkkkkkk 5:kkkkkkkk 6:kkkkkkkk 7:kkkkkkkk
	// 8:kkkkkkkk 9:kkkkkkkk 10:kkkkkkkk 11:kkkkkkkk
	// 12:bbbbbbbb 13:nnnnnnnn 14:nnnnnnnn 15:nnnnnnnn
	//
	// where c=constant k=key b=blockcounter n=nonce
	// (in original mode, word 13 holds the high half of the 64-bit counter)
	c0, c1, c2, c3 := cc0, cc1, cc2, cc3
	c4, c5, c6, c7 := c.key[0], c.key[1], c.key[2], c.key[3]
	c8, c9, c10, c11 := c.key[4], c.key[5], c.key[6], c.key[7]
	// internal cipher's counter; these locals are advanced together with c.nonce below
	mut c12 := c.nonce[0]
	mut c13 := c.nonce[1]
	c14, c15 := c.nonce[2], c.nonce[3]
	// temporary working state; it is (re)loaded at the top of every block iteration
	mut x0, mut x1, mut x2, mut x3 := c0, c1, c2, c3
	mut x4, mut x5, mut x6, mut x7 := c4, c5, c6, c7
	mut x8, mut x9, mut x10, mut x11 := c8, c9, c10, c11
	mut x12, mut x13, mut x14, mut x15 := c12, c13, c14, c15
	// Standard mode only: precompute the three first column rounds that
	// do not depend on the counter word.
	if c.mode == .standard && !c.precomp {
		c.p1, c.p5, c.p9, c.p13 = quarter_round(c1, c5, c9, c13)
		c.p2, c.p6, c.p10, c.p14 = quarter_round(c2, c6, c10, c14)
		c.p3, c.p7, c.p11, c.p15 = quarter_round(c3, c7, c11, c15)
		c.precomp = true
	}
	mut idx := 0
	mut src_len := src.len
	for src_len >= block_size {
		if c.mode == .standard {
			// remaining first column round, the only one depending on the counter
			fcr0, fcr4, fcr8, fcr12 := quarter_round(c0, c4, c8, c12)
			// The first diagonal round, fed by the precomputed column rounds.
			x0, x5, x10, x15 = quarter_round(fcr0, c.p5, c.p10, c.p15)
			x1, x6, x11, x12 = quarter_round(c.p1, c.p6, c.p11, fcr12)
			x2, x7, x8, x13 = quarter_round(c.p2, c.p7, fcr8, c.p13)
			x3, x4, x9, x14 = quarter_round(c.p3, fcr4, c.p9, c.p14)
		} else {
			// Original mode: every block must start from the initial state with the
			// current counter. Without this reload, the second and later blocks of a
			// multi-block call would keep permuting the previous block's state.
			x0, x1, x2, x3 = c0, c1, c2, c3
			x4, x5, x6, x7 = c4, c5, c6, c7
			x8, x9, x10, x11 = c8, c9, c10, c11
			x12, x13, x14, x15 = c12, c13, c14, c15
		}
		// The remaining double rounds: standard mode already did one above,
		// original mode performs all ten here.
		n := if c.mode == .standard { 9 } else { 10 }
		for i := 0; i < n; i++ {
			// Column round.
			// 0 | 1 | 2 | 3
			// 4 | 5 | 6 | 7
			// 8 | 9 | 10 | 11
			// 12 | 13 | 14 | 15
			x0, x4, x8, x12 = quarter_round(x0, x4, x8, x12)
			x1, x5, x9, x13 = quarter_round(x1, x5, x9, x13)
			x2, x6, x10, x14 = quarter_round(x2, x6, x10, x14)
			x3, x7, x11, x15 = quarter_round(x3, x7, x11, x15)
			// Diagonal round.
			// 0 \ 1 \ 2 \ 3
			// 5 \ 6 \ 7 \ 4
			// 10 \ 11 \ 8 \ 9
			// 15 \ 12 \ 13 \ 14
			x0, x5, x10, x15 = quarter_round(x0, x5, x10, x15)
			x1, x6, x11, x12 = quarter_round(x1, x6, x11, x12)
			x2, x7, x8, x13 = quarter_round(x2, x7, x8, x13)
			x3, x4, x9, x14 = quarter_round(x3, x4, x9, x14)
		}
		// add the initial state back, xor with the src words and store into dst
		binary.little_endian_put_u32(mut dst[idx + 0..idx + 4], binary.little_endian_u32(src[idx + 0..idx + 4]) ^ (x0 + c0))
		binary.little_endian_put_u32(mut dst[idx + 4..idx + 8], binary.little_endian_u32(src[idx + 4..idx + 8]) ^ (x1 + c1))
		binary.little_endian_put_u32(mut dst[idx + 8..idx + 12], binary.little_endian_u32(src[idx + 8..idx + 12]) ^ (x2 + c2))
		binary.little_endian_put_u32(mut dst[idx + 12..idx + 16], binary.little_endian_u32(src[idx + 12..idx + 16]) ^ (x3 + c3))
		binary.little_endian_put_u32(mut dst[idx + 16..idx + 20], binary.little_endian_u32(src[idx + 16..idx + 20]) ^ (x4 + c4))
		binary.little_endian_put_u32(mut dst[idx + 20..idx + 24], binary.little_endian_u32(src[idx + 20..idx + 24]) ^ (x5 + c5))
		binary.little_endian_put_u32(mut dst[idx + 24..idx + 28], binary.little_endian_u32(src[idx + 24..idx + 28]) ^ (x6 + c6))
		binary.little_endian_put_u32(mut dst[idx + 28..idx + 32], binary.little_endian_u32(src[idx + 28..idx + 32]) ^ (x7 + c7))
		binary.little_endian_put_u32(mut dst[idx + 32..idx + 36], binary.little_endian_u32(src[idx + 32..idx + 36]) ^ (x8 + c8))
		binary.little_endian_put_u32(mut dst[idx + 36..idx + 40], binary.little_endian_u32(src[idx + 36..idx + 40]) ^ (x9 + c9))
		binary.little_endian_put_u32(mut dst[idx + 40..idx + 44], binary.little_endian_u32(src[idx + 40..idx + 44]) ^ (x10 + c10))
		binary.little_endian_put_u32(mut dst[idx + 44..idx + 48], binary.little_endian_u32(src[idx + 44..idx + 48]) ^ (x11 + c11))
		binary.little_endian_put_u32(mut dst[idx + 48..idx + 52], binary.little_endian_u32(src[idx + 48..idx + 52]) ^ (x12 + c12))
		binary.little_endian_put_u32(mut dst[idx + 52..idx + 56], binary.little_endian_u32(src[idx + 52..idx + 56]) ^ (x13 + c13))
		binary.little_endian_put_u32(mut dst[idx + 56..idx + 60], binary.little_endian_u32(src[idx + 56..idx + 60]) ^ (x14 + c14))
		binary.little_endian_put_u32(mut dst[idx + 60..idx + 64], binary.little_endian_u32(src[idx + 60..idx + 64]) ^ (x15 + c15))
		// Updates the internal counter.
		//
		// It is safe to update the counter, overflow was already checked above.
		// The local counter words are advanced too, so that the next block of this
		// same call uses the new counter value.
		if c.mode == .original {
			mut curr_ctr := (u64(c13) << 32) | u64(c12)
			curr_ctr += 1
			c12 = u32(curr_ctr)
			c13 = u32(curr_ctr >> 32)
			// stores back the counter
			c.nonce[0] = c12
			c.nonce[1] = c13
		} else {
			c12 += 1
			c.nonce[0] = c12
		}
		idx += block_size
		src_len -= block_size
	}
}
// free releases the internal key stream buffer owned by the Cipher `c`.
// It does nothing when compiled with `prealloc`. Do not use the cipher after calling .free
@[unsafe]
pub fn (mut c Cipher) free() {
	$if !prealloc {
		unsafe {
			c.block.free()
		}
	}
}
// reset zeroes the key, the counter/nonce words, the key stream buffer and all cached
// values of the cipher. The `mode` and `extended` settings are left as they are.
@[unsafe]
pub fn (mut c Cipher) reset() {
	// bookkeeping and cached quarter-round values
	c.precomp = false
	c.length = 0
	c.p1, c.p2, c.p3 = u32(0), u32(0), u32(0)
	c.p5, c.p6, c.p7 = u32(0), u32(0), u32(0)
	c.p9, c.p10, c.p11 = u32(0), u32(0), u32(0)
	c.p13, c.p14, c.p15 = u32(0), u32(0), u32(0)
	unsafe {
		// key stream buffer, then 4 counter/nonce words (16 bytes) and 8 key words (32 bytes)
		c.block.reset()
		_ := vmemset(&c.nonce, 0, 16)
		_ := vmemset(&c.key, 0, 32)
	}
}
// set_counter sets the cipher's internal block counter to `ctr`.
// In original mode the full 64-bit value is used. In standard mode the counter is 32 bits
// wide and set_counter panics when `ctr` exceeds that limit.
// Key stream bytes still buffered from the previous counter position are discarded, so the
// next `xor_key_stream` call starts at the beginning of block `ctr`.
@[direct_array_access; inline]
pub fn (mut c Cipher) set_counter(ctr u64) {
	match c.mode {
		.original {
			c.nonce[0] = u32(ctr)
			c.nonce[1] = u32(ctr >> 32)
		}
		.standard {
			// check for ctr value that may exceed the counter limit
			if ctr > max_32bit_counter {
				panic('set_counter: counter value exceed the limit ')
			}
			c.nonce[0] = u32(ctr)
		}
	}
	// The leftover key stream in c.block was generated for the old counter position;
	// forget it, otherwise xor_key_stream would drain those stale bytes first.
	c.length = 0
}
// rekey resets the internal state of the cipher and reinitializes it with the provided
// key and nonce. The counter restarts at 0.
// The key must be 32 bytes long. The nonce length must match the variant the cipher was
// created with: 12 bytes for the standard cipher, 24 bytes for the extended (XChaCha20)
// cipher and 8 bytes for the original cipher.
// All checks are done before anything is modified: when an error is returned, the cipher
// keeps its previous key, nonce and counter.
pub fn (mut c Cipher) rekey(key []u8, nonce []u8) ! {
	// this routine is publicly accessible, so the supplied key and nonce are checked here
	if key.len != key_size {
		return error('Bad key size provided for rekey')
	}
	match c.mode {
		.standard {
			// For the standard cipher, the allowed nonce size is nonce_size or x_nonce_size
			if nonce.len != x_nonce_size && nonce.len != nonce_size {
				return error('Bad nonce size for standard cipher, use 12 or 24 bytes length nonce')
			}
			if c.extended && nonce.len != x_nonce_size {
				return error('Bad nonce size provided for extended variant cipher')
			}
			// A non-extended cipher would silently use only the first 12 bytes
			// of a 24 bytes nonce, so reject it instead.
			if !c.extended && nonce.len != nonce_size {
				return error('Bad nonce size provided for non-extended standard cipher')
			}
		}
		.original {
			// in the original variant, the nonce must be orig_nonce_size (8 bytes) long
			if nonce.len != orig_nonce_size {
				return error('Bad nonce size provided for original mode')
			}
		}
	}
	// Only wipe the old state once the new parameters are known to be valid; a failed
	// rekey must not leave behind a usable cipher that is keyed with all zeros.
	unsafe { c.reset() }
	c.do_rekey(key, nonce)!
}
// do_rekey loads `key` and `nonce` into the cipher state and restarts the counter at 0.
// It does not validate the lengths of its inputs; callers are expected to have done so.
@[direct_array_access]
fn (mut c Cipher) do_rekey(key []u8, nonce []u8) ! {
	mut key_bytes := key.clone()
	mut nonce_bytes := nonce.clone()
	// The extended (XChaCha20) flavour of the standard mode first derives
	// a sub key and a 12 bytes nonce from the inputs.
	if c.extended && c.mode == .standard {
		key_bytes, nonce_bytes = derive_xchacha20_key_nonce(key, nonce)!
	}
	// The key setup is shared by both modes: 8 little endian words.
	for i in 0 .. 8 {
		c.key[i] = binary.little_endian_u32(key_bytes[i * 4..i * 4 + 4])
	}
	// first counter word
	c.nonce[0] = 0
	match c.mode {
		.standard {
			// 32-bit counter followed by three nonce words
			c.nonce[1] = binary.little_endian_u32(nonce_bytes[0..4])
			c.nonce[2] = binary.little_endian_u32(nonce_bytes[4..8])
			c.nonce[3] = binary.little_endian_u32(nonce_bytes[8..12])
		}
		.original {
			// high word of the 64-bit counter, followed by the 64-bit nonce
			c.nonce[1] = 0
			c.nonce[2] = binary.little_endian_u32(nonce_bytes[0..4])
			c.nonce[3] = binary.little_endian_u32(nonce_bytes[4..8])
		}
	}
}
// Helper and core function for ChaCha20
//
// quarter_round is the basic operation of the ChaCha algorithm. It takes four 32-bit
// unsigned integers and mixes them with four add-xor-rotate (AXR) steps, returning the
// four updated values in the same order (a, b, c, d).
fn quarter_round(a u32, b u32, c u32, d u32) (u32, u32, u32, u32) {
	// In C-like notation, where `<<<=` denotes a rotate left:
	// a += b; d ^= a; d <<<= 16;
	// c += d; b ^= c; b <<<= 12;
	// a += b; d ^= a; d <<<= 8;
	// c += d; b ^= c; b <<<= 7;
	// Each intermediate value gets its own immutable name below.
	a1 := a + b
	d1 := bits.rotate_left_32(d ^ a1, 16)
	c1 := c + d1
	b1 := bits.rotate_left_32(b ^ c1, 12)
	a2 := a1 + b1
	d2 := bits.rotate_left_32(d1 ^ a2, 8)
	c2 := c1 + d2
	b2 := bits.rotate_left_32(b1 ^ c2, 7)
	return a2, b2, c2, d2
}
// Cipher's counter handling routines
//
// The counter limits are defined as u64 constants to simplify their use:
// max_64bit_counter is the limit of the 64-bit counter (original mode),
// max_32bit_counter is the limit of the 32-bit counter (standard mode).
const max_64bit_counter = max_u64
const max_32bit_counter = u64(max_u32)
// load_ctr returns the current value of the cipher's counter as an u64.
// In standard mode the counter is the 32-bit word c.nonce[0]; in original mode it is
// 64 bits wide, with the low half in c.nonce[0] and the high half in c.nonce[1].
@[direct_array_access; inline]
fn (c Cipher) load_ctr() u64 {
	low := u64(c.nonce[0])
	if c.mode == .standard {
		return low
	}
	// original mode
	high := u64(c.nonce[1])
	return high << 32 | low
}
// max_ctr_value returns the biggest value the cipher's counter can hold in its mode.
@[inline]
fn (c Cipher) max_ctr_value() u64 {
	return if c.mode == .original { max_64bit_counter } else { max_32bit_counter }
}
// derive_xchacha20_key_nonce derives the key and the nonce used by the extended variant
// of the standard mode from `key` and a 24 bytes `nonce`.
// The new key is the result of `xchacha20` on the key and the first 16 bytes of the nonce.
// The new nonce is 12 bytes long: 4 zero bytes followed by the last 8 bytes of `nonce`.
// It returns an error when `nonce` is not x_nonce_size bytes long.
@[direct_array_access; inline]
fn derive_xchacha20_key_nonce(key []u8, nonce []u8) !([]u8, []u8) {
	if nonce.len != x_nonce_size {
		return error('Bad nonce size for derive_xchacha20_key_nonce')
	}
	sub_key := xchacha20(key, nonce[0..16])!
	// bytes 0..4 stay zero, bytes 4..12 receive nonce[16..24]
	mut short_nonce := []u8{len: nonce_size}
	for i in 0 .. 8 {
		short_nonce[4 + i] = nonce[16 + i]
	}
	return sub_key, short_nonce
}
// check_for_ctr_overflow reports whether adding `add_value` to the current counter
// would wrap around the u64 range or exceed the maximum counter value of the cipher's mode.
@[direct_array_access; inline]
fn (c Cipher) check_for_ctr_overflow(add_value u64) bool {
	current := c.load_ctr()
	limit := c.max_ctr_value()
	total := current + add_value
	// a wrapped u64 addition yields a total smaller than either operand
	wrapped := total < current || total < add_value
	return wrapped || total > limit
}