rand: add uuid_v7(), session function, simplify uuid_v4() (#24313)

This commit is contained in:
kbkpbot 2025-04-25 23:30:57 +08:00 committed by GitHub
parent 0bcfd0f9ea
commit e0a7f87693
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 150 additions and 59 deletions

View File

@ -1,60 +1,99 @@
module rand module rand
const clock_seq_hi_and_reserved_valid_values = [`8`, `9`, `a`, `b`]! import time
// uuid_v4 generates a random (v4) UUID // uuid_v4 generates a random (v4) UUID
// See https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random) // See https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random)
// See https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-4
pub fn uuid_v4() string { pub fn uuid_v4() string {
return internal_uuid_v4(mut default_rng) rand_1 := default_rng.u64()
rand_2 := default_rng.u64()
return internal_uuid(4, rand_1, rand_2)
} }
@[direct_array_access] @[direct_array_access; inline]
fn internal_uuid_v4(mut rng PRNG) string { fn internal_uuid(version u8, rand_1 u64, rand_2 u64) string {
buflen := 36 // 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | rand_1 |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | rand_1 | ver | rand_1 |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |var| rand_2 |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | rand_2 |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
mut parts := [8]u16{}
parts[0] = u16(rand_1 >> 48)
parts[1] = u16(rand_1 >> 32)
parts[2] = u16(rand_1 >> 16)
parts[3] = u16(rand_1)
parts[4] = u16(rand_2 >> 48)
parts[5] = u16(rand_2 >> 32)
parts[6] = u16(rand_2 >> 16)
parts[7] = u16(rand_2)
parts[3] = (parts[3] & 0x0FFF) | (u16(version) << 12) // set version
parts[4] = (parts[4] & 0x3FFF) | 0x8000 // set variant = 0b10
mut buf := unsafe { malloc_noscan(37) } mut buf := unsafe { malloc_noscan(37) }
mut i_buf := 0 mut start := 0
mut x := u64(0)
mut d := u8(0)
for i_buf < buflen {
mut c := 0
x = rng.u64()
// do most of the bit manipulation at once:
x &= 0x0F0F0F0F0F0F0F0F
x += 0x3030303030303030
// write the ASCII codes to the buffer:
for c < 8 && i_buf < buflen {
d = u8(x)
unsafe {
buf[i_buf] = if d > 0x39 { d + 0x27 } else { d }
}
i_buf++
c++
x = x >> 8
}
}
// there are still some random bits in x:
x = x >> 8
d = u8(x)
unsafe { unsafe {
// From https://www.ietf.org/rfc/rfc4122.txt : for i in 0 .. 8 {
// >> Set the two most significant bits (bits 6 and 7) of the clock_seq_hi_and_reserved val := parts[i]
// >> to zero and one, respectively. buf[start] = hex_chars[(val >> 12) & 0xF]
// all nibbles starting with 10 are: 1000, 1001, 1010, 1011 -> hex digits `8`, `9`, `a`, `b` buf[start + 1] = hex_chars[(val >> 8) & 0xF]
// these are stored in clock_seq_hi_and_reserved_valid_values, choose one of them at random: buf[start + 2] = hex_chars[(val >> 4) & 0xF]
buf[19] = clock_seq_hi_and_reserved_valid_values[d & 0x03] buf[start + 3] = hex_chars[val & 0xF]
// >> Set the four most significant bits (bits 12 through 15) of the start += 4
// >> time_hi_and_version field to the 4-bit version number from Section 4.1.3. // insert `-` at specified locations
buf[14] = `4` if start in [8, 13, 18, 23]! {
buf[8] = `-` buf[start] = `-`
buf[13] = `-` start++
buf[18] = `-` }
buf[23] = `-` }
buf[buflen] = 0 // ensure the string will be 0 terminated, just in case buf[36] = 0
// for i in 0..37 { println('i: ${i:2} | ${buf[i].ascii_str()} | ${buf[i].hex()} | ${buf[i]:08b}') } return buf.vstring_with_len(36)
return buf.vstring_with_len(buflen)
} }
} }
// uuid_v7 generates a time-ordered (v7) UUID.
// The upper 48 bits hold the current Unix time in milliseconds, followed by
// 16 bits and then 64 bits taken from the default PRNG; `internal_uuid`
// overwrites the version and variant bits.
// See https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-7
pub fn uuid_v7() string {
	unix_ms := u64(time.now().unix_milli())
	// high half: 48 bits of timestamp, then 16 random bits
	hi := (unix_ms << 16) | default_rng.u16()
	// low half: 64 random bits
	lo := default_rng.u64()
	return internal_uuid(7, hi, lo)
}
// UUIDSession holds the state that `next()` uses to generate a sequence of uuid_v7 strings.
pub struct UUIDSession {
mut:
counter u8 // 6 bits session counter; incremented by every `next()` call, only the low 6 bits are used
}
// new_uuid_v7_session creates a new session for generating uuid_v7 values.
// Within a session, the 12 bits `rand_a` field of RFC 9562 is replaced by
// a 6 bits sub-millisecond timestamp + a 6 bits session counter.
// See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=78c5e141e9c139fc2ff36a220334e4aa25e1b0eb
pub fn new_uuid_v7_session() UUIDSession {
return UUIDSession{}
}
// next returns a new uuid_v7 string from the current session.
// Layout of the upper 64 bits before version/variant are applied:
//   48 bits Unix timestamp in milliseconds (as required by RFC 9562)
//    4 bits left free for `version`
//    6 bits sub-millisecond fraction of the current time
//    6 bits session counter
// The lower 64 bits come from the default PRNG.
// The session counter is advanced on every call.
pub fn (mut u UUIDSession) next() string {
	now_ns := u64(time.now().unix_nano())
	// RFC 9562 defines the leading 48 bits as *milliseconds* since the Unix epoch,
	// so derive them from the nanosecond clock instead of slicing its raw bits.
	unix_ms := now_ns / 1_000_000
	// scale the nanoseconds elapsed inside the current millisecond (0..999_999) to 6 bits (0..63)
	sub_ms := ((now_ns % 1_000_000) << 6) / 1_000_000
	// bits 12..15 stay zero here; `internal_uuid` stores the version there
	rand_1 := (unix_ms << 16) | (sub_ms << 6) | u64(u.counter & 0x3F) // 6 bits session counter
	rand_2 := default_rng.u64()
	u.counter++
	return internal_uuid(7, rand_1, rand_2)
}
const ulid_encoding = '0123456789ABCDEFGHJKMNPQRSTVWXYZ' const ulid_encoding = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
@[direct_array_access] @[direct_array_access]

View File

@ -701,7 +701,7 @@ pub fn read(mut buf []u8) {
} }
const english_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' const english_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
const hex_chars = 'abcdef0123456789' const hex_chars = '0123456789abcdef'
const ascii_chars = '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~' const ascii_chars = '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~'
// ulid generates an unique lexicographically sortable identifier. // ulid generates an unique lexicographically sortable identifier.

View File

@ -21,6 +21,58 @@ fn test_rand_uuid_v4() {
} }
} }
// uuid_v7:
// Checks that consecutive v7 UUIDs differ, are 36 characters long,
// and always carry version 7 and the 0b10 variant.
fn test_rand_uuid_v7() {
	first := rand.uuid_v7()
	second := rand.uuid_v7()
	third := rand.uuid_v7()
	// pairwise distinct
	assert first != second
	assert first != third
	assert second != third
	// canonical textual length
	for u in [first, second, third] {
		assert u.len == 36
	}
	for _ in 0 .. 1000 {
		sample := rand.uuid_v7()
		// the version nibble must be 7:
		assert sample[14] == `7`
		// the variant nibble must start with 0b10:
		assert sample[19] in [`8`, `9`, `a`, `b`]
	}
}
// uuid_v7_session:
// Checks that UUIDs from one session differ, are 36 characters long, carry
// version 7 and the 0b10 variant, and expose an increasing session counter.
fn test_rand_uuid_v7_session() {
	mut session := rand.new_uuid_v7_session()
	first := session.next()
	second := session.next()
	third := session.next()
	// pairwise distinct
	assert first != second
	assert first != third
	assert second != third
	// canonical textual length
	for u in [first, second, third] {
		assert u.len == 36
	}
	// three ids were already issued, so the low counter nibble starts at 3
	// and then walks through the hex digits, wrapping from `f` back to `0`
	hex_digits := '0123456789abcdef'
	for n in 0 .. 1000 {
		sample := session.next()
		// the version nibble must be 7:
		assert sample[14] == `7`
		// the variant nibble must start with 0b10:
		assert sample[19] in [`8`, `9`, `a`, `b`]
		// verify counter increase
		assert sample[17] == hex_digits[(n + 3) % 16]
	}
}
// ulids: // ulids:
fn test_ulids_are_unique() { fn test_ulids_are_unique() {
ulid1 := rand.ulid() ulid1 := rand.ulid()

View File

@ -302,20 +302,20 @@ fn test_rand_string() {
fn test_rand_hex() { fn test_rand_hex() {
rand.seed([u32(0), 1]) rand.seed([u32(0), 1])
outputs := [ outputs := [
'847b633d9f9765c1a84d38035', 'ead1c993f5fdcb270ea39e69b',
'efdef342641958db89cfdb4e1', '453459a8ca7fbe31ef2531a47',
'704ee34204d29e9e99aca0ae0', 'd6a449a86a38f4f4ff0206046',
'0c8e1fd5472f65fc4b9668adf', '62e4753bad85cb52a1fcce035',
'3349538378c2023ef7f14dfbe', '99afb9e9de2868945d57a3514',
'ae4080a0cb4cbb0693c68037b', '04a6e60621a2116cf92ce69d1',
'90e3a7be588b3dfeb3663c97f', 'f6490d14bee1935419cc92fd5',
'f25a82eb559ab6f0288bd8590', '58b0e841bbf01c568ee13ebf6',
'649f579cb93e9f414d9f40539', 'caf5bdf21f94f5a7a3f5a6b9f',
'553a210a52bcbfbafb0783850', 'bb908760b8121510516de9eb6',
'3daef80b45ef518d30c6db6db', '93045e61ab45b7e3962c31c31',
'56a187106e6e5fb88761024a5', 'bc07ed76c4c4b51eedc768a0b',
'b5cd8b7a24054d7dc66e62f88', '1b23e1d08a6ba3d32cc4c85ee',
'306eed0c4207d8db185f04afd', '96c44362a86d3e317eb56a053',
] ]
for output in outputs { for output in outputs {
assert rand.hex(25) == output assert rand.hex(25) == output