From e0a7f87693be16758d65d3776dbb174ceed2f9fc Mon Sep 17 00:00:00 2001 From: kbkpbot Date: Fri, 25 Apr 2025 23:30:57 +0800 Subject: [PATCH] rand: add uuid_v7(), session function, simplify uuid_v4() (#24313) --- vlib/rand/rand.c.v | 127 ++++++++++++++++++---------- vlib/rand/rand.v | 2 +- vlib/rand/random_identifiers_test.v | 52 ++++++++++++ vlib/rand/random_numbers_test.v | 28 +++--- 4 files changed, 150 insertions(+), 59 deletions(-) diff --git a/vlib/rand/rand.c.v b/vlib/rand/rand.c.v index e6d574097f..8ccf486cf7 100644 --- a/vlib/rand/rand.c.v +++ b/vlib/rand/rand.c.v @@ -1,60 +1,99 @@ module rand -const clock_seq_hi_and_reserved_valid_values = [`8`, `9`, `a`, `b`]! +import time // uuid_v4 generates a random (v4) UUID // See https://en.wikipedia.org/wiki/Universally_unique_identifier#Version_4_(random) +// See https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-4 pub fn uuid_v4() string { - return internal_uuid_v4(mut default_rng) + rand_1 := default_rng.u64() + rand_2 := default_rng.u64() + return internal_uuid(4, rand_1, rand_2) } -@[direct_array_access] -fn internal_uuid_v4(mut rng PRNG) string { - buflen := 36 +@[direct_array_access; inline] +fn internal_uuid(version u8, rand_1 u64, rand_2 u64) string { + // 0 1 2 3 + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | rand_1 | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | rand_1 | ver | rand_1 | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // |var| rand_2 | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | rand_2 | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + mut parts := [8]u16{} + parts[0] = u16(rand_1 >> 48) + parts[1] = u16(rand_1 >> 32) + parts[2] = u16(rand_1 >> 16) + parts[3] = u16(rand_1) + parts[4] = u16(rand_2 >> 48) + parts[5] = u16(rand_2 >> 32) + parts[6] = u16(rand_2 >> 16) 
+ parts[7] = u16(rand_2) + + parts[3] = (parts[3] & 0x0FFF) | (u16(version) << 12) // set version + parts[4] = (parts[4] & 0x3FFF) | 0x8000 // set variant = 0b10 + mut buf := unsafe { malloc_noscan(37) } - mut i_buf := 0 - mut x := u64(0) - mut d := u8(0) - for i_buf < buflen { - mut c := 0 - x = rng.u64() - // do most of the bit manipulation at once: - x &= 0x0F0F0F0F0F0F0F0F - x += 0x3030303030303030 - // write the ASCII codes to the buffer: - for c < 8 && i_buf < buflen { - d = u8(x) - unsafe { - buf[i_buf] = if d > 0x39 { d + 0x27 } else { d } - } - i_buf++ - c++ - x = x >> 8 - } - } - // there are still some random bits in x: - x = x >> 8 - d = u8(x) + mut start := 0 unsafe { - // From https://www.ietf.org/rfc/rfc4122.txt : - // >> Set the two most significant bits (bits 6 and 7) of the clock_seq_hi_and_reserved - // >> to zero and one, respectively. - // all nibbles starting with 10 are: 1000, 1001, 1010, 1011 -> hex digits `8`, `9`, `a`, `b` - // these are stored in clock_seq_hi_and_reserved_valid_values, choose one of them at random: - buf[19] = clock_seq_hi_and_reserved_valid_values[d & 0x03] - // >> Set the four most significant bits (bits 12 through 15) of the - // >> time_hi_and_version field to the 4-bit version number from Section 4.1.3. - buf[14] = `4` - buf[8] = `-` - buf[13] = `-` - buf[18] = `-` - buf[23] = `-` - buf[buflen] = 0 // ensure the string will be 0 terminated, just in case - // for i in 0..37 { println('i: ${i:2} | ${buf[i].ascii_str()} | ${buf[i].hex()} | ${buf[i]:08b}') } - return buf.vstring_with_len(buflen) + for i in 0 .. 8 { + val := parts[i] + buf[start] = hex_chars[(val >> 12) & 0xF] + buf[start + 1] = hex_chars[(val >> 8) & 0xF] + buf[start + 2] = hex_chars[(val >> 4) & 0xF] + buf[start + 3] = hex_chars[val & 0xF] + start += 4 + // insert `-` at specified locations + if start in [8, 13, 18, 23]! 
{ + buf[start] = `-` + start++ + } + } + buf[36] = 0 + return buf.vstring_with_len(36) } } +// uuid_v7 generates a time-ordered (v7) UUID +// See https://datatracker.ietf.org/doc/html/rfc9562#name-uuid-version-7 +pub fn uuid_v7() string { + timestamp_48 := u64(time.now().unix_milli()) << 16 + rand_1 := timestamp_48 | default_rng.u16() + rand_2 := default_rng.u64() + return internal_uuid(7, rand_1, rand_2) +} + +pub struct UUIDSession { +mut: + counter u8 // 6 bits session counter +} + +// new_uuid_v7_session creates a new session for generating uuid_v7. +// The 12 bits `rand_a` in RFC 9562 are replaced by 6 bits +// sub-millisecond timestamp + 6 bits session counter. +// See https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=78c5e141e9c139fc2ff36a220334e4aa25e1b0eb +pub fn new_uuid_v7_session() UUIDSession { + return UUIDSession{} +} + +// next gets a new uuid_v7 from the current session. +pub fn (mut u UUIDSession) next() string { + timestamp := u64(time.now().unix_nano()) + // make place for holding 4 bits `version` + timestamp_shift_4bits := (timestamp & 0xFFFF_FFFF_FFFF_0000) | ((timestamp & 0x0000_0000_0000_FFFF) >> 4) + rand_1 := (timestamp_shift_4bits & 0xFFFF_FFFF_FFFF_FFC0) | u64(u.counter & 0x3F) // 6 bits session counter + rand_2 := default_rng.u64() + + u.counter++ + + return internal_uuid(7, rand_1, rand_2) +} + const ulid_encoding = '0123456789ABCDEFGHJKMNPQRSTVWXYZ' @[direct_array_access] diff --git a/vlib/rand/rand.v b/vlib/rand/rand.v index 7366ad4d70..8037a78de6 100644 --- a/vlib/rand/rand.v +++ b/vlib/rand/rand.v @@ -701,7 +701,7 @@ pub fn read(mut buf []u8) { } const english_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' -const hex_chars = 'abcdef0123456789' +const hex_chars = '0123456789abcdef' const ascii_chars = '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz{|}~' // ulid generates an unique lexicographically sortable identifier. 
diff --git a/vlib/rand/random_identifiers_test.v b/vlib/rand/random_identifiers_test.v index 9c8ab74a56..dd63f38891 100644 --- a/vlib/rand/random_identifiers_test.v +++ b/vlib/rand/random_identifiers_test.v @@ -21,6 +21,58 @@ fn test_rand_uuid_v4() { } } +// uuid_v7: +fn test_rand_uuid_v7() { + uuid1 := rand.uuid_v7() + uuid2 := rand.uuid_v7() + uuid3 := rand.uuid_v7() + assert uuid1 != uuid2 + assert uuid1 != uuid3 + assert uuid2 != uuid3 + assert uuid1.len == 36 + assert uuid2.len == 36 + assert uuid3.len == 36 + for i in 0 .. 1000 { + x := rand.uuid_v7() + // check the version field is always 7: + assert x[14] == `7` + // and variant field is always 0b10: + assert x[19] in [`8`, `9`, `a`, `b`] + } +} + +// uuid_v7_session: UUIDs drawn from a single UUIDSession through next() +fn test_rand_uuid_v7_session() { + mut u := rand.new_uuid_v7_session() + uuid1 := u.next() + uuid2 := u.next() + uuid3 := u.next() + assert uuid1 != uuid2 + assert uuid1 != uuid3 + assert uuid2 != uuid3 + assert uuid1.len == 36 + assert uuid2.len == 36 + assert uuid3.len == 36 + mut prev_counter := `3` + for i in 0 .. 
1000 { + x := u.next() + // check the version field is always 7: + assert x[14] == `7` + // and variant field is always 0b10: + assert x[19] in [`8`, `9`, `a`, `b`] + + // verify counter increase: x[17] is the lowest hex digit of the session counter, expected to start at `3` because next() was already called three times above + assert x[17] == prev_counter + if prev_counter == `9` { + prev_counter = `a` + } else if prev_counter == `f` { + prev_counter = `0` + } else { + prev_counter++ + } + } +} + // ulids: fn test_ulids_are_unique() { ulid1 := rand.ulid() diff --git a/vlib/rand/random_numbers_test.v b/vlib/rand/random_numbers_test.v index 59456ed828..b26828689f 100644 --- a/vlib/rand/random_numbers_test.v +++ b/vlib/rand/random_numbers_test.v @@ -302,20 +302,20 @@ fn test_rand_string() { fn test_rand_hex() { rand.seed([u32(0), 1]) outputs := [ - '847b633d9f9765c1a84d38035', - 'efdef342641958db89cfdb4e1', - '704ee34204d29e9e99aca0ae0', - '0c8e1fd5472f65fc4b9668adf', - '3349538378c2023ef7f14dfbe', - 'ae4080a0cb4cbb0693c68037b', - '90e3a7be588b3dfeb3663c97f', - 'f25a82eb559ab6f0288bd8590', - '649f579cb93e9f414d9f40539', - '553a210a52bcbfbafb0783850', - '3daef80b45ef518d30c6db6db', - '56a187106e6e5fb88761024a5', - 'b5cd8b7a24054d7dc66e62f88', - '306eed0c4207d8db185f04afd', + 'ead1c993f5fdcb270ea39e69b', + '453459a8ca7fbe31ef2531a47', + 'd6a449a86a38f4f4ff0206046', + '62e4753bad85cb52a1fcce035', + '99afb9e9de2868945d57a3514', + '04a6e60621a2116cf92ce69d1', + 'f6490d14bee1935419cc92fd5', + '58b0e841bbf01c568ee13ebf6', + 'caf5bdf21f94f5a7a3f5a6b9f', + 'bb908760b8121510516de9eb6', + '93045e61ab45b7e3962c31c31', + 'bc07ed76c4c4b51eedc768a0b', + '1b23e1d08a6ba3d32cc4c85ee', + '96c44362a86d3e317eb56a053', ] for output in outputs { assert rand.hex(25) == output