examples: add a solution to the "1 Billion Row Challenge" (#23458)

This commit is contained in:
syrmel 2025-01-15 08:09:11 +01:00 committed by GitHub
parent e0303b2221
commit b1d2593c1b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 705 additions and 0 deletions

View File

@ -242,6 +242,7 @@ pub fn new_test_session(_vargs string, will_compile bool) TestSession {
skip_files << 'examples/database/psql/customer.v'
}
$if windows {
skip_files << 'examples/1brc/solution/main.v' // requires mmap
skip_files << 'examples/database/mysql.v'
skip_files << 'examples/database/orm.v'
skip_files << 'examples/smtp/mail.v' // requires OpenSSL

53
examples/1brc/README.md Normal file
View File

@ -0,0 +1,53 @@
# 1 Billion Row Challenge (1BRC)
A solution to the [1 Billion Row Challenge](https://www.morling.dev/blog/one-billion-row-challenge/),
written in [the V programming language](https://vlang.io/).
Read more about the challenge here: https://www.morling.dev/blog/one-billion-row-challenge/
## Running instructions
Suggested compiler options for performance:
`v -cc gcc -prod -cflags "-std=c17 -march=native -mtune=native" .`
### Step 1: Create a measurements file
Compile and run `make-samples` to create the sample file.
```
cd make-samples
v -cc gcc -prod -cflags "-std=c17 -march=native -mtune=native" .
./make-samples 1000000000 > ~/measurements.txt
```
NOTE: If you create a billion rows, the file will be about 12GB!
### Step 2: Run (and time) the solution
```
cd solution
v -cc gcc -prod -cflags "-std=c17 -march=native -mtune=native" .
./solution ~/measurements.txt
```
You can time the solution using `v time`:
`v time ./solution ~/measurements.txt`
By default, the solution runs in a single thread. To process the file
in parallel, pass the number of threads with the `-n` parameter. For
example, to run with 8 threads:
`./solution -n 8 ~/measurements.txt`
On Linux, to run one thread per core, use
`./solution -n $(nproc) ~/measurements.txt`
### Step 3: Improve upon the solution
Make changes that improve the performance and submit them.
Let's show off what is possible in V!

View File

@ -0,0 +1,413 @@
Abha,18.0
Abidjan,26.0
Abéché,29.4
Accra,26.4
Addis Ababa,16.0
Adelaide,17.3
Aden,29.1
Ahvaz,25.4
Albuquerque,14.0
Alexandra,11.0
Alexandria,20.0
Algiers,18.2
Alice Springs,21.0
Almaty,10.0
Amsterdam,10.2
Anadyr,-6.9
Anchorage,2.8
Andorra la Vella,9.8
Ankara,12.0
Antananarivo,17.9
Antsiranana,25.2
Arkhangelsk,1.3
Ashgabat,17.1
Asmara,15.6
Assab,30.5
Astana,3.5
Athens,19.2
Atlanta,17.0
Auckland,15.2
Austin,20.7
Baghdad,22.77
Baguio,19.5
Baku,15.1
Baltimore,13.1
Bamako,27.8
Bangkok,28.6
Bangui,26.0
Banjul,26.0
Barcelona,18.2
Bata,25.1
Batumi,14.0
Beijing,12.9
Beirut,20.9
Belgrade,12.5
Belize City,26.7
Benghazi,19.9
Bergen,7.7
Berlin,10.3
Bilbao,14.7
Birao,26.5
Bishkek,11.3
Bissau,27.0
Blantyre,22.2
Bloemfontein,15.6
Boise,11.4
Bordeaux,14.2
Bosaso,30.0
Boston,10.9
Bouaké,26.0
Bratislava,10.5
Brazzaville,25.0
Bridgetown,27.0
Brisbane,21.4
Brussels,10.5
Bucharest,10.8
Budapest,11.3
Bujumbura,23.8
Bulawayo,18.9
Burnie,13.1
Busan,15.0
Cabo San Lucas,23.9
Cairns,25.0
Cairo,21.4
Calgary,4.4
Canberra,13.1
Cape Town,16.2
Changsha,17.4
Charlotte,16.1
Chiang Mai,25.8
Chicago,9.8
Chihuahua,18.6
Chișinău,10.2
Chittagong,25.9
Chongqing,18.6
Christchurch,12.2
City of San Marino,11.8
Colombo,27.4
Columbus,11.7
Conakry,26.4
Copenhagen,9.1
Cotonou,27.2
Cracow,9.3
Da Lat,17.9
Da Nang,25.8
Dakar,24.0
Dallas,19.0
Damascus,17.0
Dampier,26.4
Dar es Salaam,25.8
Darwin,27.6
Denpasar,23.7
Denver,10.4
Detroit,10.0
Dhaka,25.9
Dikson,-11.1
Dili,26.6
Djibouti,29.9
Dodoma,22.7
Dolisie,24.0
Douala,26.7
Dubai,26.9
Dublin,9.8
Dunedin,11.1
Durban,20.6
Dushanbe,14.7
Edinburgh,9.3
Edmonton,4.2
El Paso,18.1
Entebbe,21.0
Erbil,19.5
Erzurum,5.1
Fairbanks,-2.3
Fianarantsoa,17.9
Flores, Petén,26.4
Frankfurt,10.6
Fresno,17.9
Fukuoka,17.0
Gabès,19.5
Gaborone,21.0
Gagnoa,26.0
Gangtok,15.2
Garissa,29.3
Garoua,28.3
George Town,27.9
Ghanzi,21.4
Gjoa Haven,-14.4
Guadalajara,20.9
Guangzhou,22.4
Guatemala City,20.4
Halifax,7.5
Hamburg,9.7
Hamilton,13.8
Hanga Roa,20.5
Hanoi,23.6
Harare,18.4
Harbin,5.0
Hargeisa,21.7
Hat Yai,27.0
Havana,25.2
Helsinki,5.9
Heraklion,18.9
Hiroshima,16.3
Ho Chi Minh City,27.4
Hobart,12.7
Hong Kong,23.3
Honiara,26.5
Honolulu,25.4
Houston,20.8
Ifrane,11.4
Indianapolis,11.8
Iqaluit,-9.3
Irkutsk,1.0
Istanbul,13.9
İzmir,17.9
Jacksonville,20.3
Jakarta,26.7
Jayapura,27.0
Jerusalem,18.3
Johannesburg,15.5
Jos,22.8
Juba,27.8
Kabul,12.1
Kampala,20.0
Kandi,27.7
Kankan,26.5
Kano,26.4
Kansas City,12.5
Karachi,26.0
Karonga,24.4
Kathmandu,18.3
Khartoum,29.9
Kingston,27.4
Kinshasa,25.3
Kolkata,26.7
Kuala Lumpur,27.3
Kumasi,26.0
Kunming,15.7
Kuopio,3.4
Kuwait City,25.7
Kyiv,8.4
Kyoto,15.8
La Ceiba,26.2
La Paz,23.7
Lagos,26.8
Lahore,24.3
Lake Havasu City,23.7
Lake Tekapo,8.7
Las Palmas de Gran Canaria,21.2
Las Vegas,20.3
Launceston,13.1
Lhasa,7.6
Libreville,25.9
Lisbon,17.5
Livingstone,21.8
Ljubljana,10.9
Lodwar,29.3
Lomé,26.9
London,11.3
Los Angeles,18.6
Louisville,13.9
Luanda,25.8
Lubumbashi,20.8
Lusaka,19.9
Luxembourg City,9.3
Lviv,7.8
Lyon,12.5
Madrid,15.0
Mahajanga,26.3
Makassar,26.7
Makurdi,26.0
Malabo,26.3
Malé,28.0
Managua,27.3
Manama,26.5
Mandalay,28.0
Mango,28.1
Manila,28.4
Maputo,22.8
Marrakesh,19.6
Marseille,15.8
Maun,22.4
Medan,26.5
Mek'ele,22.7
Melbourne,15.1
Memphis,17.2
Mexicali,23.1
Mexico City,17.5
Miami,24.9
Milan,13.0
Milwaukee,8.9
Minneapolis,7.8
Minsk,6.7
Mogadishu,27.1
Mombasa,26.3
Monaco,16.4
Moncton,6.1
Monterrey,22.3
Montreal,6.8
Moscow,5.8
Mumbai,27.1
Murmansk,0.6
Muscat,28.0
Mzuzu,17.7
N'Djamena,28.3
Naha,23.1
Nairobi,17.8
Nakhon Ratchasima,27.3
Napier,14.6
Napoli,15.9
Nashville,15.4
Nassau,24.6
Ndola,20.3
New Delhi,25.0
New Orleans,20.7
New York City,12.9
Ngaoundéré,22.0
Niamey,29.3
Nicosia,19.7
Niigata,13.9
Nouadhibou,21.3
Nouakchott,25.7
Novosibirsk,1.7
Nuuk,-1.4
Odesa,10.7
Odienné,26.0
Oklahoma City,15.9
Omaha,10.6
Oranjestad,28.1
Oslo,5.7
Ottawa,6.6
Ouagadougou,28.3
Ouahigouya,28.6
Ouarzazate,18.9
Oulu,2.7
Palembang,27.3
Palermo,18.5
Palm Springs,24.5
Palmerston North,13.2
Panama City,28.0
Parakou,26.8
Paris,12.3
Perth,18.7
Petropavlovsk-Kamchatsky,1.9
Philadelphia,13.2
Phnom Penh,28.3
Phoenix,23.9
Pittsburgh,10.8
Podgorica,15.3
Pointe-Noire,26.1
Pontianak,27.7
Port Moresby,26.9
Port Sudan,28.4
Port Vila,24.3
Port-Gentil,26.0
Portland {OR},12.4
Porto,15.7
Prague,8.4
Praia,24.4
Pretoria,18.2
Pyongyang,10.8
Rabat,17.2
Rangpur,24.4
Reggane,28.3
Reykjavík,4.3
Riga,6.2
Riyadh,26.0
Rome,15.2
Roseau,26.2
Rostov-on-Don,9.9
Sacramento,16.3
Saint Petersburg,5.8
Saint-Pierre,5.7
Salt Lake City,11.6
San Antonio,20.8
San Diego,17.8
San Francisco,14.6
San Jose,16.4
San José,22.6
San Juan,27.2
San Salvador,23.1
Sana'a,20.0
Santo Domingo,25.9
Sapporo,8.9
Sarajevo,10.1
Saskatoon,3.3
Seattle,11.3
Ségou,28.0
Seoul,12.5
Seville,19.2
Shanghai,16.7
Singapore,27.0
Skopje,12.4
Sochi,14.2
Sofia,10.6
Sokoto,28.0
Split,16.1
St. John's,5.0
St. Louis,13.9
Stockholm,6.6
Surabaya,27.1
Suva,25.6
Suwałki,7.2
Sydney,17.7
Tabora,23.0
Tabriz,12.6
Taipei,23.0
Tallinn,6.4
Tamale,27.9
Tamanrasset,21.7
Tampa,22.9
Tashkent,14.8
Tauranga,14.8
Tbilisi,12.9
Tegucigalpa,21.7
Tehran,17.0
Tel Aviv,20.0
Thessaloniki,16.0
Thiès,24.0
Tijuana,17.8
Timbuktu,28.0
Tirana,15.2
Toamasina,23.4
Tokyo,15.4
Toliara,24.1
Toluca,12.4
Toronto,9.4
Tripoli,20.0
Tromsø,2.9
Tucson,20.9
Tunis,18.4
Ulaanbaatar,-0.4
Upington,20.4
Ürümqi,7.4
Vaduz,10.1
Valencia,18.3
Valletta,18.8
Vancouver,10.4
Veracruz,25.4
Vienna,10.4
Vientiane,25.9
Villahermosa,27.1
Vilnius,6.0
Virginia Beach,15.8
Vladivostok,4.9
Warsaw,8.5
Washington, D.C.,14.6
Wau,27.8
Wellington,12.9
Whitehorse,-0.1
Wichita,13.9
Willemstad,28.0
Winnipeg,3.0
Wrocław,9.6
Xi'an,14.1
Yakutsk,-8.8
Yangon,27.5
Yaoundé,23.8
Yellowknife,-4.3
Yerevan,12.4
Yinchuan,9.0
Zagreb,10.7
Zanzibar City,26.0
Zürich,9.3

View File

@ -0,0 +1,39 @@
import encoding.csv
import flag
import os
import rand
// CityMean is one row of the cities file: a city name and the mean
// temperature that generated samples for that city are centred on.
struct CityMean {
// name of the city, taken from the first CSV field
city string
// mean temperature, parsed from the second CSV field
mean f64
}
// main parses the command line, loads the city/mean table and prints the
// requested number of `city;temperature` rows to stdout.
//
// Each temperature is drawn from a normal distribution centred on the mean of
// the city it is printed for, with a standard deviation of 10.
fn main() {
	mut fp := flag.new_flag_parser(os.args)
	fp.version('1brc sample generator v1.0.0')
	fp.skip_executable()
	fp.application('Sample generator for 1 billion rows challenge')
	fp.description('The 1 billion rows challenge solved in V.\nFor details, see https://www.morling.dev/blog/one-billion-row-challenge/')
	input_file := fp.string('city-file', `i`, 'cities.txt', 'Path to input file with cities and means list')
	fp.limit_free_args_to_exactly(1)!
	sample_count := fp.remaining_parameters()[0].u64()
	content := os.read_file(input_file) or { panic(err) }
	mut reader := csv.new_reader(content, csv.ReaderConfig{ delimiter: `,` })
	mut means := []CityMean{}
	for {
		rec := reader.read() or { break }
		if rec.len < 2 {
			// Not a `city,mean` row; nothing usable to record.
			continue
		}
		// Some city names contain the delimiter themselves (e.g. "Washington, D.C."),
		// so the mean is always the last field and everything before it is the name.
		means << CityMean{
			city: rec[..rec.len - 1].join(',')
			mean: rec.last().f64()
		}
	}
	if means.len == 0 {
		panic('no cities could be read from ${input_file}')
	}
	// normal_pair yields two independent deviates per call, so rows are produced
	// two at a time. The deviates are zero-centred and shifted by the mean of the
	// city they are printed for, so each row uses its own city's mean.
	for _ in 0 .. sample_count / 2 {
		first := rand.intn(means.len)!
		second := rand.intn(means.len)!
		d1, d2 := rand.normal_pair(mu: 0, sigma: 10)!
		t1 := means[first].mean + d1
		t2 := means[second].mean + d2
		println('${means[first].city};${t1:.1f}')
		println('${means[second].city};${t2:.1f}')
	}
	// An odd sample count leaves one row that the pairwise loop did not emit.
	if sample_count % 2 == 1 {
		idx := rand.intn(means.len)!
		d, _ := rand.normal_pair(mu: 0, sigma: 10)!
		t := means[idx].mean + d
		println('${means[idx].city};${t:.1f}')
	}
}

View File

@ -0,0 +1,199 @@
import flag
import math
import os
// <sys/mman.h> provides mmap/munmap and the PROT_*/MAP_* constants used below.
#include <sys/mman.h>
// C.mmap is the V-side declaration of the C `mmap` function; mmap_file calls it
// to map the input file.
fn C.mmap(addr voidptr, len u64, prot int, flags int, fd int, offset i64) voidptr
// C.munmap is the V-side declaration of the C `munmap` function; unmap() treats
// a non-zero return value as failure.
fn C.munmap(addr voidptr, len u64) int
// MemoryMappedFile bundles an open file with the address and length of its
// memory mapping, as created by mmap_file and released by unmap().
struct MemoryMappedFile {
// length in bytes passed to mmap/munmap; filled from os.file_size
size u64
mut:
// address returned by C.mmap, viewed as a byte pointer
data &u8
// file handle backing the mapping; closed by unmap()
file os.File
}
// mmap_file opens `path` read-only and maps its entire contents into memory.
//
// It panics if the file cannot be opened or if the mapping fails (which
// includes an empty file, since a zero-length mapping is rejected by mmap).
// The caller releases the mapping and the file handle with `unmap()`.
fn mmap_file(path string) MemoryMappedFile {
	mut mf := MemoryMappedFile{
		file: os.open_file(path, 'r', 0) or { panic('failed to open ${path}: ${err}') }
		size: os.file_size(path)
		data: C.NULL
	}
	addr := C.mmap(C.NULL, mf.size, C.PROT_READ, C.MAP_SHARED, mf.file.fd, 0)
	// mmap reports failure with MAP_FAILED, i.e. (void *)-1, never with NULL.
	if isize(addr) == -1 {
		// Read errno before close() has a chance to overwrite it.
		code := C.errno
		mf.file.close()
		panic('(${code}) mmap() failed for ${path} (${mf.size} bytes)')
	}
	mf.data = &u8(addr)
	return mf
}
// unmap releases the memory mapping and then closes the backing file.
// It panics, reporting errno, when munmap signals an error.
fn (mut mf MemoryMappedFile) unmap() {
	status := C.munmap(mf.data, mf.size)
	if status != 0 {
		panic('(${C.errno}) munmap() failed')
	}
	mf.file.close()
}
// ReadState tells process_chunk which part of a `city;temperature` line the
// current byte belongs to.
enum ReadState {
// reading the city name, up to the `;` separator
city
// reading the temperature, up to the terminating newline
temp
}
// Result accumulates the statistics of one city.
// Temperatures are stored as integers with the decimal point dropped
// (process_chunk skips the `.`), and format_value divides by 10 again for output.
// NOTE(review): `sum` is an i32; many rows for one city, or large readings,
// could overflow it — confirm the expected input range.
struct Result {
pub mut:
// smallest reading seen
min i32
// largest reading seen
max i32
// running total of all readings
sum i32
// number of readings folded in
count u32
}
// format_value renders a temperature stored in tenths (e.g. -123) as a decimal
// string with one fractional digit (e.g. '-12.3').
//
// The sign is emitted separately: integer division truncates toward zero, so
// values between -9 and -1 would otherwise lose their minus sign
// (-5 must print as '-0.5', not '0.5').
fn format_value(value i32) string {
	sign := if value < 0 { '-' } else { '' }
	// Widen before taking the absolute value so the most negative i32 cannot overflow.
	magnitude := math.abs(i64(value))
	return '${sign}${magnitude / 10}.${magnitude % 10}'
}
// print_results writes one `city=min/mean/max` entry per city, ordered by city name.
// With `print_nicely` each entry goes on its own line; otherwise all entries are
// joined with ', ' and wrapped in braces.
fn print_results(results map[string]Result, print_nicely bool) {
	mut names := results.keys()
	names.sort()
	mut entries := []string{cap: results.len}
	for name in names {
		stats := results[name]
		lowest := format_value(stats.min)
		highest := format_value(stats.max)
		// sum is kept in tenths, hence the extra division by 10.
		mean := f64(stats.sum) / stats.count / 10
		entries << '${name}=${lowest}/${mean:.1f}/${highest}'
	}
	if !print_nicely {
		println('{' + entries.join(', ') + '}')
		return
	}
	println(entries.join('\n'))
}
// combine_results merges several per-chunk result maps into one map.
// For a city present in more than one input, min and max are widened and
// sum and count are added together.
fn combine_results(results []map[string]Result) map[string]Result {
	mut merged := map[string]Result{}
	for partial in results {
		for name, stats in partial {
			if name in merged {
				// Update a local copy, then store it back under the same key.
				mut acc := merged[name]
				if stats.max > acc.max {
					acc.max = stats.max
				}
				if stats.min < acc.min {
					acc.min = stats.min
				}
				acc.sum += stats.sum
				acc.count += stats.count
				merged[name] = acc
				continue
			}
			merged[name] = stats
		}
	}
	return merged
}
// process_chunk parses the bytes `addr[from]` .. `addr[to - 1]` as
// `city;temperature` lines and returns the per-city statistics.
//
// Temperatures are accumulated digit by digit with the `.` skipped, so they are
// stored without the decimal point. A record is committed when its newline is
// seen, and also at the end of the range if the last record has no newline
// inside it (a file without a trailing newline, or a chunk whose end excludes
// the newline). Carriage returns are ignored so CRLF input parses the same.
@[direct_array_access]
fn process_chunk(addr &u8, from u64, to u64) map[string]Result {
	mut results := map[string]Result{}
	mut state := ReadState.city
	mut city := ''
	mut temp := i32(0)
	mut mod := i32(1)
	// j counts the bytes of the city name read so far on the current line.
	mut j := int(0)
	for i in from .. to {
		c := unsafe { u8(addr[i]) }
		match state {
			.city {
				match c {
					`;` {
						state = .temp
						// The name is the j bytes that precede the separator.
						// NOTE(review): `tos` presumably wraps the bytes without copying — confirm.
						city = unsafe { tos(addr[i - u64(j)], j) }
					}
					else {
						j += 1
					}
				}
			}
			.temp {
				match c {
					`\n` {
						record_measurement(mut results, city, temp * mod)
						state = .city
						temp = 0
						mod = 1
						j = 0
					}
					`-` {
						mod = -1
					}
					`.`, `\r` {}
					else {
						// ASCII 48 = '0' ... ASCII 57 = '9' => (ASCII value) - 48 = decimal value
						temp = temp * 10 + (c - 48)
					}
				}
			}
		}
	}
	// The range ended in the middle of a temperature: commit that record too
	// instead of silently dropping it.
	if state == .temp {
		record_measurement(mut results, city, temp * mod)
	}
	return results
}

// record_measurement folds one reading for `city` into `results`, creating the
// entry on first sight and otherwise updating min, max, sum and count.
@[inline]
fn record_measurement(mut results map[string]Result, city string, temp i32) {
	if city !in results {
		results[city] = Result{
			min:   temp
			max:   temp
			sum:   temp
			count: 1
		}
		return
	}
	if temp > results[city].max {
		results[city].max = temp
	}
	if temp < results[city].min {
		results[city].min = temp
	}
	results[city].sum += temp
	results[city].count += 1
}
// process_in_parallel splits the mapped file into up to `thread_count` chunks
// that each end on a line boundary, parses every chunk in its own thread via
// process_chunk, and merges the partial results with combine_results.
//
// Each chunk's end index is placed just past its terminating newline, so the
// last record of every chunk is complete within the chunk. Fewer threads than
// requested are started when the file has too few lines to fill them.
fn process_in_parallel(mf MemoryMappedFile, thread_count u32) map[string]Result {
	// A zero thread count would divide by zero below; treat it as one worker.
	workers := if thread_count == 0 { u32(1) } else { thread_count }
	mut threads := []thread map[string]Result{}
	approx_chunk_size := mf.size / workers
	mut from := u64(0)
	for _ in 0 .. workers - 1 {
		mut to := from + approx_chunk_size
		// Advance to the next newline, never reading past the end of the mapping.
		unsafe {
			for to < mf.size && mf.data[to] != `\n` {
				to += 1
			}
		}
		if to >= mf.size {
			// No further line boundary: the remainder is handled as the final chunk.
			break
		}
		// `to` is the index of the newline; the exclusive end is one past it.
		threads << spawn process_chunk(mf.data, from, to + 1)
		from = to + 1
	}
	if from < mf.size {
		threads << spawn process_chunk(mf.data, from, mf.size)
	}
	res := threads.wait()
	return combine_results(res)
}
// main parses the command line, maps the measurements file into memory,
// computes the per-city statistics (in parallel when more than one thread is
// requested) and prints them unless `--quiet` is given.
fn main() {
	mut fp := flag.new_flag_parser(os.args)
	fp.version('1brc v1.0.0')
	fp.skip_executable()
	fp.application('1 billion rows challenge')
	fp.description('The 1 billion rows challenge solved in V.\nFor details, see https://www.morling.dev/blog/one-billion-row-challenge/')
	requested_threads := fp.int('threads', `n`, 1, 'number of threads for parallel processing.')
	print_nicely := fp.bool('human-readable', `h`, false, 'Print results with new lines rather than following challenge spec')
	quiet := fp.bool('quiet', `q`, false, 'Suppress the results output (e.g., if you only care about timing)')
	fp.limit_free_args_to_exactly(1)!
	path := fp.remaining_parameters()[0]
	// A negative value would wrap around to an enormous unsigned thread count.
	if requested_threads < 0 {
		eprintln('invalid thread count ${requested_threads}: must not be negative')
		exit(1)
	}
	thread_count := u32(requested_threads)
	mut mf := mmap_file(path)
	// Keep the mapping alive until the results have been printed.
	defer {
		mf.unmap()
	}
	results := if thread_count > 1 {
		process_in_parallel(mf, thread_count)
	} else {
		process_chunk(mf.data, 0, mf.size)
	}
	if !quiet {
		print_results(results, print_nicely)
	}
}