From b1d2593c1bc0e89ee52359a02b14a0256989a04d Mon Sep 17 00:00:00 2001 From: syrmel <104119569+syrmel@users.noreply.github.com> Date: Wed, 15 Jan 2025 08:09:11 +0100 Subject: [PATCH] examples: add a solution to the "1 Billion Row Challenge" (#23458) --- cmd/tools/modules/testing/common.v | 1 + examples/1brc/README.md | 53 ++++ examples/1brc/make-samples/cities.txt | 413 ++++++++++++++++++++++++++ examples/1brc/make-samples/main.v | 39 +++ examples/1brc/solution/main.v | 199 +++++++++++++ 5 files changed, 705 insertions(+) create mode 100644 examples/1brc/README.md create mode 100644 examples/1brc/make-samples/cities.txt create mode 100644 examples/1brc/make-samples/main.v create mode 100644 examples/1brc/solution/main.v diff --git a/cmd/tools/modules/testing/common.v b/cmd/tools/modules/testing/common.v index 25b6f6152f..e1db860539 100644 --- a/cmd/tools/modules/testing/common.v +++ b/cmd/tools/modules/testing/common.v @@ -242,6 +242,7 @@ pub fn new_test_session(_vargs string, will_compile bool) TestSession { skip_files << 'examples/database/psql/customer.v' } $if windows { + skip_files << 'examples/1brc/solution/main.v' // requires mmap skip_files << 'examples/database/mysql.v' skip_files << 'examples/database/orm.v' skip_files << 'examples/smtp/mail.v' // requires OpenSSL diff --git a/examples/1brc/README.md b/examples/1brc/README.md new file mode 100644 index 0000000000..89ed58dc7e --- /dev/null +++ b/examples/1brc/README.md @@ -0,0 +1,53 @@ +# 1 Billion Row Challenge (1BRC) + +A solution to the [1 Billion Row Challenge](https://www.morling.dev/blog/one-billion-row-challenge/), +written in [the V programming language](https://vlang.io/). 
+ +Read more about the challenge here: https://www.morling.dev/blog/one-billion-row-challenge/ + + +## Running instructions + +Suggested compiler options for performance: + +`v -cc gcc -prod -cflags "-std=c17 -march=native -mtune=native" .` + + +### Step 1: Create a measurements file + +Compile and run `make-samples` to create the sample file. + +``` +cd make-samples +v -cc gcc -prod -cflags "-std=c17 -march=native -mtune=native" . +./make-samples 1000000000 > ~/measurements.txt +``` + +NOTE: If you create a billion rows, the file will be about 12GB! + +### Step 2: Run (and time) the solution + +``` +cd solution +v -cc gcc -prod -cflags "-std=c17 -march=native -mtune=native" . +./solution ~/measurements.txt +``` + +You can time the solution using `v time`: + +`v time ./solution ~/measurements.txt` + +By default, the solution runs in a single thread. If you want to run +parallel processing, use the `-n` parameter, for example, to run with +8 threads: + +`./solution -n 8 ~/measurements.txt` + +On Linux, to run one thread per core, use + +`./solution -n $(nproc) ~/measurements.txt` + +### Step 3: Improve upon the solution + +Make changes that improve the performance and submit them. +Let's show off what is possible in V! 
diff --git a/examples/1brc/make-samples/cities.txt b/examples/1brc/make-samples/cities.txt new file mode 100644 index 0000000000..0190561d05 --- /dev/null +++ b/examples/1brc/make-samples/cities.txt @@ -0,0 +1,413 @@ +Abha,18.0 +Abidjan,26.0 +Abéché,29.4 +Accra,26.4 +Addis Ababa,16.0 +Adelaide,17.3 +Aden,29.1 +Ahvaz,25.4 +Albuquerque,14.0 +Alexandra,11.0 +Alexandria,20.0 +Algiers,18.2 +Alice Springs,21.0 +Almaty,10.0 +Amsterdam,10.2 +Anadyr,-6.9 +Anchorage,2.8 +Andorra la Vella,9.8 +Ankara,12.0 +Antananarivo,17.9 +Antsiranana,25.2 +Arkhangelsk,1.3 +Ashgabat,17.1 +Asmara,15.6 +Assab,30.5 +Astana,3.5 +Athens,19.2 +Atlanta,17.0 +Auckland,15.2 +Austin,20.7 +Baghdad,22.77 +Baguio,19.5 +Baku,15.1 +Baltimore,13.1 +Bamako,27.8 +Bangkok,28.6 +Bangui,26.0 +Banjul,26.0 +Barcelona,18.2 +Bata,25.1 +Batumi,14.0 +Beijing,12.9 +Beirut,20.9 +Belgrade,12.5 +Belize City,26.7 +Benghazi,19.9 +Bergen,7.7 +Berlin,10.3 +Bilbao,14.7 +Birao,26.5 +Bishkek,11.3 +Bissau,27.0 +Blantyre,22.2 +Bloemfontein,15.6 +Boise,11.4 +Bordeaux,14.2 +Bosaso,30.0 +Boston,10.9 +Bouaké,26.0 +Bratislava,10.5 +Brazzaville,25.0 +Bridgetown,27.0 +Brisbane,21.4 +Brussels,10.5 +Bucharest,10.8 +Budapest,11.3 +Bujumbura,23.8 +Bulawayo,18.9 +Burnie,13.1 +Busan,15.0 +Cabo San Lucas,23.9 +Cairns,25.0 +Cairo,21.4 +Calgary,4.4 +Canberra,13.1 +Cape Town,16.2 +Changsha,17.4 +Charlotte,16.1 +Chiang Mai,25.8 +Chicago,9.8 +Chihuahua,18.6 +Chișinău,10.2 +Chittagong,25.9 +Chongqing,18.6 +Christchurch,12.2 +City of San Marino,11.8 +Colombo,27.4 +Columbus,11.7 +Conakry,26.4 +Copenhagen,9.1 +Cotonou,27.2 +Cracow,9.3 +Da Lat,17.9 +Da Nang,25.8 +Dakar,24.0 +Dallas,19.0 +Damascus,17.0 +Dampier,26.4 +Dar es Salaam,25.8 +Darwin,27.6 +Denpasar,23.7 +Denver,10.4 +Detroit,10.0 +Dhaka,25.9 +Dikson,-11.1 +Dili,26.6 +Djibouti,29.9 +Dodoma,22.7 +Dolisie,24.0 +Douala,26.7 +Dubai,26.9 +Dublin,9.8 +Dunedin,11.1 +Durban,20.6 +Dushanbe,14.7 +Edinburgh,9.3 +Edmonton,4.2 +El Paso,18.1 +Entebbe,21.0 +Erbil,19.5 +Erzurum,5.1 +Fairbanks,-2.3 
+Fianarantsoa,17.9 +Flores, Petén,26.4 +Frankfurt,10.6 +Fresno,17.9 +Fukuoka,17.0 +Gabès,19.5 +Gaborone,21.0 +Gagnoa,26.0 +Gangtok,15.2 +Garissa,29.3 +Garoua,28.3 +George Town,27.9 +Ghanzi,21.4 +Gjoa Haven,-14.4 +Guadalajara,20.9 +Guangzhou,22.4 +Guatemala City,20.4 +Halifax,7.5 +Hamburg,9.7 +Hamilton,13.8 +Hanga Roa,20.5 +Hanoi,23.6 +Harare,18.4 +Harbin,5.0 +Hargeisa,21.7 +Hat Yai,27.0 +Havana,25.2 +Helsinki,5.9 +Heraklion,18.9 +Hiroshima,16.3 +Ho Chi Minh City,27.4 +Hobart,12.7 +Hong Kong,23.3 +Honiara,26.5 +Honolulu,25.4 +Houston,20.8 +Ifrane,11.4 +Indianapolis,11.8 +Iqaluit,-9.3 +Irkutsk,1.0 +Istanbul,13.9 +İzmir,17.9 +Jacksonville,20.3 +Jakarta,26.7 +Jayapura,27.0 +Jerusalem,18.3 +Johannesburg,15.5 +Jos,22.8 +Juba,27.8 +Kabul,12.1 +Kampala,20.0 +Kandi,27.7 +Kankan,26.5 +Kano,26.4 +Kansas City,12.5 +Karachi,26.0 +Karonga,24.4 +Kathmandu,18.3 +Khartoum,29.9 +Kingston,27.4 +Kinshasa,25.3 +Kolkata,26.7 +Kuala Lumpur,27.3 +Kumasi,26.0 +Kunming,15.7 +Kuopio,3.4 +Kuwait City,25.7 +Kyiv,8.4 +Kyoto,15.8 +La Ceiba,26.2 +La Paz,23.7 +Lagos,26.8 +Lahore,24.3 +Lake Havasu City,23.7 +Lake Tekapo,8.7 +Las Palmas de Gran Canaria,21.2 +Las Vegas,20.3 +Launceston,13.1 +Lhasa,7.6 +Libreville,25.9 +Lisbon,17.5 +Livingstone,21.8 +Ljubljana,10.9 +Lodwar,29.3 +Lomé,26.9 +London,11.3 +Los Angeles,18.6 +Louisville,13.9 +Luanda,25.8 +Lubumbashi,20.8 +Lusaka,19.9 +Luxembourg City,9.3 +Lviv,7.8 +Lyon,12.5 +Madrid,15.0 +Mahajanga,26.3 +Makassar,26.7 +Makurdi,26.0 +Malabo,26.3 +Malé,28.0 +Managua,27.3 +Manama,26.5 +Mandalay,28.0 +Mango,28.1 +Manila,28.4 +Maputo,22.8 +Marrakesh,19.6 +Marseille,15.8 +Maun,22.4 +Medan,26.5 +Mek'ele,22.7 +Melbourne,15.1 +Memphis,17.2 +Mexicali,23.1 +Mexico City,17.5 +Miami,24.9 +Milan,13.0 +Milwaukee,8.9 +Minneapolis,7.8 +Minsk,6.7 +Mogadishu,27.1 +Mombasa,26.3 +Monaco,16.4 +Moncton,6.1 +Monterrey,22.3 +Montreal,6.8 +Moscow,5.8 +Mumbai,27.1 +Murmansk,0.6 +Muscat,28.0 +Mzuzu,17.7 +N'Djamena,28.3 +Naha,23.1 +Nairobi,17.8 +Nakhon Ratchasima,27.3 +Napier,14.6 
+Napoli,15.9 +Nashville,15.4 +Nassau,24.6 +Ndola,20.3 +New Delhi,25.0 +New Orleans,20.7 +New York City,12.9 +Ngaoundéré,22.0 +Niamey,29.3 +Nicosia,19.7 +Niigata,13.9 +Nouadhibou,21.3 +Nouakchott,25.7 +Novosibirsk,1.7 +Nuuk,-1.4 +Odesa,10.7 +Odienné,26.0 +Oklahoma City,15.9 +Omaha,10.6 +Oranjestad,28.1 +Oslo,5.7 +Ottawa,6.6 +Ouagadougou,28.3 +Ouahigouya,28.6 +Ouarzazate,18.9 +Oulu,2.7 +Palembang,27.3 +Palermo,18.5 +Palm Springs,24.5 +Palmerston North,13.2 +Panama City,28.0 +Parakou,26.8 +Paris,12.3 +Perth,18.7 +Petropavlovsk-Kamchatsky,1.9 +Philadelphia,13.2 +Phnom Penh,28.3 +Phoenix,23.9 +Pittsburgh,10.8 +Podgorica,15.3 +Pointe-Noire,26.1 +Pontianak,27.7 +Port Moresby,26.9 +Port Sudan,28.4 +Port Vila,24.3 +Port-Gentil,26.0 +Portland {OR},12.4 +Porto,15.7 +Prague,8.4 +Praia,24.4 +Pretoria,18.2 +Pyongyang,10.8 +Rabat,17.2 +Rangpur,24.4 +Reggane,28.3 +Reykjavík,4.3 +Riga,6.2 +Riyadh,26.0 +Rome,15.2 +Roseau,26.2 +Rostov-on-Don,9.9 +Sacramento,16.3 +Saint Petersburg,5.8 +Saint-Pierre,5.7 +Salt Lake City,11.6 +San Antonio,20.8 +San Diego,17.8 +San Francisco,14.6 +San Jose,16.4 +San José,22.6 +San Juan,27.2 +San Salvador,23.1 +Sana'a,20.0 +Santo Domingo,25.9 +Sapporo,8.9 +Sarajevo,10.1 +Saskatoon,3.3 +Seattle,11.3 +Ségou,28.0 +Seoul,12.5 +Seville,19.2 +Shanghai,16.7 +Singapore,27.0 +Skopje,12.4 +Sochi,14.2 +Sofia,10.6 +Sokoto,28.0 +Split,16.1 +St. John's,5.0 +St. 
Louis,13.9
+Stockholm,6.6
+Surabaya,27.1
+Suva,25.6
+Suwałki,7.2
+Sydney,17.7
+Tabora,23.0
+Tabriz,12.6
+Taipei,23.0
+Tallinn,6.4
+Tamale,27.9
+Tamanrasset,21.7
+Tampa,22.9
+Tashkent,14.8
+Tauranga,14.8
+Tbilisi,12.9
+Tegucigalpa,21.7
+Tehran,17.0
+Tel Aviv,20.0
+Thessaloniki,16.0
+Thiès,24.0
+Tijuana,17.8
+Timbuktu,28.0
+Tirana,15.2
+Toamasina,23.4
+Tokyo,15.4
+Toliara,24.1
+Toluca,12.4
+Toronto,9.4
+Tripoli,20.0
+Tromsø,2.9
+Tucson,20.9
+Tunis,18.4
+Ulaanbaatar,-0.4
+Upington,20.4
+Ürümqi,7.4
+Vaduz,10.1
+Valencia,18.3
+Valletta,18.8
+Vancouver,10.4
+Veracruz,25.4
+Vienna,10.4
+Vientiane,25.9
+Villahermosa,27.1
+Vilnius,6.0
+Virginia Beach,15.8
+Vladivostok,4.9
+Warsaw,8.5
+Washington, D.C.,14.6
+Wau,27.8
+Wellington,12.9
+Whitehorse,-0.1
+Wichita,13.9
+Willemstad,28.0
+Winnipeg,3.0
+Wrocław,9.6
+Xi'an,14.1
+Yakutsk,-8.8
+Yangon,27.5
+Yaoundé,23.8
+Yellowknife,-4.3
+Yerevan,12.4
+Yinchuan,9.0
+Zagreb,10.7
+Zanzibar City,26.0
+Zürich,9.3
\ No newline at end of file
diff --git a/examples/1brc/make-samples/main.v b/examples/1brc/make-samples/main.v
new file mode 100644
index 0000000000..d05301af2b
--- /dev/null
+++ b/examples/1brc/make-samples/main.v
@@ -0,0 +1,73 @@
+import encoding.csv
+import flag
+import os
+import rand
+
+// CityMean pairs a city name with the mean temperature samples are drawn around.
+struct CityMean {
+	city string
+	mean f64
+}
+
+// load_city_means parses `content`, which holds one `name,mean` record per line.
+// The mean is taken from the LAST field because some names contain commas
+// themselves (e.g. `Washington, D.C.,14.6`); the leading fields are joined back
+// together to restore the full name. Records with fewer than two fields are
+// skipped.
+fn load_city_means(content string) []CityMean {
+	mut reader := csv.new_reader(content, csv.ReaderConfig{ delimiter: `,` })
+	mut means := []CityMean{}
+	for {
+		rec := reader.read() or { break }
+		if rec.len < 2 {
+			continue
+		}
+		means << CityMean{
+			city: rec[..rec.len - 1].join(',')
+			mean: rec.last().f64()
+		}
+	}
+	return means
+}
+
+// print_sample writes one `city;temperature` row. The temperature is the
+// city's mean plus `noise`, printed with one decimal.
+fn print_sample(cm CityMean, noise f64) {
+	temperature := cm.mean + noise
+	println('${cm.city};${temperature:.1f}')
+}
+
+// main reads the city list and prints the requested number of sample rows.
+fn main() {
+	mut fp := flag.new_flag_parser(os.args)
+	fp.version('1brc sample generator v1.0.0')
+	fp.skip_executable()
+	fp.application('Sample generator for 1 billion rows challenge')
+	fp.description('The 1 billion rows challenge solved in V.\nFor details, see https://www.morling.dev/blog/one-billion-row-challenge/')
+	input_file := fp.string('city-file', `i`, 'cities.txt', 'Path to input file with cities and means list')
+	fp.limit_free_args_to_exactly(1)!
+	sample_count := fp.remaining_parameters()[0].u64()
+
+	content := os.read_file(input_file) or { panic(err) }
+	means := load_city_means(content)
+	if means.len == 0 {
+		panic('no cities found in ${input_file}')
+	}
+
+	// normal_pair returns two draws per call, so rows are produced in pairs.
+	// The noise is centred on 0 and added to the mean of the city each row
+	// names, so every row follows its own city's distribution.
+	for _ in 0 .. sample_count / 2 {
+		n1, n2 := rand.normal_pair(mu: 0, sigma: 10)!
+		first := rand.intn(means.len)!
+		print_sample(means[first], n1)
+		second := rand.intn(means.len)!
+		print_sample(means[second], n2)
+	}
+	// An odd request needs one more row than the pair loop produced.
+	if sample_count % 2 == 1 {
+		n1, _ := rand.normal_pair(mu: 0, sigma: 10)!
+		idx := rand.intn(means.len)!
+		print_sample(means[idx], n1)
+	}
+}
diff --git a/examples/1brc/solution/main.v b/examples/1brc/solution/main.v
new file mode 100644
index 0000000000..e5dc8e23b7
--- /dev/null
+++ b/examples/1brc/solution/main.v
@@ -0,0 +1,259 @@
+import flag
+import math
+import os
+
+#include <sys/mman.h>
+
+fn C.mmap(addr voidptr, len u64, prot int, flags int, fd int, offset i64) voidptr
+fn C.munmap(addr voidptr, len u64) int
+
+// MemoryMappedFile is a read-only mmap() view of a whole file.
+struct MemoryMappedFile {
+	size u64
+mut:
+	data &u8
+	file os.File
+}
+
+// mmap_file opens `path` and maps its full contents read-only.
+// An empty file is not mapped (mmap() rejects a zero length); the returned
+// value then has `size == 0` and a NULL `data` pointer.
+// Panics when the file cannot be opened or mapped.
+fn mmap_file(path string) MemoryMappedFile {
+	mut mf := MemoryMappedFile{
+		file: os.open_file(path, 'r', 0) or { panic('failed to open ${path}: ${err}') }
+		size: os.file_size(path)
+		data: C.NULL
+	}
+	if mf.size == 0 {
+		return mf
+	}
+
+	addr := C.mmap(C.NULL, mf.size, C.PROT_READ, C.MAP_SHARED, mf.file.fd, 0)
+	if addr == C.MAP_FAILED {
+		code := C.errno // read before close() can overwrite it
+		mf.file.close()
+		panic('(${code}) mmap() failed')
+	}
+	mf.data = &u8(addr)
+	return mf
+}
+
+// unmap releases the mapping, if one was created, and closes the file.
+// Panics when munmap() reports an error.
+fn (mut mf MemoryMappedFile) unmap() {
+	// Nothing is mapped for an empty file, so munmap() is skipped.
+	if mf.size > 0 && C.munmap(mf.data, mf.size) != 0 {
+		panic('(${C.errno}) munmap() failed')
+	}
+	mf.file.close()
+}
+
+// ReadState tells the parser which half of a `city;temperature` row it is in.
+enum ReadState {
+	city
+	temp
+}
+
+// Result holds the running statistics of one city.
+// Temperatures are integers in tenths of a degree.
+struct Result {
+pub mut:
+	min   i32
+	max   i32
+	sum   i64 // 64 bits: the total over many rows can exceed the i32 range
+	count u32
+}
+
+// format_value renders a temperature given in tenths of a degree with one
+// decimal, e.g. 123 -> `12.3` and -5 -> `-0.5`.
+fn format_value(value i32) string {
+	// The sign is emitted separately: integer division truncates toward zero,
+	// so `-5 / 10` is 0 and the minus sign would otherwise be lost.
+	sign := if value < 0 { '-' } else { '' }
+	magnitude := math.abs(value)
+	return '${sign}${magnitude / 10}.${magnitude % 10}'
+}
+
+// print_results prints `city=min/mean/max` for every city in sorted order,
+// one per line when `print_nicely` is set, otherwise as `{a, b, ...}`.
+fn print_results(results map[string]Result, print_nicely bool) {
+	mut output := []string{cap: results.len}
+	mut cities := results.keys()
+	cities.sort()
+	for city in cities {
+		v := results[city]
+		mean := f64(v.sum) / v.count / 10
+		output << '${city}=${format_value(v.min)}/${mean:.1f}/${format_value(v.max)}'
+	}
+	if print_nicely {
+		println(output.join('\n'))
+	} else {
+		println('{' + output.join(', ') + '}')
+	}
+}
+
+// combine_results merges the per-chunk maps into a single map.
+fn combine_results(results []map[string]Result) map[string]Result {
+	mut combined_result := map[string]Result{}
+	for result in results {
+		for city, r in result {
+			if city !in combined_result {
+				combined_result[city] = r
+			} else {
+				if r.max > combined_result[city].max {
+					combined_result[city].max = r.max
+				}
+				if r.min < combined_result[city].min {
+					combined_result[city].min = r.min
+				}
+				combined_result[city].sum += r.sum
+				combined_result[city].count += r.count
+			}
+		}
+	}
+	return combined_result
+}
+
+// record_measurement folds one temperature (in tenths of a degree) into the
+// statistics of `city`.
+@[inline]
+fn record_measurement(mut results map[string]Result, city string, temp i32) {
+	if city !in results {
+		results[city] = Result{
+			min:   temp
+			max:   temp
+			sum:   i64(temp)
+			count: 1
+		}
+	} else {
+		if temp > results[city].max {
+			results[city].max = temp
+		}
+		if temp < results[city].min {
+			results[city].min = temp
+		}
+		results[city].sum += i64(temp)
+		results[city].count += 1
+	}
+}
+
+// process_chunk parses the bytes `addr[from..to]` (`to` exclusive) as
+// `city;temperature\n` rows and returns the statistics per city.
+// The range must start at the beginning of a row. A last row without a
+// terminating newline is still counted.
+@[direct_array_access]
+fn process_chunk(addr &u8, from u64, to u64) map[string]Result {
+	mut results := map[string]Result{}
+	mut state := ReadState.city
+	mut city := ''
+	mut temp := i32(0)
+	mut mod := i32(1)
+	mut j := int(0) // length of the city name read so far
+	for i in from .. to {
+		c := unsafe { u8(addr[i]) }
+		match state {
+			.city {
+				match c {
+					`;` {
+						state = .temp
+						city = unsafe { tos(addr[i - u64(j)], j) }
+					}
+					else {
+						j += 1
+					}
+				}
+			}
+			.temp {
+				match c {
+					`\n` {
+						record_measurement(mut results, city, temp * mod)
+						state = .city
+						temp = 0
+						mod = 1
+						j = 0
+					}
+					`-` {
+						mod = -1
+					}
+					`.` {}
+					else {
+						// ASCII 48 = '0' ... ASCII 57 = '9' => (ASCII value) - 48 = decimal value
+						temp = temp * 10 + (c - 48)
+					}
+				}
+			}
+		}
+	}
+	// The data may end without a trailing newline; do not drop that last row.
+	if state == .temp {
+		record_measurement(mut results, city, temp * mod)
+	}
+	return results
+}
+
+// process_in_parallel splits the mapping into roughly equal chunks that each
+// end just after a newline, parses them in separate threads and merges the
+// results.
+fn process_in_parallel(mf MemoryMappedFile, thread_count u32) map[string]Result {
+	if thread_count <= 1 || mf.size == 0 {
+		return process_chunk(mf.data, 0, mf.size)
+	}
+	mut threads := []thread map[string]Result{}
+	approx_chunk_size := mf.size / thread_count
+	mut from := u64(0)
+	for _ in 0 .. thread_count - 1 {
+		mut to := from + approx_chunk_size
+		// Move forward to the newline that ends the current row, never reading
+		// past the end of the mapping.
+		unsafe {
+			for to < mf.size && mf.data[to] != `\n` {
+				to += 1
+			}
+		}
+		if to >= mf.size {
+			// No complete row boundary left; the remainder is handled below.
+			break
+		}
+		// `to` indexes the newline. The end of the range is exclusive, so pass
+		// `to + 1` to let this chunk commit its last row.
+		threads << spawn process_chunk(mf.data, from, to + 1)
+		from = to + 1
+	}
+	if from < mf.size {
+		threads << spawn process_chunk(mf.data, from, mf.size)
+	}
+	res := threads.wait()
+	return combine_results(res)
+}
+
+// main parses the command line, maps the input file and prints the statistics.
+fn main() {
+	mut fp := flag.new_flag_parser(os.args)
+	fp.version('1brc v1.0.0')
+	fp.skip_executable()
+	fp.application('1 billion rows challenge')
+	fp.description('The 1 billion rows challenge solved in V.\nFor details, see https://www.morling.dev/blog/one-billion-row-challenge/')
+	threads_arg := fp.int('threads', `n`, 1, 'number of threads for parallel processing.')
+	print_nicely := fp.bool('human-readable', `h`, false, 'Print results with new lines rather than following challenge spec')
+	quiet := fp.bool('quiet', `q`, false, 'Suppress the results output (e.g., if you only care about timing)')
+	fp.limit_free_args_to_exactly(1)!
+	path := fp.remaining_parameters()[0]
+	// Zero or negative thread counts fall back to single-threaded processing.
+	thread_count := if threads_arg < 1 { u32(1) } else { u32(threads_arg) }
+
+	mut mf := mmap_file(path)
+	defer {
+		mf.unmap()
+	}
+
+	results := if thread_count > 1 {
+		process_in_parallel(mf, thread_count)
+	} else {
+		process_chunk(mf.data, 0, mf.size)
+	}
+
+	if !quiet {
+		print_results(results, print_nicely)
+	}
+}