diff --git a/layers.c b/layers.c index e74236d..8f7c9c4 100644 --- a/layers.c +++ b/layers.c @@ -698,8 +698,6 @@ void setEndSeed(EndNoise *en, int64_t seed) perlinInit(en, &s); } -// TODO: compiler independent unroll -//__attribute__(( optimize("unroll-loops") )) static int getEndBiome(int hx, int hz, const uint16_t *hmap, int hw) { int i, j; @@ -722,16 +720,16 @@ static int getEndBiome(int hx, int hz, const uint16_t *hmap, int hw) for (j = 0; j < 25; j++) { - uint16_t hdsj = p_dsj[j]; - for (i = 0; i < 25; i++) + uint16_t dsj = p_dsj[j]; + uint16_t e; + uint32_t u; + for (i = 0; i < 25; i += 5) { - uint16_t elev = p_elev[i]; - if (elev) - { - uint32_t hds = (p_dsi[i] + (uint32_t)hdsj) * elev; - if (hds < h) - h = hds; - } + if (U(e = p_elev[i+0]) && (u = (p_dsi[i+0] + (uint32_t)dsj) * e) < h) h = u; + if (U(e = p_elev[i+1]) && (u = (p_dsi[i+1] + (uint32_t)dsj) * e) < h) h = u; + if (U(e = p_elev[i+2]) && (u = (p_dsi[i+2] + (uint32_t)dsj) * e) < h) h = u; + if (U(e = p_elev[i+3]) && (u = (p_dsi[i+3] + (uint32_t)dsj) * e) < h) h = u; + if (U(e = p_elev[i+4]) && (u = (p_dsi[i+4] + (uint32_t)dsj) * e) < h) h = u; } p_elev += hw; } diff --git a/tests.c b/tests.c index 61c1fc8..b090d7d 100644 --- a/tests.c +++ b/tests.c @@ -1,6 +1,10 @@ #include "finders.h" #include "util.h" +#include +#include +#include + uint32_t hash32(uint32_t x) { x ^= x >> 15; @@ -11,6 +15,59 @@ uint32_t hash32(uint32_t x) return x; } +double now() +{ + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + return t.tv_sec + t.tv_nsec * 1e-9; +} + +/* Runs a performance test using function f(). The function should take a + * number of trial runs and a data argument [dat], and return an arbitrary + * consumable value (useful to avoid ommission by optimization). + * The minimum and mean times for a trial run will be stored in tmin and tavg. + * The benchmark aims to take just over one second. + * Returned is the total number of trials performed. + */ +int64_t +benchmark(int64_t (*f)(int64_t n, void*), void *dat, double *tmin, double *tavg) +{ + const double maxt = 1.0; + const double mintt = 1e-2; + int64_t cnt = 0; + int64_t ntt = 1; + int64_t consume = 0; + double t, _tavg = 0, _tmin = DBL_MAX; + + for (ntt = 1; ; ntt *= 2) + { + t = -now(); + consume ^= f(ntt, dat); + t += now(); + if (t >= mintt) + break; + } + do + { + t = -now(); + consume ^= f(ntt, dat); + t += now(); + cnt++; + _tavg += t; + if (t < _tmin) + _tmin = t; + } + while (_tavg < maxt); + + cnt *= ntt; + if (tmin) *tmin = _tmin / ntt; + if (tavg) *tavg = _tavg / cnt; + + static volatile int64_t v_consume; + v_consume ^= consume; + return cnt; +} + uint32_t getRef(int mc, int bits, const char *path) { initBiomes(); @@ -67,7 +124,7 @@ int testBiomeGen1x1(const int *mc, const uint32_t *expect, int bits, int cnt) return ok; } -int main() +int testOverworldBiomes() { const int mc_vers[] = { MC_1_16, MC_1_15, MC_1_13, MC_1_12, MC_1_9, MC_1_7, @@ -95,4 +152,64 @@ int main() } +int gw = 16, gh = 16; + +int64_t testPerfEnd(int64_t n, void *data) +{ + EndNoise *en = (EndNoise*) data; + int ids[gw*gh]; + int64_t r = 0; + + while (n-->0) + { + int x = rand() % 10000 - 5000; + int z = rand() % 10000 - 5000; + mapEndBiome(en, ids, x, z, gw, gh); + r ^= ids[0]; + } + return r; +} + +int64_t testPerfNether(int64_t n, void *data) +{ + NetherNoise *nn = (NetherNoise*) data; + int ids[gw*gh]; + int64_t r = 0; + + while (n-->0) + { + int x = rand() % 10000 - 5000; + int z = rand() % 10000 - 5000; + mapNether2D(nn, ids, x, z, gw, gh); + r ^= ids[0]; + } + return r; +} + +int testPerformance() +{ + double tmin, tavg; + + EndNoise en; + setEndSeed(&en, 12345); + benchmark(testPerfEnd, &en, &tmin, &tavg); + printf("End %dx%d -> min:%10.0lf ns | avg:%10.0lf ns | conf:%4.2lf %%\n", + gw, gh, 1e9*tmin, 1e9*tavg, 100 * (tavg-tmin) / (tavg+tmin)); + + NetherNoise nn; + setNetherSeed(&nn, 12345); + benchmark(testPerfNether, &nn, &tmin, &tavg); + printf("Nether %dx%d -> min:%10.0lf ns | avg:%10.0lf ns | conf:%4.2lf %%\n", + gw, gh, 1e9*tmin, 1e9*tavg, 100 * (tavg-tmin) / (tavg+tmin)); +} + + +int main() +{ + //testOverworldBiomes(); + testPerformance(); + return 0; +} + +