diff --git a/src/main/cpp/config/settings.cpp b/src/main/cpp/config/settings.cpp index 12db0f2..42e19f9 100644 --- a/src/main/cpp/config/settings.cpp +++ b/src/main/cpp/config/settings.cpp @@ -42,7 +42,7 @@ void init_settings() { enableExtComputeShader = 0; if (enableCompatibleMode < 0 || enableCompatibleMode > 1) enableCompatibleMode = 0; - if ((int)multidrawMode < 0 || (int)multidrawMode > 4) + if ((int)multidrawMode < 0 || (int)multidrawMode >= (int)multidraw_mode_t::MaxValue) multidrawMode = multidraw_mode_t::Auto; // 1205 @@ -141,6 +141,9 @@ void init_settings() { case multidraw_mode_t::DrawElements: draw_mode_str = "DrawElements"; break; + case multidraw_mode_t::Compute: + draw_mode_str = "Compute"; + break; case multidraw_mode_t::Auto: draw_mode_str = "Auto"; break; @@ -199,6 +202,12 @@ void init_settings_post() { case multidraw_mode_t::DrawElements: LOG_V("multidrawMode = DrawElements") global_settings.multidraw_mode = multidraw_mode_t::DrawElements; + LOG_V(" -> DrawElements (OK)") + break; + case multidraw_mode_t::Compute: + LOG_V("multidrawMode = Compute") + global_settings.multidraw_mode = multidraw_mode_t::Compute; + LOG_V(" -> Compute (OK)") break; case multidraw_mode_t::Auto: default: diff --git a/src/main/cpp/config/settings.h b/src/main/cpp/config/settings.h index 644a40f..f7cfdfc 100644 --- a/src/main/cpp/config/settings.h +++ b/src/main/cpp/config/settings.h @@ -16,7 +16,9 @@ enum class multidraw_mode_t: int { PreferIndirect, PreferBaseVertex, PreferMultidrawIndirect, - DrawElements + DrawElements, + Compute, + MaxValue }; struct global_settings_t { diff --git a/src/main/cpp/gl/log.h b/src/main/cpp/gl/log.h index 8f8b12f..c358833 100644 --- a/src/main/cpp/gl/log.h +++ b/src/main/cpp/gl/log.h @@ -8,7 +8,7 @@ #define FORCE_SYNC_WITH_LOG_FILE 0 -#define GLOBAL_DEBUG 0 +#define GLOBAL_DEBUG 1 #define LOG_CALLED_FUNCS 0 diff --git a/src/main/cpp/gl/multidraw.cpp b/src/main/cpp/gl/multidraw.cpp index 0377b31..28db43c 100644 --- 
a/src/main/cpp/gl/multidraw.cpp
+++ b/src/main/cpp/gl/multidraw.cpp
@@ -3,6 +3,7 @@
 //
 
 #include "multidraw.h"
+#include <vector>
 
 #define DEBUG 0
 
@@ -243,3 +244,220 @@ void mg_glMultiDrawElements_basevertex(GLenum mode, const GLsizei *count, GLenum
     CHECK_GL_ERROR
 }
 
+// GLSL ES 3.10 source of the compute shader used by the compute multidraw path.
+// NOTE(review): the bisect/merge logic below is commented out; the active body
+// only writes out_indices[i] = i, which looks like a debugging stub.
+const std::string multidraw_comp_shader =
+R"(#version 310 es
+
+layout(local_size_x = 256) in;
+
+struct DrawCommand {
+    uint count;
+    uint instanceCount;
+    uint firstIndex;
+    int baseVertex;
+    uint reservedMustBeZero;
+};
+
+layout(std430, binding = 0) readonly buffer Input { uint in_indices[]; };
+layout(std430, binding = 1) readonly buffer Draws { DrawCommand draws[]; };
+layout(std430, binding = 2) readonly buffer Prefix { uint prefixSums[]; };
+layout(std430, binding = 3) writeonly buffer Output { uint out_indices[]; };
+
+void main() {
+    uint globalIdx = gl_GlobalInvocationID.x;
+    if (globalIdx >= prefixSums[prefixSums.length() - 1])
+        return;
+
+    out_indices[globalIdx] = globalIdx;
+    // bisect to find out draw call #
+//    int low = 0;
+//    int high = draws.length() - 1;
+//    while(low < high) {
+//        int mid = (low + high + 1) / 2;
+//        if (prefixSums[mid] <= globalIdx) {
+//            low = mid;
+//        } else {
+//            high = mid - 1;
+//        }
+//    }
+//
+//    // figure out which index to take
+//    DrawCommand cmd = draws[low];
+//    uint localIdx = globalIdx - prefixSums[low];
+//    uint srcIndex = cmd.firstIndex + localIdx;
+//
+//    // Write out
+//    out_indices[globalIdx] = uint(int(in_indices[srcIndex]) + cmd.baseVertex);
+}
+
+)";
+
+// Lazily-initialised state shared by every call of the compute multidraw path.
+static bool g_compute_inited = false;
+std::vector<GLuint> g_prefix_sum;
+GLuint g_prefixsumbuffer = 0;
+GLuint g_outputibo = 0;
+// Number of GLuint indices the data store of g_outputibo can currently hold.
+static GLuint g_outputibo_capacity = 0;
+GLuint g_compute_program = 0;
+char g_compile_info[1024];
+
+/**
+ * Compute-shader flavour of the glMultiDrawElementsBaseVertex emulation: a
+ * compute pass fills a temporary GL_UNSIGNED_INT index buffer, which is then
+ * rendered with a single glDrawElements call.
+ *
+ * @param mode       primitive mode passed to glDrawElements
+ * @param counts     per-draw index counts; primcount entries are read
+ * @param type       index type, forwarded to prepare_indirect_buffer
+ * @param indices    forwarded to prepare_indirect_buffer
+ * @param primcount  number of sub-draws; the call is a no-op when <= 0
+ * @param basevertex forwarded to prepare_indirect_buffer
+ */
+GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_compute(
+        GLenum mode, GLsizei *counts, GLenum type, const void *const *indices, GLsizei primcount, const GLint *basevertex) {
+    LOG()
+
+    INIT_CHECK_GL_ERROR
+
+    if (primcount <= 0)
+        return;
+
+    // TODO: support `types` other than GL_UNSIGNED_INT
+
+    // Align compute shader input format with standard OpenGL indirect-draw format
+    prepare_indirect_buffer(counts, type, indices, primcount, basevertex);
+
+    // Init compute buffers
+    if (!g_compute_inited) {
+        LOG_D("Initializing multidraw compute pipeline...")
+        glGenBuffers(1, &g_prefixsumbuffer);
+        glGenBuffers(1, &g_outputibo);
+
+        g_compute_program = GLES.glCreateProgram();
+        CHECK_GL_ERROR_NO_INIT
+        GLuint shader = GLES.glCreateShader(GL_COMPUTE_SHADER);
+        CHECK_GL_ERROR_NO_INIT
+        const char* s[] = { multidraw_comp_shader.c_str() };
+        const GLint length[] = { static_cast<GLint>(multidraw_comp_shader.length()) };
+        GLES.glShaderSource(shader, 1, s, length);
+        CHECK_GL_ERROR_NO_INIT
+        GLES.glCompileShader(shader);
+        CHECK_GL_ERROR_NO_INIT
+        int success = 0;
+        GLES.glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
+        CHECK_GL_ERROR_NO_INIT
+        if (!success) {
+            GLES.glGetShaderInfoLog(shader, 1024, NULL, g_compile_info);
+            CHECK_GL_ERROR_NO_INIT
+            LOG_E("%s: %s shader compile error: %s\nsrc:\n%s",
+                  __func__,
+                  "compute",
+                  g_compile_info,
+                  multidraw_comp_shader.c_str());
+#if DEBUG || GLOBAL_DEBUG
+            abort();
+#endif
+            return;
+        }
+
+        GLES.glAttachShader(g_compute_program, shader);
+        CHECK_GL_ERROR_NO_INIT
+        GLES.glLinkProgram(g_compute_program);
+        CHECK_GL_ERROR_NO_INIT
+
+        GLES.glGetProgramiv(g_compute_program, GL_LINK_STATUS, &success);
+        CHECK_GL_ERROR_NO_INIT
+        if(!success) {
+            GLES.glGetProgramInfoLog(g_compute_program, 1024, NULL, g_compile_info);
+            CHECK_GL_ERROR_NO_INIT
+            LOG_E("program link error: %s", g_compile_info);
+#if DEBUG || GLOBAL_DEBUG
+            abort();
+#endif
+            return;
+        }
+
+        g_compute_inited = true;
+    }
+
+    // primcount entries are written below, so size the host array from primcount
+    const size_t draw_count = static_cast<size_t>(primcount);
+    if (g_prefix_sum.size() < draw_count)
+        g_prefix_sum.resize(draw_count);
+
+    // Calculate the inclusive prefix sum of the per-draw index counts
+    g_prefix_sum[0] = counts[0];
+    for (GLsizei i = 1; i < primcount; ++i) {
+        g_prefix_sum[i] = g_prefix_sum[i - 1] + counts[i];
+    }
+    const GLuint total_indices = g_prefix_sum[primcount - 1];
+    if (total_indices == 0)
+        return;
+
+    // Fill in the data
+    glBindBuffer(GL_SHADER_STORAGE_BUFFER, g_prefixsumbuffer);
+    CHECK_GL_ERROR_NO_INIT
+    glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * primcount, g_prefix_sum.data(), GL_DYNAMIC_DRAW);
+    CHECK_GL_ERROR_NO_INIT
+
+    // The output buffer needs a data store before the shader can write to it;
+    // grow it only when the current one is too small.
+    if (g_outputibo_capacity < total_indices) {
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, g_outputibo);
+        CHECK_GL_ERROR_NO_INIT
+        glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * total_indices, nullptr, GL_DYNAMIC_COPY);
+        CHECK_GL_ERROR_NO_INIT
+        g_outputibo_capacity = total_indices;
+    }
+    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+    CHECK_GL_ERROR_NO_INIT
+
+    // Remember the state this function overrides
+    GLint ibo = 0;
+    glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING, &ibo);
+    CHECK_GL_ERROR_NO_INIT
+    // NOTE(review): assumes the id reported here is valid for GLES.glUseProgram - confirm
+    GLint prev_program = 0;
+    glGetIntegerv(GL_CURRENT_PROGRAM, &prev_program);
+    CHECK_GL_ERROR_NO_INIT
+
+    // Bind buffers
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ibo);
+    CHECK_GL_ERROR_NO_INIT
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, g_indirectbuffer);
+    CHECK_GL_ERROR_NO_INIT
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, g_prefixsumbuffer);
+    CHECK_GL_ERROR_NO_INIT
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, g_outputibo);
+    CHECK_GL_ERROR_NO_INIT
+
+    // Dispatch compute
+    LOG_D("Using compute program = %d", g_compute_program)
+    GLES.glUseProgram(g_compute_program);
+    CHECK_GL_ERROR_NO_INIT
+    LOG_D("Dispatch compute")
+    GLES.glDispatchCompute((total_indices + 255) / 256, 1, 1);
+    CHECK_GL_ERROR_NO_INIT
+
+    // Wait for compute to complete
+    LOG_D("memory barrier")
+    GLES.glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT);
+    CHECK_GL_ERROR_NO_INIT
+
+    // The draw must run with the caller's program, not the compute program
+    GLES.glUseProgram(static_cast<GLuint>(prev_program));
+    CHECK_GL_ERROR_NO_INIT
+
+    // Bind index buffer and do draw; the shader always emits 32-bit indices
+    LOG_D("draw")
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, g_outputibo);
+    CHECK_GL_ERROR_NO_INIT
+    glDrawElements(mode, total_indices, GL_UNSIGNED_INT, 0);
+
+    // Restore states
+    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ibo);
+    CHECK_GL_ERROR_NO_INIT
+}
diff --git a/src/main/cpp/gl/multidraw.h b/src/main/cpp/gl/multidraw.h
index 6941c3c..92f77b3 100644
--- a/src/main/cpp/gl/multidraw.h
+++ b/src/main/cpp/gl/multidraw.h
@@ -28,7 +28,6 @@ struct draw_elements_indirect_command_t {
     GLuint reservedMustBeZero;
 };
 
-
 GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_indirect(GLenum mode, GLsizei *counts, GLenum type, const void *const *indices, GLsizei primcount, const GLint *basevertex);
 
 GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_multiindirect(GLenum mode, GLsizei *counts,
GLenum type, const void *const *indices, GLsizei primcount, const GLint *basevertex);
@@ -37,6 +36,8 @@ GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_basevertex(GLenum mode, G
 
 GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_drawelements(GLenum mode, GLsizei *counts, GLenum type, const void *const *indices, GLsizei primcount, const GLint *basevertex);
 
+GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_compute(GLenum mode, GLsizei *counts, GLenum type, const void *const *indices, GLsizei primcount, const GLint *basevertex);
+
 GLAPI GLAPIENTRY void mg_glMultiDrawElements_indirect(GLenum mode, const GLsizei *count, GLenum type, const void *const *indices, GLsizei primcount);
 
 GLAPI GLAPIENTRY void mg_glMultiDrawElements_multiindirect(GLenum mode, const GLsizei *count, GLenum type, const void *const *indices, GLsizei primcount);
diff --git a/src/main/cpp/glx/lookup.cpp b/src/main/cpp/glx/lookup.cpp
index ab8b77e..d44f153 100644
--- a/src/main/cpp/glx/lookup.cpp
+++ b/src/main/cpp/glx/lookup.cpp
@@ -36,6 +36,9 @@ void* get_multidraw_func(const char* name) {
         case multidraw_mode_t::DrawElements:
             namestr += "_drawelements";
             break;
+        case multidraw_mode_t::Compute:
+            namestr += "_compute";
+            break;
         default:
             LOG_W("get_multidraw_func() cannot determine multidraw emulation mode!")
             return nullptr;
diff --git a/src/main/cpp/shaders/multidraw_compute.comp b/src/main/cpp/shaders/multidraw_compute.comp
new file mode 100644
index 0000000..f77a4aa
--- /dev/null
+++ b/src/main/cpp/shaders/multidraw_compute.comp
@@ -0,0 +1,49 @@
+#version 310 es
+
+layout(local_size_x = 256) in;
+
+// One indexed draw command; five 32-bit words per entry.
+struct DrawCommand {
+    uint count;
+    uint instanceCount;
+    uint firstIndex;
+    int baseVertex;
+    uint reservedMustBeZero;
+};
+
+layout(std430, binding = 0) readonly buffer Input { uint in_indices[]; };
+layout(std430, binding = 1) readonly buffer Draws { DrawCommand draws[]; };
+// prefixSums[i] = count of draw 0 + ... + count of draw i (inclusive sums)
+layout(std430, binding = 2) readonly buffer Prefix { uint prefixSums[]; };
+layout(std430, binding = 3) writeonly buffer Output { uint out_indices[]; };
+
+// One invocation per output index: copy the matching source index, offset by baseVertex.
+void main() {
+    uint globalIdx = gl_GlobalInvocationID.x;
+    int drawCount = prefixSums.length();
+    // The last inclusive sum is the total number of indices to produce
+    if (drawCount == 0 || globalIdx >= prefixSums[drawCount - 1])
+        return;
+
+    // Bisect for the first draw whose inclusive sum exceeds globalIdx;
+    // that is the draw this output index belongs to.
+    int low = 0;
+    int high = drawCount - 1;
+    while (low < high) {
+        int mid = (low + high) / 2;
+        if (prefixSums[mid] > globalIdx) {
+            high = mid;
+        } else {
+            low = mid + 1;
+        }
+    }
+
+    // Offset inside the draw = global index minus the indices of all earlier draws
+    DrawCommand cmd = draws[low];
+    uint drawStart = (low > 0) ? prefixSums[low - 1] : 0u;
+    uint localIdx = globalIdx - drawStart;
+    uint srcIndex = cmd.firstIndex + localIdx;
+
+    // Write out
+    out_indices[globalIdx] = uint(int(in_indices[srcIndex]) + cmd.baseVertex);
+}