mirror of
https://github.com/MobileGL-Dev/MobileGlues.git
synced 2025-09-24 03:31:43 -04:00
[Optimization] (multidraw, compute): send firstIndex/baseVertex directly to compute shader
This commit is contained in:
parent
2497bdbaf4
commit
9702972330
@ -71,66 +71,66 @@ void prepare_indirect_buffer(const GLsizei *counts, GLenum type, const void *con
|
|||||||
GLES.glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER);
|
GLES.glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool g_drawssbo_inited = false;
|
//static bool g_drawssbo_inited = false;
|
||||||
static GLsizei g_drawssbo_size = 0;
|
//static GLsizei g_drawssbo_size = 0;
|
||||||
GLuint g_drawssbo = 0;
|
//GLuint g_drawssbo = 0;
|
||||||
|
|
||||||
void prepare_compute_drawcmd_ssbo(const GLsizei *counts, GLenum type, const void *const *indices,
|
//void prepare_compute_drawcmd_ssbo(const GLsizei *counts, GLenum type, const void *const *indices,
|
||||||
GLsizei primcount, const GLint *basevertex) {
|
// GLsizei primcount, const GLint *basevertex) {
|
||||||
if (!g_drawssbo_inited) {
|
// if (!g_drawssbo_inited) {
|
||||||
GLES.glGenBuffers(1, &g_drawssbo);
|
// GLES.glGenBuffers(1, &g_drawssbo);
|
||||||
GLES.glBindBuffer(GL_DRAW_INDIRECT_BUFFER, g_drawssbo);
|
// GLES.glBindBuffer(GL_DRAW_INDIRECT_BUFFER, g_drawssbo);
|
||||||
g_drawssbo_size = 1;
|
// g_drawssbo_size = 1;
|
||||||
GLES.glBufferData(GL_DRAW_INDIRECT_BUFFER,
|
// GLES.glBufferData(GL_DRAW_INDIRECT_BUFFER,
|
||||||
g_drawssbo_size * sizeof(drawcmd_compute_t), NULL, GL_DYNAMIC_DRAW);
|
// g_drawssbo_size * sizeof(drawcmd_compute_t), NULL, GL_DYNAMIC_DRAW);
|
||||||
|
//
|
||||||
g_drawssbo_inited = true;
|
// g_drawssbo_inited = true;
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
if (g_drawssbo_size < primcount) {
|
// if (g_drawssbo_size < primcount) {
|
||||||
size_t sz = g_drawssbo_size;
|
// size_t sz = g_drawssbo_size;
|
||||||
|
//
|
||||||
LOG_D("Before resize: %d", sz)
|
// LOG_D("Before resize: %d", sz)
|
||||||
|
//
|
||||||
// 2-exponential to reduce reallocation
|
// // 2-exponential to reduce reallocation
|
||||||
while (sz < primcount)
|
// while (sz < primcount)
|
||||||
sz *= 2;
|
// sz *= 2;
|
||||||
|
//
|
||||||
GLES.glBufferData(GL_DRAW_INDIRECT_BUFFER,
|
// GLES.glBufferData(GL_DRAW_INDIRECT_BUFFER,
|
||||||
sz * sizeof(drawcmd_compute_t), NULL, GL_DYNAMIC_DRAW);
|
// sz * sizeof(drawcmd_compute_t), NULL, GL_DYNAMIC_DRAW);
|
||||||
g_drawssbo_size = sz;
|
// g_drawssbo_size = sz;
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
LOG_D("After resize: %d", g_drawssbo_size)
|
// LOG_D("After resize: %d", g_drawssbo_size)
|
||||||
|
//
|
||||||
auto* pcmds = (drawcmd_compute_t*)
|
// auto* pcmds = (drawcmd_compute_t*)
|
||||||
GLES.glMapBufferRange(GL_DRAW_INDIRECT_BUFFER,
|
// GLES.glMapBufferRange(GL_DRAW_INDIRECT_BUFFER,
|
||||||
0, primcount * sizeof(drawcmd_compute_t),
|
// 0, primcount * sizeof(drawcmd_compute_t),
|
||||||
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
|
// GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
|
||||||
|
//
|
||||||
GLsizei elementSize;
|
// GLsizei elementSize;
|
||||||
switch (type) {
|
// switch (type) {
|
||||||
case GL_UNSIGNED_BYTE:
|
// case GL_UNSIGNED_BYTE:
|
||||||
elementSize = 1;
|
// elementSize = 1;
|
||||||
break;
|
// break;
|
||||||
case GL_UNSIGNED_SHORT:
|
// case GL_UNSIGNED_SHORT:
|
||||||
elementSize = 2;
|
// elementSize = 2;
|
||||||
break;
|
// break;
|
||||||
case GL_UNSIGNED_INT:
|
// case GL_UNSIGNED_INT:
|
||||||
elementSize = 4;
|
// elementSize = 4;
|
||||||
break;
|
// break;
|
||||||
default:
|
// default:
|
||||||
elementSize = 4;
|
// elementSize = 4;
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
for (GLsizei i = 0; i < primcount; ++i) {
|
// for (GLsizei i = 0; i < primcount; ++i) {
|
||||||
auto byteOffset = reinterpret_cast<uintptr_t>(indices[i]);
|
// auto byteOffset = reinterpret_cast<uintptr_t>(indices[i]);
|
||||||
pcmds[i].firstIndex = static_cast<GLuint>(byteOffset / elementSize);
|
// pcmds[i].firstIndex = static_cast<GLuint>(byteOffset / elementSize);
|
||||||
pcmds[i].baseVertex = basevertex ? basevertex[i] : 0;
|
// pcmds[i].baseVertex = basevertex ? basevertex[i] : 0;
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
GLES.glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER);
|
// GLES.glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER);
|
||||||
}
|
//}
|
||||||
|
|
||||||
void mg_glMultiDrawElementsBaseVertex_drawelements(GLenum mode, GLsizei* counts, GLenum type, const void* const* indices, GLsizei primcount, const GLint* basevertex) {
|
void mg_glMultiDrawElementsBaseVertex_drawelements(GLenum mode, GLsizei* counts, GLenum type, const void* const* indices, GLsizei primcount, const GLint* basevertex) {
|
||||||
LOG()
|
LOG()
|
||||||
@ -310,18 +310,20 @@ R"(#version 310 es
|
|||||||
|
|
||||||
layout(local_size_x = 64) in;
|
layout(local_size_x = 64) in;
|
||||||
|
|
||||||
struct DrawCommand {
|
//struct DrawCommand {
|
||||||
// uint count;
|
//// uint count;
|
||||||
// uint instanceCount;
|
//// uint instanceCount;
|
||||||
uint firstIndex;
|
// uint firstIndex;
|
||||||
int baseVertex;
|
// int baseVertex;
|
||||||
// uint reservedMustBeZero;
|
//// uint reservedMustBeZero;
|
||||||
};
|
//};
|
||||||
|
|
||||||
layout(std430, binding = 0) readonly buffer Input { uint in_indices[]; };
|
layout(std430, binding = 0) readonly buffer Input { uint in_indices[]; };
|
||||||
layout(std430, binding = 1) readonly buffer Draws { DrawCommand draws[]; };
|
//layout(std430, binding = 1) readonly buffer Draws { DrawCommand draws[]; };
|
||||||
layout(std430, binding = 2) readonly buffer Prefix { uint prefixSums[]; };
|
layout(std430, binding = 1) readonly buffer FirstIndex { uint firstIndex[]; };
|
||||||
layout(std430, binding = 3) writeonly buffer Output { uint out_indices[]; };
|
layout(std430, binding = 2) readonly buffer BaseVertex { int baseVertex[]; };
|
||||||
|
layout(std430, binding = 3) readonly buffer Prefix { uint prefixSums[]; };
|
||||||
|
layout(std430, binding = 4) writeonly buffer Output { uint out_indices[]; };
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint outIdx = gl_GlobalInvocationID.x;
|
uint outIdx = gl_GlobalInvocationID.x;
|
||||||
@ -329,13 +331,13 @@ void main() {
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
// Find out draw call #
|
// Find out draw call #
|
||||||
// int low = 0;
|
// int low = 0;
|
||||||
// int high = prefixSums.length();
|
// int high = prefixSums.length();
|
||||||
// for (low = 0; low < high; ++low) {
|
// for (low = 0; low < high; ++low) {
|
||||||
// if (prefixSums[low] > outIdx) {
|
// if (prefixSums[low] > outIdx) {
|
||||||
// break;
|
// break;
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
int low = 0;
|
int low = 0;
|
||||||
int high = prefixSums.length() - 1;
|
int high = prefixSums.length() - 1;
|
||||||
@ -350,19 +352,21 @@ void main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// figure out which index to take
|
// figure out which index to take
|
||||||
DrawCommand cmd = draws[low];
|
//DrawCommand cmd = draws[low];
|
||||||
uint localIdx = outIdx - ((low == 0) ? 0u : (prefixSums[low - 1]));
|
uint localIdx = outIdx - ((low == 0) ? 0u : (prefixSums[low - 1]));
|
||||||
uint inIndex = localIdx + cmd.firstIndex;
|
uint inIndex = localIdx + firstIndex[low] / 4u; // elementSize == 4
|
||||||
|
|
||||||
// Write out
|
// Write out
|
||||||
out_indices[outIdx] = uint(int(in_indices[inIndex]) + cmd.baseVertex);
|
out_indices[outIdx] = uint(int(in_indices[inIndex]) + baseVertex[low]);
|
||||||
}
|
}
|
||||||
|
|
||||||
)";
|
)";
|
||||||
|
|
||||||
static bool g_compute_inited = false;
|
static bool g_compute_inited = false;
|
||||||
std::vector<GLuint> g_prefix_sum;
|
std::vector<GLuint> g_prefix_sum(1);
|
||||||
GLuint g_prefixsumbuffer = 0;
|
GLuint g_prefixsumbuffer = 0;
|
||||||
|
GLuint g_firstidx_ssbo = 0;
|
||||||
|
GLuint g_basevtx_ssbo = 0;
|
||||||
GLuint g_outputibo = 0;
|
GLuint g_outputibo = 0;
|
||||||
GLuint g_compute_program = 0;
|
GLuint g_compute_program = 0;
|
||||||
char g_compile_info[1024];
|
char g_compile_info[1024];
|
||||||
@ -429,12 +433,15 @@ GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_compute(
|
|||||||
|
|
||||||
// Align compute shader input format with standard OpenGL indirect-draw format
|
// Align compute shader input format with standard OpenGL indirect-draw format
|
||||||
// prepare_indirect_buffer(counts, type, indices, primcount, basevertex);
|
// prepare_indirect_buffer(counts, type, indices, primcount, basevertex);
|
||||||
prepare_compute_drawcmd_ssbo(counts, type, indices, primcount, basevertex);
|
// prepare_compute_drawcmd_ssbo(counts, type, indices, primcount, basevertex);
|
||||||
|
|
||||||
// Init compute buffers
|
// Init compute buffers
|
||||||
if (!g_compute_inited) {
|
if (!g_compute_inited) {
|
||||||
LOG_D("Initializing multidraw compute pipeline...")
|
LOG_D("Initializing multidraw compute pipeline...")
|
||||||
GLES.glGenBuffers(1, &g_prefixsumbuffer);
|
GLES.glGenBuffers(1, &g_prefixsumbuffer);
|
||||||
|
GLES.glGenBuffers(1, &g_firstidx_ssbo);
|
||||||
|
GLES.glGenBuffers(1, &g_basevtx_ssbo);
|
||||||
|
GLES.glGenBuffers(1, &g_prefixsumbuffer);
|
||||||
GLES.glGenBuffers(1, &g_outputibo);
|
GLES.glGenBuffers(1, &g_outputibo);
|
||||||
|
|
||||||
g_compute_program = compile_compute_program(multidraw_comp_shader);
|
g_compute_program = compile_compute_program(multidraw_comp_shader);
|
||||||
@ -443,8 +450,10 @@ GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_compute(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Resize prefix sum buffer if needed
|
// Resize prefix sum buffer if needed
|
||||||
if (g_prefix_sum.size() < g_drawssbo_size)
|
size_t sz = g_prefix_sum.empty() ? 1 : g_prefix_sum.size();
|
||||||
g_prefix_sum.resize(g_drawssbo_size);
|
while (sz < primcount)
|
||||||
|
sz *= 2;
|
||||||
|
g_prefix_sum.resize(sz);
|
||||||
|
|
||||||
// Calculate prefix sum
|
// Calculate prefix sum
|
||||||
g_prefix_sum[0] = counts[0];
|
g_prefix_sum[0] = counts[0];
|
||||||
@ -453,6 +462,16 @@ GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_compute(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fill in the data
|
// Fill in the data
|
||||||
|
GLES.glBindBuffer(GL_SHADER_STORAGE_BUFFER, g_firstidx_ssbo);
|
||||||
|
CHECK_GL_ERROR_NO_INIT
|
||||||
|
GLES.glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * primcount, indices, GL_DYNAMIC_DRAW);
|
||||||
|
CHECK_GL_ERROR_NO_INIT
|
||||||
|
|
||||||
|
GLES.glBindBuffer(GL_SHADER_STORAGE_BUFFER, g_basevtx_ssbo);
|
||||||
|
CHECK_GL_ERROR_NO_INIT
|
||||||
|
GLES.glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint) * primcount, basevertex, GL_DYNAMIC_DRAW);
|
||||||
|
CHECK_GL_ERROR_NO_INIT
|
||||||
|
|
||||||
GLES.glBindBuffer(GL_SHADER_STORAGE_BUFFER, g_prefixsumbuffer);
|
GLES.glBindBuffer(GL_SHADER_STORAGE_BUFFER, g_prefixsumbuffer);
|
||||||
CHECK_GL_ERROR_NO_INIT
|
CHECK_GL_ERROR_NO_INIT
|
||||||
GLES.glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * primcount, g_prefix_sum.data(), GL_DYNAMIC_DRAW);
|
GLES.glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * primcount, g_prefix_sum.data(), GL_DYNAMIC_DRAW);
|
||||||
@ -474,13 +493,24 @@ GLAPI GLAPIENTRY void mg_glMultiDrawElementsBaseVertex_compute(
|
|||||||
CHECK_GL_ERROR_NO_INIT
|
CHECK_GL_ERROR_NO_INIT
|
||||||
|
|
||||||
// Bind buffers
|
// Bind buffers
|
||||||
|
// GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ibo);
|
||||||
|
// CHECK_GL_ERROR_NO_INIT
|
||||||
|
// GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, g_drawssbo);
|
||||||
|
// CHECK_GL_ERROR_NO_INIT
|
||||||
|
// GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, g_prefixsumbuffer);
|
||||||
|
// CHECK_GL_ERROR_NO_INIT
|
||||||
|
// GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, g_outputibo);
|
||||||
|
// CHECK_GL_ERROR_NO_INIT
|
||||||
|
|
||||||
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ibo);
|
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ibo);
|
||||||
CHECK_GL_ERROR_NO_INIT
|
CHECK_GL_ERROR_NO_INIT
|
||||||
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, g_drawssbo);
|
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, g_firstidx_ssbo);
|
||||||
CHECK_GL_ERROR_NO_INIT
|
CHECK_GL_ERROR_NO_INIT
|
||||||
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, g_prefixsumbuffer);
|
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, g_basevtx_ssbo);
|
||||||
CHECK_GL_ERROR_NO_INIT
|
CHECK_GL_ERROR_NO_INIT
|
||||||
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, g_outputibo);
|
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, g_prefixsumbuffer);
|
||||||
|
CHECK_GL_ERROR_NO_INIT
|
||||||
|
GLES.glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, g_outputibo);
|
||||||
CHECK_GL_ERROR_NO_INIT
|
CHECK_GL_ERROR_NO_INIT
|
||||||
|
|
||||||
// Save states
|
// Save states
|
||||||
|
@ -2,18 +2,20 @@
|
|||||||
|
|
||||||
layout(local_size_x = 64) in;
|
layout(local_size_x = 64) in;
|
||||||
|
|
||||||
struct DrawCommand {
|
//struct DrawCommand {
|
||||||
uint count;
|
//// uint count;
|
||||||
uint instanceCount;
|
//// uint instanceCount;
|
||||||
uint firstIndex;
|
// uint firstIndex;
|
||||||
int baseVertex;
|
// int baseVertex;
|
||||||
uint reservedMustBeZero;
|
//// uint reservedMustBeZero;
|
||||||
};
|
//};
|
||||||
|
|
||||||
layout(std430, binding = 0) readonly buffer Input { uint in_indices[]; };
|
layout(std430, binding = 0) readonly buffer Input { uint in_indices[]; };
|
||||||
layout(std430, binding = 1) readonly buffer Draws { DrawCommand draws[]; };
|
//layout(std430, binding = 1) readonly buffer Draws { DrawCommand draws[]; };
|
||||||
layout(std430, binding = 2) readonly buffer Prefix { uint prefixSums[]; };
|
layout(std430, binding = 1) readonly buffer FirstIndex { uint firstIndex[]; };
|
||||||
layout(std430, binding = 3) writeonly buffer Output { uint out_indices[]; };
|
layout(std430, binding = 2) readonly buffer BaseVertex { int baseVertex[]; };
|
||||||
|
layout(std430, binding = 3) readonly buffer Prefix { uint prefixSums[]; };
|
||||||
|
layout(std430, binding = 4) writeonly buffer Output { uint out_indices[]; };
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint outIdx = gl_GlobalInvocationID.x;
|
uint outIdx = gl_GlobalInvocationID.x;
|
||||||
@ -22,7 +24,7 @@ void main() {
|
|||||||
|
|
||||||
// Find out draw call #
|
// Find out draw call #
|
||||||
// int low = 0;
|
// int low = 0;
|
||||||
// int high = draws.length();
|
// int high = prefixSums.length();
|
||||||
// for (low = 0; low < high; ++low) {
|
// for (low = 0; low < high; ++low) {
|
||||||
// if (prefixSums[low] > outIdx) {
|
// if (prefixSums[low] > outIdx) {
|
||||||
// break;
|
// break;
|
||||||
@ -30,7 +32,7 @@ void main() {
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
int low = 0;
|
int low = 0;
|
||||||
int high = prefixSums.length();
|
int high = prefixSums.length() - 1;
|
||||||
while (low < high) {
|
while (low < high) {
|
||||||
int mid = low + (high - low) / 2;
|
int mid = low + (high - low) / 2;
|
||||||
if (prefixSums[mid] > outIdx) {
|
if (prefixSums[mid] > outIdx) {
|
||||||
@ -42,11 +44,10 @@ void main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// figure out which index to take
|
// figure out which index to take
|
||||||
DrawCommand cmd = draws[low];
|
//DrawCommand cmd = draws[low];
|
||||||
uint localIdx = outIdx - ((low == 0) ? 0u : (prefixSums[low - 1]));
|
uint localIdx = outIdx - ((low == 0) ? 0u : (prefixSums[low - 1]));
|
||||||
uint inIndex = localIdx + cmd.firstIndex;
|
uint inIndex = localIdx + firstIndex[low] / 4u; // elementSize == 4
|
||||||
|
|
||||||
// Write out
|
// Write out
|
||||||
out_indices[outIdx] = uint(in_indices[inIndex] + uint(cmd.baseVertex));
|
out_indices[outIdx] = uint(int(in_indices[inIndex]) + baseVertex[low]);
|
||||||
// out_indices[outIdx] = uint(cmd.baseVertex);
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user