From efa258ebf37c781dcb8ab8631f9e825e072c9dc0 Mon Sep 17 00:00:00 2001 From: rdb Date: Tue, 15 Jul 2014 08:46:58 +0000 Subject: [PATCH] Memory barriers, immutable texture storage, bindless textures, fixes for multisample FBOs, code cleanup, etc. --- panda/src/glstuff/glCgShaderContext_src.I | 2 +- panda/src/glstuff/glCgShaderContext_src.cxx | 28 +- panda/src/glstuff/glCgShaderContext_src.h | 3 - panda/src/glstuff/glGeomMunger_src.cxx | 50 +- panda/src/glstuff/glGraphicsBuffer_src.cxx | 137 ++-- panda/src/glstuff/glGraphicsBuffer_src.h | 10 +- .../glstuff/glGraphicsStateGuardian_src.cxx | 624 ++++++++++++------ .../src/glstuff/glGraphicsStateGuardian_src.h | 60 +- panda/src/glstuff/glShaderContext_src.cxx | 355 +++++----- panda/src/glstuff/glShaderContext_src.h | 31 +- panda/src/glstuff/glTextureContext_src.I | 14 +- panda/src/glstuff/glTextureContext_src.cxx | 131 +++- panda/src/glstuff/glTextureContext_src.h | 36 +- panda/src/glstuff/glmisc_src.cxx | 28 + panda/src/glstuff/glmisc_src.h | 3 + 15 files changed, 1035 insertions(+), 477 deletions(-) diff --git a/panda/src/glstuff/glCgShaderContext_src.I b/panda/src/glstuff/glCgShaderContext_src.I index 84e78d6544..c989f99b63 100755 --- a/panda/src/glstuff/glCgShaderContext_src.I +++ b/panda/src/glstuff/glCgShaderContext_src.I @@ -40,7 +40,7 @@ valid() { //////////////////////////////////////////////////////////////////// INLINE bool CLP(CgShaderContext):: uses_standard_vertex_arrays() { - return _uses_standard_vertex_arrays; + return false; } //////////////////////////////////////////////////////////////////// diff --git a/panda/src/glstuff/glCgShaderContext_src.cxx b/panda/src/glstuff/glCgShaderContext_src.cxx index a37a9073fd..0c62e4c6e5 100755 --- a/panda/src/glstuff/glCgShaderContext_src.cxx +++ b/panda/src/glstuff/glCgShaderContext_src.cxx @@ -40,8 +40,10 @@ TypeHandle CLP(CgShaderContext)::_type_handle; CLP(CgShaderContext):: CLP(CgShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext(s) { 
_glgsg = glgsg; - _uses_standard_vertex_arrays = false; _cg_context = 0; + _cg_vprogram = 0; + _cg_fprogram = 0; + _cg_gprogram = 0; _cg_vprofile = CG_PROFILE_UNKNOWN; _cg_fprofile = CG_PROFILE_UNKNOWN; _cg_gprofile = CG_PROFILE_UNKNOWN; @@ -409,10 +411,14 @@ disable_shader_vertex_arrays() { //////////////////////////////////////////////////////////////////// bool CLP(CgShaderContext):: update_shader_vertex_arrays(ShaderContext *prev, bool force) { - if (prev) prev->disable_shader_vertex_arrays(); + if (prev) { + prev->disable_shader_vertex_arrays(); + } + if (!valid()) { return true; } + cg_report_errors(); #ifdef SUPPORT_IMMEDIATE_MODE @@ -451,11 +457,17 @@ update_shader_vertex_arrays(ShaderContext *prev, bool force) { } CGparameter p = _cg_parameter_map[_shader->_var_spec[i]._id._seqno]; - cgGLSetParameterPointer(p, - num_values, _glgsg->get_numeric_type(numeric_type), - stride, client_pointer + start); + cgGLEnableClientState(p); - } else { + if (numeric_type == GeomEnums::NT_packed_dabc) { + cgGLSetParameterPointer(p, GL_BGRA, GL_UNSIGNED_BYTE, + stride, client_pointer + start); + } else { + cgGLSetParameterPointer(p, + num_values, _glgsg->get_numeric_type(numeric_type), + stride, client_pointer + start); + } + } else { CGparameter p = _cg_parameter_map[_shader->_var_spec[i]._id._seqno]; cgGLDisableClientState(p); } @@ -464,7 +476,7 @@ update_shader_vertex_arrays(ShaderContext *prev, bool force) { cg_report_errors(); _glgsg->report_my_gl_errors(); - + return true; } @@ -507,7 +519,6 @@ disable_shader_texture_bindings() { // cgGLDisableTextureParameter(p); } #endif // OPENGLES_2 - _stage_offset = 0; cg_report_errors(); _glgsg->report_my_gl_errors(); @@ -538,7 +549,6 @@ update_shader_texture_bindings(ShaderContext *prev) { // filtered TextureAttrib in _target_texture. 
const TextureAttrib *texattrib = DCAST(TextureAttrib, _glgsg->_target_rs->get_attrib_def(TextureAttrib::get_class_slot())); nassertv(texattrib != (TextureAttrib *)NULL); - _stage_offset = texattrib->get_num_on_stages(); for (int i = 0; i < (int)_shader->_tex_spec.size(); ++i) { InternalName *id = _shader->_tex_spec[i]._name; diff --git a/panda/src/glstuff/glCgShaderContext_src.h b/panda/src/glstuff/glCgShaderContext_src.h index c7437f72c3..3e487fbe04 100755 --- a/panda/src/glstuff/glCgShaderContext_src.h +++ b/panda/src/glstuff/glCgShaderContext_src.h @@ -61,11 +61,8 @@ private: pvector _cg_parameter_map; - int _stage_offset; CLP(GraphicsStateGuardian) *_glgsg; - bool _uses_standard_vertex_arrays; - void release_resources(); public: diff --git a/panda/src/glstuff/glGeomMunger_src.cxx b/panda/src/glstuff/glGeomMunger_src.cxx index 1dbf889454..b4da760703 100644 --- a/panda/src/glstuff/glGeomMunger_src.cxx +++ b/panda/src/glstuff/glGeomMunger_src.cxx @@ -46,7 +46,7 @@ CLP(GeomMunger)(GraphicsStateGuardian *gsg, const RenderState *state) : //////////////////////////////////////////////////////////////////// // Function: CLP(GeomMunger)::Destructor // Access: Public, Virtual -// Description: +// Description: //////////////////////////////////////////////////////////////////// CLP(GeomMunger):: ~CLP(GeomMunger)() { @@ -84,9 +84,13 @@ munge_format_impl(const GeomVertexFormat *orig, PT(GeomVertexFormat) new_format = new GeomVertexFormat(*orig); new_format->set_animation(animation); + CLP(GraphicsStateGuardian) *glgsg; + DCAST_INTO_R(glgsg, get_gsg(), NULL); + const GeomVertexColumn *color_type = orig->get_color_column(); if (color_type != (GeomVertexColumn *)NULL && - color_type->get_numeric_type() == NT_packed_dabc) { + color_type->get_numeric_type() == NT_packed_dabc && + !glgsg->_supports_packed_dabc) { // We need to convert the color format; OpenGL doesn't support the // byte order of DirectX's packed ARGB format. 
int color_array = orig->get_array_with(InternalName::get_color()); @@ -102,7 +106,7 @@ munge_format_impl(const GeomVertexFormat *orig, if (animation.get_animation_type() == AT_hardware) { // If we want hardware animation, we need to reserve space for the // blend weights. - + // Make sure the old weights and indices are removed, just in // case. new_format->remove_column(InternalName::get_transform_weight()); @@ -116,11 +120,11 @@ munge_format_impl(const GeomVertexFormat *orig, new_array_format->add_column (InternalName::get_transform_weight(), animation.get_num_transforms() - 1, NT_stdfloat, C_other); - + if (animation.get_indexed_transforms()) { // Also, if we'll be indexing into the transform table, reserve // space for the index. - + // TODO: We should examine the maximum palette index so we can // decide whether we need 16-bit indices. That implies saving // the maximum palette index, presumably in the AnimationSpec. @@ -129,7 +133,7 @@ munge_format_impl(const GeomVertexFormat *orig, new_array_format->add_column (InternalName::get_transform_index(), animation.get_num_transforms(), NT_uint8, C_index); - } + } new_format->add_array(new_array_format); } @@ -149,16 +153,16 @@ munge_format_impl(const GeomVertexFormat *orig, new_format->add_array(new_array_format); } format = GeomVertexFormat::register_format(new_format); - + } else if ((_flags & F_interleaved_arrays) != 0) { // Combine the primary data columns into a single array. 
new_format = new GeomVertexFormat(*format); PT(GeomVertexArrayFormat) new_array_format = new GeomVertexArrayFormat; - + const GeomVertexColumn *column = format->get_vertex_column(); if (column != (const GeomVertexColumn *)NULL) { new_array_format->add_column - (column->get_name(), column->get_num_components(), + (column->get_name(), column->get_num_components(), column->get_numeric_type(), column->get_contents(), -1, column->get_column_alignment()); new_format->remove_column(column->get_name()); @@ -167,7 +171,7 @@ munge_format_impl(const GeomVertexFormat *orig, column = format->get_normal_column(); if (column != (const GeomVertexColumn *)NULL) { new_array_format->add_column - (column->get_name(), column->get_num_components(), + (column->get_name(), column->get_num_components(), column->get_numeric_type(), column->get_contents(), -1, column->get_column_alignment()); new_format->remove_column(column->get_name()); @@ -176,7 +180,7 @@ munge_format_impl(const GeomVertexFormat *orig, column = format->get_color_column(); if (column != (const GeomVertexColumn *)NULL) { new_array_format->add_column - (column->get_name(), column->get_num_components(), + (column->get_name(), column->get_num_components(), column->get_numeric_type(), column->get_contents(), -1, column->get_column_alignment()); new_format->remove_column(column->get_name()); @@ -187,7 +191,7 @@ munge_format_impl(const GeomVertexFormat *orig, if (_texture != (TextureAttrib *)NULL) { typedef pset UsedStages; UsedStages used_stages; - + int num_stages = _texture->get_num_on_stages(); for (int i = 0; i < num_stages; ++i) { TextureStage *stage = _texture->get_on_stage(i); @@ -197,7 +201,7 @@ munge_format_impl(const GeomVertexFormat *orig, if (used_stages.insert(name).second) { // This is the first time we've encountered this texcoord name. 
const GeomVertexColumn *texcoord_type = format->get_column(name); - + if (texcoord_type != (const GeomVertexColumn *)NULL) { new_array_format->add_column (name, texcoord_type->get_num_values(), NT_stdfloat, C_texcoord, @@ -230,9 +234,13 @@ CPT(GeomVertexFormat) CLP(GeomMunger):: premunge_format_impl(const GeomVertexFormat *orig) { PT(GeomVertexFormat) new_format = new GeomVertexFormat(*orig); + CLP(GraphicsStateGuardian) *glgsg; + DCAST_INTO_R(glgsg, get_gsg(), NULL); + const GeomVertexColumn *color_type = orig->get_color_column(); if (color_type != (GeomVertexColumn *)NULL && - color_type->get_numeric_type() == NT_packed_dabc) { + color_type->get_numeric_type() == NT_packed_dabc && + !glgsg->_supports_packed_dabc) { // We need to convert the color format; OpenGL doesn't support the // byte order of DirectX's packed ARGB format. int color_array = orig->get_array_with(InternalName::get_color()); @@ -259,7 +267,7 @@ premunge_format_impl(const GeomVertexFormat *orig) { new_format->add_array(new_array_format); } format = GeomVertexFormat::register_format(new_format); - + } else { // Combine the primary data columns into a single array. Unlike // the munge case, above, in the premunge case, we do this even if @@ -269,11 +277,11 @@ premunge_format_impl(const GeomVertexFormat *orig) { // at run time. 
new_format = new GeomVertexFormat(*format); PT(GeomVertexArrayFormat) new_array_format = new GeomVertexArrayFormat; - + const GeomVertexColumn *column = format->get_vertex_column(); if (column != (const GeomVertexColumn *)NULL) { new_array_format->add_column - (column->get_name(), column->get_num_components(), + (column->get_name(), column->get_num_components(), column->get_numeric_type(), column->get_contents(), -1, column->get_column_alignment()); new_format->remove_column(column->get_name()); @@ -282,7 +290,7 @@ premunge_format_impl(const GeomVertexFormat *orig) { column = format->get_normal_column(); if (column != (const GeomVertexColumn *)NULL) { new_array_format->add_column - (column->get_name(), column->get_num_components(), + (column->get_name(), column->get_num_components(), column->get_numeric_type(), column->get_contents(), -1, column->get_column_alignment()); new_format->remove_column(column->get_name()); @@ -291,7 +299,7 @@ premunge_format_impl(const GeomVertexFormat *orig) { column = format->get_color_column(); if (column != (const GeomVertexColumn *)NULL) { new_array_format->add_column - (column->get_name(), column->get_num_components(), + (column->get_name(), column->get_num_components(), column->get_numeric_type(), column->get_contents(), -1, column->get_column_alignment()); new_format->remove_column(column->get_name()); @@ -303,7 +311,7 @@ premunge_format_impl(const GeomVertexFormat *orig) { if (_texture != (TextureAttrib *)NULL) { typedef pset UsedStages; UsedStages used_stages; - + int num_stages = _texture->get_num_on_stages(); for (int i = 0; i < num_stages; ++i) { TextureStage *stage = _texture->get_on_stage(i); @@ -313,7 +321,7 @@ premunge_format_impl(const GeomVertexFormat *orig) { if (used_stages.insert(name).second) { // This is the first time we've encountered this texcoord name. 
const GeomVertexColumn *texcoord_type = format->get_column(name); - + if (texcoord_type != (const GeomVertexColumn *)NULL) { new_array_format->add_column (name, texcoord_type->get_num_values(), NT_stdfloat, C_texcoord, diff --git a/panda/src/glstuff/glGraphicsBuffer_src.cxx b/panda/src/glstuff/glGraphicsBuffer_src.cxx index 6b974dbf84..bc13a435c7 100644 --- a/panda/src/glstuff/glGraphicsBuffer_src.cxx +++ b/panda/src/glstuff/glGraphicsBuffer_src.cxx @@ -73,9 +73,8 @@ CLP(GraphicsBuffer)(GraphicsEngine *engine, GraphicsPipe *pipe, _rb_size_x = 0; _rb_size_y = 0; _rb_size_z = 0; - for (int i=0; i::iterator it; + for (it = _texture_contexts.begin(); it != _texture_contexts.end(); ++it) { + CLP(TextureContext) *gtc = *it; + + if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) { + glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT); + // If we've done it for one, we've done it for all. + break; + } + } + } } _gsg->set_current_properties(&get_fb_properties()); @@ -248,7 +265,9 @@ rebuild_bitplanes() { DCAST_INTO_V(glgsg, _gsg); if (!_needs_rebuild) { - if (_fbo.size() > 0) { + if (_fbo_multisample != 0) { + glgsg->bind_fbo(_fbo_multisample); + } else if (_fbo.size() > 0) { glgsg->bind_fbo(_fbo[0]); } else { glgsg->bind_fbo(0); @@ -284,6 +303,7 @@ rebuild_bitplanes() { // These variables indicate what should be bound to each bitplane. Texture *attach[RTP_COUNT]; memset(attach, 0, sizeof(Texture *) * RTP_COUNT); + _texture_contexts.clear(); // Sort the textures list into appropriate slots. { @@ -437,12 +457,13 @@ rebuild_bitplanes() { bind_slot(layer, rb_resize, attach, RTP_color, next++); if (_fb_properties.is_stereo()) { - // The texture has already been initialized, so bind it straight away. + // The second tex view has already been initialized, so bind it straight away. 
if (attach[RTP_color] != NULL) { attach_tex(layer, 1, attach[RTP_color], next++); } else { //XXX hack: I needed a slot to use, and we don't currently use RTP_stencil - // which is treated as a color attachment below, so this fits the bill. + // and it's treated as a color attachment below, so this fits the bill. + // Eventually, we might want to add RTP_color_left and RTP_color_right. bind_slot(layer, rb_resize, attach, RTP_stencil, next++); } } @@ -553,7 +574,6 @@ bind_slot(int layer, bool rb_resize, Texture **attach, RenderTexturePlane slot, DCAST_INTO_V(glgsg, _gsg); Texture *tex = attach[slot]; - _tex[slot] = tex; if (tex && layer >= tex->get_z_size()) { // If the requested layer index exceeds the number of layers @@ -1047,7 +1067,10 @@ attach_tex(int layer, int view, Texture *attach, GLenum attachpoint) { TextureContext *tc = attach->prepare_now(view, glgsg->get_prepared_objects(), glgsg); nassertv(tc != (TextureContext *)NULL); CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc); - glgsg->update_texture(tc, true); + + glgsg->update_texture(gtc, true); + gtc->set_active(true); + _texture_contexts.push_back(gtc); #ifndef OPENGLES GLclampf priority = 1.0f; @@ -1098,23 +1121,26 @@ attach_tex(int layer, int view, Texture *attach, GLenum attachpoint) { //////////////////////////////////////////////////////////////////// void CLP(GraphicsBuffer):: generate_mipmaps() { + if (gl_ignore_mipmaps && !gl_force_mipmaps) { + return; + } + CLP(GraphicsStateGuardian) *glgsg; DCAST_INTO_V(glgsg, _gsg); - for (int slot=0; slotuses_mipmaps())) { + pvector::iterator it; + for (it = _texture_contexts.begin(); it != _texture_contexts.end(); ++it) { + CLP(TextureContext) *gtc = *it; + + if (gtc->_generate_mipmaps) { glgsg->_state_texture = 0; - TextureContext *tc = tex->prepare_now(0, glgsg->get_prepared_objects(), glgsg); - nassertv(tc != (TextureContext *)NULL); - CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc); - glgsg->update_texture(tc, true); - GLenum target = 
glgsg->get_texture_target(tex->get_texture_type()); - glBindTexture(target, gtc->_index); - glgsg->_glGenerateMipmap(target); - glBindTexture(target, 0); + glgsg->update_texture(gtc, true); + glgsg->apply_texture(gtc); + glgsg->_glGenerateMipmap(gtc->_target); + glBindTexture(gtc->_target, 0); } } + report_my_gl_errors(); } @@ -1197,8 +1223,12 @@ select_target_tex_page(int page) { resolve_multisamples(); } } - - glgsg->bind_fbo(_fbo[page]); + + if (_fbo_multisample != 0) { + // TODO: re-issue clears? + } else { + glgsg->bind_fbo(_fbo[page]); + } _bound_tex_page = page; } @@ -1305,7 +1335,8 @@ open_buffer() { _fb_properties.set_stencil_bits(0); } _fb_properties.set_accum_bits(0); - _fb_properties.set_multisamples(_host->get_fb_properties().get_multisamples()); + + _fb_properties.set_multisamples(_requested_multisamples); // Update aux settings to reflect the GL_MAX_DRAW_BUFFERS limit, // if we exceed it, that is. @@ -1378,7 +1409,6 @@ close_buffer() { glgsg->_glDeleteRenderbuffers(1, &(_rb[i])); _rb[i] = 0; } - _tex[i] = 0; } // Delete the renderbuffers. for (int i=0; i_glDeleteRenderbuffers(1, &(_rbm[i])); _rb[i] = 0; } - _tex[i] = 0; } _rb_size_x = 0; _rb_size_y = 0; @@ -1577,6 +1606,21 @@ resolve_multisamples() { nassertv(_fbo.size() > 0); + if (gl_enable_memory_barriers) { + // Issue memory barriers as necessary to make sure that the + // texture memory is synchronized before we blit to it. + pvector::iterator it; + for (it = _texture_contexts.begin(); it != _texture_contexts.end(); ++it) { + CLP(TextureContext) *gtc = *it; + + if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) { + glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT); + // If we've done it for one, we've done it for all. 
+ break; + } + } + } + glgsg->report_my_gl_errors(); GLuint fbo = _fbo[0]; if (_bound_tex_page != -1) { @@ -1586,31 +1630,34 @@ resolve_multisamples() { glgsg->_glBindFramebuffer(GL_READ_FRAMEBUFFER_EXT, _fbo_multisample); // If the depth buffer is shared, resolve it only on the last to render FBO. - int do_depth_blit = 0; - if (_shared_depth_buffer) { - CLP(GraphicsBuffer) *graphics_buffer = NULL; - CLP(GraphicsBuffer) *highest_sort_graphics_buffer = NULL; - list ::iterator graphics_buffer_iterator; - - int max_sort_order = 0; - for (graphics_buffer_iterator = _shared_depth_buffer_list.begin(); - graphics_buffer_iterator != _shared_depth_buffer_list.end(); - graphics_buffer_iterator++) { - graphics_buffer = (*graphics_buffer_iterator); - if (graphics_buffer) { - // this call removes the entry from the list - if ( graphics_buffer->get_sort() >= max_sort_order ) { - max_sort_order = graphics_buffer->get_sort(); - highest_sort_graphics_buffer = graphics_buffer; + bool do_depth_blit = false; + if (_rbm[RTP_depth_stencil] != 0 || _rbm[RTP_depth] != 0) { + if (_shared_depth_buffer) { + CLP(GraphicsBuffer) *graphics_buffer = NULL; + CLP(GraphicsBuffer) *highest_sort_graphics_buffer = NULL; + list ::iterator graphics_buffer_iterator; + + int max_sort_order = 0; + for (graphics_buffer_iterator = _shared_depth_buffer_list.begin(); + graphics_buffer_iterator != _shared_depth_buffer_list.end(); + graphics_buffer_iterator++) { + graphics_buffer = (*graphics_buffer_iterator); + if (graphics_buffer) { + // this call removes the entry from the list + if (graphics_buffer->get_sort() >= max_sort_order) { + max_sort_order = graphics_buffer->get_sort(); + highest_sort_graphics_buffer = graphics_buffer; + } } } + if (max_sort_order == this->get_sort()) { + do_depth_blit = true; + } + } else { + do_depth_blit = true; } - if (max_sort_order == this->get_sort()) { - do_depth_blit = 1; - } - } else { - do_depth_blit = 1; } + if (do_depth_blit) { glgsg->_glBlitFramebuffer(0, 0, _rb_size_x, 
_rb_size_y, 0, 0, _rb_size_x, _rb_size_y, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, @@ -1620,7 +1667,6 @@ resolve_multisamples() { GL_COLOR_BUFFER_BIT, GL_NEAREST); } -#ifndef OPENGLES // Now handle the other color buffers. int next = GL_COLOR_ATTACHMENT1_EXT; if (_fb_properties.is_stereo()) { @@ -1630,6 +1676,7 @@ resolve_multisamples() { GL_COLOR_BUFFER_BIT, GL_NEAREST); next += 1; } +#ifndef OPENGLES for (int i = 0; i < _fb_properties.get_aux_rgba(); ++i) { glReadBuffer(next); glDrawBuffer(next); diff --git a/panda/src/glstuff/glGraphicsBuffer_src.h b/panda/src/glstuff/glGraphicsBuffer_src.h index 8a161e3e0e..34d9cfa0a4 100644 --- a/panda/src/glstuff/glGraphicsBuffer_src.h +++ b/panda/src/glstuff/glGraphicsBuffer_src.h @@ -120,13 +120,15 @@ private: int _rb_size_y; int _rb_size_z; - // The texture or render buffer bound to each plane. - PT(Texture) _tex[RTP_COUNT]; + // Stores the render buffers for each plane. + // _rbm stores the multisample renderbuffers. GLuint _rb[RTP_COUNT]; - - // The render buffer for _fbo_multisample. GLuint _rbm[RTP_COUNT]; + // List of textures for which we might have to generate mipmaps + // after rendering one frame. + pvector _texture_contexts; + // The cube map face we are currently drawing to or have just // finished drawing to, or -1 if we are not drawing to a cube map. 
int _bound_tex_page; diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx index 03377ce3c6..10f3a8f413 100644 --- a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx +++ b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx @@ -730,6 +730,38 @@ reset() { } } + _supports_tex_storage = false; + + if (is_at_least_gl_version(4, 2) || has_extension("GL_ARB_texture_storage")) { + _supports_tex_storage = true; + + _glTexStorage1D = (PFNGLTEXSTORAGE1DPROC) + get_extension_func("glTexStorage1D"); + _glTexStorage2D = (PFNGLTEXSTORAGE2DPROC) + get_extension_func("glTexStorage2D"); + _glTexStorage3D = (PFNGLTEXSTORAGE3DPROC) + get_extension_func("glTexStorage3D"); + + } else if (has_extension("GL_EXT_texture_storage")) { // GLES case + _supports_tex_storage = true; + + _glTexStorage1D = (PFNGLTEXSTORAGE1DPROC) + get_extension_func("glTexStorage1DEXT"); + _glTexStorage2D = (PFNGLTEXSTORAGE2DPROC) + get_extension_func("glTexStorage2DEXT"); + _glTexStorage3D = (PFNGLTEXSTORAGE3DPROC) + get_extension_func("glTexStorage3DEXT"); + } + + if (_supports_tex_storage) { + if (_glTexStorage1D == NULL || _glTexStorage2D == NULL || _glTexStorage3D == NULL) { + GLCAT.warning() + << "Immutable texture storage advertised as supported by OpenGL runtime, but could not get pointers to extension functions.\n"; + _supports_tex_storage = false; + } + } + + _supports_2d_texture_array = false; #ifndef OPENGLES _supports_2d_texture_array = has_extension("GL_EXT_texture_array"); @@ -897,6 +929,9 @@ reset() { (has_extension("GL_EXT_rescale_normal") || is_at_least_gl_version(1, 2)); #endif + _supports_packed_dabc = /*gl_support_packed_dabc &&*/ + has_extension("GL_ARB_vertex_array_bgra") || has_extension("GL_EXT_vertex_array_bgra"); + _supports_multisample = has_extension("GL_ARB_multisample") || is_at_least_gl_version(1, 3); @@ -1282,6 +1317,8 @@ reset() { _glGetFramebufferAttachmentParameteriv = glGetFramebufferAttachmentParameteriv; 
_glGenerateMipmap = glGenerateMipmap; #else + //TODO: add ARB/3.0 version + _supports_framebuffer_object = false; if (has_extension("GL_EXT_framebuffer_object")) { _supports_framebuffer_object = true; @@ -1757,15 +1794,21 @@ reset() { get_extension_func("glMemoryBarrierEXT"); glGetIntegerv(GL_MAX_IMAGE_UNITS_EXT, &_max_image_units); + + } else { + _glBindImageTexture = NULL; + _glMemoryBarrier = NULL; } // Check availability of multi-bind functions. _supports_multi_bind = false; if (is_at_least_gl_version(4, 4) || has_extension("GL_ARB_multi_bind")) { + _glBindTextures = (PFNGLBINDTEXTURESPROC) + get_extension_func("glBindTextures"); _glBindImageTextures = (PFNGLBINDIMAGETEXTURESPROC) get_extension_func("glBindImageTextures"); - if (_glBindImageTextures != NULL) { + if (_glBindTextures != NULL && _glBindImageTextures != NULL) { _supports_multi_bind = true; } else { GLCAT.warning() @@ -1782,6 +1825,24 @@ reset() { << "ARB_internalformat_query2 advertised as supported by OpenGL runtime, but could not get pointers to extension function.\n"; } } + + _supports_bindless_texture = false; + if (has_extension("GL_ARB_bindless_texture")) { + _glGetTextureHandle = (PFNGLGETTEXTUREHANDLEPROC) + get_extension_func("glGetTextureHandleARB"); + _glMakeTextureHandleResident = (PFNGLMAKETEXTUREHANDLERESIDENTPROC) + get_extension_func("glMakeTextureHandleResidentARB"); + _glUniformHandleui64 = (PFNGLUNIFORMHANDLEUI64PROC) + get_extension_func("glUniformHandleui64ARB"); + + if (_glGetTextureHandle == NULL || _glMakeTextureHandleResident == NULL || + _glUniformHandleui64 == NULL) { + GLCAT.warning() + << "GL_ARB_bindless_texture advertised as supported by OpenGL runtime, but could not get pointers to extension function.\n"; + } else { + _supports_bindless_texture = true; + } + } #endif #ifndef OPENGLES @@ -1879,11 +1940,11 @@ reset() { #ifndef OPENGLES_1 _current_shader = (Shader *)NULL; - _current_shader_context = (CLP(ShaderContext) *)NULL; + _current_shader_context = (ShaderContext 
*)NULL; _vertex_array_shader = (Shader *)NULL; - _vertex_array_shader_context = (CLP(ShaderContext) *)NULL; + _vertex_array_shader_context = (ShaderContext *)NULL; _texture_binding_shader = (Shader *)NULL; - _texture_binding_shader_context = (CLP(ShaderContext) *)NULL; + _texture_binding_shader_context = (ShaderContext *)NULL; #endif #ifdef OPENGLES_2 @@ -2508,17 +2569,17 @@ end_frame(Thread *current_thread) { if (_vertex_array_shader_context != 0) { _vertex_array_shader_context->disable_shader_vertex_arrays(); _vertex_array_shader = (Shader *)NULL; - _vertex_array_shader_context = (CLP(ShaderContext) *)NULL; + _vertex_array_shader_context = (ShaderContext *)NULL; } if (_texture_binding_shader_context != 0) { _texture_binding_shader_context->disable_shader_texture_bindings(); _texture_binding_shader = (Shader *)NULL; - _texture_binding_shader_context = (CLP(ShaderContext) *)NULL; + _texture_binding_shader_context = (ShaderContext *)NULL; } if (_current_shader_context != 0) { _current_shader_context->unbind(); _current_shader = (Shader *)NULL; - _current_shader_context = (CLP(ShaderContext) *)NULL; + _current_shader_context = (ShaderContext *)NULL; } #endif @@ -3008,13 +3069,17 @@ update_standard_vertex_arrays(bool force) { } else #endif // NDEBUG if (_data_reader->get_color_info(array_reader, num_values, numeric_type, - start, stride) && - numeric_type != Geom::NT_packed_dabc) { + start, stride)) { if (!setup_array_data(client_pointer, array_reader, force)) { return false; } - glColorPointer(num_values, get_numeric_type(numeric_type), - stride, client_pointer + start); + if (numeric_type == Geom::NT_packed_dabc) { + glColorPointer(GL_BGRA, GL_UNSIGNED_BYTE, + stride, client_pointer + start); + } else { + glColorPointer(num_values, get_numeric_type(numeric_type), + stride, client_pointer + start); + } glEnableClientState(GL_COLOR_ARRAY); } else { glDisableClientState(GL_COLOR_ARRAY); @@ -3737,6 +3802,54 @@ end_draw_primitives() { report_my_gl_errors(); } 
+//////////////////////////////////////////////////////////////////// +// Function: GLGraphicsStateGuardian::issue_memory_barrier +// Access: Public +// Description: Issues the given memory barriers, and clears the +// list of textures marked as incoherent for the given +// bits. +//////////////////////////////////////////////////////////////////// +void CLP(GraphicsStateGuardian):: +issue_memory_barrier(GLbitfield barriers) { +#ifndef OPENGLES + if (!gl_enable_memory_barriers || _glMemoryBarrier == NULL) { + return; + } + + if (GLCAT.is_debug()) { + GLCAT.debug() << "Issuing memory barriers:"; + } + + _glMemoryBarrier(barriers); + + // Indicate that barriers no longer need to be issued for + // the relevant lists of textures. + if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) { + _textures_needing_fetch_barrier.clear(); + GLCAT.debug(false) << " texture_fetch"; + } + + if (barriers & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) { + _textures_needing_image_access_barrier.clear(); + GLCAT.debug(false) << " shader_image_access"; + } + + if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT) { + _textures_needing_update_barrier.clear(); + GLCAT.debug(false) << " texture_update"; + } + + if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) { + _textures_needing_framebuffer_barrier.clear(); + GLCAT.debug(false) << " framebuffer"; + } + + GLCAT.debug(false) << "\n"; + + report_my_gl_errors(); +#endif // OPENGLES +} + //////////////////////////////////////////////////////////////////// // Function: GLGraphicsStateGuardian::prepare_texture // Access: Public, Virtual @@ -3783,11 +3896,9 @@ prepare_texture(Texture *tex, int view) { break; } - CLP(TextureContext) *gtc = new CLP(TextureContext)(_prepared_objects, tex, view); - glGenTextures(1, >c->_index); + CLP(TextureContext) *gtc = new CLP(TextureContext)(this, _prepared_objects, tex, view); report_my_gl_errors(); - apply_texture(gtc); return gtc; } @@ -3809,14 +3920,14 @@ prepare_texture(Texture *tex, int view) { 
//////////////////////////////////////////////////////////////////// bool CLP(GraphicsStateGuardian):: update_texture(TextureContext *tc, bool force) { - apply_texture(tc); - CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc); - if (gtc->was_image_modified() || !gtc->_already_applied) { - // If the texture image was modified, reload the texture. This - // means we also re-specify the properties for good measure. - specify_texture(gtc); + if (gtc->was_image_modified() || !gtc->_has_storage) { + // If the texture image was modified, reload the texture. + apply_texture(tc); + if (gtc->was_properties_modified()) { + specify_texture(gtc); + } bool okflag = upload_texture(gtc, force); if (!okflag) { GLCAT.error() @@ -3827,6 +3938,7 @@ update_texture(TextureContext *tc, bool force) { } else if (gtc->was_properties_modified()) { // If only the properties have been modified, we don't necessarily // need to reload the texture. + apply_texture(tc); if (specify_texture(gtc)) { // Actually, looks like the texture *does* need to be reloaded. 
gtc->mark_needs_reload(); @@ -3843,6 +3955,7 @@ update_texture(TextureContext *tc, bool force) { gtc->mark_loaded(); } } + gtc->enqueue_lru(&_prepared_objects->_graphics_memory_lru); report_my_gl_errors(); @@ -3860,11 +3973,6 @@ update_texture(TextureContext *tc, bool force) { void CLP(GraphicsStateGuardian):: release_texture(TextureContext *tc) { CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc); - - glDeleteTextures(1, >c->_index); - report_my_gl_errors(); - - gtc->_index = 0; delete gtc; } @@ -4613,17 +4721,17 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z, bool uses_mipmaps = tex->uses_mipmaps() && !gl_ignore_mipmaps; if (uses_mipmaps) { -#ifndef OPENGLES_2 if (_supports_generate_mipmap) { - glTexParameteri(target, GL_GENERATE_MIPMAP, true); - } else { +#ifndef OPENGLES_2 + if (_glGenerateMipmap == NULL) { + glTexParameteri(target, GL_GENERATE_MIPMAP, true); + } #endif + } else { // If we can't auto-generate mipmaps, do without mipmaps. glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); uses_mipmaps = false; -#ifndef OPENGLES_2 } -#endif } bool new_image = needs_reload || gtc->was_image_modified(); @@ -4641,7 +4749,7 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z, } } - if (!gtc->_already_applied || + if (!gtc->_has_storage || internal_format != gtc->_internal_format || uses_mipmaps != gtc->_uses_mipmaps || width != gtc->_width || @@ -4652,6 +4760,18 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z, new_image = true; } + if (new_image && gtc->_immutable) { + gtc->reset_data(); + glBindTexture(target, gtc->_index); + } + +#ifndef OPENGLES + if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) { + // Make sure that any incoherent writes to this texture have been synced. 
+ issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT); + } +#endif + if (z >= 0) { if (new_image) { // These won't be used because we pass a NULL image, but we still @@ -4675,7 +4795,11 @@ framebuffer_copy_to_texture(Texture *tex, int view, int z, } } - gtc->_already_applied = true; + if (uses_mipmaps && _glGenerateMipmap != NULL) { + _glGenerateMipmap(target); + } + + gtc->_has_storage = true; gtc->_uses_mipmaps = uses_mipmaps; gtc->_internal_format = internal_format; gtc->_width = width; @@ -4975,7 +5099,7 @@ do_issue_shade_model() { void CLP(GraphicsStateGuardian):: do_issue_shader(bool state_has_changed) { #ifndef OPENGLES_1 - CLP(ShaderContext) *context = 0; + ShaderContext *context = 0; Shader *shader = (Shader *)(_target_shader->get_shader()); #ifdef OPENGLES_2 @@ -4986,13 +5110,13 @@ do_issue_shader(bool state_has_changed) { #endif if (shader) { - context = (CLP(ShaderContext) *)(shader->prepare_now(get_prepared_objects(), this)); + context = shader->prepare_now(get_prepared_objects(), this); } #ifdef OPENGLES_2 // If it failed, try applying the default shader. if (shader != _default_shader && (context == 0 || !context->valid())) { shader = _default_shader; - context = (CLP(ShaderContext) *)(shader->prepare_now(get_prepared_objects(), this)); + context = shader->prepare_now(get_prepared_objects(), this); } #endif @@ -8421,6 +8545,7 @@ update_standard_texture_bindings() { #endif continue; } + apply_texture(tc); if (stage->involves_color_scale() && _color_scale_enabled) { LColor color = stage->get_color(); @@ -8659,6 +8784,8 @@ update_show_usage_texture_bindings(int show_stage_index) { GLuint index; glGenTextures(1, &index); glBindTexture(GL_TEXTURE_2D, index); + //TODO: this could be a lot simpler with glTexStorage2D + // followed by a call to glClearTexImage. 
upload_usage_texture(texture->get_x_size(), texture->get_y_size()); _usage_textures[key] = index; @@ -8730,7 +8857,7 @@ upload_usage_texture(int width, int height) { } glTexImage2D(GL_TEXTURE_2D, n, GL_RGBA, width, height, 0, - GL_RGBA, GL_UNSIGNED_BYTE, buffer); + GL_RGBA, GL_UNSIGNED_BYTE, buffer); if (width == 1 && height == 1) { // That was the last mipmap level. break; @@ -9106,6 +9233,8 @@ do_issue_tex_gen() { //////////////////////////////////////////////////////////////////// bool CLP(GraphicsStateGuardian):: specify_texture(CLP(TextureContext) *gtc) { + nassertr(gtc->_handle == 0 /* can't modify tex with active handle */, false); + Texture *tex = gtc->get_texture(); GLenum target = get_texture_target(tex->get_texture_type()); @@ -9153,13 +9282,10 @@ specify_texture(CLP(TextureContext) *gtc) { if (!tex->might_have_ram_image()) { // If it's a dynamically generated texture (that is, the RAM image // isn't available so it didn't pass through the CPU), we should - // enable GL-generated mipmaps here if we can. - if (_supports_generate_mipmap) { -#ifndef OPENGLES_2 - glTexParameteri(target, GL_GENERATE_MIPMAP, uses_mipmaps); -#endif - } else { - // Otherwise, don't try to use mipmaps. + // enable GL-generated mipmaps if we can. + if (!_supports_generate_mipmap) { + // However, if the GPU doesn't support mipmap generation, we + // have to turn it off. uses_mipmaps = false; } } @@ -9169,13 +9295,6 @@ specify_texture(CLP(TextureContext) *gtc) { glTexParameteri(target, GL_TEXTURE_MAG_FILTER, get_texture_filter_type(magfilter, true)); -#ifndef OPENGLES - if (!uses_mipmaps) { - // NVIDIA drivers complain if we don't do this. - glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, 0); - } -#endif - // Set anisotropic filtering. 
if (_supports_anisotropy) { PN_stdfloat anisotropy = tex->get_effective_anisotropic_degree(); @@ -9294,20 +9413,6 @@ upload_texture(CLP(TextureContext) *gtc, bool force) { image_compression = Texture::CM_off; } - if (GLCAT.is_debug()) { - if (image.is_null()) { - GLCAT.debug() - << "Got NULL image: " << tex->get_name() << "\n"; - } - } - - /* - if (image.is_null()) { - // If we don't have an image, we can't upload. - return false; - } - */ - int mipmap_bias = 0; int width = tex->get_x_size(); @@ -9318,6 +9423,23 @@ upload_texture(CLP(TextureContext) *gtc, bool force) { GLint external_format = get_external_image_format(tex); GLenum component_type = get_component_type(tex->get_component_type()); + if (GLCAT.is_debug()) { + if (image.is_null()) { + GLCAT.debug() + << "loading texture with NULL image"; + } else if (image_compression != Texture::CM_off) { + GLCAT.debug() + << "loading pre-compressed texture"; + } else if (is_compressed_format(internal_format)) { + GLCAT.debug() + << "loading compressed texture"; + } else { + GLCAT.debug() + << "loading uncompressed texture"; + } + GLCAT.debug(false) << " " << tex->get_name() << "\n"; + } + // Ensure that the texture fits within the GL's specified limits. 
// Need to split dimensions because of texture arrays int max_dimension_x; @@ -9383,13 +9505,16 @@ upload_texture(CLP(TextureContext) *gtc, bool force) { } } + width = tex->get_expected_mipmap_x_size(mipmap_bias); + height = tex->get_expected_mipmap_y_size(mipmap_bias); + depth = tex->get_expected_mipmap_z_size(mipmap_bias); + if (mipmap_bias != 0) { GLCAT.info() << "Reducing image " << tex->get_name() - << " from " << width << " x " << height << " x " << depth << " to " - << tex->get_expected_mipmap_x_size(mipmap_bias) << " x " - << tex->get_expected_mipmap_y_size(mipmap_bias) << " x " - << tex->get_expected_mipmap_z_size(mipmap_bias) << "\n"; + << " from " << tex->get_x_size() << " x " << tex->get_y_size() + << " x " << tex->get_z_size() << " to " + << width << " x " << height << " x " << depth << "\n"; } } @@ -9416,17 +9541,145 @@ upload_texture(CLP(TextureContext) *gtc, bool force) { glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + GLenum target = get_texture_target(tex->get_texture_type()); bool uses_mipmaps = (tex->uses_mipmaps() && !gl_ignore_mipmaps) || gl_force_mipmaps; - -#ifndef NDEBUG - if (gl_force_mipmaps) { - uses_mipmaps = true; + bool needs_reload = false; + if (!gtc->_has_storage || + gtc->_uses_mipmaps != uses_mipmaps || + gtc->_internal_format != internal_format || + gtc->_width != width || + gtc->_height != height || + gtc->_depth != depth) { + // We need to reload a new GL Texture object. + needs_reload = true; } + + if (needs_reload && gtc->_immutable) { + GLCAT.warning() << "Attempt to modify texture with immutable storage, recreating texture.\n"; + gtc->reset_data(); + glBindTexture(target, gtc->_index); + } + + if (needs_reload) { + gtc->_generate_mipmaps = false; + int num_levels = 1; + CPTA_uchar image = tex->get_ram_mipmap_image(mipmap_bias); + + // Figure out whether mipmaps will be generated by the GPU or by + // Panda (or not at all), and how many mipmap levels should be created. 
+ if (image.is_null()) { + if (uses_mipmaps) { + if (_supports_generate_mipmap) { + num_levels = tex->get_expected_num_mipmap_levels() - mipmap_bias; + gtc->_generate_mipmaps = true; + } else { + // If it can't, do without mipmaps. + num_levels = 1; + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + } + } + + } else { + if (uses_mipmaps) { + num_levels = tex->get_num_ram_mipmap_images() - mipmap_bias; + + if (num_levels <= 1) { + // No RAM mipmap levels available. Should we generate some? + if (!_supports_generate_mipmap || !driver_generate_mipmaps || + image_compression != Texture::CM_off) { + // Yes, the GL can't or won't generate them, so we need to. + // Note that some drivers (nVidia) will *corrupt memory* if + // you ask them to generate mipmaps for a pre-compressed + // texture. + tex->generate_ram_mipmap_images(); + num_levels = tex->get_num_ram_mipmap_images() - mipmap_bias; + } + } + + if (num_levels <= 1) { + // We don't have mipmap levels in RAM. Ask the GL to generate + // them if it can. + if (_supports_generate_mipmap) { + num_levels = tex->get_expected_num_mipmap_levels() - mipmap_bias; + gtc->_generate_mipmaps = true; + } else { + // If it can't, do without mipmaps. + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + num_levels = 1; + } + } + } + } + +#ifndef OPENGLES // OpenGL ES doesn't have GL_TEXTURE_MAX_LEVEL. + if (is_at_least_gl_version(1, 2)) { + // By the time we get here, we have a pretty good prediction for + // the number of mipmaps we're going to have, so tell the GL that's + // all it's going to get. + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, num_levels - 1); + } #endif - bool success = true; +#ifndef OPENGLES_2 + if (gtc->_generate_mipmaps && _glGenerateMipmap == NULL) { + // The old, deprecated way to generate mipmaps. 
+ glTexParameteri(target, GL_GENERATE_MIPMAP, GL_TRUE); + } +#endif - GLenum target = get_texture_target(tex->get_texture_type()); + // Allocate immutable storage for the texture, after which we can subload it. + // Pre-allocating storage using glTexStorage is more efficient than using glTexImage + // to load all of the individual images one by one later, but we are not allowed to + // change the texture size or number of mipmap levels after this point. + if (gl_immutable_texture_storage && _supports_tex_storage && !gtc->_has_storage) { + if (GLCAT.is_debug()) { + GLCAT.debug() + << "allocating storage for texture " << tex->get_name() << ", " << width + << " x " << height << " x " << depth << ", mipmaps " << num_levels + << ", uses_mipmaps = " << uses_mipmaps << "\n"; + } + + switch (tex->get_texture_type()) { + case Texture::TT_1d_texture: + _glTexStorage1D(target, num_levels, internal_format, width); + break; + case Texture::TT_2d_texture: + case Texture::TT_cube_map: + _glTexStorage2D(target, num_levels, internal_format, width, height); + break; + case Texture::TT_3d_texture: + case Texture::TT_2d_texture_array: + _glTexStorage3D(target, num_levels, internal_format, width, height, depth); + break; + } + + gtc->_has_storage = true; + gtc->_immutable = true; + gtc->_uses_mipmaps = uses_mipmaps; + gtc->_internal_format = internal_format; + gtc->_width = width; + gtc->_height = height; + gtc->_depth = depth; + needs_reload = false; + } + } else { + // Maybe we need to generate mipmaps on the CPU. + if (!image.is_null() && uses_mipmaps) { + if (tex->get_num_ram_mipmap_images() - mipmap_bias <= 1) { + // No RAM mipmap levels available. Should we generate some? + if (!_supports_generate_mipmap || !driver_generate_mipmaps || + image_compression != Texture::CM_off) { + // Yes, the GL can't or won't generate them, so we need to. + // Note that some drivers (nVidia) will *corrupt memory* if + // you ask them to generate mipmaps for a pre-compressed + // texture. 
+ tex->generate_ram_mipmap_images(); + } + } + } + } + + bool success = true; if (tex->get_texture_type() == Texture::TT_cube_map) { // A cube map must load six different 2-d images (which are stored // as the six pages of the system ram image). @@ -9437,37 +9690,37 @@ upload_texture(CLP(TextureContext) *gtc, bool force) { nassertr(target == GL_TEXTURE_CUBE_MAP, false); success = success && upload_texture_image - (gtc, uses_mipmaps, mipmap_bias, + (gtc, needs_reload, uses_mipmaps, mipmap_bias, GL_TEXTURE_CUBE_MAP, GL_TEXTURE_CUBE_MAP_POSITIVE_X, internal_format, external_format, component_type, true, 0, image_compression); success = success && upload_texture_image - (gtc, uses_mipmaps, mipmap_bias, + (gtc, needs_reload, uses_mipmaps, mipmap_bias, GL_TEXTURE_CUBE_MAP, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, internal_format, external_format, component_type, true, 1, image_compression); success = success && upload_texture_image - (gtc, uses_mipmaps, mipmap_bias, + (gtc, needs_reload, uses_mipmaps, mipmap_bias, GL_TEXTURE_CUBE_MAP, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, internal_format, external_format, component_type, true, 2, image_compression); success = success && upload_texture_image - (gtc, uses_mipmaps, mipmap_bias, + (gtc, needs_reload, uses_mipmaps, mipmap_bias, GL_TEXTURE_CUBE_MAP, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, internal_format, external_format, component_type, true, 3, image_compression); success = success && upload_texture_image - (gtc, uses_mipmaps, mipmap_bias, + (gtc, needs_reload, uses_mipmaps, mipmap_bias, GL_TEXTURE_CUBE_MAP, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, internal_format, external_format, component_type, true, 4, image_compression); success = success && upload_texture_image - (gtc, uses_mipmaps, mipmap_bias, + (gtc, needs_reload, uses_mipmaps, mipmap_bias, GL_TEXTURE_CUBE_MAP, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, internal_format, external_format, component_type, true, 5, image_compression); @@ -9475,20 +9728,32 @@ upload_texture(CLP(TextureContext) *gtc, bool force) { 
} else { // Any other kind of texture can be loaded all at once. success = upload_texture_image - (gtc, uses_mipmaps, mipmap_bias, target, target, - internal_format, external_format, component_type, - false, 0, image_compression); + (gtc, needs_reload, uses_mipmaps, mipmap_bias, target, + target, internal_format, external_format, + component_type, false, 0, image_compression); + } + + if (gtc->_generate_mipmaps && _glGenerateMipmap != NULL) { + if (GLCAT.is_debug()) { + GLCAT.debug() + << "generating mipmaps for texture " << tex->get_name() << ", " + << width << " x " << height << " x " << depth + << ", uses_mipmaps = " << uses_mipmaps << "\n"; + } + _glGenerateMipmap(target); } maybe_gl_finish(); if (success) { - gtc->_already_applied = true; - gtc->_uses_mipmaps = uses_mipmaps; - gtc->_internal_format = internal_format; - gtc->_width = width; - gtc->_height = height; - gtc->_depth = depth; + if (needs_reload) { + gtc->_has_storage = true; + gtc->_uses_mipmaps = uses_mipmaps; + gtc->_internal_format = internal_format; + gtc->_width = width; + gtc->_height = height; + gtc->_depth = depth; + } if (!image.is_null()) { gtc->update_data_size_bytes(get_texture_memory_size(tex)); @@ -9539,7 +9804,7 @@ upload_texture(CLP(TextureContext) *gtc, bool force) { // e.g. GL_TEXTURE_CUBE_MAP_POSITIVE_X. 
//////////////////////////////////////////////////////////////////// bool CLP(GraphicsStateGuardian):: -upload_texture_image(CLP(TextureContext) *gtc, +upload_texture_image(CLP(TextureContext) *gtc, bool needs_reload, bool uses_mipmaps, int mipmap_bias, GLenum texture_target, GLenum page_target, GLint internal_format, @@ -9566,111 +9831,40 @@ upload_texture_image(CLP(TextureContext) *gtc, int height = tex->get_expected_mipmap_y_size(mipmap_bias); int depth = tex->get_expected_mipmap_z_size(mipmap_bias); - if (GLCAT.is_debug()) { - if (image_compression != Texture::CM_off) { - GLCAT.debug() - << "loading pre-compressed texture " << tex->get_name() << "\n"; - } else if (is_compressed_format(internal_format)) { - GLCAT.debug() - << "compressing texture " << tex->get_name() << "\n"; - } else { - GLCAT.debug() - << "loading uncompressed texture " << tex->get_name() << "\n"; - } - GLCAT.debug() - << "page_target " << hex << page_target << dec << "\n"; - } - + // Determine the number of images to upload. int num_ram_mipmap_levels = 0; - bool load_ram_mipmaps = false; - - if (image.is_null()) { - if (GLCAT.is_debug()) { - GLCAT.debug() - << "Not loading NULL image " << tex->get_name() << "\n"; - } - - if (uses_mipmaps) { - if (_supports_generate_mipmap) { -#ifndef OPENGLES_2 - glTexParameteri(texture_target, GL_GENERATE_MIPMAP, true); -#endif - } else { - // If it can't, do without mipmaps. - glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - uses_mipmaps = false; - } - } - - } else { - num_ram_mipmap_levels = 1; + if (!image.is_null()) { if (uses_mipmaps) { num_ram_mipmap_levels = tex->get_num_ram_mipmap_images(); - - if (num_ram_mipmap_levels == 1) { - // No RAM mipmap levels available. Should we generate some? - if (!_supports_generate_mipmap || !driver_generate_mipmaps || - image_compression != Texture::CM_off) { - // Yes, the GL can't or won't generate them, so we need to. 
- // Note that some drivers (nVidia) will *corrupt memory* if - // you ask them to generate mipmaps for a pre-compressed - // texture. - tex->generate_ram_mipmap_images(); - num_ram_mipmap_levels = tex->get_num_ram_mipmap_images(); - } - } - - if (num_ram_mipmap_levels != 1) { - // We will load the mipmap levels from RAM. Don't ask the GL to - // generate them. -#ifndef OPENGLES_2 - if (_supports_generate_mipmap) { - glTexParameteri(texture_target, GL_GENERATE_MIPMAP, false); - } -#endif - load_ram_mipmaps = true; - - } else { - // We don't have mipmap levels in RAM. Ask the GL to generate - // them if it can. - if (_supports_generate_mipmap) { -#ifndef OPENGLES_2 - glTexParameteri(texture_target, GL_GENERATE_MIPMAP, true); -#endif - } else { - // If it can't, do without mipmaps. - glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - uses_mipmaps = false; - } - } + } else { + num_ram_mipmap_levels = 1; } } - int highest_level = 0; -#ifdef OPENGLES // OpenGL ES doesn't support texture subloads. - static const bool needs_reload = true; - -#else - bool needs_reload = false; - if (!gtc->_already_applied || - gtc->_uses_mipmaps != uses_mipmaps || - gtc->_internal_format != internal_format || - gtc->_width != width || - gtc->_height != height || - gtc->_depth != depth) { - // We need to reload a new GL Texture object. - needs_reload = true; +#ifndef OPENGLES + if (needs_reload || num_ram_mipmap_levels > 0) { + // Make sure that any incoherent writes to this texture have been synced. + if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) { + issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT); + } } +#endif if (!needs_reload) { // Try to subload the image over the existing GL Texture object, // possibly saving on texture memory fragmentation. 
if (GLCAT.is_debug()) { - GLCAT.debug() - << "subloading existing texture object, " << width << " x " << height - << " x " << depth << ", z = " << z << ", mipmaps " << num_ram_mipmap_levels - << ", uses_mipmaps = " << uses_mipmaps << "\n"; + if (num_ram_mipmap_levels == 0) { + GLCAT.debug() + << "not loading NULL image for tex " << tex->get_name() << ", " << width << " x " << height + << " x " << depth << ", z = " << z << ", uses_mipmaps = " << uses_mipmaps << "\n"; + } else { + GLCAT.debug() + << "updating image data of texture " << tex->get_name() << ", " << width << " x " << height + << " x " << depth << ", z = " << z << ", mipmaps " << num_ram_mipmap_levels + << ", uses_mipmaps = " << uses_mipmaps << "\n"; + } } for (int n = mipmap_bias; n < num_ram_mipmap_levels; ++n) { @@ -9777,8 +9971,6 @@ upload_texture_image(CLP(TextureContext) *gtc, } break; } - - highest_level = n; } // Did that fail? If it did, we'll immediately try again, this @@ -9793,7 +9985,6 @@ upload_texture_image(CLP(TextureContext) *gtc, needs_reload = true; } } -#endif // OPENGLES if (needs_reload) { // Load the image up from scratch, creating a new GL Texture @@ -9805,7 +9996,16 @@ upload_texture_image(CLP(TextureContext) *gtc, << num_ram_mipmap_levels << ", uses_mipmaps = " << uses_mipmaps << "\n"; } + // If there is immutable storage, this is impossible to do, and we should + // not have gotten here at all. + nassertr(!gtc->_immutable, false); + if (num_ram_mipmap_levels == 0) { + if (GLCAT.is_debug()) { + GLCAT.debug() + << " (initializing NULL image)\n"; + } + if ((external_format == GL_DEPTH_STENCIL) && get_supports_depth_stencil()) { #ifdef OPENGLES component_type = GL_UNSIGNED_INT_24_8_OES; @@ -9844,6 +10044,12 @@ upload_texture_image(CLP(TextureContext) *gtc, << "No mipmap level " << n << " defined for " << tex->get_name() << "\n"; // No mipmap level n; stop here. +#ifndef OPENGLES + if (is_at_least_gl_version(1, 2)) { + // Tell the GL we have no more mipmaps for it to use. 
+ glTexParameteri(texture_target, GL_TEXTURE_MAX_LEVEL, n - mipmap_bias); + } +#endif break; } image_ptr = ptimage; @@ -9878,8 +10084,8 @@ upload_texture_image(CLP(TextureContext) *gtc, case GL_TEXTURE_1D: if (image_compression == Texture::CM_off) { glTexImage1D(page_target, n - mipmap_bias, internal_format, - width, 0, - external_format, component_type, image_ptr); + width, 0, + external_format, component_type, image_ptr); } else { _glCompressedTexImage1D(page_target, n - mipmap_bias, external_format, width, 0, view_size, image_ptr); @@ -9932,15 +10138,13 @@ upload_texture_image(CLP(TextureContext) *gtc, default: if (image_compression == Texture::CM_off) { glTexImage2D(page_target, n - mipmap_bias, internal_format, - width, height, 0, - external_format, component_type, image_ptr); + width, height, 0, + external_format, component_type, image_ptr); } else { - _glCompressedTexImage2D(page_target, n - mipmap_bias, external_format, width, height, - 0, view_size, image_ptr); + _glCompressedTexImage2D(page_target, n - mipmap_bias, external_format, + width, height, 0, view_size, image_ptr); } } - - highest_level = n; } // Report the error message explicitly if the GL texture creation @@ -9951,29 +10155,11 @@ upload_texture_image(CLP(TextureContext) *gtc, << "GL texture creation failed for " << tex->get_name() << " : " << get_error_string(error_code) << "\n"; - gtc->_already_applied = false; + gtc->_has_storage = false; return false; } } -#ifndef OPENGLES // OpenGL ES doesn't have GL_TEXTURE_MAX_LEVEL. - if (is_at_least_gl_version(1, 2)) { - if (load_ram_mipmaps) { - // By the time we get here, we have successfully loaded a certain - // number of mipmap levels. Tell the GL that's all it's going to - // get. 
- glTexParameteri(texture_target, GL_TEXTURE_MAX_LEVEL, highest_level - mipmap_bias); - - } else if (uses_mipmaps) { - // Since the mipmap levels were auto-generated and are therefore - // complete, make sure the GL doesn't remember some previous value - // for GL_TEXTURE_MAX_LEVEL from the above call--set it to the - // full count of mipmap levels. - glTexParameteri(texture_target, GL_TEXTURE_MAX_LEVEL, tex->get_expected_num_mipmap_levels() - mipmap_bias - 1); - } - } -#endif - report_my_gl_errors(); return true; @@ -10042,8 +10228,8 @@ upload_simple_texture(CLP(TextureContext) *gtc) { #endif glTexImage2D(GL_TEXTURE_2D, 0, internal_format, - width, height, 0, - external_format, component_type, image_ptr); + width, height, 0, + external_format, component_type, image_ptr); gtc->mark_simple_loaded(); @@ -10207,6 +10393,14 @@ do_extract_texture_data(CLP(TextureContext) *gtc) { if (target == GL_NONE) { return false; } + +#ifndef OPENGLES + // Make sure any incoherent writes to the texture have been synced. + if (gtc->needs_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT)) { + issue_memory_barrier(GL_TEXTURE_UPDATE_BARRIER_BIT); + } +#endif + glBindTexture(target, gtc->_index); Texture *tex = gtc->get_texture(); @@ -10424,6 +10618,7 @@ do_extract_texture_data(CLP(TextureContext) *gtc) { #endif #ifndef OPENGLES case GL_R32I: + type = Texture::T_int; format = Texture::F_r32i; break; #endif @@ -10656,6 +10851,7 @@ extract_texture_image(PTA_uchar &image, size_t &page_size, nassertr(false, false); return false; #else + if (target == GL_TEXTURE_CUBE_MAP) { // A cube map, compressed or uncompressed. This we must extract // one page at a time. 
diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.h b/panda/src/glstuff/glGraphicsStateGuardian_src.h index 3ae072dea7..aa46bff2ef 100644 --- a/panda/src/glstuff/glGraphicsStateGuardian_src.h +++ b/panda/src/glstuff/glGraphicsStateGuardian_src.h @@ -134,6 +134,9 @@ typedef void (APIENTRYP PFNGLCURRENTPALETTEMATRIXOESPROC) (GLuint matrixpalettei typedef void (APIENTRYP PFNGLLOADPALETTEFROMMODELVIEWMATRIXOESPROC) (void); typedef void (APIENTRYP PFNGLMATRIXINDEXPOINTEROESPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); typedef void (APIENTRYP PFNGLWEIGHTPOINTEROESPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +typedef void (APIENTRYP PFNGLTEXSTORAGE1DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (APIENTRYP PFNGLTEXSTORAGE2DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLTEXSTORAGE3DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); #ifndef OPENGLES_1 // GLSL shader functions @@ -177,12 +180,29 @@ typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERPROC) (GLuint index, GLint size, typedef void (APIENTRYP PFNGLPROGRAMPARAMETERIEXTPROC) (GLuint program, GLenum pname, GLint value); typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount); typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDPROC) (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei primcount); +typedef void (APIENTRYP PFNGLBINDTEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures); typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) 
(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); typedef void (APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers); typedef void (APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufsize, GLsizei *length, GLenum *binaryFormat, void *binary); typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); +typedef GLuint64 (APIENTRYP PFNGLGETTEXTUREHANDLEPROC) (GLuint texture); +typedef GLuint64 (APIENTRYP PFNGLGETTEXTURESAMPLERHANDLEPROC) (GLuint texture, GLuint sampler); +typedef void (APIENTRYP PFNGLMAKETEXTUREHANDLERESIDENTPROC) (GLuint64 handle); +typedef void (APIENTRYP PFNGLMAKETEXTUREHANDLENONRESIDENTPROC) (GLuint64 handle); +typedef GLuint64 (APIENTRYP PFNGLGETIMAGEHANDLEPROC) (GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum format); +typedef void (APIENTRYP PFNGLMAKEIMAGEHANDLERESIDENTPROC) (GLuint64 handle, GLenum access); +typedef void (APIENTRYP PFNGLMAKEIMAGEHANDLENONRESIDENTPROC) (GLuint64 handle); +typedef void (APIENTRYP PFNGLUNIFORMHANDLEUI64PROC) (GLint location, GLuint64 value); +typedef void (APIENTRYP PFNGLUNIFORMHANDLEUI64VPROC) (GLint location, GLsizei count, const GLuint64 *value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64PROC) (GLuint program, GLint location, GLuint64 value); +typedef void (APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64VPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *values); +typedef GLboolean (APIENTRYP PFNGLISTEXTUREHANDLERESIDENTPROC) (GLuint64 handle); +typedef GLboolean (APIENTRYP PFNGLISIMAGEHANDLERESIDENTPROC) (GLuint64 handle); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL1UI64PROC) (GLuint index, GLuint64EXT x); +typedef void (APIENTRYP PFNGLVERTEXATTRIBL1UI64VPROC) (GLuint index, const GLuint64EXT *v); +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBLUI64VPROC) (GLuint index, GLenum pname, GLuint64EXT *params); #endif // OPENGLES #endif // __EDG__ @@ -242,6 +262,8 
@@ public: bool force); virtual void end_draw_primitives(); + void issue_memory_barrier(GLbitfield barrier); + virtual TextureContext *prepare_texture(Texture *tex, int view); virtual bool update_texture(TextureContext *tc, bool force); virtual void release_texture(TextureContext *tc); @@ -448,7 +470,7 @@ protected: bool specify_texture(CLP(TextureContext) *gtc); bool apply_texture(TextureContext *tc); bool upload_texture(CLP(TextureContext) *gtc, bool force); - bool upload_texture_image(CLP(TextureContext) *gtc, + bool upload_texture_image(CLP(TextureContext) *gtc, bool needs_reload, bool uses_mipmaps, int mipmap_bias, GLenum texture_target, GLenum page_target, GLint internal_format, GLint external_format, @@ -506,12 +528,12 @@ protected: bool _vertex_blending_enabled; #ifndef OPENGLES_1 - PT(Shader) _current_shader; - CLP(ShaderContext) *_current_shader_context; - PT(Shader) _vertex_array_shader; - CLP(ShaderContext) *_vertex_array_shader_context; - PT(Shader) _texture_binding_shader; - CLP(ShaderContext) *_texture_binding_shader_context; + PT(Shader) _current_shader; + ShaderContext *_current_shader_context; + PT(Shader) _vertex_array_shader; + ShaderContext *_vertex_array_shader_context; + PT(Shader) _texture_binding_shader; + ShaderContext *_texture_binding_shader_context; #endif #ifdef OPENGLES_2 static PT(Shader) _default_shader; @@ -590,6 +612,11 @@ public: PFNGLTEXSUBIMAGE3DPROC _glTexSubImage3D; PFNGLCOPYTEXSUBIMAGE3DPROC _glCopyTexSubImage3D; + bool _supports_tex_storage; + PFNGLTEXSTORAGE1DPROC _glTexStorage1D; + PFNGLTEXSTORAGE2DPROC _glTexStorage2D; + PFNGLTEXSTORAGE3DPROC _glTexStorage3D; + PFNGLCOMPRESSEDTEXIMAGE1DPROC _glCompressedTexImage1D; PFNGLCOMPRESSEDTEXIMAGE2DPROC _glCompressedTexImage2D; PFNGLCOMPRESSEDTEXIMAGE3DPROC _glCompressedTexImage3D; @@ -600,6 +627,7 @@ public: bool _supports_bgr; bool _supports_rescale_normal; + bool _supports_packed_dabc; bool _supports_multitexture; PFNGLACTIVETEXTUREPROC _glActiveTexture; @@ -661,6 +689,7 @@ 
public: PFNGLDRAWBUFFERSPROC _glDrawBuffers; int _max_fb_samples; bool _supports_viewport_arrays; + bool _supports_bindless_texture; PFNGLGENQUERIESPROC _glGenQueries; PFNGLBEGINQUERYPROC _glBeginQuery; @@ -714,6 +743,7 @@ public: PFNGLPATCHPARAMETERIPROC _glPatchParameteri; PFNGLDRAWARRAYSINSTANCEDPROC _glDrawArraysInstanced; PFNGLDRAWELEMENTSINSTANCEDPROC _glDrawElementsInstanced; + PFNGLBINDTEXTURESPROC _glBindTextures; PFNGLBINDIMAGETEXTUREPROC _glBindImageTexture; PFNGLBINDIMAGETEXTURESPROC _glBindImageTextures; PFNGLDISPATCHCOMPUTEPROC _glDispatchCompute; @@ -723,6 +753,11 @@ public: PFNGLVIEWPORTARRAYVPROC _glViewportArrayv; PFNGLSCISSORARRAYVPROC _glScissorArrayv; PFNGLDEPTHRANGEARRAYVPROC _glDepthRangeArrayv; + PFNGLGETTEXTUREHANDLEPROC _glGetTextureHandle; + PFNGLMAKETEXTUREHANDLERESIDENTPROC _glMakeTextureHandleResident; + PFNGLMAKETEXTUREHANDLENONRESIDENTPROC _glMakeTextureHandleNonResident; + PFNGLUNIFORMHANDLEUI64PROC _glUniformHandleui64; + PFNGLUNIFORMHANDLEUI64VPROC _glUniformHandleui64v; #endif // OPENGLES GLenum _edge_clamp; @@ -740,6 +775,15 @@ public: DeletedDisplayLists _deleted_display_lists; DeletedDisplayLists _deleted_queries; +#ifndef OPENGLES + // Stores textures for which memory barriers should be issued.
+ typedef pset TextureSet; + TextureSet _textures_needing_fetch_barrier; + TextureSet _textures_needing_image_access_barrier; + TextureSet _textures_needing_update_barrier; + TextureSet _textures_needing_framebuffer_barrier; +#endif + //RenderState::SlotMask _inv_state_mask; bool _check_errors; @@ -798,5 +842,3 @@ private: }; #include "glGraphicsStateGuardian_src.I" - - diff --git a/panda/src/glstuff/glShaderContext_src.cxx b/panda/src/glstuff/glShaderContext_src.cxx index dd8042701b..577cb11c2c 100755 --- a/panda/src/glstuff/glShaderContext_src.cxx +++ b/panda/src/glstuff/glShaderContext_src.cxx @@ -195,18 +195,12 @@ CLP(ShaderContext):: CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext(s) { _glgsg = glgsg; _glsl_program = 0; - _glsl_vshader = 0; - _glsl_fshader = 0; - _glsl_gshader = 0; - _glsl_tcshader = 0; - _glsl_teshader = 0; - _glsl_cshader = 0; _uses_standard_vertex_arrays = false; nassertv(s->get_language() == Shader::SL_GLSL); // We compile and analyze the shader here, instead of in shader.cxx, to avoid gobj getting a dependency on GL stuff. 
- if (!glsl_compile_shader()) { + if (!glsl_compile_and_link()) { release_resources(); s->_error_flag = true; return; @@ -344,6 +338,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext continue; } if (size > 7 && noprefix.substr(0, 7) == "Texture") { + _glgsg->_glUniform1i(p, s->_tex_spec.size()); Shader::ShaderTexSpec bind; bind._id = arg_id; bind._name = 0; @@ -376,7 +371,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext s->_mat_spec.push_back(bind); continue; } else if (noprefix == "Material.specular") { - bind._piece = Shader::SMP_row3; + bind._piece = Shader::SMP_row3x3; s->_mat_spec.push_back(bind); continue; } @@ -463,6 +458,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext case GL_UNSIGNED_INT_SAMPLER_1D: case GL_SAMPLER_1D_SHADOW: case GL_SAMPLER_1D: { + _glgsg->_glUniform1i(p, s->_tex_spec.size()); Shader::ShaderTexSpec bind; bind._id = arg_id; bind._name = InternalName::make(param_name); @@ -475,6 +471,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext case GL_SAMPLER_2D_SHADOW: #endif case GL_SAMPLER_2D: { + _glgsg->_glUniform1i(p, s->_tex_spec.size()); Shader::ShaderTexSpec bind; bind._id = arg_id; bind._name = InternalName::make(param_name); @@ -487,6 +484,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext case GL_UNSIGNED_INT_SAMPLER_3D: #endif case GL_SAMPLER_3D: { + _glgsg->_glUniform1i(p, s->_tex_spec.size()); Shader::ShaderTexSpec bind; bind._id = arg_id; bind._name = InternalName::make(param_name); @@ -500,6 +498,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext case GL_SAMPLER_CUBE_SHADOW: #endif case GL_SAMPLER_CUBE: { + _glgsg->_glUniform1i(p, s->_tex_spec.size()); Shader::ShaderTexSpec bind; bind._id = arg_id; bind._name = InternalName::make(param_name); @@ -508,7 +507,11 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, 
Shader *s) : ShaderContext s->_tex_spec.push_back(bind); continue; } #ifndef OPENGLES + case GL_INT_SAMPLER_2D_ARRAY: + case GL_UNSIGNED_INT_SAMPLER_2D_ARRAY: + case GL_SAMPLER_2D_ARRAY_SHADOW: case GL_SAMPLER_2D_ARRAY: { + _glgsg->_glUniform1i(p, s->_tex_spec.size()); Shader::ShaderTexSpec bind; bind._id = arg_id; bind._name = InternalName::make(param_name); @@ -584,6 +587,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext case GL_FLOAT_MAT3: bind._dim[1] = 9; break; case GL_FLOAT_MAT4: bind._dim[1] = 16; break; } + bind._type = Shader::SPT_int; bind._arg = InternalName::make(param_name); bind._dim[0] = 1; bind._dep[0] = Shader::SSD_general | Shader::SSD_shaderinputs; @@ -611,6 +615,7 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext // bind once and then forget about it. _glgsg->_glUniform1i(p, imgunitno++); _glsl_img_inputs.push_back(InternalName::make(param_name)); + _glsl_img_textures.push_back(NULL); continue; #endif default: @@ -661,6 +666,23 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext case GL_FLOAT_MAT3: bind._dim[1] = 9; break; case GL_FLOAT_MAT4: bind._dim[1] = 16; break; } + switch (param_type) { + case GL_BOOL: + case GL_BOOL_VEC2: + case GL_BOOL_VEC3: + case GL_BOOL_VEC4: + bind._type = Shader::SPT_unknown; + break; + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + bind._type = Shader::SPT_int; + break; + default: + bind._type = Shader::SPT_float; + break; + } bind._arg = InternalName::make(param_name); bind._dim[0] = param_size; bind._dep[0] = Shader::SSD_general | Shader::SSD_shaderinputs; @@ -783,51 +805,20 @@ release_resources() { return; } if (_glsl_program != 0) { - if (_glsl_vshader != 0) { - _glgsg->_glDetachShader(_glsl_program, _glsl_vshader); - } - if (_glsl_fshader != 0) { - _glgsg->_glDetachShader(_glsl_program, _glsl_fshader); - } - if (_glsl_gshader != 0) { - _glgsg->_glDetachShader(_glsl_program, 
_glsl_gshader); - } - if (_glsl_tcshader != 0) { - _glgsg->_glDetachShader(_glsl_program, _glsl_tcshader); - } - if (_glsl_teshader != 0) { - _glgsg->_glDetachShader(_glsl_program, _glsl_teshader); - } - if (_glsl_cshader != 0) { - _glgsg->_glDetachShader(_glsl_program, _glsl_cshader); + GLSLShaders::const_iterator it; + for (it = _glsl_shaders.begin(); it != _glsl_shaders.end(); ++it) { + _glgsg->_glDetachShader(_glsl_program, *it); } _glgsg->_glDeleteProgram(_glsl_program); _glsl_program = 0; } - if (_glsl_vshader != 0) { - _glgsg->_glDeleteShader(_glsl_vshader); - _glsl_vshader = 0; - } - if (_glsl_fshader != 0) { - _glgsg->_glDeleteShader(_glsl_fshader); - _glsl_fshader = 0; - } - if (_glsl_gshader != 0) { - _glgsg->_glDeleteShader(_glsl_gshader); - _glsl_gshader = 0; - } - if (_glsl_tcshader != 0) { - _glgsg->_glDeleteShader(_glsl_tcshader); - _glsl_tcshader = 0; - } - if (_glsl_teshader != 0) { - _glgsg->_glDeleteShader(_glsl_teshader); - _glsl_teshader = 0; - } - if (_glsl_cshader != 0) { - _glgsg->_glDeleteShader(_glsl_cshader); - _glsl_cshader = 0; + + GLSLShaders::const_iterator it; + for (it = _glsl_shaders.begin(); it != _glsl_shaders.end(); ++it) { + _glgsg->_glDeleteShader(*it); } + + _glsl_shaders.clear(); _glgsg->report_my_gl_errors(); } @@ -890,32 +881,33 @@ issue_parameters(int altered) { // Iterate through _ptr parameters for (int i=0; i<(int)_shader->_ptr_spec.size(); i++) { - if(altered & (_shader->_ptr_spec[i]._dep[0] | _shader->_ptr_spec[i]._dep[1])) { - const Shader::ShaderPtrSpec& _ptr = _shader->_ptr_spec[i]; + const Shader::ShaderPtrSpec& spec = _shader->_ptr_spec[i]; + + if (altered & (spec._dep[0] | spec._dep[1])) { Shader::ShaderPtrData* ptr_data = - const_cast< Shader::ShaderPtrData*>(_glgsg->fetch_ptr_parameter(_ptr)); + const_cast< Shader::ShaderPtrData*>(_glgsg->fetch_ptr_parameter(spec)); if (ptr_data == NULL) { //the input is not contained in ShaderPtrData release_resources(); return; } - GLint p = 
_glsl_parameter_map[_shader->_ptr_spec[i]._id._seqno]; + GLint p = _glsl_parameter_map[spec._id._seqno]; switch (ptr_data->_type) { case Shader::SPT_float: - switch (_ptr._dim[1]) { - case 1: _glgsg->_glUniform1fv(p, _ptr._dim[0], (float*)ptr_data->_ptr); continue; - case 2: _glgsg->_glUniform2fv(p, _ptr._dim[0], (float*)ptr_data->_ptr); continue; - case 3: _glgsg->_glUniform3fv(p, _ptr._dim[0], (float*)ptr_data->_ptr); continue; - case 4: _glgsg->_glUniform4fv(p, _ptr._dim[0], (float*)ptr_data->_ptr); continue; - case 9: _glgsg->_glUniformMatrix3fv(p, _ptr._dim[0], GL_FALSE, (float*)ptr_data->_ptr); continue; - case 16: _glgsg->_glUniformMatrix4fv(p, _ptr._dim[0], GL_FALSE, (float*)ptr_data->_ptr); continue; + switch (spec._dim[1]) { + case 1: _glgsg->_glUniform1fv(p, spec._dim[0], (float*)ptr_data->_ptr); continue; + case 2: _glgsg->_glUniform2fv(p, spec._dim[0], (float*)ptr_data->_ptr); continue; + case 3: _glgsg->_glUniform3fv(p, spec._dim[0], (float*)ptr_data->_ptr); continue; + case 4: _glgsg->_glUniform4fv(p, spec._dim[0], (float*)ptr_data->_ptr); continue; + case 9: _glgsg->_glUniformMatrix3fv(p, spec._dim[0], GL_FALSE, (float*)ptr_data->_ptr); continue; + case 16: _glgsg->_glUniformMatrix4fv(p, spec._dim[0], GL_FALSE, (float*)ptr_data->_ptr); continue; } case Shader::SPT_int: - switch (_ptr._dim[1]) { - case 1: _glgsg->_glUniform1iv(p, _ptr._dim[0], (int*)ptr_data->_ptr); continue; - case 2: _glgsg->_glUniform2iv(p, _ptr._dim[0], (int*)ptr_data->_ptr); continue; - case 3: _glgsg->_glUniform3iv(p, _ptr._dim[0], (int*)ptr_data->_ptr); continue; - case 4: _glgsg->_glUniform4iv(p, _ptr._dim[0], (int*)ptr_data->_ptr); continue; + switch (spec._dim[1]) { + case 1: _glgsg->_glUniform1iv(p, spec._dim[0], (int*)ptr_data->_ptr); continue; + case 2: _glgsg->_glUniform2iv(p, spec._dim[0], (int*)ptr_data->_ptr); continue; + case 3: _glgsg->_glUniform3iv(p, spec._dim[0], (int*)ptr_data->_ptr); continue; + case 4: _glgsg->_glUniform4iv(p, spec._dim[0], 
(int*)ptr_data->_ptr); continue; } case Shader::SPT_double: GLCAT.error() << "Passing double-precision shader inputs to GLSL shaders is not currently supported\n"; @@ -1002,7 +994,9 @@ disable_shader_vertex_arrays() { //////////////////////////////////////////////////////////////////// bool CLP(ShaderContext):: update_shader_vertex_arrays(ShaderContext *prev, bool force) { - if (prev) prev->disable_shader_vertex_arrays(); + if (prev) { + prev->disable_shader_vertex_arrays(); + } if (!valid()) { return true; } @@ -1040,14 +1034,20 @@ update_shader_vertex_arrays(ShaderContext *prev, bool force) { const GLint p = _glsl_parameter_map[_shader->_var_spec[i]._id._seqno]; _glgsg->_glEnableVertexAttribArray(p); - _glgsg->_glVertexAttribPointer(p, num_values, _glgsg->get_numeric_type(numeric_type), - GL_TRUE, stride, client_pointer + start); + + if (numeric_type == GeomEnums::NT_packed_dabc) { + _glgsg->_glVertexAttribPointer(p, GL_BGRA, GL_UNSIGNED_BYTE, + GL_TRUE, stride, client_pointer + start); + } else { + _glgsg->_glVertexAttribPointer(p, num_values, _glgsg->get_numeric_type(numeric_type), + GL_TRUE, stride, client_pointer + start); + } } } } _glgsg->report_my_gl_errors(); - + return true; } @@ -1062,14 +1062,28 @@ disable_shader_texture_bindings() { return; } -#ifndef OPENGLES_2 - for (int i=0; i<(int)_shader->_tex_spec.size(); i++) { - if (_shader->_tex_spec[i]._name == 0) { - _glgsg->_glActiveTexture(GL_TEXTURE0 + _shader->_tex_spec[i]._stage); - } else { - _glgsg->_glActiveTexture(GL_TEXTURE0 + _shader->_tex_spec[i]._stage + _stage_offset); + for (int i = 0; i < _shader->_tex_spec.size(); ++i) { +#ifndef OPENGLES + // Check if bindless was used, if so, there's nothing to unbind. 
+ if (_glgsg->_supports_bindless_texture) { + GLint p = _glsl_parameter_map[_shader->_tex_spec[i]._id._seqno]; + + if (_glsl_uniform_handles.count(p) > 0) { + continue; + } } + if (_glgsg->_supports_multi_bind) { + // There are non-bindless textures to unbind, and we're lazy, + // so let's go and unbind everything after this point using one + // multi-bind call, and then break out of the loop. + _glgsg->_glBindTextures(i, _shader->_tex_spec.size() - i, NULL); + break; + } +#endif + + _glgsg->_glActiveTexture(GL_TEXTURE0 + i); + #ifndef OPENGLES glBindTexture(GL_TEXTURE_1D, 0); #endif // OPENGLES @@ -1087,22 +1101,29 @@ disable_shader_texture_bindings() { if (_glgsg->_supports_cube_map) { glBindTexture(GL_TEXTURE_CUBE_MAP, 0); } - // This is probably faster - but maybe not as safe? - // cgGLDisableTextureParameter(p); } -#endif // OPENGLES_2 - _stage_offset = 0; #ifndef OPENGLES // Now unbind all the image units. Not sure if we *have* to do this. int num_image_units = min(_glsl_img_inputs.size(), (size_t)_glgsg->_max_image_units); - if (_glgsg->_supports_multi_bind) { - _glgsg->_glBindImageTextures(0, num_image_units, NULL); + if (num_image_units > 0) { + if (_glgsg->_supports_multi_bind) { + _glgsg->_glBindImageTextures(0, num_image_units, NULL); - } else { - for (int i = 0; i < num_image_units; ++i) { - _glgsg->_glBindImageTexture(i, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); + } else { + for (int i = 0; i < num_image_units; ++i) { + _glgsg->_glBindImageTexture(i, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); + } + } + + if (gl_enable_memory_barriers) { + for (int i = 0; i < num_image_units; ++i) { + // We don't distinguish between read-only and read-write/write-only + // image access, so we have to assume that the shader wrote to it. 
+ _glsl_img_textures[i]->mark_incoherent(); + _glsl_img_textures[i] = NULL; + } } } #endif @@ -1132,6 +1153,8 @@ update_shader_texture_bindings(ShaderContext *prev) { } #ifndef OPENGLES + GLbitfield barriers = 0; + // First bind all the 'image units'; a bit of an esoteric OpenGL feature right now. int num_image_units = min(_glsl_img_inputs.size(), (size_t)_glgsg->_max_image_units); @@ -1152,8 +1175,14 @@ update_shader_texture_bindings(ShaderContext *prev) { CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tex->prepare_now(view, _glgsg->_prepared_objects, _glgsg)); if (gtc != (TextureContext*)NULL) { + _glsl_img_textures[i] = gtc; + gl_tex = gtc->_index; _glgsg->update_texture(gtc, true); + + if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)) { + barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; + } } } @@ -1185,28 +1214,26 @@ update_shader_texture_bindings(ShaderContext *prev) { // filtered TextureAttrib in _target_texture. const TextureAttrib *texattrib = DCAST(TextureAttrib, _glgsg->_target_rs->get_attrib_def(TextureAttrib::get_class_slot())); nassertv(texattrib != (TextureAttrib *)NULL); - _stage_offset = texattrib->get_num_on_stages(); for (int i = 0; i < (int)_shader->_tex_spec.size(); ++i) { - InternalName *id = _shader->_tex_spec[i]._name; + const InternalName *id = _shader->_tex_spec[i]._name; int texunit = _shader->_tex_spec[i]._stage; - if (id != 0) { - texunit += _stage_offset; - } - Texture *tex = 0; + Texture *tex = NULL; int view = _glgsg->get_current_tex_view_offset(); - if (id != 0) { + if (id != NULL) { const ShaderInput *input = _glgsg->_target_shader->get_shader_input(id); tex = input->get_texture(); + } else { - if (_shader->_tex_spec[i]._stage >= texattrib->get_num_on_stages()) { + if (texunit >= texattrib->get_num_on_stages()) { continue; } - TextureStage *stage = texattrib->get_on_stage(_shader->_tex_spec[i]._stage); + TextureStage *stage = texattrib->get_on_stage(texunit); tex = texattrib->get_on_texture(stage); view += 
stage->get_tex_view_offset(); } + if (_shader->_tex_spec[i]._suffix != 0) { // The suffix feature is inefficient. It is a temporary hack. if (tex == 0) { @@ -1218,27 +1245,64 @@ update_shader_texture_bindings(ShaderContext *prev) { continue; } - _glgsg->_glActiveTexture(GL_TEXTURE0 + texunit); - - TextureContext *tc = tex->prepare_now(view, _glgsg->_prepared_objects, _glgsg); - if (tc == (TextureContext*)NULL) { - continue; - } - - GLenum target = _glgsg->get_texture_target(tex->get_texture_type()); - if (target == GL_NONE) { - // Unsupported texture mode. + CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tex->prepare_now(view, _glgsg->_prepared_objects, _glgsg)); + if (gtc == NULL) { continue; } GLint p = _glsl_parameter_map[_shader->_tex_spec[i]._id._seqno]; - _glgsg->_glUniform1i(p, texunit); - if (!_glgsg->update_texture(tc, false)) { +#ifndef OPENGLES + // If it was recently written to, we will have to issue a memory barrier soon. + if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT)) { + barriers |= GL_TEXTURE_FETCH_BARRIER_BIT; + } + + // Try bindless texturing first, if supported. + if (gl_use_bindless_texture && _glgsg->_supports_bindless_texture) { + // We demand the real texture, since we won't be able + // to change the texture properties after this point. + if (!_glgsg->update_texture(gtc, true)) { + continue; + } + + GLuint64 handle = gtc->get_handle(); + if (handle != 0) { + gtc->make_handle_resident(); + gtc->set_active(true); + + // Check if we have already specified this texture handle. + // If so, no need to call glUniformHandle again. + pmap::const_iterator it; + it = _glsl_uniform_handles.find(p); + if (it != _glsl_uniform_handles.end() && it->second == handle) { + // Already specified. + continue; + } else { + _glgsg->_glUniformHandleui64(p, handle); + _glsl_uniform_handles[p] = handle; + } + continue; + } + } +#endif + + // Bindless texturing wasn't supported or didn't work, so + // let's just bind the texture normally. 
+ _glgsg->_glActiveTexture(GL_TEXTURE0 + i); + if (!_glgsg->update_texture(gtc, false)) { continue; } + _glgsg->apply_texture(gtc); } +#ifndef OPENGLES + if (barriers != 0) { + // Issue a memory barrier. + _glgsg->issue_memory_barrier(barriers); + } +#endif + _glgsg->report_my_gl_errors(); } @@ -1248,7 +1312,7 @@ update_shader_texture_bindings(ShaderContext *prev) { // Description: This subroutine prints the infolog for a shader. //////////////////////////////////////////////////////////////////// void CLP(ShaderContext):: -glsl_report_shader_errors(unsigned int shader) { +glsl_report_shader_errors(GLuint shader) { char *info_log; GLint length = 0; GLint num_chars = 0; @@ -1271,7 +1335,7 @@ glsl_report_shader_errors(unsigned int shader) { // Description: This subroutine prints the infolog for a program. //////////////////////////////////////////////////////////////////// void CLP(ShaderContext):: -glsl_report_program_errors(unsigned int program) { +glsl_report_program_errors(GLuint program) { char *info_log; GLint length = 0; GLint num_chars = 0; @@ -1289,13 +1353,13 @@ glsl_report_program_errors(unsigned int program) { } //////////////////////////////////////////////////////////////////// -// Function: Shader::glsl_compile_entry_point +// Function: Shader::glsl_compile_shader // Access: Private // Description: //////////////////////////////////////////////////////////////////// -unsigned int CLP(ShaderContext):: -glsl_compile_entry_point(Shader::ShaderType type) { - unsigned int handle = 0; +bool CLP(ShaderContext):: +glsl_compile_shader(Shader::ShaderType type) { + GLuint handle = 0; switch (type) { case Shader::ST_vertex: handle = _glgsg->_glCreateShader(GL_VERTEX_SHADER); @@ -1327,8 +1391,10 @@ glsl_compile_entry_point(Shader::ShaderType type) { #endif } if (!handle) { + GLCAT.error() + << "Could not create a GLSL shader of the requested type.\n"; _glgsg->report_my_gl_errors(); - return 0; + return false; } string text_str = _shader->get_text(type); @@ -1345,79 
+1411,68 @@ glsl_compile_entry_point(Shader::ShaderType type) { glsl_report_shader_errors(handle); _glgsg->_glDeleteShader(handle); _glgsg->report_my_gl_errors(); - return 0; + return false; } - return handle; + _glgsg->_glAttachShader(_glsl_program, handle); + _glsl_shaders.push_back(handle); + return true; } //////////////////////////////////////////////////////////////////// -// Function: Shader::glsl_compile_shader +// Function: Shader::glsl_compile_and_link // Access: Private // Description: This subroutine compiles a GLSL shader. //////////////////////////////////////////////////////////////////// bool CLP(ShaderContext):: -glsl_compile_shader() { +glsl_compile_and_link() { + _glsl_shaders.clear(); _glsl_program = _glgsg->_glCreateProgram(); - if (!_glsl_program) return false; + if (!_glsl_program) { + return false; + } + bool valid = true; if (!_shader->get_text(Shader::ST_vertex).empty()) { - _glsl_vshader = glsl_compile_entry_point(Shader::ST_vertex); - if (!_glsl_vshader) return false; - _glgsg->_glAttachShader(_glsl_program, _glsl_vshader); + valid &= glsl_compile_shader(Shader::ST_vertex); } if (!_shader->get_text(Shader::ST_fragment).empty()) { - _glsl_fshader = glsl_compile_entry_point(Shader::ST_fragment); - if (!_glsl_fshader) return false; - _glgsg->_glAttachShader(_glsl_program, _glsl_fshader); + valid &= glsl_compile_shader(Shader::ST_fragment); } - if (!_shader->get_text(Shader::ST_geometry).empty()) { - _glsl_gshader = glsl_compile_entry_point(Shader::ST_geometry); - if (!_glsl_gshader) return false; - _glgsg->_glAttachShader(_glsl_program, _glsl_gshader); - #ifdef OPENGLES nassertr(false, false); // OpenGL ES has no geometry shaders. #else - // Set the vertex output limit to the maximum + if (!_shader->get_text(Shader::ST_geometry).empty()) { + valid &= glsl_compile_shader(Shader::ST_geometry); + + // Set the vertex output limit to the maximum. 
+ // This is slow, but it is probably reasonable to require + // the user to override this in his shader using layout(). nassertr(_glgsg->_glProgramParameteri != NULL, false); GLint max_vertices; glGetIntegerv(GL_MAX_GEOMETRY_OUTPUT_VERTICES, &max_vertices); _glgsg->_glProgramParameteri(_glsl_program, GL_GEOMETRY_VERTICES_OUT_ARB, max_vertices); -#endif } +#endif if (!_shader->get_text(Shader::ST_tess_control).empty()) { - _glsl_tcshader = glsl_compile_entry_point(Shader::ST_tess_control); - if (!_glsl_tcshader) return false; - _glgsg->_glAttachShader(_glsl_program, _glsl_tcshader); + valid &= glsl_compile_shader(Shader::ST_tess_control); } if (!_shader->get_text(Shader::ST_tess_evaluation).empty()) { - _glsl_teshader = glsl_compile_entry_point(Shader::ST_tess_evaluation); - if (!_glsl_teshader) return false; - _glgsg->_glAttachShader(_glsl_program, _glsl_teshader); + valid &= glsl_compile_shader(Shader::ST_tess_evaluation); } if (!_shader->get_text(Shader::ST_compute).empty()) { - _glsl_cshader = glsl_compile_entry_point(Shader::ST_compute); - if (!_glsl_cshader) return false; - _glgsg->_glAttachShader(_glsl_program, _glsl_cshader); + valid &= glsl_compile_shader(Shader::ST_compute); } - // There might be warnings. Only report them for one shader program. - if (_glsl_vshader != 0) { - glsl_report_shader_errors(_glsl_vshader); - } else if (_glsl_fshader != 0) { - glsl_report_shader_errors(_glsl_fshader); - } else if (_glsl_gshader != 0) { - glsl_report_shader_errors(_glsl_gshader); - } else if (_glsl_tcshader != 0) { - glsl_report_shader_errors(_glsl_tcshader); - } else if (_glsl_teshader != 0) { - glsl_report_shader_errors(_glsl_teshader); + // There might be warnings, so report those. + GLSLShaders::const_iterator it; + for (it = _glsl_shaders.begin(); it != _glsl_shaders.end(); ++it) { + glsl_report_shader_errors(*it); } // If we requested to retrieve the shader, we should indicate that before linking. 
@@ -1454,7 +1509,7 @@ glsl_compile_shader() { _glgsg->_glGetProgramBinary(_glsl_program, length, &num_bytes, &format, (void*)binary); pofstream s; - s.open(filename, ios::out | ios::binary); + s.open(filename, ios::out | ios::binary | ios::trunc); s.write(binary, num_bytes); s.close(); diff --git a/panda/src/glstuff/glShaderContext_src.h b/panda/src/glstuff/glShaderContext_src.h index 1ce7277647..d7715d8c30 100755 --- a/panda/src/glstuff/glShaderContext_src.h +++ b/panda/src/glstuff/glShaderContext_src.h @@ -50,26 +50,33 @@ public: private: GLuint _glsl_program; - GLuint _glsl_vshader; - GLuint _glsl_fshader; - GLuint _glsl_gshader; - GLuint _glsl_tcshader; - GLuint _glsl_teshader; - GLuint _glsl_cshader; + typedef pvector GLSLShaders; + GLSLShaders _glsl_shaders; - pvector _glsl_parameter_map; + //struct ParamContext { + // CPT(InternalName) _name; + // GLint _location; + // GLsizei _count; + // WPT(ParamValue) _value; + // UpdateSeq _updated; + //}; + //typedef pvector ParamContexts; + //ParamContexts _params; + + pvector _glsl_parameter_map; + pmap _glsl_uniform_handles; pvector _glsl_img_inputs; + pvector _glsl_img_textures; - int _stage_offset; CLP(GraphicsStateGuardian) *_glgsg; bool _uses_standard_vertex_arrays; - void glsl_report_shader_errors(unsigned int shader); - void glsl_report_program_errors(unsigned int program); - unsigned int glsl_compile_entry_point(Shader::ShaderType type); - bool glsl_compile_shader(); + void glsl_report_shader_errors(GLuint shader); + void glsl_report_program_errors(GLuint program); + bool glsl_compile_shader(Shader::ShaderType type); + bool glsl_compile_and_link(); bool parse_and_set_short_hand_shader_vars(Shader::ShaderArgId &arg_id, Shader *s); void release_resources(); diff --git a/panda/src/glstuff/glTextureContext_src.I b/panda/src/glstuff/glTextureContext_src.I index 017ae91399..50704ce14e 100644 --- a/panda/src/glstuff/glTextureContext_src.I +++ b/panda/src/glstuff/glTextureContext_src.I @@ -19,12 +19,20 @@ // 
Description: //////////////////////////////////////////////////////////////////// INLINE CLP(TextureContext):: -CLP(TextureContext)(PreparedGraphicsObjects *pgo, Texture *tex, int view) : +CLP(TextureContext)(CLP(GraphicsStateGuardian) *glgsg, + PreparedGraphicsObjects *pgo, Texture *tex, int view) : TextureContext(pgo, tex, view) { - _index = 0; - _already_applied = false; + _glgsg = glgsg; + + glGenTextures(1, &_index); + + _handle = 0; + _needs_barrier = false; + _has_storage = false; + _immutable = false; _uses_mipmaps = false; + _generate_mipmaps = false; _internal_format = 0; _width = 0; _height = 0; diff --git a/panda/src/glstuff/glTextureContext_src.cxx b/panda/src/glstuff/glTextureContext_src.cxx index 4e6cdbb1bb..30effdd052 100644 --- a/panda/src/glstuff/glTextureContext_src.cxx +++ b/panda/src/glstuff/glTextureContext_src.cxx @@ -16,6 +16,23 @@ TypeHandle CLP(TextureContext)::_type_handle; +//////////////////////////////////////////////////////////////////// +// Function: CLP(TextureContext)::Denstructor +// Access: Public +// Description: +//////////////////////////////////////////////////////////////////// +CLP(TextureContext):: +~CLP(TextureContext)() { + if (gl_enable_memory_barriers) { + _glgsg->_textures_needing_fetch_barrier.erase(this); + _glgsg->_textures_needing_image_access_barrier.erase(this); + _glgsg->_textures_needing_update_barrier.erase(this); + _glgsg->_textures_needing_framebuffer_barrier.erase(this); + } + + glDeleteTextures(1, &_index); + _index = 0; +} //////////////////////////////////////////////////////////////////// // Function: GLTextureContext::evict_lru @@ -35,7 +52,16 @@ TypeHandle CLP(TextureContext)::_type_handle; void CLP(TextureContext):: evict_lru() { dequeue_lru(); - reset_data(); + + if (_handle != 0) { + if (_handle_resident) { + _glgsg->_glMakeTextureHandleNonResident(_handle); + } + _handle_resident = false; + } else { + reset_data(); + } + update_data_size_bytes(0); mark_unloaded(); } @@ -48,6 +74,10 @@ 
evict_lru() { //////////////////////////////////////////////////////////////////// void CLP(TextureContext):: reset_data() { + if (_handle != 0 && _handle_resident) { + _glgsg->_glMakeTextureHandleNonResident(_handle); + } + // Free the texture resources. glDeleteTextures(1, &_index); @@ -55,5 +85,102 @@ reset_data() { // re-load the texture later. glGenTextures(1, &_index); - _already_applied = false; + _handle = 0; + _handle_resident = false; + _needs_barrier = false; + _has_storage = false; + _immutable = false; + +#ifndef OPENGLES + // Mark the texture as coherent. + if (gl_enable_memory_barriers) { + _glgsg->_textures_needing_fetch_barrier.erase(this); + _glgsg->_textures_needing_image_access_barrier.erase(this); + _glgsg->_textures_needing_update_barrier.erase(this); + _glgsg->_textures_needing_framebuffer_barrier.erase(this); + } +#endif } + +//////////////////////////////////////////////////////////////////// +// Function: GLTextureContext::make_handle_resident +// Access: Public +// Description: +//////////////////////////////////////////////////////////////////// +void CLP(TextureContext):: +make_handle_resident() { + if (_handle != 0) { + if (!_handle_resident) { + _glgsg->_glMakeTextureHandleResident(_handle); + _handle_resident = true; + } + set_resident(true); + } +} + +//////////////////////////////////////////////////////////////////// +// Function: CLP(TextureContext)::get_handle +// Access: Public +// Description: Returns a handle for this texture. Once this has +// been created, the texture data may still be updated, +// but its properties may not. 
+//////////////////////////////////////////////////////////////////// +INLINE GLuint64 CLP(TextureContext):: +get_handle() { +#ifdef OPENGLES + return 0; +#else + if (!_glgsg->_supports_bindless_texture) { + return false; + } + + if (_handle == 0) { + _handle = _glgsg->_glGetTextureHandle(_index); + } + + _immutable = true; + return _handle; +#endif +} + +#ifndef OPENGLES +//////////////////////////////////////////////////////////////////// +// Function: GLTextureContext::needs_barrier +// Access: Public +// Description: +//////////////////////////////////////////////////////////////////// +bool CLP(TextureContext):: +needs_barrier(GLbitfield barrier) { + if (!gl_enable_memory_barriers) { + return false; + } + + return (((barrier & GL_TEXTURE_FETCH_BARRIER_BIT) && + _glgsg->_textures_needing_fetch_barrier.count(this))) + || (((barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) && + _glgsg->_textures_needing_image_access_barrier.count(this))) + || (((barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) && + _glgsg->_textures_needing_update_barrier.count(this))) + || (((barrier & GL_FRAMEBUFFER_BARRIER_BIT) && + _glgsg->_textures_needing_framebuffer_barrier.count(this))); +} + +//////////////////////////////////////////////////////////////////// +// Function: GLTextureContext::mark_incoherent +// Access: Public +// Description: Mark a texture as needing a memory barrier, since +// a non-coherent write just happened to it. 
+//////////////////////////////////////////////////////////////////// +void CLP(TextureContext):: +mark_incoherent() { + if (!gl_enable_memory_barriers) { + return; + } + + _glgsg->_textures_needing_fetch_barrier.insert(this); + _glgsg->_textures_needing_image_access_barrier.insert(this); + _glgsg->_textures_needing_update_barrier.insert(this); + _glgsg->_textures_needing_framebuffer_barrier.insert(this); +} + +#endif // OPENGLES diff --git a/panda/src/glstuff/glTextureContext_src.h b/panda/src/glstuff/glTextureContext_src.h index b03e767dc4..92e90a8276 100644 --- a/panda/src/glstuff/glTextureContext_src.h +++ b/panda/src/glstuff/glTextureContext_src.h @@ -16,32 +16,60 @@ #include "textureContext.h" #include "deletedChain.h" +class CLP(GraphicsStateGuardian); + //////////////////////////////////////////////////////////////////// // Class : GLTextureContext // Description : //////////////////////////////////////////////////////////////////// class EXPCL_GL CLP(TextureContext) : public TextureContext { public: - INLINE CLP(TextureContext)(PreparedGraphicsObjects *pgo, Texture *tex, int view); + INLINE CLP(TextureContext)(CLP(GraphicsStateGuardian) *glgsg, + PreparedGraphicsObjects *pgo, + Texture *tex, int view); ALLOC_DELETED_CHAIN(CLP(TextureContext)); + virtual ~CLP(TextureContext)(); virtual void evict_lru(); void reset_data(); + void make_handle_resident(); + GLuint64 get_handle(); + +#ifdef OPENGLES + CONSTEXPR bool needs_barrier(GLbitfield barrier) { return false; }; +#else + bool needs_barrier(GLbitfield barrier); + void mark_incoherent(); +#endif + // This is the GL "name" of the texture object. GLuint _index; + // This is the bindless "handle" to the texture object. + GLuint64 _handle; + bool _handle_resident; + + // This is true if the texture was recently written to in a + // non-coherent way, and Panda may have to call glMemoryBarrier + // for the results of this write to become visible. 
+ bool _needs_barrier; + // These are the parameters that we specified with the last - // glTexImage2D() call. If none of these have changed, we can - // reload the texture image with a glTexSubImage2D(). - bool _already_applied; + // glTexImage2D() or glTexStorage2D() call. If none of these have + // changed, we can reload the texture image with a glTexSubImage2D(). + bool _has_storage; + bool _immutable; bool _uses_mipmaps; + bool _generate_mipmaps; GLint _internal_format; GLsizei _width; GLsizei _height; GLsizei _depth; GLenum _target; + CLP(GraphicsStateGuardian) *_glgsg; + public: static TypeHandle get_class_type() { return _type_handle; diff --git a/panda/src/glstuff/glmisc_src.cxx b/panda/src/glstuff/glmisc_src.cxx index 83a15025a0..6f6969883e 100644 --- a/panda/src/glstuff/glmisc_src.cxx +++ b/panda/src/glstuff/glmisc_src.cxx @@ -208,6 +208,34 @@ ConfigVariableBool gl_dump_compiled_shaders "programs to disk with a filename like glsl_program0.dump " "into the current directory.")); +ConfigVariableBool gl_immutable_texture_storage + ("gl-immutable-texture-storage", true, + PRC_DESC("This configures Panda to pre-allocate immutable storage " + "for each texture. This improves runtime performance, but " + "changing the size or type of a texture will be slower.")); + +ConfigVariableBool gl_use_bindless_texture + ("gl-use-bindless-texture", false, + PRC_DESC("Set this to let Panda use OpenGL's bindless texture " + "extension for all textures passed to shaders, for improved " + "performance. 
This is an experimental feature and comes " + "with a few caveats; for one, it requires that all sampler " + "uniforms have a layout(bindless_sampler) qualifier, and " + "it also requires that the texture properties are not " + "modified after the texture handle has been initialized.")); + +ConfigVariableBool gl_enable_memory_barriers + ("gl-enable-memory-barriers", true, + PRC_DESC("If this is set, Panda will make sure that every write " + "to an image using an image2D (et al) binding will cause " + "Panda to issue a memory barrier before the next use of " + "said texture, to ensure that all reads and writes are " + "properly synchronized. This may not be strictly necessary " + "when using the 'coherent' qualifier, but Panda has no " + "way to detect whether you are using those. Turning " + "this off may give a slight performance increase, but you " + "have to know what you're doing.")); + extern ConfigVariableBool gl_parallel_arrays; void CLP(init_classes)() { diff --git a/panda/src/glstuff/glmisc_src.h b/panda/src/glstuff/glmisc_src.h index 378d585511..1d7e330f83 100644 --- a/panda/src/glstuff/glmisc_src.h +++ b/panda/src/glstuff/glmisc_src.h @@ -67,6 +67,9 @@ extern ConfigVariableBool gl_force_flush; extern ConfigVariableBool gl_separate_specular_color; extern ConfigVariableBool gl_cube_map_seamless; extern ConfigVariableBool gl_dump_compiled_shaders; +extern ConfigVariableBool gl_immutable_texture_storage; +extern ConfigVariableBool gl_use_bindless_texture; +extern ConfigVariableBool gl_enable_memory_barriers; extern EXPCL_GL void CLP(init_classes)();