diff --git a/panda/src/display/graphicsStateGuardian.I b/panda/src/display/graphicsStateGuardian.I
index 4bcc120f72..900e349254 100644
--- a/panda/src/display/graphicsStateGuardian.I
+++ b/panda/src/display/graphicsStateGuardian.I
@@ -655,6 +655,17 @@ get_supports_tessellation_shaders() const {
   return _supports_tessellation_shaders;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::get_supports_compute_shaders
+//       Access: Published
+//  Description: Returns true if this particular GSG supports
+//               compute shaders.
+////////////////////////////////////////////////////////////////////
+INLINE bool GraphicsStateGuardian::
+get_supports_compute_shaders() const {
+  return _supports_compute_shaders;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_supports_glsl
 //       Access: Published
diff --git a/panda/src/display/graphicsStateGuardian.cxx b/panda/src/display/graphicsStateGuardian.cxx
index 715d1a7fee..8dab1e0f74 100644
--- a/panda/src/display/graphicsStateGuardian.cxx
+++ b/panda/src/display/graphicsStateGuardian.cxx
@@ -754,6 +754,17 @@ end_occlusion_query() {
   return result;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GraphicsStateGuardian::dispatch_compute
+//       Access: Public, Virtual
+//  Description: Dispatches a currently bound compute shader using
+//               the given work group counts.
+////////////////////////////////////////////////////////////////////
+void GraphicsStateGuardian::
+dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z) {
+  nassertv(false /* Compute shaders not supported by GSG */);
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GraphicsStateGuardian::get_geom_munger
 //       Access: Public, Virtual
diff --git a/panda/src/display/graphicsStateGuardian.h b/panda/src/display/graphicsStateGuardian.h
index 3f7efa8297..996d5cfc2e 100644
--- a/panda/src/display/graphicsStateGuardian.h
+++ b/panda/src/display/graphicsStateGuardian.h
@@ -145,6 +145,7 @@ PUBLISHED:
   INLINE bool get_supports_basic_shaders() const;
   INLINE bool get_supports_geometry_shaders() const;
   INLINE bool get_supports_tessellation_shaders() const;
+  INLINE bool get_supports_compute_shaders() const;
   INLINE bool get_supports_glsl() const;
   INLINE bool get_supports_stencil() const;
   INLINE bool get_supports_two_sided_stencil() const;
@@ -225,6 +226,8 @@ public:
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();
 
+  virtual void dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z);
+
   virtual PT(GeomMunger) get_geom_munger(const RenderState *state,
                                          Thread *current_thread);
   virtual PT(GeomMunger) make_geom_munger(const RenderState *state,
@@ -487,10 +490,11 @@ protected:
   bool _supports_basic_shaders;
   bool _supports_geometry_shaders;
   bool _supports_tessellation_shaders;
+  bool _supports_compute_shaders;
   bool _supports_glsl;
   bool _supports_framebuffer_multisample;
   bool _supports_framebuffer_blit;
-  
+
   bool _supports_stencil;
   bool _supports_stencil_wrap;
   bool _supports_two_sided_stencil;
diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx
index faf07b31b2..39c2dd3edd 100644
--- a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx
+++ b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx
@@ -993,10 +993,12 @@ reset() {
 
 #ifdef OPENGLES_2
   _supports_glsl = true;
+  _supports_geometry_shaders = false;
   _supports_tessellation_shaders = false;
 #else
   #ifdef OPENGLES_1
     _supports_glsl = false;
+    _supports_geometry_shaders = false;
     _supports_tessellation_shaders = false;
   #else
     _supports_glsl = is_at_least_gl_version(2, 0) || has_extension("GL_ARB_shading_language_100");
@@ -1006,6 +1008,18 @@ reset() {
 #endif
   _shader_caps._supports_glsl = _supports_glsl;
 
+  _supports_compute_shaders = false;
+#ifndef OPENGLES
+  if (is_at_least_gl_version(4, 3) || has_extension("GL_ARB_compute_shader")) {
+    _glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC)
+      get_extension_func("glDispatchCompute");
+
+    if (_glDispatchCompute != NULL) {
+      _supports_compute_shaders = true;
+    }
+  }
+#endif
+
 #ifndef OPENGLES
   if (_supports_glsl) {
     _glAttachShader = (PFNGLATTACHSHADERPROC)
@@ -4218,6 +4232,21 @@ end_occlusion_query() {
 #endif  // OPENGLES
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: GLGraphicsStateGuardian::dispatch_compute
+//       Access: Public, Virtual
+//  Description: Dispatches a currently bound compute shader using
+//               the given work group counts.
+////////////////////////////////////////////////////////////////////
+void CLP(GraphicsStateGuardian)::
+dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z) {
+  nassertv(_supports_compute_shaders);  // always false on OpenGL ES, see reset()
+#ifndef OPENGLES  // _glDispatchCompute is only declared for desktop GL
+  nassertv(_current_shader_context != NULL);
+  _glDispatchCompute(num_groups_x, num_groups_y, num_groups_z);
+#endif  // OPENGLES
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: GLGraphicsStateGuardian::make_geom_munger
 //       Access: Public, Virtual
diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.h b/panda/src/glstuff/glGraphicsStateGuardian_src.h
index e7d3fb898b..39803d4d7c 100644
--- a/panda/src/glstuff/glGraphicsStateGuardian_src.h
+++ b/panda/src/glstuff/glGraphicsStateGuardian_src.h
@@ -172,6 +172,7 @@ typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC) (GLenum mode, GLint first,
 typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDPROC) (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei primcount);
 typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
 typedef void (APIENTRYP PFNGLBINDIMAGETEXTURESPROC) (GLuint first, GLsizei count, const GLuint *textures);
+typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
 #endif  // OPENGLES
 #endif  // __EDG__
 
@@ -262,6 +263,8 @@ public:
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();
 
+  virtual void dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z);
+
   virtual PT(GeomMunger) make_geom_munger(const RenderState *state,
                                           Thread *current_thread);
 
@@ -693,6 +696,7 @@ public:
   PFNGLDRAWELEMENTSINSTANCEDPROC _glDrawElementsInstanced;
   PFNGLBINDIMAGETEXTUREPROC _glBindImageTexture;
   PFNGLBINDIMAGETEXTURESPROC _glBindImageTextures;
+  PFNGLDISPATCHCOMPUTEPROC _glDispatchCompute;
 #endif  // OPENGLES
 
   GLenum _edge_clamp;
diff --git
a/panda/src/glstuff/glShaderContext_src.cxx b/panda/src/glstuff/glShaderContext_src.cxx
index 83a63345ca..3bd245c234 100755
--- a/panda/src/glstuff/glShaderContext_src.cxx
+++ b/panda/src/glstuff/glShaderContext_src.cxx
@@ -217,6 +217,7 @@ CLP(ShaderContext)(Shader *s, GSG *gsg) : ShaderContext(s) {
   _glsl_gshader = 0;
   _glsl_tcshader = 0;
   _glsl_teshader = 0;
+  _glsl_cshader = 0;
   _uses_standard_vertex_arrays = false;
 
 #if defined(HAVE_CG) && !defined(OPENGLES)
@@ -880,6 +881,9 @@ release_resources(GSG *gsg) {
     if (_glsl_teshader != 0) {
       gsg->_glDetachShader(_glsl_program, _glsl_teshader);
     }
+    if (_glsl_cshader != 0) {
+      gsg->_glDetachShader(_glsl_program, _glsl_cshader);
+    }
     gsg->_glDeleteProgram(_glsl_program);
     _glsl_program = 0;
   }
@@ -903,6 +907,10 @@ release_resources(GSG *gsg) {
     gsg->_glDeleteShader(_glsl_teshader);
     _glsl_teshader = 0;
   }
+  if (_glsl_cshader != 0) {
+    gsg->_glDeleteShader(_glsl_cshader);
+    _glsl_cshader = 0;
+  }
 
   gsg->report_my_gl_errors();
 }
@@ -1391,6 +1399,22 @@ disable_shader_texture_bindings(GSG *gsg) {
   cg_report_errors();
 #endif
 
+#ifndef OPENGLES
+  // Unbind every image unit this shader used (GLSL only).  It is not certain that this is strictly required.
+  int num_image_units = min(_glsl_img_inputs.size(), (size_t)gsg->_max_image_units);
+
+  if (num_image_units > 0 && _shader->get_language() == Shader::SL_GLSL) {
+    if (gsg->_supports_multi_bind) {
+      gsg->_glBindImageTextures(0, num_image_units, NULL);
+
+    } else {
+      for (int i = 0; i < num_image_units; ++i) {
+        gsg->_glBindImageTexture(i, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
+      }
+    }
+  }
+#endif
+
   gsg->report_my_gl_errors();
 }
 
@@ -1623,6 +1647,11 @@ glsl_compile_entry_point(GSG *gsg, Shader::ShaderType type) {
         handle = gsg->_glCreateShader(GL_TESS_EVALUATION_SHADER);
       }
       break;
+    case Shader::ST_compute:
+      if (gsg->get_supports_compute_shaders()) {
+        handle = gsg->_glCreateShader(GL_COMPUTE_SHADER);
+      }
+      break;
 #endif
   }
   if (!handle) {
@@ -1701,7 +1730,13 @@ glsl_compile_shader(GSG *gsg) {
     if (!_glsl_teshader) return false;
     gsg->_glAttachShader(_glsl_program, _glsl_teshader);
   }
-  
+
+  if (!_shader->get_text(Shader::ST_compute).empty()) {
+    _glsl_cshader = glsl_compile_entry_point(gsg, Shader::ST_compute);
+    if (!_glsl_cshader) return false;
+    gsg->_glAttachShader(_glsl_program, _glsl_cshader);
+  }
+
   // There might be warnings. Only report them for one shader program.
   if (_glsl_vshader != 0) {
     glsl_report_shader_errors(gsg, _glsl_vshader);
diff --git a/panda/src/glstuff/glShaderContext_src.h b/panda/src/glstuff/glShaderContext_src.h
index cfc1007673..c1eaec6267 100755
--- a/panda/src/glstuff/glShaderContext_src.h
+++ b/panda/src/glstuff/glShaderContext_src.h
@@ -74,6 +74,7 @@ private:
   GLuint _glsl_gshader;
   GLuint _glsl_tcshader;
   GLuint _glsl_teshader;
+  GLuint _glsl_cshader;
 
   pvector _glsl_parameter_map;
 
diff --git a/panda/src/glstuff/panda_glext.h b/panda/src/glstuff/panda_glext.h
index 6949c1c245..93a90c5742 100644
--- a/panda/src/glstuff/panda_glext.h
+++ b/panda/src/glstuff/panda_glext.h
@@ -2398,6 +2398,27 @@ extern "C" {
 #define GL_TEXTURE_IMMUTABLE_FORMAT       0x912F
 #endif
 
+#ifndef GL_ARB_compute_shader
+#define GL_COMPUTE_SHADER                 0x91B9
+#define GL_MAX_COMPUTE_UNIFORM_BLOCKS     0x91BB
+#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
+#define GL_MAX_COMPUTE_IMAGE_UNIFORMS     0x91BD
+#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
+#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
+#define GL_MAX_COMPUTE_ATOMIC_COUNTERS    0x8265
+#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
+#define GL_MAX_COMPUTE_LOCAL_INVOCATIONS  0x90EB
+#define GL_MAX_COMPUTE_WORK_GROUP_COUNT   0x91BE
+#define GL_MAX_COMPUTE_WORK_GROUP_SIZE    0x91BF
+#define GL_COMPUTE_LOCAL_WORK_SIZE        0x8267
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
+#define GL_DISPATCH_INDIRECT_BUFFER       0x90EE
+#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
+#define GL_COMPUTE_SHADER_BIT             0x00000020
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_ABGR_EXT                       0x8000
 #endif
@@ -7882,6 +7903,16 @@ typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum ta
 typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
 #endif
 
+#ifndef GL_ARB_compute_shader
+#define GL_ARB_compute_shader 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDispatchCompute (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
+GLAPI void APIENTRY glDispatchComputeIndirect (GLintptr indirect);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEPROC) (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
+typedef void (APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC) (GLintptr indirect);
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_EXT_abgr 1
 #endif
diff --git a/panda/src/gobj/shader.I b/panda/src/gobj/shader.I
index 9ea0869884..862966f85d 100755
--- a/panda/src/gobj/shader.I
+++ b/panda/src/gobj/shader.I
@@ -32,6 +32,15 @@ get_filename(const ShaderType &type) const {
     case ST_geometry:
       return _filename->_geometry;
       break;
+    case ST_tess_control:
+      return _filename->_tess_control;  // the file name, not the shader text
+      break;
+    case ST_tess_evaluation:
+      return _filename->_tess_evaluation;
+      break;
+    case ST_compute:
+      return _filename->_compute;
+      break;
     default:
       return _filename->_shared;
   }
@@ -65,6 +74,9 @@ get_text(const ShaderType &type) const {
     case ST_tess_evaluation:
       return _text->_tess_evaluation;
       break;
+    case ST_compute:
+      return _text->_compute;
+      break;
     default:
       return _text->_shared;
   }
@@ -636,14 +648,16 @@ ShaderFile(const string &vertex,
 ////////////////////////////////////////////////////////////////////
 INLINE void Shader::ShaderFile::
 write_datagram(Datagram &dg) const {
-  dg.add_bool(_separate);
   if (_separate) {
+    dg.add_uint8(6);  // number of per-stage strings that follow
     dg.add_string(_vertex);
     dg.add_string(_fragment);
     dg.add_string(_geometry);
     dg.add_string(_tess_control);
     dg.add_string(_tess_evaluation);
+    dg.add_string(_compute);
   } else {
+    dg.add_uint8(0);  // zero stages: a single shared body follows
     dg.add_string(_shared);
   }
 }
@@ -655,13 +669,18 @@ write_datagram(Datagram &dg) const {
 ////////////////////////////////////////////////////////////////////
 INLINE void Shader::ShaderFile::
 read_datagram(DatagramIterator &scan) {
-  _separate = scan.get_bool();
-  if (_separate) {
-    _vertex = scan.get_string();
-    _fragment = scan.get_string();
-    _geometry = scan.get_string();
-    _tess_control = scan.get_string();
-    _tess_evaluation = scan.get_string();
+  short count = scan.get_uint8();
+  _separate = (count > 0);  // mirrors write_datagram: 0 means one shared body
+  if (count > 0) {
+    if (count-- > 0) _vertex = scan.get_string();
+    if (count-- > 0) _fragment = scan.get_string();
+    if (count-- > 0) _geometry = scan.get_string();
+    if (count-- > 0) _tess_control = scan.get_string();
+    if (count-- > 0) _tess_evaluation = scan.get_string();
+    if (count-- > 0) _compute = scan.get_string();
+    while (count-- > 0) {  // skip any stage strings this reader does not store
+      scan.get_string();
+    }
   } else {
     _shared = scan.get_string();
   }
diff --git a/panda/src/gobj/shader.cxx b/panda/src/gobj/shader.cxx
index 675fbca876..f4bcefc2c7 100755
--- a/panda/src/gobj/shader.cxx
+++ b/panda/src/gobj/shader.cxx
@@ -20,8 +20,6 @@
 
 #ifdef HAVE_CG
 #include <Cg/cg.h>
-#define JCG_PROFILE_GLSLV ((CGprofile)7007)
-#define JCG_PROFILE_GLSLF ((CGprofile)7008)
 #endif
 
 TypeHandle Shader::_type_handle;
@@ -1428,6 +1426,14 @@ cg_compile_entry_point(const char *entry, const ShaderCaps &caps, ShaderType typ
     compiler_args[nargs++] = "-po";
     compiler_args[nargs++] = "ATI_draw_buffers";
   }
+
+  char version_arg[16];
+  if (!cg_glsl_version.empty() && cgGetProfileProperty((CGprofile) active, CG_IS_GLSL_PROFILE)) {
+    snprintf(version_arg, 16, "version=%s", cg_glsl_version.c_str());
+    compiler_args[nargs++] = "-po";
+    compiler_args[nargs++] = version_arg;
+  }
+
   compiler_args[nargs] = 0;
 
   if ((active != (int)CG_PROFILE_UNKNOWN) && (active != ultimate)) {
@@ -2243,6 +2249,34 @@ load(const ShaderLanguage &lang, const Filename &vertex,
   return result;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: Shader::load_compute
+//       Access: Published, Static
+//  Description: Loads a compute shader.
+////////////////////////////////////////////////////////////////////
+PT(Shader) Shader::
+load_compute(const ShaderLanguage &lang, const Filename &fn) {
+  PT(ShaderFile) sfile = new ShaderFile(fn);
+  ShaderTable::const_iterator i = _load_table.find(sfile);
+  if (i != _load_table.end() && (lang == SL_none || lang == i->second->_language)) {
+    return i->second;
+  }
+
+  PT(ShaderFile) sbody = new ShaderFile;
+  sbody->_separate = true;
+  VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
+  if (!vfs->read_file(fn, sbody->_compute, true)) {
+    gobj_cat.error()
+      << "Could not read compute shader file: " << fn << "\n";
+    return NULL;
+  }
+
+  PT(Shader) result = new Shader(sfile, sbody, lang);
+  result->_loaded = true;
+  _load_table[sfile] = result;
+  return result;
+}
+
 //////////////////////////////////////////////////////////////////////
 //     Function: Shader::make
 //       Access: Published, Static
@@ -2294,6 +2328,28 @@ make(const ShaderLanguage &lang, const string &vertex, const string &fragment,
   return result;
 }
 
+//////////////////////////////////////////////////////////////////////
+//     Function: Shader::make_compute
+//       Access: Published, Static
+//  Description: Returns a compute shader for the given source string, reusing a cached one if the body matches.
+//////////////////////////////////////////////////////////////////////
+PT(Shader) Shader::
+make_compute(const ShaderLanguage &lang, const string &body) {
+  PT(ShaderFile) sbody = new ShaderFile;
+  sbody->_separate = true;
+  sbody->_compute = body;
+
+  ShaderTable::const_iterator i = _make_table.find(sbody);
+  if (i != _make_table.end() && (lang == SL_none || lang == i->second->_language)) {
+    return i->second;
+  }
+
+  PT(ShaderFile) sfile = new ShaderFile("created-shader");
+  PT(Shader) result = new Shader(sfile, sbody, lang);
+  _make_table[sbody] = result;
+  return result;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: Shader::parse_init
 //       Access: Public
diff --git a/panda/src/gobj/shader.h b/panda/src/gobj/shader.h
index 7980a3dc78..34657495ab 100755
--- a/panda/src/gobj/shader.h
+++ b/panda/src/gobj/shader.h
@@ -59,6 +59,7 @@ PUBLISHED:
     ST_geometry,
     ST_tess_control,
     ST_tess_evaluation,
+    ST_compute,
   };
 
   enum AutoShaderSwitch {
@@ -84,11 +85,13 @@ PUBLISHED:
                          const Filename &geometry = "",
                          const Filename &tess_control = "",
                          const Filename &tess_evaluation = "");
+  static PT(Shader) load_compute(const ShaderLanguage &lang, const Filename &fn);
   static PT(Shader) make(const ShaderLanguage &lang,
                          const string &vertex, const string &fragment,
                          const string &geometry = "",
                          const string &tess_control = "",
                          const string &tess_evaluation = "");
+  static PT(Shader) make_compute(const ShaderLanguage &lang, const string &body);
 
   INLINE const Filename get_filename(const ShaderType &type = ST_none) const;
   INLINE const string &get_text(const ShaderType &type = ST_none) const;
@@ -397,6 +400,7 @@ public:
     string _geometry;
     string _tess_control;
     string _tess_evaluation;
+    string _compute;
   };
 
 public:
diff --git a/panda/src/gsgbase/graphicsStateGuardianBase.h b/panda/src/gsgbase/graphicsStateGuardianBase.h
index 4de1ffca8f..33e3793edd 100644
--- a/panda/src/gsgbase/graphicsStateGuardianBase.h
+++ b/panda/src/gsgbase/graphicsStateGuardianBase.h
@@ -163,6 +163,8 @@ public:
   virtual void begin_occlusion_query()=0;
   virtual PT(OcclusionQueryContext) end_occlusion_query()=0;
 
+  virtual void dispatch_compute(int num_groups_x, int num_groups_y, int num_groups_z)=0;
+
   virtual PT(GeomMunger) get_geom_munger(const RenderState *state,
                                          Thread *current_thread)=0;
 
diff --git a/panda/src/pgraphnodes/computeNode.I b/panda/src/pgraphnodes/computeNode.I
new file mode 100644
index 0000000000..399bfd1ca3
--- /dev/null
+++ b/panda/src/pgraphnodes/computeNode.I
@@ -0,0 +1,87 @@
+// Filename: computeNode.I
+// Created by:  rdb (19Jun14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::add_dispatch
+//       Access: Published
+//  Description: Adds a dispatch command with the given number of
+//               work groups in the X, Y, and Z dimensions.  Any
+//               of these values may be set to 1 if the respective
+//               dimension should not be used.
+////////////////////////////////////////////////////////////////////
+INLINE void ComputeNode::
+add_dispatch(const LVecBase3i &num_groups) {
+  Dispatcher::CDWriter cdata(_dispatcher->_cycler);
+  cdata->_dispatches.push_back(num_groups);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::add_dispatch
+//       Access: Published
+//  Description: Adds a dispatch command with the given number of
+//               work groups in the X, Y, and Z dimensions.  Any
+//               of these values may be set to 1 if the respective
+//               dimension should not be used.
+////////////////////////////////////////////////////////////////////
+INLINE void ComputeNode::
+add_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) {
+  LVecBase3i num_groups(num_groups_x, num_groups_y, num_groups_z);
+  add_dispatch(num_groups);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::get_num_dispatches
+//       Access: Published
+//  Description: Returns the number of times add_dispatch has been
+//               called on this object.
+////////////////////////////////////////////////////////////////////
+INLINE int ComputeNode::
+get_num_dispatches() const {
+  Dispatcher::CDReader cdata(_dispatcher->_cycler);
+  return cdata->_dispatches.size();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::get_dispatch
+//       Access: Published
+//  Description: Returns the group counts of the nth dispatch
+//               associated with this object.
+////////////////////////////////////////////////////////////////////
+INLINE const LVecBase3i &ComputeNode::
+get_dispatch(int n) const {
+  Dispatcher::CDReader cdata(_dispatcher->_cycler);
+  nassertr(n >= 0 && n < (int)cdata->_dispatches.size(), LVecBase3i::zero());
+  return cdata->_dispatches[n];
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::Constructor
+//       Access: Public
+//  Description:
+////////////////////////////////////////////////////////////////////
+INLINE ComputeNode::Dispatcher::CData::
+CData() {
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::Copy Constructor
+//       Access: Public
+//  Description:
+////////////////////////////////////////////////////////////////////
+INLINE ComputeNode::Dispatcher::CData::
+CData(const ComputeNode::Dispatcher::CData &copy) :
+  _dispatches(copy._dispatches)
+{
+}
diff --git a/panda/src/pgraphnodes/computeNode.cxx b/panda/src/pgraphnodes/computeNode.cxx new
file mode 100644
index 0000000000..bae35b0e85
--- /dev/null
+++ b/panda/src/pgraphnodes/computeNode.cxx
@@ -0,0 +1,274 @@
+// Filename: computeNode.cxx
+// Created by:  rdb (19Jun14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#include "pandabase.h"
+#include "computeNode.h"
+#include "cullTraverser.h"
+#include "cullableObject.h"
+#include "cullHandler.h"
+#include "geomDrawCallbackData.h"
+#include "omniBoundingVolume.h"
+#include "config_pgraph.h"
+
+TypeHandle ComputeNode::_type_handle;
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Constructor
+//       Access: Published
+//  Description: Creates a ComputeNode with the given name.  Use
+//               add_dispatch and also assign a shader using a
+//               ShaderAttrib.
+////////////////////////////////////////////////////////////////////
+ComputeNode::
+ComputeNode(const string &name) :
+  PandaNode(name),
+  _dispatcher(new ComputeNode::Dispatcher)
+{
+  set_internal_bounds(new OmniBoundingVolume);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Copy Constructor
+//       Access: Protected
+//  Description:
+////////////////////////////////////////////////////////////////////
+ComputeNode::
+ComputeNode(const ComputeNode &copy) :
+  PandaNode(copy),
+  _dispatcher(new ComputeNode::Dispatcher(*copy._dispatcher))
+{
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::make_copy
+//       Access: Public, Virtual
+//  Description: Returns a newly-allocated Node that is a shallow copy
+//               of this one.  It will be a different Node pointer,
+//               but its internal data may or may not be shared with
+//               that of the original Node.
+////////////////////////////////////////////////////////////////////
+PandaNode *ComputeNode::
+make_copy() const {
+  return new ComputeNode(*this);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::safe_to_combine
+//       Access: Public, Virtual
+//  Description: Returns true if it is generally safe to combine this
+//               particular kind of PandaNode with other kinds of
+//               PandaNodes of compatible type, adding children or
+//               whatever.  For instance, an LODNode should not be
+//               combined with any other PandaNode, because its set of
+//               children is meaningful.
+////////////////////////////////////////////////////////////////////
+bool ComputeNode::
+safe_to_combine() const {
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::is_renderable
+//       Access: Public, Virtual
+//  Description: Returns true if there is some value to visiting this
+//               particular node during the cull traversal for any
+//               camera, false otherwise.  This will be used to
+//               optimize the result of get_net_draw_show_mask(), so
+//               that any subtrees that contain only nodes for which
+//               is_renderable() is false need not be visited.
+////////////////////////////////////////////////////////////////////
+bool ComputeNode::
+is_renderable() const {
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::add_for_draw
+//       Access: Public, Virtual
+//  Description: Adds the node's contents to the CullResult we are
+//               building up during the cull traversal, so that it
+//               will be drawn at render time.  For most nodes other
+//               than GeomNodes, this is a do-nothing operation.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+add_for_draw(CullTraverser *trav, CullTraverserData &data) {
+  if (pgraph_cat.is_spam()) {
+    pgraph_cat.spam()
+      << "Found " << *this << " in state " << *data._state
+      << " draw_mask = " << data._draw_mask << "\n";
+  }
+
+  // OK, render this node.  Rendering this node means creating a
+  // CullableObject for the Dispatcher.  We don't need to pass
+  // any Geoms, however.
+  CullableObject *object =
+    new CullableObject(NULL, data._state,
+                       data.get_net_transform(trav),
+                       data.get_modelview_transform(trav),
+                       trav->get_scene());
+  object->set_draw_callback(_dispatcher);
+  trav->get_cull_handler()->record_object(object, trav);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::output
+//       Access: Public, Virtual
+//  Description: Writes a brief description of the node to the
+//               indicated output stream.  This is invoked by the <<
+//               operator.  It may be overridden in derived classes to
+//               include some information relevant to the class.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+output(ostream &out) const {
+  PandaNode::output(out);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::Constructor
+//       Access: Public
+//  Description:
+////////////////////////////////////////////////////////////////////
+ComputeNode::Dispatcher::
+Dispatcher() {
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::Copy Constructor
+//       Access: Public
+//  Description:
+////////////////////////////////////////////////////////////////////
+ComputeNode::Dispatcher::
+Dispatcher(const Dispatcher &copy) :
+  _cycler(copy._cycler)
+{
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::do_callback
+//       Access: Public, Virtual
+//  Description: Asks the GSG to dispatch the compute shader.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::Dispatcher::
+do_callback(CallbackData *cbdata) {
+  GeomDrawCallbackData *data = (GeomDrawCallbackData *)cbdata;
+  GraphicsStateGuardianBase *gsg = data->get_gsg();
+
+  CDReader cdata(_cycler);
+
+  Dispatches::const_iterator it;
+  for (it = cdata->_dispatches.begin(); it != cdata->_dispatches.end(); ++it) {
+    gsg->dispatch_compute(it->get_x(), it->get_y(), it->get_z());
+  }
+
+  // No need to upcall; we don't have any geometry, after all.
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::register_with_read_factory
+//       Access: Public, Static
+//  Description: Tells the BamReader how to create objects of type
+//               ComputeNode.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+register_with_read_factory() {
+  BamReader::get_factory()->register_factory(get_class_type(), make_from_bam);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::write_datagram
+//       Access: Public, Virtual
+//  Description: Writes the contents of this object to the datagram
+//               for shipping out to a Bam file.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+write_datagram(BamWriter *manager, Datagram &dg) {
+  PandaNode::write_datagram(manager, dg);
+  manager->write_cdata(dg, _dispatcher->_cycler);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::make_from_bam
+//       Access: Protected, Static
+//  Description: This function is called by the BamReader's factory
+//               when a new object of type ComputeNode is encountered
+//               in the Bam file.  It should create the ComputeNode
+//               and extract its information from the file.
+////////////////////////////////////////////////////////////////////
+TypedWritable *ComputeNode::
+make_from_bam(const FactoryParams &params) {
+  ComputeNode *node = new ComputeNode("");
+  DatagramIterator scan;
+  BamReader *manager;
+
+  parse_params(params, scan, manager);
+  node->fillin(scan, manager);
+
+  return node;
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::fillin
+//       Access: Protected
+//  Description: This internal function is called by make_from_bam to
+//               read in all of the relevant data from the BamFile for
+//               the new ComputeNode.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  PandaNode::fillin(scan, manager);
+  manager->read_cdata(scan, _dispatcher->_cycler);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::make_copy
+//       Access: Public, Virtual
+//  Description:
+////////////////////////////////////////////////////////////////////
+CycleData *ComputeNode::Dispatcher::CData::
+make_copy() const {
+  return new CData(*this);
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::write_datagram
+//       Access: Public, Virtual
+//  Description: Writes the contents of this object to the datagram
+//               for shipping out to a Bam file.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::Dispatcher::CData::
+write_datagram(BamWriter *manager, Datagram &dg) const {
+  dg.add_uint16(_dispatches.size());
+
+  Dispatches::const_iterator it;
+  for (it = _dispatches.begin(); it != _dispatches.end(); ++it) {
+    generic_write_datagram(dg, *it);
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: ComputeNode::Dispatcher::CData::fillin
+//       Access: Public, Virtual
+//  Description: This internal function is called by make_from_bam to
+//               read in all of the relevant data from the BamFile for
+//               the new ComputeNode.
+////////////////////////////////////////////////////////////////////
+void ComputeNode::Dispatcher::CData::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  int num_dispatches = scan.get_uint16();
+  _dispatches.resize(num_dispatches);
+
+  for (int i = 0; i < num_dispatches; ++i) {
+    generic_read_datagram(_dispatches[i], scan);
+  }
+}
diff --git a/panda/src/pgraphnodes/computeNode.h b/panda/src/pgraphnodes/computeNode.h
new file mode 100644
index 0000000000..3b92f43956
--- /dev/null
+++ b/panda/src/pgraphnodes/computeNode.h
@@ -0,0 +1,114 @@
+// Filename: computeNode.h
+// Created by:  rdb (19Jun14)
+//
+////////////////////////////////////////////////////////////////////
+//
+// PANDA 3D SOFTWARE
+// Copyright (c) Carnegie Mellon University.  All rights reserved.
+//
+// All use of this software is subject to the terms of the revised BSD
+// license.  You should have received a copy of this license along
+// with this source code in a file named "LICENSE."
+//
+////////////////////////////////////////////////////////////////////
+
+#ifndef COMPUTENODE_H
+#define COMPUTENODE_H
+
+#include "pandabase.h"
+#include "pandaNode.h"
+#include "callbackObject.h"
+#include "pointerTo.h"
+
+////////////////////////////////////////////////////////////////////
+//       Class : ComputeNode
+// Description : A special node, the sole purpose of which is to
+//               invoke a dispatch operation on the assigned
+//               compute shader.
+////////////////////////////////////////////////////////////////////
+class EXPCL_PANDA_PGRAPHNODES ComputeNode : public PandaNode {
+PUBLISHED:
+  ComputeNode(const string &name);
+
+  INLINE void add_dispatch(const LVecBase3i &num_groups);
+  INLINE void add_dispatch(int num_groups_x, int num_groups_y, int num_groups_z);
+
+  INLINE int get_num_dispatches() const;
+  INLINE const LVecBase3i &get_dispatch(int i) const;
+  MAKE_SEQ(get_dispatches, get_num_dispatches, get_dispatch);
+
+public:
+  ComputeNode(const ComputeNode &copy);
+
+  virtual PandaNode *make_copy() const;
+  virtual bool safe_to_combine() const;
+
+  virtual bool is_renderable() const;
+  virtual void add_for_draw(CullTraverser *trav, CullTraverserData &data);
+
+  virtual void output(ostream &out) const;
+
+private:
+  class EXPCL_PANDA_PGRAPHNODES Dispatcher : public CallbackObject {
+    friend class ComputeNode;
+  public:
+    ALLOC_DELETED_CHAIN(Dispatcher);
+    Dispatcher();
+    Dispatcher(const Dispatcher &copy);
+
+    virtual void do_callback(CallbackData *cbdata);
+
+    typedef pvector<LVecBase3i> Dispatches;
+
+    class EXPCL_PANDA_PGRAPHNODES CData : public CycleData {
+    public:
+      INLINE CData();
+      INLINE CData(const CData &copy);
+      virtual CycleData *make_copy() const;
+      virtual void write_datagram(BamWriter *manager, Datagram &dg) const;
+      virtual void fillin(DatagramIterator &scan, BamReader *manager);
+      virtual TypeHandle get_parent_type() const {
+        return ComputeNode::get_class_type();
+      }
+
+      Dispatches _dispatches;
+    };
+
+    PipelineCycler<CData> _cycler;
+    typedef CycleDataReader<CData> CDReader;
+    typedef CycleDataWriter<CData> CDWriter;
+
+  };
+
+  // One per ComputeNode.
+  PT(Dispatcher) _dispatcher;
+
+public:
+  static void register_with_read_factory();
+  virtual void write_datagram(BamWriter *manager, Datagram &dg);
+
+protected:
+  static TypedWritable *make_from_bam(const FactoryParams &params);
+  void fillin(DatagramIterator &scan, BamReader *manager);
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    PandaNode::init_type();
+    register_type(_type_handle, "ComputeNode",
+                  PandaNode::get_class_type());
+  }
+  virtual TypeHandle get_type() const {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() {init_type(); return get_class_type();}
+
+private:
+  static TypeHandle _type_handle;
+};
+
+#include "computeNode.I"
+
+#endif
diff --git a/panda/src/pgraphnodes/config_pgraphnodes.cxx b/panda/src/pgraphnodes/config_pgraphnodes.cxx
index 79423f9b7a..0973afe91a 100644
--- a/panda/src/pgraphnodes/config_pgraphnodes.cxx
+++ b/panda/src/pgraphnodes/config_pgraphnodes.cxx
@@ -18,6 +18,7 @@
 #include "callbackData.h"
 #include "callbackNode.h"
 #include "callbackObject.h"
+#include "computeNode.h"
 #include "directionalLight.h"
 #include "fadeLodNode.h"
 #include "fadeLodNodeData.h"
@@ -113,6 +114,7 @@ init_libpgraphnodes() {
   CallbackData::init_type();
   CallbackNode::init_type();
   CallbackObject::init_type();
+  ComputeNode::init_type();
   DirectionalLight::init_type();
   FadeLODNode::init_type();
   FadeLODNodeData::init_type();
@@ -130,6 +132,7 @@ init_libpgraphnodes() {
 
   AmbientLight::register_with_read_factory();
   CallbackNode::register_with_read_factory();
+  ComputeNode::register_with_read_factory();
   DirectionalLight::register_with_read_factory();
   FadeLODNode::register_with_read_factory();
   LightNode::register_with_read_factory();
diff --git a/panda/src/pgraphnodes/p3pgraphnodes_composite1.cxx b/panda/src/pgraphnodes/p3pgraphnodes_composite1.cxx
index 94b499f94f..67f6fff97c 100644
--- a/panda/src/pgraphnodes/p3pgraphnodes_composite1.cxx
+++ b/panda/src/pgraphnodes/p3pgraphnodes_composite1.cxx
@@ -1,5 +1,6 @@
 #include "ambientLight.cxx"
 #include "callbackNode.cxx"
+#include "computeNode.cxx"
 #include "config_pgraphnodes.cxx"
 #include "directionalLight.cxx"
 #include "fadeLodNode.cxx"