From 0a3733ccb9fe15a9c00838a78b062958674ed3a3 Mon Sep 17 00:00:00 2001 From: rdb Date: Sun, 20 Feb 2022 17:05:44 +0100 Subject: [PATCH] pstats: GPU timing improvements; use same frame numbering everywhere Timer queries are significantly more efficient, are synchronized to CPU time, and the synchronized frame numbering makes it possible to correlate stuff in the Timeline view --- panda/src/display/graphicsEngine.cxx | 7 - panda/src/display/graphicsStateGuardian.cxx | 184 ++------------- panda/src/display/graphicsStateGuardian.h | 15 +- panda/src/display/graphicsWindow.cxx | 23 +- panda/src/display/graphicsWindow.h | 4 + panda/src/display/pStatGPUTimer.h | 1 - panda/src/ffmpeg/ffmpegVideoCursor.cxx | 2 +- .../glstuff/glGraphicsStateGuardian_src.cxx | 215 ++++++++++++++---- .../src/glstuff/glGraphicsStateGuardian_src.h | 22 +- panda/src/glstuff/glLatencyQueryContext_src.I | 12 - .../src/glstuff/glLatencyQueryContext_src.cxx | 47 ---- panda/src/glstuff/glLatencyQueryContext_src.h | 53 ----- panda/src/glstuff/glShaderContext_src.cxx | 2 +- panda/src/glstuff/glTimerQueryContext_src.I | 24 -- panda/src/glstuff/glTimerQueryContext_src.cxx | 96 -------- panda/src/glstuff/glTimerQueryContext_src.h | 63 ----- panda/src/glstuff/glmisc_src.cxx | 2 - panda/src/glstuff/glstuff_src.cxx | 2 - panda/src/glstuff/glstuff_src.h | 2 - panda/src/gobj/CMakeLists.txt | 2 - panda/src/gobj/config_gobj.cxx | 2 - panda/src/gobj/p3gobj_composite2.cxx | 1 - panda/src/gobj/timerQueryContext.I | 22 -- panda/src/gobj/timerQueryContext.cxx | 30 --- panda/src/gobj/timerQueryContext.h | 58 ----- panda/src/gobj/vertexDataPage.cxx | 2 +- panda/src/pstatclient/pStatClient.cxx | 5 +- panda/src/pstatclient/pStatClientImpl.cxx | 19 +- panda/src/pstatclient/pStatClientImpl.h | 4 +- panda/src/pstatclient/pStatProperties.cxx | 2 +- panda/src/pstatclient/pStatThread.cxx | 8 +- panda/src/pstatclient/pStatThread.h | 4 +- panda/src/wgldisplay/wglGraphicsWindow.cxx | 15 -- panda/src/wgldisplay/wglGraphicsWindow.h | 
1 - panda/src/windisplay/winGraphicsWindow.cxx | 15 -- panda/src/windisplay/winGraphicsWindow.h | 2 - 36 files changed, 267 insertions(+), 701 deletions(-) delete mode 100644 panda/src/glstuff/glLatencyQueryContext_src.I delete mode 100644 panda/src/glstuff/glLatencyQueryContext_src.cxx delete mode 100644 panda/src/glstuff/glLatencyQueryContext_src.h delete mode 100644 panda/src/glstuff/glTimerQueryContext_src.I delete mode 100644 panda/src/glstuff/glTimerQueryContext_src.cxx delete mode 100644 panda/src/glstuff/glTimerQueryContext_src.h delete mode 100644 panda/src/gobj/timerQueryContext.I delete mode 100644 panda/src/gobj/timerQueryContext.cxx delete mode 100644 panda/src/gobj/timerQueryContext.h diff --git a/panda/src/display/graphicsEngine.cxx b/panda/src/display/graphicsEngine.cxx index 85f70cb5d6..d6dc3c678d 100644 --- a/panda/src/display/graphicsEngine.cxx +++ b/panda/src/display/graphicsEngine.cxx @@ -1719,13 +1719,6 @@ draw_bins(const GraphicsEngine::Windows &wlist, Thread *current_thread) { win->end_frame(GraphicsOutput::FM_render, current_thread); if (_auto_flip) { -#ifdef DO_PSTATS - // This is a good time to perform a latency query. 
- if (gsg->get_timer_queries_active()) { - gsg->issue_timer_query(GraphicsStateGuardian::_command_latency_pcollector.get_index()); - } -#endif - if (win->flip_ready()) { PStatGPUTimer timer(gsg, _flip_pcollector, current_thread); win->begin_flip(); diff --git a/panda/src/display/graphicsStateGuardian.cxx b/panda/src/display/graphicsStateGuardian.cxx index 322d77d1cd..503fb841b3 100644 --- a/panda/src/display/graphicsStateGuardian.cxx +++ b/panda/src/display/graphicsStateGuardian.cxx @@ -98,7 +98,6 @@ PStatCollector GraphicsStateGuardian::_compute_dispatch_pcollector("Draw:Compute PStatCollector GraphicsStateGuardian::_wait_occlusion_pcollector("Wait:Occlusion"); PStatCollector GraphicsStateGuardian::_wait_timer_pcollector("Wait:Timer Queries"); PStatCollector GraphicsStateGuardian::_timer_queries_pcollector("Timer queries"); -PStatCollector GraphicsStateGuardian::_command_latency_pcollector("Command latency"); PStatCollector GraphicsStateGuardian::_prepare_pcollector("Draw:Prepare"); PStatCollector GraphicsStateGuardian::_prepare_texture_pcollector("Draw:Prepare:Texture"); @@ -222,10 +221,6 @@ GraphicsStateGuardian(CoordinateSystem internal_coordinate_system, #ifdef DO_PSTATS _timer_queries_active = false; - _last_query_frame = 0; - _last_num_queried = 0; - // _timer_delta = 0.0; - _pstats_gpu_thread = -1; #endif @@ -778,9 +773,17 @@ end_occlusion_query() { * Adds a timer query to the command stream, associated with the given PStats * collector index. */ -PT(TimerQueryContext) GraphicsStateGuardian:: +void GraphicsStateGuardian:: issue_timer_query(int pstats_index) { - return nullptr; +} + +/** + * A latency query is a special type of timer query that measures the + * difference between CPU time and GPU time, ie. how far the GPU is behind in + * processing the commands being generated by the CPU right now. 
+ */ +void GraphicsStateGuardian:: +issue_latency_query(int pstats_index) { } /** @@ -2370,30 +2373,6 @@ begin_frame(Thread *current_thread) { _state_rs = RenderState::make_empty(); _state_mask.clear(); -#ifdef DO_PSTATS - // We have to do this here instead of in GraphicsEngine because we need a - // current context to issue timer queries. - int frame = ClockObject::get_global_clock()->get_frame_count(); - if (_last_query_frame < frame) { - _last_query_frame = frame; - if (pstats_gpu_timing && _supports_timer_query) { - _timer_queries_pcollector.clear_level(); - - // Now is a good time to flush previous frame's queries. We may not - // actually have all of the previous frame's results in yet, but that's - // okay; the GPU data is allowed to lag a few frames behind. - flush_timer_queries(); - - if (_timer_queries_active) { - // Issue a stop and start event for collector 0, marking the beginning - // of the new frame. - issue_timer_query(0x8000); - issue_timer_query(0x0000); - } - } - } -#endif - return !_needs_reset; } @@ -2479,133 +2458,6 @@ end_frame(Thread *current_thread) { _prepared_objects->_graphics_memory_lru.begin_epoch(); } -/** - * Called by the graphics engine on the draw thread to check the status of the - * running timer queries and submit their results to the PStats server. - */ -void GraphicsStateGuardian:: -flush_timer_queries() { -#ifdef DO_PSTATS - // This uses the lower-level PStats interfaces for now because of all the - // unnecessary overhead that would otherwise be incurred when adding such a - // large amount of data at once. - - PStatClient *client = PStatClient::get_global_pstats(); - - if (!client->client_is_connected()) { - _timer_queries_active = false; - return; - } - - if (!_timer_queries_active) { - if (pstats_gpu_timing && _supports_timer_query) { - // Check if timer queries should be enabled. - _timer_queries_active = true; - } else { - return; - } - } - - // Currently, we use one thread per GSG, for convenience. 
In the future, we - // may want to try and use one thread per graphics card. - if (_pstats_gpu_thread == -1) { - _pstats_gpu_thread = client->make_gpu_thread(get_driver_renderer()).get_index(); - } - PStatThread gpu_thread(client, _pstats_gpu_thread); - - // Get the results of all the timer queries. - int first = 0; - if (!_pending_timer_queries.empty()) { - int count = _pending_timer_queries.size(); - if (count == 0) { - return; - } - - PStatGPUTimer timer(this, _wait_timer_pcollector); - - if (_last_num_queried > 0) { - // We know how many queries were available last frame, and this usually - // stays fairly constant, so use this as a starting point. - int i = std::min(_last_num_queried, count) - 1; - - if (_pending_timer_queries[i]->is_answer_ready()) { - first = count; - while (i < count - 1) { - if (!_pending_timer_queries[++i]->is_answer_ready()) { - first = i; - break; - } - } - } else { - first = 0; - while (i > 0) { - if (_pending_timer_queries[--i]->is_answer_ready()) { - first = i + 1; - break; - } - } - } - } else { - // We figure out which tasks the GPU has already finished by doing a - // binary search for the first query that does not have an answer ready. - // We know then that everything before that must be ready. - while (count > 0) { - int step = count / 2; - int i = first + step; - if (_pending_timer_queries[i]->is_answer_ready()) { - first += step + 1; - count -= step + 1; - } else { - count = step; - } - } - } - - if (first <= 0) { - return; - } - - _last_num_queried = first; - - for (int i = 0; i < first; ++i) { - CPT(TimerQueryContext) query = _pending_timer_queries[i]; - - double time_data = query->get_timestamp(); // + _timer_delta; - - if (query->_pstats_index == _command_latency_pcollector.get_index()) { - // Special case for the latency pcollector. 
- PStatCollectorDef *cdef; - cdef = client->get_collector_ptr(query->_pstats_index)->get_def(client, query->_pstats_index); - _pstats_gpu_data.add_level(query->_pstats_index, time_data * cdef->_factor); - - } else if (query->_pstats_index & 0x8000) { - _pstats_gpu_data.add_stop(query->_pstats_index & 0x7fff, time_data); - - } else { - _pstats_gpu_data.add_start(query->_pstats_index & 0x7fff, time_data); - } - - // We found an end-frame marker (a stop event for collector 0). This - // means that the GPU actually caught up with that frame, and we can - // flush the GPU thread's frame data to the pstats server. - if (query->_pstats_index == 0x8000) { - gpu_thread.add_frame(_pstats_gpu_data); - _pstats_gpu_data.clear(); - } - } - } - - if (first > 0) { - // Do this out of the scope of _wait_timer_pcollector. - _pending_timer_queries.erase( - _pending_timer_queries.begin(), - _pending_timer_queries.begin() + first - ); - _timer_queries_pcollector.add_level_now(first); - } -#endif -} - /** * Returns true if this GSG can implement decals using a DepthOffsetAttrib, or * false if that is unreliable and the three-step rendering process should be @@ -3246,8 +3098,19 @@ init_frame_pstats() { _texture_state_pcollector.clear_level(); } } -#endif // DO_PSTATS +/** + * Returns a PStatThread used to represent this GL context. + */ +PStatThread GraphicsStateGuardian:: +get_pstats_thread() { + PStatClient *client = PStatClient::get_global_pstats(); + if (_pstats_gpu_thread == -1) { + _pstats_gpu_thread = client->make_gpu_thread("GPU").get_index(); + } + return PStatThread(client, _pstats_gpu_thread); +} +#endif // DO_PSTATS /** * Create a gamma table. @@ -3467,9 +3330,6 @@ close_gsg() { // Make sure that all the contexts belonging to the GSG are deleted. 
_prepared_objects.clear(); -#ifdef DO_PSTATS - _pending_timer_queries.clear(); -#endif free_pointers(); } diff --git a/panda/src/display/graphicsStateGuardian.h b/panda/src/display/graphicsStateGuardian.h index 42702d1ed3..6d0b26b6e1 100644 --- a/panda/src/display/graphicsStateGuardian.h +++ b/panda/src/display/graphicsStateGuardian.h @@ -44,7 +44,6 @@ #include "bitMask.h" #include "texture.h" #include "occlusionQueryContext.h" -#include "timerQueryContext.h" #include "loader.h" #include "shaderAttrib.h" #include "texGenAttrib.h" @@ -320,7 +319,8 @@ public: virtual void begin_occlusion_query(); virtual PT(OcclusionQueryContext) end_occlusion_query(); - virtual PT(TimerQueryContext) issue_timer_query(int pstats_index); + virtual void issue_timer_query(int pstats_index); + virtual void issue_latency_query(int pstats_index); virtual void dispatch_compute(int size_x, int size_y, int size_z); @@ -363,8 +363,6 @@ PUBLISHED: public: virtual void end_frame(Thread *current_thread); - void flush_timer_queries(); - void set_current_properties(const FrameBufferProperties *properties); virtual bool depth_offset_decals(); @@ -445,6 +443,7 @@ public: #ifdef DO_PSTATS static void init_frame_pstats(); + PStatThread get_pstats_thread(); #endif protected: @@ -602,13 +601,6 @@ protected: #ifdef DO_PSTATS int _pstats_gpu_thread; bool _timer_queries_active; - PStatFrameData _pstats_gpu_data; - - int _last_query_frame; - int _last_num_queried; - // double _timer_delta; - typedef pdeque TimerQueryQueue; - TimerQueryQueue _pending_timer_queries; #endif bool _copy_texture_inverted; @@ -699,7 +691,6 @@ public: static PStatCollector _wait_occlusion_pcollector; static PStatCollector _wait_timer_pcollector; static PStatCollector _timer_queries_pcollector; - static PStatCollector _command_latency_pcollector; static PStatCollector _prepare_pcollector; static PStatCollector _prepare_texture_pcollector; diff --git a/panda/src/display/graphicsWindow.cxx b/panda/src/display/graphicsWindow.cxx index 
7abda2bac7..f1808dfcd9 100644 --- a/panda/src/display/graphicsWindow.cxx +++ b/panda/src/display/graphicsWindow.cxx @@ -39,7 +39,8 @@ GraphicsWindow(GraphicsEngine *engine, GraphicsPipe *pipe, GraphicsOutput *host) : GraphicsOutput(engine, pipe, name, fb_prop, win_prop, flags, gsg, host, true), _input_lock("GraphicsWindow::_input_lock"), - _properties_lock("GraphicsWindow::_properties_lock") + _properties_lock("GraphicsWindow::_properties_lock"), + _latency_pcollector(name + " latency") { #ifdef DO_MEMORY_USAGE MemoryUsage::update_type(this, this); @@ -610,6 +611,26 @@ close_window() { _is_valid = false; } +/** + * This function will be called within the draw thread after end_frame() has + * been called on all windows, to initiate the exchange of the front and back + * buffers. + * + * This should instruct the window to prepare for the flip at the next video + * sync, but it should not wait. + * + * We have the two separate functions, begin_flip() and end_flip(), to make it + * easier to flip all of the windows at the same time. + */ +void GraphicsWindow:: +begin_flip() { +#ifdef DO_PSTATS + if (_gsg->get_timer_queries_active()) { + _gsg->issue_latency_query(_latency_pcollector.get_index()); + } +#endif +} + /** * Opens the window right now. Called from the window thread. Returns true * if the window is successfully opened, or false if there was a problem. 
diff --git a/panda/src/display/graphicsWindow.h b/panda/src/display/graphicsWindow.h index 12047ceea6..45b1438906 100644 --- a/panda/src/display/graphicsWindow.h +++ b/panda/src/display/graphicsWindow.h @@ -126,6 +126,8 @@ public: virtual void process_events(); virtual void set_properties_now(WindowProperties &properties); + virtual void begin_flip(); + protected: virtual void close_window(); virtual bool open_window(); @@ -152,6 +154,8 @@ protected: bool _got_expose_event; + PStatCollector _latency_pcollector; + private: LightReMutex _properties_lock; // protects _requested_properties, _rejected_properties, and _window_event. diff --git a/panda/src/display/pStatGPUTimer.h b/panda/src/display/pStatGPUTimer.h index 1e0accdc04..4b54e08b57 100644 --- a/panda/src/display/pStatGPUTimer.h +++ b/panda/src/display/pStatGPUTimer.h @@ -18,7 +18,6 @@ #include "pStatTimer.h" #include "pStatCollector.h" #include "config_pstatclient.h" -#include "timerQueryContext.h" class Thread; class GraphicsStateGuardian; diff --git a/panda/src/ffmpeg/ffmpegVideoCursor.cxx b/panda/src/ffmpeg/ffmpegVideoCursor.cxx index 5cc3d2cffb..66d7fcdd86 100644 --- a/panda/src/ffmpeg/ffmpegVideoCursor.cxx +++ b/panda/src/ffmpeg/ffmpegVideoCursor.cxx @@ -698,7 +698,7 @@ thread_main() { while (do_poll()) { // Keep doing stuff as long as there's something to do. 
_lock.release(); - PStatClient::thread_tick(_sync_name); + PStatClient::thread_tick(); Thread::consider_yield(); _lock.acquire(); } diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx index 11d66699c0..69ef795ee3 100644 --- a/panda/src/glstuff/glGraphicsStateGuardian_src.cxx +++ b/panda/src/glstuff/glGraphicsStateGuardian_src.cxx @@ -4162,12 +4162,21 @@ begin_frame(Thread *current_thread) { _primitive_batches_display_list_pcollector.clear_level(); #endif +#if defined(DO_PSTATS) && !defined(OPENGLES) + int frame_number = ClockObject::get_global_clock()->get_frame_count(current_thread); + if (_current_frame_timing == nullptr || + frame_number != _current_frame_timing->_frame_number) { + + _current_frame_timing = begin_frame_timing(frame_number); + } +#endif + #ifndef NDEBUG _show_texture_usage = false; if (gl_show_texture_usage) { // When this is true, then every other second, we show the usage textures // instead of the real textures. - double now = ClockObject::get_global_clock()->get_frame_time(); + double now = ClockObject::get_global_clock()->get_frame_time(current_thread); int this_second = (int)floor(now); if (this_second & 1) { _show_texture_usage = true; @@ -4190,16 +4199,6 @@ begin_frame(Thread *current_thread) { } #endif // NDEBUG -#ifdef DO_PSTATS - /*if (_supports_timer_query) { - // Measure the difference between the OpenGL clock and the PStats clock. 
- GLint64 time_ns; - _glGetInteger64v(GL_TIMESTAMP, &time_ns); - _timer_delta = time_ns * -0.000000001; - _timer_delta += PStatClient::get_global_pstats()->get_real_time(); - }*/ -#endif - #ifndef OPENGLES if (_current_properties->get_srgb_color()) { glEnable(GL_FRAMEBUFFER_SRGB); @@ -4406,6 +4405,127 @@ end_frame(Thread *current_thread) { } } +/** + * + */ +CLP(GraphicsStateGuardian)::FrameTiming *CLP(GraphicsStateGuardian):: +begin_frame_timing(int frame_number) { +#if defined(DO_PSTATS) && !defined(OPENGLES) + if (!_timer_queries_active) { + if (pstats_gpu_timing && _supports_timer_query && PStatClient::is_connected()) { + _timer_queries_active = true; + } else { + return nullptr; + } + } + + PStatClient *client = PStatClient::get_global_pstats(); + + _timer_queries_pcollector.clear_level(); + + if (_deleted_queries.size() < 128) { + // We'll need a lot of timer queries, so allocate a whole bunch up front. + size_t alloc_count = 128 - _deleted_queries.size(); + _deleted_queries.resize(_deleted_queries.size() + alloc_count); + _glGenQueries(alloc_count, _deleted_queries.data() + _deleted_queries.size() - alloc_count); + } + + // Issue a start query for collector 0, marking the start of this frame. + GLuint frame_query = _deleted_queries.back(); + _deleted_queries.pop_back(); + _glQueryCounter(frame_query, GL_TIMESTAMP); + + // Synchronize the GL time with the PStats clock. + GLint64 gl_time; + double cpu_time1 = client->get_real_time(); + _glGetInteger64v(GL_TIMESTAMP, &gl_time); + double cpu_time2 = client->get_real_time(); + double cpu_time = (cpu_time1 + cpu_time2) / 2.0; + + // Check if the results from the previous frame are available. We just need + // to check whether the last query for each frame is available. 
+ while (!_frame_timings.empty()) { + const FrameTiming &frame = _frame_timings.front(); + GLuint last_query = frame._queries.back().first; + GLuint result; + _glGetQueryObjectuiv(last_query, GL_QUERY_RESULT_AVAILABLE, &result); + if (result == 0) { + // Not ready, so subsequent frames won't be, either. + break; + } + // We've got a frame whose timer queries are ready. + end_frame_timing(frame); + _frame_timings.pop_front(); + } + + FrameTiming frame; + frame._frame_number = frame_number; + frame._gpu_sync_time = gl_time; + frame._cpu_sync_time = cpu_time; + frame._queries.push_back(std::make_pair(frame_query, 0)); + _frame_timings.push_back(std::move(frame)); + + return &_frame_timings.back(); +#else + return nullptr; +#endif +} + +/** + * Gets the timer query results for the given frame and sends them to the + * PStats server. + */ +void CLP(GraphicsStateGuardian):: +end_frame_timing(const FrameTiming &frame) { +#if defined(DO_PSTATS) && !defined(OPENGLES) + // This uses the lower-level PStats interfaces for now because of all the + // unnecessary overhead that would otherwise be incurred when adding such a + // large amount of data at once. + if (!PStatClient::is_connected()) { + _timer_queries_active = false; + return; + } + + PStatTimer timer(_wait_timer_pcollector); + + // We represent each GSG as one thread. In the future we may change this to + // representing each graphics device as one thread, but OpenGL doesn't really + // expose this information to us. + PStatThread gpu_thread = get_pstats_thread(); + + PStatFrameData frame_data; + size_t latency_ref_i = 0; + + for (auto &query : frame._queries) { + GLuint64 time_ns; + _glGetQueryObjectui64v(query.first, GL_QUERY_RESULT, &time_ns); + + if (query.second & 0x10000) { + // Latency query. + GLint64 ref = frame._latency_refs[latency_ref_i++]; + double time = ((GLint64)time_ns - ref) * 0.000001; + frame_data.add_level(query.second & 0x7fff, time); + } + else { + // Convert GL time to Panda time. 
+ double time = ((GLint64)time_ns - frame._gpu_sync_time) * 0.000000001 + frame._cpu_sync_time; + if (query.second & 0x8000) { + frame_data.add_stop(query.second & 0x7fff, time); + } + else { + frame_data.add_start(query.second & 0x7fff, time); + } + } + } + + // The end time of the last collector is implicitly the frame's end time. + frame_data.add_stop(0, frame_data.get_end()); + gpu_thread.add_frame(frame._frame_number, frame_data); + + _timer_queries_pcollector.add_level_now(frame._queries.size()); +#endif +} + /** * Called before a sequence of draw_primitive() functions are called, this * should prepare the vertex data for rendering. It returns true if the @@ -7149,48 +7269,57 @@ end_occlusion_query() { * Adds a timer query to the command stream, associated with the given PStats * collector index. */ -PT(TimerQueryContext) CLP(GraphicsStateGuardian):: +void CLP(GraphicsStateGuardian):: issue_timer_query(int pstats_index) { #if defined(DO_PSTATS) && !defined(OPENGLES) - nassertr(_supports_timer_query, nullptr); - - PT(CLP(TimerQueryContext)) query; - - // Hack - if (pstats_index == _command_latency_pcollector.get_index()) { - query = new CLP(LatencyQueryContext)(this, pstats_index); - } else { - query = new CLP(TimerQueryContext)(this, pstats_index); + FrameTiming *frame = _current_frame_timing; + if (frame == nullptr) { + return; } - if (_deleted_queries.size() >= 1) { - query->_index = _deleted_queries.back(); - _deleted_queries.pop_back(); - } else { - _glGenQueries(1, &query->_index); + nassertv(_supports_timer_query); - if (GLCAT.is_spam()) { - GLCAT.spam() << "Generating query for " << pstats_index - << ": " << query->_index << "\n"; - } + if (_deleted_queries.empty()) { + // Allocate some number at a time, since we'll need a lot of these. 
+ _deleted_queries.resize(_deleted_queries.size() + 16); + _glGenQueries(16, _deleted_queries.data() + _deleted_queries.size() - 16); } + GLuint index = _deleted_queries.back(); + _deleted_queries.pop_back(); + // Issue the timestamp query. - _glQueryCounter(query->_index, GL_TIMESTAMP); + _glQueryCounter(index, GL_TIMESTAMP); - if (_use_object_labels) { - // Assign a label to it based on the PStatCollector name. - const PStatClient *client = PStatClient::get_global_pstats(); - string name = client->get_collector_fullname(pstats_index & 0x7fff); - _glObjectLabel(GL_QUERY, query->_index, name.size(), name.data()); + //if (_use_object_labels) { + // // Assign a label to it based on the PStatCollector name. + // const PStatClient *client = PStatClient::get_global_pstats(); + // string name = client->get_collector_fullname(pstats_index & 0x7fff); + // _glObjectLabel(GL_QUERY, index, name.size(), name.data()); + //} + + frame->_queries.push_back(std::make_pair(index, pstats_index)); +#endif +} + +/** + * A latency query is a special type of timer query that measures the + * difference between CPU time and GPU time, ie. how far the GPU is behind in + * processing the commands being generated by the CPU right now. 
+ */ +void CLP(GraphicsStateGuardian):: +issue_latency_query(int pstats_index) { +#if defined(DO_PSTATS) && !defined(OPENGLES) + FrameTiming *frame = _current_frame_timing; + if (frame == nullptr) { + return; } - _pending_timer_queries.push_back((TimerQueryContext *)query); + GLint64 time; + _glGetInteger64v(GL_TIMESTAMP, &time); + issue_timer_query(pstats_index | 0x10000); - return (TimerQueryContext *)query; - -#else - return nullptr; + frame->_latency_refs.push_back(time); #endif } @@ -11743,7 +11872,7 @@ set_state_and_transform(const RenderState *target, #endif _state_pcollector.add_level(1); - PStatGPUTimer timer1(this, _draw_set_state_pcollector); + PStatTimer timer1(_draw_set_state_pcollector); bool transform_changed = transform != _internal_transform; if (transform_changed) { @@ -11934,7 +12063,7 @@ set_state_and_transform(const RenderState *target, int texture_slot = TextureAttrib::get_class_slot(); if (_target_rs->get_attrib(texture_slot) != _state_rs->get_attrib(texture_slot) || !_state_mask.get_bit(texture_slot)) { - PStatGPUTimer timer(this, _draw_set_state_texture_pcollector); + //PStatGPUTimer timer(this, _draw_set_state_texture_pcollector); determine_target_texture(); do_issue_texture(); diff --git a/panda/src/glstuff/glGraphicsStateGuardian_src.h b/panda/src/glstuff/glGraphicsStateGuardian_src.h index c0af1f5228..ef8f262da7 100644 --- a/panda/src/glstuff/glGraphicsStateGuardian_src.h +++ b/panda/src/glstuff/glGraphicsStateGuardian_src.h @@ -293,6 +293,10 @@ public: virtual void end_scene(); virtual void end_frame(Thread *current_thread); + struct FrameTiming; + FrameTiming *begin_frame_timing(int frame_index); + void end_frame_timing(const FrameTiming &frame); + virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader, const GeomVertexDataPipelineReader *data_reader, size_t num_instances, bool force); @@ -384,7 +388,8 @@ public: virtual PT(OcclusionQueryContext) end_occlusion_query(); #endif - virtual PT(TimerQueryContext) 
issue_timer_query(int pstats_index); + virtual void issue_timer_query(int pstats_index) final; + virtual void issue_latency_query(int pstats_index) final; #ifndef OPENGLES_1 virtual void dispatch_compute(int size_x, int size_y, int size_z); @@ -1148,6 +1153,20 @@ public: UsageTextures _usage_textures; #endif // NDEBUG +#if defined(DO_PSTATS) && !defined(OPENGLES) + struct FrameTiming { + int _frame_number; + GLint64 _gpu_sync_time; + double _cpu_sync_time; + pvector > _queries; + pvector _latency_refs; + }; + GLint64 _gpu_reference_time = 0; + double _cpu_reference_time; + pdeque _frame_timings; + FrameTiming *_current_frame_timing = nullptr; +#endif + BufferResidencyTracker _renderbuffer_residency; static PStatCollector _load_display_list_pcollector; @@ -1188,7 +1207,6 @@ private: friend class CLP(CgShaderContext); friend class CLP(GraphicsBuffer); friend class CLP(OcclusionQueryContext); - friend class CLP(TimerQueryContext); }; #include "glGraphicsStateGuardian_src.I" diff --git a/panda/src/glstuff/glLatencyQueryContext_src.I b/panda/src/glstuff/glLatencyQueryContext_src.I deleted file mode 100644 index adc45cdf5e..0000000000 --- a/panda/src/glstuff/glLatencyQueryContext_src.I +++ /dev/null @@ -1,12 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. - * - * All use of this software is subject to the terms of the revised BSD - * license. You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file glLatencyQueryContext_src.I - * @author rdb - * @date 2014-09-24 - */ diff --git a/panda/src/glstuff/glLatencyQueryContext_src.cxx b/panda/src/glstuff/glLatencyQueryContext_src.cxx deleted file mode 100644 index a08ab74517..0000000000 --- a/panda/src/glstuff/glLatencyQueryContext_src.cxx +++ /dev/null @@ -1,47 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. 
- * - * All use of this software is subject to the terms of the revised BSD - * license. You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file glLatencyQueryContext_src.cxx - * @author rdb - * @date 2014-09-24 - */ - -#ifndef OPENGLES // Timer queries not supported by OpenGL ES. - -TypeHandle CLP(LatencyQueryContext)::_type_handle; - -/** - * - */ -CLP(LatencyQueryContext):: -CLP(LatencyQueryContext)(CLP(GraphicsStateGuardian) *glgsg, - int pstats_index) : - CLP(TimerQueryContext)(glgsg, pstats_index), - _timestamp(0) -{ - glgsg->_glGetInteger64v(GL_TIMESTAMP, &_timestamp); -} - -/** - * Returns the timestamp that is the result of this timer query. There's no - * guarantee about which clock this uses, the only guarantee is that - * subtracting a start time from an end time should yield a time in seconds. - * If is_answer_ready() did not return true, this function may block before it - * returns. - * - * It is only valid to call this from the draw thread. - */ -double CLP(LatencyQueryContext):: -get_timestamp() const { - GLint64 time_ns; - _glgsg->_glGetQueryObjecti64v(_index, GL_QUERY_RESULT, &time_ns); - - return (time_ns - _timestamp) * 0.000000001; -} - -#endif // OPENGLES diff --git a/panda/src/glstuff/glLatencyQueryContext_src.h b/panda/src/glstuff/glLatencyQueryContext_src.h deleted file mode 100644 index 7c255cb5df..0000000000 --- a/panda/src/glstuff/glLatencyQueryContext_src.h +++ /dev/null @@ -1,53 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. - * - * All use of this software is subject to the terms of the revised BSD - * license. You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file glLatencyQueryContext_src.h - * @author rdb - * @date 2014-09-24 - */ - -class GraphicsStateGuardian; - -#ifndef OPENGLES // Timer queries not supported by OpenGL ES. 
- -/** - * This is a special variant of GLTimerQueryContext that measures the command - * latency, ie. the time it takes for the GPU to actually get to the commands - * we are issuing right now. - */ -class EXPCL_GL CLP(LatencyQueryContext) : public CLP(TimerQueryContext) { -public: - CLP(LatencyQueryContext)(CLP(GraphicsStateGuardian) *glgsg, int pstats_index); - - ALLOC_DELETED_CHAIN(CLP(LatencyQueryContext)); - - virtual double get_timestamp() const; - - GLint64 _timestamp; - -public: - static TypeHandle get_class_type() { - return _type_handle; - } - static void init_type() { - CLP(TimerQueryContext)::init_type(); - register_type(_type_handle, CLASSPREFIX_QUOTED "LatencyQueryContext", - CLP(TimerQueryContext)::get_class_type()); - } - virtual TypeHandle get_type() const { - return get_class_type(); - } - virtual TypeHandle force_init_type() {init_type(); return get_class_type();} - -private: - static TypeHandle _type_handle; -}; - -#include "glLatencyQueryContext_src.I" - -#endif // OPENGLES diff --git a/panda/src/glstuff/glShaderContext_src.cxx b/panda/src/glstuff/glShaderContext_src.cxx index 8260d1430d..9d2fbd71e8 100644 --- a/panda/src/glstuff/glShaderContext_src.cxx +++ b/panda/src/glstuff/glShaderContext_src.cxx @@ -2105,7 +2105,7 @@ set_state_and_transform(const RenderState *target_rs, */ void CLP(ShaderContext):: issue_parameters(int altered) { - PStatGPUTimer timer(_glgsg, _glgsg->_draw_set_state_shader_parameters_pcollector); + PStatTimer timer(_glgsg->_draw_set_state_shader_parameters_pcollector); if (GLCAT.is_spam()) { GLCAT.spam() diff --git a/panda/src/glstuff/glTimerQueryContext_src.I b/panda/src/glstuff/glTimerQueryContext_src.I deleted file mode 100644 index c898c30261..0000000000 --- a/panda/src/glstuff/glTimerQueryContext_src.I +++ /dev/null @@ -1,24 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. - * - * All use of this software is subject to the terms of the revised BSD - * license. 
You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file glTimerQueryContext_src.I - * @author rdb - * @date 2014-08-22 - */ - -/** - * - */ -INLINE CLP(TimerQueryContext):: -CLP(TimerQueryContext)(CLP(GraphicsStateGuardian) *glgsg, - int pstats_index) : - TimerQueryContext(pstats_index), - _glgsg(glgsg), - _index(0) -{ -} diff --git a/panda/src/glstuff/glTimerQueryContext_src.cxx b/panda/src/glstuff/glTimerQueryContext_src.cxx deleted file mode 100644 index a7b4ecb612..0000000000 --- a/panda/src/glstuff/glTimerQueryContext_src.cxx +++ /dev/null @@ -1,96 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. - * - * All use of this software is subject to the terms of the revised BSD - * license. You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file glTimerQueryContext_src.cxx - * @author rdb - * @date 2014-08-22 - */ - -#include "pnotify.h" -#include "dcast.h" -#include "lightMutexHolder.h" -#include "pStatTimer.h" - -#ifndef OPENGLES // Timer queries not supported by OpenGL ES. - -TypeHandle CLP(TimerQueryContext)::_type_handle; - -/** - * - */ -CLP(TimerQueryContext):: -~CLP(TimerQueryContext)() { - if (_index != 0) { - // Tell the GSG to recycle this index when it gets around to it. If it - // has already shut down, though, too bad. This means we never get to - // free this index, but presumably the app is already shutting down - // anyway. - if (auto glgsg = _glgsg.lock()) { - LightMutexHolder holder(glgsg->_lock); - glgsg->_deleted_queries.push_back(_index); - _index = 0; - } - } -} - -/** - * Returns true if the query's answer is ready, false otherwise. If this - * returns false, the application must continue to poll until it returns true. - * - * It is only valid to call this from the draw thread. 
- */ -bool CLP(TimerQueryContext):: -is_answer_ready() const { - GLuint result; - _glgsg->_glGetQueryObjectuiv(_index, GL_QUERY_RESULT_AVAILABLE, &result); - - return (result != 0); -} - -/** - * Requests the graphics engine to expedite the pending answer--the - * application is now waiting until the answer is ready. - * - * It is only valid to call this from the draw thread. - */ -void CLP(TimerQueryContext):: -waiting_for_answer() { - PStatTimer timer(GraphicsStateGuardian::_wait_timer_pcollector); - glFlush(); -} - -/** - * Returns the timestamp that is the result of this timer query. There's no - * guarantee about which clock this uses, the only guarantee is that - * subtracting a start time from an end time should yield a time in seconds. - * If is_answer_ready() did not return true, this function may block before it - * returns. - * - * It is only valid to call this from the draw thread. - */ -double CLP(TimerQueryContext):: -get_timestamp() const { - GLuint64 time_ns; - - /*GLuint available; - _glgsg->_glGetQueryObjectuiv(_index[1], GL_QUERY_RESULT_AVAILABLE, &available); - if (available) { - // The answer is ready now. - do_get_timestamps(begin_ns, end_ns); - } else { - // The answer is not ready; this call will block. - PStatTimer timer(GraphicsStateGuardian::_wait_timer_pcollector); - do_get_timestamps(begin_ns, end_ns); - }*/ - - _glgsg->_glGetQueryObjectui64v(_index, GL_QUERY_RESULT, &time_ns); - - return time_ns * 0.000000001; -} - -#endif // OPENGLES diff --git a/panda/src/glstuff/glTimerQueryContext_src.h b/panda/src/glstuff/glTimerQueryContext_src.h deleted file mode 100644 index 07709bc7af..0000000000 --- a/panda/src/glstuff/glTimerQueryContext_src.h +++ /dev/null @@ -1,63 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. - * - * All use of this software is subject to the terms of the revised BSD - * license. 
You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file glTimerQueryContext_src.h - * @author rdb - * @date 2014-08-22 - */ - -#include "pandabase.h" -#include "timerQueryContext.h" -#include "deletedChain.h" -#include "clockObject.h" - -class GraphicsStateGuardian; - -#ifndef OPENGLES // Timer queries not supported by OpenGL ES. - -/** - * This class manages a timer query that can be used by a PStatGPUTimer to - * measure the time a task takes to execute on the GPU. This records the - * current timestamp; a pair of these is usually used to get the elapsed time. - */ -class EXPCL_GL CLP(TimerQueryContext) : public TimerQueryContext { -public: - INLINE CLP(TimerQueryContext)(CLP(GraphicsStateGuardian) *glgsg, - int pstats_index); - virtual ~CLP(TimerQueryContext)(); - - ALLOC_DELETED_CHAIN(CLP(TimerQueryContext)); - - virtual bool is_answer_ready() const; - virtual void waiting_for_answer(); - virtual double get_timestamp() const; - - GLuint _index; - WPT(CLP(GraphicsStateGuardian)) _glgsg; - -public: - static TypeHandle get_class_type() { - return _type_handle; - } - static void init_type() { - TimerQueryContext::init_type(); - register_type(_type_handle, CLASSPREFIX_QUOTED "TimerQueryContext", - TimerQueryContext::get_class_type()); - } - virtual TypeHandle get_type() const { - return get_class_type(); - } - virtual TypeHandle force_init_type() {init_type(); return get_class_type();} - -private: - static TypeHandle _type_handle; -}; - -#include "glTimerQueryContext_src.I" - -#endif // OPENGLES diff --git a/panda/src/glstuff/glmisc_src.cxx b/panda/src/glstuff/glmisc_src.cxx index 2c388985f6..2d6db9f654 100644 --- a/panda/src/glstuff/glmisc_src.cxx +++ b/panda/src/glstuff/glmisc_src.cxx @@ -354,8 +354,6 @@ void CLP(init_classes)() { #ifndef OPENGLES CLP(OcclusionQueryContext)::init_type(); - CLP(TimerQueryContext)::init_type(); - CLP(LatencyQueryContext)::init_type(); #endif PandaSystem *ps = 
PandaSystem::get_global_ptr(); diff --git a/panda/src/glstuff/glstuff_src.cxx b/panda/src/glstuff/glstuff_src.cxx index 06abef89f4..fc281d1749 100644 --- a/panda/src/glstuff/glstuff_src.cxx +++ b/panda/src/glstuff/glstuff_src.cxx @@ -23,8 +23,6 @@ #include "glIndexBufferContext_src.cxx" #include "glBufferContext_src.cxx" #include "glOcclusionQueryContext_src.cxx" -#include "glTimerQueryContext_src.cxx" -#include "glLatencyQueryContext_src.cxx" #include "glGeomContext_src.cxx" #include "glGeomMunger_src.cxx" #include "glShaderContext_src.cxx" diff --git a/panda/src/glstuff/glstuff_src.h b/panda/src/glstuff/glstuff_src.h index 02e3208445..21ca8ad8fe 100644 --- a/panda/src/glstuff/glstuff_src.h +++ b/panda/src/glstuff/glstuff_src.h @@ -35,8 +35,6 @@ #include "glIndexBufferContext_src.h" #include "glBufferContext_src.h" #include "glOcclusionQueryContext_src.h" -#include "glTimerQueryContext_src.h" -#include "glLatencyQueryContext_src.h" #include "glGeomContext_src.h" #include "glGeomMunger_src.h" #include "glShaderContext_src.h" diff --git a/panda/src/gobj/CMakeLists.txt b/panda/src/gobj/CMakeLists.txt index 9565a73b0c..1835ffe3f8 100644 --- a/panda/src/gobj/CMakeLists.txt +++ b/panda/src/gobj/CMakeLists.txt @@ -61,7 +61,6 @@ set(P3GOBJ_HEADERS textureReloadRequest.I textureReloadRequest.h textureStage.I textureStage.h textureStagePool.I textureStagePool.h - timerQueryContext.I timerQueryContext.h transformBlend.I transformBlend.h transformBlendTable.I transformBlendTable.h transformTable.I transformTable.h @@ -141,7 +140,6 @@ set(P3GOBJ_SOURCES textureReloadRequest.cxx textureStage.cxx textureStagePool.cxx - timerQueryContext.cxx transformBlend.cxx transformBlendTable.cxx transformTable.cxx diff --git a/panda/src/gobj/config_gobj.cxx b/panda/src/gobj/config_gobj.cxx index df0990f69f..7ada146307 100644 --- a/panda/src/gobj/config_gobj.cxx +++ b/panda/src/gobj/config_gobj.cxx @@ -48,7 +48,6 @@ #include "textureReloadRequest.h" #include "textureStage.h" #include 
"textureContext.h" -#include "timerQueryContext.h" #include "samplerContext.h" #include "samplerState.h" #include "shader.h" @@ -626,7 +625,6 @@ ConfigureFn(config_gobj) { TexturePoolFilter::init_type(); TextureReloadRequest::init_type(); TextureStage::init_type(); - TimerQueryContext::init_type(); TransformBlend::init_type(); TransformBlendTable::init_type(); TransformTable::init_type(); diff --git a/panda/src/gobj/p3gobj_composite2.cxx b/panda/src/gobj/p3gobj_composite2.cxx index 56704b12b3..e0a6bcbd6c 100644 --- a/panda/src/gobj/p3gobj_composite2.cxx +++ b/panda/src/gobj/p3gobj_composite2.cxx @@ -20,7 +20,6 @@ #include "textureReloadRequest.cxx" #include "textureStage.cxx" #include "textureStagePool.cxx" -#include "timerQueryContext.cxx" #include "transformBlend.cxx" #include "transformBlendTable.cxx" #include "transformTable.cxx" diff --git a/panda/src/gobj/timerQueryContext.I b/panda/src/gobj/timerQueryContext.I deleted file mode 100644 index a542e96855..0000000000 --- a/panda/src/gobj/timerQueryContext.I +++ /dev/null @@ -1,22 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. - * - * All use of this software is subject to the terms of the revised BSD - * license. You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file timerQueryContext.I - * @author rdb - * @date 2014-08-22 - */ - -/** - * - */ -INLINE TimerQueryContext:: -TimerQueryContext(int pstats_index) : - _pstats_index(pstats_index), - _frame_index(ClockObject::get_global_clock()->get_frame_count()) -{ -} diff --git a/panda/src/gobj/timerQueryContext.cxx b/panda/src/gobj/timerQueryContext.cxx deleted file mode 100644 index af69030ff4..0000000000 --- a/panda/src/gobj/timerQueryContext.cxx +++ /dev/null @@ -1,30 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. 
- * - * All use of this software is subject to the terms of the revised BSD - * license. You should have received a copy of this license along - * with this source code in a file named "LICENSE." - * - * @file timerQueryContext.cxx - * @author rdb - * @date 2014-08-22 - */ - -#include "timerQueryContext.h" - -TypeHandle TimerQueryContext::_type_handle; - -/** - * Returns the timestamp that is the result of this timer query. There's no - * guarantee about which clock this uses, the only guarantee is that - * subtracting a start time from an end time should yield a time in seconds. - * If is_answer_ready() did not return true, this function may block before it - * returns. - * - * It is only valid to call this from the draw thread. - */ -double TimerQueryContext:: -get_timestamp() const { - return 0.0; -} diff --git a/panda/src/gobj/timerQueryContext.h b/panda/src/gobj/timerQueryContext.h deleted file mode 100644 index 26fd32efbd..0000000000 --- a/panda/src/gobj/timerQueryContext.h +++ /dev/null @@ -1,58 +0,0 @@ -/** - * PANDA 3D SOFTWARE - * Copyright (c) Carnegie Mellon University. All rights reserved. - * - * All use of this software is subject to the terms of the revised BSD - * license. You should have received a copy of this license along - * with this source code in a file named "LICENSE." 
- * - * @file timerQueryContext.h - * @author rdb - * @date 2014-08-22 - */ - -#ifndef TIMERQUERYCONTEXT_H -#define TIMERQUERYCONTEXT_H - -#include "pandabase.h" -#include "queryContext.h" -#include "clockObject.h" -#include "pStatCollector.h" - -/** - * - */ -class EXPCL_PANDA_GOBJ TimerQueryContext : public QueryContext { -public: - INLINE TimerQueryContext(int pstats_index); - - ALLOC_DELETED_CHAIN(TimerQueryContext); - - virtual double get_timestamp() const=0; - - int _frame_index; - int _pstats_index; - -public: - static TypeHandle get_class_type() { - return _type_handle; - } - static void init_type() { - QueryContext::init_type(); - register_type(_type_handle, "TimerQueryContext", - QueryContext::get_class_type()); - } - virtual TypeHandle get_type() const { - return get_class_type(); - } - virtual TypeHandle force_init_type() {init_type(); return get_class_type();} - -private: - static TypeHandle _type_handle; - - friend class PreparedGraphicsObjects; -}; - -#include "timerQueryContext.I" - -#endif diff --git a/panda/src/gobj/vertexDataPage.cxx b/panda/src/gobj/vertexDataPage.cxx index 74e58edb58..7f48d39d0a 100644 --- a/panda/src/gobj/vertexDataPage.cxx +++ b/panda/src/gobj/vertexDataPage.cxx @@ -934,7 +934,7 @@ thread_main() { _tlock.acquire(); while (true) { - PStatClient::thread_tick(get_sync_name()); + PStatClient::thread_tick(); while (_manager->_pending_reads.empty() && _manager->_pending_writes.empty()) { diff --git a/panda/src/pstatclient/pStatClient.cxx b/panda/src/pstatclient/pStatClient.cxx index 657badb048..593ebf8f20 100644 --- a/panda/src/pstatclient/pStatClient.cxx +++ b/panda/src/pstatclient/pStatClient.cxx @@ -403,6 +403,8 @@ client_main_tick() { return; } + ClockObject *clock = ClockObject::get_global_clock(); + _impl->client_main_tick(); MultiThingsByName::const_iterator ni = @@ -412,7 +414,8 @@ client_main_tick() { for (vector_int::const_iterator vi = indices.begin(); vi != indices.end(); ++vi) { - _impl->new_frame(*vi); + int 
frame_number = clock->get_frame_count(get_thread_object(*vi)); + _impl->new_frame(*vi, frame_number); } } } diff --git a/panda/src/pstatclient/pStatClientImpl.cxx b/panda/src/pstatclient/pStatClientImpl.cxx index 85d7e45f27..619c8fd33e 100644 --- a/panda/src/pstatclient/pStatClientImpl.cxx +++ b/panda/src/pstatclient/pStatClientImpl.cxx @@ -162,7 +162,7 @@ client_disconnect() { * data for the previous frame. */ void PStatClientImpl:: -new_frame(int thread_index) { +new_frame(int thread_index, int frame_number) { double frame_start = get_real_time(); nassertv(thread_index >= 0 && thread_index < _client->_num_threads); @@ -185,7 +185,6 @@ new_frame(int thread_index) { return; } - int frame_number = -1; PStatFrameData frame_data; if (!pthread->_frame_data.is_empty()) { @@ -205,11 +204,13 @@ new_frame(int thread_index) { } } pthread->_frame_data.swap(frame_data); - frame_number = pthread->_frame_number; + if (frame_number == -1) { + frame_number = pthread->_frame_number; + } } pthread->_frame_data.clear(); - pthread->_frame_number++; + pthread->_frame_number = frame_number + 1; _client->start(0, thread_index, frame_start); // Also record the time for the PStats operation itself. @@ -217,7 +218,7 @@ new_frame(int thread_index) { int pstats_index = PStatClient::_pstats_pcollector.get_index(); _client->start(pstats_index, current_thread_index, frame_start); - if (frame_number != -1) { + if (!frame_data.is_empty()) { transmit_frame_data(thread_index, frame_number, frame_data); } _client->stop(pstats_index, current_thread_index, get_real_time()); @@ -228,7 +229,7 @@ new_frame(int thread_index) { * data. 
*/ void PStatClientImpl:: -add_frame(int thread_index, const PStatFrameData &frame_data) { +add_frame(int thread_index, int frame_number, const PStatFrameData &frame_data) { nassertv(thread_index >= 0 && thread_index < _client->_num_threads); PStatClient::InternalThread *pthread = _client->get_thread_ptr(thread_index); @@ -249,16 +250,12 @@ add_frame(int thread_index, const PStatFrameData &frame_data) { return; } - int frame_number = pthread->_frame_number++; - // Also record the time for the PStats operation itself. int current_thread_index = Thread::get_current_thread()->get_pstats_index(); int pstats_index = PStatClient::_pstats_pcollector.get_index(); _client->start(pstats_index, current_thread_index); - if (frame_number != -1) { - transmit_frame_data(thread_index, frame_number, frame_data); - } + transmit_frame_data(thread_index, frame_number, frame_data); _client->stop(pstats_index, current_thread_index); } diff --git a/panda/src/pstatclient/pStatClientImpl.h b/panda/src/pstatclient/pStatClientImpl.h index 598a2e63e7..2b767838bc 100644 --- a/panda/src/pstatclient/pStatClientImpl.h +++ b/panda/src/pstatclient/pStatClientImpl.h @@ -65,8 +65,8 @@ public: INLINE void client_resume_after_pause(); - void new_frame(int thread_index); - void add_frame(int thread_index, const PStatFrameData &frame_data); + void new_frame(int thread_index, int frame_number = -1); + void add_frame(int thread_index, int frame_number, const PStatFrameData &frame_data); private: void transmit_frame_data(int thread_index, int frame_number, diff --git a/panda/src/pstatclient/pStatProperties.cxx b/panda/src/pstatclient/pStatProperties.cxx index 30110fec69..03a93d7b59 100644 --- a/panda/src/pstatclient/pStatProperties.cxx +++ b/panda/src/pstatclient/pStatProperties.cxx @@ -220,7 +220,7 @@ static LevelCollectorProperties level_properties[] = { { 1, "PipelineCyclers:Dirty", { 0.2, 0.2, 0.2 }, "", 5000 }, { 1, "Collision Volumes", { 1.0, 0.8, 0.5 }, "", 500 }, { 1, "Collision Tests", { 0.5, 0.8, 
1.0 }, "", 100 }, - { 1, "Command latency", { 0.8, 0.2, 0.0 }, "ms", 10, 1.0 / 1000.0 }, + { 1, "window1 latency", { 0.8, 0.2, 0.0 }, "ms", 10, 1.0 / 1000.0 }, { 0, nullptr } }; diff --git a/panda/src/pstatclient/pStatThread.cxx b/panda/src/pstatclient/pStatThread.cxx index e263ac089a..0315b5591f 100644 --- a/panda/src/pstatclient/pStatThread.cxx +++ b/panda/src/pstatclient/pStatThread.cxx @@ -24,9 +24,9 @@ * threads with the indicated sync name. */ void PStatThread:: -new_frame() { +new_frame(int frame_number) { #ifdef DO_PSTATS - _client->get_impl()->new_frame(_index); + _client->get_impl()->new_frame(_index, frame_number); #endif } @@ -35,9 +35,9 @@ new_frame() { * data to send for this frame. */ void PStatThread:: -add_frame(const PStatFrameData &frame_data) { +add_frame(int frame_number, const PStatFrameData &frame_data) { #ifdef DO_PSTATS - _client->get_impl()->add_frame(_index, frame_data); + _client->get_impl()->add_frame(_index, frame_number, frame_data); #endif } diff --git a/panda/src/pstatclient/pStatThread.h b/panda/src/pstatclient/pStatThread.h index 74defa2a15..e3f4acb7e5 100644 --- a/panda/src/pstatclient/pStatThread.h +++ b/panda/src/pstatclient/pStatThread.h @@ -36,8 +36,8 @@ PUBLISHED: INLINE PStatThread(const PStatThread &copy); INLINE void operator = (const PStatThread &copy); - void new_frame(); - void add_frame(const PStatFrameData &frame_data); + void new_frame(int frame_number = -1); + void add_frame(int frame_number, const PStatFrameData &frame_data); Thread *get_thread() const; INLINE int get_index() const; diff --git a/panda/src/wgldisplay/wglGraphicsWindow.cxx b/panda/src/wgldisplay/wglGraphicsWindow.cxx index 1c4fbe9773..1af86c6b95 100644 --- a/panda/src/wgldisplay/wglGraphicsWindow.cxx +++ b/panda/src/wgldisplay/wglGraphicsWindow.cxx @@ -122,21 +122,6 @@ end_frame(FrameMode mode, Thread *current_thread) { } } -/** - * This function will be called within the draw thread after end_frame() has - * been called on all windows, to initiate the
exchange of the front and back - * buffers. - * - * This should instruct the window to prepare for the flip at the next video - * sync, but it should not wait. - * - * We have the two separate functions, begin_flip() and end_flip(), to make it - * easier to flip all of the windows at the same time. - */ -void wglGraphicsWindow:: -begin_flip() { -} - /** * This function will be called within the draw thread after end_frame() has * been called on all windows, to initiate the exchange of the front and back diff --git a/panda/src/wgldisplay/wglGraphicsWindow.h b/panda/src/wgldisplay/wglGraphicsWindow.h index 06a74aa501..66dc2d5878 100644 --- a/panda/src/wgldisplay/wglGraphicsWindow.h +++ b/panda/src/wgldisplay/wglGraphicsWindow.h @@ -34,7 +34,6 @@ public: virtual bool begin_frame(FrameMode mode, Thread *current_thread); virtual void end_frame(FrameMode mode, Thread *current_thread); - virtual void begin_flip(); virtual void ready_flip(); virtual void end_flip(); diff --git a/panda/src/windisplay/winGraphicsWindow.cxx b/panda/src/windisplay/winGraphicsWindow.cxx index ceef979b75..93b4ba989a 100644 --- a/panda/src/windisplay/winGraphicsWindow.cxx +++ b/panda/src/windisplay/winGraphicsWindow.cxx @@ -218,21 +218,6 @@ close_ime() { return; } -/** - * This function will be called within the draw thread after end_frame() has - * been called on all windows, to initiate the exchange of the front and back - * buffers. - * - * This should instruct the window to prepare for the flip at the next video - * sync, but it should not wait. - * - * We have the two separate functions, begin_flip() and end_flip(), to make it - * easier to flip all of the windows at the same time. - */ -void WinGraphicsWindow:: -begin_flip() { -} - /** * Do whatever processing is necessary to ensure that the window responds to * user events. 
Also, honor any requests recently made via diff --git a/panda/src/windisplay/winGraphicsWindow.h b/panda/src/windisplay/winGraphicsWindow.h index ca33615219..d262249349 100644 --- a/panda/src/windisplay/winGraphicsWindow.h +++ b/panda/src/windisplay/winGraphicsWindow.h @@ -77,8 +77,6 @@ public: virtual void close_ime(); - virtual void begin_flip(); - virtual void process_events(); virtual void set_properties_now(WindowProperties &properties); void receive_windows_message(unsigned int msg, int wparam, int lparam);