From eaa13184509e4431665f71e89f401f42164da5ad Mon Sep 17 00:00:00 2001 From: Darren Ranalli Date: Thu, 2 Oct 2008 02:00:34 +0000 Subject: [PATCH] added ProfileSession, don't format profile output until it's requested --- direct/src/showbase/ProfileSession.py | 141 ++++++++++++++++++++++++++ direct/src/showbase/PythonUtil.py | 15 +++ direct/src/task/TaskOrig.py | 50 +++++---- direct/src/task/TaskProfiler.py | 139 +++++++++++++------------ 4 files changed, 260 insertions(+), 85 deletions(-) create mode 100755 direct/src/showbase/ProfileSession.py diff --git a/direct/src/showbase/ProfileSession.py b/direct/src/showbase/ProfileSession.py new file mode 100755 index 0000000000..e21f57f2f8 --- /dev/null +++ b/direct/src/showbase/ProfileSession.py @@ -0,0 +1,141 @@ +from pandac.libpandaexpressModules import TrueClock +from direct.showbase.PythonUtil import ( + StdoutCapture, _installProfileCustomFuncs,_removeProfileCustomFuncs, + _profileWithoutGarbageLeak, _getProfileResultFileInfo, _setProfileResultsFileInfo, + _clearProfileResultFileInfo, ) +import __builtin__ +import profile +import pstats +from StringIO import StringIO +import marshal + +class ProfileSession: + # class that encapsulates a profile of a single callable using Python's standard + # 'profile' module + # + # defers formatting of profile results until they are requested + # + # implementation sidesteps memory leak in Python profile module, + # and redirects file output to RAM file for efficiency + DefaultFilename = 'profilesession' + DefaultLines = 80 + DefaultSorts = ['cumulative', 'time', 'calls'] + + TrueClock = TrueClock.getGlobalPtr() + + def __init__(self, func, name): + self._func = func + self._name = name + self._filename = 'profileData-%s-%s' % (self._name, id(self)) + self._wallClockDur = None + self._ramFile = None + self._refCount = 0 + self.acquire() + + def acquire(self): + self._refCount += 1 + def release(self): + self._refCount -= 1 + if not self._refCount: + self._destroy() + + def _destroy(self): + del self._func + del self._name + del self._filename + del self._wallClockDur + del self._ramFile + + def run(self): + # make sure this instance doesn't get destroyed inside self._func + self.acquire() + + # put the function in the global namespace so that profile can find it + assert 'globalProfileFunc' not in __builtin__.__dict__ + __builtin__.globalProfileFunc = self._func + __builtin__.globalProfileResult = [None] + + # set up the RAM file + _installProfileCustomFuncs(self._filename) + + # do the profiling + Profile = profile.Profile + statement = 'globalProfileResult[0]=globalProfileFunc()' + sort = -1 + retVal = None + + # this is based on profile.run, the code is replicated here to allow us to + # eliminate a memory leak + prof = Profile() + # try to get wall-clock duration that is as accurate as possible + startT = self.TrueClock.getShortTime() + try: + prof = prof.run(statement) + except SystemExit: + pass + # try to get wall-clock duration that is as accurate as possible + endT = self.TrueClock.getShortTime() + # this has to be run immediately after profiling for the timings to be accurate + # tell the Profile object to generate output to the RAM file + prof.dump_stats(self._filename) + + dur = endT - startT + + # eliminate the memory leak + del prof.dispatcher + + # and store the results + self._wallClockDur = dur + + # store the RAM file for later + self._ramFile = _getProfileResultFileInfo(self._filename) + # clean up the RAM file support + _removeProfileCustomFuncs(self._filename) + + # clean up the globals + result = globalProfileResult[0] + del __builtin__.__dict__['globalProfileFunc'] + del __builtin__.__dict__['globalProfileResult'] + + self.release() + return result + + def getWallClockDuration(self): + # this might not be accurate, it may include time taken up by other processes + return self._wallClockDur + + def getResults(self, + lines=80, + sorts=['cumulative', 'time', 'calls'], + callInfo=False): + # set up the RAM file + _installProfileCustomFuncs(self._filename) + # install the stored RAM file from self.run() + _setProfileResultsFileInfo(self._filename, self._ramFile) + + # now extract human-readable output from the RAM file + + # capture print output to a string + sc = StdoutCapture() + + # print the info to stdout + s = pstats.Stats(self._filename) + s.strip_dirs() + for sort in sorts: + s.sort_stats(sort) + s.print_stats(lines) + if callInfo: + s.print_callees(lines) + s.print_callers(lines) + + # make a copy of the print output + output = sc.getString() + + # restore stdout to what it was before + sc.destroy() + + # clean up the RAM file support + _removeProfileCustomFuncs(self._filename) + + return output + diff --git a/direct/src/showbase/PythonUtil.py b/direct/src/showbase/PythonUtil.py index c93912871b..4331758458 100644 --- a/direct/src/showbase/PythonUtil.py +++ b/direct/src/showbase/PythonUtil.py @@ -1086,6 +1086,21 @@ def _installProfileCustomFuncs(filename): movedLoadFuncs.append(marshal.load) marshal.load = _profileMarshalLoad +def _getProfileResultFileInfo(filename): + return (profileFilename2file.get(filename, None), + profileFilename2marshalData.get(filename, None)) + +def _setProfileResultsFileInfo(filename, info): + f, m = info + if f: + profileFilename2file[filename] = f + if m: + profileFilename2marshalData[filename] = m + +def _clearProfileResultFileInfo(filename): + profileFilename2file.pop(filename, None) + profileFilename2marshalData.pop(filename, None) + def _removeProfileCustomFuncs(filename): assert profileFilenameList.top() == filename marshal.load = movedLoadFuncs.pop() diff --git a/direct/src/task/TaskOrig.py b/direct/src/task/TaskOrig.py index a0bcf47f56..9776964652 100644 --- a/direct/src/task/TaskOrig.py +++ b/direct/src/task/TaskOrig.py @@ -16,6 +16,7 @@ from direct.directnotify.DirectNotifyGlobal import * from direct.showbase.PythonUtil import * from direct.showbase.MessengerGlobal import * from direct.showbase import ExceptionVarDump +from direct.showbase.ProfileSession import ProfileSession import time import fnmatch import string @@ -406,8 +407,8 @@ class TaskManager: self._taskProfiler = None self._profileInfo = ScratchPad( taskId = None, - dt = None, - lastProfileResultString = None, + profiled = False, + session = None, ) # We copy this value in from __builtins__ when it gets set. @@ -809,14 +810,20 @@ class TaskManager: # by the task when it runs profileInfo = self._profileInfo doProfile = (task.id == profileInfo.taskId) + # don't profile the same task twice in a row + doProfile = doProfile and (not profileInfo.profiled) if not self.taskTimerVerbose: startTime = self.trueClock.getShortTime() # don't record timing info if doProfile: - ret = profileFunc(Functor(task, *task.extraArgs), - 'TASK_PROFILE:%s' % task.name, True, log=False) + profileSession = ProfileSession(Functor(task, *task.extraArgs), + 'TASK_PROFILE:%s' % task.name) + ret = profileSession.run() + # set these values *after* profiling in case we're profiling the TaskProfiler + profileInfo.session = profileSession + profileInfo.profiled = True else: ret = task(*task.extraArgs) endTime = self.trueClock.getShortTime() @@ -824,9 +831,7 @@ class TaskManager: # Record the dt dt = endTime - startTime if doProfile: - # if we profiled, record the measured duration but don't pollute the task's - # normal duration - profileInfo.dt = dt + # if we profiled, don't pollute the task's recorded duration dt = task.avgDt task.dt = dt @@ -836,8 +841,12 @@ class TaskManager: task.pstats.start() startTime = self.trueClock.getShortTime() if doProfile: - ret = profileFunc(Functor(task, *task.extraArgs), - 'profiled-task-%s' % task.name, True, log=False) + profileSession = ProfileSession(Functor(task, *task.extraArgs), + 'profiled-task-%s' % task.name) + ret = profileSession.run() + # set these values *after* profiling in case we're profiling the TaskProfiler + profileInfo.session = profileSession + profileInfo.profiled = True else: ret = task(*task.extraArgs) endTime = self.trueClock.getShortTime() @@ -847,9 +856,7 @@ class TaskManager: # Record the dt dt = endTime - startTime if doProfile: - # if we profiled, record the measured duration but don't pollute the task's - # normal duration - profileInfo.dt = dt + # if we profiled, don't pollute the task's recorded duration dt = task.avgDt task.dt = dt @@ -864,9 +871,6 @@ class TaskManager: else: task.avgDt = 0 - if doProfile: - profileInfo.lastProfileResultString = self._getProfileResultString() - # warn if the task took too long if self.warnTaskDuration and self.globalClock: avgFrameRate = self.globalClock.getAverageFrameRate() @@ -1020,17 +1024,21 @@ class TaskManager: self._taskProfiler.flush(name) def _setProfileTask(self, task): + if self._profileInfo.session: + self._profileInfo.session.release() + self._profileInfo.session = None self._profileInfo = ScratchPad( taskId = task.id, - dt = None, - lastProfileResultString = None, + profiled = False, + session = None, ) - def _getTaskProfileDt(self): - return self._profileInfo.dt + def _hasProfiledDesignatedTask(self): + # have we run a profile of the designated task yet? + return self._profileInfo.profiled - def _getLastProfileResultString(self): - return self._profileInfo.lastProfileResultString + def _getLastProfileSession(self): + return self._profileInfo.session def _getRandomTask(self): numTasks = 0 diff --git a/direct/src/task/TaskProfiler.py b/direct/src/task/TaskProfiler.py index 84da7f38e2..0cd14125ed 100755 --- a/direct/src/task/TaskProfiler.py +++ b/direct/src/task/TaskProfiler.py @@ -1,6 +1,6 @@ from direct.directnotify.DirectNotifyGlobal import directNotify from direct.fsm.StatePush import FunctionCall -from direct.showbase.PythonUtil import getProfileResultString, Averager +from direct.showbase.PythonUtil import Averager class TaskTracker: # call it TaskProfiler to avoid confusion for the user @@ -8,60 +8,72 @@ class TaskTracker: def __init__(self, namePattern): self._namePattern = namePattern self._durationAverager = Averager('%s-durationAverager' % namePattern) - self._avgData = None - self._avgDataDur = None - self._maxNonSpikeData = None - self._maxNonSpikeDataDur = None + self._avgSession = None + self._maxNonSpikeSession = None def destroy(self): + self.flush() del self._namePattern del self._durationAverager def flush(self): self._durationAverager.reset() - self._avgData = None - self._avgDataDur = None - self._maxNonSpikeData = None - self._maxNonSpikeDataDur = None + if self._avgSession: + self._avgSession.release() + if self._maxNonSpikeSession: + self._maxNonSpikeSession.release() + self._avgSession = None + self._maxNonSpikeSession = None def getNamePattern(self, namePattern): return self._namePattern - def addDuration(self, duration, data, isSpike): + def addProfileSession(self, session, isSpike): + duration = session.getWallClockDuration() self._durationAverager.addValue(duration) - storeAvgData = True - storeMaxNSData = True - if self._avgDataDur is not None: + storeAvg = True + storeMaxNS = True + if self._avgSession is not None: avgDur = self.getAvgDuration() - if abs(self._avgDataDur - avgDur) < abs(duration - avgDur): + if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur): # current avg data is more average than this new sample, keep the data we've # already got stored - storeAvgData = False + storeAvg = False if isSpike: - storeMaxNSData = False + storeMaxNS = False else: - if self._maxNonSpikeDataDur is not None and self._maxNonSpikeDataDur > duration: - storeMaxNSData = False - if storeAvgData: - self._avgData = data - self._avgDataDur = duration - if storeMaxNSData: - self._maxNonSpikeData = data - self._maxNonSpikeDataDur = duration + if (self._maxNonSpikeSession is not None and + self._maxNonSpikeSession.getWallClockDuration() > duration): + storeMaxNS = False + if storeAvg: + if self._avgSession: + self._avgSession.release() + session.acquire() + self._avgSession = session + if storeMaxNS: + if self._maxNonSpikeSession: + self._maxNonSpikeSession.release() + session.acquire() + self._maxNonSpikeSession = session def getAvgDuration(self): return self._durationAverager.getAverage() def getNumDurationSamples(self): return self._durationAverager.getCount() - def getAvgData(self): - # returns duration, data for closest-to-average sample - return self._avgDataDur, self._avgData - def getMaxNonSpikeData(self): - # returns duration, data for closest-to-average sample - return self._maxNonSpikeDataDur, self._maxNonSpikeData + def getAvgSession(self): + # returns profile session for closest-to-average sample + return self._avgSession + def getMaxNonSpikeSession(self): + # returns profile session for closest-to-average sample + return self._maxNonSpikeSession def log(self): - self.notify.info('task CPU profile (%s):\n' - '== AVERAGE (%s wall-clock seconds)\n%s\n' - '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % ( - self._namePattern, - self._avgDataDur, self._avgData, - self._maxNonSpikeDataDur, self._maxNonSpikeData, - )) + if self._avgSession: + self.notify.info('task CPU profile (%s):\n' + '== AVERAGE (%s wall-clock seconds)\n%s\n' + '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % ( + self._namePattern, + self._avgSession.getWallClockDuration(), + self._avgSession.getResults(), + self._maxNonSpikeSession.getWallClockDuration(), + self._maxNonSpikeSession.getResults(), + )) + else: + self.notify.info('task CPU profile (%s): no data collected' % self._namePattern) class TaskProfiler: # this does intermittent profiling of tasks running on the system @@ -77,8 +89,6 @@ class TaskProfiler: self._minSamples = config.GetInt('profile-task-spike-min-samples', 30) # defines spike as longer than this multiple of avg task duration self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.) - # assign getProfileResultString to the taskMgr here, since Task.py can't import PythonUtil - taskMgr._getProfileResultString = getProfileResultString def destroy(self): if taskMgr.getProfileTasks(): @@ -117,31 +127,32 @@ class TaskProfiler: def _doProfileTasks(self, task=None): # gather data from the previous frame # set up for the next frame - profileDt = taskMgr._getTaskProfileDt() - if (self._task is not None) and (profileDt is not None): - lastProfileResult = taskMgr._getLastProfileResultString() - if lastProfileResult: - namePattern = self._task.getNamePattern() - if namePattern not in self._namePattern2tracker: - self._namePattern2tracker[namePattern] = TaskTracker(namePattern) - tracker = self._namePattern2tracker[namePattern] - isSpike = False - # do we have enough samples? - if tracker.getNumDurationSamples() > self._minSamples: - # was this a spike? - if profileDt > (tracker.getAvgDuration() * self._spikeThreshold): - isSpike = True - avgDur, avgResult = tracker.getAvgData() - maxNSDur, maxNSResult = tracker.getMaxNonSpikeData() - self.notify.info('task CPU spike profile (%s):\n' - '== AVERAGE (%s wall-clock seconds)\n%s\n' - '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n' - '== SPIKE (%s wall-clock seconds)\n%s' % ( - namePattern, - avgDur, avgResult, - maxNSDur, maxNSResult, - profileDt, lastProfileResult)) - tracker.addDuration(profileDt, lastProfileResult, isSpike) + if (self._task is not None) and taskMgr._hasProfiledDesignatedTask(): + session = taskMgr._getLastProfileSession() + sessionDur = session.getWallClockDuration() + namePattern = self._task.getNamePattern() + if namePattern not in self._namePattern2tracker: + self._namePattern2tracker[namePattern] = TaskTracker(namePattern) + tracker = self._namePattern2tracker[namePattern] + isSpike = False + # do we have enough samples? + if tracker.getNumDurationSamples() > self._minSamples: + # was this a spike? + if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold): + print 'sessionDur=%s' % sessionDur + print 'avgDur=%s' % tracker.getAvgDuration() + isSpike = True + avgSession = tracker.getAvgSession() + maxNSSession = tracker.getMaxNonSpikeSession() + self.notify.info('task CPU spike profile (%s):\n' + '== AVERAGE (%s wall-clock seconds)\n%s\n' + '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n' + '== SPIKE (%s wall-clock seconds)\n%s' % ( + namePattern, + avgSession.getWallClockDuration(), avgSession.getResults(), + maxNSSession.getWallClockDuration(), maxNSSession.getResults(), + sessionDur, session.getResults())) + tracker.addProfileSession(session, isSpike) # set up the next task self._task = taskMgr._getRandomTask()