mirror of
https://github.com/panda3d/panda3d.git
synced 2025-10-03 18:31:55 -04:00
added ProfileSession, don't format profile output until it's requested
This commit is contained in:
parent
ee89d48297
commit
eaa1318450
141
direct/src/showbase/ProfileSession.py
Executable file
141
direct/src/showbase/ProfileSession.py
Executable file
@ -0,0 +1,141 @@
|
||||
from pandac.libpandaexpressModules import TrueClock
|
||||
from direct.showbase.PythonUtil import (
|
||||
StdoutCapture, _installProfileCustomFuncs,_removeProfileCustomFuncs,
|
||||
_profileWithoutGarbageLeak, _getProfileResultFileInfo, _setProfileResultsFileInfo,
|
||||
_clearProfileResultFileInfo, )
|
||||
import __builtin__
|
||||
import profile
|
||||
import pstats
|
||||
from StringIO import StringIO
|
||||
import marshal
|
||||
|
||||
class ProfileSession:
|
||||
# class that encapsulates a profile of a single callable using Python's standard
|
||||
# 'profile' module
|
||||
#
|
||||
# defers formatting of profile results until they are requested
|
||||
#
|
||||
# implementation sidesteps memory leak in Python profile module,
|
||||
# and redirects file output to RAM file for efficiency
|
||||
DefaultFilename = 'profilesession'
|
||||
DefaultLines = 80
|
||||
DefaultSorts = ['cumulative', 'time', 'calls']
|
||||
|
||||
TrueClock = TrueClock.getGlobalPtr()
|
||||
|
||||
def __init__(self, func, name):
|
||||
self._func = func
|
||||
self._name = name
|
||||
self._filename = 'profileData-%s-%s' % (self._name, id(self))
|
||||
self._wallClockDur = None
|
||||
self._ramFile = None
|
||||
self._refCount = 0
|
||||
self.acquire()
|
||||
|
||||
def acquire(self):
|
||||
self._refCount += 1
|
||||
def release(self):
|
||||
self._refCount -= 1
|
||||
if not self._refCount:
|
||||
self._destroy()
|
||||
|
||||
def _destroy(self):
|
||||
del self._func
|
||||
del self._name
|
||||
del self._filename
|
||||
del self._wallClockDur
|
||||
del self._ramFile
|
||||
|
||||
def run(self):
|
||||
# make sure this instance doesn't get destroyed inside self._func
|
||||
self.acquire()
|
||||
|
||||
# put the function in the global namespace so that profile can find it
|
||||
assert 'globalProfileFunc' not in __builtin__.__dict__
|
||||
__builtin__.globalProfileFunc = self._func
|
||||
__builtin__.globalProfileResult = [None]
|
||||
|
||||
# set up the RAM file
|
||||
_installProfileCustomFuncs(self._filename)
|
||||
|
||||
# do the profiling
|
||||
Profile = profile.Profile
|
||||
statement = 'globalProfileResult[0]=globalProfileFunc()'
|
||||
sort = -1
|
||||
retVal = None
|
||||
|
||||
# this is based on profile.run, the code is replicated here to allow us to
|
||||
# eliminate a memory leak
|
||||
prof = Profile()
|
||||
# try to get wall-clock duration that is as accurate as possible
|
||||
startT = self.TrueClock.getShortTime()
|
||||
try:
|
||||
prof = prof.run(statement)
|
||||
except SystemExit:
|
||||
pass
|
||||
# try to get wall-clock duration that is as accurate as possible
|
||||
endT = self.TrueClock.getShortTime()
|
||||
# this has to be run immediately after profiling for the timings to be accurate
|
||||
# tell the Profile object to generate output to the RAM file
|
||||
prof.dump_stats(self._filename)
|
||||
|
||||
dur = endT - startT
|
||||
|
||||
# eliminate the memory leak
|
||||
del prof.dispatcher
|
||||
|
||||
# and store the results
|
||||
self._wallClockDur = dur
|
||||
|
||||
# store the RAM file for later
|
||||
self._ramFile = _getProfileResultFileInfo(self._filename)
|
||||
# clean up the RAM file support
|
||||
_removeProfileCustomFuncs(self._filename)
|
||||
|
||||
# clean up the globals
|
||||
result = globalProfileResult[0]
|
||||
del __builtin__.__dict__['globalProfileFunc']
|
||||
del __builtin__.__dict__['globalProfileResult']
|
||||
|
||||
self.release()
|
||||
return result
|
||||
|
||||
def getWallClockDuration(self):
|
||||
# this might not be accurate, it may include time taken up by other processes
|
||||
return self._wallClockDur
|
||||
|
||||
def getResults(self,
|
||||
lines=80,
|
||||
sorts=['cumulative', 'time', 'calls'],
|
||||
callInfo=False):
|
||||
# set up the RAM file
|
||||
_installProfileCustomFuncs(self._filename)
|
||||
# install the stored RAM file from self.run()
|
||||
_setProfileResultsFileInfo(self._filename, self._ramFile)
|
||||
|
||||
# now extract human-readable output from the RAM file
|
||||
|
||||
# capture print output to a string
|
||||
sc = StdoutCapture()
|
||||
|
||||
# print the info to stdout
|
||||
s = pstats.Stats(self._filename)
|
||||
s.strip_dirs()
|
||||
for sort in sorts:
|
||||
s.sort_stats(sort)
|
||||
s.print_stats(lines)
|
||||
if callInfo:
|
||||
s.print_callees(lines)
|
||||
s.print_callers(lines)
|
||||
|
||||
# make a copy of the print output
|
||||
output = sc.getString()
|
||||
|
||||
# restore stdout to what it was before
|
||||
sc.destroy()
|
||||
|
||||
# clean up the RAM file support
|
||||
_removeProfileCustomFuncs(self._filename)
|
||||
|
||||
return output
|
||||
|
@ -1086,6 +1086,21 @@ def _installProfileCustomFuncs(filename):
|
||||
movedLoadFuncs.append(marshal.load)
|
||||
marshal.load = _profileMarshalLoad
|
||||
|
||||
def _getProfileResultFileInfo(filename):
|
||||
return (profileFilename2file.get(filename, None),
|
||||
profileFilename2marshalData.get(filename, None))
|
||||
|
||||
def _setProfileResultsFileInfo(filename, info):
|
||||
f, m = info
|
||||
if f:
|
||||
profileFilename2file[filename] = f
|
||||
if m:
|
||||
profileFilename2marshalData[filename] = m
|
||||
|
||||
def _clearProfileResultFileInfo(filename):
|
||||
profileFilename2file.pop(filename, None)
|
||||
profileFilename2marshalData.pop(filename, None)
|
||||
|
||||
def _removeProfileCustomFuncs(filename):
|
||||
assert profileFilenameList.top() == filename
|
||||
marshal.load = movedLoadFuncs.pop()
|
||||
|
@ -16,6 +16,7 @@ from direct.directnotify.DirectNotifyGlobal import *
|
||||
from direct.showbase.PythonUtil import *
|
||||
from direct.showbase.MessengerGlobal import *
|
||||
from direct.showbase import ExceptionVarDump
|
||||
from direct.showbase.ProfileSession import ProfileSession
|
||||
import time
|
||||
import fnmatch
|
||||
import string
|
||||
@ -406,8 +407,8 @@ class TaskManager:
|
||||
self._taskProfiler = None
|
||||
self._profileInfo = ScratchPad(
|
||||
taskId = None,
|
||||
dt = None,
|
||||
lastProfileResultString = None,
|
||||
profiled = False,
|
||||
session = None,
|
||||
)
|
||||
|
||||
# We copy this value in from __builtins__ when it gets set.
|
||||
@ -809,14 +810,20 @@ class TaskManager:
|
||||
# by the task when it runs
|
||||
profileInfo = self._profileInfo
|
||||
doProfile = (task.id == profileInfo.taskId)
|
||||
# don't profile the same task twice in a row
|
||||
doProfile = doProfile and (not profileInfo.profiled)
|
||||
|
||||
if not self.taskTimerVerbose:
|
||||
startTime = self.trueClock.getShortTime()
|
||||
|
||||
# don't record timing info
|
||||
if doProfile:
|
||||
ret = profileFunc(Functor(task, *task.extraArgs),
|
||||
'TASK_PROFILE:%s' % task.name, True, log=False)
|
||||
profileSession = ProfileSession(Functor(task, *task.extraArgs),
|
||||
'TASK_PROFILE:%s' % task.name)
|
||||
ret = profileSession.run()
|
||||
# set these values *after* profiling in case we're profiling the TaskProfiler
|
||||
profileInfo.session = profileSession
|
||||
profileInfo.profiled = True
|
||||
else:
|
||||
ret = task(*task.extraArgs)
|
||||
endTime = self.trueClock.getShortTime()
|
||||
@ -824,9 +831,7 @@ class TaskManager:
|
||||
# Record the dt
|
||||
dt = endTime - startTime
|
||||
if doProfile:
|
||||
# if we profiled, record the measured duration but don't pollute the task's
|
||||
# normal duration
|
||||
profileInfo.dt = dt
|
||||
# if we profiled, don't pollute the task's recorded duration
|
||||
dt = task.avgDt
|
||||
task.dt = dt
|
||||
|
||||
@ -836,8 +841,12 @@ class TaskManager:
|
||||
task.pstats.start()
|
||||
startTime = self.trueClock.getShortTime()
|
||||
if doProfile:
|
||||
ret = profileFunc(Functor(task, *task.extraArgs),
|
||||
'profiled-task-%s' % task.name, True, log=False)
|
||||
profileSession = ProfileSession(Functor(task, *task.extraArgs),
|
||||
'profiled-task-%s' % task.name)
|
||||
ret = profileSession.run()
|
||||
# set these values *after* profiling in case we're profiling the TaskProfiler
|
||||
profileInfo.session = profileSession
|
||||
profileInfo.profiled = True
|
||||
else:
|
||||
ret = task(*task.extraArgs)
|
||||
endTime = self.trueClock.getShortTime()
|
||||
@ -847,9 +856,7 @@ class TaskManager:
|
||||
# Record the dt
|
||||
dt = endTime - startTime
|
||||
if doProfile:
|
||||
# if we profiled, record the measured duration but don't pollute the task's
|
||||
# normal duration
|
||||
profileInfo.dt = dt
|
||||
# if we profiled, don't pollute the task's recorded duration
|
||||
dt = task.avgDt
|
||||
task.dt = dt
|
||||
|
||||
@ -864,9 +871,6 @@ class TaskManager:
|
||||
else:
|
||||
task.avgDt = 0
|
||||
|
||||
if doProfile:
|
||||
profileInfo.lastProfileResultString = self._getProfileResultString()
|
||||
|
||||
# warn if the task took too long
|
||||
if self.warnTaskDuration and self.globalClock:
|
||||
avgFrameRate = self.globalClock.getAverageFrameRate()
|
||||
@ -1020,17 +1024,21 @@ class TaskManager:
|
||||
self._taskProfiler.flush(name)
|
||||
|
||||
def _setProfileTask(self, task):
|
||||
if self._profileInfo.session:
|
||||
self._profileInfo.session.release()
|
||||
self._profileInfo.session = None
|
||||
self._profileInfo = ScratchPad(
|
||||
taskId = task.id,
|
||||
dt = None,
|
||||
lastProfileResultString = None,
|
||||
profiled = False,
|
||||
session = None,
|
||||
)
|
||||
|
||||
def _getTaskProfileDt(self):
|
||||
return self._profileInfo.dt
|
||||
def _hasProfiledDesignatedTask(self):
|
||||
# have we run a profile of the designated task yet?
|
||||
return self._profileInfo.profiled
|
||||
|
||||
def _getLastProfileResultString(self):
|
||||
return self._profileInfo.lastProfileResultString
|
||||
def _getLastProfileSession(self):
|
||||
return self._profileInfo.session
|
||||
|
||||
def _getRandomTask(self):
|
||||
numTasks = 0
|
||||
|
@ -1,6 +1,6 @@
|
||||
from direct.directnotify.DirectNotifyGlobal import directNotify
|
||||
from direct.fsm.StatePush import FunctionCall
|
||||
from direct.showbase.PythonUtil import getProfileResultString, Averager
|
||||
from direct.showbase.PythonUtil import Averager
|
||||
|
||||
class TaskTracker:
|
||||
# call it TaskProfiler to avoid confusion for the user
|
||||
@ -8,60 +8,72 @@ class TaskTracker:
|
||||
def __init__(self, namePattern):
|
||||
self._namePattern = namePattern
|
||||
self._durationAverager = Averager('%s-durationAverager' % namePattern)
|
||||
self._avgData = None
|
||||
self._avgDataDur = None
|
||||
self._maxNonSpikeData = None
|
||||
self._maxNonSpikeDataDur = None
|
||||
self._avgSession = None
|
||||
self._maxNonSpikeSession = None
|
||||
def destroy(self):
|
||||
self.flush()
|
||||
del self._namePattern
|
||||
del self._durationAverager
|
||||
def flush(self):
|
||||
self._durationAverager.reset()
|
||||
self._avgData = None
|
||||
self._avgDataDur = None
|
||||
self._maxNonSpikeData = None
|
||||
self._maxNonSpikeDataDur = None
|
||||
if self._avgSession:
|
||||
self._avgSession.release()
|
||||
if self._maxNonSpikeSession:
|
||||
self._maxNonSpikeSession.release()
|
||||
self._avgSession = None
|
||||
self._maxNonSpikeSession = None
|
||||
def getNamePattern(self, namePattern):
|
||||
return self._namePattern
|
||||
def addDuration(self, duration, data, isSpike):
|
||||
def addProfileSession(self, session, isSpike):
|
||||
duration = session.getWallClockDuration()
|
||||
self._durationAverager.addValue(duration)
|
||||
storeAvgData = True
|
||||
storeMaxNSData = True
|
||||
if self._avgDataDur is not None:
|
||||
storeAvg = True
|
||||
storeMaxNS = True
|
||||
if self._avgSession is not None:
|
||||
avgDur = self.getAvgDuration()
|
||||
if abs(self._avgDataDur - avgDur) < abs(duration - avgDur):
|
||||
if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur):
|
||||
# current avg data is more average than this new sample, keep the data we've
|
||||
# already got stored
|
||||
storeAvgData = False
|
||||
storeAvg = False
|
||||
if isSpike:
|
||||
storeMaxNSData = False
|
||||
storeMaxNS = False
|
||||
else:
|
||||
if self._maxNonSpikeDataDur is not None and self._maxNonSpikeDataDur > duration:
|
||||
storeMaxNSData = False
|
||||
if storeAvgData:
|
||||
self._avgData = data
|
||||
self._avgDataDur = duration
|
||||
if storeMaxNSData:
|
||||
self._maxNonSpikeData = data
|
||||
self._maxNonSpikeDataDur = duration
|
||||
if (self._maxNonSpikeSession is not None and
|
||||
self._maxNonSpikeSession.getWallClockDuration() > duration):
|
||||
storeMaxNS = False
|
||||
if storeAvg:
|
||||
if self._avgSession:
|
||||
self._avgSession.release()
|
||||
session.acquire()
|
||||
self._avgSession = session
|
||||
if storeMaxNS:
|
||||
if self._maxNonSpikeSession:
|
||||
self._maxNonSpikeSession.release()
|
||||
session.acquire()
|
||||
self._maxNonSpikeSession = session
|
||||
def getAvgDuration(self):
|
||||
return self._durationAverager.getAverage()
|
||||
def getNumDurationSamples(self):
|
||||
return self._durationAverager.getCount()
|
||||
def getAvgData(self):
|
||||
# returns duration, data for closest-to-average sample
|
||||
return self._avgDataDur, self._avgData
|
||||
def getMaxNonSpikeData(self):
|
||||
# returns duration, data for closest-to-average sample
|
||||
return self._maxNonSpikeDataDur, self._maxNonSpikeData
|
||||
def getAvgSession(self):
|
||||
# returns profile session for closest-to-average sample
|
||||
return self._avgSession
|
||||
def getMaxNonSpikeSession(self):
|
||||
# returns profile session for closest-to-average sample
|
||||
return self._maxNonSpikeSession
|
||||
def log(self):
|
||||
self.notify.info('task CPU profile (%s):\n'
|
||||
'== AVERAGE (%s wall-clock seconds)\n%s\n'
|
||||
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
|
||||
self._namePattern,
|
||||
self._avgDataDur, self._avgData,
|
||||
self._maxNonSpikeDataDur, self._maxNonSpikeData,
|
||||
))
|
||||
if self._avgSession:
|
||||
self.notify.info('task CPU profile (%s):\n'
|
||||
'== AVERAGE (%s wall-clock seconds)\n%s\n'
|
||||
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
|
||||
self._namePattern,
|
||||
self._avgSession.getWallClockDuration(),
|
||||
self._avgSession.getResults(),
|
||||
self._maxNonSpikeSession.getWallClockDuration(),
|
||||
self._maxNonSpikeSession.getResults(),
|
||||
))
|
||||
else:
|
||||
self.notify.info('task CPU profile (%s): no data collected' % self._namePattern)
|
||||
|
||||
class TaskProfiler:
|
||||
# this does intermittent profiling of tasks running on the system
|
||||
@ -77,8 +89,6 @@ class TaskProfiler:
|
||||
self._minSamples = config.GetInt('profile-task-spike-min-samples', 30)
|
||||
# defines spike as longer than this multiple of avg task duration
|
||||
self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
|
||||
# assign getProfileResultString to the taskMgr here, since Task.py can't import PythonUtil
|
||||
taskMgr._getProfileResultString = getProfileResultString
|
||||
|
||||
def destroy(self):
|
||||
if taskMgr.getProfileTasks():
|
||||
@ -117,31 +127,32 @@ class TaskProfiler:
|
||||
def _doProfileTasks(self, task=None):
|
||||
# gather data from the previous frame
|
||||
# set up for the next frame
|
||||
profileDt = taskMgr._getTaskProfileDt()
|
||||
if (self._task is not None) and (profileDt is not None):
|
||||
lastProfileResult = taskMgr._getLastProfileResultString()
|
||||
if lastProfileResult:
|
||||
namePattern = self._task.getNamePattern()
|
||||
if namePattern not in self._namePattern2tracker:
|
||||
self._namePattern2tracker[namePattern] = TaskTracker(namePattern)
|
||||
tracker = self._namePattern2tracker[namePattern]
|
||||
isSpike = False
|
||||
# do we have enough samples?
|
||||
if tracker.getNumDurationSamples() > self._minSamples:
|
||||
# was this a spike?
|
||||
if profileDt > (tracker.getAvgDuration() * self._spikeThreshold):
|
||||
isSpike = True
|
||||
avgDur, avgResult = tracker.getAvgData()
|
||||
maxNSDur, maxNSResult = tracker.getMaxNonSpikeData()
|
||||
self.notify.info('task CPU spike profile (%s):\n'
|
||||
'== AVERAGE (%s wall-clock seconds)\n%s\n'
|
||||
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
|
||||
'== SPIKE (%s wall-clock seconds)\n%s' % (
|
||||
namePattern,
|
||||
avgDur, avgResult,
|
||||
maxNSDur, maxNSResult,
|
||||
profileDt, lastProfileResult))
|
||||
tracker.addDuration(profileDt, lastProfileResult, isSpike)
|
||||
if (self._task is not None) and taskMgr._hasProfiledDesignatedTask():
|
||||
session = taskMgr._getLastProfileSession()
|
||||
sessionDur = session.getWallClockDuration()
|
||||
namePattern = self._task.getNamePattern()
|
||||
if namePattern not in self._namePattern2tracker:
|
||||
self._namePattern2tracker[namePattern] = TaskTracker(namePattern)
|
||||
tracker = self._namePattern2tracker[namePattern]
|
||||
isSpike = False
|
||||
# do we have enough samples?
|
||||
if tracker.getNumDurationSamples() > self._minSamples:
|
||||
# was this a spike?
|
||||
if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold):
|
||||
print 'sessionDur=%s' % sessionDur
|
||||
print 'avgDur=%s' % tracker.getAvgDuration()
|
||||
isSpike = True
|
||||
avgSession = tracker.getAvgSession()
|
||||
maxNSSession = tracker.getMaxNonSpikeSession()
|
||||
self.notify.info('task CPU spike profile (%s):\n'
|
||||
'== AVERAGE (%s wall-clock seconds)\n%s\n'
|
||||
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
|
||||
'== SPIKE (%s wall-clock seconds)\n%s' % (
|
||||
namePattern,
|
||||
avgSession.getWallClockDuration(), avgSession.getResults(),
|
||||
maxNSSession.getWallClockDuration(), maxNSSession.getResults(),
|
||||
sessionDur, session.getResults()))
|
||||
tracker.addProfileSession(session, isSpike)
|
||||
|
||||
# set up the next task
|
||||
self._task = taskMgr._getRandomTask()
|
||||
|
Loading…
x
Reference in New Issue
Block a user