accurate profile duration tracking, output filtering (no 0.000 second spikes), preliminary profile result aggregation
commit 61d0f91617 (parent ef771a796b)
@@ -26,12 +26,9 @@ class ProfileSession:
     def __init__(self, func, name):
         self._func = func
         self._name = name
-        self._filename = 'profileData-%s-%s' % (self._name, id(self))
-        self._profileSucceeded = False
-        self._wallClockDur = None
-        self._ramFile = None
+        self._filenameBase = 'profileData-%s-%s' % (self._name, id(self))
         self._refCount = 0
-        self._resultCache = {}
+        self._reset()
         self.acquire()

     def acquire(self):
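The single per-session output filename becomes a base string here; each run appends a counter to it (see _getNextFilename in the next hunk), so one profiled object can own several dump files. With an assumed session name and id, the generated names would look like:

    # hypothetical values for illustration only
    # base:     profileData-garbageCollect-505727792
    # per run:  profileData-garbageCollect-505727792-0, ...-1, ...-2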
@@ -44,28 +41,51 @@ class ProfileSession:
     def _destroy(self):
         del self._func
         del self._name
-        del self._filename
-        del self._wallClockDur
-        del self._ramFile
+        del self._filenameBase
+        del self._filenameCounter
+        del self._filenames
+        del self._duration
+        del self._filename2ramFile
         del self._resultCache
+        del self._successfulProfiles

-    def run(self):
+    def _reset(self):
+        self._filenameCounter = 0
+        self._filenames = []
+        # index of next file to be added to stats object
+        self._statFileCounter = 0
+        self._successfulProfiles = 0
+        self._duration = None
+        self._filename2ramFile = {}
+        self._stats = None
+        self._resultCache = {}
+
+    def _getNextFilename(self):
+        filename = '%s-%s' % (self._filenameBase, self._filenameCounter)
+        self._filenameCounter += 1
+        return filename
+
+    def run(self, aggregate=True):
         # make sure this instance doesn't get destroyed inside self._func
         self.acquire()

-        self._profileSucceeded = False
+        if not aggregate:
+            self._reset()

         # if we're already profiling, just run the func and don't profile
         if 'globalProfileFunc' in __builtin__.__dict__:
             result = self._func()
-            self._wallClockDur = 0.
+            if self._duration is None:
+                self._duration = 0.
         else:
             # put the function in the global namespace so that profile can find it
             __builtin__.globalProfileFunc = self._func
             __builtin__.globalProfileResult = [None]

             # set up the RAM file
-            _installProfileCustomFuncs(self._filename)
+            self._filenames.append(self._getNextFilename())
+            filename = self._filenames[-1]
+            _installProfileCustomFuncs(filename)

             # do the profiling
             Profile = profile.Profile
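run() can now be called repeatedly on the same session; each call dumps to a fresh per-run file, and passing aggregate=False wipes the accumulated data first. A minimal usage sketch, assuming a caller-supplied doWork function (not part of this diff):

    session = ProfileSession(doWork, 'doWork')
    for i in range(3):
        session.run()              # aggregate=True: one dump file per run accumulates
    session.run(aggregate=False)   # resets and starts a fresh aggregate
    print session.getResults()     # report built from the accumulated runs
    session.release()              # drop the reference taken in __init__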
@@ -76,55 +96,87 @@ class ProfileSession:
             # this is based on profile.run, the code is replicated here to allow us to
             # eliminate a memory leak
             prof = Profile()
-            # try to get wall-clock duration that is as accurate as possible
-            startT = self.TrueClock.getShortTime()
             try:
                 prof = prof.run(statement)
             except SystemExit:
                 pass
-            # try to get wall-clock duration that is as accurate as possible
-            endT = self.TrueClock.getShortTime()
             # this has to be run immediately after profiling for the timings to be accurate
             # tell the Profile object to generate output to the RAM file
-            prof.dump_stats(self._filename)
+            prof.dump_stats(filename)

-            dur = endT - startT
-
             # eliminate the memory leak
             del prof.dispatcher

-            # and store the results
-            self._wallClockDur = dur
-
             # store the RAM file for later
-            self._ramFile = _getProfileResultFileInfo(self._filename)
+            profData = _getProfileResultFileInfo(filename)
+            self._filename2ramFile[filename] = profData
+            # calculate the duration (this is dependent on the internal Python profile data format.
+            # see profile.py and pstats.py, this was copied from pstats.Stats.strip_dirs)
+            maxTime = 0.
+            for cc, nc, tt, ct, callers in profData[1].itervalues():
+                if ct > maxTime:
+                    maxTime = ct
+            self._duration = maxTime
             # clean up the RAM file support
-            _removeProfileCustomFuncs(self._filename)
+            _removeProfileCustomFuncs(filename)

             # clean up the globals
             result = globalProfileResult[0]
             del __builtin__.__dict__['globalProfileFunc']
             del __builtin__.__dict__['globalProfileResult']

-            self._profileSucceeded = True
+            self._successfulProfiles += 1

         self.release()
         return result

-    def getWallClockDuration(self):
-        # this might not be accurate, it may include time taken up by other processes
-        return self._wallClockDur
+    def getDuration(self):
+        return self._duration

     def profileSucceeded(self):
-        return self._profileSucceeded
+        return self._successfulProfiles > 0

+    def _restoreRamFile(self, filename):
+        # set up the RAM file
+        _installProfileCustomFuncs(filename)
+        # install the stored RAM file from self.run()
+        _setProfileResultsFileInfo(filename, self._filename2ramFile[filename])
+
+    def _discardRamFile(self, filename):
+        # take down the RAM file
+        _removeProfileCustomFuncs(filename)
+        # and discard it
+        del self._filename2ramFile[filename]
+
     def getResults(self,
                    lines=80,
                    sorts=['cumulative', 'time', 'calls'],
                    callInfo=False):
-        if not self._profileSucceeded:
+        if not self.profileSucceeded():
             output = '%s: profiler already running, could not profile' % self._name
         else:
+            # make sure our stats object exists and is up-to-date
+            statsChanged = (self._statFileCounter < len(self._filenames))
+
+            if self._stats is None:
+                for filename in self._filenames:
+                    self._restoreRamFile(filename)
+                self._stats = pstats.Stats(*self._filenames)
+                self._statFileCounter = len(self._filenames)
+                for filename in self._filenames:
+                    self._discardRamFile(filename)
+            else:
+                while self._statFileCounter < len(self._filenames):
+                    filename = self._filenames[self._statFileCounter]
+                    self._restoreRamFile(filename)
+                    self._stats.add(filename)
+                    self._discardRamFile(filename)
+
+            if statsChanged:
+                self._stats.strip_dirs()
+                # throw out any cached result strings
+                self._resultCache = {}
+
             # make sure the arguments will hash efficiently if callers provide different types
             lines = int(lines)
             sorts = list(sorts)
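The reported duration is now derived from the profile data itself instead of wall-clock timestamps: the largest cumulative time (ct) in the stats table is the time spent in the outermost profiled call. A standalone sketch of the same rule using an ordinary dump file and pstats (statsDuration is a hypothetical helper; the RAM-file plumbing above is omitted):

    import pstats

    def statsDuration(dumpFilename):
        # pstats.Stats exposes the raw table as .stats:
        # {func: (cc, nc, tt, ct, callers)}
        stats = pstats.Stats(dumpFilename)
        maxTime = 0.
        for cc, nc, tt, ct, callers in stats.stats.itervalues():
            if ct > maxTime:
                maxTime = ct
        return maxTime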
@@ -134,19 +186,13 @@ class ProfileSession:
                 # we've already created this output string, get it from the cache
                 output = self._resultCache[k]
             else:
-                # set up the RAM file
-                _installProfileCustomFuncs(self._filename)
-                # install the stored RAM file from self.run()
-                _setProfileResultsFileInfo(self._filename, self._ramFile)
-
-                # now extract human-readable output from the RAM file
+                # now get human-readable output from the profile stats

                 # capture print output to a string
                 sc = StdoutCapture()

                 # print the info to stdout
-                s = pstats.Stats(self._filename)
-                s.strip_dirs()
+                s = self._stats
                 for sort in sorts:
                     s.sort_stats(sort)
                     s.print_stats(lines)
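StdoutCapture (a Panda3D helper) is what turns pstats' print-style report into a string. A generic standard-library equivalent, assuming only sys.stdout needs redirecting, would be:

    import sys, StringIO

    def captureStatsOutput(s, sorts=('cumulative', 'time', 'calls'), lines=80):
        # temporarily swap sys.stdout for an in-memory buffer while pstats prints
        old, sys.stdout = sys.stdout, StringIO.StringIO()
        try:
            for sort in sorts:
                s.sort_stats(sort)
                s.print_stats(lines)
            return sys.stdout.getvalue()
        finally:
            sys.stdout = old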
@@ -160,9 +206,6 @@ class ProfileSession:
                 # restore stdout to what it was before
                 sc.destroy()

-                # clean up the RAM file support
-                _removeProfileCustomFuncs(self._filename)
-
                 # cache this result
                 self._resultCache[k] = output

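The remaining hunks apply to a second file in the same commit, the task-profiling module that defines TaskTracker and TaskProfiler.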
@@ -5,11 +5,18 @@ from direct.showbase.PythonUtil import Averager
 class TaskTracker:
     # call it TaskProfiler to avoid confusion for the user
     notify = directNotify.newCategory("TaskProfiler")
+    MinSamples = None
+    SpikeThreshold = None
     def __init__(self, namePrefix):
         self._namePrefix = namePrefix
         self._durationAverager = Averager('%s-durationAverager' % namePrefix)
         self._avgSession = None
         self._maxNonSpikeSession = None
+        if TaskTracker.MinSamples is None:
+            # number of samples required before spikes start getting identified
+            TaskTracker.MinSamples = config.GetInt('profile-task-spike-min-samples', 30)
+            # defines spike as longer than this multiple of avg task duration
+            TaskTracker.SpikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
     def destroy(self):
         self.flush()
         del self._namePrefix
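Both spike settings are now read once, lazily, into class attributes from the Panda3D config system, so they can be tuned per installation. Assuming the standard prc mechanism, overrides would look like this in a Config.prc file (30 and 10.0 are the defaults baked into the GetInt/GetFloat calls above):

    # hypothetical overrides; the values shown are examples, not the defaults
    profile-task-spike-min-samples 60
    profile-task-spike-threshold 5.0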
@@ -24,14 +31,43 @@ class TaskTracker:
         self._maxNonSpikeSession = None
     def getNamePrefix(self, namePrefix):
         return self._namePrefix
-    def addProfileSession(self, session, isSpike):
-        duration = session.getWallClockDuration()
+    def _checkSpike(self, session):
+        duration = session.getDuration()
+        isSpike = False
+        # was it long enough to show up on a printout?
+        if duration >= .001:
+            # do we have enough samples?
+            if self.getNumDurationSamples() > self.MinSamples:
+                # was this a spike?
+                if duration > (self.getAvgDuration() * self.SpikeThreshold):
+                    isSpike = True
+                    avgSession = self.getAvgSession()
+                    maxNSSession = self.getMaxNonSpikeSession()
+                    self.notify.info('task CPU spike profile (%s):\n'
+                                     '== AVERAGE (%s seconds)\n%s\n'
+                                     '== LONGEST NON-SPIKE (%s seconds)\n%s\n'
+                                     '== SPIKE (%s seconds)\n%s' % (
+                        self._namePrefix,
+                        avgSession.getDuration(), avgSession.getResults(),
+                        maxNSSession.getDuration(), maxNSSession.getResults(),
+                        duration, session.getResults()))
+        return isSpike
+    def addProfileSession(self, session):
+        isSpike = self._checkSpike(session)
+        duration = session.getDuration()
         self._durationAverager.addValue(duration)
+        if self.getNumDurationSamples() == self.MinSamples:
+            # if we just collected enough samples to check for spikes, see if
+            # our max 'non-spike' session collected so far counts as a spike
+            # and if so, log it
+            if self._checkSpike(self._maxNonSpikeSession):
+                self._maxNonSpikeSession = self._avgSession

         storeAvg = True
         storeMaxNS = True
         if self._avgSession is not None:
             avgDur = self.getAvgDuration()
-            if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur):
+            if abs(self._avgSession.getDuration() - avgDur) < abs(duration - avgDur):
                 # current avg data is more average than this new sample, keep the data we've
                 # already got stored
                 storeAvg = False
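The spike test now lives in TaskTracker._checkSpike and combines the new output filter with the relative threshold: a sample must last at least one millisecond (anything shorter would print as 0.000 seconds) and exceed SpikeThreshold times the running average before it is logged. A worked example with assumed numbers, ignoring the MinSamples warm-up:

    avgDuration = 0.002      # assumed running average, in seconds
    SpikeThreshold = 10.
    for duration in (0.0004, 0.015, 0.05):
        isSpike = duration >= .001 and duration > avgDuration * SpikeThreshold
        print duration, isSpike   # 0.0004 False, 0.015 False, 0.05 True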
@@ -39,7 +75,7 @@ class TaskTracker:
                 storeMaxNS = False
         else:
             if (self._maxNonSpikeSession is not None and
-                self._maxNonSpikeSession.getWallClockDuration() > duration):
+                self._maxNonSpikeSession.getDuration() > duration):
                 storeMaxNS = False
         if storeAvg:
             if self._avgSession:
@@ -64,12 +100,12 @@ class TaskTracker:
     def log(self):
         if self._avgSession:
             self.notify.info('task CPU profile (%s):\n'
-                             '== AVERAGE (%s wall-clock seconds)\n%s\n'
-                             '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
+                             '== AVERAGE (%s seconds)\n%s\n'
+                             '== LONGEST NON-SPIKE (%s seconds)\n%s' % (
                 self._namePrefix,
-                self._avgSession.getWallClockDuration(),
+                self._avgSession.getDuration(),
                 self._avgSession.getResults(),
-                self._maxNonSpikeSession.getWallClockDuration(),
+                self._maxNonSpikeSession.getDuration(),
                 self._maxNonSpikeSession.getResults(),
                 ))
         else:
@@ -85,10 +121,6 @@ class TaskProfiler:
         # table of task name pattern to TaskTracker
         self._namePrefix2tracker = {}
         self._task = None
-        # number of samples required before spikes start getting identified
-        self._minSamples = config.GetInt('profile-task-spike-min-samples', 30)
-        # defines spike as longer than this multiple of avg task duration
-        self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)

     def destroy(self):
         if taskMgr.getProfileTasks():
@@ -131,30 +163,11 @@ class TaskProfiler:
         session = taskMgr._getLastProfileSession()
         # if we couldn't profile, throw this result out
         if session.profileSucceeded():
-            sessionDur = session.getWallClockDuration()
             namePrefix = self._task.getNamePrefix()
             if namePrefix not in self._namePrefix2tracker:
                 self._namePrefix2tracker[namePrefix] = TaskTracker(namePrefix)
             tracker = self._namePrefix2tracker[namePrefix]
-            isSpike = False
-            # do we have enough samples?
-            if tracker.getNumDurationSamples() > self._minSamples:
-                # was this a spike?
-                if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold):
-                    print 'sessionDur=%s' % sessionDur
-                    print 'avgDur=%s' % tracker.getAvgDuration()
-                    isSpike = True
-                    avgSession = tracker.getAvgSession()
-                    maxNSSession = tracker.getMaxNonSpikeSession()
-                    self.notify.info('task CPU spike profile (%s):\n'
-                                     '== AVERAGE (%s wall-clock seconds)\n%s\n'
-                                     '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
-                                     '== SPIKE (%s wall-clock seconds)\n%s' % (
-                        namePrefix,
-                        avgSession.getWallClockDuration(), avgSession.getResults(),
-                        maxNSSession.getWallClockDuration(), maxNSSession.getResults(),
-                        sessionDur, session.getResults()))
-            tracker.addProfileSession(session, isSpike)
+            tracker.addProfileSession(session)

         # set up the next task
         self._task = taskMgr._getRandomTask()