accurate profile duration tracking, output filtering (no 0.000 second spikes), preliminary profile result aggregation

This commit is contained in:
Darren Ranalli 2008-10-14 00:45:49 +00:00
parent ef771a796b
commit 61d0f91617
2 changed files with 129 additions and 73 deletions

View File

@ -26,12 +26,9 @@ class ProfileSession:
def __init__(self, func, name):
self._func = func
self._name = name
self._filename = 'profileData-%s-%s' % (self._name, id(self))
self._profileSucceeded = False
self._wallClockDur = None
self._ramFile = None
self._filenameBase = 'profileData-%s-%s' % (self._name, id(self))
self._refCount = 0
self._resultCache = {}
self._reset()
self.acquire()
def acquire(self):
@ -44,28 +41,51 @@ class ProfileSession:
def _destroy(self):
del self._func
del self._name
del self._filename
del self._wallClockDur
del self._ramFile
del self._filenameBase
del self._filenameCounter
del self._filenames
del self._duration
del self._filename2ramFile
del self._resultCache
del self._successfulProfiles
def run(self):
def _reset(self):
self._filenameCounter = 0
self._filenames = []
# index of next file to be added to stats object
self._statFileCounter = 0
self._successfulProfiles = 0
self._duration = None
self._filename2ramFile = {}
self._stats = None
self._resultCache = {}
def _getNextFilename(self):
filename = '%s-%s' % (self._filenameBase, self._filenameCounter)
self._filenameCounter += 1
return filename
def run(self, aggregate=True):
# make sure this instance doesn't get destroyed inside self._func
self.acquire()
self._profileSucceeded = False
if not aggregate:
self._reset()
# if we're already profiling, just run the func and don't profile
if 'globalProfileFunc' in __builtin__.__dict__:
result = self._func()
self._wallClockDur = 0.
if self._duration is None:
self._duration = 0.
else:
# put the function in the global namespace so that profile can find it
__builtin__.globalProfileFunc = self._func
__builtin__.globalProfileResult = [None]
# set up the RAM file
_installProfileCustomFuncs(self._filename)
self._filenames.append(self._getNextFilename())
filename = self._filenames[-1]
_installProfileCustomFuncs(filename)
# do the profiling
Profile = profile.Profile
@ -76,55 +96,87 @@ class ProfileSession:
# this is based on profile.run, the code is replicated here to allow us to
# eliminate a memory leak
prof = Profile()
# try to get wall-clock duration that is as accurate as possible
startT = self.TrueClock.getShortTime()
try:
prof = prof.run(statement)
except SystemExit:
pass
# try to get wall-clock duration that is as accurate as possible
endT = self.TrueClock.getShortTime()
# this has to be run immediately after profiling for the timings to be accurate
# tell the Profile object to generate output to the RAM file
prof.dump_stats(self._filename)
dur = endT - startT
prof.dump_stats(filename)
# eliminate the memory leak
del prof.dispatcher
# and store the results
self._wallClockDur = dur
# store the RAM file for later
self._ramFile = _getProfileResultFileInfo(self._filename)
profData = _getProfileResultFileInfo(filename)
self._filename2ramFile[filename] = profData
# calculate the duration (this is dependent on the internal Python profile data format.
# see profile.py and pstats.py, this was copied from pstats.Stats.strip_dirs)
maxTime = 0.
for cc, nc, tt, ct, callers in profData[1].itervalues():
if ct > maxTime:
maxTime = ct
self._duration = maxTime
# clean up the RAM file support
_removeProfileCustomFuncs(self._filename)
_removeProfileCustomFuncs(filename)
# clean up the globals
result = globalProfileResult[0]
del __builtin__.__dict__['globalProfileFunc']
del __builtin__.__dict__['globalProfileResult']
self._profileSucceeded = True
self._successfulProfiles += 1
self.release()
return result
def getWallClockDuration(self):
# this might not be accurate, it may include time taken up by other processes
return self._wallClockDur
def getDuration(self):
return self._duration
def profileSucceeded(self):
return self._profileSucceeded
return self._successfulProfiles > 0
def _restoreRamFile(self, filename):
# set up the RAM file
_installProfileCustomFuncs(filename)
# install the stored RAM file from self.run()
_setProfileResultsFileInfo(filename, self._filename2ramFile[filename])
def _discardRamFile(self, filename):
# take down the RAM file
_removeProfileCustomFuncs(filename)
# and discard it
del self._filename2ramFile[filename]
def getResults(self,
lines=80,
sorts=['cumulative', 'time', 'calls'],
callInfo=False):
if not self._profileSucceeded:
if not self.profileSucceeded():
output = '%s: profiler already running, could not profile' % self._name
else:
# make sure our stats object exists and is up-to-date
statsChanged = (self._statFileCounter < len(self._filenames))
if self._stats is None:
for filename in self._filenames:
self._restoreRamFile(filename)
self._stats = pstats.Stats(*self._filenames)
self._statFileCounter = len(self._filenames)
for filename in self._filenames:
self._discardRamFile(filename)
else:
while self._statFileCounter < len(self._filenames):
filename = self._filenames[self._statFileCounter]
self._restoreRamFile(filename)
self._stats.add(filename)
self._discardRamFile(filename)
if statsChanged:
self._stats.strip_dirs()
# throw out any cached result strings
self._resultCache = {}
# make sure the arguments will hash efficiently if callers provide different types
lines = int(lines)
sorts = list(sorts)
@ -134,19 +186,13 @@ class ProfileSession:
# we've already created this output string, get it from the cache
output = self._resultCache[k]
else:
# set up the RAM file
_installProfileCustomFuncs(self._filename)
# install the stored RAM file from self.run()
_setProfileResultsFileInfo(self._filename, self._ramFile)
# now extract human-readable output from the RAM file
# now get human-readable output from the profile stats
# capture print output to a string
sc = StdoutCapture()
# print the info to stdout
s = pstats.Stats(self._filename)
s.strip_dirs()
s = self._stats
for sort in sorts:
s.sort_stats(sort)
s.print_stats(lines)
@ -160,9 +206,6 @@ class ProfileSession:
# restore stdout to what it was before
sc.destroy()
# clean up the RAM file support
_removeProfileCustomFuncs(self._filename)
# cache this result
self._resultCache[k] = output

View File

@ -5,11 +5,18 @@ from direct.showbase.PythonUtil import Averager
class TaskTracker:
# call it TaskProfiler to avoid confusion for the user
notify = directNotify.newCategory("TaskProfiler")
MinSamples = None
SpikeThreshold = None
def __init__(self, namePrefix):
self._namePrefix = namePrefix
self._durationAverager = Averager('%s-durationAverager' % namePrefix)
self._avgSession = None
self._maxNonSpikeSession = None
if TaskTracker.MinSamples is None:
# number of samples required before spikes start getting identified
TaskTracker.MinSamples = config.GetInt('profile-task-spike-min-samples', 30)
# defines spike as longer than this multiple of avg task duration
TaskTracker.SpikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
def destroy(self):
self.flush()
del self._namePrefix
@ -24,14 +31,43 @@ class TaskTracker:
self._maxNonSpikeSession = None
def getNamePrefix(self, namePrefix):
return self._namePrefix
def addProfileSession(self, session, isSpike):
duration = session.getWallClockDuration()
def _checkSpike(self, session):
duration = session.getDuration()
isSpike = False
# was it long enough to show up on a printout?
if duration >= .001:
# do we have enough samples?
if self.getNumDurationSamples() > self.MinSamples:
# was this a spike?
if duration > (self.getAvgDuration() * self.SpikeThreshold):
isSpike = True
avgSession = self.getAvgSession()
maxNSSession = self.getMaxNonSpikeSession()
self.notify.info('task CPU spike profile (%s):\n'
'== AVERAGE (%s seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s seconds)\n%s\n'
'== SPIKE (%s seconds)\n%s' % (
self._namePrefix,
avgSession.getDuration(), avgSession.getResults(),
maxNSSession.getDuration(), maxNSSession.getResults(),
duration, session.getResults()))
return isSpike
def addProfileSession(self, session):
isSpike = self._checkSpike(session)
duration = session.getDuration()
self._durationAverager.addValue(duration)
if self.getNumDurationSamples() == self.MinSamples:
# if we just collected enough samples to check for spikes, see if
# our max 'non-spike' session collected so far counts as a spike
# and if so, log it
if self._checkSpike(self._maxNonSpikeSession):
self._maxNonSpikeSession = self._avgSession
storeAvg = True
storeMaxNS = True
if self._avgSession is not None:
avgDur = self.getAvgDuration()
if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur):
if abs(self._avgSession.getDuration() - avgDur) < abs(duration - avgDur):
# current avg data is more average than this new sample, keep the data we've
# already got stored
storeAvg = False
@ -39,7 +75,7 @@ class TaskTracker:
storeMaxNS = False
else:
if (self._maxNonSpikeSession is not None and
self._maxNonSpikeSession.getWallClockDuration() > duration):
self._maxNonSpikeSession.getDuration() > duration):
storeMaxNS = False
if storeAvg:
if self._avgSession:
@ -64,12 +100,12 @@ class TaskTracker:
def log(self):
if self._avgSession:
self.notify.info('task CPU profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
'== AVERAGE (%s seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s seconds)\n%s' % (
self._namePrefix,
self._avgSession.getWallClockDuration(),
self._avgSession.getDuration(),
self._avgSession.getResults(),
self._maxNonSpikeSession.getWallClockDuration(),
self._maxNonSpikeSession.getDuration(),
self._maxNonSpikeSession.getResults(),
))
else:
@ -85,10 +121,6 @@ class TaskProfiler:
# table of task name pattern to TaskTracker
self._namePrefix2tracker = {}
self._task = None
# number of samples required before spikes start getting identified
self._minSamples = config.GetInt('profile-task-spike-min-samples', 30)
# defines spike as longer than this multiple of avg task duration
self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
def destroy(self):
if taskMgr.getProfileTasks():
@ -131,30 +163,11 @@ class TaskProfiler:
session = taskMgr._getLastProfileSession()
# if we couldn't profile, throw this result out
if session.profileSucceeded():
sessionDur = session.getWallClockDuration()
namePrefix = self._task.getNamePrefix()
if namePrefix not in self._namePrefix2tracker:
self._namePrefix2tracker[namePrefix] = TaskTracker(namePrefix)
tracker = self._namePrefix2tracker[namePrefix]
isSpike = False
# do we have enough samples?
if tracker.getNumDurationSamples() > self._minSamples:
# was this a spike?
if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold):
print 'sessionDur=%s' % sessionDur
print 'avgDur=%s' % tracker.getAvgDuration()
isSpike = True
avgSession = tracker.getAvgSession()
maxNSSession = tracker.getMaxNonSpikeSession()
self.notify.info('task CPU spike profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
'== SPIKE (%s wall-clock seconds)\n%s' % (
namePrefix,
avgSession.getWallClockDuration(), avgSession.getResults(),
maxNSSession.getWallClockDuration(), maxNSSession.getResults(),
sessionDur, session.getResults()))
tracker.addProfileSession(session, isSpike)
tracker.addProfileSession(session)
# set up the next task
self._task = taskMgr._getRandomTask()