added ProfileSession, don't format profile output until it's requested

This commit is contained in:
Darren Ranalli 2008-10-02 02:00:34 +00:00
parent ee89d48297
commit eaa1318450
4 changed files with 260 additions and 85 deletions

View File

@ -0,0 +1,141 @@
from pandac.libpandaexpressModules import TrueClock
from direct.showbase.PythonUtil import (
StdoutCapture, _installProfileCustomFuncs,_removeProfileCustomFuncs,
_profileWithoutGarbageLeak, _getProfileResultFileInfo, _setProfileResultsFileInfo,
_clearProfileResultFileInfo, )
import __builtin__
import profile
import pstats
from StringIO import StringIO
import marshal
class ProfileSession:
# class that encapsulates a profile of a single callable using Python's standard
# 'profile' module
#
# defers formatting of profile results until they are requested
#
# implementation sidesteps memory leak in Python profile module,
# and redirects file output to RAM file for efficiency
DefaultFilename = 'profilesession'
DefaultLines = 80
DefaultSorts = ['cumulative', 'time', 'calls']
TrueClock = TrueClock.getGlobalPtr()
def __init__(self, func, name):
self._func = func
self._name = name
self._filename = 'profileData-%s-%s' % (self._name, id(self))
self._wallClockDur = None
self._ramFile = None
self._refCount = 0
self.acquire()
def acquire(self):
self._refCount += 1
def release(self):
self._refCount -= 1
if not self._refCount:
self._destroy()
def _destroy(self):
del self._func
del self._name
del self._filename
del self._wallClockDur
del self._ramFile
def run(self):
# make sure this instance doesn't get destroyed inside self._func
self.acquire()
# put the function in the global namespace so that profile can find it
assert 'globalProfileFunc' not in __builtin__.__dict__
__builtin__.globalProfileFunc = self._func
__builtin__.globalProfileResult = [None]
# set up the RAM file
_installProfileCustomFuncs(self._filename)
# do the profiling
Profile = profile.Profile
statement = 'globalProfileResult[0]=globalProfileFunc()'
sort = -1
retVal = None
# this is based on profile.run, the code is replicated here to allow us to
# eliminate a memory leak
prof = Profile()
# try to get wall-clock duration that is as accurate as possible
startT = self.TrueClock.getShortTime()
try:
prof = prof.run(statement)
except SystemExit:
pass
# try to get wall-clock duration that is as accurate as possible
endT = self.TrueClock.getShortTime()
# this has to be run immediately after profiling for the timings to be accurate
# tell the Profile object to generate output to the RAM file
prof.dump_stats(self._filename)
dur = endT - startT
# eliminate the memory leak
del prof.dispatcher
# and store the results
self._wallClockDur = dur
# store the RAM file for later
self._ramFile = _getProfileResultFileInfo(self._filename)
# clean up the RAM file support
_removeProfileCustomFuncs(self._filename)
# clean up the globals
result = globalProfileResult[0]
del __builtin__.__dict__['globalProfileFunc']
del __builtin__.__dict__['globalProfileResult']
self.release()
return result
def getWallClockDuration(self):
# this might not be accurate, it may include time taken up by other processes
return self._wallClockDur
def getResults(self,
lines=80,
sorts=['cumulative', 'time', 'calls'],
callInfo=False):
# set up the RAM file
_installProfileCustomFuncs(self._filename)
# install the stored RAM file from self.run()
_setProfileResultsFileInfo(self._filename, self._ramFile)
# now extract human-readable output from the RAM file
# capture print output to a string
sc = StdoutCapture()
# print the info to stdout
s = pstats.Stats(self._filename)
s.strip_dirs()
for sort in sorts:
s.sort_stats(sort)
s.print_stats(lines)
if callInfo:
s.print_callees(lines)
s.print_callers(lines)
# make a copy of the print output
output = sc.getString()
# restore stdout to what it was before
sc.destroy()
# clean up the RAM file support
_removeProfileCustomFuncs(self._filename)
return output

View File

@ -1086,6 +1086,21 @@ def _installProfileCustomFuncs(filename):
movedLoadFuncs.append(marshal.load)
marshal.load = _profileMarshalLoad
def _getProfileResultFileInfo(filename):
return (profileFilename2file.get(filename, None),
profileFilename2marshalData.get(filename, None))
def _setProfileResultsFileInfo(filename, info):
f, m = info
if f:
profileFilename2file[filename] = f
if m:
profileFilename2marshalData[filename] = m
def _clearProfileResultFileInfo(filename):
profileFilename2file.pop(filename, None)
profileFilename2marshalData.pop(filename, None)
def _removeProfileCustomFuncs(filename):
assert profileFilenameList.top() == filename
marshal.load = movedLoadFuncs.pop()

View File

@ -16,6 +16,7 @@ from direct.directnotify.DirectNotifyGlobal import *
from direct.showbase.PythonUtil import *
from direct.showbase.MessengerGlobal import *
from direct.showbase import ExceptionVarDump
from direct.showbase.ProfileSession import ProfileSession
import time
import fnmatch
import string
@ -406,8 +407,8 @@ class TaskManager:
self._taskProfiler = None
self._profileInfo = ScratchPad(
taskId = None,
dt = None,
lastProfileResultString = None,
profiled = False,
session = None,
)
# We copy this value in from __builtins__ when it gets set.
@ -809,14 +810,20 @@ class TaskManager:
# by the task when it runs
profileInfo = self._profileInfo
doProfile = (task.id == profileInfo.taskId)
# don't profile the same task twice in a row
doProfile = doProfile and (not profileInfo.profiled)
if not self.taskTimerVerbose:
startTime = self.trueClock.getShortTime()
# don't record timing info
if doProfile:
ret = profileFunc(Functor(task, *task.extraArgs),
'TASK_PROFILE:%s' % task.name, True, log=False)
profileSession = ProfileSession(Functor(task, *task.extraArgs),
'TASK_PROFILE:%s' % task.name)
ret = profileSession.run()
# set these values *after* profiling in case we're profiling the TaskProfiler
profileInfo.session = profileSession
profileInfo.profiled = True
else:
ret = task(*task.extraArgs)
endTime = self.trueClock.getShortTime()
@ -824,9 +831,7 @@ class TaskManager:
# Record the dt
dt = endTime - startTime
if doProfile:
# if we profiled, record the measured duration but don't pollute the task's
# normal duration
profileInfo.dt = dt
# if we profiled, don't pollute the task's recorded duration
dt = task.avgDt
task.dt = dt
@ -836,8 +841,12 @@ class TaskManager:
task.pstats.start()
startTime = self.trueClock.getShortTime()
if doProfile:
ret = profileFunc(Functor(task, *task.extraArgs),
'profiled-task-%s' % task.name, True, log=False)
profileSession = ProfileSession(Functor(task, *task.extraArgs),
'profiled-task-%s' % task.name)
ret = profileSession.run()
# set these values *after* profiling in case we're profiling the TaskProfiler
profileInfo.session = profileSession
profileInfo.profiled = True
else:
ret = task(*task.extraArgs)
endTime = self.trueClock.getShortTime()
@ -847,9 +856,7 @@ class TaskManager:
# Record the dt
dt = endTime - startTime
if doProfile:
# if we profiled, record the measured duration but don't pollute the task's
# normal duration
profileInfo.dt = dt
# if we profiled, don't pollute the task's recorded duration
dt = task.avgDt
task.dt = dt
@ -864,9 +871,6 @@ class TaskManager:
else:
task.avgDt = 0
if doProfile:
profileInfo.lastProfileResultString = self._getProfileResultString()
# warn if the task took too long
if self.warnTaskDuration and self.globalClock:
avgFrameRate = self.globalClock.getAverageFrameRate()
@ -1020,17 +1024,21 @@ class TaskManager:
self._taskProfiler.flush(name)
def _setProfileTask(self, task):
if self._profileInfo.session:
self._profileInfo.session.release()
self._profileInfo.session = None
self._profileInfo = ScratchPad(
taskId = task.id,
dt = None,
lastProfileResultString = None,
profiled = False,
session = None,
)
def _getTaskProfileDt(self):
return self._profileInfo.dt
def _hasProfiledDesignatedTask(self):
# have we run a profile of the designated task yet?
return self._profileInfo.profiled
def _getLastProfileResultString(self):
return self._profileInfo.lastProfileResultString
def _getLastProfileSession(self):
return self._profileInfo.session
def _getRandomTask(self):
numTasks = 0

View File

@ -1,6 +1,6 @@
from direct.directnotify.DirectNotifyGlobal import directNotify
from direct.fsm.StatePush import FunctionCall
from direct.showbase.PythonUtil import getProfileResultString, Averager
from direct.showbase.PythonUtil import Averager
class TaskTracker:
# call it TaskProfiler to avoid confusion for the user
@ -8,60 +8,72 @@ class TaskTracker:
def __init__(self, namePattern):
self._namePattern = namePattern
self._durationAverager = Averager('%s-durationAverager' % namePattern)
self._avgData = None
self._avgDataDur = None
self._maxNonSpikeData = None
self._maxNonSpikeDataDur = None
self._avgSession = None
self._maxNonSpikeSession = None
def destroy(self):
self.flush()
del self._namePattern
del self._durationAverager
def flush(self):
self._durationAverager.reset()
self._avgData = None
self._avgDataDur = None
self._maxNonSpikeData = None
self._maxNonSpikeDataDur = None
if self._avgSession:
self._avgSession.release()
if self._maxNonSpikeSession:
self._maxNonSpikeSession.release()
self._avgSession = None
self._maxNonSpikeSession = None
def getNamePattern(self, namePattern):
return self._namePattern
def addDuration(self, duration, data, isSpike):
def addProfileSession(self, session, isSpike):
duration = session.getWallClockDuration()
self._durationAverager.addValue(duration)
storeAvgData = True
storeMaxNSData = True
if self._avgDataDur is not None:
storeAvg = True
storeMaxNS = True
if self._avgSession is not None:
avgDur = self.getAvgDuration()
if abs(self._avgDataDur - avgDur) < abs(duration - avgDur):
if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur):
# current avg data is more average than this new sample, keep the data we've
# already got stored
storeAvgData = False
storeAvg = False
if isSpike:
storeMaxNSData = False
storeMaxNS = False
else:
if self._maxNonSpikeDataDur is not None and self._maxNonSpikeDataDur > duration:
storeMaxNSData = False
if storeAvgData:
self._avgData = data
self._avgDataDur = duration
if storeMaxNSData:
self._maxNonSpikeData = data
self._maxNonSpikeDataDur = duration
if (self._maxNonSpikeSession is not None and
self._maxNonSpikeSession.getWallClockDuration() > duration):
storeMaxNS = False
if storeAvg:
if self._avgSession:
self._avgSession.release()
session.acquire()
self._avgSession = session
if storeMaxNS:
if self._maxNonSpikeSession:
self._maxNonSpikeSession.release()
session.acquire()
self._maxNonSpikeSession = session
def getAvgDuration(self):
return self._durationAverager.getAverage()
def getNumDurationSamples(self):
return self._durationAverager.getCount()
def getAvgData(self):
# returns duration, data for closest-to-average sample
return self._avgDataDur, self._avgData
def getMaxNonSpikeData(self):
# returns duration, data for closest-to-average sample
return self._maxNonSpikeDataDur, self._maxNonSpikeData
def getAvgSession(self):
# returns profile session for closest-to-average sample
return self._avgSession
def getMaxNonSpikeSession(self):
# returns profile session for closest-to-average sample
return self._maxNonSpikeSession
def log(self):
self.notify.info('task CPU profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
self._namePattern,
self._avgDataDur, self._avgData,
self._maxNonSpikeDataDur, self._maxNonSpikeData,
))
if self._avgSession:
self.notify.info('task CPU profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
self._namePattern,
self._avgSession.getWallClockDuration(),
self._avgSession.getResults(),
self._maxNonSpikeSession.getWallClockDuration(),
self._maxNonSpikeSession.getResults(),
))
else:
self.notify.info('task CPU profile (%s): no data collected' % self._namePattern)
class TaskProfiler:
# this does intermittent profiling of tasks running on the system
@ -77,8 +89,6 @@ class TaskProfiler:
self._minSamples = config.GetInt('profile-task-spike-min-samples', 30)
# defines spike as longer than this multiple of avg task duration
self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
# assign getProfileResultString to the taskMgr here, since Task.py can't import PythonUtil
taskMgr._getProfileResultString = getProfileResultString
def destroy(self):
if taskMgr.getProfileTasks():
@ -117,31 +127,32 @@ class TaskProfiler:
def _doProfileTasks(self, task=None):
# gather data from the previous frame
# set up for the next frame
profileDt = taskMgr._getTaskProfileDt()
if (self._task is not None) and (profileDt is not None):
lastProfileResult = taskMgr._getLastProfileResultString()
if lastProfileResult:
namePattern = self._task.getNamePattern()
if namePattern not in self._namePattern2tracker:
self._namePattern2tracker[namePattern] = TaskTracker(namePattern)
tracker = self._namePattern2tracker[namePattern]
isSpike = False
# do we have enough samples?
if tracker.getNumDurationSamples() > self._minSamples:
# was this a spike?
if profileDt > (tracker.getAvgDuration() * self._spikeThreshold):
isSpike = True
avgDur, avgResult = tracker.getAvgData()
maxNSDur, maxNSResult = tracker.getMaxNonSpikeData()
self.notify.info('task CPU spike profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
'== SPIKE (%s wall-clock seconds)\n%s' % (
namePattern,
avgDur, avgResult,
maxNSDur, maxNSResult,
profileDt, lastProfileResult))
tracker.addDuration(profileDt, lastProfileResult, isSpike)
if (self._task is not None) and taskMgr._hasProfiledDesignatedTask():
session = taskMgr._getLastProfileSession()
sessionDur = session.getWallClockDuration()
namePattern = self._task.getNamePattern()
if namePattern not in self._namePattern2tracker:
self._namePattern2tracker[namePattern] = TaskTracker(namePattern)
tracker = self._namePattern2tracker[namePattern]
isSpike = False
# do we have enough samples?
if tracker.getNumDurationSamples() > self._minSamples:
# was this a spike?
if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold):
print 'sessionDur=%s' % sessionDur
print 'avgDur=%s' % tracker.getAvgDuration()
isSpike = True
avgSession = tracker.getAvgSession()
maxNSSession = tracker.getMaxNonSpikeSession()
self.notify.info('task CPU spike profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
'== SPIKE (%s wall-clock seconds)\n%s' % (
namePattern,
avgSession.getWallClockDuration(), avgSession.getResults(),
maxNSSession.getWallClockDuration(), maxNSSession.getResults(),
sessionDur, session.getResults()))
tracker.addProfileSession(session, isSpike)
# set up the next task
self._task = taskMgr._getRandomTask()