added ProfileSession, don't format profile output until it's requested

This commit is contained in:
Darren Ranalli 2008-10-02 02:00:34 +00:00
parent ee89d48297
commit eaa1318450
4 changed files with 260 additions and 85 deletions

View File

@ -0,0 +1,141 @@
from pandac.libpandaexpressModules import TrueClock
from direct.showbase.PythonUtil import (
StdoutCapture, _installProfileCustomFuncs,_removeProfileCustomFuncs,
_profileWithoutGarbageLeak, _getProfileResultFileInfo, _setProfileResultsFileInfo,
_clearProfileResultFileInfo, )
import __builtin__
import profile
import pstats
from StringIO import StringIO
import marshal
class ProfileSession:
# class that encapsulates a profile of a single callable using Python's standard
# 'profile' module
#
# defers formatting of profile results until they are requested
#
# implementation sidesteps memory leak in Python profile module,
# and redirects file output to RAM file for efficiency
DefaultFilename = 'profilesession'
DefaultLines = 80
DefaultSorts = ['cumulative', 'time', 'calls']
TrueClock = TrueClock.getGlobalPtr()
def __init__(self, func, name):
self._func = func
self._name = name
self._filename = 'profileData-%s-%s' % (self._name, id(self))
self._wallClockDur = None
self._ramFile = None
self._refCount = 0
self.acquire()
def acquire(self):
self._refCount += 1
def release(self):
self._refCount -= 1
if not self._refCount:
self._destroy()
def _destroy(self):
del self._func
del self._name
del self._filename
del self._wallClockDur
del self._ramFile
def run(self):
# make sure this instance doesn't get destroyed inside self._func
self.acquire()
# put the function in the global namespace so that profile can find it
assert 'globalProfileFunc' not in __builtin__.__dict__
__builtin__.globalProfileFunc = self._func
__builtin__.globalProfileResult = [None]
# set up the RAM file
_installProfileCustomFuncs(self._filename)
# do the profiling
Profile = profile.Profile
statement = 'globalProfileResult[0]=globalProfileFunc()'
sort = -1
retVal = None
# this is based on profile.run, the code is replicated here to allow us to
# eliminate a memory leak
prof = Profile()
# try to get wall-clock duration that is as accurate as possible
startT = self.TrueClock.getShortTime()
try:
prof = prof.run(statement)
except SystemExit:
pass
# try to get wall-clock duration that is as accurate as possible
endT = self.TrueClock.getShortTime()
# this has to be run immediately after profiling for the timings to be accurate
# tell the Profile object to generate output to the RAM file
prof.dump_stats(self._filename)
dur = endT - startT
# eliminate the memory leak
del prof.dispatcher
# and store the results
self._wallClockDur = dur
# store the RAM file for later
self._ramFile = _getProfileResultFileInfo(self._filename)
# clean up the RAM file support
_removeProfileCustomFuncs(self._filename)
# clean up the globals
result = globalProfileResult[0]
del __builtin__.__dict__['globalProfileFunc']
del __builtin__.__dict__['globalProfileResult']
self.release()
return result
def getWallClockDuration(self):
# this might not be accurate, it may include time taken up by other processes
return self._wallClockDur
def getResults(self,
lines=80,
sorts=['cumulative', 'time', 'calls'],
callInfo=False):
# set up the RAM file
_installProfileCustomFuncs(self._filename)
# install the stored RAM file from self.run()
_setProfileResultsFileInfo(self._filename, self._ramFile)
# now extract human-readable output from the RAM file
# capture print output to a string
sc = StdoutCapture()
# print the info to stdout
s = pstats.Stats(self._filename)
s.strip_dirs()
for sort in sorts:
s.sort_stats(sort)
s.print_stats(lines)
if callInfo:
s.print_callees(lines)
s.print_callers(lines)
# make a copy of the print output
output = sc.getString()
# restore stdout to what it was before
sc.destroy()
# clean up the RAM file support
_removeProfileCustomFuncs(self._filename)
return output

View File

@ -1086,6 +1086,21 @@ def _installProfileCustomFuncs(filename):
movedLoadFuncs.append(marshal.load) movedLoadFuncs.append(marshal.load)
marshal.load = _profileMarshalLoad marshal.load = _profileMarshalLoad
def _getProfileResultFileInfo(filename):
return (profileFilename2file.get(filename, None),
profileFilename2marshalData.get(filename, None))
def _setProfileResultsFileInfo(filename, info):
f, m = info
if f:
profileFilename2file[filename] = f
if m:
profileFilename2marshalData[filename] = m
def _clearProfileResultFileInfo(filename):
profileFilename2file.pop(filename, None)
profileFilename2marshalData.pop(filename, None)
def _removeProfileCustomFuncs(filename): def _removeProfileCustomFuncs(filename):
assert profileFilenameList.top() == filename assert profileFilenameList.top() == filename
marshal.load = movedLoadFuncs.pop() marshal.load = movedLoadFuncs.pop()

View File

@ -16,6 +16,7 @@ from direct.directnotify.DirectNotifyGlobal import *
from direct.showbase.PythonUtil import * from direct.showbase.PythonUtil import *
from direct.showbase.MessengerGlobal import * from direct.showbase.MessengerGlobal import *
from direct.showbase import ExceptionVarDump from direct.showbase import ExceptionVarDump
from direct.showbase.ProfileSession import ProfileSession
import time import time
import fnmatch import fnmatch
import string import string
@ -406,8 +407,8 @@ class TaskManager:
self._taskProfiler = None self._taskProfiler = None
self._profileInfo = ScratchPad( self._profileInfo = ScratchPad(
taskId = None, taskId = None,
dt = None, profiled = False,
lastProfileResultString = None, session = None,
) )
# We copy this value in from __builtins__ when it gets set. # We copy this value in from __builtins__ when it gets set.
@ -809,14 +810,20 @@ class TaskManager:
# by the task when it runs # by the task when it runs
profileInfo = self._profileInfo profileInfo = self._profileInfo
doProfile = (task.id == profileInfo.taskId) doProfile = (task.id == profileInfo.taskId)
# don't profile the same task twice in a row
doProfile = doProfile and (not profileInfo.profiled)
if not self.taskTimerVerbose: if not self.taskTimerVerbose:
startTime = self.trueClock.getShortTime() startTime = self.trueClock.getShortTime()
# don't record timing info # don't record timing info
if doProfile: if doProfile:
ret = profileFunc(Functor(task, *task.extraArgs), profileSession = ProfileSession(Functor(task, *task.extraArgs),
'TASK_PROFILE:%s' % task.name, True, log=False) 'TASK_PROFILE:%s' % task.name)
ret = profileSession.run()
# set these values *after* profiling in case we're profiling the TaskProfiler
profileInfo.session = profileSession
profileInfo.profiled = True
else: else:
ret = task(*task.extraArgs) ret = task(*task.extraArgs)
endTime = self.trueClock.getShortTime() endTime = self.trueClock.getShortTime()
@ -824,9 +831,7 @@ class TaskManager:
# Record the dt # Record the dt
dt = endTime - startTime dt = endTime - startTime
if doProfile: if doProfile:
# if we profiled, record the measured duration but don't pollute the task's # if we profiled, don't pollute the task's recorded duration
# normal duration
profileInfo.dt = dt
dt = task.avgDt dt = task.avgDt
task.dt = dt task.dt = dt
@ -836,8 +841,12 @@ class TaskManager:
task.pstats.start() task.pstats.start()
startTime = self.trueClock.getShortTime() startTime = self.trueClock.getShortTime()
if doProfile: if doProfile:
ret = profileFunc(Functor(task, *task.extraArgs), profileSession = ProfileSession(Functor(task, *task.extraArgs),
'profiled-task-%s' % task.name, True, log=False) 'profiled-task-%s' % task.name)
ret = profileSession.run()
# set these values *after* profiling in case we're profiling the TaskProfiler
profileInfo.session = profileSession
profileInfo.profiled = True
else: else:
ret = task(*task.extraArgs) ret = task(*task.extraArgs)
endTime = self.trueClock.getShortTime() endTime = self.trueClock.getShortTime()
@ -847,9 +856,7 @@ class TaskManager:
# Record the dt # Record the dt
dt = endTime - startTime dt = endTime - startTime
if doProfile: if doProfile:
# if we profiled, record the measured duration but don't pollute the task's # if we profiled, don't pollute the task's recorded duration
# normal duration
profileInfo.dt = dt
dt = task.avgDt dt = task.avgDt
task.dt = dt task.dt = dt
@ -864,9 +871,6 @@ class TaskManager:
else: else:
task.avgDt = 0 task.avgDt = 0
if doProfile:
profileInfo.lastProfileResultString = self._getProfileResultString()
# warn if the task took too long # warn if the task took too long
if self.warnTaskDuration and self.globalClock: if self.warnTaskDuration and self.globalClock:
avgFrameRate = self.globalClock.getAverageFrameRate() avgFrameRate = self.globalClock.getAverageFrameRate()
@ -1020,17 +1024,21 @@ class TaskManager:
self._taskProfiler.flush(name) self._taskProfiler.flush(name)
def _setProfileTask(self, task): def _setProfileTask(self, task):
if self._profileInfo.session:
self._profileInfo.session.release()
self._profileInfo.session = None
self._profileInfo = ScratchPad( self._profileInfo = ScratchPad(
taskId = task.id, taskId = task.id,
dt = None, profiled = False,
lastProfileResultString = None, session = None,
) )
def _getTaskProfileDt(self): def _hasProfiledDesignatedTask(self):
return self._profileInfo.dt # have we run a profile of the designated task yet?
return self._profileInfo.profiled
def _getLastProfileResultString(self): def _getLastProfileSession(self):
return self._profileInfo.lastProfileResultString return self._profileInfo.session
def _getRandomTask(self): def _getRandomTask(self):
numTasks = 0 numTasks = 0

View File

@ -1,6 +1,6 @@
from direct.directnotify.DirectNotifyGlobal import directNotify from direct.directnotify.DirectNotifyGlobal import directNotify
from direct.fsm.StatePush import FunctionCall from direct.fsm.StatePush import FunctionCall
from direct.showbase.PythonUtil import getProfileResultString, Averager from direct.showbase.PythonUtil import Averager
class TaskTracker: class TaskTracker:
# call it TaskProfiler to avoid confusion for the user # call it TaskProfiler to avoid confusion for the user
@ -8,60 +8,72 @@ class TaskTracker:
def __init__(self, namePattern): def __init__(self, namePattern):
self._namePattern = namePattern self._namePattern = namePattern
self._durationAverager = Averager('%s-durationAverager' % namePattern) self._durationAverager = Averager('%s-durationAverager' % namePattern)
self._avgData = None self._avgSession = None
self._avgDataDur = None self._maxNonSpikeSession = None
self._maxNonSpikeData = None
self._maxNonSpikeDataDur = None
def destroy(self): def destroy(self):
self.flush()
del self._namePattern del self._namePattern
del self._durationAverager del self._durationAverager
def flush(self): def flush(self):
self._durationAverager.reset() self._durationAverager.reset()
self._avgData = None if self._avgSession:
self._avgDataDur = None self._avgSession.release()
self._maxNonSpikeData = None if self._maxNonSpikeSession:
self._maxNonSpikeDataDur = None self._maxNonSpikeSession.release()
self._avgSession = None
self._maxNonSpikeSession = None
def getNamePattern(self, namePattern): def getNamePattern(self, namePattern):
return self._namePattern return self._namePattern
def addDuration(self, duration, data, isSpike): def addProfileSession(self, session, isSpike):
duration = session.getWallClockDuration()
self._durationAverager.addValue(duration) self._durationAverager.addValue(duration)
storeAvgData = True storeAvg = True
storeMaxNSData = True storeMaxNS = True
if self._avgDataDur is not None: if self._avgSession is not None:
avgDur = self.getAvgDuration() avgDur = self.getAvgDuration()
if abs(self._avgDataDur - avgDur) < abs(duration - avgDur): if abs(self._avgSession.getWallClockDuration() - avgDur) < abs(duration - avgDur):
# current avg data is more average than this new sample, keep the data we've # current avg data is more average than this new sample, keep the data we've
# already got stored # already got stored
storeAvgData = False storeAvg = False
if isSpike: if isSpike:
storeMaxNSData = False storeMaxNS = False
else: else:
if self._maxNonSpikeDataDur is not None and self._maxNonSpikeDataDur > duration: if (self._maxNonSpikeSession is not None and
storeMaxNSData = False self._maxNonSpikeSession.getWallClockDuration() > duration):
if storeAvgData: storeMaxNS = False
self._avgData = data if storeAvg:
self._avgDataDur = duration if self._avgSession:
if storeMaxNSData: self._avgSession.release()
self._maxNonSpikeData = data session.acquire()
self._maxNonSpikeDataDur = duration self._avgSession = session
if storeMaxNS:
if self._maxNonSpikeSession:
self._maxNonSpikeSession.release()
session.acquire()
self._maxNonSpikeSession = session
def getAvgDuration(self): def getAvgDuration(self):
return self._durationAverager.getAverage() return self._durationAverager.getAverage()
def getNumDurationSamples(self): def getNumDurationSamples(self):
return self._durationAverager.getCount() return self._durationAverager.getCount()
def getAvgData(self): def getAvgSession(self):
# returns duration, data for closest-to-average sample # returns profile session for closest-to-average sample
return self._avgDataDur, self._avgData return self._avgSession
def getMaxNonSpikeData(self): def getMaxNonSpikeSession(self):
# returns duration, data for closest-to-average sample # returns profile session for closest-to-average sample
return self._maxNonSpikeDataDur, self._maxNonSpikeData return self._maxNonSpikeSession
def log(self): def log(self):
if self._avgSession:
self.notify.info('task CPU profile (%s):\n' self.notify.info('task CPU profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n' '== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % ( '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s' % (
self._namePattern, self._namePattern,
self._avgDataDur, self._avgData, self._avgSession.getWallClockDuration(),
self._maxNonSpikeDataDur, self._maxNonSpikeData, self._avgSession.getResults(),
self._maxNonSpikeSession.getWallClockDuration(),
self._maxNonSpikeSession.getResults(),
)) ))
else:
self.notify.info('task CPU profile (%s): no data collected' % self._namePattern)
class TaskProfiler: class TaskProfiler:
# this does intermittent profiling of tasks running on the system # this does intermittent profiling of tasks running on the system
@ -77,8 +89,6 @@ class TaskProfiler:
self._minSamples = config.GetInt('profile-task-spike-min-samples', 30) self._minSamples = config.GetInt('profile-task-spike-min-samples', 30)
# defines spike as longer than this multiple of avg task duration # defines spike as longer than this multiple of avg task duration
self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.) self._spikeThreshold = config.GetFloat('profile-task-spike-threshold', 10.)
# assign getProfileResultString to the taskMgr here, since Task.py can't import PythonUtil
taskMgr._getProfileResultString = getProfileResultString
def destroy(self): def destroy(self):
if taskMgr.getProfileTasks(): if taskMgr.getProfileTasks():
@ -117,10 +127,9 @@ class TaskProfiler:
def _doProfileTasks(self, task=None): def _doProfileTasks(self, task=None):
# gather data from the previous frame # gather data from the previous frame
# set up for the next frame # set up for the next frame
profileDt = taskMgr._getTaskProfileDt() if (self._task is not None) and taskMgr._hasProfiledDesignatedTask():
if (self._task is not None) and (profileDt is not None): session = taskMgr._getLastProfileSession()
lastProfileResult = taskMgr._getLastProfileResultString() sessionDur = session.getWallClockDuration()
if lastProfileResult:
namePattern = self._task.getNamePattern() namePattern = self._task.getNamePattern()
if namePattern not in self._namePattern2tracker: if namePattern not in self._namePattern2tracker:
self._namePattern2tracker[namePattern] = TaskTracker(namePattern) self._namePattern2tracker[namePattern] = TaskTracker(namePattern)
@ -129,19 +138,21 @@ class TaskProfiler:
# do we have enough samples? # do we have enough samples?
if tracker.getNumDurationSamples() > self._minSamples: if tracker.getNumDurationSamples() > self._minSamples:
# was this a spike? # was this a spike?
if profileDt > (tracker.getAvgDuration() * self._spikeThreshold): if sessionDur > (tracker.getAvgDuration() * self._spikeThreshold):
print 'sessionDur=%s' % sessionDur
print 'avgDur=%s' % tracker.getAvgDuration()
isSpike = True isSpike = True
avgDur, avgResult = tracker.getAvgData() avgSession = tracker.getAvgSession()
maxNSDur, maxNSResult = tracker.getMaxNonSpikeData() maxNSSession = tracker.getMaxNonSpikeSession()
self.notify.info('task CPU spike profile (%s):\n' self.notify.info('task CPU spike profile (%s):\n'
'== AVERAGE (%s wall-clock seconds)\n%s\n' '== AVERAGE (%s wall-clock seconds)\n%s\n'
'== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n' '== LONGEST NON-SPIKE (%s wall-clock seconds)\n%s\n'
'== SPIKE (%s wall-clock seconds)\n%s' % ( '== SPIKE (%s wall-clock seconds)\n%s' % (
namePattern, namePattern,
avgDur, avgResult, avgSession.getWallClockDuration(), avgSession.getResults(),
maxNSDur, maxNSResult, maxNSSession.getWallClockDuration(), maxNSSession.getResults(),
profileDt, lastProfileResult)) sessionDur, session.getResults()))
tracker.addDuration(profileDt, lastProfileResult, isSpike) tracker.addProfileSession(session, isSpike)
# set up the next task # set up the next task
self._task = taskMgr._getRandomTask() self._task = taskMgr._getRandomTask()