558 lines
20 KiB
C++
Executable File
558 lines
20 KiB
C++
Executable File
//-----------------------------------------------------------------------------
|
|
// Torque Game Engine
|
|
// Copyright (C) GarageGames.com, Inc.
|
|
//-----------------------------------------------------------------------------
|
|
|
|
#ifndef _PROFILER_H_
|
|
#define _PROFILER_H_
|
|
|
|
#include "core/torqueConfig.h"
|
|
|
|
#ifdef TORQUE_ENABLE_PROFILER
|
|
|
|
// First, define our macros; that way if we use the profiler in any
|
|
// of the includes we depend on, we'll have them available.
|
|
#define PROFILE_START(name) \
|
|
static ProfilerRoot pdata##name##obj (#name); \
|
|
if(gProfiler) gProfiler->hashPush(& pdata##name##obj )
|
|
|
|
#define PROFILE_END() if(gProfiler) gProfiler->hashPop()
|
|
|
|
#else
|
|
|
|
// If profiler is disabled then just stub the blocks out.
|
|
#define PROFILE_START(x)
|
|
#define PROFILE_END()
|
|
|
|
#endif
|
|
|
|
// Then include our includes and the real profiler code that needs them.
|
|
#ifdef TORQUE_ENABLE_PROFILER
|
|
|
|
#include "platform/platform.h"
|
|
#include "core/stringTable.h"
|
|
#include "console/console.h"
|
|
#include "core/tVector.h"
|
|
#include "core/fileStream.h"
|
|
#include "platform/platformThread.h"
|
|
|
|
struct ProfilerData;
|
|
struct ProfilerRoot;
|
|
struct ProfilerRootData;
|
|
struct ProfilerInstance;
|
|
|
|
/// Torque includes a full-featured code profiler, which can be used to
|
|
/// measure the performance of game code.
|
|
///
|
|
/// The profiler tracks performance information about a number of blocks in
|
|
/// the code base defined by matched sets of PROFILE_START() and
|
|
/// PROFILER_END() blocks. Basically, you can use it to see how much time
|
|
/// a chunk of code is taking out of your overall execution time.
|
|
///
|
|
/// The information tracked includes:
|
|
/// * Percent of time spent in just this block.
|
|
/// * Percent of time spent in this block as well as any other blocks
|
|
/// encountered inside of it.
|
|
/// * Number of invocations, ie, how many times this block was run.
|
|
///
|
|
/// The profiler tracks both global statistics for each block, as well as
|
|
/// providing hierarchical data about performance (ie, what blocks are
|
|
/// called from inside what other blocks, and what time is spent where).
|
|
///
|
|
/// While this information is not as fine grained as that provided by
|
|
/// serious, low-level profiling tools (ie, gprof, VTune, Shark), the
|
|
/// overhead is lower. The profiler can be enabled in beta or debug builds
|
|
/// of your application, and used to easily dump information about low
|
|
/// performance situations. Because it is Torque-aware, it also gives more
|
|
/// useful information in some cases than lower level profilers can provide.
|
|
///
|
|
/// For instance, it can easily show when and how frequently you are
|
|
/// allocating/freeing memory, or what the relative breakdown in time is
|
|
/// between player update code and physics for other items. It can give
|
|
/// useful information for trying to decide if your game is CPU or GPU
|
|
/// bound, or if code is being erroneously run too much. (Seeing a helper
|
|
/// function is getting run several dozen thousand times more than its
|
|
/// parent can be one tip-off. ;)
|
|
///
|
|
/// <b>How To Use The Profiler</b>
|
|
///
|
|
/// First, you must #define TORQUE_ENABLE_PROFILER in torqueConfig.h in
|
|
/// order to active the profiler. Then you can use it from script. Examples
|
|
/// of script use:
|
|
///
|
|
/// @code
|
|
/// // profilerEnable enables or disables profiling. Data is only gathered
|
|
/// // when the profiler is enabled.
|
|
/// profilerEnable(bool enable);
|
|
/// profilerReset(); //resets the data gathered by the profiler
|
|
/// profilerDump(); //dumps all profiler data to the console
|
|
/// profilerDumpToFile(string filename); //dumps all profiler data to a given file
|
|
/// @endcode
|
|
///
|
|
/// The C++ code side of the profiler uses pairs of PROFILE_START() and
|
|
/// PROFILE_END().
|
|
///
|
|
/// When using these macros, make sure there is a PROFILE_END() for every
|
|
/// PROFILE_START and a PROFILE_START() for every PROFILE_END(). It is fine
|
|
/// to nest these macros, however, you must make sure that no matter what
|
|
/// execution path the code takes, the PROFILE macros will be balanced. (This
|
|
/// usually means having a PROFILE_END() before every return statement in a
|
|
/// function, for instance.)
|
|
///
|
|
/// The profiler can be used to locate areas of code that are slow or should
|
|
/// be considered for optimization. Since it tracks the relative time of
|
|
/// execution of that code to the execution of the main loop, it is possible
|
|
/// to benchmark any given code to see if changes made will actually
|
|
/// improve performance.
|
|
///
|
|
/// Here are some examples:
|
|
/// @code
|
|
/// PROFILE_START(TerrainRender);
|
|
/// //some code here
|
|
/// PROFILE_START(TerrainRenderGridSquare);
|
|
/// //some code here
|
|
/// PROFILE_END();
|
|
/// //possibly some code here
|
|
/// PROFILE_END();
|
|
///
|
|
/// void foo()
|
|
/// {
|
|
/// PROFILE_START(foo_do);
|
|
/// if(doThing())
|
|
/// {
|
|
/// PROFILE_END();
|
|
/// return true;
|
|
/// }
|
|
/// PROFILE_END();
|
|
/// return false;
|
|
/// }
|
|
/// @endcode
|
|
///
|
|
/// <b>What Data the Profiler Gathers</b>
|
|
///
|
|
/// The profiler gathers two general representations of the profiling data.
|
|
///
|
|
/// First, it gathers per block data. This is data that relates to every
|
|
/// single execution of a given block - for instance, foo_do in the
|
|
/// above example code has its invocation count and total and non-sub time
|
|
/// tracked regardless of where it is called from. This is used to generate
|
|
/// the first half of the profiler data dump, the sorted list of what blocks
|
|
/// are, overall, taking up the most non-sub time.
|
|
///
|
|
/// Second, data is gathered per instance of a block. This is the data used
|
|
/// to display the nice formatted tree that makes up the second half of a
|
|
/// profiler data dump. A single block may have many instances as it is
|
|
/// called from different parts of the codebase. For instance, foo_do might
|
|
/// get called from the main loop, as well as from the console, making it
|
|
/// show up in several places on the hierarchical view:
|
|
///
|
|
/// @code
|
|
/// MainLoop
|
|
/// foo_do
|
|
/// ConsoleCode
|
|
/// foo_do
|
|
/// @endcode
|
|
///
|
|
/// There are three values gathered per block:
|
|
/// - Total time. Shown as a percentage of the time period that was profiled,
|
|
/// this is the time spent in both this block and all its children.
|
|
/// - Non-sub time. This is the amount of time spent ONLY in this block,
|
|
/// and not any of its children.
|
|
/// - Invocations. The number of times this block was executed.
|
|
///
|
|
/// <b>How the Profiler Works</b>
|
|
///
|
|
/// Internally, all profiler state is managed by ProfilerInstance. The
|
|
/// gProfiler is just a wrapper around this, responsible for dispatching
|
|
/// profiler events (ie, PROFILER_START and PROFILE_END macros, which
|
|
/// correspond to calls to Profiler::hashPush and Profiler::hashPop
|
|
/// respectively) to the appropriate ProfilerInstance. There is one
|
|
/// ProfilerInstance per thread, representing the entire profiler state
|
|
/// for that thread.
|
|
///
|
|
/// ProfilerRoots are allocated statically in each PROFILER_START macro, and
|
|
/// are used as "anchors" for a table of ProfilerRootDatas contained in each
|
|
/// ProfilerInstance. Each ProfilerRoot is assigned an ID which is used for
|
|
/// fast lookups in the ProfilerInstance's ProfilerRootData table. There are
|
|
/// potentially many ProfilerRootDatas for a given ProfilerRoot, but only one
|
|
/// and exactly one PRD for a PR in a given ProfilerInstance.
|
|
///
|
|
/// Together, a PR and a PRD represent a "block", i.e., a specific chunk
|
|
/// of code begun with a PROFILE_START() and ended with one or more
|
|
/// PROFILE_END()s. This is all that is needed to gather the per-block
|
|
/// data mentioned in the previous section.
|
|
///
|
|
/// However, context is often important in analysis, so we also gather
|
|
/// hierarchical data, tracking what blocks are called from which other
|
|
/// blocks. This allows us to know that, for instance, we only allocate
|
|
/// memory once out of every hundred frames in the terrain inner loops,
|
|
/// while executing a single console function can involve several frees
|
|
/// and allocs. (That is to say, TerrainRender is invoked hundreds of times,
|
|
/// while MemoryAlloc and MemoryFree only show up under it once or twice,
|
|
/// and ConsoleExec shows up once or twice with MemoryAlloc and MemoryFree
|
|
/// being invoked a dozen times each.)
|
|
///
|
|
/// To acheive this, we make use of the ProfilerData structure. For each
|
|
/// "instance" of a block we encounter, we allocate a ProfilerData and place
|
|
/// it in a tree structure. A given ProfilerRootData in a ProfilerInstance
|
|
/// might have many ProfilerDatas, representing different situations under
|
|
/// which it is executed, ie, different blocks that call it.
|
|
///
|
|
/// So we build a list, and grow a tree from it. By tracking our position
|
|
/// in the tree, we can easily build up a consistent view of how our code
|
|
/// blocks are called, and how we spend our time in different calling
|
|
/// hierarchies.
|
|
class Profiler
|
|
{
|
|
friend class ProfilerInstance;
|
|
|
|
ProfilerInstance *mInstanceListHead;
|
|
ThreadStorage mCurrentInstance;
|
|
|
|
/// Get the current ProfilerInstance for the executing thread, or
|
|
/// allocate it if it is unavailable.
|
|
ProfilerInstance *getCurrentInstance();
|
|
|
|
/// We only want one thing to dump at a time - imagine the madness
|
|
/// of interleaved output - so we stick a mutex here to manage that, and
|
|
/// some other state information, as it's easy to get to from wherever
|
|
/// we've got the lock.
|
|
void *mDumpMutex;
|
|
|
|
bool mCurrentDumpIsConsole;
|
|
bool mCurrentDumpIsFile;
|
|
Stream *mCurrentDumpStream;
|
|
|
|
public:
|
|
Profiler();
|
|
~Profiler();
|
|
|
|
/// Get a list of the IDs of all the threads we're profiling. Pass these
|
|
/// IDs into the function variants that require an id parameter in order
|
|
/// to control a specific thread's profiling activity.
|
|
void getThreadList(Vector<U32> &ids);
|
|
|
|
ProfilerInstance *getInstanceByID(const U32 id);
|
|
|
|
void enable(bool enable);
|
|
void dumpToConsole();
|
|
void dumpToFile(const char *fileName);
|
|
void enableMarker(const char *marker, bool enable);
|
|
void reset();
|
|
|
|
void hashPush(ProfilerRoot *pr);
|
|
void hashPop();
|
|
};
|
|
|
|
extern Profiler *gProfiler;
|
|
|
|
/// Helper structure used to store general information about a profiler
|
|
/// block.
|
|
///
|
|
/// Everything that follows are implementation details.
|
|
///
|
|
/// @see Profiler for information on how to use the profiler.
|
|
///
|
|
/// We use a slightly indirected scheme to manage the complexities of
|
|
/// profiling threads. Basically, the ProfilerRoot structures store the
|
|
/// name and other constant, global information about a given profiler
|
|
/// block. We walk the list at startup to generate a list at startup
|
|
/// which is then used to lookup into a thread-specific table of
|
|
/// ProfilerRootDatas. This way we can easily track profile information
|
|
/// for each thread without duplication of information or trying to make
|
|
/// the core profiling data structures thread-safe via synchronization
|
|
/// primitives, which can be horribly slow.
|
|
///
|
|
/// Profilers must be fast and deterministic, so that they don't impact
|
|
/// what they measure. :)
|
|
struct ProfilerRoot
|
|
{
|
|
/// Name of this profile block.
|
|
const char *mName;
|
|
|
|
/// Hash of the profile block name, for fast lookups.
|
|
U32 mNameHash;
|
|
|
|
/// Next block in the master list.
|
|
ProfilerRoot *mNextRoot;
|
|
|
|
/// Assigned sequentially to map to the thread table.
|
|
/// This allows us to quickly lookup into it. (A hash
|
|
/// would be slow.)
|
|
U32 mID;
|
|
|
|
// This structure is exactly 16 bytes. (4 words at 4 bytes each)
|
|
// Following are non-data members:
|
|
|
|
/// Head of master list of all profiler blocks.
|
|
static ProfilerRoot *smRootList;
|
|
|
|
/// Global count of roots.
|
|
static U32 smRootCount;
|
|
|
|
/// Constructor - called by the PROFILE_START macro.
|
|
ProfilerRoot(const char *name);
|
|
};
|
|
|
|
/// Profiler thread table data element. One of these is created for every
|
|
/// ProfilerRoot, to store thread specific data. They are indexed by
|
|
/// ProfilerRoot::mID.
|
|
///
|
|
/// @see Profiler for information on how to use the profiler.
|
|
struct ProfilerRootData
|
|
{
|
|
ProfilerRootData();
|
|
~ProfilerRootData();
|
|
|
|
/// We keep a pointer back to the original root structure so we can do
|
|
/// name lookups and the like.
|
|
ProfilerRoot *mRoot;
|
|
|
|
/// Head of linked list of actual ProfilerDatas for this
|
|
/// block.
|
|
ProfilerData *mFirstProfilerData;
|
|
|
|
/// Should profile information for this block be gathered?
|
|
bool mEnabled;
|
|
|
|
/// @name Data Accumulators
|
|
///
|
|
///
|
|
/// @{
|
|
|
|
/// Total time spent in all instances of this block.
|
|
F64 mTotalTime;
|
|
|
|
/// Total time spent in all child instances of this block, excluding
|
|
/// this block.
|
|
F64 mSubTime;
|
|
|
|
/// Total number of times all instances of this block have been invoked.
|
|
U32 mTotalInvokeCount;
|
|
|
|
/// @}
|
|
|
|
/// Reset all the data accumulators.
|
|
void reset();
|
|
};
|
|
|
|
/// Runtime information on a given instance of a profiler block.
|
|
///
|
|
/// @see Profiler for information on how to use the profiler.
|
|
///
|
|
/// Generated on the fly as we build our profiler tree, this represents
|
|
/// profiled instances of a block. This enables us to accurately track
|
|
/// recursive situations and the like.
|
|
///
|
|
/// It is organized in the following lists:
|
|
/// - Root list. Each root has a list of all its instances associated
|
|
/// with it.
|
|
/// - ProfilerData list. Each thread keeps a master list of all the
|
|
/// ProfilerDatas associated with it.
|
|
/// - Hash list. Children of a ProfilerData are kept in a hash indexed
|
|
/// by name. Thus a bin list is maintained for each bin of the hash.
|
|
/// - Child list. The children of a ProfilerData are all kept in a
|
|
/// list for easy traversal.
|
|
///
|
|
struct ProfilerData
|
|
{
|
|
ProfilerData();
|
|
~ProfilerData();
|
|
|
|
/// What ProfilerRootData do we belong to?
|
|
ProfilerRootData *mOwningRoot;
|
|
|
|
/// Next ProfilerData for this root.
|
|
ProfilerData *mNextForRoot;
|
|
|
|
/// Master list for the profiled thread.
|
|
ProfilerData *mNextProfilerData;
|
|
|
|
/// Next ProfilerData in the hash bin.
|
|
ProfilerData *mNextHash;
|
|
|
|
/// Who is our parent?
|
|
ProfilerData *mParent;
|
|
|
|
/// Next sibling of our parent.
|
|
ProfilerData *mNextSibling;
|
|
|
|
/// Our first child.
|
|
ProfilerData *mFirstChild;
|
|
|
|
enum {
|
|
HashTableSize = 32,
|
|
};
|
|
ProfilerData *mChildHash[HashTableSize];
|
|
|
|
/// For fast lookups, we cache the last seen child. If we see the same
|
|
/// profiler block many times in a row this can save us a lot of
|
|
/// traversal in Profiler::hashPush.
|
|
ProfilerData *mLastSeenProfiler;
|
|
|
|
/// The hash of this block's names, kept here for fast lookups.
|
|
U32 mHash;
|
|
|
|
/// Number of times we have recursed into this block from itself.
|
|
U32 mSubDepth;
|
|
|
|
/// Number of times this block instance has been run.
|
|
U32 mInvokeCount;
|
|
|
|
/// The high performance timer needs some memory to store its start time.
|
|
U32 mStartTime[2];
|
|
|
|
/// Total time we've spent in this block instance.
|
|
F64 mTotalTime;
|
|
|
|
/// Total time we've spent in this block instance's children, excluding
|
|
/// this instance itself.
|
|
F64 mSubTime;
|
|
};
|
|
|
|
/// Manages thread-local profiler state.
|
|
///
|
|
/// @see Profiler for information on how to use the profiler.
|
|
///
|
|
/// This is the meat of the profiler; there are one of these for each
|
|
/// profiler and they are responsible for tracking all of the state for a
|
|
/// thread's profile information.
|
|
class ProfilerInstance
|
|
{
|
|
friend class Profiler;
|
|
|
|
/// @name Dump Code
|
|
///
|
|
/// Miscellaneous helper functions for dumping the profiler state to
|
|
/// disk or console. We have a unified interface to avoid duplicating
|
|
/// a bunch of complex output code for file/console output.
|
|
///
|
|
/// If you have a burning need to know what these guys do, just read the
|
|
/// code. It's really pretty straightforward.
|
|
///
|
|
/// @{
|
|
|
|
/// Print data to the appropriate place(s). (Console, stream, or both.)
|
|
static void printFunc(char *fmt, ...);
|
|
|
|
/// Print a sorted vector of profiler block data in a nice way.
|
|
static void printSortedRootData(Vector<ProfilerRootData*> &rootVector, F64 totalTime);
|
|
|
|
/// Recursively dump part of the profiler data tree.
|
|
static void profilerDataDumpRecurse(ProfilerData *data, char *buffer, U32 bufferLen, F64 totalTime);
|
|
|
|
/// @}
|
|
|
|
/// We tend to encounter new blocks part-way through execution. This is
|
|
/// due to the nature of static variables (which our global roots are).
|
|
/// They only initialize when they come into scope! So, we have to be able
|
|
/// to grow our local root data table. This ensures it has at least
|
|
/// newCount slows in it, and makes sure everything points to the right
|
|
/// roots.
|
|
void growRoots(U32 newCount);
|
|
|
|
enum
|
|
{
|
|
MaxStackDepth = 256,
|
|
DumpFileNameLength = 256
|
|
};
|
|
|
|
/// ID of the thread we have profiled data for.
|
|
U32 mThreadID;
|
|
|
|
/// Mutex to manage some nasty conditions - we lock on deconstruction,
|
|
/// and also when we're dumping so we don't try to dump from two places
|
|
/// at the same time.
|
|
///
|
|
/// @note This isn't bullet proof yet on the deconstruct case, but we
|
|
/// don't actually kill off profiler data so we're ok. :)
|
|
void *mMutex;
|
|
|
|
/// Thread-local table of all the ProfilerRootDatas.
|
|
///
|
|
/// Since we have to keep track of per-block data specific to each
|
|
/// thread (it's a pain to update, nevermind interpret data global to
|
|
/// all threads), we keep a local copy of everything here so we're
|
|
/// essentially independent of the ProfilerRoots scattered throughout
|
|
/// the codebase.
|
|
ProfilerRootData *mRootTable;
|
|
|
|
/// Number of roots we have current allocated. Since not all roots are
|
|
/// known (they get initialized as they're visited...), we have to be
|
|
/// able to reallocate our thread-local root table. This isn't much fun
|
|
/// but enables us to be much more robust.
|
|
///
|
|
/// @see growRoots
|
|
U32 mRootCount;
|
|
|
|
/// Our current ProfilerData. This indicates where in the profiler tree
|
|
/// we are at the moment, and is updated on (almost) every push or pop.
|
|
ProfilerData *mCurrentProfilerData;
|
|
|
|
/// List of all allocated ProfilerDatas. Important for book-keeping
|
|
/// and update operations.
|
|
ProfilerData *mProfileList;
|
|
|
|
/// This is the root ProfilerData in our local tree of profiler data.
|
|
ProfilerData *mRootProfilerData;
|
|
|
|
/// Currently enabled?
|
|
bool mEnabled;
|
|
|
|
/// Current depth of profiler stack.
|
|
S32 mStackDepth;
|
|
|
|
/// Maximum allowed profiler stack depth.
|
|
U32 mMaxStackDepth;
|
|
|
|
/// Enable state for next frame.
|
|
bool mNextEnable;
|
|
|
|
/// Should we dump to console at next frame?
|
|
bool mDumpToConsole;
|
|
|
|
/// Should we dump to file at next frame?
|
|
bool mDumpToFile;
|
|
|
|
/// If so, where?
|
|
char mDumpFileName[DumpFileNameLength];
|
|
|
|
/// Next allocated ProfilerInstance.
|
|
ProfilerInstance *mNextInstance;
|
|
|
|
/// Do a bunch of extra checking to make sure our data structures
|
|
/// are clean.
|
|
///
|
|
/// @note This is mostly kept around for debug situations, and is not
|
|
/// normally called.
|
|
void validate();
|
|
|
|
/// Look up the ProfilerRootData that corresponds to a given
|
|
/// ProfilerRoot.(
|
|
inline ProfilerRootData *lookup(ProfilerRoot *pr)
|
|
{
|
|
return mRootTable + pr->mID;
|
|
}
|
|
|
|
/// Helper function to do the actual dumping. You should have the
|
|
/// instance's mutex before you call this.
|
|
void dump();
|
|
|
|
public:
|
|
ProfilerInstance();
|
|
~ProfilerInstance();
|
|
|
|
void hashPush(ProfilerRoot *pr);
|
|
void hashPop();
|
|
|
|
void enable(bool enable);
|
|
void enableMarker(const char *name, bool enable);
|
|
void reset();
|
|
void dumpToConsole();
|
|
void dumpToFile(const char *fileName);
|
|
};
|
|
|
|
#endif /* TORQUE_ENABLE_PROFILER */
|
|
|
|
#endif /* _PROFILER_H_ */
|