https://github.com/HeisSpiter/acprof
Tip revision: 832b6fc1557de9138d7aa6cdde855bab01a3e2ef authored by Pierre Schweitzer on 26 September 2013, 12:23:04 UTC
Simplify the call graph storage by removing redundant information
Simplify the call graph storage by removing redundant information
Tip revision: 832b6fc
prof.ah
#ifndef __PROF_AH__
#define __PROF_AH__
#include <set>
#include <new>
#include <map>
#include <vector>
#include <ctime>
#include <string>
#include <cstring>
#include <sstream>
#include <fstream>
#include <iterator>
#include <unistd.h>
#include <iostream>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#ifdef ACPROF_WITH_MULTITHREAD
#include <pthread.h>
#include <semaphore.h>
#endif
// Pair of CPU time readings: the time spent in user mode and the time
// spent in kernel mode. Used both for absolute readings (start of a call)
// and for accumulated durations.
struct TTimeInfo {
    // CPU time consumed in user mode
    struct timeval fUserTime;
    // CPU time consumed in kernel mode
    struct timeval fKernelTime;
};
// Per-function totals, aggregated over every node of the call graph that
// refers to the same function. Built when the profiler prints its report.
struct TTotalInfo {
    // Accumulated clock() ticks
    unsigned long fTotalTicks;
    // Accumulated user/kernel CPU time
    TTimeInfo fTotalTime;
    // Accumulated number of calls
    unsigned long fCalls;
    // Source location of the function
    std::string fFile;
    int fLine;
    // Per-core counters (only filled when core usage is enabled)
    std::vector<unsigned long> fCoreUsage;
    // fNbProcs x fNbProcs matrix of core transitions (only allocated when
    // core tracking is enabled), 0 otherwise
    unsigned long * fCoresTracks;
    // Dimension of the fCoresTracks matrix, 0 when there is no matrix
    unsigned long fNbProcs;

    // The tracking matrix starts out absent (null pointer, zero dimension):
    // the report code tests fCoresTracks against 0 before allocating it and
    // loops on fNbProcs, so both must have a defined value.
    TTotalInfo(unsigned long totalTicks, const TTimeInfo & totalTime, unsigned long calls, const std::string & file, int line) :
        fTotalTicks(totalTicks), fTotalTime(totalTime), fCalls(calls), fFile(file), fLine(line),
        fCoresTracks(0), fNbProcs(0) { }
};
// One node of the call graph: a function as called from a given parent.
struct TCallInfo {
    // Caller node, 0 for the root
    TCallInfo * fParent;
    // Next node stored at the same call depth
    TCallInfo * fNeighbor;
    // Signature and source location of the function
    std::string fFunction;
    std::string fFile;
    int fLine;
    // Number of times the function was entered through this node
    unsigned long fCalls;
    // Accumulated clock() ticks and CPU time of completed calls
    unsigned long fTotalTicks;
    TTimeInfo fTotalTime;
    // Readings taken when the call currently running was entered;
    // fStartTicks is set to -1 by the exit advice once the call is over
    clock_t fStartTicks;
    TTimeInfo fStartTime;
    // Number of pending (possibly recursive) activations
    unsigned long fReferenceCount;
    // First callee registered for this node, 0 if none
    TCallInfo * fChildren;
    // Core bookkeeping, filled by the advice when core usage/tracking is on
    std::vector<unsigned long> fCoreUsage;
    unsigned long * fCoresTracks;
    unsigned long fNbProcs;

    // Creates a node for a call that is starting right now: one call,
    // one pending activation, nothing accumulated yet.
    TCallInfo(TCallInfo * parent, TCallInfo * neighbor, const std::string & function, const std::string & file, int line, clock_t startTicks, const timeval & userTime, const timeval & kernelTime)
        : fParent(parent),
          fNeighbor(neighbor),
          fFunction(function),
          fFile(file),
          fLine(line),
          fCalls(1),
          fTotalTicks(0),
          fStartTicks(startTicks),
          fReferenceCount(1),
          fChildren(0),
          fCoresTracks(0),
          fNbProcs(0)
    {
        // No time accumulated so far
        timerclear(&fTotalTime.fUserTime);
        timerclear(&fTotalTime.fKernelTime);
        // Remember when the call started
        fStartTime.fUserTime = userTime;
        fStartTime.fKernelTime = kernelTime;
    }
};
// Supported report formats
enum EOutput {
    // Human readable report on the standard output
    kStdOut = 0,
    // Report written to a callgrind.out.<pid> file
    kCallgrind = 1,
    // Number of formats, not a format itself
    kOutputMax = 2
};
// Accessors to the current call graph node and the current call depth.
// Without thread support they are plain members; with thread support they
// are vectors indexed by a local variable named threadId, which must be in
// scope wherever the macros are used.
#ifndef ACPROF_WITH_MULTITHREAD
#define GetParent() fParent
#define GetLevel() fLevel
#else
#define GetParent() fParent[threadId]
#define GetLevel() fLevel[threadId]
#endif
// Checks an invariant and reports it through InternalAssert() when it does
// not hold. The expression is always evaluated, so it may have side effects.
// The do/while wrapper makes the macro behave as a single statement, which
// keeps it safe inside an unbraced if/else.
#define TProfilerAssert(e) do { if (!(e)) InternalAssert(__FILE__, __LINE__, __FUNCTION__, #e); } while (0)
aspect TProfiler {
// Any function, in any scope, with any result and arguments
pointcut functions() = "% ...::%(...)";
// main() in its (argc, argv) form
pointcut mainargs() = "% main(int, char**)";
// Members of this aspect; used to keep the profiler from profiling itself
pointcut profiler() = "% TProfiler::%(...)";
#ifdef ACPROF_WITH_MULTITHREAD
// The pthread_create() wrapper declared in this aspect
pointcut thread_create() = "% TProfiler::pthread_create(...)";
#endif
// Runs right before main(int, char**): parses and strips the profiler's own
// command line options (--acprof-out=, --acprof-with-core=) so that the
// application only sees its own arguments. The original argc/argv are saved
// and restored by the matching after() advice.
advice execution(mainargs()) : before() {
    int * argc = (int *)tjp->arg(0);
    char *** argv = (char ***)tjp->arg(1);
    const long nbProcs = sysconf(_SC_NPROCESSORS_CONF);
    TProfilerAssert(argc != 0);
    TProfilerAssert(argv != 0);
    // Save call values
    fOldArgc = *argc;
    fOldArgv = *argv;
    // Reset args. One extra slot is allocated so that the filtered vector
    // always ends with a null pointer (argv[argc] == 0), even when no
    // argument was removed.
    *argv = (char **)operator new((fOldArgc + 1) * sizeof(char *));
    fNewArgv = *argv;
    int pos = 0;
    fWithCoreUsage = false;
    fWithCoreTrack = false;
    for (int i = 0; i < fOldArgc; ++i) {
        // Check for our args
        if (strncmp(fOldArgv[i], "--acprof-out=", 13) == 0) {
            if (strncmp(fOldArgv[i] + 13, "callgrind", 9) == 0) {
                if (fWithCoreTrack) {
                    std::cerr << "Cannot use callgrind format with tracking" << std::endl;
                } else {
                    std::cout << "Will use callgrind format output" << std::endl;
                    fFormat = kCallgrind;
                }
            }
            --(*argc);
        } else if (strncmp(fOldArgv[i], "--acprof-with-core=", 19) == 0) {
            // We don't need to enable such feature if we don't have enough cores
            if (nbProcs > 1) {
                // We can't have both at a time, tracking has higher priority
                if (!fWithCoreTrack && strncmp(fOldArgv[i] + 19, "usage", 5) == 0) {
                    fWithCoreUsage = true;
                } else if (strncmp(fOldArgv[i] + 19, "track", 5) == 0) {
                    fWithCoreUsage = false;
                    fWithCoreTrack = true;
                    // We really cannot allow this, due to memory leaks
                    if (fFormat != kStdOut) {
                        std::cerr << "Forcing format to stdout again. If you want callgrind format, don't use tracking" << std::endl;
                        fFormat = kStdOut;
                    }
                }
            } else {
                std::cerr << "Ignored core usage, not enough core on the system to have relevant results" << std::endl;
            }
            --(*argc);
        } else {
            // Only pass args that aren't ours to the application
            (*argv)[pos] = fOldArgv[i];
            ++pos;
        }
    }
    // Zero the rest, terminating slot included
    for (; pos <= fOldArgc; ++pos) {
        (*argv)[pos] = 0;
    }
}
// Runs once main(int, char**) is over: puts back the argument count and
// vector that were saved by the before() advice.
advice execution(mainargs()) : after() {
    int * argc = static_cast<int *>(tjp->arg(0));
    TProfilerAssert(argc != 0);
    char *** argv = static_cast<char ***>(tjp->arg(1));
    TProfilerAssert(argv != 0);
    // Hand the caller's original values back
    *argv = fOldArgv;
    *argc = fOldArgc;
}
// Runs on entry of every profiled function. Takes the start readings
// (clock ticks, user/kernel CPU time, current core) and registers the call
// in the call graph: either an existing node for the same function under
// the same parent is re-armed, or a new node is created at the current
// depth. A direct recursive call only bumps the counters of the current
// node. On exit, GetParent() designates the node of the entered function.
advice execution(functions() && !profiler()) : before() {
    const clock_t now = clock();
    const int line = JoinPoint::line();
    const std::string file = JoinPoint::filename();
    const std::string function = JoinPoint::signature();
    const long nbProcs = sysconf(_SC_NPROCESSORS_CONF);
    int procId = sched_getcpu();
    rusage current;
#ifdef ACPROF_WITH_MULTITHREAD
    unsigned long threadId;
#endif
    TProfilerAssert(getrusage(RUSAGE_SELF, &current) == 0);
#ifdef ACPROF_WITH_MULTITHREAD
    pthread_mutex_lock(&fBigLock);
    threadId = GetThreadId();
    // Check if this thread is seen for the first time
    if (threadId >= fParent.size()) {
        // Make room for the slot of this thread (index threadId included)
        fParent.resize(threadId + 1, 0);
        fLevel.resize(threadId + 1, 0);
        // main() (thread 0) starts from scratch; any other thread inherits
        // the position of its creator in the call graph
        if (threadId != 0) {
            // Now, ensure we have data waiting for us: the creator holds
            // the create lock during the whole hand-off, so trying to take
            // it must fail (pthread_mutex_trylock returns a non-zero error
            // number in that case)
            TProfilerAssert(pthread_mutex_trylock(&fCreateLock) != 0);
            // Then, properly initialize our data
            GetLevel() = fCreationLevel;
            GetParent() = (TCallInfo *)fThreadCreator;
            // We're done with data: the creator can be released
            sem_post(fReleaseLock);
        }
    }
    // Slot used to remember the starting core when tracking cores
    const unsigned long coreSlot = threadId;
    const unsigned long coreSlots = fThreadsId.size();
#else
    // Slot used to remember the starting core when tracking cores
    const unsigned long coreSlot = 0;
    const unsigned long coreSlots = 1;
#endif
    // Complete call graph
    if (GetParent()) {
        // We're not a level 0
        TProfilerAssert(GetLevel() > 0);
        // Only insert if not recursive call
        if (function != GetParent()->fFunction) {
            // Check whether we are already in
            TCallInfo * child = 0;
            if (fCalls.size() > GetLevel()) {
                child = fCalls[GetLevel()];
            }
            // Browse every possible entry; child is left null if none matches
            for (; child != 0; child = child->fNeighbor) {
                // Check if we already had the same function in the same execution path
                if (child->fFunction == function && child->fParent == GetParent()) {
                    TProfilerAssert(child->fStartTicks == -1);
                    // Increase number of calls
                    ++child->fCalls;
                    child->fStartTicks = now;
                    child->fStartTime.fUserTime = current.ru_utime;
                    child->fStartTime.fKernelTime = current.ru_stime;
                    ++child->fReferenceCount;
                    // If required, save starting core
                    if (fWithCoreTrack) {
                        // When that feature is enabled, we have more than one core
                        TProfilerAssert(nbProcs > 1);
                        // Make sure the slot exists before writing to it
                        child->fCoreUsage.resize(coreSlots, 0);
                        child->fCoreUsage[coreSlot] = procId;
                    }
                    break;
                }
            }
            // Now, we have a room for insertion
            if (child == 0) {
                child = new TCallInfo(GetParent(), 0, function, file, line, now, current.ru_utime, current.ru_stime);
                if (fWithCoreUsage) {
                    // When that feature is enabled, we have more than one core
                    TProfilerAssert(nbProcs > 1);
                    child->fCoreUsage.resize(nbProcs, 0);
                }
                // If we have to track cores usage, allocate the matrix
                if (fWithCoreTrack) {
                    // When that feature is enabled, we have more than one core
                    TProfilerAssert(nbProcs > 1);
                    child->fCoresTracks = (unsigned long *)operator new(sizeof(unsigned long) * nbProcs * nbProcs);
                    memset(child->fCoresTracks, 0, sizeof(unsigned long) * nbProcs * nbProcs);
                    child->fNbProcs = nbProcs;
                    // Make sure the slot exists before writing to it
                    child->fCoreUsage.resize(coreSlots, 0);
                    child->fCoreUsage[coreSlot] = procId;
                }
                if (fCalls.size() > GetLevel()) {
                    // Chain the node right after the head of its depth
                    TCallInfo * same = fCalls[GetLevel()];
                    child->fNeighbor = same->fNeighbor;
                    same->fNeighbor = child;
                } else {
                    // First node at that depth
                    fCalls.insert(fCalls.end(), child);
                }
                // Ensure parent has child
                if (GetParent()->fChildren == 0) {
                    GetParent()->fChildren = child;
                }
            }
            // In any case, jump level
            ++GetLevel();
            // Switch parent
            GetParent() = child;
        } else {
            // Increase number of calls
            ++GetParent()->fCalls;
            // Increment reference count
            ++GetParent()->fReferenceCount;
        }
    } else {
        // If no parent -> level == 0
        TProfilerAssert(GetLevel() == 0);
        // Also means no child - if on main()
#ifdef ACPROF_WITH_MULTITHREAD
        TProfilerAssert(threadId != 0 || fCalls.size() == 0);
#else
        TProfilerAssert(fCalls.size() == 0);
#endif
        GetParent() = new TCallInfo(0, 0, function, file, line, now, current.ru_utime, current.ru_stime);
        if (fWithCoreUsage) {
            // When that feature is enabled, we have more than one core
            TProfilerAssert(nbProcs > 1);
            GetParent()->fCoreUsage.resize(nbProcs, 0);
        }
        // If we have to track cores usage, allocate the matrix
        if (fWithCoreTrack) {
            // When that feature is enabled, we have more than one core
            TProfilerAssert(nbProcs > 1);
            GetParent()->fCoresTracks = (unsigned long *)operator new(sizeof(unsigned long) * nbProcs * nbProcs);
            memset(GetParent()->fCoresTracks, 0, sizeof(unsigned long) * nbProcs * nbProcs);
            GetParent()->fNbProcs = nbProcs;
            // Make sure the slot exists before writing to it
            GetParent()->fCoreUsage.resize(coreSlots, 0);
            GetParent()->fCoreUsage[coreSlot] = procId;
        }
        fCalls.insert(fCalls.end(), GetParent());
        // Jump level
        ++GetLevel();
    }
#ifdef ACPROF_WITH_MULTITHREAD
    pthread_mutex_unlock(&fBigLock);
#endif
}
// Runs on exit of every profiled function. Drops one pending activation of
// the current node; when none is left, the elapsed ticks and user/kernel
// CPU time are added to the node totals, core statistics are updated if
// enabled, and the current position moves back to the caller.
advice execution(functions() && !profiler()) : after() {
    clock_t now = clock();
    timeval total, add;
    rusage current;
    int procId = sched_getcpu();
    TProfilerAssert(getrusage(RUSAGE_SELF, &current) == 0);
    unsigned long matrixId;
#ifdef ACPROF_WITH_MULTITHREAD
    unsigned long threadId;
    // Take the lock first: GetThreadId() works on the shared thread table
    pthread_mutex_lock(&fBigLock);
    threadId = GetThreadId();
    TProfilerAssert(fParent.size() > threadId);
    TProfilerAssert(fLevel.size() > threadId);
    // Slot where the entry advice saved the starting core
    const unsigned long coreSlot = threadId;
#else
    // Slot where the entry advice saved the starting core
    const unsigned long coreSlot = 0;
#endif
    TProfilerAssert(GetParent() != 0);
    TProfilerAssert(GetParent()->fStartTicks != -1);
    TProfilerAssert(GetParent()->fReferenceCount > 0);
    --GetParent()->fReferenceCount;
    // Are we done?
    if (GetParent()->fReferenceCount == 0) {
        TProfilerAssert(now >= GetParent()->fStartTicks);
        // Use '<' and not '>=' due to man 3 timeradd recommendation
        TProfilerAssert(!timercmp(&current.ru_utime, &GetParent()->fStartTime.fUserTime, <));
        TProfilerAssert(!timercmp(&current.ru_stime, &GetParent()->fStartTime.fKernelTime, <));
        // Update ticks count
        GetParent()->fTotalTicks += (now - GetParent()->fStartTicks);
        // Update user time
        timersub(&current.ru_utime, &GetParent()->fStartTime.fUserTime, &total);
        timeradd(&GetParent()->fTotalTime.fUserTime, &total, &add);
        GetParent()->fTotalTime.fUserTime = add;
        // Update kernel time
        timersub(&current.ru_stime, &GetParent()->fStartTime.fKernelTime, &total);
        timeradd(&GetParent()->fTotalTime.fKernelTime, &total, &add);
        GetParent()->fTotalTime.fKernelTime = add;
        // Mark the node as not running
        GetParent()->fStartTicks = -1;
        // Update CPU on which we ended and update
        if (fWithCoreUsage) {
            TProfilerAssert(procId >= 0);
            // Never count outside of the per-core table
            TProfilerAssert((unsigned long)procId < GetParent()->fCoreUsage.size());
            GetParent()->fCoreUsage[procId]++;
        } else if (fWithCoreTrack && GetParent()->fCoresTracks) {
            TProfilerAssert(procId >= 0 && (unsigned long)procId < GetParent()->fNbProcs);
            // The starting core must have been recorded, and be a valid one
            TProfilerAssert(coreSlot < GetParent()->fCoreUsage.size());
            TProfilerAssert(GetParent()->fCoreUsage[coreSlot] < GetParent()->fNbProcs);
            // Line is starting core, row is ending core
            matrixId = GetParent()->fCoreUsage[coreSlot] * GetParent()->fNbProcs + procId;
            GetParent()->fCoresTracks[matrixId]++;
        }
        // Decrease level
        --GetLevel();
        // Change parent
        GetParent() = GetParent()->fParent;
    }
#ifdef ACPROF_WITH_MULTITHREAD
    pthread_mutex_unlock(&fBigLock);
#endif
}
// XXX: Commented out because for whatever reason, C++ generated
// that way is wrong and cannot be built by G++
#if 0
#ifdef ACPROF_WITH_MULTITHREAD
// Disabled variant (inside "#if 0"): wraps the whole thread creation in a
// single around() advice instead of a before()/after() pair.
advice execution(thread_create()) : around() {
unsigned long threadId;
// Get current thread ID
threadId = GetThreadId();
// First, lock the create lock, to ensure that we have only one thread
// creation at a time, to keep track of the caller
pthread_mutex_lock(&fCreateLock);
// Store the ID of the thread creator
fThreadCreator = GetParent();
// And store the level of the current thread
fCreationLevel = GetLevel();
// Initialise the semaphore
sem_init(fReleaseLock, 0, 0);
// Finally create thread
JoinPoint::proceed();
// Wait until data has been read
sem_wait(fReleaseLock);
// Destroy semaphore
sem_destroy(fReleaseLock);
// Release lock
pthread_mutex_unlock(&fCreateLock);
}
#endif
#else
#ifdef ACPROF_WITH_MULTITHREAD
// Runs before the pthread_create() wrapper: publishes the call graph
// position of the creating thread so that the new thread can start from it.
// The create lock is taken here and released by the after() advice, so only
// one thread creation is in flight at a time.
advice execution(thread_create()) : before() {
    unsigned long threadId;
    TCallInfo * creator;
    unsigned int creationLevel;
    // The thread table and the per-thread position are shared data: read
    // them under the big lock. The lock is released before waiting on the
    // create lock, so that a pending creation can still complete (the new
    // thread needs the big lock to pick up its data).
    pthread_mutex_lock(&fBigLock);
    // Get current thread ID
    threadId = GetThreadId();
    creator = GetParent();
    creationLevel = GetLevel();
    pthread_mutex_unlock(&fBigLock);
    // Lock the create lock, to ensure that we have only one thread
    // creation at a time, to keep track of the caller
    pthread_mutex_lock(&fCreateLock);
    // Store the node of the thread creator
    fThreadCreator = creator;
    // And store the level of the current thread
    fCreationLevel = creationLevel;
    // Initialise the semaphore
    sem_init(fReleaseLock, 0, 0);
}
// Runs after the pthread_create() wrapper: waits until the new thread has
// picked up the data published by the before() advice, then ends the
// hand-off.
advice execution(thread_create()) : after() {
    // Result of the wrapper, i.e. of pthread_create(): 0 on success
    int * created = (int *)tjp->result();
    // Wait until data has been read. When the creation failed there is no
    // new thread to post the semaphore, so waiting would block forever.
    if (created == 0 || *created == 0) {
        sem_wait(fReleaseLock);
    }
    // Destroy semaphore
    sem_destroy(fReleaseLock);
    // Release lock
    pthread_mutex_unlock(&fCreateLock);
}
#endif
#endif
TProfiler() {
fFormat = kStdOut;
#ifdef ACPROF_WITH_MULTITHREAD
fLevel.resize(0, 0);
fParent.resize(0, 0);
pthread_mutex_init(&fBigLock, 0);
pthread_mutex_init(&fCreateLock, 0);
fReleaseLock = new sem_t;
#else
fLevel = 0;
fParent = 0;
#endif
}
// Converts a timeval to whole milliseconds
static unsigned long ToMilliseconds(const timeval & tv) {
    return tv.tv_sec * 1000UL + tv.tv_usec / 1000;
}
// Converts a timeval to microseconds
static unsigned long ToMicroseconds(const timeval & tv) {
    return tv.tv_sec * 1000000UL + tv.tv_usec;
}
// Writes the report in the selected format and releases the call graph.
// - kStdOut: prints every call path ending on a leaf, then per-function
//   totals (calls, ticks, user/kernel time in milliseconds), then core
//   usage or core tracking statistics when enabled.
// - otherwise: writes a callgrind.out.<pid> file with, for each node, its
//   own cost and the cost of its callees (times in microseconds).
~TProfiler() {
#ifdef ACPROF_WITH_MULTITHREAD
    pthread_mutex_lock(&fBigLock);
#endif
    //TProfilerAssert(fLevel == 0);
    TProfilerAssert(!fCalls.empty());
    if (fFormat == kStdOut) {
        std::cout << std::endl;
        std::map<std::string, TTotalInfo> total;
        // Display graphs at the end
        for (int level = fCalls.size() - 1; level >= 0; --level) {
            TCallInfo * child = fCalls[level];
            while (child != 0) {
                // No child -> display and head back
                if (child->fChildren == 0) {
                    TCallInfo * parent = child;
                    while (parent != 0) {
                        std::cout << parent->fFunction;
                        parent = parent->fParent;
                        if (parent != 0) {
                            std::cout << " <- ";
                        } else {
                            std::cout << std::endl;
                        }
                    }
                }
                // Compute total resources
                std::map<std::string, TTotalInfo>::iterator tot = total.find(child->fFunction);
                if (tot != total.end()) {
                    timeval tv;
                    tot->second.fCalls += child->fCalls;
                    tot->second.fTotalTicks += child->fTotalTicks;
                    timeradd(&tot->second.fTotalTime.fUserTime, &child->fTotalTime.fUserTime, &tv);
                    tot->second.fTotalTime.fUserTime = tv;
                    timeradd(&tot->second.fTotalTime.fKernelTime, &child->fTotalTime.fKernelTime, &tv);
                    tot->second.fTotalTime.fKernelTime = tv;
                    if (fWithCoreUsage) {
                        // Update
                        TProfilerAssert(child->fCoreUsage.size() == tot->second.fCoreUsage.size());
                        for (unsigned int i = 0; i < child->fCoreUsage.size(); ++i) {
                            tot->second.fCoreUsage[i] += child->fCoreUsage[i];
                        }
                    } else if (fWithCoreTrack && child->fCoresTracks) {
                        TProfilerAssert(child->fNbProcs > 0);
                        const unsigned long cells = child->fNbProcs * child->fNbProcs;
                        // If we don't have place yet, do some
                        if (tot->second.fCoresTracks == 0) {
                            tot->second.fCoresTracks = (unsigned long *)operator new(sizeof(unsigned long) * cells);
                            // And copy our stuff
                            memcpy(tot->second.fCoresTracks, child->fCoresTracks, sizeof(unsigned long) * cells);
                            tot->second.fNbProcs = child->fNbProcs;
                        } else {
                            TProfilerAssert(child->fNbProcs == tot->second.fNbProcs);
                            // Merge results, index per index
                            for (unsigned long i = 0; i < cells; ++i) {
                                tot->second.fCoresTracks[i] += child->fCoresTracks[i];
                            }
                        }
                    }
                } else {
                    TTotalInfo tot(child->fTotalTicks, child->fTotalTime, child->fCalls, child->fFile, child->fLine);
                    // No tracking matrix until one is copied below: the merge,
                    // display and release code all rely on these two values
                    tot.fCoresTracks = 0;
                    tot.fNbProcs = 0;
                    if (fWithCoreUsage) {
                        tot.fCoreUsage = child->fCoreUsage;
                    } else if (fWithCoreTrack && child->fCoresTracks) {
                        TProfilerAssert(child->fNbProcs > 0);
                        const unsigned long cells = child->fNbProcs * child->fNbProcs;
                        // Just allocate
                        tot.fCoresTracks = (unsigned long *)operator new(sizeof(unsigned long) * cells);
                        // And copy our stuff
                        memcpy(tot.fCoresTracks, child->fCoresTracks, sizeof(unsigned long) * cells);
                        tot.fNbProcs = child->fNbProcs;
                    }
                    total.insert(std::pair<std::string, TTotalInfo>(child->fFunction, tot));
                }
                // Release and go to the next
                TCallInfo * next = child->fNeighbor;
                // The matrix comes from operator new, so it goes back through
                // operator delete (which accepts a null pointer)
                operator delete(child->fCoresTracks);
                delete child;
                child = next;
            }
        }
        std::cout << std::endl;
        // Display data at the end
        for (std::map<std::string, TTotalInfo>::const_iterator tot = total.begin();
             tot != total.end(); ++tot) {
            std::cout << "Function: " << tot->first.c_str() << " (" << tot->second.fFile << ":" << tot->second.fLine << "), calls: " << tot->second.fCalls;
            std::cout << ", total ticks: " << tot->second.fTotalTicks << ", total user time: " << ToMilliseconds(tot->second.fTotalTime.fUserTime) << "ms, total kernel time: ";
            std::cout << ToMilliseconds(tot->second.fTotalTime.fKernelTime) << "ms" << std::endl;
        }
        if (fWithCoreUsage) {
            std::cout << std::endl;
            // Display core usage per function
            for (std::map<std::string, TTotalInfo>::const_iterator tot = total.begin();
                 tot != total.end(); ++tot) {
                std::cout << "Function: " << tot->first.c_str() << std::endl;
                for (unsigned int j = 0; j < tot->second.fCoreUsage.size(); ++j) {
                    // Prune cores with no calls
                    if (tot->second.fCoreUsage[j] > 0) {
                        std::cout << "Core " << j << ": " << tot->second.fCoreUsage[j] << " calls" << std::endl;
                    }
                }
            }
        } else if (fWithCoreTrack) {
            std::cout << std::endl;
            // Display core usage per function
            for (std::map<std::string, TTotalInfo>::const_iterator tot = total.begin();
                 tot != total.end(); ++tot) {
                std::cout << "Function: " << tot->first.c_str() << std::endl;
                for (unsigned long i = 0; i < tot->second.fNbProcs; ++i) {
                    for (unsigned long j = 0; j < tot->second.fNbProcs; ++j) {
                        unsigned long value = *(tot->second.fCoresTracks + i * tot->second.fNbProcs + j);
                        if (value > 0) {
                            std::cout << "Core " << i << " to core " << j << ": " <<
                                value << " calls" << std::endl;
                        }
                    }
                }
                // Don't leak memory
                operator delete(tot->second.fCoresTracks);
            }
        }
        std::cout << std::endl;
    } else {
        std::ofstream outFile;
        std::stringstream file;
        int pid = getpid();
        // Get output file name
        file << "callgrind.out." << pid;
        outFile.open(file.str().c_str());
        // Put PID
        outFile << "pid: " << pid << std::endl;
        // Get command line
        outFile << "cmd:";
        for (int i = 0; i < fOldArgc; ++i) {
            // If zero => we removed args, there's nothing after
            if (fNewArgv[i] == 0) {
                break;
            }
            outFile << " " << fNewArgv[i];
        }
        outFile << std::endl;
        outFile << "events: ticks utime ktime" << std::endl;
        // Output summary (ie, total events)
        TCallInfo * child = fCalls[0];
        outFile << "summary: " << child->fTotalTicks << std::endl;
        unsigned long previousTicks = fCalls[0]->fTotalTicks;
        for (unsigned int level = 0; level < fCalls.size(); level++) {
            unsigned long currentTicks = 0;
            child = fCalls[level];
            for (; child != 0; child = child->fNeighbor) {
                outFile << std::endl;
                // First display data about the calling function
                outFile << "fl=" << child->fFile << std::endl;
                outFile << "fn=" << child->fFunction << std::endl;
                outFile << child->fLine << " " << child->fTotalTicks << " " << ToMicroseconds(child->fTotalTime.fUserTime) << " ";
                outFile << ToMicroseconds(child->fTotalTime.fKernelTime) << std::endl;
                // Then display each of the callee
                for (TCallInfo * callee = child->fChildren; callee != 0; callee = callee->fNeighbor) {
                    outFile << "cfl=" << callee->fFile << std::endl;
                    outFile << "cfn=" << callee->fFunction << std::endl;
                    outFile << "calls=" << callee->fCalls << std::endl;
                    outFile << callee->fLine << " " << callee->fTotalTicks << " " << ToMicroseconds(callee->fTotalTime.fUserTime) << " ";
                    outFile << ToMicroseconds(callee->fTotalTime.fKernelTime) << std::endl;
                }
                currentTicks += child->fTotalTicks;
            }
            // A level cannot cost more than the level above it
            TProfilerAssert(previousTicks >= currentTicks);
            previousTicks = currentTicks;
        }
        // Release memory
        for (int level = fCalls.size() - 1; level >= 0; --level) {
            // Get call stack
            child = fCalls[level];
            while (child != 0) {
                // Release and go to the next
                TCallInfo * next = child->fNeighbor;
                operator delete(child->fCoresTracks);
                delete child;
                child = next;
            }
        }
        outFile.close();
    }
    // The filtered argument vector was obtained from operator new
    operator delete(fNewArgv);
#ifdef ACPROF_WITH_MULTITHREAD
    pthread_mutex_destroy(&fCreateLock);
    delete fReleaseLock;
    pthread_mutex_unlock(&fBigLock);
    pthread_mutex_destroy(&fBigLock);
#endif
}
#ifdef ACPROF_WITH_MULTITHREAD
// Wrapper around the system pthread_create(), with the same parameters and
// result. It exists only to give the thread_create() pointcut a function to
// match; the bookkeeping is done by the advice attached to it.
static int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                          void *(*start_routine) (void *), void *arg) {
    // Just create the thread
    // Everything will have been done in the aspect
    // The call is qualified with "::" to reach the system function: an
    // unqualified call would resolve to this very wrapper and recurse
    return ::pthread_create(thread, attr, start_routine, arg);
}
#endif
private:
void InternalAssert(const char * file, unsigned int line, const char * function, const char * expression) {
std::cerr << file << ":" << line << ": " << function << ": Assertion `" << expression << "' failed." << std::endl;
#ifdef ACPROF_WITH_MULTITHREAD
// XXX: The assert has to be threads aware!
#else
std::cerr << "Current level: " << fLevel << std::endl;
std::cerr << "Current parent: " << fParent << std::endl;
#endif
std::cerr << "Call tree size: " << fCalls.size() << std::endl;
for (unsigned int i = 0; i < fCalls.size(); ++i) {
TCallInfo * child = fCalls[i];
std::cerr << "Level " << i << std::endl;
for (; child != 0; child = child->fNeighbor) {
std::cerr << "\tChild: " << child << std::endl;
std::cerr << "\tFunction: " << child->fFunction << std::endl;
std::cerr << "\tCalls: " << child->fCalls << std::endl;
std::cerr << "\tTotalTicks: " << child->fTotalTicks << std::endl;
std::cerr << "\tStartTicks: " << child->fStartTicks << std::endl;
std::cerr << "\tReferenceCount: " << child->fReferenceCount << std::endl;
for (unsigned int j = 0; j < child->fCoreUsage.size(); ++j) {
std::cerr << "\tCoreUsage (" << j << "): " << child->fCoreUsage[j] << std::endl;
}
for (TCallInfo * callee = child->fChildren; callee != 0; callee = callee->fNeighbor) {
std::cerr << "\t\tCallee: " << callee << std::endl;
}
std::cerr << "\tNbProcs: " << child->fNbProcs << std::endl;
}
}
throw;
}
// Call graph storage: one entry per call depth, each entry being the first
// node of a list chained through TCallInfo::fNeighbor
std::vector<TCallInfo *> fCalls;
#ifdef ACPROF_WITH_MULTITHREAD
// Current call depth and current call graph node, one slot per thread,
// indexed by the value returned by GetThreadId()
std::vector<unsigned int> fLevel;
std::vector<TCallInfo *> fParent;
#else
// Current call depth and current call graph node
unsigned int fLevel;
TCallInfo * fParent;
#endif
// Argument count and vector main() was called with, saved by the advice
// running before main() and restored by the one running after it
int fOldArgc;
char ** fOldArgv;
// Argument vector handed to main(), without the profiler's own options
char ** fNewArgv;
// Selected report format
EOutput fFormat;
// Set by --acprof-with-core=usage: count calls per core
bool fWithCoreUsage;
// Set by --acprof-with-core=track: count core changes between entry and exit
bool fWithCoreTrack;
#ifdef ACPROF_WITH_MULTITHREAD
// Equality predicate on thread identifiers: true when both designate the
// same thread. Note that this is not an ordering, so it cannot be used as
// the comparator of a sorted container.
struct TThreadsCompare {
    bool operator() (const pthread_t & t1, const pthread_t & t2) const {
        return (pthread_equal(t1, t2) != 0);
    }
};
// Translates the calling thread into a small, stable index usable as a
// vector index: the first thread asking gets 0, the next unknown thread 1,
// and so on. A thread always gets the same index back.
// The thread table is modified here, so callers are expected to hold
// fBigLock.
unsigned long GetThreadId() {
    const pthread_t self = pthread_self();
    TThreadsCompare sameThread;
    // Already known? Then its position in the table is its ID
    for (unsigned long id = 0; id < fThreadsId.size(); ++id) {
        if (sameThread(fThreadsId[id], self)) {
            return id;
        }
    }
    // New thread: register it at the end, previous IDs are not affected
    fThreadsId.push_back(self);
    return fThreadsId.size() - 1;
}
// Protects the call graph and the per-thread data
pthread_mutex_t fBigLock;
// Call graph node and call depth of the thread currently creating another
// one, published for the new thread to pick up
void * fThreadCreator;
unsigned int fCreationLevel;
// Held during a whole thread creation, so that only one is in flight
pthread_mutex_t fCreateLock;
// Posted by the new thread once it has picked up the published data
sem_t * fReleaseLock;
// Known threads, in order of first appearance; the position is the thread ID
std::vector<pthread_t> fThreadsId;
#endif
};
#endif