Content - e6b9e8da17efcc9d2704c1887e5a7de27d366d67 - 6ea1d9f/prof.ah

visit type:
Tip revision: 832b6fc1557de9138d7aa6cdde855bab01a3e2ef authored by Pierre Schweitzer on 26 September 2013, 12:23:04 UTC
Simplify the call graph storage by removing redundant information
Tip revision: 832b6fc
prof.ah
#ifndef __PROF_AH__
#define __PROF_AH__

#include <set>
#include <new>
#include <map>
#include <vector>
#include <ctime>
#include <string>
#include <cstring>
#include <sstream>
#include <fstream>
#include <iterator>
#include <unistd.h>
#include <iostream>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#ifdef ACPROF_WITH_MULTITHREAD
#include <pthread.h>
#include <semaphore.h>
#endif

// Pair of CPU times for one measurement point: the profiler fills it from
// the ru_utime / ru_stime fields returned by getrusage().
struct TTimeInfo {
   // CPU time spent in user mode
   timeval fUserTime;
   // CPU time spent in kernel mode
   timeval fKernelTime;
};

// Per-function totals, built at report time by merging every call graph
// node that carries the same function signature.
struct TTotalInfo {
   // Sum of the clock() ticks of all merged nodes
   unsigned long fTotalTicks;
   // Sum of the user / kernel CPU times of all merged nodes
   TTimeInfo fTotalTime;
   // Sum of the call counters of all merged nodes
   unsigned long fCalls;
   // Source location of the function
   std::string fFile;
   int fLine;
   // Per-core counters (core usage mode only, empty otherwise)
   std::vector<unsigned long> fCoreUsage;
   // fNbProcs x fNbProcs "starting core -> ending core" matrix
   // (core tracking mode only, null otherwise)
   unsigned long * fCoresTracks;
   // Dimension of the fCoresTracks matrix, 0 when there is no matrix
   unsigned long fNbProcs;

   // The core tracking fields start out as "no matrix": the report code tests
   // fCoresTracks against null and loops up to fNbProcs, so they must never
   // be left indeterminate.
   TTotalInfo(unsigned long totalTicks, const TTimeInfo & totalTime, unsigned long calls, const std::string & file, int line) :
      fTotalTicks(totalTicks), fTotalTime(totalTime), fCalls(calls), fFile(file), fLine(line),
      fCoresTracks(0), fNbProcs(0) { }
};

// One node of the call graph: a given function reached through a given caller.
struct TCallInfo {
   // Caller node (null for a root)
   TCallInfo * fParent;
   // Next node recorded at the same depth
   TCallInfo * fNeighbor;
   // Signature and source location of the function
   std::string fFunction;
   std::string fFile;
   int fLine;
   // Number of times the function was entered through this path
   unsigned long fCalls;
   // Accumulated clock() ticks and CPU times of the completed activations
   unsigned long fTotalTicks;
   TTimeInfo fTotalTime;
   // Measurements taken when the running activation started
   clock_t fStartTicks;
   TTimeInfo fStartTime;
   // Number of activations that have not returned yet
   unsigned long fReferenceCount;
   // First callee recorded for this node (null when there is none)
   TCallInfo * fChildren;
   // Per-core counters, or starting core(s) when core tracking is enabled
   std::vector<unsigned long> fCoreUsage;
   // fNbProcs x fNbProcs core migration matrix (core tracking only)
   unsigned long * fCoresTracks;
   unsigned long fNbProcs;

   // Builds a node for a function that has just been entered: one call, one
   // pending activation, nothing accumulated yet and no core matrix.
   TCallInfo(TCallInfo * parent, TCallInfo * neighbor, const std::string & function, const std::string & file, int line, clock_t startTicks, const timeval & userTime, const timeval & kernelTime)
      : fParent(parent),
        fNeighbor(neighbor),
        fFunction(function),
        fFile(file),
        fLine(line),
        fCalls(1),
        fTotalTicks(0),
        fStartTicks(startTicks),
        fReferenceCount(1),
        fChildren(0),
        fCoresTracks(0),
        fNbProcs(0)
   {
      // Nothing has been accumulated so far
      timerclear(&fTotalTime.fUserTime);
      timerclear(&fTotalTime.fKernelTime);

      // Remember when the activation started
      fStartTime.fUserTime = userTime;
      fStartTime.fKernelTime = kernelTime;
   }
};

// Report formats the profiler can produce when it is destroyed.
enum EOutput {
   // Human readable report written on std::cout (default)
   kStdOut,
   // "callgrind.out.<pid>" file, selected with --acprof-out=callgrind
   kCallgrind,
   // Number of formats, not a format by itself
   kOutputMax
};

// Accessors for the "current node" and "current depth" of the running thread.
// In multithread builds both are vectors indexed by thread: the macros then
// expand to an access through a variable named threadId, which must exist in
// the scope where they are used. Otherwise they are plain members.
#ifdef ACPROF_WITH_MULTITHREAD
#define GetParent() fParent[threadId]
#define GetLevel() fLevel[threadId]
#else
#define GetParent() fParent
#define GetLevel() fLevel
#endif

// Always-on assertion: when the expression is false, reports it through
// InternalAssert() along with the current location.
// The do { } while (0) wrapper makes the macro behave as one single statement,
// so that it can be safely used as the body of an if that has an else branch.
#define TProfilerAssert(e) do { if (!(e)) InternalAssert(__FILE__, __LINE__, __FUNCTION__, #e); } while (0)

aspect TProfiler {
   // Functions declared in any scope, whatever their result and arguments
   pointcut functions() = "% ...::%(...)";
   // main() taking the command line arguments
   pointcut mainargs() = "% main(int, char**)";
   // The profiler's own functions (used below to keep them out of the profiling advices)
   pointcut profiler() = "% TProfiler::%(...)";
#ifdef ACPROF_WITH_MULTITHREAD
   // The thread creation wrapper provided by the profiler
   pointcut thread_create() = "% TProfiler::pthread_create(...)";
#endif

   // Runs before main(): reads the profiler switches from the command line and
   // hides them from the application.
   //
   // Recognized switches:
   //   --acprof-out=callgrind        write a callgrind file instead of stdout
   //   --acprof-with-core=usage      count on which core each function ended
   //   --acprof-with-core=track      count core migrations (stdout format only)
   //
   // main() receives a freshly allocated argv holding only the remaining
   // arguments; the original argc/argv are saved to be restored after main().
   advice execution(mainargs()) : before() {
      int * argc = (int *)tjp->arg(0);
      char *** argv = (char ***)tjp->arg(1);
      const long nbProcs = sysconf(_SC_NPROCESSORS_CONF);

      TProfilerAssert(argc != 0);
      TProfilerAssert(argv != 0);

      // Save call values
      fOldArgc = *argc;
      fOldArgv = *argv;

      // Reset args. One extra slot is allocated: argv[argc] has to be a null
      // pointer, even when none of the arguments is removed
      *argv = (char **)operator new((fOldArgc + 1) * sizeof(char *));
      fNewArgv = *argv;

      int pos = 0;
      fWithCoreUsage = false;
      fWithCoreTrack = false;
      for (int i = 0; i < fOldArgc; ++i) {
         // Check for our args
         if (strncmp(fOldArgv[i], "--acprof-out=", 13) == 0) {
            if (strncmp(fOldArgv[i] + 13, "callgrind", 9) == 0) {
               if (fWithCoreTrack) {
                  std::cerr << "Cannot use callgrind format with tracking" << std::endl;
               } else {
                  std::cout << "Will use callgrind format output" << std::endl;
                  fFormat = kCallgrind;
               }
            }
            --(*argc);
         } else if (strncmp(fOldArgv[i], "--acprof-with-core=", 19) == 0) {
            // We don't need to enable such feature if we don't have enough cores
            if (nbProcs > 1) {
               // We can't have both at a time, tracking has higher priority
               if (!fWithCoreTrack && strncmp(fOldArgv[i] + 19, "usage", 5) == 0) {
                  fWithCoreUsage = true;
               } else if (strncmp(fOldArgv[i] + 19, "track", 5) == 0) {
                  fWithCoreUsage = false;
                  fWithCoreTrack = true;
                  // We really cannot allow this, due to memory leaks
                  if (fFormat != kStdOut) {
                     std::cerr << "Forcing format to stdout again. If you want callgrind format, don't use tracking" << std::endl;
                     fFormat = kStdOut;
                  }
               }
            } else {
               std::cerr << "Ignored core usage, not enough core on the system to have relevant results" << std::endl;
            }
            --(*argc);
         } else {
            // Only pass args that aren't ours to the application
            (*argv)[pos] = fOldArgv[i];
            ++pos;
         }
      }

      // Zero the rest, terminating slot included
      for (; pos <= fOldArgc; ++pos) {
         (*argv)[pos] = 0;
      }
   }

   // Runs after main(): gives main()'s argc and argv back the values they
   // had before the profiler switches were filtered out.
   advice execution(mainargs()) : after() {
      int * const countSlot = (int *)tjp->arg(0);
      char *** const vectorSlot = (char ***)tjp->arg(1);

      TProfilerAssert(countSlot != 0);
      TProfilerAssert(vectorSlot != 0);

      // Put the saved values back
      *countSlot = fOldArgc;
      *vectorSlot = fOldArgv;
   }

   // Entry hook of every profiled function.
   //
   // Looks up (or creates) the call graph node matching "this function called
   // from the current node", stores the starting measurements in it and makes
   // it the current node. A function calling itself directly does not get a
   // new node: only the call and reference counters of the current node are
   // increased.
   advice execution(functions() && !profiler()) : before() {
      const clock_t now = clock();
      const int line = JoinPoint::line();
      const std::string file = JoinPoint::filename();
      const std::string function = JoinPoint::signature();
      const long nbProcs = sysconf(_SC_NPROCESSORS_CONF);
      int procId = sched_getcpu();
      rusage current;
#ifdef ACPROF_WITH_MULTITHREAD
      unsigned long threadId;
#endif
      TProfilerAssert(getrusage(RUSAGE_SELF, &current) == 0);

#ifdef ACPROF_WITH_MULTITHREAD
      pthread_mutex_lock(&fBigLock);

      threadId = GetThreadId();
      // First function ever executed by this thread: it has no slot yet
      if (threadId >= fParent.size())
      {
          // Thread 0 is the first thread seen, it starts from an empty state.
          // Any other thread inherits its state from the thread creating it
          const bool spawned = (threadId != 0);

          // Ensure we can set our data: index threadId has to be valid
          fParent.resize(threadId + 1, 0);
          fLevel.resize(threadId + 1, 0);

          if (spawned)
          {
              // Now, ensure we have data waiting for us: the creator holds
              // fCreateLock until we are done, so locking has to fail.
              // pthread_mutex_trylock() returns 0 on success and a (positive)
              // error number otherwise
              TProfilerAssert(pthread_mutex_trylock(&fCreateLock) != 0);
              // Then, properly initialize our data
              GetLevel() = fCreationLevel;
              GetParent() = (TCallInfo *)fThreadCreator;

              // We're done with data: the creator can be released
              sem_post(fReleaseLock);
          }
      }
#endif

      // Complete call graph
      if (GetParent()) {
         // We're not a level 0
         TProfilerAssert(GetLevel() > 0);

         // Only insert if not recursive call
         if (function != GetParent()->fFunction) {
            // Check whether we are already in
            TCallInfo * child = 0;
            if (fCalls.size() > GetLevel()) {
               child = fCalls[GetLevel()];
            }

            // Browse every possible entry
            if (child) {
               for (; child != 0; child = child->fNeighbor) {
                  // Check if we already had the same function in the same execution path
                  if (child->fFunction == function && child->fParent == GetParent()) {
                     TProfilerAssert(child->fStartTicks == -1);
                     // Increase number of calls
                     ++child->fCalls;
                     child->fStartTicks = now;
                     child->fStartTime.fUserTime = current.ru_utime;
                     child->fStartTime.fKernelTime = current.ru_stime;
                     ++child->fReferenceCount;
                     // If required, save starting core
                     if (fWithCoreTrack) {
                        // When that feature is enabled, we have more than one core
                        TProfilerAssert(nbProcs > 1);
#ifdef ACPROF_WITH_MULTITHREAD
                        child->fCoreUsage.resize(fThreadsId.size(), 0);
                        child->fCoreUsage[threadId] = procId;
#else
                        // The starting core lives in slot 0, which has to exist
                        if (child->fCoreUsage.empty()) {
                           child->fCoreUsage.resize(1, 0);
                        }
                        child->fCoreUsage[0] = procId;
#endif
                     }
                     break;
                  }
               }
            }

            // Now, we have a room for insertion
            if (child == 0) {
               child = new TCallInfo(GetParent(), 0, function, file, line, now, current.ru_utime, current.ru_stime);
               if (fWithCoreUsage) {
                  // When that feature is enabled, we have more than one core
                  TProfilerAssert(nbProcs > 1);
                  child->fCoreUsage.resize(nbProcs, 0);
               } else {
                  child->fCoreUsage.resize(0, 0);
               }

               // If we have to track cores usage, allocate the matrix
               if (fWithCoreTrack) {
                  // When that feature is enabled, we have more than one core
                  TProfilerAssert(nbProcs > 1);
                  child->fCoresTracks = (unsigned long *)operator new(sizeof(unsigned long) * nbProcs * nbProcs);
                  memset(child->fCoresTracks, 0, sizeof(unsigned long) * nbProcs * nbProcs);
                  child->fNbProcs = nbProcs;
#ifdef ACPROF_WITH_MULTITHREAD
                  child->fCoreUsage.resize(fThreadsId.size(), 0);
                  child->fCoreUsage[threadId] = procId;
#else
                  // The vector is empty at this point: create slot 0, which
                  // holds the starting core
                  child->fCoreUsage.resize(1, 0);
                  child->fCoreUsage[0] = procId;
#endif
               }

               if (fCalls.size() > GetLevel()) {
                  // Level already exists: link right after its first node
                  TCallInfo * same = fCalls[GetLevel()];
                  child->fNeighbor = same->fNeighbor;
                  same->fNeighbor = child;
               } else {
                  // First node of a new level
                  fCalls.insert(fCalls.end(), child);
               }

               // Ensure parent has child
               if (GetParent()->fChildren == 0)
               {
                   GetParent()->fChildren = child;
               }
            }

            // In any case, jump level
            ++GetLevel();
            // Switch parent
            GetParent() = child;
         } else {
            // Increase number of calls
            ++GetParent()->fCalls;
            // Increment reference count
            ++GetParent()->fReferenceCount;
         }
      } else {
         // If no parent -> level == 0
         TProfilerAssert(GetLevel() == 0);
         // Also means no child - if on main()
#ifdef ACPROF_WITH_MULTITHREAD
         TProfilerAssert(threadId != 0 || fCalls.size() == 0);
#else
         TProfilerAssert(fCalls.size() == 0);
#endif

         GetParent() = new TCallInfo(0, 0, function, file, line, now, current.ru_utime, current.ru_stime);
         if (fWithCoreUsage) {
            // When that feature is enabled, we have more than one core
            TProfilerAssert(nbProcs > 1);
            GetParent()->fCoreUsage.resize(nbProcs, 0);
         } else {
            GetParent()->fCoreUsage.resize(0, 0);
         }

         // If we have to track cores usage, allocate the matrix
         if (fWithCoreTrack) {
            // When that feature is enabled, we have more than one core
            TProfilerAssert(nbProcs > 1);
            GetParent()->fCoresTracks = (unsigned long *)operator new(sizeof(unsigned long) * nbProcs * nbProcs);
            memset(GetParent()->fCoresTracks, 0, sizeof(unsigned long) * nbProcs * nbProcs);
            GetParent()->fNbProcs = nbProcs;
#ifdef ACPROF_WITH_MULTITHREAD
            GetParent()->fCoreUsage.resize(fThreadsId.size(), 0);
            GetParent()->fCoreUsage[threadId] = procId;
#else
            // The vector is empty at this point: create slot 0, which holds
            // the starting core
            GetParent()->fCoreUsage.resize(1, 0);
            GetParent()->fCoreUsage[0] = procId;
#endif
         }

         fCalls.insert(fCalls.end(), GetParent());
         // Jump level
         ++GetLevel();
      }

#ifdef ACPROF_WITH_MULTITHREAD
      pthread_mutex_unlock(&fBigLock);
#endif
   }

   // Exit hook of every profiled function.
   //
   // Drops one reference on the current node. Once the last pending
   // activation has returned, the elapsed ticks and CPU times are added to
   // the node's totals, the core statistics are updated and the caller
   // becomes the current node again.
   advice execution(functions() && !profiler()) : after() {
      clock_t now = clock();
      timeval total, add;
      rusage current;
      int procId = sched_getcpu();
      TProfilerAssert(getrusage(RUSAGE_SELF, &current) == 0);
      unsigned long matrixId;
      // Slot of fCoreUsage holding the core on which we started (tracking only)
      unsigned long startSlot = 0;
#ifdef ACPROF_WITH_MULTITHREAD
      unsigned long threadId;

      // GetThreadId() updates the shared threads list: only call it once the
      // lock is held
      pthread_mutex_lock(&fBigLock);

      threadId = GetThreadId();
      startSlot = threadId;

      TProfilerAssert(fParent.size() > threadId);
      TProfilerAssert(fLevel.size() > threadId);
#endif

      TProfilerAssert(GetParent() != 0);
      TProfilerAssert(GetParent()->fStartTicks != -1);
      TProfilerAssert(GetParent()->fReferenceCount > 0);

      --GetParent()->fReferenceCount;

      // Are we done?
      if (GetParent()->fReferenceCount == 0) {
         TProfilerAssert(now >= GetParent()->fStartTicks);
         // Use '<' and not '>=' due to man 3 timeradd recommendation
         TProfilerAssert(!timercmp(&current.ru_utime, &GetParent()->fStartTime.fUserTime, <));
         TProfilerAssert(!timercmp(&current.ru_stime, &GetParent()->fStartTime.fKernelTime, <));

         // Update ticks count
         GetParent()->fTotalTicks += (now - GetParent()->fStartTicks);
         // Update user time
         timersub(&current.ru_utime, &GetParent()->fStartTime.fUserTime, &total);
         timeradd(&GetParent()->fTotalTime.fUserTime, &total, &add);
         GetParent()->fTotalTime.fUserTime = add;
         // Update kernel time
         timersub(&current.ru_stime, &GetParent()->fStartTime.fKernelTime, &total);
         timeradd(&GetParent()->fTotalTime.fKernelTime, &total, &add);
         GetParent()->fTotalTime.fKernelTime = add;
         // Mark the node as not running
         GetParent()->fStartTicks = -1;

         // Update CPU on which we ended and update
         if (fWithCoreUsage) {
            // The core number is used as an index: check both bounds
            TProfilerAssert(procId >= 0 && (unsigned long)procId < GetParent()->fCoreUsage.size());
            GetParent()->fCoreUsage[procId]++;
         } else if (fWithCoreTrack && GetParent()->fCoresTracks) {
            TProfilerAssert(procId >= 0 && (unsigned long)procId < GetParent()->fNbProcs);
#ifdef ACPROF_WITH_MULTITHREAD
            TProfilerAssert(threadId < fThreadsId.size());
#endif
            // The starting core must have been saved, and must be a valid one
            TProfilerAssert(startSlot < GetParent()->fCoreUsage.size());
            TProfilerAssert(GetParent()->fCoreUsage[startSlot] < GetParent()->fNbProcs);
            // Line is starting core, row is ending core
            matrixId = GetParent()->fCoreUsage[startSlot] * GetParent()->fNbProcs + procId;
            GetParent()->fCoresTracks[matrixId]++;
         }

         // Decrease level
         --GetLevel();
         // Change parent
         GetParent() = GetParent()->fParent;
      }

#ifdef ACPROF_WITH_MULTITHREAD
      pthread_mutex_unlock(&fBigLock);
#endif
   }

// Thread creation hand-off: the creating thread publishes its current node
// and depth in fThreadCreator / fCreationLevel while holding fCreateLock, and
// waits on the fReleaseLock semaphore, which is posted by the new thread's
// first profiled function once it has read them.
//
// XXX: Commented out because for whatever reason, C++ generated
// that way is wrong and cannot be built by G++
#if 0
#ifdef ACPROF_WITH_MULTITHREAD
   advice execution(thread_create()) : around() {
      unsigned long threadId;

      // Get current thread ID
      threadId = GetThreadId();
      // First, lock the create lock, to ensure that we have only one thread
      // creation at a time, to keep track of the caller
      pthread_mutex_lock(&fCreateLock);
      // Store the ID of the thread creator
      fThreadCreator = GetParent();
      // And store the level of the current thread
      fCreationLevel = GetLevel();
      // Initialise the semaphore
      sem_init(fReleaseLock, 0, 0);

      // Finally create thread
      JoinPoint::proceed();

      // Wait until data has been read
      sem_wait(fReleaseLock);
      // Destroy semaphore
      sem_destroy(fReleaseLock);
      // Release lock
      pthread_mutex_unlock(&fCreateLock);
   }
#endif
#else
#ifdef ACPROF_WITH_MULTITHREAD
   // Same protocol as above, split into a before and an after advice.
   // NOTE(review): GetThreadId(), GetParent() and GetLevel() are used here
   // without holding fBigLock, unlike in the function advices - confirm
   // whether this can race with another thread registering itself.
   advice execution(thread_create()) : before() {
      // Implicitly used by the GetParent() / GetLevel() macros
      unsigned long threadId;

      // Get current thread ID
      threadId = GetThreadId();
      // First, lock the create lock, to ensure that we have only one thread
      // creation at a time, to keep track of the caller
      pthread_mutex_lock(&fCreateLock);
      // Store the ID of the thread creator
      fThreadCreator = GetParent();
      // And store the level of the current thread
      fCreationLevel = GetLevel();
      // Initialise the semaphore
      sem_init(fReleaseLock, 0, 0);
   }

   // NOTE(review): the wait below does not look at the result of the thread
   // creation; if the creation failed, presumably nobody ever posts the
   // semaphore - TODO confirm and handle.
   advice execution(thread_create()) : after() {
      // Wait until data has been read
      sem_wait(fReleaseLock);
      // Destroy semaphore
      sem_destroy(fReleaseLock);
      // Release lock
      pthread_mutex_unlock(&fCreateLock);
   }
#endif
#endif

   TProfiler() {
      fFormat = kStdOut;
#ifdef ACPROF_WITH_MULTITHREAD
      fLevel.resize(0, 0);
      fParent.resize(0, 0);
      pthread_mutex_init(&fBigLock, 0);
      pthread_mutex_init(&fCreateLock, 0);
      fReleaseLock = new sem_t;
#else
      fLevel = 0;
      fParent = 0;
#endif
   }

   // Writes the report in the selected format, then releases the call graph
   // and the argument vector allocated for main().
   //
   // - stdout format: one line per leaf call path, then per-function totals
   //   (calls, ticks, user and kernel time in milliseconds) and, when enabled,
   //   the per-core usage or core migration counters.
   // - callgrind format: a "callgrind.out.<pid>" file with, for each node, its
   //   own cost followed by the cost of each of its callees.
   //
   // Times are reported in milliseconds: seconds * 1000 + microseconds / 1000.
   // Buffers obtained through operator new() are released with operator
   // delete(), never with a delete expression.
   ~TProfiler() {
#ifdef ACPROF_WITH_MULTITHREAD
      pthread_mutex_lock(&fBigLock);
#endif

      //TProfilerAssert(fLevel == 0);
      TProfilerAssert(!fCalls.empty());

      if (fFormat == kStdOut) {
         std::cout << std::endl;

         std::map<std::string, TTotalInfo> total;

         // Display graphs at the end, deepest level first: that way, a node
         // is always released before its callers
         for (int level = fCalls.size() - 1; level >= 0; --level) {
            TCallInfo * child = fCalls[level];
            while (child != 0) {
               // No child -> display and head back
               if (child->fChildren == 0) {
                  TCallInfo * parent = child;
                  while (parent != 0) {
                     std::cout << parent->fFunction;

                     parent = parent->fParent;
                     if (parent != 0) {
                        std::cout << " <- ";
                     } else {
                        std::cout << std::endl;
                     }
                  }
               }

               // Size, in bytes, of the core tracking matrix of that node
               const unsigned long matrixBytes = sizeof(unsigned long) * child->fNbProcs * child->fNbProcs;

               // Compute total resources
               std::map<std::string, TTotalInfo>::iterator tot = total.find(child->fFunction);
               if (tot != total.end()) {
                  timeval tv;
                  tot->second.fCalls += child->fCalls;
                  tot->second.fTotalTicks += child->fTotalTicks;
                  timeradd(&tot->second.fTotalTime.fUserTime, &child->fTotalTime.fUserTime, &tv);
                  tot->second.fTotalTime.fUserTime = tv;
                  timeradd(&tot->second.fTotalTime.fKernelTime, &child->fTotalTime.fKernelTime, &tv);
                  tot->second.fTotalTime.fKernelTime = tv;

                  if (fWithCoreUsage) {
                     // Update
                     TProfilerAssert(child->fCoreUsage.size() == tot->second.fCoreUsage.size());
                     for (unsigned int i = 0; i < child->fCoreUsage.size(); ++i) {
                        tot->second.fCoreUsage[i] += child->fCoreUsage[i];
                     }
                  } else if (fWithCoreTrack && child->fCoresTracks) {
                     TProfilerAssert(child->fNbProcs > 0);
                     if (tot->second.fCoresTracks == 0) {
                        // If we don't have place yet, do some
                        tot->second.fCoresTracks = (unsigned long *)operator new(matrixBytes);
                        // And copy our stuff
                        memcpy(tot->second.fCoresTracks, child->fCoresTracks, matrixBytes);
                        tot->second.fNbProcs = child->fNbProcs;
                     } else {
                        TProfilerAssert(child->fNbProcs == tot->second.fNbProcs);
                        // Merge results, index per index
                        for (unsigned int i = 0; i < child->fNbProcs; ++i) {
                           for (unsigned int j = 0; j < child->fNbProcs; ++j) {
                              *(tot->second.fCoresTracks + i * child->fNbProcs + j) += *(child->fCoresTracks + i * child->fNbProcs + j);
                           }
                        }
                     }
                  }
               } else {
                  TTotalInfo fresh(child->fTotalTicks, child->fTotalTime, child->fCalls, child->fFile, child->fLine);
                  // No matrix unless we copy one below: the display and the
                  // merge code rely on these values
                  fresh.fCoresTracks = 0;
                  fresh.fNbProcs = 0;
                  if (fWithCoreUsage) {
                     fresh.fCoreUsage.resize(child->fCoreUsage.size(), 0);
                     for (unsigned int i = 0; i < child->fCoreUsage.size(); ++i) {
                        fresh.fCoreUsage[i] = child->fCoreUsage[i];
                     }
                  } else if (fWithCoreTrack && child->fCoresTracks) {
                     TProfilerAssert(child->fNbProcs > 0);
                     // Just allocate
                     fresh.fCoresTracks = (unsigned long *)operator new(matrixBytes);
                     // And copy our stuff
                     memcpy(fresh.fCoresTracks, child->fCoresTracks, matrixBytes);
                     fresh.fNbProcs = child->fNbProcs;
                  }
                  total.insert(std::pair<std::string, TTotalInfo>(child->fFunction, fresh));
               }

               // Release and go to the next
               TCallInfo * next = child->fNeighbor;
               // Releasing a null pointer is allowed
               operator delete(child->fCoresTracks);
               delete child;
               child = next;
            }
         }

         std::cout << std::endl;

         // Display data at the end
         for (std::map<std::string, TTotalInfo>::const_iterator tot = total.begin();
              tot != total.end(); ++tot) {
            std::cout << "Function: " << tot->first.c_str() << " (" << tot->second.fFile << ":" << tot->second.fLine << "), calls: " << tot->second.fCalls;
            unsigned long totalTime = tot->second.fTotalTime.fUserTime.tv_sec * 1000 + tot->second.fTotalTime.fUserTime.tv_usec / 1000;
            std::cout << ", total ticks: " << tot->second.fTotalTicks << ", total user time: " << totalTime << "ms, total kernel time: ";
            totalTime = tot->second.fTotalTime.fKernelTime.tv_sec * 1000 + tot->second.fTotalTime.fKernelTime.tv_usec / 1000;
            std::cout << totalTime << std::endl;
         }

         if (fWithCoreUsage) {
            std::cout << std::endl;
            // Display core usage per function
            for (std::map<std::string, TTotalInfo>::const_iterator tot = total.begin();
                 tot != total.end(); ++tot) {
               std::cout << "Function: " << tot->first.c_str() << std::endl;
               for (unsigned int j = 0; j < tot->second.fCoreUsage.size(); ++j) {
                  // Prune cores with no calls
                  if (tot->second.fCoreUsage[j] > 0) {
                     std::cout << "Core " << j << ": " << tot->second.fCoreUsage[j] << " calls" << std::endl;
                  }
               }
            }
         } else if (fWithCoreTrack) {
            std::cout << std::endl;
            // Display core migrations per function
            for (std::map<std::string, TTotalInfo>::const_iterator tot = total.begin();
                 tot != total.end(); ++tot) {
               std::cout << "Function: " << tot->first.c_str() << std::endl;
               for (unsigned long i = 0; i < tot->second.fNbProcs; ++i) {
                  for (unsigned long j = 0; j < tot->second.fNbProcs; ++j) {
                     unsigned long value = *(tot->second.fCoresTracks + i * tot->second.fNbProcs + j);
                     if (value > 0) {
                        std::cout << "Core " << i << " to core " << j << ": " <<
                                     value << " calls" << std::endl;
                     }
                  }
               }

               // Don't leak memory
               operator delete(tot->second.fCoresTracks);
            }
         }

         std::cout << std::endl;
      } else {
         std::ofstream outFile;
         std::stringstream file;
         int pid = getpid();

         // Get output file name
         file << "callgrind.out." << pid;
         outFile.open(file.str().c_str());

         // Put PID
         outFile << "pid: " << pid << std::endl;

         // Get command line
         outFile << "cmd:";
         for (int i = 0; i < fOldArgc; ++i) {
            // If zero => we removed args, there's nothing after
            if (fNewArgv[i] == 0) {
               break;
            }
            outFile << " " << fNewArgv[i];
         }
         outFile << std::endl;

         outFile << "events: ticks utime ktime" << std::endl;

         // Output summary (ie, total events)
         TCallInfo * child = fCalls[0];
         outFile << "summary: " << child->fTotalTicks << std::endl;

         unsigned long previousTicks = fCalls[0]->fTotalTicks;
         for (unsigned int level = 0; level < fCalls.size(); level++) {
            unsigned long currentTicks = 0;
            child = fCalls[level];
            for (; child != 0; child = child->fNeighbor) {
               outFile << std::endl;
               // First display data about the calling function
               outFile << "fl=" << child->fFile << std::endl;
               outFile << "fn=" << child->fFunction << std::endl;
               unsigned long totalTime = child->fTotalTime.fUserTime.tv_sec * 1000 + child->fTotalTime.fUserTime.tv_usec / 1000;
               outFile << child->fLine << " " << child->fTotalTicks << " " << totalTime << " ";
               totalTime = child->fTotalTime.fKernelTime.tv_sec * 1000 + child->fTotalTime.fKernelTime.tv_usec / 1000;
               outFile << totalTime << std::endl;

               // Then display each of the callee. Callees are the nodes of
               // the next level having us as parent: that list is shared by
               // every node of our level, so it has to be filtered
               if (level + 1 < fCalls.size()) {
                  for (TCallInfo * callee = fCalls[level + 1]; callee != 0; callee = callee->fNeighbor) {
                     if (callee->fParent != child) {
                        continue;
                     }
                     outFile << "cfl=" << callee->fFile << std::endl;
                     outFile << "cfn=" << callee->fFunction << std::endl;
                     outFile << "calls=" << callee->fCalls << std::endl;
                     totalTime = callee->fTotalTime.fUserTime.tv_sec * 1000 + callee->fTotalTime.fUserTime.tv_usec / 1000;
                     outFile << callee->fLine << " " << callee->fTotalTicks << " " << totalTime << " ";
                     totalTime = callee->fTotalTime.fKernelTime.tv_sec * 1000 + callee->fTotalTime.fKernelTime.tv_usec / 1000;
                     outFile << totalTime << std::endl;
                  }
               }

               currentTicks += child->fTotalTicks;
            }

            // A level cannot have consumed more than the level calling it
            TProfilerAssert(previousTicks >= currentTicks);
            previousTicks = currentTicks;
         }

         // Release memory
         for (int level = fCalls.size() - 1; level >= 0; --level) {
            // Get call stack
            child = fCalls[level];
            while (child != 0) {
               // Release and go to the next
               TCallInfo * next = child->fNeighbor;
               // Releasing a null pointer is allowed
               operator delete(child->fCoresTracks);
               delete child;
               child = next;
            }
         }

         outFile.close();
      }

      // Allocated with operator new() when main() started
      operator delete(fNewArgv);

#ifdef ACPROF_WITH_MULTITHREAD
      pthread_mutex_destroy(&fCreateLock);
      delete fReleaseLock;
      pthread_mutex_unlock(&fBigLock);
      pthread_mutex_destroy(&fBigLock);
#endif
   }

#ifdef ACPROF_WITH_MULTITHREAD
   // Thread creation entry point to be used by profiled applications: same
   // arguments and result as the system pthread_create(). It only exists to
   // give the thread_create() pointcut something to match.
   static int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                             void *(*start_routine) (void *), void *arg) {
      // Just create the thread
      // Everything will have been done in the aspect
      // The call has to be qualified: an unqualified pthread_create would
      // designate this very function and recurse forever
      return ::pthread_create(thread, attr, start_routine, arg);
   }
#endif

private:
   void InternalAssert(const char * file, unsigned int line, const char * function, const char * expression) {
      std::cerr << file << ":" << line << ": " << function << ": Assertion `" << expression << "' failed." << std::endl;
#ifdef ACPROF_WITH_MULTITHREAD
      // XXX: The assert has to be threads aware!
#else
      std::cerr << "Current level: " << fLevel << std::endl;
      std::cerr << "Current parent: " << fParent << std::endl;
#endif
      std::cerr << "Call tree size: " << fCalls.size() << std::endl;
      for (unsigned int i = 0; i < fCalls.size(); ++i) {
         TCallInfo * child = fCalls[i];
         std::cerr << "Level " << i << std::endl;
         for (; child != 0; child = child->fNeighbor) {
            std::cerr << "\tChild: " << child << std::endl;
            std::cerr << "\tFunction: " << child->fFunction << std::endl;
            std::cerr << "\tCalls: " << child->fCalls << std::endl;
            std::cerr << "\tTotalTicks: " << child->fTotalTicks << std::endl;
            std::cerr << "\tStartTicks: " << child->fStartTicks << std::endl;
            std::cerr << "\tReferenceCount: " << child->fReferenceCount << std::endl;
            for (unsigned int j = 0; j < child->fCoreUsage.size(); ++j) {
               std::cerr << "\tCoreUsage (" << j << "): " << child->fCoreUsage[j] << std::endl;
            }
            for (TCallInfo * callee = child->fChildren; callee != 0; callee = callee->fNeighbor) {
               std::cerr << "\t\tCallee: " << callee << std::endl;
            }
            std::cerr << "\tNbProcs: " << child->fNbProcs << std::endl;
         }
      }
      throw;
   }

   // Call graph: one entry per call depth, each entry being the first node of
   // the list (linked through fNeighbor) of the nodes recorded at that depth
   std::vector<TCallInfo *> fCalls;
#ifdef ACPROF_WITH_MULTITHREAD
   // Current depth and current node, per thread (indexed by GetThreadId())
   std::vector<unsigned int> fLevel;
   std::vector<TCallInfo *> fParent;
#else
   // Current depth and current node
   unsigned int fLevel;
   TCallInfo * fParent;
#endif
   // Command line as received by main(), restored once main() returns
   int fOldArgc;
   char ** fOldArgv;
   // Filtered argument vector handed to main(), owned by the profiler
   char ** fNewArgv;
   // Selected report format
   EOutput fFormat;
   // Core statistics to gather: usage and tracking are never both enabled
   bool fWithCoreUsage;
   bool fWithCoreTrack;

#ifdef ACPROF_WITH_MULTITHREAD
   // Translates the calling thread into a small index (0, 1, 2, ...) suitable
   // for indexing the per-thread vectors.
   //
   // Threads are numbered in the order in which they are first seen, and a
   // thread keeps its index forever. pthread_t values can only be compared
   // for equality (pthread_equal()), hence the linear search in an insertion
   // ordered vector rather than an ordered container.
   //
   // This function updates fThreadsId and does no locking by itself: calls
   // have to be serialized by the caller.
   unsigned long GetThreadId() {
      const pthread_t self = pthread_self();

      // Already known thread?
      for (unsigned long id = 0; id < fThreadsId.size(); ++id) {
         if (pthread_equal(fThreadsId[id], self) != 0) {
            return id;
         }
      }

      // New thread: it gets the next free index
      fThreadsId.push_back(self);
      return fThreadsId.size() - 1;
   }

   // Protects the call graph and the per-thread state
   pthread_mutex_t fBigLock;
   // Node and depth handed over by a thread to the thread it is creating
   void * fThreadCreator;
   unsigned int fCreationLevel;
   // Serializes thread creations, held during the whole hand-over
   pthread_mutex_t fCreateLock;
   // Posted by a new thread once it has read the handed over data
   sem_t * fReleaseLock;
   // Known threads, the position of a thread being its simplified ID
   std::vector<pthread_t> fThreadsId;
#endif
};

#endif
Browse the archive

https://github.com/HeisSpiter/acprof