Revision 103bccd4b134a2238bfc2ad6dfda13db4993c612 authored by Fons Rademakers on 08 August 2011, 10:30:56 UTC, committed by Fons Rademakers on 08 August 2011, 10:30:56 UTC

git-svn-id: http://root.cern.ch/svn/root/tags/v5-28-00f@40489 27541ba8-7e3a-0410-8455-c3a389f83636
1 parent efce85e
Raw File
hadd.cxx
/*

  This program will add histograms (see note) and Trees from a list of root files and write them
  to a target root file. The target file is newly created and must not be
  identical to one of the source files.

  Syntax:

       hadd targetfile source1 source2 ...
    or
       hadd -f targetfile source1 source2 ...
         (targetfile is overwritten if it exists)

  When -the -f option is specified, one can also specify the compression
  level of the target file. By default the compression level is 1, but
  if "-f0" is specified, the target file will not be compressed.
  if "-f6" is specified, the compression level 6 will be used.

  For example assume 3 files f1, f2, f3 containing histograms hn and Trees Tn
    f1 with h1 h2 h3 T1
    f2 with h1 h4 T1 T2
    f3 with h5
   the result of
     hadd -f x.root f1.root f2.root f3.root
   will be a file x.root with h1 h2 h3 h4 h5 T1 T2
   where h1 will be the sum of the 2 histograms in f1 and f2
         T1 will be the merge of the Trees in f1 and f2

   The files may contain sub-directories.

  if the source files contains histograms and Trees, one can skip
  the Trees with
       hadd -T targetfile source1 source2 ...

  Wildcarding and indirect files are also supported
    hadd result.root  myfil*.root
   will merge all files in myfil*.root
    hadd result.root file1.root @list.txt file2. root myfil*.root
    will merge file1. root, file2. root, all files in myfil*.root
    and all files in the indirect text file list.txt ("@" as the first
    character of the file indicates an indirect file. An indirect file
    is a text file containing a list of other files, including other
    indirect files, one line per file).

  If the sources and and target compression levels are identical (default),
  the program uses the TChain::Merge function with option "fast", ie
  the merge will be done without  unzipping or unstreaming the baskets
  (i.e. direct copy of the raw byte on disk). The "fast" mode is typically
  5 times faster than the mode unzipping and unstreaming the baskets.

  NOTE1: By default histograms are added. However hadd does not support the case where
         histograms have their bit TH1::kIsAverage set.

  NOTE2: hadd returns a status code: 0 if OK, -1 otherwise

  Authors: Rene Brun, Dirk Geppert, Sven A. Schmidt, sven.schmidt@cern.ch
         : rewritten from scratch by Rene Brun (30 November 2005)
            to support files with nested directories.
           Toby Burnett implemented the possibility to use indirect files.
 */

#include "RConfig.h"
#include <string>
#include "TChain.h"
#include "TFile.h"
#include "THashList.h"
#include "TH1.h"
#include "THStack.h"
#include "TKey.h"
#include "TObjString.h"
#include "Riostream.h"
#include "TClass.h"
#include "TSystem.h"
#include <stdlib.h>

TList *FileList;
TFile *Target, *Source;
Bool_t noTrees;
Bool_t fastMethod;
Bool_t reoptimize;

int AddFile(TList* sourcelist, std::string entry, int newcomp) ;
int MergeRootfile( TDirectory *target, TList *sourcelist);

//___________________________________________________________________________
int main( int argc, char **argv )
{

   if ( argc < 3 || "-h" == string(argv[1]) || "--help" == string(argv[1]) ) {
      cout << "Usage: " << argv[0] << " [-f[0-9]] [-T] [-O] targetfile source1 [source2 source3 ...]" << endl;
      cout << "This program will add histograms from a list of root files and write them" << endl;
      cout << "to a target root file. The target file is newly created and must not " << endl;
      cout << "exist, or if -f (\"force\") is given, must not be one of the source files." << endl;
      cout << "Supply at least two source files for this to make sense... ;-)" << endl;
      cout << "If the option -T is used, Trees are not merged" <<endl;
      cout << "If the option -O is used, when merging TTree, the basket size is re-optimized" <<endl;
      cout << "When -the -f option is specified, one can also specify the compression" <<endl;
      cout << "level of the target file. By default the compression level is 1, but" <<endl;
      cout << "if \"-f0\" is specified, the target file will not be compressed." <<endl;
      cout << "if \"-f6\" is specified, the compression level 6 will be used." <<endl;
      cout << "if Target and source files have different compression levels"<<endl;
      cout << " a slower method is used"<<endl;
      return 1;
   }
   FileList = new TList();

   Bool_t force = kFALSE;
   reoptimize = kFALSE;
   noTrees = kFALSE;

   int ffirst = 2;
   Int_t newcomp = 1;
   for( int a = 1; a < argc; ++a ) {
      if ( strcmp(argv[a],"-T") == 0 ) {
         noTrees = kTRUE;
         ++ffirst;
      } else if ( strcmp(argv[a],"-f") == 0 ) {
         force = kTRUE;
         ++ffirst;
      } else if ( strcmp(argv[a],"-O") == 0 ) {
         reoptimize = kTRUE;
         ++ffirst;
      } else if ( argv[a][0] == '-' ) {
         char ft[4];
         for( int j=0; j<=9; ++j ) {
            snprintf(ft,4,"-f%d",j);
            if (!strcmp(argv[a],ft)) {
               force = kTRUE;
               newcomp = j;
               ++ffirst;
               break;
            }
         }
      }
   }

   gSystem->Load("libTreePlayer");

   cout << "Target file: " << argv[ffirst-1] << endl;

   Target = TFile::Open( argv[ffirst-1], (force?"RECREATE":"CREATE") );
   if (!Target || Target->IsZombie()) {
      cerr << "Error opening target file (does " << argv[ffirst-1] << " exist?)." << endl;
      cerr << "Pass \"-f\" argument to force re-creation of output file." << endl;
      exit(1);
   }
   Target->SetCompressionLevel(newcomp);

   // by default hadd can merge Trees in a file that can go up to 100 Gbytes
   // No need to set this, as 100Gb is now the TTree default
   // Long64_t maxsize = 100000000; //100GB
   // maxsize *= 1000;  //to bypass some compiler limitations with big constants
   // TTree::SetMaxTreeSize(maxsize);

   fastMethod = kTRUE;
   for ( int i = ffirst; i < argc; i++ ) {
      if( AddFile(FileList, argv[i], newcomp) !=0 ) return 1;
   }
   if (!fastMethod && !reoptimize) {
      // Don't warn if the user any request re-optimization.
      cout <<"Sources and Target have different compression levels"<<endl;
      cout <<"Merging will be slower"<<endl;
   }

   int status = MergeRootfile( Target, FileList);

   //must delete Target to avoid a problem with dictionaries in~ TROOT
   delete Target;

   return status;
}

//___________________________________________________________________________
int AddFile(TList* sourcelist, std::string entry, int newcomp)
{
   // add a new file to the list of files
   static int count(0);
   if( entry.empty() ) return 0;
   size_t j =entry.find_first_not_of(' ');
   if( j==std::string::npos ) return 0;
   entry = entry.substr(j);
   if( entry.substr(0,1)=="@"){
      std::ifstream indirect_file(entry.substr(1).c_str() );
      if( ! indirect_file.is_open() ) {
         std::cerr<< "Could not open indirect file " << entry.substr(1) << std::endl;
         return 1;
      }
      while( indirect_file ){
         std::string line;
         std::getline(indirect_file, line);
         if( AddFile(sourcelist, line, newcomp)!=0 )return 1;;
      }
      return 0;
   }
   cout << "Source file " << (++count) << ": " << entry << endl;

   TFile* source = TFile::Open( entry.c_str());
   if( source==0 ){
      return 1;
   }
   sourcelist->Add(source);
   if (newcomp != source->GetCompressionLevel()) fastMethod = kFALSE;
   return 0;
}


//___________________________________________________________________________
int MergeRootfile( TDirectory *target, TList *sourcelist)
{
   // Merge all objects in a directory
   int status = 0;
   cout << "Target path: " << target->GetPath() << endl;
   TString path( (char*)strstr( target->GetPath(), ":" ) );
   path.Remove( 0, 2 );

   TDirectory *first_source = (TDirectory*)sourcelist->First();
   Int_t nguess = sourcelist->GetSize()+1000;
   THashList allNames(nguess);
   ((THashList*)target->GetList())->Rehash(nguess);
   ((THashList*)target->GetListOfKeys())->Rehash(nguess);
   TList listH;
   TString listHargs;
   listHargs.Form("((TCollection*)0x%lx)", (ULong_t)&listH);
   while(first_source) {
      TDirectory *current_sourcedir = first_source->GetDirectory(path);
      if (!current_sourcedir) {
         first_source = (TDirectory*)sourcelist->After(first_source);
         continue;
      }

      // loop over all keys in this directory
      TChain *globChain = 0;
      TIter nextkey( current_sourcedir->GetListOfKeys() );
      TKey *key, *oldkey=0;
      //gain time, do not add the objects in the list in memory
      TH1::AddDirectory(kFALSE);

      while ( (key = (TKey*)nextkey())) {
         if (current_sourcedir == target) break;
         //keep only the highest cycle number for each key
         if (oldkey && !strcmp(oldkey->GetName(),key->GetName())) continue;
         if (!strcmp(key->GetClassName(),"TProcessID")) {key->ReadObj(); continue;}
         if (allNames.FindObject(key->GetName())) continue;
         TClass *cl = TClass::GetClass(key->GetClassName());
         if (!cl || !cl->InheritsFrom(TObject::Class())) {
            cout << "Cannot merge object type, name: "
                 << key->GetName() << " title: " << key->GetTitle() << endl;
            continue;
         }
         allNames.Add(new TObjString(key->GetName()));
         // read object from first source file
         //current_sourcedir->cd();
         TObject *obj = key->ReadObj();
         //printf("keyname=%s, obj=%x\n",key->GetName(),obj);

         if ( obj->IsA()->InheritsFrom( TTree::Class() ) ) {

            // loop over all source files create a chain of Trees "globChain"
            if (!noTrees) {
               TString obj_name;
               if (path.Length()) {
                  obj_name = path + "/" + obj->GetName();
               } else {
                  obj_name = obj->GetName();
               }
               globChain = new TChain(obj_name);
               globChain->Add(first_source->GetName());
               TFile *nextsource = (TFile*)sourcelist->After( first_source );
               while ( nextsource ) {
                  //do not add to the list a file that does not contain this Tree
                  TFile *curf = TFile::Open(nextsource->GetName());
                  if (curf) {
                     Bool_t mustAdd = kFALSE;
                     if (curf->FindKey(obj_name)) {
                        mustAdd = kTRUE;
                     } else {
                        //we could be more clever here. No need to import the object
                        //we are missing a function in TDirectory
                        TObject *aobj = curf->Get(obj_name);
                        if (aobj) { mustAdd = kTRUE; delete aobj;}
                     }
                     if (mustAdd) {
                        globChain->Add(nextsource->GetName());
                     }
                  }
                  delete curf;
                  nextsource = (TFile*)sourcelist->After( nextsource );
               }
            }
         } else if ( obj->IsA()->InheritsFrom( TDirectory::Class() ) ) {
            // it's a subdirectory

            cout << "Found subdirectory " << obj->GetName() << endl;
            // create a new subdir of same name and title in the target file
            target->cd();
            TDirectory *newdir = target->mkdir( obj->GetName(), obj->GetTitle() );

            // newdir is now the starting point of another round of merging
            // newdir still knows its depth within the target file via
            // GetPath(), so we can still figure out where we are in the recursion
            status = MergeRootfile( newdir, sourcelist);
            if (status) return status;

         } else if ( obj->InheritsFrom(TObject::Class())
              && obj->IsA()->GetMethodWithPrototype("Merge", "TCollection*") ) {
            // object implements Merge(TCollection*)

            // loop over all source files and merge same-name object
            TFile *nextsource = (TFile*)sourcelist->After( first_source );
            while ( nextsource ) {
               // make sure we are at the correct directory level by cd'ing to path
               TDirectory *ndir = nextsource->GetDirectory(path);
               if (ndir) {
                  ndir->cd();
                  TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(key->GetName());
                  if (key2) {
                     TObject *hobj = key2->ReadObj();
                     hobj->ResetBit(kMustCleanup);
                     listH.Add(hobj);
                     Int_t error = 0;
                     obj->Execute("Merge", listHargs.Data(), &error);
                     if (error) {
                        cerr << "Error calling Merge() on " << obj->GetName()
                             << " with the corresponding object in " << nextsource->GetName() << endl;
                     }
                     listH.Delete();
                  }
               }
               nextsource = (TFile*)sourcelist->After( nextsource );
            }
         } else if ( obj->IsA()->InheritsFrom( THStack::Class() ) ) {
            THStack *hstack1 = (THStack*) obj;
            TList* l = new TList();

            // loop over all source files and merge the histos of the
            // corresponding THStacks with the one pointed to by "hstack1"
            TFile *nextsource = (TFile*)sourcelist->After( first_source );
            while ( nextsource ) {
               // make sure we are at the correct directory level by cd'ing to path
               TDirectory *ndir = nextsource->GetDirectory(path);
               if (ndir) {
                  ndir->cd();
                  TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(hstack1->GetName());
                  if (key2) {
                    THStack *hstack2 = (THStack*) key2->ReadObj();
                    l->Add(hstack2->GetHists()->Clone());
                    delete hstack2;
                  }
               }

               nextsource = (TFile*)sourcelist->After( nextsource );
            }
            hstack1->GetHists()->Merge(l);
            l->Delete();
         } else {
            // object is of no type that we can merge
            cout << "Cannot merge object type, name: "
                 << obj->GetName() << " title: " << obj->GetTitle() << endl;

            // loop over all source files and write similar objects directly to the output file
            TFile *nextsource = (TFile*)sourcelist->After( first_source );
            while ( nextsource ) {
               // make sure we are at the correct directory level by cd'ing to path
               TDirectory *ndir = nextsource->GetDirectory(path);
               if (ndir) {
                  ndir->cd();
                  TKey *key2 = (TKey*)gDirectory->GetListOfKeys()->FindObject(key->GetName());
                  if (key2) {
                     TObject *nobj = key2->ReadObj();
                     nobj->ResetBit(kMustCleanup);
                     int nbytes1 = target->WriteTObject(nobj, key2->GetName(), "SingleKey" );
                     if (nbytes1 <= 0) status = -1;
                     delete nobj;
                  }
               }
               nextsource = (TFile*)sourcelist->After( nextsource );
            }
         }

         // now write the merged histogram (which is "in" obj) to the target file
         // note that this will just store obj in the current directory level,
         // which is not persistent until the complete directory itself is stored
         // by "target->Write()" below
         target->cd();

         //!!if the object is a tree, it is stored in globChain...
         if(obj->IsA()->InheritsFrom( TDirectory::Class() )) {
            //printf("cas d'une directory\n");
         } else if(obj->IsA()->InheritsFrom( TTree::Class() )) {
            if (!noTrees) {
               globChain->ls();
               if (fastMethod && !reoptimize) globChain->Merge(target->GetFile(),0,"keep fast");
               else                           globChain->Merge(target->GetFile(),0,"keep");
               delete globChain;
            }
         } else {
            int nbytes2 = obj->Write( key->GetName(), TObject::kSingleKey );
            if (nbytes2 <= 0) status = -1;
         }
         oldkey = key;
         delete obj;
      } // while ( ( TKey *key = (TKey*)nextkey() ) )
      first_source = (TDirectory*)sourcelist->After(first_source);
   }
   // save modifications to target file
   target->SaveSelf(kTRUE);
   return status;
}
back to top