SFrame 3.6
core/src/SFileMerger.cxx
Go to the documentation of this file.
00001 // $Id: SFileMerger.cxx 335 2012-11-21 14:11:47Z krasznaa $
00002 /***************************************************************************
00003  * @Project: SFrame - ROOT-based analysis framework for ATLAS
00004  * @Package: Core
00005  *
00006  * @author Stefan Ask       <Stefan.Ask@cern.ch>           - Manchester
00007  * @author David Berge      <David.Berge@cern.ch>          - CERN
00008  * @author Johannes Haller  <Johannes.Haller@cern.ch>      - Hamburg
00009  * @author A. Krasznahorkay <Attila.Krasznahorkay@cern.ch> - CERN/Debrecen
00010  *
00011  ***************************************************************************/
00012 
00013 // STL include(s):
00014 #include <set>
00015 #include <string>
00016 
00017 // ROOT include(s):
00018 #include <TObject.h>
00019 #include <TFile.h>
00020 #include <TList.h>
00021 #include <TTree.h>
00022 #include <TKey.h>
00023 #include <TSystem.h>
00024 #include <TUUID.h>
00025 #include <TMethodCall.h>
00026 
00027 // Local include(s):
00028 #include "../include/SFileMerger.h"
00029 
00030 SFileMerger::SFileMerger()
00031    : m_inputFiles(), m_outputFile( 0 ), m_logger( "SFileMerger" ) {
00032 
00033 }
00034 
00035 SFileMerger::~SFileMerger() {
00036 
00037    // Close all files before deleting the object
00038    CloseFiles();
00039 }
00040 
00048 Bool_t SFileMerger::AddFile( const TString& fileName ) throw( SError ) {
00049 
00050    //
00051    // Copy the file locally. This is important when reading an ntuple file
00052    // from a remote PROOF farm that might be half way around the world...
00053    //
00054    TUUID uuid;
00055    const TString localName =
00056       TString::Format( "%s/SFRAMEMERGE-%s.root",
00057                        gSystem->TempDirectory(), uuid.AsString() );
00058    if( ! TFile::Cp( fileName, localName, kTRUE ) ) {
00059       REPORT_ERROR( "Couldn't create local copy of: " << fileName );
00060       throw SError( "Couldn't create local copy of: " + fileName,
00061                     SError::SkipCycle );
00062       return kFALSE;
00063    }
00064    REPORT_VERBOSE( fileName << " copied locally as " << localName );
00065 
00066    //
00067    // Try to open the specified file. Throw an exception if it wasn't possible.
00068    //
00069    TFile* ifile = TFile::Open( localName, "READ" );
00070    if( ! ifile ) {
00071       REPORT_ERROR( "Local file could not be opened: " << localName );
00072       throw SError( "Local file could not be opened: " + fileName,
00073                     SError::SkipCycle );
00074       return kFALSE;
00075    }
00076    m_inputFiles.push_back( ifile );
00077    REPORT_VERBOSE( localName << " opened for reading" );
00078 
00079    // Return gracefully:
00080    return kTRUE;
00081 }
00082 
00091 Bool_t SFileMerger::OutputFile( const TString& fileName,
00092                                 const TString& mode ) throw( SError ) {
00093 
00094    //
00095    // Try to open the specified output file. Throw an expection in case of
00096    // problems.
00097    //
00098    TFile* ofile = TFile::Open( fileName, mode );
00099    if( ! ofile ) {
00100       REPORT_ERROR( "Couldn't open output file \"" << fileName
00101                     << "\" in mode \"" << mode << "\"" );
00102       throw SError( "Output file could not be opened: " + fileName,
00103                     SError::SkipCycle );
00104       return kFALSE;
00105    }
00106    m_outputFile = ofile;
00107    REPORT_VERBOSE( fileName << " opened for writing" );
00108 
00109    // Return gracefully:
00110    return kTRUE;
00111 }
00112 
00121 Bool_t SFileMerger::Merge() throw( SError ) {
00122 
00123    //
00124    // Check that we have both input(s) and an output:
00125    //
00126    if( ! m_outputFile ) {
00127       REPORT_ERROR( "Merge(): Output file not specified yet" );
00128       return kFALSE;
00129    }
00130    if( ! m_inputFiles.size() ) {
00131       m_logger << WARNING
00132                << "Merge(): No input files specified. Noting to be done..."
00133                << SLogger::endmsg;
00134       return kFALSE;
00135    }
00136 
00137    m_logger << DEBUG << "Running file merging..." << SLogger::endmsg;
00138 
00139    //
00140    // Loop over all input files:
00141    //
00142    for( std::vector< TFile* >::const_iterator ifile = m_inputFiles.begin();
00143         ifile != m_inputFiles.end(); ++ifile ) {
00144 
00145       REPORT_VERBOSE( "Now processing file: " << ( *ifile )->GetName() );
00146 
00147       // Call the recursive merging function:
00148       MergeDirectory( *ifile, m_outputFile );
00149    }
00150 
00151    //
00152    // Make sure that everything in the output is written out:
00153    //
00154    m_outputFile->SaveSelf( kTRUE );
00155    CloseFiles();
00156 
00157    // Return gracefully:
00158    return kTRUE;
00159 }
00160 
00161 void SFileMerger::CloseFiles() {
00162 
00163    for( std::vector< TFile* >::iterator ifile = m_inputFiles.begin();
00164         ifile != m_inputFiles.end(); ++ifile ) {
00165       ( *ifile )->Close();
00166       // Remove the local copy of the file:
00167       TString p( ( *ifile )->GetPath() );
00168       p = p( 0, p.Index( ':', 0 ) );
00169       REPORT_VERBOSE( "Removing local file: " << p );
00170       gSystem->Unlink( p );
00171       delete ( *ifile );
00172    }
00173    m_inputFiles.clear();
00174    if( m_outputFile ) delete m_outputFile;
00175    m_outputFile = 0;
00176 
00177    return;
00178 }
00179 
00191 void SFileMerger::MergeDirectory( TDirectory* input,
00192                                   TDirectory* output ) throw( SError ) {
00193 
00194    // Get a list of all objects in this directory:
00195    TList* keyList = input->GetListOfKeys();
00196 
00197    //
00198    // Loop over all keys in the root directory, and select the ones describing
00199    // a TTree or a TDirectory. Since one single object can appear multiple times
00200    // in this list (with different "cycles"), keep track of which objectss have
00201    // already been merged into the output.
00202    //
00203    std::set< std::string > processedObjects;
00204    for( Int_t i = 0; i < keyList->GetSize(); ++i ) {
00205 
00206       // Convert to a TKey:
00207       TKey* key = dynamic_cast< TKey* >( keyList->At( i ) );
00208       if( ! key ) {
00209          REPORT_ERROR( "Couldn't cast to TKey. There is some problem in the "
00210                        "code" );
00211          throw SError( "Couldn't cast to TKey. There is some problem in the "
00212                        "code", SError::StopExecution );
00213       }
00214 
00215       //
00216       // Check whether we already processed an object with this name:
00217       //
00218       REPORT_VERBOSE( "Processing key with name: " << key->GetName()
00219                       << ";" << key->GetCycle() );
00220       if( processedObjects.find( key->GetName() ) != processedObjects.end() ) {
00221          m_logger << DEBUG << "Object \"" << key->GetName()
00222                   << "\" has already been processed" << SLogger::endmsg;
00223          continue;
00224       }
00225 
00226       //
00227       // Get the object:
00228       //
00229       TObject* obj = input->Get( key->GetName() );
00230       if( ! obj ) {
00231          REPORT_ERROR( "Couldn't access object with name '" << key->GetName()
00232                        << "'" );
00233          throw SError( "Couldn't access object for which we got a key",
00234                        SError::StopExecution );
00235       }
00236 
00237       //
00238       // Decide how to handle this object:
00239       //
00240       if( obj->IsA()->InheritsFrom( "TDirectory" ) ) {
00241 
00242          // Access the input object as a directory:
00243          TDirectory* indir = dynamic_cast< TDirectory* >( obj );
00244          if( ! indir ) {
00245             REPORT_ERROR( "Couldn't cast to object to TDirectory" );
00246             continue;
00247          }
00248 
00249          // Check if such a directory already exists in the output:
00250          TDirectory* outdir =
00251             dynamic_cast< TDirectory* >( output->Get( key->GetName() ) );
00252          // If it doesn't let's create it:
00253          if( ! outdir ) {
00254             if( ! ( outdir = output->mkdir( key->GetName(),
00255                                             "dummy title" ) ) ) {
00256                REPORT_ERROR( "Failed creating subdirectory with name: "
00257                              << key->GetName() );
00258                throw SError( "Failed creating subdirectory",
00259                              SError::SkipInputData );
00260             }
00261          }
00262 
00263          // Now call this same function recursively:
00264          MergeDirectory( indir, outdir );
00265 
00266       } else if( obj->IsA()->InheritsFrom( "TTree" ) ) {
00267 
00268          //
00269          // See if such a TTree exists in the output already:
00270          //
00271          TTree* otree = 0;
00272          if( ( otree =
00273                dynamic_cast< TTree* >( output->Get( key->GetName() ) ) ) ) {
00274 
00275             //
00276             // If it does, then use the TTree:Merge function to copy the
00277             // contents of the TTree in the input file into the existing TTree
00278             // in the output file.
00279             //
00280             TList itrees;
00281             itrees.Add( obj );
00282             if( otree->Merge( &itrees ) ) {
00283                m_logger << DEBUG << "Merged tree \"" << obj->GetName()
00284                         << "\" from file: " << input->GetName()
00285                         << SLogger::endmsg;
00286                otree->AutoSave();
00287             } else {
00288                throw SError( TString( "There was a problem with merging "
00289                                       "trees \""  ) + obj->GetName() + "\"",
00290                              SError::SkipCycle );
00291             }
00292 
00293          } else {
00294 
00295             //
00296             // If it doesn't exist, then use the TTree::CloneTree function to
00297             // create a copy of the TTree in the input file. Then save this
00298             // copy into the output file.
00299             //
00300             output->cd();
00301             TTree* itree = 0;
00302             if( ! ( itree = dynamic_cast< TTree* >( obj ) ) ) {
00303                REPORT_ERROR( "Coulnd't dynamic cast object to TTree" );
00304                continue;
00305             }
00306 
00307             //
00308             // TTree::MergeTrees would crash in case the input TTree is empty,
00309             // so instead let's use TTree::CloneTree
00310             //
00311             if( ( otree = itree->CloneTree( -1, "fast" ) ) ) {
00312                m_logger << DEBUG << "Cloned tree \"" << itree->GetName()
00313                         << "\" into file: " << m_outputFile->GetName()
00314                         << SLogger::endmsg;
00315                otree->SetDirectory( output );
00316                otree->AutoSave();
00317             } else {
00318                throw SError( TString( "Tree \"" ) + itree->GetName() +
00319                              "\" couldn't be cloned into the output",
00320                              SError::SkipCycle );
00321             }
00322 
00323          }
00324 
00325          // Remember that this TTree has already been processed:
00326          processedObjects.insert( obj->GetName() );
00327 
00328       } else if( obj->IsA()->InheritsFrom( "TObject" ) ) {
00329 
00330          // Check if the object is already in the output:
00331          TObject* oobj = output->Get( key->GetName() );
00332 
00333          // Decide what to do:
00334          if( oobj ) {
00335             // If the object already exists, merge the new object into it:
00336             MergeObjects( obj, oobj );
00337             m_logger << DEBUG << "Merged object \"" << obj->GetName()
00338                      << "\" into file: " << m_outputFile->GetName()
00339                      << SLogger::endmsg;
00340          } else {
00341             // If the object doesn't exist yet, just write this object to the
00342             // output:
00343             output->cd();
00344             obj->Write();
00345             m_logger << DEBUG << "Cloned object \"" << obj->GetName()
00346                      << "\" into file: " << m_outputFile->GetName()
00347                      << SLogger::endmsg;
00348          }
00349       }
00350 
00351    }
00352 
00353    return;
00354 }
00355 
00364 void SFileMerger::MergeObjects( TObject* in, TObject* out ) throw( SError ) {
00365 
00366    // Put the input object into a list:
00367    TList list;
00368    list.Add( in );
00369 
00370    //
00371    // Make sure that the output object supports merging:
00372    //
00373    TMethodCall mergeMethod;
00374    mergeMethod.InitWithPrototype( out->IsA(), "Merge", "TCollection*" );
00375    if( ! mergeMethod.IsValid() ) {
00376       REPORT_ERROR( "Object type \"" << out->ClassName()
00377                     << "\" doesn't support merging" );
00378       return;
00379    }
00380 
00381    //
00382    // Execute the merging:
00383    //
00384    mergeMethod.SetParam( ( Long_t ) &list );
00385    mergeMethod.Execute( out );
00386 
00387    // Let the user know what we did:
00388    REPORT_VERBOSE( "Merged objects of type \"" << out->ClassName()
00389                    << "\" and name: " << out->GetName() );
00390 
00391    // Return gracefully:
00392    return;
00393 }