SFrame 3.6
core/src/SInputData.cxx
Go to the documentation of this file.
00001 // $Id: SInputData.cxx 331 2012-11-20 17:12:44Z krasznaa $
00002 /***************************************************************************
00003  * @Project: SFrame - ROOT-based analysis framework for ATLAS
00004  * @Package: Core
00005  *
00006  * @author Stefan Ask       <Stefan.Ask@cern.ch>           - Manchester
00007  * @author David Berge      <David.Berge@cern.ch>          - CERN
00008  * @author Johannes Haller  <Johannes.Haller@cern.ch>      - Hamburg
00009  * @author A. Krasznahorkay <Attila.Krasznahorkay@cern.ch> - CERN/Debrecen
00010  *
00011  ***************************************************************************/
00012 
00013 // System include(s):
00014 #include <string.h>
00015 
00016 // ROOT include(s):
00017 #include <TFile.h>
00018 #include <TTree.h>
00019 #include <TChain.h>
00020 #include <TFileCollection.h>
00021 #include <TFileInfo.h>
00022 #include <THashList.h>
00023 #include <TDSet.h>
00024 #include <TProof.h>
00025 #include <TROOT.h>
00026 
00027 // Local include(s):
00028 #include "../include/SInputData.h"
00029 #include "../include/SError.h"
00030 #include "../include/SProofManager.h"
00031 #include "../include/STreeTypeDecoder.h"
00032 
00033 #ifndef DOXYGEN_IGNORE
00034 ClassImp( SDataSet );
00035 ClassImp( SFile );
00036 ClassImp( STree );
00037 ClassImp( SInputData );
00038 #endif // DOXYGEN_IGNORE
00039 
00040 using namespace std;
00041 
00042 // Define the constants:
00043 const Int_t STree::INPUT_TREE  = 0x1;
00044 const Int_t STree::OUTPUT_TREE = 0x2;
00045 const Int_t STree::EVENT_TREE  = 0x4;
00046 
00050 SDataSet& SDataSet::operator= ( const SDataSet& parent ) {
00051 
00052    this->name = parent.name;
00053 
00054    return *this;
00055 }
00056 
00066 Bool_t SDataSet::operator== ( const SDataSet& rh ) const {
00067 
00068    if( this->name == rh.name ) {
00069       return kTRUE;
00070    } else {
00071       return kFALSE;
00072    }
00073 }
00074 
00084 Bool_t SDataSet::operator!= ( const SDataSet& rh ) const {
00085 
00086    return ( ! ( *this == rh ) );
00087 }
00088 
00092 SFile& SFile::operator= ( const SFile& parent ) {
00093 
00094    this->file = parent.file;
00095    this->lumi = parent.lumi;
00096    this->events = parent.events;
00097 
00098    return *this;
00099 }
00100 
00110 Bool_t SFile::operator== ( const SFile& rh ) const {
00111 
00112    if( ( this->file == rh.file ) && ( this->lumi == rh.lumi ) &&
00113        ( this->events == rh.events ) ) {
00114       return kTRUE;
00115    } else {
00116       return kFALSE;
00117    }
00118 }
00119 
00129 Bool_t SFile::operator!= ( const SFile& rh ) const {
00130 
00131    return ( ! ( *this == rh ) );
00132 }
00133 
00137 STree& STree::operator= ( const STree& parent ) {
00138 
00139    this->treeName = parent.treeName;
00140    this->type     = parent.type;
00141 
00142    return *this;
00143 }
00144 
00154 Bool_t STree::operator== ( const STree& rh ) const {
00155 
00156    if( ( this->treeName == rh.treeName ) &&
00157        ( this->type     == rh.type ) ) {
00158       return kTRUE;
00159    } else {
00160       return kFALSE;
00161    }
00162 }
00163 
00173 Bool_t STree::operator!= ( const STree& rh ) const {
00174 
00175    return ( ! ( *this == rh ) );
00176 }
00177 
00181 SInputData::SInputData( const char* name )
00182    : TNamed( name, "SFrame input data object" ), m_type( "unknown" ),
00183      m_version( 0 ), m_totalLumiGiven( 0 ), m_totalLumiSum( 0 ),
00184      m_eventsTotal( 0 ), m_neventsmax( -1 ), m_neventsskip( 0 ),
00185      m_cacheable( kFALSE ), m_skipValid( kFALSE ), m_entry( 0 ),
00186      m_dset( 0 ), m_logger( "SInputData" ) {
00187 
00188    REPORT_VERBOSE( "In constructor" );
00189 }
00190 
00203 SInputData::~SInputData() {
00204 
00205    REPORT_VERBOSE( "In destructor" );
00206 }
00207 
00215 void SInputData::AddSFileIn( const SFile& sfile ) {
00216 
00217    m_sfileIn.push_back( sfile );
00218    m_totalLumiSum += sfile.lumi;
00219    return;
00220 }
00221 
00230 void SInputData::AddTree( Int_t type, const STree& stree ) {
00231 
00232    m_trees[ type ].push_back( stree );
00233    return;
00234 }
00235 
00242 void SInputData::AddDataSet( const SDataSet& dset ) {
00243 
00244    m_dataSets.push_back( dset );
00245    m_totalLumiSum += dset.lumi;
00246    return;
00247 }
00248 
00258 void SInputData::ValidateInput( const char* pserver ) throw( SError ) {
00259 
00260    // Check that the user only specified one type of input:
00261    if( GetSFileIn().size() && GetDataSets().size() ) {
00262       m_logger << ERROR << "You cannot use PROOF datasets AND regular input files in the"
00263                << SLogger::endmsg;
00264       m_logger << ERROR << "same InputData at the moment. Please only use one type!"
00265                << SLogger::endmsg;
00266       throw SError( "Trying to use datasets and files in the same ID",
00267                     SError::SkipInputData );
00268    }
00269 
00270    // Check that the user did specify some kind of input:
00271    if( ( ! GetSFileIn().size() ) && ( ! GetDataSets().size() ) ) {
00272       m_logger << ERROR << "You need to define at least one file or one dataset as input"
00273                << SLogger::endmsg;
00274       throw SError( "Missing input specification", SError::SkipInputData );
00275    }
00276 
00277    // Check that the configuration makes sense:
00278    if( GetSkipValid() && ( ( GetNEventsMax() > 0 ) || ( GetNEventsSkip() > 0 ) ) ) {
00279       m_logger << WARNING << "The input file validation can not be skipped when running on "
00280                << "a subset of events\n"
00281                << "Turning on the InputData validation for InputData\n"
00282                << "   Type: " << GetType() << ", Version: " << GetVersion() << SLogger::endmsg;
00283       SetSkipValid( kFALSE );
00284    }
00285 
00286    // Return at this point if the validation can be skipped:
00287    if( GetSkipValid() ) {
00288       m_logger << INFO << "Input type \"" << GetType() << "\" version \"" 
00289                << GetVersion() << "\" : Validation skipped" << SLogger::endmsg;
00290       return;
00291    }
00292 
00293    // Now do the actual validation:
00294    if( GetSFileIn().size() ) {
00295       ValidateInputFiles();
00296    } else if( GetDataSets().size() ) {
00297       if( ! pserver ) {
00298          m_logger << ERROR << "PROOF server not specified. Can't validate datasets!"
00299                   << SLogger::endmsg;
00300          throw SError( "Can't validate PROOF datasets without server name",
00301                        SError::SkipInputData );
00302       }
00303       ValidateInputDataSets( pserver );
00304    }
00305 
00306    return;
00307 
00308 }
00309 
00324 const std::vector< STree >* SInputData::GetTrees( Int_t type ) const {
00325 
00326    std::map< Int_t, std::vector< STree > >::const_iterator itr;
00327    if( ( itr = m_trees.find( type ) ) == m_trees.end() ) {
00328       return 0;
00329    } else {
00330       return &( itr->second );
00331    }
00332 }
00333 
00334 Bool_t SInputData::HasInputTrees() const {
00335 
00336    for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin();
00337         trees != m_trees.end(); ++trees ) {
00338       for( std::vector< STree >::const_iterator st = trees->second.begin();
00339            st != trees->second.end(); ++st ) {
00340          if( ( st->type & STree::INPUT_TREE ) && ( st->type & STree::EVENT_TREE ) ) {
00341             return kTRUE;
00342          }
00343       }
00344    }
00345 
00346    return kFALSE;
00347 }
00348 
00349 TDSet* SInputData::GetDSet() const {
00350 
00351    return m_dset;
00352 }
00353 
00354 Double_t SInputData::GetTotalLumi() const { 
00355   
00356    Double_t return_lumi = 0.;
00357    // use the given luminosity for this InputData in case it is specified
00358    if( m_totalLumiGiven ) return_lumi = m_totalLumiGiven;
00359    // otherwise use the sum of all files
00360    else return_lumi = m_totalLumiSum;
00361   
00362    // make sure that the lumi is not zero
00363    if( ! return_lumi ) 
00364       m_logger << FATAL << "total luminosity for "<< GetType() << " is ZERO!"
00365                << SLogger::endmsg;
00366 
00367    return return_lumi;
00368 }
00369 
00370 Double_t SInputData::GetScaledLumi() const { 
00371   
00372    Double_t scaled_lumi = 0.;
00373 
00374    if( m_neventsmax > -1. ) {
00375       scaled_lumi = GetTotalLumi() * m_neventsmax / m_eventsTotal;
00376    } else {
00377       scaled_lumi = GetTotalLumi();
00378    }
00379 
00380    return scaled_lumi;
00381 }
00382 
00386 SInputData& SInputData::operator= ( const SInputData& parent ) {
00387 
00388    this->m_type = parent.m_type;
00389    this->m_version = parent.m_version;
00390    this->m_totalLumiGiven = parent.m_totalLumiGiven;
00391    this->m_gencuts = parent.m_gencuts;
00392    this->m_sfileIn = parent.m_sfileIn;
00393    this->m_trees = parent.m_trees;
00394    this->m_dataSets = parent.m_dataSets;
00395    this->m_totalLumiSum = parent.m_totalLumiSum;
00396    this->m_eventsTotal = parent.m_eventsTotal;
00397    this->m_neventsmax = parent.m_neventsmax;
00398    this->m_neventsskip = parent.m_neventsskip;
00399    this->m_cacheable = parent.m_cacheable;
00400    this->m_skipValid = parent.m_skipValid;
00401    this->m_entry = parent.m_entry;
00402 
00403    this->m_dset = parent.m_dset;
00404 
00405    return *this;
00406 
00407 }
00408 
00418 Bool_t SInputData::operator== ( const SInputData& rh ) const {
00419 
00420    if( ( this->m_type == rh.m_type ) && ( this->m_version == rh.m_version ) &&
00421        ( this->m_totalLumiGiven == rh.m_totalLumiGiven ) &&
00422        ( this->m_gencuts == rh.m_gencuts ) && ( this->m_sfileIn == rh.m_sfileIn ) &&
00423        ( this->m_trees == rh.m_trees ) &&
00424        ( this->m_dataSets == rh.m_dataSets ) &&
00425        ( this->m_totalLumiSum == rh.m_totalLumiSum ) &&
00426        ( this->m_eventsTotal == rh.m_eventsTotal ) &&
00427        ( this->m_neventsmax == rh.m_neventsmax ) &&
00428        ( this->m_neventsskip == rh.m_neventsskip ) &&
00429        ( this->m_cacheable == rh.m_cacheable ) &&
00430        ( this->m_skipValid == rh.m_skipValid ) &&
00431        ( this->m_dset->IsEqual( rh.m_dset ) ) ) {
00432       return kTRUE;
00433    } else {
00434       return kFALSE;
00435    }
00436 
00437 }
00438 
00448 Bool_t SInputData::operator!= ( const SInputData& rh ) const {
00449 
00450    return ( ! ( *this == rh ) );
00451 
00452 }
00453 
00459 void SInputData::Print( const Option_t* ) const {
00460 
00461    m_logger << INFO << " ---------------------------------------------------------" << endl;
00462    m_logger << " Type               : " << GetType() << endl;
00463    m_logger << " Version            : " << GetVersion() << endl;
00464    m_logger << " Total luminosity   : " << GetTotalLumi() << "pb-1" << endl;
00465    m_logger << " NEventsMax         : " << GetNEventsMax() << endl;
00466    m_logger << " NEventsSkip        : " << GetNEventsSkip() << endl;
00467    m_logger << " Cacheable          : " << ( GetCacheable() ? "Yes" : "No" ) << endl;
00468    m_logger << " Skip validation    : " << ( GetSkipValid() ? "Yes" : "No" ) << endl;
00469 
00470    for( vector< SGeneratorCut >::const_iterator gc = m_gencuts.begin();
00471         gc != m_gencuts.end(); ++gc )
00472       m_logger << " Generator cut      : '" << gc->GetTreeName() << "' (tree) | '"
00473                << gc->GetFormula() << "' (formula)" << endl;
00474 
00475    for( vector< SDataSet >::const_iterator ds = m_dataSets.begin();
00476         ds != m_dataSets.end(); ++ds )
00477       m_logger << " Data Set           : '" << ds->name << "' (name) | '" << ds->lumi
00478                << "' (lumi)" << endl;
00479    for( vector< SFile >::const_iterator f = m_sfileIn.begin(); f != m_sfileIn.end();
00480         ++f )
00481       m_logger << " Input File         : '" << f->file << "' (file) | '" << f->lumi
00482                << "' (lumi)" << endl;
00483 
00484    for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin();
00485         trees != m_trees.end(); ++trees ) {
00486       for( std::vector< STree >::const_iterator tree = trees->second.begin();
00487            tree != trees->second.end(); ++tree ) {
00488          m_logger << " Tree               : '" << tree->treeName << "' (name) | '"
00489                   << STreeTypeDecoder::Instance()->GetName( trees->first )
00490                   << "' (type)" << endl;
00491       }
00492    }
00493 
00494    m_logger << " ---------------------------------------------------------" << SLogger::endmsg;
00495 
00496    return;
00497 }
00498 
00506 TString SInputData::GetStringConfig() const {
00507 
00508    // The result string:
00509    TString result;
00510 
00511    // Compose the "header" of the input data:
00512    result += TString::Format( "    <InputData Type=\"%s\"\n", m_type.Data() );
00513    result += TString::Format( "               Version=\"%s\"\n",
00514                               m_version.Data() );
00515    result += TString::Format( "               Lumi=\"%g\"\n",
00516                               m_totalLumiGiven );
00517    result += TString::Format( "               NEventsMax=\"%lld\"\n",
00518                               m_neventsmax );
00519    result += TString::Format( "               NEventsSkip=\"%lld\"\n",
00520                               m_neventsskip );
00521    result += TString::Format( "               Cacheable=\"%s\"\n",
00522                               ( m_cacheable ? "True" : "False" ) );
00523    result += TString::Format( "               SkipValid=\"%s\">\n\n",
00524                               ( m_skipValid ? "True" : "False" ) );
00525 
00526    // Add all the input files:
00527    std::vector< SFile >::const_iterator f_itr = m_sfileIn.begin();
00528    std::vector< SFile >::const_iterator f_end = m_sfileIn.end();
00529    for( ; f_itr != f_end; ++f_itr ) {
00530       result += TString::Format( "        <In FileName=\"%s\" Lumi=\"%g\"/>\n",
00531                                  f_itr->file.Data(), f_itr->lumi );
00532    }
00533 
00534    // Add all the input datasets:
00535    std::vector< SDataSet >::const_iterator d_itr = m_dataSets.begin();
00536    std::vector< SDataSet >::const_iterator d_end = m_dataSets.end();
00537    for( ; d_itr != d_end; ++d_itr ) {
00538       result += TString::Format( "        <DataSet Name=\"%s\" Lumi=\"%g\"/>\n",
00539                                  d_itr->name.Data(), d_itr->lumi );
00540    }
00541 
00542    // Add all the generator cuts:
00543    std::vector< SGeneratorCut >::const_iterator g_itr = m_gencuts.begin();
00544    std::vector< SGeneratorCut >::const_iterator g_end = m_gencuts.end();
00545    for( ; g_itr != g_end; ++g_itr ) {
00546       result += TString::Format( "        <GeneratorCut Tree=\"%s\" "
00547                                  "Formula=\"%s\"/>\n",
00548                                  g_itr->GetTreeName().Data(),
00549                                  g_itr->GetFormula().Data() );
00550    }
00551 
00552    // Add all the trees:
00553    const STreeTypeDecoder* decoder = STreeTypeDecoder::Instance();
00554    std::map< Int_t, std::vector< STree > >::const_iterator t_itr =
00555       m_trees.begin();
00556    std::map< Int_t, std::vector< STree > >::const_iterator t_end =
00557       m_trees.end();
00558    for( ; t_itr != t_end; ++t_itr ) {
00559       std::vector< STree >::const_iterator tt_itr = t_itr->second.begin();
00560       std::vector< STree >::const_iterator tt_end = t_itr->second.end();
00561       for( ; tt_itr != tt_end; ++tt_itr ) {
00562          result += TString::Format( "        <%s Name=\"%s\"/>\n",
00563                                     decoder->GetXMLName( t_itr->first ).Data(),
00564                                     tt_itr->treeName.Data() );
00565       }
00566    }
00567 
00568    // Close the input data block:
00569    result += "    </InputData>";
00570 
00571    // Return the constructed string:
00572    return result;
00573 }
00574 
00575 void SInputData::ValidateInputFiles() throw( SError ) {
00576 
00577    //
00578    // Set up the connection to the InputData cache if it's asked for:
00579    //
00580    TFile* cachefile = 0;
00581    TFileCollection* filecoll = 0;
00582    if( m_cacheable && ( ! m_skipValid ) ) {
00583       // The filename is hardcoded, since this is the only place where it's needed:
00584       cachefile = TFile::Open( ".sframe." + GetType() + "." + GetVersion() + ".idcache.root",
00585                                "UPDATE" );
00586       m_logger << DEBUG << "Opened: " << cachefile->GetName() << SLogger::endmsg;
00587       // Try to access the ID information:
00588       filecoll = ( TFileCollection* ) cachefile->Get( "IDCache" );
00589       if( ! filecoll ) {
00590          // Create a new object. This is needed when a new cache is created.
00591          m_logger << DEBUG << "Creating new TFileCollection" << SLogger::endmsg;
00592          cachefile->cd();
00593          filecoll = new TFileCollection( "IDCache", "InputData cache data" );
00594          cachefile->Append( filecoll );
00595       } else {
00596          // The cache already exists:
00597          m_logger << DEBUG << "Existing TFileCollection found" << SLogger::endmsg;
00598       }
00599    }
00600 
00601    // Flag showing if the cache will have to be saved at the end of the function:
00602    Bool_t cacheUpdated = kFALSE;
00603    Int_t  fileInfoInDataset = 0;
00604 
00605    //
00606    // Loop over all the specified input files:
00607    //
00608    for( std::vector< SFile >::iterator sf = m_sfileIn.begin(); sf != m_sfileIn.end(); ++sf ) {
00609 
00610       //
00611       // Try to load the file's information from the cache. This is *much* faster than
00612       // querying the file itself...
00613       //
00614       if( m_cacheable && LoadInfoOnFile( sf, filecoll ) ) {
00615          ++fileInfoInDataset;
00616          continue;
00617       }
00618 
00619       //
00620       // Open the physical file:
00621       //
00622       TFile* file = TFile::Open( sf->file.Data() );
00623       if( ! file || file->IsZombie() ) {
00624          m_logger << WARNING << "Couldn't open file: " << sf->file.Data() << SLogger::endmsg;
00625          m_logger << WARNING << "Removing it from the input file list" << SLogger::endmsg;
00626          // Erasing the file from the file list is a bit tricky actually:
00627          sf = m_sfileIn.erase( sf );
00628          --sf;
00629          continue;
00630       }
00631 
00632       try {
00633 
00634          // If any of the files had to be opened, then the cache will need to be
00635          // updated in the ROOT file:
00636          cacheUpdated = kTRUE;
00637 
00638          //
00639          // Create/retrieve the object storing the information about the file:
00640          //
00641          TFileInfo* fileinfo = 0;
00642          if( m_cacheable ) {
00643             fileinfo = AccessFileInfo( sf, filecoll );
00644          }
00645 
00646          //
00647          // Investigate the input trees:
00648          //
00649          Bool_t firstPassed = kFALSE;
00650          Long64_t entries = 0;
00651          Int_t numberOfBranches = 0;
00652          // try to load all the input trees
00653          for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin();
00654               trees != m_trees.end(); ++trees ) {
00655 
00656             m_logger << DEBUG << "Investigating \""
00657                      << STreeTypeDecoder::Instance()->GetName( trees->first )
00658                      << "\" types" << SLogger::endmsg;
00659             for( std::vector< STree >::const_iterator st = trees->second.begin();
00660                  st != trees->second.end(); ++st ) {
00661 
00662                // Only check the existence of input trees:
00663                if( ! ( st->type & STree::INPUT_TREE ) ) continue;
00664 
00665                // Try to access the input tree:
00666                TTree* tree = dynamic_cast< TTree* >( file->Get( st->treeName ) );
00667                if( ! tree ) {
00668                   m_logger << WARNING << "Couldn't find tree " << st->treeName
00669                            << " in file " << sf->file << SLogger::endmsg;
00670                   m_logger << WARNING << "Removing file from the input file list"
00671                            << SLogger::endmsg;
00672                   throw SError( SError::SkipFile );
00673                }
00674 
00675                // Remember how many branches there are in total in the input:
00676                Int_t branchesThisTree = tree->GetNbranches();
00677                m_logger << DEBUG << branchesThisTree << " branches in tree " << tree->GetName() 
00678                         << SLogger::endmsg;
00679                numberOfBranches += branchesThisTree;
00680 
00681                // Check how many events are there in the input:
00682                if( st->type & STree::EVENT_TREE ) {
00683                   if( firstPassed && ( tree->GetEntriesFast() != entries ) ) {
00684                      m_logger << WARNING << "Conflict in number of entries - Tree "
00685                               << tree->GetName() << " has " << tree->GetEntriesFast()
00686                               << " entries, NOT " << entries << SLogger::endmsg;
00687                      m_logger << WARNING << "Removing " << sf->file
00688                               << " from the input file list" << SLogger::endmsg;
00689                      throw SError( SError::SkipFile );
00690                   } else if( ! firstPassed ) {
00691                      firstPassed = kTRUE;
00692                      entries = tree->GetEntriesFast();
00693                   }
00694                }
00695 
00696                //
00697                // Save the information about this tree into the cache:
00698                //
00699                if( m_cacheable ) {
00700                   TFileInfoMeta* tree_info = new TFileInfoMeta( tree->GetName(), "TTree",
00701                                                                 tree->GetEntriesFast() );
00702                   tree_info->SetName( tree->GetName() );
00703                   tree_info->SetTitle( "Meta data info for a TTree" );
00704                   if( ! fileinfo->AddMetaData( tree_info ) ) {
00705                      m_logger << ERROR << "There was a problem caching meta-data for TTree: "
00706                               << tree->GetName() << SLogger::endmsg;
00707                   } else {
00708                      m_logger << VERBOSE << "Meta-data cached for TTree: " << tree->GetName()
00709                               << SLogger::endmsg;
00710                   }
00711                }
00712             }
00713          }
00714 
00715          // Update the ID information:
00716          sf->events = entries;
00717          AddEvents( entries );
00718 
00719          m_logger << DEBUG << numberOfBranches << " branches in total in file "
00720                   << file->GetName() << SLogger::endmsg;
00721 
00722       } catch( const SError& ) {
00723          m_totalLumiSum -= sf->lumi;
00724          sf = m_sfileIn.erase( sf );
00725          --sf;
00726       }
00727 
00728       // Close the input file:
00729       file->Close();
00730       if( file ) delete file;
00731    }
00732 
00733    //
00734    // Save/close the cache file if it needs to be saved/closed:
00735    //
00736    if( m_dset ) delete m_dset;
00737    if( m_cacheable ) {
00738 
00739       //
00740       // Take care of the TFileCollection object:
00741       //
00742       if( cacheUpdated ) {
00743          m_logger << VERBOSE << "Writing file collection object to cache" << SLogger::endmsg;
00744          cachefile->cd();
00745          if( filecoll->Update() == -1 ) {
00746             m_logger << ERROR << "Failed to update the cached information" << SLogger::endmsg;
00747          }
00748          filecoll->Write();
00749 
00750          //
00751          // Create a new dataset and write it to the cache file:
00752          //
00753          m_dset = MakeDataSet();
00754          cachefile->cd();
00755          m_dset->Write();
00756       } else {
00757          // Load the cached dataset:
00758          m_dset = AccessDataSet( cachefile );
00759          if( ! m_dset ) {
00760             throw SError( "There was a logical error in the cache handling.\n"
00761                           " Id Type: " + GetType() + ", Version: " + GetVersion(),
00762                           SError::StopExecution );
00763          }
00764 
00765          // Check if the current configuration is likely to be described by this dataset:
00766          if( fileInfoInDataset == m_dset->GetListOfElements()->GetSize() ) {
00767             m_logger << DEBUG << "The loaded dataset is up to date" << SLogger::endmsg;
00768          } else {
00769             m_logger << DEBUG << "The dataset has to be updated" << SLogger::endmsg;
00770             delete m_dset;
00771             m_dset = MakeDataSet();
00772             cachefile->cd();
00773             m_dset->Write();
00774          }
00775       }
00776 
00777       cachefile->Close();
00778       delete cachefile;
00779 
00780    } else {
00781 
00782       m_dset = MakeDataSet();
00783 
00784    }
00785 
00786    //
00787    // Check that the specified maximum number of events and the number of events to
00788    // skip, make sense:
00789    //
00790    if( GetNEventsSkip() + GetNEventsMax() > GetEventsTotal() ) {
00791       if( GetNEventsSkip() >= GetEventsTotal() ) {
00792          SetNEventsMax( 0 );
00793       } else {
00794          SetNEventsMax( GetEventsTotal() - GetNEventsSkip() );
00795       }
00796    }
00797 
00798    //
00799    // Print some status:
00800    //
00801    m_logger << INFO << "Input type \"" << GetType() << "\" version \"" 
00802             << GetVersion() << "\" : " << GetEventsTotal() << " events" 
00803             << ( ( m_cacheable && ( ! cacheUpdated ) ) ? " (cached)" : "" )
00804             << SLogger::endmsg;
00805 
00806    return;
00807 }
00808 
00809 void SInputData::ValidateInputDataSets( const char* pserver ) throw( SError ) {
00810 
00811    // Connect to the PROOF server:
00812    TProof* server = SProofManager::Instance()->Open( pserver );
00813 
00814    // Check the number of defined datasets. It's only possible to use multiple datasets
00815    // in a single InputData starting from ROOT 5.27/02. In previous releases only the
00816    // first one can be used.
00817    if( ( ROOT_VERSION_CODE < ROOT_VERSION( 5, 27, 02 ) ) &&
00818        // The special PROOF branch of the ROOT development code can also be used:
00819        ( strcmp( gROOT->GetVersion(), "5.26/00-proof" ) ) &&
00820        ( m_dataSets.size() > 1 ) ) {
00821 
00822       m_logger << WARNING << "You're currently using ROOT version: "
00823                << gROOT->GetVersion() << "\n"
00824                << "This version doesn't yet support defining multiple\n"
00825                << "datasets per InputData. Only the first one is going to be used!"
00826                << SLogger::endmsg;
00827       m_logger << WARNING << "To use multiple datasets, upgrate to at least ROOT 5.27/02"
00828                << SLogger::endmsg;
00829       m_dataSets.resize( 1 );
00830       m_totalLumiSum = m_dataSets.front().lumi;
00831    }
00832 
00833    //
00834    // Loop over the specified datasets:
00835    //
00836    for( std::vector< SDataSet >::iterator ds = m_dataSets.begin();
00837         ds != m_dataSets.end(); ++ds ) {
00838 
00839       try {
00840 
00841          //
00842          // Check if the dataset exists on the server:
00843          //
00844          TFileCollection* filecoll = server->GetDataSet( ds->name );
00845          if( ! filecoll ) {
00846             m_logger << ERROR << "Dataset \"" << ds->name << "\" doesn't exist on server: "
00847                      << pserver << SLogger::endmsg;
00848             throw SError( SError::SkipFile );
00849          }
00850 
00851          //
00852          // Investigate the input trees:
00853          //
00854          Bool_t firstPassed = kFALSE;
00855          Long64_t entries = 0;
00856          for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin();
00857               trees != m_trees.end(); ++trees ) {
00858 
00859             m_logger << DEBUG << "Investigating \""
00860                      << STreeTypeDecoder::Instance()->GetName( trees->first )
00861                      << "\" types" << SLogger::endmsg;
00862             for( std::vector< STree >::const_iterator st = trees->second.begin();
00863                  st != trees->second.end(); ++st ) {
00864 
00865                // Only check the existence of input trees:
00866                if( ! ( st->type & STree::INPUT_TREE ) ) continue;
00867 
00868                // Don't check for trees in sub-directories:
00869                if( st->treeName.Contains( "/" ) ) continue;
00870 
00871                // Try to access information on the input tree:
00872                Long64_t tree_entries = filecoll->GetTotalEntries( "/" + st->treeName );
00873                if( tree_entries == -1 ) {
00874                   m_logger << ERROR << "Couldn't find tree " << st->treeName << " in dataset "
00875                            << ds->name << SLogger::endmsg;
00876                   m_logger << ERROR << "Removing dataset from the input list"
00877                            << SLogger::endmsg;
00878                   throw SError( SError::SkipFile );
00879                }
00880 
00881                // Check how many events are there in the input:
00882                if( st->type & STree::EVENT_TREE ) {
00883                   if( firstPassed && ( tree_entries != entries ) ) {
00884                      m_logger << WARNING << "Conflict in number of entries - Tree "
00885                               << st->treeName << " has " << tree_entries
00886                               << " entries, NOT " << entries << SLogger::endmsg;
00887                      m_logger << WARNING << "Removing " << ds->name
00888                               << " from the input dataset list" << SLogger::endmsg;
00889                      throw SError( SError::SkipFile );
00890                   } else if( ! firstPassed ) {
00891                      firstPassed = kTRUE;
00892                      entries = tree_entries;
00893                   }
00894                }
00895             }
00896          }
00897 
00898          // Update the ID information:
00899          ds->events = entries;
00900          AddEvents( entries );
00901 
00902       } catch( const SError& ) {
00903          m_totalLumiSum -= ds->lumi;
00904          ds = m_dataSets.erase( ds );
00905          --ds;
00906       }
00907 
00908    }
00909 
00910    //
00911    // Print some status:
00912    //
00913    m_logger << INFO << "Input type \"" << GetType() << "\" version \"" 
00914             << GetVersion() << "\" : " << GetEventsTotal() << " events" 
00915             << SLogger::endmsg;
00916 
00917    return;
00918 }
00919 
00920 Bool_t SInputData::LoadInfoOnFile( std::vector< SFile >::iterator& file_itr,
00921                                    TFileCollection* filecoll ) {
00922 
00923    // Retrieve the information about this specific file:
00924    TFileInfo* fileinfo = ( TFileInfo* ) filecoll->GetList()->FindObject( file_itr->file );
00925    if( ! fileinfo ) {
00926       m_logger << VERBOSE << "File unknown: " << file_itr->file << SLogger::endmsg;
00927       return kFALSE;
00928    }
00929 
00930    m_logger << DEBUG << "Information found for: " << file_itr->file << SLogger::endmsg;
00931 
00932    Bool_t firstPassed = kFALSE; // Flag showing if we already know the number of entries
00933    Long64_t entries = 0; // Number of entries in the file
00934 
00935    //
00936    // Check that information is available on all the input trees in the cache:
00937    //
00938    for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin();
00939         trees != m_trees.end(); ++trees ) {
00940       for( std::vector< STree >::const_iterator st = trees->second.begin();
00941            st != trees->second.end(); ++st ) {
00942 
00943          // Only check the existence of input trees:
00944          if( ! ( st->type & STree::INPUT_TREE ) ) continue;
00945 
00946          // Get the tree information:
00947          TFileInfoMeta* tree_info = fileinfo->GetMetaData( st->treeName );
00948          if( ! tree_info ) {
00949             m_logger << DEBUG << "No description found for: " << st->treeName
00950                      << SLogger::endmsg;
00951             return kFALSE;
00952          }
00953 
00954          // Check how many events are there in the input:
00955          if( st->type & STree::EVENT_TREE ) {
00956             if( ! firstPassed ) {
00957                firstPassed = kTRUE;
00958                entries = tree_info->GetEntries();
00959             } else if( entries != tree_info->GetEntries() ) {
00960                m_logger << WARNING << "Inconsistent cached data for: "
00961                         << file_itr->file << " -> Checking the file again..."
00962                         << SLogger::endmsg;
00963                return kFALSE;
00964             }
00965          }
00966       }
00967    }
00968 
00969    //
00970    // Update the ID with this information:
00971    //
00972    file_itr->events = entries;
00973    AddEvents( entries );
00974 
00975    return kTRUE;
00976 }
00977 
00978 TFileInfo* SInputData::AccessFileInfo( std::vector< SFile >::iterator& file_itr,
00979                                        TFileCollection* filecoll ) {
00980 
00981    TFileInfo* result = 0;
00982 
00983    // Check if we know anything about this file already:
00984    if( ( result = ( TFileInfo* ) filecoll->GetList()->FindObject( file_itr->file ) ) ) {
00985       m_logger << DEBUG << "Updating information for " << file_itr->file << SLogger::endmsg;
00986    } else {
00987       // One has to be very verbose in naming the object, otherwise the stupid
00988       // ROOT container will not be able to find it afterwards...
00989       m_logger << DEBUG << "Creating information for " << file_itr->file << SLogger::endmsg;
00990       result = new TFileInfo( file_itr->file );
00991       result->SetName( file_itr->file );
00992       result->SetTitle( "Description for: " + file_itr->file );
00993       filecoll->Add( result );
00994    }
00995 
00996    return result;
00997 }
00998 
00999 TDSet* SInputData::MakeDataSet() throw( SError ) {
01000 
01001    // Find the name of the "main" TTree in the files:
01002    const char* treeName = 0;
01003    for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin();
01004         trees != m_trees.end(); ++trees ) {
01005       for( std::vector< STree >::const_iterator st = trees->second.begin();
01006            st != trees->second.end(); ++st ) {
01007          if( ( st->type & STree::INPUT_TREE ) && ( st->type & STree::EVENT_TREE ) ) {
01008             treeName = st->treeName.Data();
01009          }
01010       }
01011    }
01012    if( ! treeName ) {
01013       throw SError( "Can't determine input TTree name!", SError::SkipInputData );
01014    }
01015 
01016    // Create a TChain that will be the basis of the dataset:
01017    TChain chain( treeName );
01018    for( std::vector< SFile >::const_iterator file = GetSFileIn().begin();
01019         file != GetSFileIn().end(); ++file ) {
01020       chain.Add( file->file );
01021    }
01022 
01023    // Create the dataset:
01024    TDSet* result = new TDSet( chain );
01025    result->SetName( "DSetCache" );
01026    result->SetTitle( "Cached dataset for ID Type: " + GetType() + ", Version: " +
01027                      GetVersion() );
01028    result->Validate();
01029 
01030    return result;
01031 }
01032 
01033 TDSet* SInputData::AccessDataSet( TDirectory* dir ) {
01034 
01035    return dynamic_cast< TDSet* >( dir->Get( "DSetCache" ) );
01036 }