// SFrame 3.6
00001 // $Id: SInputData.cxx 331 2012-11-20 17:12:44Z krasznaa $ 00002 /*************************************************************************** 00003 * @Project: SFrame - ROOT-based analysis framework for ATLAS 00004 * @Package: Core 00005 * 00006 * @author Stefan Ask <Stefan.Ask@cern.ch> - Manchester 00007 * @author David Berge <David.Berge@cern.ch> - CERN 00008 * @author Johannes Haller <Johannes.Haller@cern.ch> - Hamburg 00009 * @author A. Krasznahorkay <Attila.Krasznahorkay@cern.ch> - CERN/Debrecen 00010 * 00011 ***************************************************************************/ 00012 00013 // System include(s): 00014 #include <string.h> 00015 00016 // ROOT include(s): 00017 #include <TFile.h> 00018 #include <TTree.h> 00019 #include <TChain.h> 00020 #include <TFileCollection.h> 00021 #include <TFileInfo.h> 00022 #include <THashList.h> 00023 #include <TDSet.h> 00024 #include <TProof.h> 00025 #include <TROOT.h> 00026 00027 // Local include(s): 00028 #include "../include/SInputData.h" 00029 #include "../include/SError.h" 00030 #include "../include/SProofManager.h" 00031 #include "../include/STreeTypeDecoder.h" 00032 00033 #ifndef DOXYGEN_IGNORE 00034 ClassImp( SDataSet ); 00035 ClassImp( SFile ); 00036 ClassImp( STree ); 00037 ClassImp( SInputData ); 00038 #endif // DOXYGEN_IGNORE 00039 00040 using namespace std; 00041 00042 // Define the constants: 00043 const Int_t STree::INPUT_TREE = 0x1; 00044 const Int_t STree::OUTPUT_TREE = 0x2; 00045 const Int_t STree::EVENT_TREE = 0x4; 00046 00050 SDataSet& SDataSet::operator= ( const SDataSet& parent ) { 00051 00052 this->name = parent.name; 00053 00054 return *this; 00055 } 00056 00066 Bool_t SDataSet::operator== ( const SDataSet& rh ) const { 00067 00068 if( this->name == rh.name ) { 00069 return kTRUE; 00070 } else { 00071 return kFALSE; 00072 } 00073 } 00074 00084 Bool_t SDataSet::operator!= ( const SDataSet& rh ) const { 00085 00086 return ( ! 
( *this == rh ) ); 00087 } 00088 00092 SFile& SFile::operator= ( const SFile& parent ) { 00093 00094 this->file = parent.file; 00095 this->lumi = parent.lumi; 00096 this->events = parent.events; 00097 00098 return *this; 00099 } 00100 00110 Bool_t SFile::operator== ( const SFile& rh ) const { 00111 00112 if( ( this->file == rh.file ) && ( this->lumi == rh.lumi ) && 00113 ( this->events == rh.events ) ) { 00114 return kTRUE; 00115 } else { 00116 return kFALSE; 00117 } 00118 } 00119 00129 Bool_t SFile::operator!= ( const SFile& rh ) const { 00130 00131 return ( ! ( *this == rh ) ); 00132 } 00133 00137 STree& STree::operator= ( const STree& parent ) { 00138 00139 this->treeName = parent.treeName; 00140 this->type = parent.type; 00141 00142 return *this; 00143 } 00144 00154 Bool_t STree::operator== ( const STree& rh ) const { 00155 00156 if( ( this->treeName == rh.treeName ) && 00157 ( this->type == rh.type ) ) { 00158 return kTRUE; 00159 } else { 00160 return kFALSE; 00161 } 00162 } 00163 00173 Bool_t STree::operator!= ( const STree& rh ) const { 00174 00175 return ( ! 
( *this == rh ) ); 00176 } 00177 00181 SInputData::SInputData( const char* name ) 00182 : TNamed( name, "SFrame input data object" ), m_type( "unknown" ), 00183 m_version( 0 ), m_totalLumiGiven( 0 ), m_totalLumiSum( 0 ), 00184 m_eventsTotal( 0 ), m_neventsmax( -1 ), m_neventsskip( 0 ), 00185 m_cacheable( kFALSE ), m_skipValid( kFALSE ), m_entry( 0 ), 00186 m_dset( 0 ), m_logger( "SInputData" ) { 00187 00188 REPORT_VERBOSE( "In constructor" ); 00189 } 00190 00203 SInputData::~SInputData() { 00204 00205 REPORT_VERBOSE( "In destructor" ); 00206 } 00207 00215 void SInputData::AddSFileIn( const SFile& sfile ) { 00216 00217 m_sfileIn.push_back( sfile ); 00218 m_totalLumiSum += sfile.lumi; 00219 return; 00220 } 00221 00230 void SInputData::AddTree( Int_t type, const STree& stree ) { 00231 00232 m_trees[ type ].push_back( stree ); 00233 return; 00234 } 00235 00242 void SInputData::AddDataSet( const SDataSet& dset ) { 00243 00244 m_dataSets.push_back( dset ); 00245 m_totalLumiSum += dset.lumi; 00246 return; 00247 } 00248 00258 void SInputData::ValidateInput( const char* pserver ) throw( SError ) { 00259 00260 // Check that the user only specified one type of input: 00261 if( GetSFileIn().size() && GetDataSets().size() ) { 00262 m_logger << ERROR << "You cannot use PROOF datasets AND regular input files in the" 00263 << SLogger::endmsg; 00264 m_logger << ERROR << "same InputData at the moment. Please only use one type!" 00265 << SLogger::endmsg; 00266 throw SError( "Trying to use datasets and files in the same ID", 00267 SError::SkipInputData ); 00268 } 00269 00270 // Check that the user did specify some kind of input: 00271 if( ( ! GetSFileIn().size() ) && ( ! 
GetDataSets().size() ) ) { 00272 m_logger << ERROR << "You need to define at least one file or one dataset as input" 00273 << SLogger::endmsg; 00274 throw SError( "Missing input specification", SError::SkipInputData ); 00275 } 00276 00277 // Check that the configuration makes sense: 00278 if( GetSkipValid() && ( ( GetNEventsMax() > 0 ) || ( GetNEventsSkip() > 0 ) ) ) { 00279 m_logger << WARNING << "The input file validation can not be skipped when running on " 00280 << "a subset of events\n" 00281 << "Turning on the InputData validation for InputData\n" 00282 << " Type: " << GetType() << ", Version: " << GetVersion() << SLogger::endmsg; 00283 SetSkipValid( kFALSE ); 00284 } 00285 00286 // Return at this point if the validation can be skipped: 00287 if( GetSkipValid() ) { 00288 m_logger << INFO << "Input type \"" << GetType() << "\" version \"" 00289 << GetVersion() << "\" : Validation skipped" << SLogger::endmsg; 00290 return; 00291 } 00292 00293 // Now do the actual validation: 00294 if( GetSFileIn().size() ) { 00295 ValidateInputFiles(); 00296 } else if( GetDataSets().size() ) { 00297 if( ! pserver ) { 00298 m_logger << ERROR << "PROOF server not specified. Can't validate datasets!" 
00299 << SLogger::endmsg; 00300 throw SError( "Can't validate PROOF datasets without server name", 00301 SError::SkipInputData ); 00302 } 00303 ValidateInputDataSets( pserver ); 00304 } 00305 00306 return; 00307 00308 } 00309 00324 const std::vector< STree >* SInputData::GetTrees( Int_t type ) const { 00325 00326 std::map< Int_t, std::vector< STree > >::const_iterator itr; 00327 if( ( itr = m_trees.find( type ) ) == m_trees.end() ) { 00328 return 0; 00329 } else { 00330 return &( itr->second ); 00331 } 00332 } 00333 00334 Bool_t SInputData::HasInputTrees() const { 00335 00336 for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin(); 00337 trees != m_trees.end(); ++trees ) { 00338 for( std::vector< STree >::const_iterator st = trees->second.begin(); 00339 st != trees->second.end(); ++st ) { 00340 if( ( st->type & STree::INPUT_TREE ) && ( st->type & STree::EVENT_TREE ) ) { 00341 return kTRUE; 00342 } 00343 } 00344 } 00345 00346 return kFALSE; 00347 } 00348 00349 TDSet* SInputData::GetDSet() const { 00350 00351 return m_dset; 00352 } 00353 00354 Double_t SInputData::GetTotalLumi() const { 00355 00356 Double_t return_lumi = 0.; 00357 // use the given luminosity for this InputData in case it is specified 00358 if( m_totalLumiGiven ) return_lumi = m_totalLumiGiven; 00359 // otherwise use the sum of all files 00360 else return_lumi = m_totalLumiSum; 00361 00362 // make sure that the lumi is not zero 00363 if( ! return_lumi ) 00364 m_logger << FATAL << "total luminosity for "<< GetType() << " is ZERO!" 00365 << SLogger::endmsg; 00366 00367 return return_lumi; 00368 } 00369 00370 Double_t SInputData::GetScaledLumi() const { 00371 00372 Double_t scaled_lumi = 0.; 00373 00374 if( m_neventsmax > -1. 
) { 00375 scaled_lumi = GetTotalLumi() * m_neventsmax / m_eventsTotal; 00376 } else { 00377 scaled_lumi = GetTotalLumi(); 00378 } 00379 00380 return scaled_lumi; 00381 } 00382 00386 SInputData& SInputData::operator= ( const SInputData& parent ) { 00387 00388 this->m_type = parent.m_type; 00389 this->m_version = parent.m_version; 00390 this->m_totalLumiGiven = parent.m_totalLumiGiven; 00391 this->m_gencuts = parent.m_gencuts; 00392 this->m_sfileIn = parent.m_sfileIn; 00393 this->m_trees = parent.m_trees; 00394 this->m_dataSets = parent.m_dataSets; 00395 this->m_totalLumiSum = parent.m_totalLumiSum; 00396 this->m_eventsTotal = parent.m_eventsTotal; 00397 this->m_neventsmax = parent.m_neventsmax; 00398 this->m_neventsskip = parent.m_neventsskip; 00399 this->m_cacheable = parent.m_cacheable; 00400 this->m_skipValid = parent.m_skipValid; 00401 this->m_entry = parent.m_entry; 00402 00403 this->m_dset = parent.m_dset; 00404 00405 return *this; 00406 00407 } 00408 00418 Bool_t SInputData::operator== ( const SInputData& rh ) const { 00419 00420 if( ( this->m_type == rh.m_type ) && ( this->m_version == rh.m_version ) && 00421 ( this->m_totalLumiGiven == rh.m_totalLumiGiven ) && 00422 ( this->m_gencuts == rh.m_gencuts ) && ( this->m_sfileIn == rh.m_sfileIn ) && 00423 ( this->m_trees == rh.m_trees ) && 00424 ( this->m_dataSets == rh.m_dataSets ) && 00425 ( this->m_totalLumiSum == rh.m_totalLumiSum ) && 00426 ( this->m_eventsTotal == rh.m_eventsTotal ) && 00427 ( this->m_neventsmax == rh.m_neventsmax ) && 00428 ( this->m_neventsskip == rh.m_neventsskip ) && 00429 ( this->m_cacheable == rh.m_cacheable ) && 00430 ( this->m_skipValid == rh.m_skipValid ) && 00431 ( this->m_dset->IsEqual( rh.m_dset ) ) ) { 00432 return kTRUE; 00433 } else { 00434 return kFALSE; 00435 } 00436 00437 } 00438 00448 Bool_t SInputData::operator!= ( const SInputData& rh ) const { 00449 00450 return ( ! 
( *this == rh ) ); 00451 00452 } 00453 00459 void SInputData::Print( const Option_t* ) const { 00460 00461 m_logger << INFO << " ---------------------------------------------------------" << endl; 00462 m_logger << " Type : " << GetType() << endl; 00463 m_logger << " Version : " << GetVersion() << endl; 00464 m_logger << " Total luminosity : " << GetTotalLumi() << "pb-1" << endl; 00465 m_logger << " NEventsMax : " << GetNEventsMax() << endl; 00466 m_logger << " NEventsSkip : " << GetNEventsSkip() << endl; 00467 m_logger << " Cacheable : " << ( GetCacheable() ? "Yes" : "No" ) << endl; 00468 m_logger << " Skip validation : " << ( GetSkipValid() ? "Yes" : "No" ) << endl; 00469 00470 for( vector< SGeneratorCut >::const_iterator gc = m_gencuts.begin(); 00471 gc != m_gencuts.end(); ++gc ) 00472 m_logger << " Generator cut : '" << gc->GetTreeName() << "' (tree) | '" 00473 << gc->GetFormula() << "' (formula)" << endl; 00474 00475 for( vector< SDataSet >::const_iterator ds = m_dataSets.begin(); 00476 ds != m_dataSets.end(); ++ds ) 00477 m_logger << " Data Set : '" << ds->name << "' (name) | '" << ds->lumi 00478 << "' (lumi)" << endl; 00479 for( vector< SFile >::const_iterator f = m_sfileIn.begin(); f != m_sfileIn.end(); 00480 ++f ) 00481 m_logger << " Input File : '" << f->file << "' (file) | '" << f->lumi 00482 << "' (lumi)" << endl; 00483 00484 for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin(); 00485 trees != m_trees.end(); ++trees ) { 00486 for( std::vector< STree >::const_iterator tree = trees->second.begin(); 00487 tree != trees->second.end(); ++tree ) { 00488 m_logger << " Tree : '" << tree->treeName << "' (name) | '" 00489 << STreeTypeDecoder::Instance()->GetName( trees->first ) 00490 << "' (type)" << endl; 00491 } 00492 } 00493 00494 m_logger << " ---------------------------------------------------------" << SLogger::endmsg; 00495 00496 return; 00497 } 00498 00506 TString SInputData::GetStringConfig() const { 00507 00508 // The 
result string: 00509 TString result; 00510 00511 // Compose the "header" of the input data: 00512 result += TString::Format( " <InputData Type=\"%s\"\n", m_type.Data() ); 00513 result += TString::Format( " Version=\"%s\"\n", 00514 m_version.Data() ); 00515 result += TString::Format( " Lumi=\"%g\"\n", 00516 m_totalLumiGiven ); 00517 result += TString::Format( " NEventsMax=\"%lld\"\n", 00518 m_neventsmax ); 00519 result += TString::Format( " NEventsSkip=\"%lld\"\n", 00520 m_neventsskip ); 00521 result += TString::Format( " Cacheable=\"%s\"\n", 00522 ( m_cacheable ? "True" : "False" ) ); 00523 result += TString::Format( " SkipValid=\"%s\">\n\n", 00524 ( m_skipValid ? "True" : "False" ) ); 00525 00526 // Add all the input files: 00527 std::vector< SFile >::const_iterator f_itr = m_sfileIn.begin(); 00528 std::vector< SFile >::const_iterator f_end = m_sfileIn.end(); 00529 for( ; f_itr != f_end; ++f_itr ) { 00530 result += TString::Format( " <In FileName=\"%s\" Lumi=\"%g\"/>\n", 00531 f_itr->file.Data(), f_itr->lumi ); 00532 } 00533 00534 // Add all the input datasets: 00535 std::vector< SDataSet >::const_iterator d_itr = m_dataSets.begin(); 00536 std::vector< SDataSet >::const_iterator d_end = m_dataSets.end(); 00537 for( ; d_itr != d_end; ++d_itr ) { 00538 result += TString::Format( " <DataSet Name=\"%s\" Lumi=\"%g\"/>\n", 00539 d_itr->name.Data(), d_itr->lumi ); 00540 } 00541 00542 // Add all the generator cuts: 00543 std::vector< SGeneratorCut >::const_iterator g_itr = m_gencuts.begin(); 00544 std::vector< SGeneratorCut >::const_iterator g_end = m_gencuts.end(); 00545 for( ; g_itr != g_end; ++g_itr ) { 00546 result += TString::Format( " <GeneratorCut Tree=\"%s\" " 00547 "Formula=\"%s\"/>\n", 00548 g_itr->GetTreeName().Data(), 00549 g_itr->GetFormula().Data() ); 00550 } 00551 00552 // Add all the trees: 00553 const STreeTypeDecoder* decoder = STreeTypeDecoder::Instance(); 00554 std::map< Int_t, std::vector< STree > >::const_iterator t_itr = 00555 m_trees.begin(); 00556 
std::map< Int_t, std::vector< STree > >::const_iterator t_end = 00557 m_trees.end(); 00558 for( ; t_itr != t_end; ++t_itr ) { 00559 std::vector< STree >::const_iterator tt_itr = t_itr->second.begin(); 00560 std::vector< STree >::const_iterator tt_end = t_itr->second.end(); 00561 for( ; tt_itr != tt_end; ++tt_itr ) { 00562 result += TString::Format( " <%s Name=\"%s\"/>\n", 00563 decoder->GetXMLName( t_itr->first ).Data(), 00564 tt_itr->treeName.Data() ); 00565 } 00566 } 00567 00568 // Close the input data block: 00569 result += " </InputData>"; 00570 00571 // Return the constructed string: 00572 return result; 00573 } 00574 00575 void SInputData::ValidateInputFiles() throw( SError ) { 00576 00577 // 00578 // Set up the connection to the InputData cache if it's asked for: 00579 // 00580 TFile* cachefile = 0; 00581 TFileCollection* filecoll = 0; 00582 if( m_cacheable && ( ! m_skipValid ) ) { 00583 // The filename is hardcoded, since this is the only place where it's needed: 00584 cachefile = TFile::Open( ".sframe." + GetType() + "." + GetVersion() + ".idcache.root", 00585 "UPDATE" ); 00586 m_logger << DEBUG << "Opened: " << cachefile->GetName() << SLogger::endmsg; 00587 // Try to access the ID information: 00588 filecoll = ( TFileCollection* ) cachefile->Get( "IDCache" ); 00589 if( ! filecoll ) { 00590 // Create a new object. This is needed when a new cache is created. 
00591 m_logger << DEBUG << "Creating new TFileCollection" << SLogger::endmsg; 00592 cachefile->cd(); 00593 filecoll = new TFileCollection( "IDCache", "InputData cache data" ); 00594 cachefile->Append( filecoll ); 00595 } else { 00596 // The cache already exists: 00597 m_logger << DEBUG << "Existing TFileCollection found" << SLogger::endmsg; 00598 } 00599 } 00600 00601 // Flag showing if the cache will have to be saved at the end of the function: 00602 Bool_t cacheUpdated = kFALSE; 00603 Int_t fileInfoInDataset = 0; 00604 00605 // 00606 // Loop over all the specified input files: 00607 // 00608 for( std::vector< SFile >::iterator sf = m_sfileIn.begin(); sf != m_sfileIn.end(); ++sf ) { 00609 00610 // 00611 // Try to load the file's information from the cache. This is *much* faster than 00612 // querying the file itself... 00613 // 00614 if( m_cacheable && LoadInfoOnFile( sf, filecoll ) ) { 00615 ++fileInfoInDataset; 00616 continue; 00617 } 00618 00619 // 00620 // Open the physical file: 00621 // 00622 TFile* file = TFile::Open( sf->file.Data() ); 00623 if( ! 
file || file->IsZombie() ) { 00624 m_logger << WARNING << "Couldn't open file: " << sf->file.Data() << SLogger::endmsg; 00625 m_logger << WARNING << "Removing it from the input file list" << SLogger::endmsg; 00626 // Erasing the file from the file list is a bit tricky actually: 00627 sf = m_sfileIn.erase( sf ); 00628 --sf; 00629 continue; 00630 } 00631 00632 try { 00633 00634 // If any of the files had to be opened, then the cache will need to be 00635 // updated in the ROOT file: 00636 cacheUpdated = kTRUE; 00637 00638 // 00639 // Create/retrieve the object storing the information about the file: 00640 // 00641 TFileInfo* fileinfo = 0; 00642 if( m_cacheable ) { 00643 fileinfo = AccessFileInfo( sf, filecoll ); 00644 } 00645 00646 // 00647 // Investigate the input trees: 00648 // 00649 Bool_t firstPassed = kFALSE; 00650 Long64_t entries = 0; 00651 Int_t numberOfBranches = 0; 00652 // try to load all the input trees 00653 for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin(); 00654 trees != m_trees.end(); ++trees ) { 00655 00656 m_logger << DEBUG << "Investigating \"" 00657 << STreeTypeDecoder::Instance()->GetName( trees->first ) 00658 << "\" types" << SLogger::endmsg; 00659 for( std::vector< STree >::const_iterator st = trees->second.begin(); 00660 st != trees->second.end(); ++st ) { 00661 00662 // Only check the existence of input trees: 00663 if( ! ( st->type & STree::INPUT_TREE ) ) continue; 00664 00665 // Try to access the input tree: 00666 TTree* tree = dynamic_cast< TTree* >( file->Get( st->treeName ) ); 00667 if( ! 
tree ) { 00668 m_logger << WARNING << "Couldn't find tree " << st->treeName 00669 << " in file " << sf->file << SLogger::endmsg; 00670 m_logger << WARNING << "Removing file from the input file list" 00671 << SLogger::endmsg; 00672 throw SError( SError::SkipFile ); 00673 } 00674 00675 // Remember how many branches there are in total in the input: 00676 Int_t branchesThisTree = tree->GetNbranches(); 00677 m_logger << DEBUG << branchesThisTree << " branches in tree " << tree->GetName() 00678 << SLogger::endmsg; 00679 numberOfBranches += branchesThisTree; 00680 00681 // Check how many events are there in the input: 00682 if( st->type & STree::EVENT_TREE ) { 00683 if( firstPassed && ( tree->GetEntriesFast() != entries ) ) { 00684 m_logger << WARNING << "Conflict in number of entries - Tree " 00685 << tree->GetName() << " has " << tree->GetEntriesFast() 00686 << " entries, NOT " << entries << SLogger::endmsg; 00687 m_logger << WARNING << "Removing " << sf->file 00688 << " from the input file list" << SLogger::endmsg; 00689 throw SError( SError::SkipFile ); 00690 } else if( ! firstPassed ) { 00691 firstPassed = kTRUE; 00692 entries = tree->GetEntriesFast(); 00693 } 00694 } 00695 00696 // 00697 // Save the information about this tree into the cache: 00698 // 00699 if( m_cacheable ) { 00700 TFileInfoMeta* tree_info = new TFileInfoMeta( tree->GetName(), "TTree", 00701 tree->GetEntriesFast() ); 00702 tree_info->SetName( tree->GetName() ); 00703 tree_info->SetTitle( "Meta data info for a TTree" ); 00704 if( ! 
fileinfo->AddMetaData( tree_info ) ) { 00705 m_logger << ERROR << "There was a problem caching meta-data for TTree: " 00706 << tree->GetName() << SLogger::endmsg; 00707 } else { 00708 m_logger << VERBOSE << "Meta-data cached for TTree: " << tree->GetName() 00709 << SLogger::endmsg; 00710 } 00711 } 00712 } 00713 } 00714 00715 // Update the ID information: 00716 sf->events = entries; 00717 AddEvents( entries ); 00718 00719 m_logger << DEBUG << numberOfBranches << " branches in total in file " 00720 << file->GetName() << SLogger::endmsg; 00721 00722 } catch( const SError& ) { 00723 m_totalLumiSum -= sf->lumi; 00724 sf = m_sfileIn.erase( sf ); 00725 --sf; 00726 } 00727 00728 // Close the input file: 00729 file->Close(); 00730 if( file ) delete file; 00731 } 00732 00733 // 00734 // Save/close the cache file if it needs to be saved/closed: 00735 // 00736 if( m_dset ) delete m_dset; 00737 if( m_cacheable ) { 00738 00739 // 00740 // Take care of the TFileCollection object: 00741 // 00742 if( cacheUpdated ) { 00743 m_logger << VERBOSE << "Writing file collection object to cache" << SLogger::endmsg; 00744 cachefile->cd(); 00745 if( filecoll->Update() == -1 ) { 00746 m_logger << ERROR << "Failed to update the cached information" << SLogger::endmsg; 00747 } 00748 filecoll->Write(); 00749 00750 // 00751 // Create a new dataset and write it to the cache file: 00752 // 00753 m_dset = MakeDataSet(); 00754 cachefile->cd(); 00755 m_dset->Write(); 00756 } else { 00757 // Load the cached dataset: 00758 m_dset = AccessDataSet( cachefile ); 00759 if( ! 
m_dset ) { 00760 throw SError( "There was a logical error in the cache handling.\n" 00761 " Id Type: " + GetType() + ", Version: " + GetVersion(), 00762 SError::StopExecution ); 00763 } 00764 00765 // Check if the current configuration is likely to be described by this dataset: 00766 if( fileInfoInDataset == m_dset->GetListOfElements()->GetSize() ) { 00767 m_logger << DEBUG << "The loaded dataset is up to date" << SLogger::endmsg; 00768 } else { 00769 m_logger << DEBUG << "The dataset has to be updated" << SLogger::endmsg; 00770 delete m_dset; 00771 m_dset = MakeDataSet(); 00772 cachefile->cd(); 00773 m_dset->Write(); 00774 } 00775 } 00776 00777 cachefile->Close(); 00778 delete cachefile; 00779 00780 } else { 00781 00782 m_dset = MakeDataSet(); 00783 00784 } 00785 00786 // 00787 // Check that the specified maximum number of events and the number of events to 00788 // skip, make sense: 00789 // 00790 if( GetNEventsSkip() + GetNEventsMax() > GetEventsTotal() ) { 00791 if( GetNEventsSkip() >= GetEventsTotal() ) { 00792 SetNEventsMax( 0 ); 00793 } else { 00794 SetNEventsMax( GetEventsTotal() - GetNEventsSkip() ); 00795 } 00796 } 00797 00798 // 00799 // Print some status: 00800 // 00801 m_logger << INFO << "Input type \"" << GetType() << "\" version \"" 00802 << GetVersion() << "\" : " << GetEventsTotal() << " events" 00803 << ( ( m_cacheable && ( ! cacheUpdated ) ) ? " (cached)" : "" ) 00804 << SLogger::endmsg; 00805 00806 return; 00807 } 00808 00809 void SInputData::ValidateInputDataSets( const char* pserver ) throw( SError ) { 00810 00811 // Connect to the PROOF server: 00812 TProof* server = SProofManager::Instance()->Open( pserver ); 00813 00814 // Check the number of defined datasets. It's only possible to use multiple datasets 00815 // in a single InputData starting from ROOT 5.27/02. In previous releases only the 00816 // first one can be used. 
00817 if( ( ROOT_VERSION_CODE < ROOT_VERSION( 5, 27, 02 ) ) && 00818 // The special PROOF branch of the ROOT development code can also be used: 00819 ( strcmp( gROOT->GetVersion(), "5.26/00-proof" ) ) && 00820 ( m_dataSets.size() > 1 ) ) { 00821 00822 m_logger << WARNING << "You're currently using ROOT version: " 00823 << gROOT->GetVersion() << "\n" 00824 << "This version doesn't yet support defining multiple\n" 00825 << "datasets per InputData. Only the first one is going to be used!" 00826 << SLogger::endmsg; 00827 m_logger << WARNING << "To use multiple datasets, upgrate to at least ROOT 5.27/02" 00828 << SLogger::endmsg; 00829 m_dataSets.resize( 1 ); 00830 m_totalLumiSum = m_dataSets.front().lumi; 00831 } 00832 00833 // 00834 // Loop over the specified datasets: 00835 // 00836 for( std::vector< SDataSet >::iterator ds = m_dataSets.begin(); 00837 ds != m_dataSets.end(); ++ds ) { 00838 00839 try { 00840 00841 // 00842 // Check if the dataset exists on the server: 00843 // 00844 TFileCollection* filecoll = server->GetDataSet( ds->name ); 00845 if( ! filecoll ) { 00846 m_logger << ERROR << "Dataset \"" << ds->name << "\" doesn't exist on server: " 00847 << pserver << SLogger::endmsg; 00848 throw SError( SError::SkipFile ); 00849 } 00850 00851 // 00852 // Investigate the input trees: 00853 // 00854 Bool_t firstPassed = kFALSE; 00855 Long64_t entries = 0; 00856 for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin(); 00857 trees != m_trees.end(); ++trees ) { 00858 00859 m_logger << DEBUG << "Investigating \"" 00860 << STreeTypeDecoder::Instance()->GetName( trees->first ) 00861 << "\" types" << SLogger::endmsg; 00862 for( std::vector< STree >::const_iterator st = trees->second.begin(); 00863 st != trees->second.end(); ++st ) { 00864 00865 // Only check the existence of input trees: 00866 if( ! 
( st->type & STree::INPUT_TREE ) ) continue; 00867 00868 // Don't check for trees in sub-directories: 00869 if( st->treeName.Contains( "/" ) ) continue; 00870 00871 // Try to access information on the input tree: 00872 Long64_t tree_entries = filecoll->GetTotalEntries( "/" + st->treeName ); 00873 if( tree_entries == -1 ) { 00874 m_logger << ERROR << "Couldn't find tree " << st->treeName << " in dataset " 00875 << ds->name << SLogger::endmsg; 00876 m_logger << ERROR << "Removing dataset from the input list" 00877 << SLogger::endmsg; 00878 throw SError( SError::SkipFile ); 00879 } 00880 00881 // Check how many events are there in the input: 00882 if( st->type & STree::EVENT_TREE ) { 00883 if( firstPassed && ( tree_entries != entries ) ) { 00884 m_logger << WARNING << "Conflict in number of entries - Tree " 00885 << st->treeName << " has " << tree_entries 00886 << " entries, NOT " << entries << SLogger::endmsg; 00887 m_logger << WARNING << "Removing " << ds->name 00888 << " from the input dataset list" << SLogger::endmsg; 00889 throw SError( SError::SkipFile ); 00890 } else if( ! firstPassed ) { 00891 firstPassed = kTRUE; 00892 entries = tree_entries; 00893 } 00894 } 00895 } 00896 } 00897 00898 // Update the ID information: 00899 ds->events = entries; 00900 AddEvents( entries ); 00901 00902 } catch( const SError& ) { 00903 m_totalLumiSum -= ds->lumi; 00904 ds = m_dataSets.erase( ds ); 00905 --ds; 00906 } 00907 00908 } 00909 00910 // 00911 // Print some status: 00912 // 00913 m_logger << INFO << "Input type \"" << GetType() << "\" version \"" 00914 << GetVersion() << "\" : " << GetEventsTotal() << " events" 00915 << SLogger::endmsg; 00916 00917 return; 00918 } 00919 00920 Bool_t SInputData::LoadInfoOnFile( std::vector< SFile >::iterator& file_itr, 00921 TFileCollection* filecoll ) { 00922 00923 // Retrieve the information about this specific file: 00924 TFileInfo* fileinfo = ( TFileInfo* ) filecoll->GetList()->FindObject( file_itr->file ); 00925 if( ! 
fileinfo ) { 00926 m_logger << VERBOSE << "File unknown: " << file_itr->file << SLogger::endmsg; 00927 return kFALSE; 00928 } 00929 00930 m_logger << DEBUG << "Information found for: " << file_itr->file << SLogger::endmsg; 00931 00932 Bool_t firstPassed = kFALSE; // Flag showing if we already know the number of entries 00933 Long64_t entries = 0; // Number of entries in the file 00934 00935 // 00936 // Check that information is available on all the input trees in the cache: 00937 // 00938 for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin(); 00939 trees != m_trees.end(); ++trees ) { 00940 for( std::vector< STree >::const_iterator st = trees->second.begin(); 00941 st != trees->second.end(); ++st ) { 00942 00943 // Only check the existence of input trees: 00944 if( ! ( st->type & STree::INPUT_TREE ) ) continue; 00945 00946 // Get the tree information: 00947 TFileInfoMeta* tree_info = fileinfo->GetMetaData( st->treeName ); 00948 if( ! tree_info ) { 00949 m_logger << DEBUG << "No description found for: " << st->treeName 00950 << SLogger::endmsg; 00951 return kFALSE; 00952 } 00953 00954 // Check how many events are there in the input: 00955 if( st->type & STree::EVENT_TREE ) { 00956 if( ! firstPassed ) { 00957 firstPassed = kTRUE; 00958 entries = tree_info->GetEntries(); 00959 } else if( entries != tree_info->GetEntries() ) { 00960 m_logger << WARNING << "Inconsistent cached data for: " 00961 << file_itr->file << " -> Checking the file again..." 
00962 << SLogger::endmsg; 00963 return kFALSE; 00964 } 00965 } 00966 } 00967 } 00968 00969 // 00970 // Update the ID with this information: 00971 // 00972 file_itr->events = entries; 00973 AddEvents( entries ); 00974 00975 return kTRUE; 00976 } 00977 00978 TFileInfo* SInputData::AccessFileInfo( std::vector< SFile >::iterator& file_itr, 00979 TFileCollection* filecoll ) { 00980 00981 TFileInfo* result = 0; 00982 00983 // Check if we know anything about this file already: 00984 if( ( result = ( TFileInfo* ) filecoll->GetList()->FindObject( file_itr->file ) ) ) { 00985 m_logger << DEBUG << "Updating information for " << file_itr->file << SLogger::endmsg; 00986 } else { 00987 // One has to be very verbose in naming the object, otherwise the stupid 00988 // ROOT container will not be able to find it afterwards... 00989 m_logger << DEBUG << "Creating information for " << file_itr->file << SLogger::endmsg; 00990 result = new TFileInfo( file_itr->file ); 00991 result->SetName( file_itr->file ); 00992 result->SetTitle( "Description for: " + file_itr->file ); 00993 filecoll->Add( result ); 00994 } 00995 00996 return result; 00997 } 00998 00999 TDSet* SInputData::MakeDataSet() throw( SError ) { 01000 01001 // Find the name of the "main" TTree in the files: 01002 const char* treeName = 0; 01003 for( std::map< Int_t, std::vector< STree > >::const_iterator trees = m_trees.begin(); 01004 trees != m_trees.end(); ++trees ) { 01005 for( std::vector< STree >::const_iterator st = trees->second.begin(); 01006 st != trees->second.end(); ++st ) { 01007 if( ( st->type & STree::INPUT_TREE ) && ( st->type & STree::EVENT_TREE ) ) { 01008 treeName = st->treeName.Data(); 01009 } 01010 } 01011 } 01012 if( ! 
treeName ) { 01013 throw SError( "Can't determine input TTree name!", SError::SkipInputData ); 01014 } 01015 01016 // Create a TChain that will be the basis of the dataset: 01017 TChain chain( treeName ); 01018 for( std::vector< SFile >::const_iterator file = GetSFileIn().begin(); 01019 file != GetSFileIn().end(); ++file ) { 01020 chain.Add( file->file ); 01021 } 01022 01023 // Create the dataset: 01024 TDSet* result = new TDSet( chain ); 01025 result->SetName( "DSetCache" ); 01026 result->SetTitle( "Cached dataset for ID Type: " + GetType() + ", Version: " + 01027 GetVersion() ); 01028 result->Validate(); 01029 01030 return result; 01031 } 01032 01033 TDSet* SInputData::AccessDataSet( TDirectory* dir ) { 01034 01035 return dynamic_cast< TDSet* >( dir->Get( "DSetCache" ) ); 01036 }