OPeNDAP Hyrax Back End Server (BES) Updated for version 3.8.3
|
// BESCache.cc

// This file is part of bes, A C++ back-end server implementation framework
// for the OPeNDAP Data Access Protocol.

// Copyright (c) 2004-2009 University Corporation for Atmospheric Research
// Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// You can contact University Corporation for Atmospheric Research at
// 3080 Center Green Drive, Boulder, CO 80301

// (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
// Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 // 00029 // Authors: 00030 // pwest Patrick West <pwest@ucar.edu> 00031 // jgarcia Jose Garcia <jgarcia@ucar.edu> 00032 00033 #include "config.h" 00034 00035 #include <unistd.h> // for unlink 00036 #include <sys/types.h> 00037 #include <sys/stat.h> 00038 #include <dirent.h> 00039 #include <fcntl.h> 00040 00041 #include <cstring> 00042 #include <cerrno> 00043 #include <map> 00044 #include <iostream> 00045 #include <sstream> 00046 00047 using std::multimap ; 00048 using std::pair ; 00049 using std::greater ; 00050 using std::endl ; 00051 00052 #include "BESCache.h" 00053 #include "TheBESKeys.h" 00054 #include "BESSyntaxUserError.h" 00055 #include "BESInternalError.h" 00056 #include "BESDebug.h" 00057 00058 #define BES_CACHE_CHAR '#' 00059 00060 typedef struct _cache_entry 00061 { 00062 string name ; 00063 int size ; 00064 } cache_entry ; 00065 00066 void 00067 BESCache::check_ctor_params() 00068 { 00069 if( _cache_dir.empty() ) 00070 { 00071 string err = "The cache directory was not specified, must be non-empty"; 00072 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00073 } 00074 00075 struct stat buf; 00076 int statret = stat( _cache_dir.c_str(), &buf ) ; 00077 if( statret != 0 || ! S_ISDIR(buf.st_mode) ) 00078 { 00079 string err = "The cache directory " + _cache_dir + " does not exist" ; 00080 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00081 } 00082 00083 if( _prefix.empty() ) 00084 { 00085 string err = "The cache file prefix was not specified, must be non-empty" ; 00086 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00087 } 00088 00089 if( _cache_size == 0 ) 00090 { 00091 string err = "The cache size was not specified, must be non-zero" ; 00092 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00093 } 00094 // the cache size is specified in megabytes. When calculating 00095 // the size of the cache we convert to bytes, which is 1048576 00096 // bytes per meg. The max unsigned int allows for only 4095 00097 // megabytes. 
00098 if( _cache_size > 4095 ) _cache_size = 4095 ; 00099 00100 BESDEBUG( "bes", "BES Cache: directory " << _cache_dir 00101 << ", prefix " << _prefix 00102 << ", max size " << _cache_size << endl ) ; 00103 } 00104 00114 BESCache::BESCache( const string &cache_dir, 00115 const string &prefix, 00116 unsigned int size ) 00117 : _cache_dir( cache_dir ), 00118 _prefix( prefix ), 00119 _cache_size( size ), 00120 _lock_fd( -1 ) 00121 { 00122 check_ctor_params(); // Throws BESSyntaxUserError on error. 00123 } 00124 00139 BESCache::BESCache( BESKeys &keys, 00140 const string &cache_dir_key, 00141 const string &prefix_key, 00142 const string &size_key ) 00143 : _cache_size( 0 ), 00144 _lock_fd( -1 ) 00145 { 00146 bool found = false ; 00147 keys.get_value( cache_dir_key, _cache_dir, found ) ; 00148 if( !found ) 00149 { 00150 string err = "The cache directory key " + cache_dir_key 00151 + " was not found in the BES configuration file" ; 00152 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00153 } 00154 00155 found = false ; 00156 keys.get_value( prefix_key, _prefix, found ) ; 00157 if( !found ) 00158 { 00159 string err = "The prefix key " + prefix_key 00160 + " was not found in the BES configuration file" ; 00161 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00162 } 00163 00164 found = false ; 00165 string cache_size_str ; 00166 keys.get_value( size_key, cache_size_str, found ) ; 00167 if( !found ) 00168 { 00169 string err = "The size key " + size_key 00170 + " was not found in the BES configuration file" ; 00171 throw BESInternalError( err, __FILE__, __LINE__ ) ; 00172 } 00173 00174 std::istringstream is( cache_size_str ) ; 00175 is >> _cache_size ; 00176 00177 check_ctor_params(); // Throws BESSyntaxUserError on error. 
00178 } 00179 00186 bool 00187 BESCache::lock( unsigned int retry, unsigned int num_tries ) 00188 { 00189 // make sure we aren't retrying too many times 00190 if( num_tries > MAX_LOCK_TRIES ) 00191 num_tries = MAX_LOCK_TRIES ; 00192 if( retry > MAX_LOCK_RETRY_MS ) 00193 retry = MAX_LOCK_RETRY_MS ; 00194 00195 bool got_lock = true ; 00196 if( _lock_fd == -1 ) 00197 { 00198 string lock_file = _cache_dir + "/lock" ; 00199 unsigned int tries = 0 ; 00200 _lock_fd = open( lock_file.c_str(), 00201 O_CREAT | O_EXCL, 00202 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ; 00203 while( _lock_fd < 0 && got_lock ) 00204 { 00205 tries ++ ; 00206 if( tries > num_tries ) 00207 { 00208 _lock_fd = -1 ; 00209 got_lock = false ; 00210 } 00211 else 00212 { 00213 usleep( retry ) ; 00214 _lock_fd = open( lock_file.c_str(), 00215 O_CREAT | O_EXCL, 00216 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ; 00217 } 00218 } 00219 } 00220 else 00221 { 00222 // This would be a programming error, or we've gotten into a 00223 // situation where the lock is lost. Lock has been called on the 00224 // same cache object twice in a row without an unlock being called. 00225 string err = "The cache dir " + _cache_dir + " is already locked" ; 00226 throw BESInternalError( err, __FILE__, __LINE__ ) ; 00227 } 00228 00229 return got_lock ; 00230 } 00231 00238 bool 00239 BESCache::unlock() 00240 { 00241 // if we call unlock twice in a row, does it matter? I say no, just say 00242 // that it is unlocked. 
00243 bool unlocked = true ; 00244 if( _lock_fd != -1 ) 00245 { 00246 string lock_file = _cache_dir + "/lock" ; 00247 close( _lock_fd ) ; 00248 (void)unlink( lock_file.c_str() ) ; 00249 } 00250 00251 _lock_fd = -1 ; 00252 00253 return unlocked ; 00254 } 00255 00269 bool 00270 BESCache::is_cached( const string &src, string &target ) 00271 { 00272 bool is_it = true ; 00273 string tmp_target = src ; 00274 00275 // Create the file that would be created in the cache directory 00276 //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g' 00277 if( tmp_target.at(0) == '/' ) 00278 { 00279 tmp_target = src.substr( 1, tmp_target.length() - 1 ) ; 00280 } 00281 string::size_type slash = 0 ; 00282 while( ( slash = tmp_target.find( '/' ) ) != string::npos ) 00283 { 00284 tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ; 00285 } 00286 string::size_type last_dot = tmp_target.rfind( '.' ) ; 00287 if( last_dot != string::npos ) 00288 { 00289 tmp_target = tmp_target.substr( 0, last_dot ) ; 00290 } 00291 00292 target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ; 00293 00294 // Determine if the target file is already in the cache or not 00295 struct stat buf; 00296 int statret = stat( target.c_str(), &buf ) ; 00297 if( statret != 0 ) 00298 { 00299 is_it = false ; 00300 } 00301 00302 return is_it ; 00303 } 00304 00313 void 00314 BESCache::purge( ) 00315 { 00316 unsigned int max_size = _cache_size * 1048576 ; // Bytes/Meg 00317 struct stat buf; 00318 unsigned int size = 0 ; // total size of all cached files 00319 unsigned int avg_size = 0 ; 00320 unsigned int num_files_in_cache = 0 ; 00321 time_t curr_time = time( NULL ) ; // grab the current time so we can 00322 // determine the oldest file 00323 // map of time,entry values 00324 multimap<double,cache_entry,greater<double> > contents ; 00325 00326 // the prefix is actually the specified prefix plus the cache char '#' 00327 string match_prefix = _prefix + BES_CACHE_CHAR ; 00328 00329 // go through 
the cache directory and collect all of the files that 00330 // start with the matching prefix 00331 DIR *dip = opendir( _cache_dir.c_str() ) ; 00332 if( dip != NULL ) 00333 { 00334 struct dirent *dit; 00335 while( ( dit = readdir( dip ) ) != NULL ) 00336 { 00337 string dirEntry = dit->d_name ; 00338 if( dirEntry.compare( 0, match_prefix.length(), match_prefix ) == 0) 00339 { 00340 // Now that we have found a match we want to get the size of 00341 // the file and the last access time from the file. 00342 string fullPath = _cache_dir + "/" + dirEntry ; 00343 int statret = stat( fullPath.c_str(), &buf ) ; 00344 if( statret == 0 ) 00345 { 00346 size += buf.st_size ; 00347 00348 // Find out how old the file is 00349 time_t file_time = buf.st_atime ; 00350 // I think we can use the access time without the diff, 00351 // since it's the relative ages that determine when to 00352 // delete a file. Good idea to use the access time so 00353 // recently used (read) files will linger. jhrg 5/9/07 00354 double time_diff = difftime( curr_time, file_time ) ; 00355 cache_entry entry ; 00356 entry.name = fullPath ; 00357 entry.size = buf.st_size ; 00358 contents.insert( pair<double,cache_entry>( time_diff, entry ) ); 00359 } 00360 num_files_in_cache++ ; 00361 } 00362 } 00363 00364 // We're done looking in the directory, close it 00365 closedir( dip ) ; 00366 00367 if( num_files_in_cache ) avg_size = size / num_files_in_cache ; 00368 00369 BESDEBUG( "bes", "cache size = " << size << endl ) ; 00370 BESDEBUG( "bes", "avg size = " << avg_size << endl ) ; 00371 BESDEBUG( "bes", "num files in cache = " 00372 << num_files_in_cache << endl ) ; 00373 if( BESISDEBUG( "bes" ) ) 00374 { 00375 BESDEBUG( "bes", endl << "BEFORE" << endl ) ; 00376 multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ; 00377 multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ; 00378 for( ; ti != te; ti++ ) 00379 { 00380 BESDEBUG( "bes", (*ti).first << ": " << 
(*ti).second.name << ": size " << (*ti).second.size << endl ) ; 00381 } 00382 BESDEBUG( "bes", endl ) ; 00383 } 00384 00385 // if the size of files is greater than max allowed then we need to 00386 // purge the cache directory. Keep going until the size is less than 00387 // the max. 00388 multimap<double,cache_entry,greater<double> >::iterator i ; 00389 if( (size+avg_size) > max_size ) 00390 { 00391 // Maybe change this to size + (fraction of max_size) > max_size? 00392 // jhrg 5/9/07 00393 while( (size+avg_size) > max_size ) 00394 { 00395 i = contents.begin() ; 00396 if( i == contents.end() ) 00397 { 00398 // if we've reached the end of the cache directory, 00399 // there are no more elements in the cache, then set 00400 // the size and avg_size to 0 so that we can get out 00401 // of this loop. 00402 size = 0 ; 00403 avg_size = 0 ; 00404 } 00405 else 00406 { 00407 BESDEBUG( "bes", "BESCache::purge - removing " 00408 << (*i).second.name << endl ) ; 00409 if( remove( (*i).second.name.c_str() ) != 0 ) 00410 { 00411 char *s_err = strerror( errno ) ; 00412 string err = "Unable to remove the file " 00413 + (*i).second.name 00414 + " from the cache: " ; 00415 if( s_err ) 00416 { 00417 err.append( s_err ) ; 00418 } 00419 else 00420 { 00421 err.append( "Unknown error" ) ; 00422 } 00423 throw BESInternalError( err, __FILE__, __LINE__ ) ; 00424 } 00425 size -= (*i).second.size ; 00426 contents.erase( i ) ; 00427 } 00428 } 00429 } 00430 00431 if( BESISDEBUG( "bes" ) ) 00432 { 00433 BESDEBUG( "bes", endl << "AFTER" << endl ) ; 00434 multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ; 00435 multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ; 00436 for( ; ti != te; ti++ ) 00437 { 00438 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ; 00439 } 00440 } 00441 } 00442 else 00443 { 00444 string err = "Unable to open cache directory " + _cache_dir ; 00445 throw 
BESInternalError( err, __FILE__, __LINE__ ) ; 00446 } 00447 } 00448 00456 void 00457 BESCache::dump( ostream &strm ) const 00458 { 00459 strm << BESIndent::LMarg << "BESCache::dump - (" 00460 << (void *)this << ")" << endl ; 00461 BESIndent::Indent() ; 00462 strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ; 00463 strm << BESIndent::LMarg << "prefix: " << _prefix << endl ; 00464 strm << BESIndent::LMarg << "size: " << _cache_size << endl ; 00465 BESIndent::UnIndent() ; 00466 } 00467