OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESCache.cc
Go to the documentation of this file.
1 // BESCache.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <unistd.h> // for unlink
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 #include <fcntl.h>
40 
41 #include <cstring>
42 #include <cerrno>
43 #include <map>
44 #include <iostream>
45 #include <sstream>
46 
47 using std::multimap ;
48 using std::pair ;
49 using std::greater ;
50 using std::endl ;
51 
52 #include "BESCache.h"
53 #include "TheBESKeys.h"
54 #include "BESSyntaxUserError.h"
55 #include "BESInternalError.h"
56 #include "BESDebug.h"
57 
58 #define BES_CACHE_CHAR '#'
59 
/** @brief Bookkeeping record for one cached file found while purging.
 *
 * The size is kept as an off_t, the same type stat() reports in st_size,
 * so that files of 2 GB or more are not truncated when their size is
 * recorded.
 */
typedef struct _cache_entry
{
    string name ;   // full path of the cached file
    off_t size ;    // size of the file in bytes, as reported by stat()
} cache_entry ;
65 
66 void
67 BESCache::check_ctor_params()
68 {
69  if( _cache_dir.empty() )
70  {
71  string err = "The cache directory was not specified, must be non-empty";
72  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
73  }
74 
75  struct stat buf;
76  int statret = stat( _cache_dir.c_str(), &buf ) ;
77  if( statret != 0 || ! S_ISDIR(buf.st_mode) )
78  {
79  string err = "The cache directory " + _cache_dir + " does not exist" ;
80  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
81  }
82 
83  if( _prefix.empty() )
84  {
85  string err = "The cache file prefix was not specified, must be non-empty" ;
86  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
87  }
88 
89  if( _cache_size == 0 )
90  {
91  string err = "The cache size was not specified, must be non-zero" ;
92  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
93  }
94  // the cache size is specified in megabytes. When calculating
95  // the size of the cache we convert to bytes, which is 1048576
96  // bytes per meg. The max unsigned int allows for only 4095
97  // megabytes.
98  if( _cache_size > 4095 ) _cache_size = 4095 ;
99 
100  BESDEBUG( "bes", "BES Cache: directory " << _cache_dir
101  << ", prefix " << _prefix
102  << ", max size " << _cache_size << endl ) ;
103 }
104 
114 BESCache::BESCache( const string &cache_dir,
115  const string &prefix,
116  unsigned int size )
117  : _cache_dir( cache_dir ),
118  _prefix( prefix ),
119  _cache_size( size ),
120  _lock_fd( -1 )
121 {
122  check_ctor_params(); // Throws BESSyntaxUserError on error.
123 }
124 
139 BESCache::BESCache( BESKeys &keys,
140  const string &cache_dir_key,
141  const string &prefix_key,
142  const string &size_key )
143  : _cache_size( 0 ),
144  _lock_fd( -1 )
145 {
146  bool found = false ;
147  keys.get_value( cache_dir_key, _cache_dir, found ) ;
148  if( !found )
149  {
150  string err = "The cache directory key " + cache_dir_key
151  + " was not found in the BES configuration file" ;
152  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
153  }
154 
155  found = false ;
156  keys.get_value( prefix_key, _prefix, found ) ;
157  if( !found )
158  {
159  string err = "The prefix key " + prefix_key
160  + " was not found in the BES configuration file" ;
161  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
162  }
163 
164  found = false ;
165  string cache_size_str ;
166  keys.get_value( size_key, cache_size_str, found ) ;
167  if( !found )
168  {
169  string err = "The size key " + size_key
170  + " was not found in the BES configuration file" ;
171  throw BESInternalError( err, __FILE__, __LINE__ ) ;
172  }
173 
174  std::istringstream is( cache_size_str ) ;
175  is >> _cache_size ;
176 
177  check_ctor_params(); // Throws BESSyntaxUserError on error.
178 }
179 
186 bool
187 BESCache::lock( unsigned int retry, unsigned int num_tries )
188 {
189  // make sure we aren't retrying too many times
190  if( num_tries > MAX_LOCK_TRIES )
191  num_tries = MAX_LOCK_TRIES ;
192  if( retry > MAX_LOCK_RETRY_MS )
193  retry = MAX_LOCK_RETRY_MS ;
194 
195  bool got_lock = true ;
196  if( _lock_fd == -1 )
197  {
198  string lock_file = _cache_dir + "/lock" ;
199  unsigned int tries = 0 ;
200  _lock_fd = open( lock_file.c_str(),
201  O_CREAT | O_EXCL,
202  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
203  while( _lock_fd < 0 && got_lock )
204  {
205  tries ++ ;
206  if( tries > num_tries )
207  {
208  _lock_fd = -1 ;
209  got_lock = false ;
210  }
211  else
212  {
213  usleep( retry ) ;
214  _lock_fd = open( lock_file.c_str(),
215  O_CREAT | O_EXCL,
216  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
217  }
218  }
219  }
220  else
221  {
222  // This would be a programming error, or we've gotten into a
223  // situation where the lock is lost. Lock has been called on the
224  // same cache object twice in a row without an unlock being called.
225  string err = "The cache dir " + _cache_dir + " is already locked" ;
226  throw BESInternalError( err, __FILE__, __LINE__ ) ;
227  }
228 
229  return got_lock ;
230 }
231 
238 bool
240 {
241  // if we call unlock twice in a row, does it matter? I say no, just say
242  // that it is unlocked.
243  bool unlocked = true ;
244  if( _lock_fd != -1 )
245  {
246  string lock_file = _cache_dir + "/lock" ;
247  close( _lock_fd ) ;
248  (void)unlink( lock_file.c_str() ) ;
249  }
250 
251  _lock_fd = -1 ;
252 
253  return unlocked ;
254 }
255 
269 bool
270 BESCache::is_cached( const string &src, string &target )
271 {
272  bool is_it = true ;
273  string tmp_target = src ;
274 
275  // Create the file that would be created in the cache directory
276  //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
277  if( tmp_target.at(0) == '/' )
278  {
279  tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
280  }
281  string::size_type slash = 0 ;
282  while( ( slash = tmp_target.find( '/' ) ) != string::npos )
283  {
284  tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
285  }
286  string::size_type last_dot = tmp_target.rfind( '.' ) ;
287  if( last_dot != string::npos )
288  {
289  tmp_target = tmp_target.substr( 0, last_dot ) ;
290  }
291 
292  target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
293 
294  // Determine if the target file is already in the cache or not
295  struct stat buf;
296  int statret = stat( target.c_str(), &buf ) ;
297  if( statret != 0 )
298  {
299  is_it = false ;
300  }
301 
302  return is_it ;
303 }
304 
313 void
315 {
316  unsigned int max_size = _cache_size * 1048576 ; // Bytes/Meg
317  struct stat buf;
318  unsigned int size = 0 ; // total size of all cached files
319  unsigned int avg_size = 0 ;
320  unsigned int num_files_in_cache = 0 ;
321  time_t curr_time = time( NULL ) ; // grab the current time so we can
322  // determine the oldest file
323  // map of time,entry values
324  multimap<double,cache_entry,greater<double> > contents ;
325 
326  // the prefix is actually the specified prefix plus the cache char '#'
327  string match_prefix = _prefix + BES_CACHE_CHAR ;
328 
329  // go through the cache directory and collect all of the files that
330  // start with the matching prefix
331  DIR *dip = opendir( _cache_dir.c_str() ) ;
332  if( dip != NULL )
333  {
334  struct dirent *dit;
335  while( ( dit = readdir( dip ) ) != NULL )
336  {
337  string dirEntry = dit->d_name ;
338  if( dirEntry.compare( 0, match_prefix.length(), match_prefix ) == 0)
339  {
340  // Now that we have found a match we want to get the size of
341  // the file and the last access time from the file.
342  string fullPath = _cache_dir + "/" + dirEntry ;
343  int statret = stat( fullPath.c_str(), &buf ) ;
344  if( statret == 0 )
345  {
346  size += buf.st_size ;
347 
348  // Find out how old the file is
349  time_t file_time = buf.st_atime ;
350  // I think we can use the access time without the diff,
351  // since it's the relative ages that determine when to
352  // delete a file. Good idea to use the access time so
353  // recently used (read) files will linger. jhrg 5/9/07
354  double time_diff = difftime( curr_time, file_time ) ;
355  cache_entry entry ;
356  entry.name = fullPath ;
357  entry.size = buf.st_size ;
358  contents.insert( pair<double,cache_entry>( time_diff, entry ) );
359  }
360  num_files_in_cache++ ;
361  }
362  }
363 
364  // We're done looking in the directory, close it
365  closedir( dip ) ;
366 
367  if( num_files_in_cache ) avg_size = size / num_files_in_cache ;
368 
369  BESDEBUG( "bes", "cache size = " << size << endl ) ;
370  BESDEBUG( "bes", "avg size = " << avg_size << endl ) ;
371  BESDEBUG( "bes", "num files in cache = "
372  << num_files_in_cache << endl ) ;
373  if( BESISDEBUG( "bes" ) )
374  {
375  BESDEBUG( "bes", endl << "BEFORE" << endl ) ;
376  multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
377  multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
378  for( ; ti != te; ti++ )
379  {
380  BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
381  }
382  BESDEBUG( "bes", endl ) ;
383  }
384 
385  // if the size of files is greater than max allowed then we need to
386  // purge the cache directory. Keep going until the size is less than
387  // the max.
388  multimap<double,cache_entry,greater<double> >::iterator i ;
389  if( (size+avg_size) > max_size )
390  {
391  // Maybe change this to size + (fraction of max_size) > max_size?
392  // jhrg 5/9/07
393  while( (size+avg_size) > max_size )
394  {
395  i = contents.begin() ;
396  if( i == contents.end() )
397  {
398  // if we've reached the end of the cache directory,
399  // there are no more elements in the cache, then set
400  // the size and avg_size to 0 so that we can get out
401  // of this loop.
402  size = 0 ;
403  avg_size = 0 ;
404  }
405  else
406  {
407  BESDEBUG( "bes", "BESCache::purge - removing "
408  << (*i).second.name << endl ) ;
409  if( remove( (*i).second.name.c_str() ) != 0 )
410  {
411  char *s_err = strerror( errno ) ;
412  string err = "Unable to remove the file "
413  + (*i).second.name
414  + " from the cache: " ;
415  if( s_err )
416  {
417  err.append( s_err ) ;
418  }
419  else
420  {
421  err.append( "Unknown error" ) ;
422  }
423  throw BESInternalError( err, __FILE__, __LINE__ ) ;
424  }
425  size -= (*i).second.size ;
426  contents.erase( i ) ;
427  }
428  }
429  }
430 
431  if( BESISDEBUG( "bes" ) )
432  {
433  BESDEBUG( "bes", endl << "AFTER" << endl ) ;
434  multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
435  multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
436  for( ; ti != te; ti++ )
437  {
438  BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
439  }
440  }
441  }
442  else
443  {
444  string err = "Unable to open cache directory " + _cache_dir ;
445  throw BESInternalError( err, __FILE__, __LINE__ ) ;
446  }
447 }
448 
456 void
457 BESCache::dump( ostream &strm ) const
458 {
459  strm << BESIndent::LMarg << "BESCache::dump - ("
460  << (void *)this << ")" << endl ;
462  strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
463  strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
464  strm << BESIndent::LMarg << "size: " << _cache_size << endl ;
466 }
467 
#define MAX_LOCK_TRIES
Definition: BESCache.h:45
#define BES_CACHE_CHAR
Definition: BESCache.cc:58
#define BESISDEBUG(x)
macro used to determine if the specified debug context is set
Definition: BESDebug.h:83
#define MAX_LOCK_RETRY_MS
Definition: BESCache.h:44
exception thrown if internal error encountered
virtual bool lock(unsigned int retry_ms, unsigned int num_tries)
lock the cache using a file lock
Definition: BESCache.cc:187
virtual void purge()
Check to see if the cache size exceeds the size specified in the constructor and purge older files un...
Definition: BESCache.cc:314
virtual void dump(ostream &strm) const
dumps information about this object
Definition: BESCache.cc:457
static void Indent()
Definition: BESIndent.cc:38
error thrown if there is a user syntax error in the request or any other user error ...
mapping of key/value pairs defining different behaviors of an application.
Definition: BESKeys.h:84
static ostream & LMarg(ostream &strm)
Definition: BESIndent.cc:73
virtual bool unlock()
unlock the cache
Definition: BESCache.cc:239
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:466
virtual bool is_cached(const string &src, string &target)
Determine if the file specified by src is cached.
Definition: BESCache.cc:270
struct _cache_entry cache_entry
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
static void UnIndent()
Definition: BESIndent.cc:44