34 #include <sys/types.h> 61 HTTPCache *HTTPCache::_instance = 0;
70 static pthread_mutex_t instance_mutex;
71 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
77 #define MKDIR(a,b) _mkdir((a)) 78 #define UMASK(a) _umask((a)) 79 #define REMOVE(a) remove((a)) 80 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE) 81 #define DIR_SEPARATOR_CHAR '\\' 82 #define DIR_SEPARATOR_STR "\\" 84 #define MKDIR(a,b) mkdir((a), (b)) 85 #define UMASK(a) umask((a)) 86 #define REMOVE(a) remove((a)) 87 #define MKSTEMP(a) mkstemp((a)) 88 #define DIR_SEPARATOR_CHAR '/' 89 #define DIR_SEPARATOR_STR "/" 93 #define CACHE_LOC "\\tmp\\" 94 #define CACHE_ROOT "dods-cache\\" 96 #define CACHE_LOC "/tmp/" 97 #define CACHE_ROOT "dods-cache/" 99 #define CACHE_INDEX ".index" 100 #define CACHE_LOCK ".lock" 101 #define CACHE_META ".meta" 104 #define NO_LM_EXPIRATION 24*3600 // 24 hours 106 #define DUMP_FREQUENCY 10 // Dump index every x loads 108 #define MEGA 0x100000L 109 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M 110 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc. 111 #define CACHE_GC_PCT 10 // 10% of cache size free after GC 112 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size 113 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry 119 status =
INIT(&instance_mutex);
122 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
154 HTTPCache::instance(
const string &cache_root,
bool force)
156 LOCK(&instance_mutex);
157 DBG(cerr <<
"Entering instance(); (" << hex << _instance << dec <<
")" 162 _instance =
new HTTPCache(cache_root, force);
164 DBG(cerr <<
"New instance: " << _instance <<
", cache root: " 165 << _instance->d_cache_root << endl);
167 atexit(delete_instance);
178 EventHandler *old_eh = SignalHandler::instance()->register_handler
181 SignalHandler::instance()->register_handler(SIGINT, old_eh);
183 "Could not register event handler for SIGINT without superseding an existing one.");
186 old_eh = SignalHandler::instance()->register_handler
189 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
191 "Could not register event handler for SIGPIPE without superseding an existing one.");
194 old_eh = SignalHandler::instance()->register_handler
197 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
199 "Could not register event handler for SIGTERM without superseding an existing one.");
205 DBG2(cerr <<
"The constructor threw an Error!" << endl);
211 DBGN(cerr <<
"returning " << hex << _instance << dec << endl);
220 HTTPCache::delete_instance()
222 DBG(cerr <<
"Entering delete_instance()..." << endl);
223 if (HTTPCache::_instance) {
224 DBG(cerr <<
"Deleting the cache: " << HTTPCache::_instance << endl);
225 delete HTTPCache::_instance;
226 HTTPCache::_instance = 0;
229 DBG(cerr <<
"Exiting delete_instance()" << endl);
246 HTTPCache::HTTPCache(
string cache_root,
bool force) :
247 d_locked_open_file(0),
248 d_cache_enabled(
false),
249 d_cache_protected(
false),
250 d_expire_ignored(
false),
251 d_always_validate(
false),
260 d_http_cache_table(0)
262 DBG(cerr <<
"Entering the constructor for " <<
this <<
"... ");
264 int status = pthread_once(&once_block, once_init_routine);
266 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
268 INIT(&d_cache_mutex);
280 set_cache_root(cache_root);
283 if (!get_single_user_lock(force))
284 throw Error(
"Could not get single user lock for the cache");
294 if (stat(cache_root.c_str(), &s) == 0)
295 block_size = s.st_blksize;
297 throw Error(
"Could not set file system block size.");
299 d_http_cache_table =
new HTTPCacheTable(d_cache_root, block_size);
300 d_cache_enabled =
true;
302 DBGN(cerr <<
"exiting" << endl);
317 HTTPCache::~HTTPCache()
319 DBG(cerr <<
"Entering the destructor for " <<
this <<
"... ");
323 perform_garbage_collection();
325 d_http_cache_table->cache_index_write();
335 delete d_http_cache_table;
337 release_single_user_lock();
339 DBGN(cerr <<
"exiting destructor." << endl);
352 HTTPCache::stopGC()
const 354 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
364 HTTPCache::startGC()
const 366 DBG(cerr <<
"startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
367 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
385 HTTPCache::perform_garbage_collection()
387 DBG(cerr <<
"Performing garbage collection" << endl);
406 HTTPCache::expired_gc()
408 if (!d_expire_ignored) {
409 d_http_cache_table->delete_expired_entries();
436 d_http_cache_table->delete_by_hits(hits);
446 void HTTPCache::too_big_gc() {
448 d_http_cache_table->delete_by_size(d_max_entry_size);
463 bool HTTPCache::get_single_user_lock(
bool force) {
464 if (!d_locked_open_file) {
470 create_cache_root(d_cache_root);
481 if ((fp = fopen(lock.c_str(),
"r")) != NULL) {
482 int res = fclose(fp);
484 DBG(cerr <<
"Failed to close " << (
void *)fp << endl);
492 if ((fp = fopen(lock.c_str(),
"w")) == NULL)
495 d_locked_open_file = fp;
505 HTTPCache::release_single_user_lock()
507 if (d_locked_open_file) {
508 int res = fclose(d_locked_open_file);
510 DBG(cerr <<
"Failed to close " << (
void *)d_locked_open_file << endl) ;
512 d_locked_open_file = 0;
526 HTTPCache::get_cache_root()
const 541 HTTPCache::create_cache_root(
const string &cache_root)
543 struct stat stat_info;
544 string::size_type cur = 0;
547 cur = cache_root[1] ==
':' ? 3 : 1;
553 string dir = cache_root.substr(0, cur);
554 if (stat(dir.c_str(), &stat_info) == -1) {
555 DBG2(cerr <<
"Cache....... Creating " << dir << endl);
556 mode_t mask =
UMASK(0);
557 if (
MKDIR(dir.c_str(), 0777) < 0) {
558 DBG2(cerr <<
"Error: can't create." << endl);
560 throw Error(
string(
"Could not create the directory for the cache. Failed when building path at ") + dir +
string(
"."));
565 DBG2(cerr <<
"Cache....... Found " << dir << endl);
586 HTTPCache::set_cache_root(
const string &root)
597 char * cr = (
char *) getenv(
"DODS_CACHE");
598 if (!cr) cr = (
char *) getenv(
"TMP");
599 if (!cr) cr = (
char *) getenv(
"TEMP");
612 if (d_http_cache_table)
613 d_http_cache_table->set_cache_root(d_cache_root);
628 HTTPCache::set_cache_enabled(
bool mode)
630 lock_cache_interface();
632 d_cache_enabled = mode;
634 unlock_cache_interface();
640 HTTPCache::is_cache_enabled()
const 642 DBG2(cerr <<
"In HTTPCache::is_cache_enabled: (" << d_cache_enabled <<
")" 644 return d_cache_enabled;
660 lock_cache_interface();
662 d_cache_disconnected = mode;
664 unlock_cache_interface();
670 HTTPCache::get_cache_disconnected()
const 672 return d_cache_disconnected;
684 HTTPCache::set_expire_ignored(
bool mode)
686 lock_cache_interface();
688 d_expire_ignored = mode;
690 unlock_cache_interface();
697 HTTPCache::is_expire_ignored()
const 699 return d_expire_ignored;
718 HTTPCache::set_max_size(
unsigned long size)
720 lock_cache_interface();
725 (size > ULONG_MAX ? ULONG_MAX : size *
MEGA);
726 unsigned long old_size = d_total_size;
727 d_total_size = new_size;
731 if (new_size < old_size && startGC()) {
732 perform_garbage_collection();
733 d_http_cache_table->cache_index_write();
737 unlock_cache_interface();
738 DBGN(cerr <<
"Unlocking interface." << endl);
742 DBG2(cerr <<
"Cache....... Total cache size: " << d_total_size
743 <<
" with " << d_folder_size
744 <<
" bytes for meta information and folders and at least " 745 << d_gc_buffer <<
" bytes free after every gc" << endl);
747 unlock_cache_interface();
753 HTTPCache::get_max_size()
const 755 return d_total_size /
MEGA;
767 HTTPCache::set_max_entry_size(
unsigned long size)
769 lock_cache_interface();
772 unsigned long new_size = size *
MEGA;
773 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
774 unsigned long old_size = d_max_entry_size;
775 d_max_entry_size = new_size;
776 if (new_size < old_size && startGC()) {
777 perform_garbage_collection();
778 d_http_cache_table->cache_index_write();
783 unlock_cache_interface();
787 DBG2(cerr <<
"Cache...... Max entry cache size is " 788 << d_max_entry_size << endl);
790 unlock_cache_interface();
798 HTTPCache::get_max_entry_size()
const 800 return d_max_entry_size /
MEGA;
814 HTTPCache::set_default_expiration(
const int exp_time)
816 lock_cache_interface();
818 d_default_expiration = exp_time;
820 unlock_cache_interface();
826 HTTPCache::get_default_expiration()
const 828 return d_default_expiration;
836 HTTPCache::set_always_validate(
bool validate)
838 d_always_validate = validate;
845 HTTPCache::get_always_validate()
const 847 return d_always_validate;
867 HTTPCache::set_cache_control(
const vector<string> &cc)
869 lock_cache_interface();
872 d_cache_control = cc;
874 vector<string>::const_iterator i;
875 for (i = cc.begin(); i != cc.end(); ++i) {
876 string header = (*i).substr(0, (*i).find(
':'));
877 string value = (*i).substr((*i).find(
": ") + 2);
878 if (header !=
"Cache-Control") {
879 throw InternalErr(__FILE__, __LINE__,
"Expected cache control header not found.");
882 if (value ==
"no-cache" || value ==
"no-store")
883 d_cache_enabled =
false;
884 else if (value.find(
"max-age") != string::npos) {
885 string max_age = value.substr(value.find(
"=" + 1));
888 else if (value ==
"max-stale")
890 else if (value.find(
"max-stale") != string::npos) {
891 string max_stale = value.substr(value.find(
"=" + 1));
894 else if (value.find(
"min-fresh") != string::npos) {
895 string min_fresh = value.substr(value.find(
"=" + 1));
902 unlock_cache_interface();
906 unlock_cache_interface();
915 HTTPCache::get_cache_control()
917 return d_cache_control;
931 HTTPCache::is_url_in_cache(
const string &url)
933 DBG(cerr <<
"Is this url in the cache? (" << url <<
")" << endl);
936 bool status = entry != 0;
951 return header.find(
"Connection") != string::npos
952 || header.find(
"Keep-Alive") != string::npos
953 || header.find(
"Proxy-Authenticate") != string::npos
954 || header.find(
"Proxy-Authorization") != string::npos
955 || header.find(
"Transfer-Encoding") != string::npos
956 || header.find(
"Upgrade") != string::npos;
971 HTTPCache::write_metadata(
const string &cachename,
const vector<string> &headers)
974 d_open_files.push_back(fname);
976 FILE *dest = fopen(fname.c_str(),
"w");
979 "Could not open named cache entry file.");
982 vector<string>::const_iterator i;
983 for (i = headers.begin(); i != headers.end(); ++i) {
985 fwrite((*i).c_str(), (*i).size(), 1, dest);
986 fwrite(
"\n", 1, 1, dest);
990 int res = fclose(dest);
992 DBG(cerr <<
"HTTPCache::write_metadata - Failed to close " 996 d_open_files.pop_back();
1010 HTTPCache::read_metadata(
const string &cachename, vector<string> &headers)
1012 FILE *md = fopen(
string(cachename +
CACHE_META).c_str(),
"r");
1015 "Could not open named cache entry meta data file.");
1019 while (!feof(md) && fgets(line, 1024, md)) {
1020 line[min(1024, static_cast<int>(strlen(line)))-1] =
'\0';
1021 headers.push_back(
string(line));
1024 int res = fclose(md);
1026 DBG(cerr <<
"HTTPCache::read_metadata - Failed to close " 1053 HTTPCache::write_body(
const string &cachename,
const FILE *src)
1055 d_open_files.push_back(cachename);
1057 FILE *dest = fopen(cachename.c_str(),
"wb");
1060 "Could not open named cache entry file.");
1068 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1069 total += fwrite(line, 1, n, dest);
1073 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1074 int res = fclose(dest);
1075 res = res & unlink(cachename.c_str());
1077 DBG(cerr <<
"HTTPCache::write_body - Failed to close/unlink " 1081 "I/O error transferring data to the cache.");
1084 rewind(const_cast<FILE *>(src));
1086 int res = fclose(dest);
1088 DBG(cerr <<
"HTTPCache::write_body - Failed to close " 1092 d_open_files.pop_back();
1106 HTTPCache::open_body(
const string &cachename)
1108 FILE *src = fopen(cachename.c_str(),
"rb");
1110 throw InternalErr(__FILE__, __LINE__,
"Could not open cache file.");
1141 HTTPCache::cache_response(
const string &url, time_t request_time,
1142 const vector<string> &headers,
const FILE *body)
1144 lock_cache_interface();
1146 DBG(cerr <<
"Caching url: " << url <<
"." << endl);
1150 if (url.find(
"http:") == string::npos &&
1151 url.find(
"https:") == string::npos) {
1152 unlock_cache_interface();
1159 d_http_cache_table->remove_entry_from_cache_table(url);
1165 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1167 DBG(cerr <<
"Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1168 <<
"(" << url <<
")" << endl);
1170 delete entry; entry = 0;
1171 unlock_cache_interface();
1176 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1178 d_http_cache_table->create_location(entry);
1182 d_http_cache_table->add_entry_to_cache_table(entry);
1190 DBG(cerr <<
"Too big; deleting HTTPCacheTable::CacheEntry: " << entry <<
"(" << url
1193 delete entry; entry = 0;
1194 unlock_cache_interface();
1200 perform_garbage_collection();
1202 d_http_cache_table->cache_index_write();
1206 unlock_cache_interface();
1210 unlock_cache_interface();
1234 HTTPCache::get_conditional_request_headers(
const string &url)
1236 lock_cache_interface();
1239 vector<string> headers;
1241 DBG(cerr <<
"Getting conditional request headers for " << url << endl);
1244 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1246 throw Error(
"There is no cache entry for the URL: " + url);
1249 headers.push_back(
string(
"If-None-Match: ") + entry->
get_etag());
1251 if (entry->
get_lm() > 0) {
1252 time_t lm = entry->
get_lm();
1253 headers.push_back(
string(
"If-Modified-Since: ")
1258 headers.push_back(
string(
"If-Modified-Since: ")
1263 headers.push_back(
string(
"If-Modified-Since: ")
1267 unlock_cache_interface();
1270 unlock_cache_interface();
1283 struct HeaderLess: binary_function<const string&, const string&, bool>
1285 bool operator()(
const string &s1,
const string &s2)
const {
1286 return s1.substr(0, s1.find(
':')) < s2.substr(0, s2.find(
':'));
1304 HTTPCache::update_response(
const string &url, time_t request_time,
1305 const vector<string> &headers)
1307 lock_cache_interface();
1310 DBG(cerr <<
"Updating the response headers for: " << url << endl);
1313 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1315 throw Error(
"There is no cache entry for the URL: " + url);
1318 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1321 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1329 set<string, HeaderLess> merged_headers;
1332 copy(headers.begin(), headers.end(),
1333 inserter(merged_headers, merged_headers.begin()));
1336 vector<string> old_headers;
1338 copy(old_headers.begin(), old_headers.end(),
1339 inserter(merged_headers, merged_headers.begin()));
1344 vector<string> result;
1345 copy(merged_headers.rbegin(), merged_headers.rend(),
1346 back_inserter(result));
1350 unlock_cache_interface();
1356 unlock_cache_interface();
1373 HTTPCache::is_url_valid(
const string &url)
1375 lock_cache_interface();
1380 DBG(cerr <<
"Is this URL valid? (" << url <<
")" << endl);
1383 if (d_always_validate) {
1384 unlock_cache_interface();
1388 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1390 throw Error(
"There is no cache entry for the URL: " + url);
1399 unlock_cache_interface();
1408 if (d_max_age >= 0 && current_age > d_max_age) {
1409 DBG(cerr <<
"Cache....... Max-age validation" << endl);
1411 unlock_cache_interface();
1414 if (d_min_fresh >= 0
1416 DBG(cerr <<
"Cache....... Min-fresh validation" << endl);
1418 unlock_cache_interface();
1423 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1425 unlock_cache_interface();
1431 unlock_cache_interface();
1465 FILE * HTTPCache::get_cached_response(
const string &url,
1466 vector<string> &headers,
string &cacheName) {
1467 lock_cache_interface();
1472 DBG(cerr <<
"Getting the cached response for " << url << endl);
1475 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1477 unlock_cache_interface();
1484 DBG(cerr <<
"Headers just read from cache: " << endl);
1485 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr,
"\n")));
1489 DBG(cerr <<
"Returning: " << url <<
" from the cache." << endl);
1491 d_http_cache_table->bind_entry_to_data(entry, body);
1495 unlock_cache_interface();
1499 unlock_cache_interface();
1515 HTTPCache::get_cached_response(
const string &url, vector<string> &headers)
1517 string discard_name;
1518 return get_cached_response(url, headers, discard_name);
1532 HTTPCache::get_cached_response(
const string &url)
1534 string discard_name;
1535 vector<string> discard_headers;
1536 return get_cached_response(url, discard_headers, discard_name);
1552 HTTPCache::release_cached_response(FILE *body)
1554 lock_cache_interface();
1557 d_http_cache_table->uncouple_entry_from_data(body);
1560 unlock_cache_interface();
1564 unlock_cache_interface();
1580 HTTPCache::purge_cache()
1582 lock_cache_interface();
1585 if (d_http_cache_table->is_locked_read_responses())
1586 throw Error(
"Attempt to purge the cache with entries in use.");
1588 d_http_cache_table->delete_all_entries();
1591 unlock_cache_interface();
1595 unlock_cache_interface();
time_t parse_time(const char *str, bool expand)
void unlock_read_response()
void lock_write_response()
A class for software fault reporting.
bool is_hop_by_hop_header(const string &header)
#define MAX_CACHE_ENTRY_SIZE
void set_size(unsigned long sz)
string date_time_str(time_t *calendar, bool local)
bool get_must_revalidate()
string get_error_message() const
time_t get_freshness_lifetime()
#define MIN_CACHE_TOTAL_SIZE
void unlock_write_response()
#define DIR_SEPARATOR_CHAR
time_t get_response_time()
A class for error processing.
time_t get_corrected_initial_age()