libdap++  Updated for version 3.8.2
HTTPCache.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 //#define DODS_DEBUG
29 //#define DODS_DEBUG2
30 
31 #include <pthread.h>
32 #include <limits.h>
33 #include <unistd.h> // for stat
34 #include <sys/types.h> // for stat and mkdir
35 #include <sys/stat.h>
36 
37 #include <cstring>
38 #include <iostream>
39 #include <sstream>
40 #include <algorithm>
41 #include <iterator>
42 #include <set>
43 
44 #include "Error.h"
45 #include "InternalErr.h"
46 #include "ResponseTooBigErr.h"
47 #ifndef WIN32
48 #include "SignalHandler.h"
49 #endif
51 #include "HTTPCacheTable.h"
52 #include "HTTPCache.h"
53 
54 #include "util_mit.h"
55 #include "debug.h"
56 
57 using namespace std;
58 
59 namespace libdap {
60 
61 HTTPCache *HTTPCache::_instance = 0;
62 
63 // instance_mutex is used to ensure that only one instance is created.
64 // That is, it protects the body of the HTTPCache::instance() method. This
65 // mutex is initialized from within the static function once_init_routine()
66 // and the call to that takes place using pthread_once(), where the
67 // pthread_once_t control variable once_block ensures the routine runs only
68 // once. All of this ensures that no matter how many threads call the
69 // instance() method, only one instance is ever made.
70 static pthread_mutex_t instance_mutex;
71 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
72 
73 #ifdef WIN32
74 #include <direct.h>
75 #include <time.h>
76 #include <fcntl.h>
77 #define MKDIR(a,b) _mkdir((a))
78 #define UMASK(a) _umask((a))
79 #define REMOVE(a) remove((a))
80 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
81 #define DIR_SEPARATOR_CHAR '\\'
82 #define DIR_SEPARATOR_STR "\\"
83 #else
84 #define MKDIR(a,b) mkdir((a), (b))
85 #define UMASK(a) umask((a))
86 #define REMOVE(a) remove((a))
87 #define MKSTEMP(a) mkstemp((a))
88 #define DIR_SEPARATOR_CHAR '/'
89 #define DIR_SEPARATOR_STR "/"
90 #endif
91 
92 #ifdef WIN32
93 #define CACHE_LOC "\\tmp\\"
94 #define CACHE_ROOT "dods-cache\\"
95 #else
96 #define CACHE_LOC "/tmp/"
97 #define CACHE_ROOT "dods-cache/"
98 #endif
99 #define CACHE_INDEX ".index"
100 #define CACHE_LOCK ".lock"
101 #define CACHE_META ".meta"
102 //#define CACHE_EMPTY_ETAG "@cache@"
103 
104 #define NO_LM_EXPIRATION 24*3600 // 24 hours
105 
106 #define DUMP_FREQUENCY 10 // Dump index every x loads
107 
108 #define MEGA 0x100000L
109 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
110 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
111 #define CACHE_GC_PCT 10 // 10% of cache size free after GC
112 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
113 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
114 
115 static void
116 once_init_routine()
117 {
118  int status;
119  status = INIT(&instance_mutex);
120 
121  if (status != 0)
122  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
123 }
124 
153 HTTPCache *
154 HTTPCache::instance(const string &cache_root, bool force)
155 {
156  LOCK(&instance_mutex);
157  DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
158  << "... ");
159 
160  try {
161  if (!_instance) {
162  _instance = new HTTPCache(cache_root, force);
163 
164  DBG(cerr << "New instance: " << _instance << ", cache root: "
165  << _instance->d_cache_root << endl);
166 
167  atexit(delete_instance);
168 
169 #ifndef WIN32
170  // Register the interrupt handler. If we've already registered
171  // one, barf. If this becomes a problem, hack SignalHandler so
172  // that we can chain these handlers... 02/10/04 jhrg
173  //
174  // Technically we're leaking memory here. However, since this
175  // class is a singleton, we know that only three objects will
176  // ever be created and they will all exist until the process
177  // exits. We can let this slide... 02/12/04 jhrg
178  EventHandler *old_eh = SignalHandler::instance()->register_handler
179  (SIGINT, new HTTPCacheInterruptHandler);
180  if (old_eh) {
181  SignalHandler::instance()->register_handler(SIGINT, old_eh);
183  "Could not register event handler for SIGINT without superseding an existing one.");
184  }
185 
186  old_eh = SignalHandler::instance()->register_handler
187  (SIGPIPE, new HTTPCacheInterruptHandler);
188  if (old_eh) {
189  SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
191  "Could not register event handler for SIGPIPE without superseding an existing one.");
192  }
193 
194  old_eh = SignalHandler::instance()->register_handler
195  (SIGTERM, new HTTPCacheInterruptHandler);
196  if (old_eh) {
197  SignalHandler::instance()->register_handler(SIGTERM, old_eh);
199  "Could not register event handler for SIGTERM without superseding an existing one.");
200  }
201 #endif
202  }
203  }
204  catch (...) {
205  DBG2(cerr << "The constructor threw an Error!" << endl);
206  UNLOCK(&instance_mutex);
207  throw;
208  }
209 
210  UNLOCK(&instance_mutex);
211  DBGN(cerr << "returning " << hex << _instance << dec << endl);
212 
213  return _instance;
214 }
215 
219 void
220 HTTPCache::delete_instance()
221 {
222  DBG(cerr << "Entering delete_instance()..." << endl);
223  if (HTTPCache::_instance) {
224  DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
225  delete HTTPCache::_instance;
226  HTTPCache::_instance = 0;
227  }
228 
229  DBG(cerr << "Exiting delete_instance()" << endl);
230 }
231 
246 HTTPCache::HTTPCache(string cache_root, bool force) :
247  d_locked_open_file(0),
248  d_cache_enabled(false),
249  d_cache_protected(false),
250  d_expire_ignored(false),
251  d_always_validate(false),
252  d_total_size(CACHE_TOTAL_SIZE * MEGA),
253  d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
254  d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
255  d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
256  d_default_expiration(NO_LM_EXPIRATION),
257  d_max_age(-1),
258  d_max_stale(-1),
259  d_min_fresh(-1),
260  d_http_cache_table(0)
261 {
262  DBG(cerr << "Entering the constructor for " << this << "... ");
263 
264  int status = pthread_once(&once_block, once_init_routine);
265  if (status != 0)
266  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
267 
268  INIT(&d_cache_mutex);
269 
270  // This used to throw an Error object if we could not get the
271  // single user lock. However, that results in an invalid object. It's
272  // better to have an instance that has default values. If we cannot get
273  // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
274  //
275  // I fixed this block so that the cache root is set before we try to get
276  // the single user lock. That was the fix for bug #661. To make that
277  // work, I had to move the call to create_cache_root out of
278  // set_cache_root(). 09/08/03 jhrg
279 
280  set_cache_root(cache_root);
281  int block_size;
282 
283  if (!get_single_user_lock(force))
284  throw Error("Could not get single user lock for the cache");
285 
286 #ifdef WIN32
287  // Windows is unable to provide us this information. 4096 appears
288  // a best guess. It is likely to be in the range [2048, 8192] on
289  // windows, but will the level of truth of that statement vary over
290  // time ?
291  block_size = 4096;
292 #else
293  struct stat s;
294  if (stat(cache_root.c_str(), &s) == 0)
295  block_size = s.st_blksize;
296  else
297  throw Error("Could not set file system block size.");
298 #endif
299  d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
300  d_cache_enabled = true;
301 
302  DBGN(cerr << "exiting" << endl);
303 }
304 
317 HTTPCache::~HTTPCache()
318 {
319  DBG(cerr << "Entering the destructor for " << this << "... ");
320 
321  try {
322  if (startGC())
323  perform_garbage_collection();
324 
325  d_http_cache_table->cache_index_write();
326  }
327  catch (Error &e) {
328  // If the cache index cannot be written, we've got problems. However,
329  // unless we're debugging, still free up the cache table in memory.
330  // How should we let users know they cache index is not being
331  // written?? 10/03/02 jhrg
332  DBG(cerr << e.get_error_message() << endl);
333  }
334 
335  delete d_http_cache_table;
336 
337  release_single_user_lock();
338 
339  DBGN(cerr << "exiting destructor." << endl);
340  DESTROY(&d_cache_mutex);
341 }
342 
343 
347 
351 bool
352 HTTPCache::stopGC() const
353 {
354  return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
355 }
356 
363 bool
364 HTTPCache::startGC() const
365 {
366  DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
367  return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
368 }
369 
384 void
385 HTTPCache::perform_garbage_collection()
386 {
387  DBG(cerr << "Performing garbage collection" << endl);
388 
389  // Remove all the expired responses.
390  expired_gc();
391 
392  // Remove entries larger than max_entry_size.
393  too_big_gc();
394 
395  // Remove entries starting with zero hits, 1, ..., until stopGC()
396  // returns true.
397  hits_gc();
398 }
399 
405 void
406 HTTPCache::expired_gc()
407 {
408  if (!d_expire_ignored) {
409  d_http_cache_table->delete_expired_entries();
410  }
411 }
412 
429 void
430 HTTPCache::hits_gc()
431 {
432  int hits = 0;
433 
434  if (startGC()) {
435  while (!stopGC()) {
436  d_http_cache_table->delete_by_hits(hits);
437  hits++;
438  }
439  }
440 }
441 
446 void HTTPCache::too_big_gc() {
447  if (startGC())
448  d_http_cache_table->delete_by_size(d_max_entry_size);
449 }
450 
452 
463 bool HTTPCache::get_single_user_lock(bool force) {
464  if (!d_locked_open_file) {
465  FILE * fp = NULL;
466 
467  try {
468  // It's OK to call create_cache_root if the directory already
469  // exists.
470  create_cache_root(d_cache_root);
471  }
472  catch (Error &e) {
473  // We need to catch and return false because this method is
474  // called from a ctor and throwing at this point will result in a
475  // partially constructed object. 01/22/04 jhrg
476  return false;
477  }
478 
479  // Try to read the lock file. If we can open for reading, it exists.
480  string lock = d_cache_root + CACHE_LOCK;
481  if ((fp = fopen(lock.c_str(), "r")) != NULL) {
482  int res = fclose(fp);
483  if (res) {
484  DBG(cerr << "Failed to close " << (void *)fp << endl);
485  }
486  if (force)
487  REMOVE(lock.c_str());
488  else
489  return false;
490  }
491 
492  if ((fp = fopen(lock.c_str(), "w")) == NULL)
493  return false;
494 
495  d_locked_open_file = fp;
496  return true;
497  }
498 
499  return false;
500 }
501 
504 void
505 HTTPCache::release_single_user_lock()
506 {
507  if (d_locked_open_file) {
508  int res = fclose(d_locked_open_file);
509  if (res) {
510  DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
511  }
512  d_locked_open_file = 0;
513  }
514 
515  string lock = d_cache_root + CACHE_LOCK;
516  REMOVE(lock.c_str());
517 }
518 
521 
525 string
526 HTTPCache::get_cache_root() const
527 {
528  return d_cache_root;
529 }
530 
531 
540 void
541 HTTPCache::create_cache_root(const string &cache_root)
542 {
543  struct stat stat_info;
544  string::size_type cur = 0;
545 
546 #ifdef WIN32
547  cur = cache_root[1] == ':' ? 3 : 1;
548  typedef int mode_t;
549 #else
550  cur = 1;
551 #endif
552  while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
553  string dir = cache_root.substr(0, cur);
554  if (stat(dir.c_str(), &stat_info) == -1) {
555  DBG2(cerr << "Cache....... Creating " << dir << endl);
556  mode_t mask = UMASK(0);
557  if (MKDIR(dir.c_str(), 0777) < 0) {
558  DBG2(cerr << "Error: can't create." << endl);
559  UMASK(mask);
560  throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
561  }
562  UMASK(mask);
563  }
564  else {
565  DBG2(cerr << "Cache....... Found " << dir << endl);
566  }
567  cur++;
568  }
569 }
570 
585 void
586 HTTPCache::set_cache_root(const string &root)
587 {
588  if (root != "") {
589  d_cache_root = root;
590  // cache root should end in /.
591  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
592  d_cache_root += DIR_SEPARATOR_CHAR;
593  }
594  else {
595  // If no cache root has been indicated then look for a suitable
596  // location.
597  char * cr = (char *) getenv("DODS_CACHE");
598  if (!cr) cr = (char *) getenv("TMP");
599  if (!cr) cr = (char *) getenv("TEMP");
600  if (!cr) cr = CACHE_LOC;
601 
602  d_cache_root = cr;
603  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
604  d_cache_root += DIR_SEPARATOR_CHAR;
605 
606  d_cache_root += CACHE_ROOT;
607  }
608 
609  // Test d_hhtp_cache_table because this method can be called before that
610  // instance is created and also can be called later to cahnge the cache
611  // root. jhrg 05.14.08
612  if (d_http_cache_table)
613  d_http_cache_table->set_cache_root(d_cache_root);
614 }
615 
627 void
628 HTTPCache::set_cache_enabled(bool mode)
629 {
630  lock_cache_interface();
631 
632  d_cache_enabled = mode;
633 
634  unlock_cache_interface();
635 }
636 
639 bool
640 HTTPCache::is_cache_enabled() const
641 {
642  DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
643  << endl);
644  return d_cache_enabled;
645 }
646 
657 void
658 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
659 {
660  lock_cache_interface();
661 
662  d_cache_disconnected = mode;
663 
664  unlock_cache_interface();
665 }
666 
670 HTTPCache::get_cache_disconnected() const
671 {
672  return d_cache_disconnected;
673 }
674 
683 void
684 HTTPCache::set_expire_ignored(bool mode)
685 {
686  lock_cache_interface();
687 
688  d_expire_ignored = mode;
689 
690  unlock_cache_interface();
691 }
692 
693 /* Is the cache ignoring Expires headers returned with responses that have
694  been cached? */
695 
696 bool
697 HTTPCache::is_expire_ignored() const
698 {
699  return d_expire_ignored;
700 }
701 
717 void
718 HTTPCache::set_max_size(unsigned long size)
719 {
720  lock_cache_interface();
721 
722  try {
723  unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
725  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
726  unsigned long old_size = d_total_size;
727  d_total_size = new_size;
728  d_folder_size = d_total_size / CACHE_FOLDER_PCT;
729  d_gc_buffer = d_total_size / CACHE_GC_PCT;
730 
731  if (new_size < old_size && startGC()) {
732  perform_garbage_collection();
733  d_http_cache_table->cache_index_write();
734  }
735  }
736  catch (...) {
737  unlock_cache_interface();
738  DBGN(cerr << "Unlocking interface." << endl);
739  throw;
740  }
741 
742  DBG2(cerr << "Cache....... Total cache size: " << d_total_size
743  << " with " << d_folder_size
744  << " bytes for meta information and folders and at least "
745  << d_gc_buffer << " bytes free after every gc" << endl);
746 
747  unlock_cache_interface();
748 }
749 
752 unsigned long
753 HTTPCache::get_max_size() const
754 {
755  return d_total_size / MEGA;
756 }
757 
766 void
767 HTTPCache::set_max_entry_size(unsigned long size)
768 {
769  lock_cache_interface();
770 
771  try {
772  unsigned long new_size = size * MEGA;
773  if (new_size > 0 && new_size < d_total_size - d_folder_size) {
774  unsigned long old_size = d_max_entry_size;
775  d_max_entry_size = new_size;
776  if (new_size < old_size && startGC()) {
777  perform_garbage_collection();
778  d_http_cache_table->cache_index_write();
779  }
780  }
781  }
782  catch (...) {
783  unlock_cache_interface();
784  throw;
785  }
786 
787  DBG2(cerr << "Cache...... Max entry cache size is "
788  << d_max_entry_size << endl);
789 
790  unlock_cache_interface();
791 }
792 
797 unsigned long
798 HTTPCache::get_max_entry_size() const
799 {
800  return d_max_entry_size / MEGA;
801 }
802 
813 void
814 HTTPCache::set_default_expiration(const int exp_time)
815 {
816  lock_cache_interface();
817 
818  d_default_expiration = exp_time;
819 
820  unlock_cache_interface();
821 }
822 
825 int
826 HTTPCache::get_default_expiration() const
827 {
828  return d_default_expiration;
829 }
830 
835 void
836 HTTPCache::set_always_validate(bool validate)
837 {
838  d_always_validate = validate;
839 }
840 
844 bool
845 HTTPCache::get_always_validate() const
846 {
847  return d_always_validate;
848 }
849 
866 void
867 HTTPCache::set_cache_control(const vector<string> &cc)
868 {
869  lock_cache_interface();
870 
871  try {
872  d_cache_control = cc;
873 
874  vector<string>::const_iterator i;
875  for (i = cc.begin(); i != cc.end(); ++i) {
876  string header = (*i).substr(0, (*i).find(':'));
877  string value = (*i).substr((*i).find(": ") + 2);
878  if (header != "Cache-Control") {
879  throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
880  }
881  else {
882  if (value == "no-cache" || value == "no-store")
883  d_cache_enabled = false;
884  else if (value.find("max-age") != string::npos) {
885  string max_age = value.substr(value.find("=" + 1));
886  d_max_age = parse_time(max_age.c_str());
887  }
888  else if (value == "max-stale")
889  d_max_stale = 0; // indicates will take anything;
890  else if (value.find("max-stale") != string::npos) {
891  string max_stale = value.substr(value.find("=" + 1));
892  d_max_stale = parse_time(max_stale.c_str());
893  }
894  else if (value.find("min-fresh") != string::npos) {
895  string min_fresh = value.substr(value.find("=" + 1));
896  d_min_fresh = parse_time(min_fresh.c_str());
897  }
898  }
899  }
900  }
901  catch (...) {
902  unlock_cache_interface();
903  throw;
904  }
905 
906  unlock_cache_interface();
907 }
908 
909 
914 vector<string>
915 HTTPCache::get_cache_control()
916 {
917  return d_cache_control;
918 }
919 
921 
930 bool
931 HTTPCache::is_url_in_cache(const string &url)
932 {
933  DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
934 
935  HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
936  bool status = entry != 0;
937  if (entry) {
938  entry->unlock_read_response();
939  }
940  return status;
941 }
942 
/** Is this a hop-by-hop header that should not be stored with a cached
    response?

    Only the field name (the text before the first ':') is examined, and it
    is compared without regard to case. A header whose value merely
    mentions one of these names is therefore not treated as hop-by-hop.

    @param header A complete header line, e.g. "Connection: close".
    @return True if the field name is Connection, Keep-Alive,
    Proxy-Authenticate, Proxy-Authorization, Transfer-Encoding or Upgrade. */
bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "connection", "keep-alive", "proxy-authenticate",
        "proxy-authorization", "transfer-encoding", "upgrade"
    };
    const std::size_t name_count = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    // Isolate the field name and strip blanks around it.
    std::string name = header.substr(0, header.find(':'));
    const std::string::size_type first = name.find_first_not_of(" \t");
    if (first == std::string::npos)
        return false;
    const std::string::size_type last = name.find_last_not_of(" \t");
    name = name.substr(first, last - first + 1);

    // Field names are ASCII; fold A-Z by hand so the result does not depend
    // on the current locale.
    for (std::string::size_type k = 0; k < name.size(); ++k) {
        if (name[k] >= 'A' && name[k] <= 'Z')
            name[k] = static_cast<char>(name[k] - 'A' + 'a');
    }

    for (std::size_t n = 0; n < name_count; ++n) {
        if (name == hop_by_hop_names[n])
            return true;
    }

    return false;
}
958 
970 void
971 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
972 {
973  string fname = cachename + CACHE_META;
974  d_open_files.push_back(fname);
975 
976  FILE *dest = fopen(fname.c_str(), "w");
977  if (!dest) {
978  throw InternalErr(__FILE__, __LINE__,
979  "Could not open named cache entry file.");
980  }
981 
982  vector<string>::const_iterator i;
983  for (i = headers.begin(); i != headers.end(); ++i) {
984  if (!is_hop_by_hop_header(*i)) {
985  fwrite((*i).c_str(), (*i).size(), 1, dest);
986  fwrite("\n", 1, 1, dest);
987  }
988  }
989 
990  int res = fclose(dest);
991  if (res) {
992  DBG(cerr << "HTTPCache::write_metadata - Failed to close "
993  << dest << endl);
994  }
995 
996  d_open_files.pop_back();
997 }
998 
1009 void
1010 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1011 {
1012  FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1013  if (!md) {
1014  throw InternalErr(__FILE__, __LINE__,
1015  "Could not open named cache entry meta data file.");
1016  }
1017 
1018  char line[1024];
1019  while (!feof(md) && fgets(line, 1024, md)) {
1020  line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
1021  headers.push_back(string(line));
1022  }
1023 
1024  int res = fclose(md);
1025  if (res) {
1026  DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1027  << md << endl);
1028  }
1029 }
1030 
1052 int
1053 HTTPCache::write_body(const string &cachename, const FILE *src)
1054 {
1055  d_open_files.push_back(cachename);
1056 
1057  FILE *dest = fopen(cachename.c_str(), "wb");
1058  if (!dest) {
1059  throw InternalErr(__FILE__, __LINE__,
1060  "Could not open named cache entry file.");
1061  }
1062 
1063  // Read and write in 1k blocks; an attempt at doing this efficiently.
1064  // 09/30/02 jhrg
1065  char line[1024];
1066  size_t n;
1067  int total = 0;
1068  while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1069  total += fwrite(line, 1, n, dest);
1070  DBG2(sleep(3));
1071  }
1072 
1073  if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1074  int res = fclose(dest);
1075  res = res & unlink(cachename.c_str());
1076  if (res) {
1077  DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1078  << dest << endl);
1079  }
1080  throw InternalErr(__FILE__, __LINE__,
1081  "I/O error transferring data to the cache.");
1082  }
1083 
1084  rewind(const_cast<FILE *>(src));
1085 
1086  int res = fclose(dest);
1087  if (res) {
1088  DBG(cerr << "HTTPCache::write_body - Failed to close "
1089  << dest << endl);
1090  }
1091 
1092  d_open_files.pop_back();
1093 
1094  return total;
1095 }
1096 
1105 FILE *
1106 HTTPCache::open_body(const string &cachename)
1107 {
1108  FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1109  if (!src)
1110  throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1111 
1112  return src;
1113 }
1114 
1140 bool
1141 HTTPCache::cache_response(const string &url, time_t request_time,
1142  const vector<string> &headers, const FILE *body)
1143 {
1144  lock_cache_interface();
1145 
1146  DBG(cerr << "Caching url: " << url << "." << endl);
1147 
1148  try {
1149  // If this is not an http or https URL, don't cache.
1150  if (url.find("http:") == string::npos &&
1151  url.find("https:") == string::npos) {
1152  unlock_cache_interface();
1153  return false;
1154  }
1155 
1156  // This does nothing if url is not already in the cache. It's
1157  // more efficient to do this than to first check and see if the entry
1158  // exists. 10/10/02 jhrg
1159  d_http_cache_table->remove_entry_from_cache_table(url);
1160 
1162  entry->lock_write_response();
1163 
1164  try {
1165  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1166  if (entry->is_no_cache()) {
1167  DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1168  << "(" << url << ")" << endl);
1169  entry->unlock_write_response();
1170  delete entry; entry = 0;
1171  unlock_cache_interface();
1172  return false;
1173  }
1174 
1175  // corrected_initial_age, freshness_lifetime, response_time.
1176  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1177 
1178  d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1179  // move these write function to cache table
1180  entry->set_size(write_body(entry->get_cachename(), body));
1181  write_metadata(entry->get_cachename(), headers);
1182  d_http_cache_table->add_entry_to_cache_table(entry);
1183  entry->unlock_write_response();
1184  }
1185  catch (ResponseTooBigErr &e) {
1186  // Oops. Bummer. Clean up and exit.
1187  DBG(cerr << e.get_error_message() << endl);
1188  REMOVE(entry->get_cachename().c_str());
1189  REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1190  DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1191  << ")" << endl);
1192  entry->unlock_write_response();
1193  delete entry; entry = 0;
1194  unlock_cache_interface();
1195  return false;
1196  }
1197 
1198  if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1199  if (startGC())
1200  perform_garbage_collection();
1201 
1202  d_http_cache_table->cache_index_write(); // resets new_entries
1203  }
1204  }
1205  catch (...) {
1206  unlock_cache_interface();
1207  throw;
1208  }
1209 
1210  unlock_cache_interface();
1211 
1212  return true;
1213 }
1214 
1233 vector<string>
1234 HTTPCache::get_conditional_request_headers(const string &url)
1235 {
1236  lock_cache_interface();
1237 
1238  HTTPCacheTable::CacheEntry *entry = 0;
1239  vector<string> headers;
1240 
1241  DBG(cerr << "Getting conditional request headers for " << url << endl);
1242 
1243  try {
1244  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1245  if (!entry)
1246  throw Error("There is no cache entry for the URL: " + url);
1247 
1248  if (entry->get_etag() != "")
1249  headers.push_back(string("If-None-Match: ") + entry->get_etag());
1250 
1251  if (entry->get_lm() > 0) {
1252  time_t lm = entry->get_lm();
1253  headers.push_back(string("If-Modified-Since: ")
1254  + date_time_str(&lm));
1255  }
1256  else if (entry->get_max_age() > 0) {
1257  time_t max_age = entry->get_max_age();
1258  headers.push_back(string("If-Modified-Since: ")
1259  + date_time_str(&max_age));
1260  }
1261  else if (entry->get_expires() > 0) {
1262  time_t expires = entry->get_expires();
1263  headers.push_back(string("If-Modified-Since: ")
1264  + date_time_str(&expires));
1265  }
1266  entry->unlock_read_response();
1267  unlock_cache_interface();
1268  }
1269  catch (...) {
1270  unlock_cache_interface();
1271  if (entry) {
1272  entry->unlock_read_response();
1273  }
1274  throw;
1275  }
1276 
1277  return headers;
1278 }
1279 
/** Functor that orders header lines by field name only: the text before
    the first ':' (or the whole string when there is no ':'). Two headers
    with the same name and different values compare as equivalent.

    std::binary_function was removed in C++17, so the nested typedefs it
    used to supply are declared here instead. */
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // Compare the two name prefixes in place; a length of npos means
        // "to the end of the string".
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1289 
1303 void
1304 HTTPCache::update_response(const string &url, time_t request_time,
1305  const vector<string> &headers)
1306 {
1307  lock_cache_interface();
1308 
1309  HTTPCacheTable::CacheEntry *entry = 0;
1310  DBG(cerr << "Updating the response headers for: " << url << endl);
1311 
1312  try {
1313  entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1314  if (!entry)
1315  throw Error("There is no cache entry for the URL: " + url);
1316 
1317  // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1318  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1319 
1320  // Update corrected_initial_age, freshness_lifetime, response_time.
1321  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1322 
1323  // Merge the new headers with those in the persistent store. How:
1324  // Load the new headers into a set, then merge the old headers. Since
1325  // set<> ignores duplicates, old headers with the same name as a new
1326  // header will got into the bit bucket. Define a special compare
1327  // functor to make sure that headers are compared using only their
1328  // name and not their value too.
1329  set<string, HeaderLess> merged_headers;
1330 
1331  // Load in the new headers
1332  copy(headers.begin(), headers.end(),
1333  inserter(merged_headers, merged_headers.begin()));
1334 
1335  // Get the old headers and load them in.
1336  vector<string> old_headers;
1337  read_metadata(entry->get_cachename(), old_headers);
1338  copy(old_headers.begin(), old_headers.end(),
1339  inserter(merged_headers, merged_headers.begin()));
1340 
1341  // Read the values back out. Use reverse iterators with back_inserter
1342  // to preserve header order. NB: vector<> does not support push_front
1343  // so we can't use front_inserter(). 01/09/03 jhrg
1344  vector<string> result;
1345  copy(merged_headers.rbegin(), merged_headers.rend(),
1346  back_inserter(result));
1347 
1348  write_metadata(entry->get_cachename(), result);
1349  entry->unlock_write_response();
1350  unlock_cache_interface();
1351  }
1352  catch (...) {
1353  if (entry) {
1354  entry->unlock_read_response();
1355  }
1356  unlock_cache_interface();
1357  throw;
1358  }
1359 }
1360 
1372 bool
1373 HTTPCache::is_url_valid(const string &url)
1374 {
1375  lock_cache_interface();
1376 
1377  bool freshness;
1378  HTTPCacheTable::CacheEntry *entry = 0;
1379 
1380  DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1381 
1382  try {
1383  if (d_always_validate) {
1384  unlock_cache_interface();
1385  return false; // force re-validation.
1386  }
1387 
1388  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1389  if (!entry)
1390  throw Error("There is no cache entry for the URL: " + url);
1391 
1392  // If we supported range requests, we'd need code here to check if
1393  // there was only a partial response in the cache. 10/02/02 jhrg
1394 
1395  // In case this entry is of type "must-revalidate" then we consider it
1396  // invalid.
1397  if (entry->get_must_revalidate()) {
1398  entry->unlock_read_response();
1399  unlock_cache_interface();
1400  return false;
1401  }
1402 
1403  time_t resident_time = time(NULL) - entry->get_response_time();
1404  time_t current_age = entry->get_corrected_initial_age() + resident_time;
1405 
1406  // Check that the max-age, max-stale, and min-fresh directives
1407  // given in the request cache control header is followed.
1408  if (d_max_age >= 0 && current_age > d_max_age) {
1409  DBG(cerr << "Cache....... Max-age validation" << endl);
1410  entry->unlock_read_response();
1411  unlock_cache_interface();
1412  return false;
1413  }
1414  if (d_min_fresh >= 0
1415  && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1416  DBG(cerr << "Cache....... Min-fresh validation" << endl);
1417  entry->unlock_read_response();
1418  unlock_cache_interface();
1419  return false;
1420  }
1421 
1422  freshness = (entry->get_freshness_lifetime()
1423  + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1424  entry->unlock_read_response();
1425  unlock_cache_interface();
1426  }
1427  catch (...) {
1428  if (entry) {
1429  entry->unlock_read_response();
1430  }
1431  unlock_cache_interface();
1432  throw;
1433  }
1434 
1435  return freshness;
1436 }
1437 
1465 FILE * HTTPCache::get_cached_response(const string &url,
1466  vector<string> &headers, string &cacheName) {
1467  lock_cache_interface();
1468 
1469  FILE *body;
1470  HTTPCacheTable::CacheEntry *entry = 0;
1471 
1472  DBG(cerr << "Getting the cached response for " << url << endl);
1473 
1474  try {
1475  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1476  if (!entry) {
1477  unlock_cache_interface();
1478  return 0;
1479  }
1480 
1481  cacheName = entry->get_cachename();
1482  read_metadata(entry->get_cachename(), headers);
1483 
1484  DBG(cerr << "Headers just read from cache: " << endl);
1485  DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1486 
1487  body = open_body(entry->get_cachename());
1488 
1489  DBG(cerr << "Returning: " << url << " from the cache." << endl);
1490 
1491  d_http_cache_table->bind_entry_to_data(entry, body);
1492  }
1493  catch (...) {
1494  if (entry)
1495  unlock_cache_interface();
1496  throw;
1497  }
1498 
1499  unlock_cache_interface();
1500 
1501  return body;
1502 }
1514 FILE *
1515 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1516 {
1517  string discard_name;
1518  return get_cached_response(url, headers, discard_name);
1519 }
1520 
1531 FILE *
1532 HTTPCache::get_cached_response(const string &url)
1533 {
1534  string discard_name;
1535  vector<string> discard_headers;
1536  return get_cached_response(url, discard_headers, discard_name);
1537 }
1538 
1551 void
1552 HTTPCache::release_cached_response(FILE *body)
1553 {
1554  lock_cache_interface();
1555 
1556  try {
1557  d_http_cache_table->uncouple_entry_from_data(body);
1558  }
1559  catch (...) {
1560  unlock_cache_interface();
1561  throw;
1562  }
1563 
1564  unlock_cache_interface();
1565 }
1566 
1579 void
1580 HTTPCache::purge_cache()
1581 {
1582  lock_cache_interface();
1583 
1584  try {
1585  if (d_http_cache_table->is_locked_read_responses())
1586  throw Error("Attempt to purge the cache with entries in use.");
1587 
1588  d_http_cache_table->delete_all_entries();
1589  }
1590  catch (...) {
1591  unlock_cache_interface();
1592  throw;
1593  }
1594 
1595  unlock_cache_interface();
1596 }
1597 
1598 } // namespace libdap
time_t parse_time(const char *str, bool expand)
Definition: util_mit.cc:132
#define DESTROY(m)
Definition: HTTPCache.h:77
#define LOCK(m)
Definition: HTTPCache.h:73
#define DBGN(x)
Definition: debug.h:59
#define CACHE_ROOT
Definition: HTTPCache.cc:97
#define DBG2(x)
Definition: debug.h:73
A class for software fault reporting.
Definition: InternalErr.h:64
#define DUMP_FREQUENCY
Definition: HTTPCache.cc:106
#define MKDIR(a, b)
Definition: HTTPCache.cc:84
bool is_hop_by_hop_header(const string &header)
Definition: HTTPCache.cc:949
#define DBG(x)
Definition: debug.h:58
#define CACHE_GC_PCT
Definition: HTTPCache.cc:111
#define CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:109
#define MAX_CACHE_ENTRY_SIZE
Definition: HTTPCache.cc:113
void set_size(unsigned long sz)
#define MEGA
Definition: HTTPCache.cc:108
#define REMOVE(a)
Definition: HTTPCache.cc:86
#define NO_LM_EXPIRATION
Definition: HTTPCache.cc:104
string date_time_str(time_t *calendar, bool local)
Definition: util_mit.cc:284
string get_error_message() const
Definition: Error.cc:279
#define CACHE_LOCK
Definition: HTTPCache.cc:100
#define MIN_CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:112
#define CACHE_FOLDER_PCT
Definition: HTTPCache.cc:110
#define DIR_SEPARATOR_CHAR
Definition: HTTPCache.cc:88
#define UNLOCK(m)
Definition: HTTPCache.h:75
#define UMASK(a)
Definition: HTTPCache.cc:85
#define CACHE_META
Definition: HTTPCache.cc:101
A class for error processing.
Definition: Error.h:90
#define INIT(m)
Definition: HTTPCache.h:76
#define CACHE_LOC
Definition: HTTPCache.cc:96