libdap++  Updated for version 3.8.2
HTTPConnect.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 static char rcsid[] not_used =
30  { "$Id: HTTPConnect.cc 21699 2009-11-05 00:06:01Z jimg $"
31  };
32 
33 #ifdef HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36 
37 #ifdef WIN32
38 #include <io.h>
39 #endif
40 
41 #include <string>
42 #include <vector>
43 #include <functional>
44 #include <algorithm>
45 #include <sstream>
46 #include <iterator>
47 #include <cstdlib>
48 #include <cstring>
49 
50 //#define DODS_DEBUG
51 //#define DODS_DEBUG2
52 //#define HTTP_TRACE
53 
54 #include "debug.h"
55 #include "mime_util.h"
56 #include "GNURegex.h"
57 #include "HTTPCache.h"
58 #include "HTTPConnect.h"
59 #include "RCReader.h"
60 #include "HTTPResponse.h"
61 #include "HTTPCacheResponse.h"
62 
63 using namespace std;
64 
65 namespace libdap {
66 
67 // These global variables are not MT-Safe, but I'm leaving them as is because
68 // they are used only for debugging (set them in a debugger like gdb or ddd).
69 // They are not static because I *believe* that many debuggers cannot access
70 // static variables. 08/07/02 jhrg
71 
// Debugging switch. When non-zero, HTTPConnect::www_lib_init() prints the
// libcurl version, turns on libcurl's verbose mode and installs curl_debug()
// as the trace callback. It is a plain, unsynchronized global meant to be
// flipped from a debugger.
int www_trace = 0;
74 
75 // Keep the temporary files; useful for debugging.
77 
// Range of HTTP client-error (4xx) codes that have an entry in
// http_client_errors. The table is indexed by (status - CLIENT_ERR_MIN).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
static const char *const http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",
    "Unauthorized: Contact the server administrator.",
    "Payment Required.",
    "Forbidden: Contact the server administrator.",
    "Not Found: The data source or server could not be found.\n"
    " Often this means that the OPeNDAP server is missing or needs attention;\n"
    " Please contact the server administrator.",
    "Method Not Allowed.",
    "Not Acceptable.",
    "Proxy Authentication Required.",
    "Request Time-out.",
    "Conflict.",
    "Gone.",                    // was "Gone:." -- stray colon removed
    "Length Required.",
    "Precondition Failed.",
    "Request Entity Too Large.",
    "Request URI Too Large.",
    "Unsupported Media Type.",
    "Requested Range Not Satisfiable.",
    "Expectation Failed."
};

// Range of HTTP server-error (5xx) codes that have an entry in
// http_server_errors. The table is indexed by (status - SERVER_ERR_MIN).
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
static const char *const http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.",
    "Not Implemented.",
    "Bad Gateway.",
    "Service Unavailable.",
    "Gateway Time-out.",
    "HTTP Version Not Supported."
};

/** Translate an HTTP status code into a human-readable message.

    @param status An HTTP status code.
    @return The table entry for codes 400-417 and 500-505; for every other
    value, a generic "Unknown Error" message asking the user to report the
    problem. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
128 
133 class ParseHeader : public unary_function<const string &, void>
134 {
135  ObjectType type; // What type of object is in the stream?
136  string server; // Server's version string.
137  string protocol; // Server's protocol version.
138  string location; // Url returned by server
139 
140 public:
141  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
142  { }
143 
144  void operator()(const string &line)
145  {
146  string name, value;
147  parse_mime_header(line, name, value);
148  if (name == "content-description") {
149  DBG2(cerr << name << ": " << value << endl);
150  type = get_description_type(value);
151  }
152  // The second test (== "dods/0.0") tests if xopendap-server has already
153  // been seen. If so, use that header in preference to the old
154  // XDODS-Server header. jhrg 2/7/06
155  else if (name == "xdods-server" && server == "dods/0.0") {
156  DBG2(cerr << name << ": " << value << endl);
157  server = value;
158  }
159  else if (name == "xopendap-server") {
160  DBG2(cerr << name << ": " << value << endl);
161  server = value;
162  }
163  else if (name == "xdap") {
164  DBG2(cerr << name << ": " << value << endl);
165  protocol = value;
166  }
167  else if (server == "dods/0.0" && name == "server") {
168  DBG2(cerr << name << ": " << value << endl);
169  server = value;
170  }
171  else if (name == "location") {
172  DBG2(cerr << name << ": " << value << endl);
173  location = value;
174  }
175  else if (type == unknown_type && name == "content-type"
176  && line.find("text/html") != string::npos) {
177  DBG2(cerr << name << ": text/html..." << endl);
178  type = web_error;
179  }
180  }
181 
182  ObjectType get_object_type()
183  {
184  return type;
185  }
186 
187  string get_server()
188  {
189  return server;
190  }
191 
192  string get_protocol()
193  {
194  return protocol;
195  }
196 
197  string get_location() {
198  return location;
199  }
200 };
201 
/** libcurl header callback: store one raw response header line.

    The line terminator ("\n" or "\r\n") is removed, and only when it is
    actually present. Empty lines and HTTP status lines (those that start
    with "HTTP/") are not stored; a header whose value merely contains the
    text "HTTP" is kept.

    @param ptr Start of the header bytes; not NUL terminated.
    @param size Size of one element.
    @param nmemb Number of elements; the line is size * nmemb bytes long.
    @param resp_hdrs Pointer to the vector<string> that receives the line.
    @return size * nmemb, i.e. the number of bytes consumed. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);

    const size_t total = size * nmemb;
    std::vector<std::string> *hdrs =
        static_cast<std::vector<std::string> *>(resp_hdrs);

    // Nothing to read or nowhere to store it: report the bytes as consumed
    // so the transfer is not aborted.
    if (!ptr || !hdrs || total == 0)
        return total;

    const char *data = static_cast<const char *>(ptr);

    // Trim the trailing "\n", then an optional "\r", working in bytes.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Keep every non-empty header that is not an HTTP status line.
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
239 
241 static int
242 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
243 {
244  string message(msg, size);
245 
246  switch (info) {
247  case CURLINFO_TEXT:
248  cerr << "Text: " << message; break;
249  case CURLINFO_HEADER_IN:
250  cerr << "Header in: " << message; break;
251  case CURLINFO_HEADER_OUT:
252  cerr << "Header out: " << message; break;
253  case CURLINFO_DATA_IN:
254  cerr << "Data in: " << message; break;
255  case CURLINFO_DATA_OUT:
256  cerr << "Data out: " << message; break;
257  case CURLINFO_END:
258  cerr << "End: " << message; break;
259 #ifdef CURLINFO_SSL_DATA_IN
260  case CURLINFO_SSL_DATA_IN:
261  cerr << "SSL Data in: " << message; break;
262 #endif
263 #ifdef CURLINFO_SSL_DATA_OUT
264  case CURLINFO_SSL_DATA_OUT:
265  cerr << "SSL Data out: " << message; break;
266 #endif
267  default:
268  cerr << "Curl info: " << message; break;
269  }
270  return 0;
271 }
272 
276 void
277 HTTPConnect::www_lib_init()
278 {
279  d_curl = curl_easy_init();
280  if (!d_curl)
281  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
282 
283  // Now set options that will remain constant for the duration of this
284  // CURL object.
285 
286  // Set the proxy host.
287  if (!d_rcr->get_proxy_server_host().empty()) {
288  DBG(cerr << "Setting up a proxy server." << endl);
289  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
290  << endl);
291  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
292  << endl);
293  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
294  << endl);
295  curl_easy_setopt(d_curl, CURLOPT_PROXY,
296  d_rcr->get_proxy_server_host().c_str());
297  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
298  d_rcr->get_proxy_server_port());
299 
300  // As of 4/21/08 only NTLM, Digest and Basic work.
301 #ifdef CURLOPT_PROXYAUTH
302  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
303 #endif
304 
305  // Password might not be required. 06/21/04 jhrg
306  if (!d_rcr->get_proxy_server_userpw().empty())
307  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
308  d_rcr->get_proxy_server_userpw().c_str());
309  }
310 
311  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
312  // We have to set FailOnError to false for any of the non-Basic
313  // authentication schemes to work. 07/28/03 jhrg
314  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
315 
316  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
317  // choosing the the 'safest' one supported by the server.
318  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
319  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
320 
321  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
322  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
323  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
324  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
325  // param of save_raw_http_headers to a vector<string> object.
326 
327  // Follow 302 (redirect) responses
328  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
329  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
330 
331  // If the user turns off SSL validation...
332  if (!d_rcr->get_validate_ssl() == 0) {
333  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
334  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
335  }
336 
337  // Look to see if cookies are turned on in the .dodsrc file. If so,
338  // activate here. We honor 'session cookies' (cookies without an
339  // expiration date) here so that session-base SSO systems will work as
340  // expected.
341  if (!d_cookie_jar.empty()) {
342  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
343  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
344  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
345  }
346 
347  if (www_trace) {
348  cerr << "Curl version: " << curl_version() << endl;
349  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
350  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
351  }
352 }
353 
357 class BuildHeaders : public unary_function<const string &, void>
358 {
359  struct curl_slist *d_cl;
360 
361 public:
362  BuildHeaders() : d_cl(0)
363  {}
364 
365  void operator()(const string &header)
366  {
367  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
368  << endl);
369  d_cl = curl_slist_append(d_cl, header.c_str());
370  }
371 
372  struct curl_slist *get_headers()
373  {
374  return d_cl;
375  }
376 };
377 
392 long
393 HTTPConnect::read_url(const string &url, FILE *stream,
394  vector<string> *resp_hdrs,
395  const vector<string> *headers)
396 {
397  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
398 
399 #ifdef WIN32
400  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
401  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
402  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
403  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
404  // this issue is that one should not pass a FILE * to a windows DLL. Close
405  // inspection of libcurl yields that their default write function when using
406  // the CURLOPT_WRITEDATA is just "fwrite".
407  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
408  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
409 #else
410  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
411 #endif
412 
413  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
414  ostream_iterator<string>(cerr, "\n")));
415 
416  BuildHeaders req_hdrs;
417  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
418  req_hdrs);
419  if (headers)
420  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
421  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
422 
423  // Turn off the proxy for this URL?
424  bool temporary_proxy = false;
425  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
426  DBG(cerr << "Suppress proxy for url: " << url << endl);
427  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
428  }
429 
430  string::size_type at_sign = url.find('@');
431  // Assume username:password present *and* assume it's an HTTP URL; it *is*
432  // HTTPConnect, after all. 7 is position after "http://"; the second arg
433  // to substr() is the sub string length.
434  if (at_sign != url.npos)
435  d_upstring = url.substr(7, at_sign - 7);
436 
437  if (!d_upstring.empty())
438  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
439 
440  // Pass save_raw_http_headers() a pointer to the vector<string> where the
441  // response headers may be stored. Callers can use the resp_hdrs
442  // value/result parameter to get the raw response header information .
443  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
444 
445  CURLcode res = curl_easy_perform(d_curl);
446 
447  // Free the header list and null the value in d_curl.
448  curl_slist_free_all(req_hdrs.get_headers());
449  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
450 
451  // Reset the proxy?
452  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
453  curl_easy_setopt(d_curl, CURLOPT_PROXY,
454  d_rcr->get_proxy_server_host().c_str());
455 
456  if (res != 0)
457  throw Error(d_error_buffer);
458 
459  long status;
460  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
461  if (res != 0)
462  throw Error(d_error_buffer);
463 
464  return status;
465 }
466 
470 bool
471 HTTPConnect::url_uses_proxy_for(const string &url) throw()
472 {
473  if (d_rcr->is_proxy_for_used()) {
474  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
475  int index = 0, matchlen;
476  return host_regex.search(url.c_str(), url.size(), matchlen, index)
477  != -1;
478  }
479 
480  return false;
481 }
482 
486 bool
487 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
488 {
489  return d_rcr->is_no_proxy_for_used()
490  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
491 }
492 
493 // Public methods. Mostly...
494 
501 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""),
502  d_cookie_jar(""),
503  d_dap_client_protocol_major(2),
504  d_dap_client_protocol_minor(0)
505 
506 {
507  d_accept_deflate = rcr->get_deflate();
508  d_rcr = rcr;
509 
510  // Load in the default headers to send with a request. The empty Pragma
511  // headers overrides libcurl's default Pragma: no-cache header (which
512  // will disable caching by Squid, et c.). The User-Agent header helps
513  // make server logs more readable. 05/05/03 jhrg
514  d_request_headers.push_back(string("Pragma:"));
515  string user_agent = string("User-Agent: ") + string(CNAME)
516  + string("/") + string(CVER);
517  d_request_headers.push_back(user_agent);
518  if (d_accept_deflate)
519  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
520 
521  // HTTPCache::instance returns a valid ptr or 0.
522  if (d_rcr->get_use_cache())
523  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
524  true);
525  else
526  d_http_cache = 0;
527 
528  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
529  << ")" << endl);
530 
531  if (d_http_cache) {
532  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
533  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
534  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
535  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
536  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
537  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
538  }
539 
540  d_cookie_jar = rcr->get_cookie_jar();
541 
542  www_lib_init(); // This may throw either Error or InternalErr
543 }
544 
546 {
547  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
548 
549  curl_easy_cleanup(d_curl);
550 
551  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
552 }
553 
566 HTTPResponse *
567 HTTPConnect::fetch_url(const string &url)
568 {
569 #ifdef HTTP_TRACE
570  cout << "GET " << url << " HTTP/1.0" << endl;
571 #endif
572 
573  HTTPResponse *stream;
574 
575  if (d_http_cache && d_http_cache->is_cache_enabled()) {
576  stream = caching_fetch_url(url);
577  }
578  else {
579  stream = plain_fetch_url(url);
580  }
581 
582 #ifdef HTTP_TRACE
583  stringstream ss;
584  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
585  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
586  ss << stream->get_headers()->at(i) << endl;
587  }
588  cout << ss.str();
589 #endif
590 
591  ParseHeader parser;
592 
593  parser = for_each(stream->get_headers()->begin(),
594  stream->get_headers()->end(), ParseHeader());
595 
596 #ifdef HTTP_TRACE
597  cout << endl << endl;
598 #endif
599 
600  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
601  if (parser.get_location() != "" &&
602  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
603  return fetch_url(parser.get_location());
604  }
605 
606  stream->set_type(parser.get_object_type());
607  stream->set_version(parser.get_server());
608  stream->set_protocol(parser.get_protocol());
609 
610  return stream;
611 }
612 
613 // Look around for a reasonable place to put a temporary file. Check first
614 // the value of the TMPDIR env var. If that does not yeild a path that's
615 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
616 // defined in stdio.h. If both come up empty, then use `./'.
617 //
618 // This function allocates storage using new. The caller must delete the char
619 // array.
620 
621 // Change this to a version that either returns a string or an open file
622 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
623 // (see open()) to make it more secure. Ideal solution: get deserialize()
624 // methods to read from a stream returned by libcurl, not from a temporary
625 // file. 9/21/07 jhrg
626 static char *
627 get_tempfile_template(const char *file_template)
628 {
629  const char *c;
630 
631 #ifdef WIN32
632  // whitelist for a WIN32 directory
633  Regex directory("[-a-zA-Z0-9_\\]*");
634 
635  c = getenv("TEMP");
636  if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
637  goto valid_temp_directory;
638 
639  c= getenv("TMP");
640  if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0))
641  goto valid_temp_directory;
642 #else
643  // whitelist for a directory
644  Regex directory("[-a-zA-Z0-9_/]*");
645 
646  c = getenv("TMPDIR");
647  if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0))
648  goto valid_temp_directory;
649 
650 #ifdef P_tmpdir
651  if (access(P_tmpdir, W_OK | R_OK) == 0) {
652  c = P_tmpdir;
653  goto valid_temp_directory;
654  }
655 #endif
656 
657 #endif // WIN32
658 
659  c = ".";
660 
661 valid_temp_directory:
662  // Sanitize allocation
663  int size = strlen(c) + strlen(file_template) + 2;
664  if (!size_ok(1, size))
665  throw Error("Bad temporary file name.");
666 
667  char *temp = new char[size];
668  strncpy(temp, c, size-2);
669  strcat(temp, "/");
670 
671  strcat(temp, file_template);
672 
673  return temp;
674 }
675 
694 string
695 get_temp_file(FILE *&stream) throw(InternalErr)
696 {
697  // get_tempfile_template() uses new, must call delete
698  char *dods_temp = get_tempfile_template("dodsXXXXXX");
699 
700  // Open truncated for update. NB: mkstemp() returns a file descriptor.
701 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
702  stream = fopen(_mktemp(dods_temp), "w+b");
703 #else
704  stream = fdopen(mkstemp(dods_temp), "w+");
705 #endif
706 
707  if (!stream)
708  throw InternalErr("I/O Error: Failed to open a temporary file for the data values.");
709 
710  string dods_temp_s = dods_temp;
711  delete[] dods_temp; dods_temp = 0;
712 
713  return dods_temp_s;
714 }
715 
/** Close a temporary file and remove it from the file system.

    A failure to close the stream is reported only through the DBG debug
    trace; the file is unlinked either way.

    @param s The open stream of the temporary file.
    @param name The path name of the temporary file. */
void
close_temp(FILE *s, const std::string &name)
{
    const bool close_failed = (fclose(s) != 0);
    if (close_failed) {
        DBG(std::cerr << "Counld not close the temporary file: " << name << std::endl);
    }

    unlink(name.c_str());
}
727 
749 HTTPResponse *
750 HTTPConnect::caching_fetch_url(const string &url)
751 {
752  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
753 
754  vector<string> *headers = new vector<string> ;
755  FILE *s = d_http_cache->get_cached_response(url, *headers);
756  if (!s) {
757  // url not in cache; get it and cache it
758  DBGN(cerr << "no; getting response and caching." << endl);
759  time_t now = time(0);
760  HTTPResponse *rs = plain_fetch_url(url);
761  d_http_cache->cache_response(url, now, *(rs->get_headers()),
762  rs->get_stream());
763 
764  return rs;
765  }
766  else { // url in cache
767  DBGN(cerr << "yes... ");
768 
769  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
770  DBGN(cerr << "and it's valid; using cached response." << endl);
771  HTTPCacheResponse *crs =
772  new HTTPCacheResponse(s, 200, headers, d_http_cache);
773  return crs;
774  }
775  else { // url in cache but not valid; validate
776  DBGN(cerr << "but it's not valid; validating... ");
777 
778  d_http_cache->release_cached_response(s);
779 
780  vector<string> *resp_hdrs = new vector<string> ;
781  vector<string> cond_hdrs =
782  d_http_cache->get_conditional_request_headers(url);
783  FILE *body = 0;
784  string dods_temp = get_temp_file(body);
785  time_t now = time(0); // When was the request made (now).
786  long http_status;
787 
788  try {
789  http_status = read_url(url, body, resp_hdrs, &cond_hdrs);
790  rewind(body);
791  }
792  catch (Error &e) {
793  close_temp(body, dods_temp);
794  throw ;
795  }
796 
797  switch (http_status) {
798  case 200: { // New headers and new body
799  DBGN(cerr << "read a new response; caching." << endl);
800 
801  d_http_cache->cache_response(url, now, *resp_hdrs, body);
802  HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs,
803  dods_temp);
804 
805  return rs;
806  }
807 
808  case 304: { // Just new headers, use cached body
809  DBGN(cerr << "cached response valid; updating." << endl);
810 
811  close_temp(body, dods_temp);
812  d_http_cache->update_response(url, now, *resp_hdrs);
813 
814  vector<string> *headers = new vector<string>;
815  FILE *hs = d_http_cache->get_cached_response(url, *headers);
816  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, d_http_cache);
817  return crs;
818  }
819 
820  default: { // Oops.
821  close_temp(body, dods_temp);
822  if (http_status >= 400) {
823  string msg = "Error while reading the URL: ";
824  msg += url;
825  msg
826  += ".\nThe OPeNDAP server returned the following message:\n";
827  msg += http_status_to_string(http_status);
828  throw Error(msg);
829  }
830  else {
831  throw InternalErr(__FILE__, __LINE__,
832  "Bad response from the HTTP server: " + long_to_string(http_status));
833  }
834  }
835  }
836  }
837  }
838 
839  throw InternalErr(__FILE__, __LINE__, "Should never get here");
840 }
841 
853 HTTPResponse *
854 HTTPConnect::plain_fetch_url(const string &url)
855 {
856  DBG(cerr << "Getting URL: " << url << endl);
857  FILE *stream = 0;
858  string dods_temp = get_temp_file(stream);
859  vector<string> *resp_hdrs = new vector<string>;
860 
861  int status = -1;
862  try {
863  status = read_url(url, stream, resp_hdrs); // Throws Error.
864  if (status >= 400) {
865  string msg = "Error while reading the URL: ";
866  msg += url;
867  msg += ".\nThe OPeNDAP server returned the following message:\n";
868  msg += http_status_to_string(status);
869  throw Error(msg);
870  }
871  }
872 
873  catch (Error &e) {
874  close_temp(stream, dods_temp);
875  throw e;
876  }
877 
878  rewind(stream);
879 
880  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
881 }
882 
894 void
896 {
897  d_accept_deflate = deflate;
898 
899  if (d_accept_deflate) {
900  if (find(d_request_headers.begin(), d_request_headers.end(),
901  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
902  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
903  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
904  ostream_iterator<string>(cerr, "\n")));
905  }
906  else {
907  vector<string>::iterator i;
908  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
909  bind2nd(equal_to<string>(),
910  string("Accept-Encoding: deflate, gzip, compress")));
911  d_request_headers.erase(i, d_request_headers.end());
912  }
913 }
914 
/** Predicate that is true for strings that begin with a given header name.
    The name is stored by value, so the predicate stays valid even when it
    was constructed from a temporary string or a string literal. */
class HeaderMatch : public std::unary_function<const std::string &, bool> {
    const std::string d_header;     // Prefix to look for (owned copy).
    public:
        HeaderMatch(const std::string &header) : d_header(header) {}

        /** @param arg A complete header line.
            @return True if arg starts with the stored header name. */
        bool operator()(const std::string &arg) const
        {
            return arg.compare(0, d_header.size(), d_header) == 0;
        }
};
922 
931 void
932 HTTPConnect::set_xdap_protocol(int major, int minor)
933 {
934  // Look for, and remove if one exists, an XDAP-Accept header
935  vector<string>::iterator i;
936  i = find_if(d_request_headers.begin(), d_request_headers.end(),
937  HeaderMatch("XDAP-Accept:"));
938  if (i != d_request_headers.end())
939  d_request_headers.erase(i);
940 
941  // Record and add the new header value
942  d_dap_client_protocol_major = major;
943  d_dap_client_protocol_minor = minor;
944  ostringstream xdap_accept;
945  xdap_accept << "XDAP-Accept: " << major << "." << minor;
946 
947  d_request_headers.push_back(xdap_accept.str());
948 
949  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
950  ostream_iterator<string>(cerr, "\n")));
951 }
952 
968 void
969 HTTPConnect::set_credentials(const string &u, const string &p)
970 {
971  if (u.empty())
972  return;
973 
974  // Store the credentials locally.
975  d_username = u;
976  d_password = p;
977 
978  d_upstring = u + ":" + p;
979 }
980 
981 } // namespace libdap
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1234
string get_cookie_jar() const
Definition: RCReader.h:258
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:628
#define not_used
Definition: config.h:521
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1373
void set_credentials(const string &u, const string &p)
Definition: HTTPConnect.cc:969
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:154
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:718
int get_max_cache_size() const
Definition: RCReader.h:143
bool get_deflate() const
Definition: RCReader.h:168
#define DBGN(x)
Definition: debug.h:59
#define SERVER_ERR_MIN
Definition: HTTPConnect.cc:104
virtual void set_type(ObjectType o)
Definition: Response.h:143
bool size_ok(unsigned int sz, unsigned int nelem)
sanitize the size of an array. Test for integer overflow when dynamically allocating an array...
Definition: util.cc:523
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1465
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:57
#define SERVER_ERR_MAX
Definition: HTTPConnect.cc:105
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:567
virtual void set_version(const string &v)
Definition: Response.h:147
#define DBG2(x)
Definition: debug.h:73
A class for software fault reporting.
Definition: InternalErr.h:64
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:758
virtual void set_protocol(const string &p)
Definition: Response.h:151
#define DBG(x)
Definition: debug.h:58
int get_always_validate() const
Definition: RCReader.h:159
#define CLIENT_ERR_MAX
Definition: HTTPConnect.cc:79
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:333
virtual FILE * get_stream() const
Definition: Response.h:115
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1304
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:718
friend class ParseHeader
Definition: HTTPConnect.h:114
int get_ignore_expires() const
Definition: RCReader.h:151
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:695
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1141
void set_accept_deflate(bool defalte)
Definition: HTTPConnect.cc:895
bool get_use_cache() const
Definition: RCReader.h:139
string long_to_string(long val, int base)
Definition: util.cc:440
string get_dods_cache_root() const
Definition: RCReader.h:135
bool is_cache_enabled() const
Definition: HTTPCache.cc:640
void set_always_validate(bool validate)
Definition: HTTPCache.cc:836
void set_xdap_protocol(int major, int minor)
Definition: HTTPConnect.cc:932
virtual ~HTTPConnect()
Definition: HTTPConnect.cc:545
int dods_keep_temps
Definition: HTTPConnect.cc:76
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:814
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1552
unsigned int get_max_cached_obj() const
Definition: RCReader.h:147
#define CVER
Definition: config.h:31
#define CNAME
Definition: config.h:23
A class for error processing.
Definition: Error.h:90
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:684
#define CLIENT_ERR_MIN
Definition: HTTPConnect.cc:78
virtual int get_status() const
Definition: Response.h:111
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:767
int www_trace
Definition: HTTPConnect.cc:73
virtual vector< string > * get_headers() const
Definition: HTTPResponse.h:122
int get_default_expires() const
Definition: RCReader.h:155