libdap++  Updated for version 3.11.7
HTTPConnect.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 static char rcsid[] not_used =
30  { "$Id: HTTPConnect.cc 25101 2011-12-19 22:03:29Z jimg $"
31  };
32 
33 #ifdef HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36 
37 #include <sys/stat.h>
38 
39 #ifdef WIN32
40 #include <io.h>
41 #endif
42 
43 #include <string>
44 #include <vector>
45 #include <functional>
46 #include <algorithm>
47 #include <sstream>
48 #include <iterator>
49 #include <cstdlib>
50 #include <cstring>
51 
52 //#define DODS_DEBUG2
53 //#define HTTP_TRACE
54 //#define DODS_DEBUG
55 
56 #undef USE_GETENV
57 
58 
59 #include "debug.h"
60 #include "mime_util.h"
61 #include "GNURegex.h"
62 #include "HTTPCache.h"
63 #include "HTTPConnect.h"
64 #include "RCReader.h"
65 #include "HTTPResponse.h"
66 #include "HTTPCacheResponse.h"
67 
68 using namespace std;
69 
70 namespace libdap {
71 
72 // These global variables are not MT-Safe, but I'm leaving them as is because
73 // they are used only for debugging (set them in a debugger like gdb or ddd).
74 // They are not static because I think that many debuggers cannot access
75 // static variables. 08/07/02 jhrg
76 
// Set this to 1 to turn on libcurl's verbose mode (for debugging).
int www_trace = 0;

// Keep the temporary files; useful for debugging.
int dods_keep_temps = 0;

#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for the HTTP 4xx status codes, indexed by (status - CLIENT_ERR_MIN).
// Multi-line messages use adjacent string literals (not backslash line
// continuation) so that source indentation never leaks into the text.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention;\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for the HTTP 5xx status codes, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into a human-readable message.

    @param status The HTTP status code.
    @return The message for codes in the ranges CLIENT_ERR_MIN..CLIENT_ERR_MAX
    and SERVER_ERR_MIN..SERVER_ERR_MAX; a generic "Unknown Error" message for
    every other value. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}

133 
138 class ParseHeader : public unary_function<const string &, void>
139 {
140  ObjectType type; // What type of object is in the stream?
141  string server; // Server's version string.
142  string protocol; // Server's protocol version.
143  string location; // Url returned by server
144 
145 public:
146  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
147  { }
148 
149  void operator()(const string &line)
150  {
151  string name, value;
152  parse_mime_header(line, name, value);
153  if (name == "content-description") {
154  DBG2(cerr << name << ": " << value << endl);
155  type = get_description_type(value);
156  }
157  // The second test (== "dods/0.0") tests if xopendap-server has already
158  // been seen. If so, use that header in preference to the old
159  // XDODS-Server header. jhrg 2/7/06
160  else if (name == "xdods-server" && server == "dods/0.0") {
161  DBG2(cerr << name << ": " << value << endl);
162  server = value;
163  }
164  else if (name == "xopendap-server") {
165  DBG2(cerr << name << ": " << value << endl);
166  server = value;
167  }
168  else if (name == "xdap") {
169  DBG2(cerr << name << ": " << value << endl);
170  protocol = value;
171  }
172  else if (server == "dods/0.0" && name == "server") {
173  DBG2(cerr << name << ": " << value << endl);
174  server = value;
175  }
176  else if (name == "location") {
177  DBG2(cerr << name << ": " << value << endl);
178  location = value;
179  }
180  else if (type == unknown_type && name == "content-type"
181  && line.find("text/html") != string::npos) {
182  DBG2(cerr << name << ": text/html..." << endl);
183  type = web_error;
184  }
185  }
186 
187  ObjectType get_object_type()
188  {
189  return type;
190  }
191 
192  string get_server()
193  {
194  return server;
195  }
196 
197  string get_protocol()
198  {
199  return protocol;
200  }
201 
202  string get_location() {
203  return location;
204  }
205 };
206 
/** A libcurl header callback (installed with CURLOPT_HEADERFUNCTION) that
    saves each response header line in a vector<string>.

    The trailing "\n" or "\r\n" is removed only when it is actually
    present. Empty lines and HTTP status lines (lines that begin with
    "HTTP/") are not stored.

    @param ptr Start of the header data; not NUL terminated.
    @param size Size of one data element.
    @param nmemb Number of data elements.
    @param resp_hdrs A pointer to the vector<string> that receives the
    header lines. If null, the data is consumed but nothing is stored.
    @return size * nmemb, i.e. the number of bytes consumed; libcurl treats
    any other value as an error. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    std::vector<std::string> *hdrs = static_cast<std::vector<std::string> *>(resp_hdrs);

    const size_t total = size * nmemb;
    const char *data = static_cast<const char *>(ptr);

    // Nothing to look at; also avoids unsigned underflow when computing
    // the length of the line without its terminator.
    if (!data || total == 0)
        return total;

    // Grab the header, minus the trailing newline. Or \r\n pair.
    size_t length = total;
    if (data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines. Only a
    // line that starts with "HTTP/" is a status line; a header that merely
    // mentions HTTP in its value is kept.
    if (hdrs && !complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}

244 
246 static int
247 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
248 {
249  string message(msg, size);
250 
251  switch (info) {
252  case CURLINFO_TEXT:
253  cerr << "Text: " << message; break;
254  case CURLINFO_HEADER_IN:
255  cerr << "Header in: " << message; break;
256  case CURLINFO_HEADER_OUT:
257  cerr << "Header out: " << message; break;
258  case CURLINFO_DATA_IN:
259  cerr << "Data in: " << message; break;
260  case CURLINFO_DATA_OUT:
261  cerr << "Data out: " << message; break;
262  case CURLINFO_END:
263  cerr << "End: " << message; break;
264 #ifdef CURLINFO_SSL_DATA_IN
265  case CURLINFO_SSL_DATA_IN:
266  cerr << "SSL Data in: " << message; break;
267 #endif
268 #ifdef CURLINFO_SSL_DATA_OUT
269  case CURLINFO_SSL_DATA_OUT:
270  cerr << "SSL Data out: " << message; break;
271 #endif
272  default:
273  cerr << "Curl info: " << message; break;
274  }
275  return 0;
276 }
277 
281 void
282 HTTPConnect::www_lib_init()
283 {
284  d_curl = curl_easy_init();
285  if (!d_curl)
286  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
287 
288  // Now set options that will remain constant for the duration of this
289  // CURL object.
290 
291  // Set the proxy host.
292  if (!d_rcr->get_proxy_server_host().empty()) {
293  DBG(cerr << "Setting up a proxy server." << endl);
294  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
295  << endl);
296  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
297  << endl);
298  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
299  << endl);
300  curl_easy_setopt(d_curl, CURLOPT_PROXY,
301  d_rcr->get_proxy_server_host().c_str());
302  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
303  d_rcr->get_proxy_server_port());
304 
305  // As of 4/21/08 only NTLM, Digest and Basic work.
306 #ifdef CURLOPT_PROXYAUTH
307  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
308 #endif
309 
310  // Password might not be required. 06/21/04 jhrg
311  if (!d_rcr->get_proxy_server_userpw().empty())
312  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
313  d_rcr->get_proxy_server_userpw().c_str());
314  }
315 
316  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
317  // We have to set FailOnError to false for any of the non-Basic
318  // authentication schemes to work. 07/28/03 jhrg
319  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
320 
321  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
322  // choosing the the 'safest' one supported by the server.
323  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
324  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
325 
326  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
327  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
328  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
329  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
330  // param of save_raw_http_headers to a vector<string> object.
331 
332  // Follow 302 (redirect) responses
333  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
334  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
335 
336  // If the user turns off SSL validation...
337  if (!d_rcr->get_validate_ssl() == 0) {
338  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
339  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
340  }
341 
342  // Look to see if cookies are turned on in the .dodsrc file. If so,
343  // activate here. We honor 'session cookies' (cookies without an
344  // expiration date) here so that session-base SSO systems will work as
345  // expected.
346  if (!d_cookie_jar.empty()) {
347  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
348  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
349  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
350  }
351 
352  if (www_trace) {
353  cerr << "Curl version: " << curl_version() << endl;
354  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
355  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
356  }
357 }
358 
362 class BuildHeaders : public unary_function<const string &, void>
363 {
364  struct curl_slist *d_cl;
365 
366 public:
367  BuildHeaders() : d_cl(0)
368  {}
369 
370  void operator()(const string &header)
371  {
372  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
373  << endl);
374  d_cl = curl_slist_append(d_cl, header.c_str());
375  }
376 
377  struct curl_slist *get_headers()
378  {
379  return d_cl;
380  }
381 };
382 
397 long
398 HTTPConnect::read_url(const string &url, FILE *stream,
399  vector<string> *resp_hdrs,
400  const vector<string> *headers)
401 {
402  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
403 
404 #ifdef WIN32
405  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
406  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
407  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
408  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
409  // this issue is that one should not pass a FILE * to a windows DLL. Close
410  // inspection of libcurl yields that their default write function when using
411  // the CURLOPT_WRITEDATA is just "fwrite".
412  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
413  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
414 #else
415  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
416 #endif
417 
418  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
419  ostream_iterator<string>(cerr, "\n")));
420 
421  BuildHeaders req_hdrs;
422  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
423  req_hdrs);
424  if (headers)
425  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
426  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
427 
428  // Turn off the proxy for this URL?
429  bool temporary_proxy = false;
430  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
431  DBG(cerr << "Suppress proxy for url: " << url << endl);
432  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
433  }
434 
435  string::size_type at_sign = url.find('@');
436  // Assume username:password present *and* assume it's an HTTP URL; it *is*
437  // HTTPConnect, after all. 7 is position after "http://"; the second arg
438  // to substr() is the sub string length.
439  if (at_sign != url.npos)
440  d_upstring = url.substr(7, at_sign - 7);
441 
442  if (!d_upstring.empty())
443  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
444 
445  // Pass save_raw_http_headers() a pointer to the vector<string> where the
446  // response headers may be stored. Callers can use the resp_hdrs
447  // value/result parameter to get the raw response header information .
448  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
449 
450  CURLcode res = curl_easy_perform(d_curl);
451 
452  // Free the header list and null the value in d_curl.
453  curl_slist_free_all(req_hdrs.get_headers());
454  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
455 
456  // Reset the proxy?
457  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
458  curl_easy_setopt(d_curl, CURLOPT_PROXY,
459  d_rcr->get_proxy_server_host().c_str());
460 
461  if (res != 0)
462  throw Error(d_error_buffer);
463 
464  long status;
465  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
466  if (res != 0)
467  throw Error(d_error_buffer);
468 
469  return status;
470 }
471 
475 bool
476 HTTPConnect::url_uses_proxy_for(const string &url) throw()
477 {
478  if (d_rcr->is_proxy_for_used()) {
479  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
480  int index = 0, matchlen;
481  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
482  }
483 
484  return false;
485 }
486 
490 bool
491 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
492 {
493  return d_rcr->is_no_proxy_for_used()
494  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
495 }
496 
497 // Public methods. Mostly...
498 
505 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""),
506  d_cookie_jar(""),
507  d_dap_client_protocol_major(2),
508  d_dap_client_protocol_minor(0)
509 
510 {
511  d_accept_deflate = rcr->get_deflate();
512  d_rcr = rcr;
513 
514  // Load in the default headers to send with a request. The empty Pragma
515  // headers overrides libcurl's default Pragma: no-cache header (which
516  // will disable caching by Squid, et c.). The User-Agent header helps
517  // make server logs more readable. 05/05/03 jhrg
518  d_request_headers.push_back(string("Pragma:"));
519  string user_agent = string("User-Agent: ") + string(CNAME)
520  + string("/") + string(CVER);
521  d_request_headers.push_back(user_agent);
522  if (d_accept_deflate)
523  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
524 
525  // HTTPCache::instance returns a valid ptr or 0.
526  if (d_rcr->get_use_cache())
527  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
528  true);
529  else
530  d_http_cache = 0;
531 
532  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
533  << ")" << endl);
534 
535  if (d_http_cache) {
536  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
537  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
538  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
539  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
540  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
541  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
542  }
543 
544  d_cookie_jar = rcr->get_cookie_jar();
545 
546  www_lib_init(); // This may throw either Error or InternalErr
547 }
548 
550 {
551  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
552 
553  curl_easy_cleanup(d_curl);
554 
555  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
556 }
557 
570 HTTPResponse *
571 HTTPConnect::fetch_url(const string &url)
572 {
573 #ifdef HTTP_TRACE
574  cout << "GET " << url << " HTTP/1.0" << endl;
575 #endif
576 
577  HTTPResponse *stream;
578 
579  if (d_http_cache && d_http_cache->is_cache_enabled()) {
580  stream = caching_fetch_url(url);
581  }
582  else {
583  stream = plain_fetch_url(url);
584  }
585 
586 #ifdef HTTP_TRACE
587  stringstream ss;
588  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
589  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
590  ss << stream->get_headers()->at(i) << endl;
591  }
592  cout << ss.str();
593 #endif
594 
595  ParseHeader parser;
596 
597  parser = for_each(stream->get_headers()->begin(),
598  stream->get_headers()->end(), ParseHeader());
599 
600 #ifdef HTTP_TRACE
601  cout << endl << endl;
602 #endif
603 
604  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
605  if (parser.get_location() != "" &&
606  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
607  delete stream;
608  return fetch_url(parser.get_location());
609  }
610 
611  stream->set_type(parser.get_object_type());
612  stream->set_version(parser.get_server());
613  stream->set_protocol(parser.get_protocol());
614 
615  return stream;
616 }
617 
618 // Look around for a reasonable place to put a temporary file. Check first
619 // the value of the TMPDIR env var. If that does not yeild a path that's
620 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
621 // defined in stdio.h. If both come up empty, then use `./'.
622 
623 // Change this to a version that either returns a string or an open file
624 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
625 // (see open()) to make it more secure. Ideal solution: get deserialize()
626 // methods to read from a stream returned by libcurl, not from a temporary
627 // file. 9/21/07 jhrg Updated to use strings, so other misc changes. 3/22/11
628 static string
629 get_tempfile_template(const string &file_template)
630 {
631  string c;
632 
633  // Windows has one idea of the standard name(s) for a temporary files dir
634 #ifdef WIN32
635  // white list for a WIN32 directory
636  Regex directory("[-a-zA-Z0-9_:\\]*");
637 
638  // If we're OK to use getenv(), try it.
639 #ifdef USE_GETENV
640  c = getenv("TEMP");
641  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
642  goto valid_temp_directory;
643 
644  c= getenv("TMP");
645  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
646  goto valid_temp_directory;
647 #endif // USE_GETENV
648 
649  // The windows default
650  c = "c:\tmp";
651  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
652  goto valid_temp_directory;
653 
654 #else // Unix/Linux/OSX has another...
655  // white list for a directory
656  Regex directory("[-a-zA-Z0-9_/]*");
657 #ifdef USE_GETENV
658  c = getenv("TMPDIR");
659  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
660  goto valid_temp_directory;
661 #endif // USE_GETENV
662 
663  // Unix defines this sometimes - if present, use it.
664 #ifdef P_tmpdir
665  if (access(P_tmpdir, W_OK | R_OK) == 0) {
666  c = P_tmpdir;
667  goto valid_temp_directory;
668  }
669 #endif
670 
671  // The Unix default
672  c = "/tmp";
673  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
674  goto valid_temp_directory;
675 
676 #endif // WIN32
677 
678  // If we found nothing useful, use the current directory
679  c = ".";
680 
681 valid_temp_directory:
682 
683 #ifdef WIN32
684  c += "\\" + file_template;
685 #else
686  c += "/" + file_template;
687 #endif
688 
689  return c;
690 }
691 
710 string
711 get_temp_file(FILE *&stream) throw(InternalErr)
712 {
713  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
714 
715  vector<char> pathname(dods_temp.length() + 1);
716 
717  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
718 
719  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
720 
721  // Open truncated for update. NB: mkstemp() returns a file descriptor.
722 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
723  stream = fopen(_mktemp(&pathname[0]), "w+b");
724 #else
725  // Make sure that temp files are accessible only by the owner.
726  umask(077);
727  stream = fdopen(mkstemp(&pathname[0]), "w+");
728 #endif
729 
730  if (!stream) {
731  throw InternalErr(__FILE__, __LINE__,
732  "Failed to open a temporary file for the data values ("
733  + dods_temp + ")");
734  }
735 
736  dods_temp = &pathname[0];
737  return dods_temp;
738 }
739 
741 void
742 close_temp(FILE *s, const string &name)
743 {
744  int res = fclose(s);
745  if (res)
746  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
747 
748  res = unlink(name.c_str());
749  if (res != 0)
750  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
751 }
752 
774 HTTPResponse *
775 HTTPConnect::caching_fetch_url(const string &url)
776 {
777  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
778 
779  vector<string> *headers = new vector<string>;
780  string file_name;
781  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
782  if (!s) {
783  // url not in cache; get it and cache it
784  DBGN(cerr << "no; getting response and caching." << endl);
785  delete headers; headers = 0;
786  time_t now = time(0);
787  HTTPResponse *rs = plain_fetch_url(url);
788  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
789 
790  return rs;
791  }
792  else { // url in cache
793  DBGN(cerr << "yes... ");
794 
795  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
796  DBGN(cerr << "and it's valid; using cached response." << endl);
797  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
798  return crs;
799  }
800  else { // url in cache but not valid; validate
801  DBGN(cerr << "but it's not valid; validating... ");
802 
803  d_http_cache->release_cached_response(s); // This closes 's'
804  headers->clear();
805  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
806  FILE *body = 0;
807  string dods_temp = get_temp_file(body);
808  time_t now = time(0); // When was the request made (now).
809  long http_status;
810 
811  try {
812  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
813  rewind(body);
814  }
815  catch (Error &e) {
816  close_temp(body, dods_temp);
817  delete headers;
818  throw ;
819  }
820 
821  switch (http_status) {
822  case 200: { // New headers and new body
823  DBGN(cerr << "read a new response; caching." << endl);
824 
825  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
826  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
827 
828  return rs;
829  }
830 
831  case 304: { // Just new headers, use cached body
832  DBGN(cerr << "cached response valid; updating." << endl);
833 
834  close_temp(body, dods_temp);
835  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
836  string file_name;
837  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
838  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
839  return crs;
840  }
841 
842  default: { // Oops.
843  close_temp(body, dods_temp);
844  if (http_status >= 400) {
845  delete headers; headers = 0;
846  string msg = "Error while reading the URL: ";
847  msg += url;
848  msg
849  += ".\nThe OPeNDAP server returned the following message:\n";
850  msg += http_status_to_string(http_status);
851  throw Error(msg);
852  }
853  else {
854  delete headers; headers = 0;
855  throw InternalErr(__FILE__, __LINE__,
856  "Bad response from the HTTP server: " + long_to_string(http_status));
857  }
858  }
859  }
860  }
861  }
862 
863  throw InternalErr(__FILE__, __LINE__, "Should never get here");
864 }
865 
877 HTTPResponse *
878 HTTPConnect::plain_fetch_url(const string &url)
879 {
880  DBG(cerr << "Getting URL: " << url << endl);
881  FILE *stream = 0;
882  string dods_temp = get_temp_file(stream);
883  vector<string> *resp_hdrs = new vector<string>;
884 
885  int status = -1;
886  try {
887  status = read_url(url, stream, resp_hdrs); // Throws Error.
888  if (status >= 400) {
889  delete resp_hdrs;
890  string msg = "Error while reading the URL: ";
891  msg += url;
892  msg += ".\nThe OPeNDAP server returned the following message:\n";
893  msg += http_status_to_string(status);
894  throw Error(msg);
895  }
896  }
897 
898  catch (Error &e) {
899  delete resp_hdrs;
900  close_temp(stream, dods_temp);
901  throw;
902  }
903 
904  rewind(stream);
905 
906  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
907 }
908 
920 void
922 {
923  d_accept_deflate = deflate;
924 
925  if (d_accept_deflate) {
926  if (find(d_request_headers.begin(), d_request_headers.end(),
927  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
928  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
929  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
930  ostream_iterator<string>(cerr, "\n")));
931  }
932  else {
933  vector<string>::iterator i;
934  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
935  bind2nd(equal_to<string>(),
936  string("Accept-Encoding: deflate, gzip, compress")));
937  d_request_headers.erase(i, d_request_headers.end());
938  }
939 }
940 
/** Predicate that is true for strings that begin with a given header
    name. The header name is copied, so the predicate stays valid after the
    string it was built from (often a temporary) is gone. */
class HeaderMatch {
    const std::string d_header;     // Prefix to look for; owned copy
public:
    HeaderMatch(const std::string &header) : d_header(header) {}

    /** @param arg The string to test.
        @return True if arg starts with the header name. */
    bool operator()(const std::string &arg) const
    {
        // Compare only the leading characters; no need to search all of arg.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};

948 
957 void
958 HTTPConnect::set_xdap_protocol(int major, int minor)
959 {
960  // Look for, and remove if one exists, an XDAP-Accept header
961  vector<string>::iterator i;
962  i = find_if(d_request_headers.begin(), d_request_headers.end(),
963  HeaderMatch("XDAP-Accept:"));
964  if (i != d_request_headers.end())
965  d_request_headers.erase(i);
966 
967  // Record and add the new header value
968  d_dap_client_protocol_major = major;
969  d_dap_client_protocol_minor = minor;
970  ostringstream xdap_accept;
971  xdap_accept << "XDAP-Accept: " << major << "." << minor;
972 
973  d_request_headers.push_back(xdap_accept.str());
974 
975  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
976  ostream_iterator<string>(cerr, "\n")));
977 }
978 
994 void
995 HTTPConnect::set_credentials(const string &u, const string &p)
996 {
997  if (u.empty())
998  return;
999 
1000  // Store the credentials locally.
1001  d_username = u;
1002  d_password = p;
1003 
1004  d_upstring = u + ":" + p;
1005 }
1006 
1007 } // namespace libdap
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1230
bool is_cache_enabled() const
Definition: HTTPCache.cc:626
virtual int get_status() const
Definition: Response.h:111
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:614
#define not_used
Definition: config.h:853
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1369
void set_credentials(const string &u, const string &p)
Definition: HTTPConnect.cc:995
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:126
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:704
int get_ignore_expires() const
Definition: RCReader.h:151
#define DBGN(x)
Definition: debug.h:59
#define SERVER_ERR_MIN
Definition: HTTPConnect.cc:109
virtual void set_type(ObjectType o)
Definition: Response.h:143
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1461
string get_cookie_jar() const
Definition: RCReader.h:258
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:57
#define SERVER_ERR_MAX
Definition: HTTPConnect.cc:110
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:571
virtual void set_version(const string &v)
Definition: Response.h:147
int get_default_expires() const
Definition: RCReader.h:155
#define DBG2(x)
Definition: debug.h:73
A class for software fault reporting.
Definition: InternalErr.h:64
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:765
virtual void set_protocol(const string &p)
Definition: Response.h:151
unsigned int get_max_cached_obj() const
Definition: RCReader.h:147
bool get_deflate() const
Definition: RCReader.h:168
#define DBG(x)
Definition: debug.h:58
int get_max_cache_size() const
Definition: RCReader.h:143
#define CLIENT_ERR_MAX
Definition: HTTPConnect.cc:84
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:333
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1300
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:742
friend class ParseHeader
Definition: HTTPConnect.h:115
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:711
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1137
void set_accept_deflate(bool defalte)
Definition: HTTPConnect.cc:921
string long_to_string(long val, int base)
Definition: util.cc:483
void set_always_validate(bool validate)
Definition: HTTPCache.cc:822
void set_xdap_protocol(int major, int minor)
Definition: HTTPConnect.cc:958
virtual ~HTTPConnect()
Definition: HTTPConnect.cc:549
int dods_keep_temps
Definition: HTTPConnect.cc:81
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:800
bool get_use_cache() const
Definition: RCReader.h:139
int get_always_validate() const
Definition: RCReader.h:159
virtual vector< string > * get_headers() const
Definition: HTTPResponse.h:124
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1552
#define CVER
Definition: config.h:34
#define CNAME
Definition: config.h:26
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:670
#define CLIENT_ERR_MIN
Definition: HTTPConnect.cc:83
string get_dods_cache_root() const
Definition: RCReader.h:135
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:753
int www_trace
Definition: HTTPConnect.cc:78