libdap++  Updated for version 3.11.7
HTTPCache.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 // #define DODS_DEBUG
29 // #define DODS_DEBUG2
30 #undef USE_GETENV
31 
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h> // for stat
35 #include <sys/types.h> // for stat and mkdir
36 #include <sys/stat.h>
37 
38 #include <cstring>
39 #include <iostream>
40 #include <sstream>
41 #include <algorithm>
42 #include <iterator>
43 #include <set>
44 
45 #include "Error.h"
46 #include "InternalErr.h"
47 #include "ResponseTooBigErr.h"
48 #ifndef WIN32
49 #include "SignalHandler.h"
50 #endif
52 #include "HTTPCacheTable.h"
53 #include "HTTPCache.h"
54 #include "HTTPCacheMacros.h"
55 
56 #include "util_mit.h"
57 #include "debug.h"
58 
59 using namespace std;
60 
61 namespace libdap {
62 
63 HTTPCache *HTTPCache::_instance = 0;
64 
65 // instance_mutex is used to ensure that only one instance is created.
66 // That is, it protects the body of the HTTPCache::instance() method. This
67 // mutex is initialized from within the static function once_init_routine(),
68 // which is run through pthread_once() using the once_block control variable
69 // so that the initialization happens exactly once. All of this ensures that
70 // no matter how many threads call the instance() method, only one instance
71 // is ever made.
72 static pthread_mutex_t instance_mutex;
73 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
74 
75 
76 #define NO_LM_EXPIRATION 24*3600 // 24 hours
77 
78 #define DUMP_FREQUENCY 10 // Dump index every x loads
79 
80 #define MEGA 0x100000L
81 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
82 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
83 #define CACHE_GC_PCT 10 // 10% of cache size free after GC
84 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
85 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
86 
87 static void
88 once_init_routine()
89 {
90  int status;
91  status = INIT(&instance_mutex);
92 
93  if (status != 0)
94  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
95 }
96 
125 HTTPCache *
126 HTTPCache::instance(const string &cache_root, bool force)
127 {
128  int status = pthread_once(&once_block, once_init_routine);
129  if (status != 0)
130  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
131 
132  LOCK(&instance_mutex);
133 
134  DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
135  << "... ");
136 
137  try {
138  if (!_instance) {
139  _instance = new HTTPCache(cache_root, force);
140 
141  DBG(cerr << "New instance: " << _instance << ", cache root: "
142  << _instance->d_cache_root << endl);
143 
144  atexit(delete_instance);
145 
146 #ifndef WIN32
147  // Register the interrupt handler. If we've already registered
148  // one, barf. If this becomes a problem, hack SignalHandler so
149  // that we can chain these handlers... 02/10/04 jhrg
150  //
151  // Technically we're leaking memory here. However, since this
152  // class is a singleton, we know that only three objects will
153  // ever be created and they will all exist until the process
154  // exits. We can let this slide... 02/12/04 jhrg
155  EventHandler *old_eh = SignalHandler::instance()->register_handler
156  (SIGINT, new HTTPCacheInterruptHandler);
157  if (old_eh) {
158  SignalHandler::instance()->register_handler(SIGINT, old_eh);
160  "Could not register event handler for SIGINT without superseding an existing one.");
161  }
162 
163  old_eh = SignalHandler::instance()->register_handler
164  (SIGPIPE, new HTTPCacheInterruptHandler);
165  if (old_eh) {
166  SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
168  "Could not register event handler for SIGPIPE without superseding an existing one.");
169  }
170 
171  old_eh = SignalHandler::instance()->register_handler
172  (SIGTERM, new HTTPCacheInterruptHandler);
173  if (old_eh) {
174  SignalHandler::instance()->register_handler(SIGTERM, old_eh);
176  "Could not register event handler for SIGTERM without superseding an existing one.");
177  }
178 #endif
179  }
180  }
181  catch (...) {
182  DBG2(cerr << "The constructor threw an Error!" << endl);
183  UNLOCK(&instance_mutex);
184  throw;
185  }
186 
187  UNLOCK(&instance_mutex);
188  DBGN(cerr << "returning " << hex << _instance << dec << endl);
189 
190  return _instance;
191 }
192 
196 void
197 HTTPCache::delete_instance()
198 {
199  DBG(cerr << "Entering delete_instance()..." << endl);
200  if (HTTPCache::_instance) {
201  DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
202  delete HTTPCache::_instance;
203  HTTPCache::_instance = 0;
204  }
205 
206  DBG(cerr << "Exiting delete_instance()" << endl);
207 }
208 
223 HTTPCache::HTTPCache(string cache_root, bool force) :
224  d_locked_open_file(0),
225  d_cache_enabled(false),
226  d_cache_protected(false),
227  d_expire_ignored(false),
228  d_always_validate(false),
229  d_total_size(CACHE_TOTAL_SIZE * MEGA),
230  d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
231  d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
232  d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
233  d_default_expiration(NO_LM_EXPIRATION),
234  d_max_age(-1),
235  d_max_stale(-1),
236  d_min_fresh(-1),
237  d_http_cache_table(0)
238 {
239  DBG(cerr << "Entering the constructor for " << this << "... ");
240 #if 0
241  int status = pthread_once(&once_block, once_init_routine);
242  if (status != 0)
243  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
244 #endif
245  INIT(&d_cache_mutex);
246 
247  // This used to throw an Error object if we could not get the
248  // single user lock. However, that results in an invalid object. It's
249  // better to have an instance that has default values. If we cannot get
250  // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
251  //
252  // I fixed this block so that the cache root is set before we try to get
253  // the single user lock. That was the fix for bug #661. To make that
254  // work, I had to move the call to create_cache_root out of
255  // set_cache_root(). 09/08/03 jhrg
256 
257  set_cache_root(cache_root);
258  int block_size;
259 
260  if (!get_single_user_lock(force))
261  throw Error("Could not get single user lock for the cache");
262 
263 #ifdef WIN32
264  // Windows is unable to provide us this information. 4096 appears
265  // a best guess. It is likely to be in the range [2048, 8192] on
266  // windows, but will the level of truth of that statement vary over
267  // time ?
268  block_size = 4096;
269 #else
270  struct stat s;
271  if (stat(cache_root.c_str(), &s) == 0)
272  block_size = s.st_blksize;
273  else
274  throw Error("Could not set file system block size.");
275 #endif
276  d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
277  d_cache_enabled = true;
278 
279  DBGN(cerr << "exiting" << endl);
280 }
281 
295 {
296  DBG(cerr << "Entering the destructor for " << this << "... ");
297 
298  try {
299  if (startGC())
300  perform_garbage_collection();
301 
302  d_http_cache_table->cache_index_write();
303  }
304  catch (Error &e) {
305  // If the cache index cannot be written, we've got problems. However,
306  // unless we're debugging, still free up the cache table in memory.
307  // How should we let users know they cache index is not being
308  // written?? 10/03/02 jhrg
309  DBG(cerr << e.get_error_message() << endl);
310  }
311 
312  delete d_http_cache_table;
313 
314  release_single_user_lock();
315 
316  DBGN(cerr << "exiting destructor." << endl);
317  DESTROY(&d_cache_mutex);
318 }
319 
320 
324 
328 bool
329 HTTPCache::stopGC() const
330 {
331  return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
332 }
333 
340 bool
341 HTTPCache::startGC() const
342 {
343  DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
344  return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
345 }
346 
361 void
362 HTTPCache::perform_garbage_collection()
363 {
364  DBG(cerr << "Performing garbage collection" << endl);
365 
366  // Remove all the expired responses.
367  expired_gc();
368 
369  // Remove entries larger than max_entry_size.
370  too_big_gc();
371 
372  // Remove entries starting with zero hits, 1, ..., until stopGC()
373  // returns true.
374  hits_gc();
375 }
376 
382 void
383 HTTPCache::expired_gc()
384 {
385  if (!d_expire_ignored) {
386  d_http_cache_table->delete_expired_entries();
387  }
388 }
389 
406 void
407 HTTPCache::hits_gc()
408 {
409  int hits = 0;
410 
411  if (startGC()) {
412  while (!stopGC()) {
413  d_http_cache_table->delete_by_hits(hits);
414  hits++;
415  }
416  }
417 }
418 
423 void HTTPCache::too_big_gc() {
424  if (startGC())
425  d_http_cache_table->delete_by_size(d_max_entry_size);
426 }
427 
429 
440 bool HTTPCache::get_single_user_lock(bool force)
441 {
442  if (!d_locked_open_file) {
443  FILE * fp = NULL;
444 
445  try {
446  // It's OK to call create_cache_root if the directory already
447  // exists.
448  create_cache_root(d_cache_root);
449  }
450  catch (Error &e) {
451  // We need to catch and return false because this method is
452  // called from a ctor and throwing at this point will result in a
453  // partially constructed object. 01/22/04 jhrg
454  DBG(cerr << "Failure to create the cache root" << endl);
455  return false;
456  }
457 
458  // Try to read the lock file. If we can open for reading, it exists.
459  string lock = d_cache_root + CACHE_LOCK;
460  if ((fp = fopen(lock.c_str(), "r")) != NULL) {
461  int res = fclose(fp);
462  if (res) {
463  DBG(cerr << "Failed to close " << (void *)fp << endl);
464  }
465  if (force)
466  REMOVE(lock.c_str());
467  else
468  return false;
469  }
470 
471  if ((fp = fopen(lock.c_str(), "w")) == NULL) {
472  DBG(cerr << "Could not open for write access" << endl);
473  return false;
474  }
475 
476  d_locked_open_file = fp;
477  return true;
478  }
479 
480  cerr << "locked_open_file is true" << endl;
481  return false;
482 }
483 
486 void
487 HTTPCache::release_single_user_lock()
488 {
489  if (d_locked_open_file) {
490  int res = fclose(d_locked_open_file);
491  if (res) {
492  DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
493  }
494  d_locked_open_file = 0;
495  }
496 
497  string lock = d_cache_root + CACHE_LOCK;
498  REMOVE(lock.c_str());
499 }
500 
503 
507 string
509 {
510  return d_cache_root;
511 }
512 
513 
522 void
523 HTTPCache::create_cache_root(const string &cache_root)
524 {
525  struct stat stat_info;
526  string::size_type cur = 0;
527 
528 #ifdef WIN32
529  cur = cache_root[1] == ':' ? 3 : 1;
530  typedef int mode_t;
531 #else
532  cur = 1;
533 #endif
534  while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
535  string dir = cache_root.substr(0, cur);
536  if (stat(dir.c_str(), &stat_info) == -1) {
537  DBG2(cerr << "Cache....... Creating " << dir << endl);
538  mode_t mask = UMASK(0);
539  if (MKDIR(dir.c_str(), 0777) < 0) {
540  DBG2(cerr << "Error: can't create." << endl);
541  UMASK(mask);
542  throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
543  }
544  UMASK(mask);
545  }
546  else {
547  DBG2(cerr << "Cache....... Found " << dir << endl);
548  }
549  cur++;
550  }
551 }
552 
567 void
568 HTTPCache::set_cache_root(const string &root)
569 {
570  if (root != "") {
571  d_cache_root = root;
572  // cache root should end in /.
573  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
574  d_cache_root += DIR_SEPARATOR_CHAR;
575  }
576  else {
577  // If no cache root has been indicated then look for a suitable
578  // location.
579 #ifdef USE_GETENV
580  char * cr = (char *) getenv("DODS_CACHE");
581  if (!cr) cr = (char *) getenv("TMP");
582  if (!cr) cr = (char *) getenv("TEMP");
583  if (!cr) cr = (char*)CACHE_LOCATION;
584  d_cache_root = cr;
585 #else
586  d_cache_root = CACHE_LOCATION;
587 #endif
588 
589  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
590  d_cache_root += DIR_SEPARATOR_CHAR;
591 
592  d_cache_root += CACHE_ROOT;
593  }
594 
595  // Test d_hhtp_cache_table because this method can be called before that
596  // instance is created and also can be called later to cahnge the cache
597  // root. jhrg 05.14.08
598  if (d_http_cache_table)
599  d_http_cache_table->set_cache_root(d_cache_root);
600 }
601 
613 void
615 {
617 
618  d_cache_enabled = mode;
619 
621 }
622 
625 bool
627 {
628  DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
629  << endl);
630  return d_cache_enabled;
631 }
632 
643 void
645 {
647 
648  d_cache_disconnected = mode;
649 
651 }
652 
657 {
658  return d_cache_disconnected;
659 }
660 
669 void
671 {
673 
674  d_expire_ignored = mode;
675 
677 }
678 
679 /* Is the cache ignoring Expires headers returned with responses that have
680  been cached? */
681 
682 bool
684 {
685  return d_expire_ignored;
686 }
687 
703 void
704 HTTPCache::set_max_size(unsigned long size)
705 {
707 
708  try {
709  unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
711  (size > ULONG_MAX ? ULONG_MAX : size * MEGA);
712  unsigned long old_size = d_total_size;
713  d_total_size = new_size;
714  d_folder_size = d_total_size / CACHE_FOLDER_PCT;
715  d_gc_buffer = d_total_size / CACHE_GC_PCT;
716 
717  if (new_size < old_size && startGC()) {
718  perform_garbage_collection();
719  d_http_cache_table->cache_index_write();
720  }
721  }
722  catch (...) {
724  DBGN(cerr << "Unlocking interface." << endl);
725  throw;
726  }
727 
728  DBG2(cerr << "Cache....... Total cache size: " << d_total_size
729  << " with " << d_folder_size
730  << " bytes for meta information and folders and at least "
731  << d_gc_buffer << " bytes free after every gc" << endl);
732 
734 }
735 
738 unsigned long
740 {
741  return d_total_size / MEGA;
742 }
743 
752 void
753 HTTPCache::set_max_entry_size(unsigned long size)
754 {
756 
757  try {
758  unsigned long new_size = size * MEGA;
759  if (new_size > 0 && new_size < d_total_size - d_folder_size) {
760  unsigned long old_size = d_max_entry_size;
761  d_max_entry_size = new_size;
762  if (new_size < old_size && startGC()) {
763  perform_garbage_collection();
764  d_http_cache_table->cache_index_write();
765  }
766  }
767  }
768  catch (...) {
770  throw;
771  }
772 
773  DBG2(cerr << "Cache...... Max entry cache size is "
774  << d_max_entry_size << endl);
775 
777 }
778 
783 unsigned long
785 {
786  return d_max_entry_size / MEGA;
787 }
788 
799 void
801 {
803 
804  d_default_expiration = exp_time;
805 
807 }
808 
811 int
813 {
814  return d_default_expiration;
815 }
816 
821 void
823 {
824  d_always_validate = validate;
825 }
826 
830 bool
832 {
833  return d_always_validate;
834 }
835 
852 void
853 HTTPCache::set_cache_control(const vector<string> &cc)
854 {
856 
857  try {
858  d_cache_control = cc;
859 
860  vector<string>::const_iterator i;
861  for (i = cc.begin(); i != cc.end(); ++i) {
862  string header = (*i).substr(0, (*i).find(':'));
863  string value = (*i).substr((*i).find(": ") + 2);
864  if (header != "Cache-Control") {
865  throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
866  }
867  else {
868  if (value == "no-cache" || value == "no-store")
869  d_cache_enabled = false;
870  else if (value.find("max-age") != string::npos) {
871  string max_age = value.substr(value.find("=" + 1));
872  d_max_age = parse_time(max_age.c_str());
873  }
874  else if (value == "max-stale")
875  d_max_stale = 0; // indicates will take anything;
876  else if (value.find("max-stale") != string::npos) {
877  string max_stale = value.substr(value.find("=" + 1));
878  d_max_stale = parse_time(max_stale.c_str());
879  }
880  else if (value.find("min-fresh") != string::npos) {
881  string min_fresh = value.substr(value.find("=" + 1));
882  d_min_fresh = parse_time(min_fresh.c_str());
883  }
884  }
885  }
886  }
887  catch (...) {
889  throw;
890  }
891 
893 }
894 
895 
900 vector<string>
902 {
903  return d_cache_control;
904 }
905 
907 
916 bool
917 HTTPCache::is_url_in_cache(const string &url)
918 {
919  DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
920 
921  HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
922  bool status = entry != 0;
923  if (entry) {
924  entry->unlock_read_response();
925  }
926  return status;
927 }
928 
// Is this a hop-by-hop header, i.e. one that must not be stored with a
// cached response?
//
// Only the header NAME (the text before the first ':') is examined, and the
// comparison ignores ASCII case because HTTP header names are
// case-insensitive. Text in the header's value, or a longer name that merely
// contains one of these words, does not count as a match.
//
// The names recognized are Connection, Keep-Alive, Proxy-Authenticate,
// Proxy-Authorization, Transfer-Encoding and Upgrade.
//
// @param header A header line ("Name: value") or a bare header name.
// @return true when the header's name is one of the hop-by-hop names.
bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "transfer-encoding",
        "upgrade"
    };
    static const std::size_t name_count =
        sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    // Isolate the name; find() returning npos selects the whole string.
    std::string name = header.substr(0, header.find(':'));

    // Trim surrounding white space.
    const std::string::size_type first = name.find_first_not_of(" \t\r\n");
    if (first == std::string::npos)
        return false;
    const std::string::size_type last = name.find_last_not_of(" \t\r\n");
    name = name.substr(first, last - first + 1);

    // Lower-case by hand: header names are ASCII, so no locale is involved.
    for (std::string::size_type k = 0; k < name.size(); ++k) {
        if (name[k] >= 'A' && name[k] <= 'Z')
            name[k] = static_cast<char>(name[k] - 'A' + 'a');
    }

    for (std::size_t k = 0; k < name_count; ++k) {
        if (name == hop_by_hop_names[k])
            return true;
    }

    return false;
}
944 
956 void
957 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
958 {
959  string fname = cachename + CACHE_META;
960  d_open_files.push_back(fname);
961 
962  FILE *dest = fopen(fname.c_str(), "w");
963  if (!dest) {
964  throw InternalErr(__FILE__, __LINE__,
965  "Could not open named cache entry file.");
966  }
967 
968  vector<string>::const_iterator i;
969  for (i = headers.begin(); i != headers.end(); ++i) {
970  if (!is_hop_by_hop_header(*i)) {
971  int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
972  if (s != 1) {
973  fclose(dest);
974  throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
975  }
976  s = fwrite("\n", 1, 1, dest);
977  if (s != 1) {
978  fclose(dest);
979  throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
980  }
981  }
982  }
983 
984  int res = fclose(dest);
985  if (res) {
986  DBG(cerr << "HTTPCache::write_metadata - Failed to close "
987  << dest << endl);
988  }
989 
990  d_open_files.pop_back();
991 }
992 
1003 void
1004 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1005 {
1006  FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1007  if (!md) {
1008  throw InternalErr(__FILE__, __LINE__,
1009  "Could not open named cache entry meta data file.");
1010  }
1011 
1012  char line[1024];
1013  while (!feof(md) && fgets(line, 1024, md)) {
1014  line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
1015  headers.push_back(string(line));
1016  }
1017 
1018  int res = fclose(md);
1019  if (res) {
1020  DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1021  << md << endl);
1022  }
1023 }
1024 
1046 int
1047 HTTPCache::write_body(const string &cachename, const FILE *src)
1048 {
1049  d_open_files.push_back(cachename);
1050 
1051  FILE *dest = fopen(cachename.c_str(), "wb");
1052  if (!dest) {
1053  throw InternalErr(__FILE__, __LINE__,
1054  "Could not open named cache entry file.");
1055  }
1056 
1057  // Read and write in 1k blocks; an attempt at doing this efficiently.
1058  // 09/30/02 jhrg
1059  char line[1024];
1060  size_t n;
1061  int total = 0;
1062  while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1063  total += fwrite(line, 1, n, dest);
1064  DBG2(sleep(3));
1065  }
1066 
1067  if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1068  int res = fclose(dest);
1069  res = res & unlink(cachename.c_str());
1070  if (res) {
1071  DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1072  << dest << endl);
1073  }
1074  throw InternalErr(__FILE__, __LINE__,
1075  "I/O error transferring data to the cache.");
1076  }
1077 
1078  rewind(const_cast<FILE *>(src));
1079 
1080  int res = fclose(dest);
1081  if (res) {
1082  DBG(cerr << "HTTPCache::write_body - Failed to close "
1083  << dest << endl);
1084  }
1085 
1086  d_open_files.pop_back();
1087 
1088  return total;
1089 }
1090 
1099 FILE *
1100 HTTPCache::open_body(const string &cachename)
1101 {
1102  DBG(cerr << "cachename: " << cachename << endl);
1103 
1104  FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1105  if (!src)
1106  throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1107 
1108  return src;
1109 }
1110 
1136 bool
1137 HTTPCache::cache_response(const string &url, time_t request_time,
1138  const vector<string> &headers, const FILE *body)
1139 {
1141 
1142  DBG(cerr << "Caching url: " << url << "." << endl);
1143 
1144  try {
1145  // If this is not an http or https URL, don't cache.
1146  if (url.find("http:") == string::npos &&
1147  url.find("https:") == string::npos) {
1149  return false;
1150  }
1151 
1152  // This does nothing if url is not already in the cache. It's
1153  // more efficient to do this than to first check and see if the entry
1154  // exists. 10/10/02 jhrg
1155  d_http_cache_table->remove_entry_from_cache_table(url);
1156 
1158  entry->lock_write_response();
1159 
1160  try {
1161  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1162  if (entry->is_no_cache()) {
1163  DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1164  << "(" << url << ")" << endl);
1165  entry->unlock_write_response();
1166  delete entry; entry = 0;
1168  return false;
1169  }
1170 
1171  // corrected_initial_age, freshness_lifetime, response_time.
1172  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1173 
1174  d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1175  // move these write function to cache table
1176  entry->set_size(write_body(entry->get_cachename(), body));
1177  write_metadata(entry->get_cachename(), headers);
1178  d_http_cache_table->add_entry_to_cache_table(entry);
1179  entry->unlock_write_response();
1180  }
1181  catch (ResponseTooBigErr &e) {
1182  // Oops. Bummer. Clean up and exit.
1183  DBG(cerr << e.get_error_message() << endl);
1184  REMOVE(entry->get_cachename().c_str());
1185  REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1186  DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1187  << ")" << endl);
1188  entry->unlock_write_response();
1189  delete entry; entry = 0;
1191  return false;
1192  }
1193 
1194  if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1195  if (startGC())
1196  perform_garbage_collection();
1197 
1198  d_http_cache_table->cache_index_write(); // resets new_entries
1199  }
1200  }
1201  catch (...) {
1203  throw;
1204  }
1205 
1207 
1208  return true;
1209 }
1210 
1229 vector<string>
1231 {
1233 
1234  HTTPCacheTable::CacheEntry *entry = 0;
1235  vector<string> headers;
1236 
1237  DBG(cerr << "Getting conditional request headers for " << url << endl);
1238 
1239  try {
1240  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1241  if (!entry)
1242  throw Error("There is no cache entry for the URL: " + url);
1243 
1244  if (entry->get_etag() != "")
1245  headers.push_back(string("If-None-Match: ") + entry->get_etag());
1246 
1247  if (entry->get_lm() > 0) {
1248  time_t lm = entry->get_lm();
1249  headers.push_back(string("If-Modified-Since: ")
1250  + date_time_str(&lm));
1251  }
1252  else if (entry->get_max_age() > 0) {
1253  time_t max_age = entry->get_max_age();
1254  headers.push_back(string("If-Modified-Since: ")
1255  + date_time_str(&max_age));
1256  }
1257  else if (entry->get_expires() > 0) {
1258  time_t expires = entry->get_expires();
1259  headers.push_back(string("If-Modified-Since: ")
1260  + date_time_str(&expires));
1261  }
1262  entry->unlock_read_response();
1264  }
1265  catch (...) {
1267  if (entry) {
1268  entry->unlock_read_response();
1269  }
1270  throw;
1271  }
1272 
1273  return headers;
1274 }
1275 
// Strict weak ordering for header lines that looks only at the header NAME,
// i.e. the text before the first ':' (the whole string when there is no
// ':'). Two headers with the same name but different values are therefore
// equivalent, which is what lets a std::set keyed on this functor keep one
// header per name.
//
// The nested typedefs are the ones std::binary_function used to supply; that
// base class is deprecated in C++11 and removed in C++17, so they are spelled
// out here instead.
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // Compare the name prefixes in place; a length of npos means "to the
        // end of the string", so no temporary substrings are needed.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1285 
1299 void
1300 HTTPCache::update_response(const string &url, time_t request_time,
1301  const vector<string> &headers)
1302 {
1304 
1305  HTTPCacheTable::CacheEntry *entry = 0;
1306  DBG(cerr << "Updating the response headers for: " << url << endl);
1307 
1308  try {
1309  entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1310  if (!entry)
1311  throw Error("There is no cache entry for the URL: " + url);
1312 
1313  // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1314  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1315 
1316  // Update corrected_initial_age, freshness_lifetime, response_time.
1317  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1318 
1319  // Merge the new headers with those in the persistent store. How:
1320  // Load the new headers into a set, then merge the old headers. Since
1321  // set<> ignores duplicates, old headers with the same name as a new
1322  // header will got into the bit bucket. Define a special compare
1323  // functor to make sure that headers are compared using only their
1324  // name and not their value too.
1325  set<string, HeaderLess> merged_headers;
1326 
1327  // Load in the new headers
1328  copy(headers.begin(), headers.end(),
1329  inserter(merged_headers, merged_headers.begin()));
1330 
1331  // Get the old headers and load them in.
1332  vector<string> old_headers;
1333  read_metadata(entry->get_cachename(), old_headers);
1334  copy(old_headers.begin(), old_headers.end(),
1335  inserter(merged_headers, merged_headers.begin()));
1336 
1337  // Read the values back out. Use reverse iterators with back_inserter
1338  // to preserve header order. NB: vector<> does not support push_front
1339  // so we can't use front_inserter(). 01/09/03 jhrg
1340  vector<string> result;
1341  copy(merged_headers.rbegin(), merged_headers.rend(),
1342  back_inserter(result));
1343 
1344  write_metadata(entry->get_cachename(), result);
1345  entry->unlock_write_response();
1347  }
1348  catch (...) {
1349  if (entry) {
1350  entry->unlock_read_response();
1351  }
1353  throw;
1354  }
1355 }
1356 
1368 bool
1369 HTTPCache::is_url_valid(const string &url)
1370 {
1372 
1373  bool freshness;
1374  HTTPCacheTable::CacheEntry *entry = 0;
1375 
1376  DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1377 
1378  try {
1379  if (d_always_validate) {
1381  return false; // force re-validation.
1382  }
1383 
1384  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1385  if (!entry)
1386  throw Error("There is no cache entry for the URL: " + url);
1387 
1388  // If we supported range requests, we'd need code here to check if
1389  // there was only a partial response in the cache. 10/02/02 jhrg
1390 
1391  // In case this entry is of type "must-revalidate" then we consider it
1392  // invalid.
1393  if (entry->get_must_revalidate()) {
1394  entry->unlock_read_response();
1396  return false;
1397  }
1398 
1399  time_t resident_time = time(NULL) - entry->get_response_time();
1400  time_t current_age = entry->get_corrected_initial_age() + resident_time;
1401 
1402  // Check that the max-age, max-stale, and min-fresh directives
1403  // given in the request cache control header is followed.
1404  if (d_max_age >= 0 && current_age > d_max_age) {
1405  DBG(cerr << "Cache....... Max-age validation" << endl);
1406  entry->unlock_read_response();
1408  return false;
1409  }
1410  if (d_min_fresh >= 0
1411  && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1412  DBG(cerr << "Cache....... Min-fresh validation" << endl);
1413  entry->unlock_read_response();
1415  return false;
1416  }
1417 
1418  freshness = (entry->get_freshness_lifetime()
1419  + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1420  entry->unlock_read_response();
1422  }
1423  catch (...) {
1424  if (entry) {
1425  entry->unlock_read_response();
1426  }
1428  throw;
1429  }
1430 
1431  return freshness;
1432 }
1433 
1461 FILE * HTTPCache::get_cached_response(const string &url,
1462  vector<string> &headers, string &cacheName) {
1464 
1465  FILE *body = 0;
1466  HTTPCacheTable::CacheEntry *entry = 0;
1467 
1468  DBG(cerr << "Getting the cached response for " << url << endl);
1469 
1470  try {
1471  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1472  if (!entry) {
1474  return 0;
1475  }
1476 
1477  cacheName = entry->get_cachename();
1478  read_metadata(entry->get_cachename(), headers);
1479 
1480  DBG(cerr << "Headers just read from cache: " << endl);
1481  DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1482 
1483  body = open_body(entry->get_cachename());
1484 
1485  DBG(cerr << "Returning: " << url << " from the cache." << endl);
1486 
1487  d_http_cache_table->bind_entry_to_data(entry, body);
1488  }
1489  catch (...) {
1490  // Why make this unlock operation conditional on entry?
1491  if (entry)
1493  if (body != 0)
1494  fclose(body);
1495  throw;
1496  }
1497 
1499 
1500  return body;
1501 }
1502 
1514 FILE *
1515 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1516 {
1517  string discard_name;
1518  return get_cached_response(url, headers, discard_name);
1519 }
1520 
1531 FILE *
1533 {
1534  string discard_name;
1535  vector<string> discard_headers;
1536  return get_cached_response(url, discard_headers, discard_name);
1537 }
1538 
1551 void
1553 {
1555 
1556  try {
1557  d_http_cache_table->uncouple_entry_from_data(body);
1558  }
1559  catch (...) {
1561  throw;
1562  }
1563 
1565 }
1566 
1579 void
1581 {
1583 
1584  try {
1585  if (d_http_cache_table->is_locked_read_responses())
1586  throw Error("Attempt to purge the cache with entries in use.");
1587 
1588  d_http_cache_table->delete_all_entries();
1589  }
1590  catch (...) {
1592  throw;
1593  }
1594 
1596 }
1597 
1598 } // namespace libdap
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1230
bool is_cache_enabled() const
Definition: HTTPCache.cc:626
vector< string > get_cache_control()
Definition: HTTPCache.cc:901
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:614
time_t parse_time(const char *str, bool expand)
Definition: util_mit.cc:132
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1369
string get_error_message() const
Definition: Error.cc:279
void create_location(CacheEntry *entry)
virtual ~HTTPCache()
Definition: HTTPCache.cc:294
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:704
void parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size, const vector< string > &headers)
#define UMASK(a)
#define DBGN(x)
Definition: debug.h:59
void add_entry_to_cache_table(CacheEntry *entry)
#define UNLOCK(m)
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1461
void calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
void delete_by_size(unsigned int size)
#define DESTROY(m)
int get_new_entries() const
#define DBG2(x)
Definition: debug.h:73
int get_default_expiration() const
Definition: HTTPCache.cc:812
A class for software fault reporting.
Definition: InternalErr.h:64
#define DUMP_FREQUENCY
Definition: HTTPCache.cc:78
bool is_hop_by_hop_header(const string &header)
Definition: HTTPCache.cc:935
unsigned long get_max_entry_size() const
Definition: HTTPCache.cc:784
#define DBG(x)
Definition: debug.h:58
#define CACHE_GC_PCT
Definition: HTTPCache.cc:83
#define CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:81
bool is_expire_ignored() const
Definition: HTTPCache.cc:683
#define MAX_CACHE_ENTRY_SIZE
Definition: HTTPCache.cc:85
void set_size(unsigned long sz)
#define LOCK(m)
CacheDisconnectedMode get_cache_disconnected() const
Definition: HTTPCache.cc:656
#define DIR_SEPARATOR_CHAR
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1300
#define MEGA
Definition: HTTPCache.cc:80
bool get_always_validate() const
Definition: HTTPCache.cc:831
#define CACHE_ROOT
#define NO_LM_EXPIRATION
Definition: HTTPCache.cc:76
void bind_entry_to_data(CacheEntry *entry, FILE *body)
string get_cache_root() const
Definition: HTTPCache.cc:508
void set_cache_control(const vector< string > &cc)
Definition: HTTPCache.cc:853
void delete_expired_entries(time_t time=0)
string date_time_str(time_t *calendar, bool local)
Definition: util_mit.cc:284
unsigned long get_current_size() const
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1137
#define MIN_CACHE_TOTAL_SIZE
Definition: HTTPCache.cc:84
string long_to_string(long val, int base)
Definition: util.cc:483
#define INIT(m)
#define CACHE_FOLDER_PCT
Definition: HTTPCache.cc:82
void set_always_validate(bool validate)
Definition: HTTPCache.cc:822
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:800
#define CACHE_META
void remove_entry_from_cache_table(const string &url)
#define CACHE_LOCK
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1552
#define REMOVE(a)
void unlock_cache_interface()
Definition: HTTPCache.h:246
CacheEntry * get_write_locked_entry_from_cache_table(const string &url)
void lock_cache_interface()
Definition: HTTPCache.h:241
unsigned long get_max_size() const
Definition: HTTPCache.cc:739
void delete_by_hits(int hits)
A class for error processing.
Definition: Error.h:90
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:670
#define MKDIR(a, b)
#define CACHE_LOCATION
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:753
void uncouple_entry_from_data(FILE *body)
void set_cache_root(const string &cr)
void set_cache_disconnected(CacheDisconnectedMode mode)
Definition: HTTPCache.cc:644