58 #include "mime_util.h"
59 #include "media_types.h"
61 #include "HTTPCache.h"
62 #include "HTTPConnect.h"
64 #include "HTTPResponse.h"
65 #include "HTTPCacheResponse.h"
80 int www_trace_extensive = 0;
83 int dods_keep_temps = 0;
85 #define CLIENT_ERR_MIN 400
86 #define CLIENT_ERR_MAX 417
87 static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] =
90 "Unauthorized: Contact the server administrator.",
92 "Forbidden: Contact the server administrator.",
93 "Not Found: The data source or server could not be found.\n\
94 Often this means that the OPeNDAP server is missing or needs attention.\n\
95 Please contact the server administrator.",
96 "Method Not Allowed.",
98 "Proxy Authentication Required.",
103 "Precondition Failed.",
104 "Request Entity Too Large.",
105 "Request URI Too Large.",
106 "Unsupported Media Type.",
107 "Requested Range Not Satisfiable.",
108 "Expectation Failed."
111 #define SERVER_ERR_MIN 500
112 #define SERVER_ERR_MAX 505
113 static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
115 "Internal Server Error.",
118 "Service Unavailable.",
120 "HTTP Version Not Supported."
126 http_status_to_string(
int status)
128 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
129 return string(http_client_errors[status - CLIENT_ERR_MIN]);
130 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
131 return string(http_server_errors[status - SERVER_ERR_MIN]);
133 return string(
"Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
137 determine_object_type(
const string &header_value)
142 string::size_type plus = header_value.find(
'+');
144 string type_extension =
"";
145 if (plus != string::npos) {
146 base_type= header_value.substr(0, plus);
147 type_extension = header_value.substr(plus+1);
150 base_type = header_value;
152 if (base_type == DMR_Content_Type
153 || (base_type.find(
"application/") != string::npos
154 && base_type.find(
"dap4.dataset-metadata") != string::npos)) {
155 if (type_extension ==
"xml")
160 else if (base_type == DAP4_DATA_Content_Type
161 || (base_type.find(
"application/") != string::npos
162 && base_type.find(
"dap4.data") != string::npos)) {
165 else if (header_value.find(
"text/html") != string::npos) {
176 class ParseHeader :
public unary_function<const string &, void>
184 ParseHeader() : type(unknown_type), server(
"dods/0.0"), protocol(
"2.0")
187 void operator()(
const string &line)
192 DBG2(cerr << name <<
": " << value << endl);
197 if (type == unknown_type && name ==
"content-type") {
198 type = determine_object_type(value);
200 if (name ==
"content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
206 else if (name ==
"xdods-server" && server ==
"dods/0.0") {
209 else if (name ==
"xopendap-server") {
212 else if (name ==
"xdap") {
215 else if (server ==
"dods/0.0" && name ==
"server") {
218 else if (name ==
"location") {
233 string get_protocol()
238 string get_location() {
259 save_raw_http_headers(
void *ptr,
size_t size,
size_t nmemb,
void *resp_hdrs)
261 DBG2(cerr <<
"Inside the header parser." << endl);
262 vector<string> *hdrs =
static_cast<vector<string> *
>(resp_hdrs);
265 string complete_line;
266 if (nmemb > 1 && *(
static_cast<char*
>(ptr) + size * (nmemb - 2)) ==
'\r')
267 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 2));
269 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 1));
272 if (complete_line !=
"" && complete_line.find(
"HTTP") == string::npos) {
273 DBG(cerr <<
"Header line: " << complete_line << endl);
274 hdrs->push_back(complete_line);
282 curl_debug(CURL *, curl_infotype info,
char *msg,
size_t size,
void *)
284 string message(msg, size);
288 cerr <<
"Text: " << message;
break;
289 case CURLINFO_HEADER_IN:
290 cerr <<
"Header in: " << message;
break;
291 case CURLINFO_HEADER_OUT:
292 cerr <<
"Header out: " << message;
break;
293 case CURLINFO_DATA_IN:
294 if (www_trace_extensive)
295 cerr <<
"Data in: " << message;
break;
296 case CURLINFO_DATA_OUT:
297 if (www_trace_extensive)
298 cerr <<
"Data out: " << message;
break;
300 cerr <<
"End: " << message;
break;
301 #ifdef CURLINFO_SSL_DATA_IN
302 case CURLINFO_SSL_DATA_IN:
303 cerr <<
"SSL Data in: " << message;
break;
305 #ifdef CURLINFO_SSL_DATA_OUT
306 case CURLINFO_SSL_DATA_OUT:
307 cerr <<
"SSL Data out: " << message;
break;
310 if (www_trace_extensive)
311 cerr <<
"Curl info: " << message;
break;
320 HTTPConnect::www_lib_init()
322 curl_global_init(CURL_GLOBAL_DEFAULT);
324 d_curl = curl_easy_init();
326 throw InternalErr(__FILE__, __LINE__,
"Could not initialize libcurl.");
328 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
330 curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2);
336 if (!d_rcr->get_proxy_server_host().empty()) {
337 DBG(cerr <<
"Setting up a proxy server." << endl);
338 DBG(cerr <<
"Proxy host: " << d_rcr->get_proxy_server_host()
340 DBG(cerr <<
"Proxy port: " << d_rcr->get_proxy_server_port()
342 DBG(cerr <<
"Proxy pwd : " << d_rcr->get_proxy_server_userpw()
344 curl_easy_setopt(d_curl, CURLOPT_PROXY,
345 d_rcr->get_proxy_server_host().c_str());
346 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
347 d_rcr->get_proxy_server_port());
350 #ifdef CURLOPT_PROXYAUTH
351 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (
long)CURLAUTH_ANY);
355 if (!d_rcr->get_proxy_server_userpw().empty())
356 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
357 d_rcr->get_proxy_server_userpw().c_str());
362 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
367 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (
long)CURLAUTH_ANY);
369 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
370 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
371 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
376 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
377 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
380 if (d_rcr->get_validate_ssl() == 0) {
381 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
382 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
387 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
393 if (!d_cookie_jar.empty()) {
394 DBG(cerr <<
"Setting the cookie jar to: " << d_cookie_jar << endl);
395 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
396 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
400 cerr <<
"Curl version: " << curl_version() << endl;
401 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
402 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
409 class BuildHeaders :
public unary_function<const string &, void>
411 struct curl_slist *d_cl;
414 BuildHeaders() : d_cl(0)
417 void operator()(
const string &header)
419 DBG(cerr <<
"Adding '" << header.c_str() <<
"' to the header list."
421 d_cl = curl_slist_append(d_cl, header.c_str());
424 struct curl_slist *get_headers()
445 HTTPConnect::read_url(
const string &url, FILE *stream, vector<string> *resp_hdrs,
const vector<string> *headers)
447 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
457 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
458 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
460 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
463 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
464 ostream_iterator<string>(cerr,
"\n")));
466 BuildHeaders req_hdrs;
467 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
470 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
472 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
475 bool temporary_proxy =
false;
476 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
477 DBG(cerr <<
"Suppress proxy for url: " << url << endl);
478 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
481 string::size_type at_sign = url.find(
'@');
485 if (at_sign != url.npos)
486 d_upstring = url.substr(7, at_sign - 7);
488 if (!d_upstring.empty())
489 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
494 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
499 CURLcode res = curl_easy_perform(d_curl);
502 curl_slist_free_all(req_hdrs.get_headers());
503 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
506 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
507 curl_easy_setopt(d_curl, CURLOPT_PROXY,
508 d_rcr->get_proxy_server_host().c_str());
511 throw Error(d_error_buffer);
514 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
516 throw Error(d_error_buffer);
519 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
520 if (res == CURLE_OK && ct_ptr)
521 d_content_type = ct_ptr;
532 HTTPConnect::url_uses_proxy_for(
const string &url)
534 if (d_rcr->is_proxy_for_used()) {
535 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
536 int index = 0, matchlen;
537 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
547 HTTPConnect::url_uses_no_proxy_for(
const string &url)
throw()
549 return d_rcr->is_no_proxy_for_used()
550 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
561 HTTPConnect::HTTPConnect(
RCReader *rcr,
bool use_cpp) : d_username(
""), d_password(
""), d_cookie_jar(
""),
562 d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
565 d_accept_deflate = rcr->get_deflate();
572 d_request_headers.push_back(
string(
"Pragma:"));
573 string user_agent = string(
"User-Agent: ") + string(CNAME)
574 + string(
"/") + string(CVER);
575 d_request_headers.push_back(user_agent);
576 if (d_accept_deflate)
577 d_request_headers.push_back(
string(
"Accept-Encoding: deflate, gzip, compress"));
580 if (d_rcr->get_use_cache())
585 DBG2(cerr <<
"Cache object created (" << hex << d_http_cache << dec
591 d_http_cache->
set_max_size(d_rcr->get_max_cache_size());
597 d_cookie_jar = rcr->get_cookie_jar();
602 HTTPConnect::~HTTPConnect()
604 DBG2(cerr <<
"Entering the HTTPConnect dtor" << endl);
606 curl_easy_cleanup(d_curl);
608 DBG2(cerr <<
"Leaving the HTTPConnect dtor" << endl);
612 class HeaderMatch :
public unary_function<const string &, bool> {
613 const string &d_header;
615 HeaderMatch(
const string &header) : d_header(header) {}
616 bool operator()(
const string &arg) {
return arg.find(d_header) == 0; }
635 cout <<
"GET " << url <<
" HTTP/1.0" << endl;
641 stream = caching_fetch_url(url);
644 stream = plain_fetch_url(url);
649 ss <<
"HTTP/1.0 " << stream->get_status() <<
" -" << endl;
650 for (
size_t i = 0; i < stream->get_headers()->size(); i++) {
651 ss << stream->get_headers()->at(i) << endl;
662 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
663 HeaderMatch(
"Content-Type:")) == stream->get_headers()->end())
664 stream->get_headers()->push_back(
"Content-Type: " + d_content_type);
666 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
669 cout << endl << endl;
673 if (parser.get_location() !=
"" &&
674 url.substr(0,url.find(
"?",0)).compare(parser.get_location().substr(0,url.find(
"?",0))) != 0) {
679 stream->set_type(parser.get_object_type());
681 stream->set_version(parser.get_server());
682 stream->set_protocol(parser.get_protocol());
684 if (d_use_cpp_streams) {
702 get_tempfile_template(
const string &file_template)
709 Regex directory(
"[-a-zA-Z0-9_:\\]*");
714 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
715 goto valid_temp_directory;
718 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
719 goto valid_temp_directory;
724 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
725 goto valid_temp_directory;
729 Regex directory(
"[-a-zA-Z0-9_/]*");
731 c = getenv(
"TMPDIR");
732 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
733 goto valid_temp_directory;
738 if (access(P_tmpdir, W_OK | R_OK) == 0) {
740 goto valid_temp_directory;
746 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
747 goto valid_temp_directory;
754 valid_temp_directory:
757 c +=
"\\" + file_template;
759 c +=
"/" + file_template;
786 string dods_temp = get_tempfile_template((
string)
"dodsXXXXXX");
788 vector<char> pathname(dods_temp.length() + 1);
790 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
792 DBG(cerr <<
"pathanme: " << &pathname[0] <<
" (" << dods_temp.length() + 1 <<
")" << endl);
795 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
796 stream = fopen(_mktemp(&pathname[0]),
"w+b");
799 int mask = umask(077);
801 throw Error(
"Could not set the file creation mask: " +
string(strerror(errno)));
802 int fd = mkstemp(&pathname[0]);
804 throw Error(
"Could not create a temporary file to store the response: " +
string(strerror(errno)));
806 stream = fdopen(fd,
"w+");
811 throw Error(
"Failed to open a temporary file for the data values (" + dods_temp +
")");
813 dods_temp = &pathname[0];
828 throw InternalErr(__FILE__, __LINE__,
"!FAIL! " + long_to_string(res));
830 res = unlink(name.c_str());
832 throw InternalErr(__FILE__, __LINE__,
"!FAIL! " + long_to_string(res));
857 HTTPConnect::caching_fetch_url(
const string &url)
859 DBG(cerr <<
"Is this URL (" << url <<
") in the cache?... ");
861 vector<string> *headers =
new vector<string>;
866 DBGN(cerr <<
"no; getting response and caching." << endl);
867 delete headers; headers = 0;
868 time_t now = time(0);
869 HTTPResponse *rs = plain_fetch_url(url);
870 d_http_cache->
cache_response(url, now, *(rs->get_headers()), rs->get_stream());
875 DBGN(cerr <<
"yes... ");
878 DBGN(cerr <<
"and it's valid; using cached response." << endl);
879 HTTPCacheResponse *crs =
new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
883 DBGN(cerr <<
"but it's not valid; validating... ");
890 time_t now = time(0);
894 http_status = read_url(url, body, headers, &cond_hdrs);
903 switch (http_status) {
905 DBGN(cerr <<
"read a new response; caching." << endl);
908 HTTPResponse *rs =
new HTTPResponse(body, http_status, headers, dods_temp);
914 DBGN(cerr <<
"cached response valid; updating." << endl);
920 HTTPCacheResponse *crs =
new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
926 if (http_status >= 400) {
927 delete headers; headers = 0;
928 string msg =
"Error while reading the URL: ";
931 +=
".\nThe OPeNDAP server returned the following message:\n";
932 msg += http_status_to_string(http_status);
936 delete headers; headers = 0;
937 throw InternalErr(__FILE__, __LINE__,
938 "Bad response from the HTTP server: " + long_to_string(http_status));
945 throw InternalErr(__FILE__, __LINE__,
"Should never get here");
960 HTTPConnect::plain_fetch_url(
const string &url)
962 DBG(cerr <<
"Getting URL: " << url << endl);
965 vector<string> *resp_hdrs =
new vector<string>;
969 status = read_url(url, stream, resp_hdrs);
972 string msg =
"Error while reading the URL: ";
974 msg +=
".\nThe OPeNDAP server returned the following message:\n";
975 msg += http_status_to_string(status);
987 if (d_use_cpp_streams) {
989 fstream *in =
new fstream(dods_temp.c_str(), ios::in|ios::binary);
990 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
995 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
1015 d_accept_deflate = deflate;
1017 if (d_accept_deflate) {
1018 if (find(d_request_headers.begin(), d_request_headers.end(),
1019 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1020 d_request_headers.push_back(
string(
"Accept-Encoding: deflate, gzip, compress"));
1021 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1022 ostream_iterator<string>(cerr,
"\n")));
1025 vector<string>::iterator i;
1026 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1027 bind2nd(equal_to<string>(),
1028 string(
"Accept-Encoding: deflate, gzip, compress")));
1029 d_request_headers.erase(i, d_request_headers.end());
1045 vector<string>::iterator i;
1046 i = find_if(d_request_headers.begin(), d_request_headers.end(),
1047 HeaderMatch(
"XDAP-Accept:"));
1048 if (i != d_request_headers.end())
1049 d_request_headers.erase(i);
1052 d_dap_client_protocol_major = major;
1053 d_dap_client_protocol_minor = minor;
1054 ostringstream xdap_accept;
1055 xdap_accept <<
"XDAP-Accept: " << major <<
"." << minor;
1057 d_request_headers.push_back(xdap_accept.str());
1059 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1060 ostream_iterator<string>(cerr,
"\n")));
1088 d_upstring = u +
":" + p;
A class for error processing.
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
static HTTPCache * instance(const string &cache_root, bool force=false)
void set_expire_ignored(bool mode)
void set_default_expiration(int exp_time)
void release_cached_response(FILE *response)
vector< string > get_conditional_request_headers(const string &url)
void set_cache_enabled(bool mode)
void set_max_entry_size(unsigned long size)
bool is_url_valid(const string &url)
void set_always_validate(bool validate)
void update_response(const string &url, time_t request_time, const vector< string > &headers)
void set_max_size(unsigned long size)
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
void set_accept_deflate(bool defalte)
HTTPResponse * fetch_url(const string &url)
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
top level DAP object to house generic methods
ObjectType get_description_type(const string &value)
void parse_mime_header(const string &header, string &name, string &value)
string get_temp_file(FILE *&stream)
void close_temp(FILE *s, const string &name)
ObjectType
The type of object in the stream coming from the data server.