diff --git a/panda/src/downloader/chunkedStreamBuf.cxx b/panda/src/downloader/chunkedStreamBuf.cxx
index fb3cdd1cac..c6694828ac 100644
--- a/panda/src/downloader/chunkedStreamBuf.cxx
+++ b/panda/src/downloader/chunkedStreamBuf.cxx
@@ -17,6 +17,7 @@
 ////////////////////////////////////////////////////////////////////
 
 #include "chunkedStreamBuf.h"
+#include <ctype.h>
 
 // This module is not compiled if OpenSSL is not available.
 #ifdef HAVE_SSL
@@ -149,13 +150,34 @@ read_chars(char *start, size_t length) {
       (*_source)->read(start, length);
       size_t read_count = (*_source)->gcount();
       _chunk_remaining -= read_count;
+
+      if (read_count == 0 && (*_source)->is_closed()) {
+        // Whoops, the socket closed while we were downloading.
+        if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+          _doc->_state = HTTPChannel::S_failure;
+        }
+      }
+
       return read_count;
     }
 
     // Read the next chunk.
     string line;
-    if (!http_getline(line)) {
+    bool got_line = http_getline(line);
+    while (got_line && line.empty()) {
+      // Skip blank lines.  There really should be exactly one blank
+      // line, but who's counting?  It's tricky to count and maintain
+      // reentry for nonblocking I/O.
+      got_line = http_getline(line);
+    }
+    if (!got_line) {
       // EOF (or data unavailable) while trying to read the chunk size.
+      if ((*_source)->is_closed()) {
+        // Whoops, the socket closed while we were downloading.
+        if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+          _doc->_state = HTTPChannel::S_failure;
+        }
+      }
       return 0;
     }
     size_t chunk_size = (size_t)strtol(line.c_str(), NULL, 16);
@@ -192,11 +214,18 @@ http_getline(string &str) {
     switch (ch) {
     case '\n':
       // end-of-line character, we're done.
-      if (downloader_cat.is_spam()) {
-        downloader_cat.spam() << "recv: " << _working_getline << "\n";
-      }
       str = _working_getline;
       _working_getline = string();
+      {
+        // Trim trailing whitespace.  We're not required to do this per the
+        // HTTP spec, but let's be generous.  (isspace() wants unsigned char.)
+        size_t p = str.length();
+        while (p > 0 && isspace((unsigned char)str[p - 1])) {
+          --p;
+        }
+        str = str.substr(0, p);
+      }
+
       return true;
 
     case '\r':
diff --git a/panda/src/downloader/httpChannel.I b/panda/src/downloader/httpChannel.I
index cf8d76e836..62c648b0b6 100644
--- a/panda/src/downloader/httpChannel.I
+++ b/panda/src/downloader/httpChannel.I
@@ -276,6 +276,21 @@ request_header(const URLSpec &url) {
   begin_request("HEAD", url, string(), true);
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::is_download_complete
+//       Access: Published
+//  Description: Returns true when a download_to_file() or
+//               download_to_ram() has executed and the file has been
+//               fully downloaded.  If this still returns false after
+//               processing has completed, there was an error in
+//               transmission.
+////////////////////////////////////////////////////////////////////
+INLINE bool HTTPChannel::
+is_download_complete() const {
+  return (_download_dest != DD_none &&
+          (_state == S_read_body || _state == S_read_trailer));
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::check_socket
 //       Access: Private
diff --git a/panda/src/downloader/httpChannel.cxx b/panda/src/downloader/httpChannel.cxx
index 33eda15713..b384c6c1ff 100644
--- a/panda/src/downloader/httpChannel.cxx
+++ b/panda/src/downloader/httpChannel.cxx
@@ -21,6 +21,7 @@
 #include "bioStream.h"
 #include "chunkedStream.h"
 #include "identityStream.h"
+#include "buffer.h"  // for Ramfile
 
 #ifdef HAVE_SSL
 
@@ -57,10 +58,13 @@ HTTPChannel(HTTPClient *client) :
   _http_version_string = _client->get_http_version_string();
   _state = S_new;
   _done_state = S_new;
+  _started_download = false;
   _sent_so_far = 0;
   _proxy_tunnel = false;
   _body_stream = NULL;
   _sbio = NULL;
+  _download_dest = DD_none;
+  _download_to_ramfile = NULL;
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -71,6 +75,7 @@ HTTPChannel(HTTPClient *client) :
 HTTPChannel::
 ~HTTPChannel() {
   free_bio();
+  reset_download_to();
 }
 
 ////////////////////////////////////////////////////////////////////
@@ -190,13 +195,33 @@ write_headers(ostream &out) const {
 ////////////////////////////////////////////////////////////////////
 bool HTTPChannel::
 run() {
+  if (_state == _done_state || _state == S_failure) {
+    if (!reached_done_state()) {
+      return false;
+    }
+  }
+
+  if (_started_download) {
+    switch (_download_dest) {
+    case DD_none:
+      return false;  // We're done.
+
+    case DD_file:
+      return run_download_to_file();
+
+    case DD_ram:
+      return run_download_to_ram();
+    }
+  }
+
   if (downloader_cat.is_spam()) {
     downloader_cat.spam()
       << "begin run(), _state = " << (int)_state
       << ", _done_state = " << (int)_done_state << "\n";
   }
-  if (_state == _done_state || _state == S_failure) {
-    return false;
+
+  if (_state == _done_state) {
+    return reached_done_state();
   }
 
   bool repeat_later;
@@ -288,18 +313,13 @@ run() {
 
     if (_state == _done_state || _state == S_failure) {
       // We've reached our terminal state.
-      if (downloader_cat.is_spam()) {
-        downloader_cat.spam()
-          << "terminating run(), _state = " << (int)_state
-          << ", _done_state = " << (int)_done_state << "\n";
-      }
-      return false;
+      return reached_done_state();
     }
   } while (!repeat_later || _bio.is_null());
 
   if (downloader_cat.is_spam()) {
     downloader_cat.spam()
-      << "continue run() later, _state = " << (int)_state
+      << "later run(), _state = " << (int)_state
       << ", _done_state = " << (int)_done_state << "\n";
   }
   return true;
@@ -309,7 +329,13 @@ run() {
 //     Function: HTTPChannel::read_body
 //       Access: Published
 //  Description: Returns a newly-allocated istream suitable for
-//               reading the body of the document.
+//               reading the body of the document.  This may only be
+//               called immediately after a call to get_document() or
+//               post_form(), or after a call to run() has returned
+//               false.
+//
+//               The user is responsible for deleting the returned
+//               istream later.
 ////////////////////////////////////////////////////////////////////
 ISocketStream *HTTPChannel::
 read_body() {
@@ -343,6 +369,131 @@ read_body() {
 
   return result;
 }
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::download_to_file
+//       Access: Published
+//  Description: Specifies the name of a file to download the
+//               resulting document to.  This should be called
+//               immediately after get_document() or
+//               request_document() or related functions.
+//
+//               In the case of the blocking I/O methods like
+//               get_document(), this function will download the
+//               entire document to the file and return true if it was
+//               successfully downloaded, false otherwise.
+//
+//               In the case of non-blocking I/O methods like
+//               request_document(), this function simply indicates an
+//               intention to download to the indicated file.  It
+//               returns true if the file can be opened for writing,
+//               false otherwise, but the contents will not be
+//               completely downloaded until run() has returned false.
+//               At this time, it is possible that a communications
+//               error will have left a partial file, so
+//               is_download_complete() may be called to test this.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+download_to_file(const Filename &filename) {
+  reset_download_to();
+  _download_to_filename = filename;
+  _download_to_filename.set_binary();
+  if (!_download_to_filename.open_write(_download_to_file)) {
+    downloader_cat.info()
+      << "Could not open " << filename << " for writing.\n";
+    return false;
+  }
+
+  _download_dest = DD_file;
+
+  if (_nonblocking) {
+    // In nonblocking mode, we can't start the download yet; that will
+    // be done later as run() is called.
+    return true;
+  }
+
+  // In normal, blocking mode, go ahead and do the download.
+  run();
+  return is_download_complete();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::download_to_ram
+//       Access: Published
+//  Description: Specifies a Ramfile object to download the
+//               resulting document to.  This should be called
+//               immediately after get_document() or
+//               request_document() or related functions.
+//
+//               In the case of the blocking I/O methods like
+//               get_document(), this function will download the
+//               entire document to the Ramfile and return true if it
+//               was successfully downloaded, false otherwise.
+//
+//               In the case of non-blocking I/O methods like
+//               request_document(), this function simply indicates an
+//               intention to download to the indicated Ramfile.  It
+//               returns true in this case (there is no file to open;
+//               a NULL ramfile is rejected), but the contents are not
+//               completely downloaded until run() has returned false.
+//               At this time, it is possible that a communications
+//               error will have left partial data in the Ramfile, so
+//               is_download_complete() may be called to test this.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+download_to_ram(Ramfile *ramfile) {
+  nassertr(ramfile != (Ramfile *)NULL, false);
+  reset_download_to();
+  ramfile->_pos = 0;
+  ramfile->_data = string();
+  _download_to_ramfile = ramfile;
+  _download_dest = DD_ram;
+
+  if (_nonblocking) {
+    // In nonblocking mode, we can't start the download yet; that will
+    // be done later as run() is called.
+    return true;
+  }
+
+  // In normal, blocking mode, go ahead and do the download.
+  run();
+  return is_download_complete();
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::reached_done_state
+//       Access: Private
+//  Description: Called by run() after it reaches the done state, this
+//               simply checks to see if a download was requested, and
+//               begins the download if it has been.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+reached_done_state() {
+  if (downloader_cat.is_spam()) {
+    downloader_cat.spam()
+      << "terminating run(), _state = " << (int)_state
+      << ", _done_state = " << (int)_done_state << "\n";
+  }
+
+  if (_state == S_failure || _download_dest == DD_none) {
+    // All done.
+    return false;
+
+  } else {
+    // Oops, we have to download the body now.
+    _body_stream = read_body();
+    if (_body_stream == (ISocketStream *)NULL) {
+      if (downloader_cat.is_debug()) {
+        downloader_cat.debug()
+          << "Unable to download body.\n";
+      }
+      return false;
+    } else {
+      _started_download = true;
+      return true;
+    }
+  }
+}
 
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::run_connecting
@@ -942,6 +1093,66 @@ run_read_trailer() {
   return false;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::run_download_to_file
+//       Access: Private
+//  Description: After the headers, etc. have been read, this streams
+//               the download to the named file.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+run_download_to_file() {
+  nassertr(_body_stream != (ISocketStream *)NULL, false);
+
+  int ch = _body_stream->get();
+  while (!_body_stream->eof() && !_body_stream->fail()) {
+    _download_to_file.put(ch);
+    ch = _body_stream->get();
+  }
+
+  if (_download_to_file.fail()) {
+    downloader_cat.warning()
+      << "Error writing to " << _download_to_filename << "\n";
+    _state = S_failure;
+    _download_to_file.close();
+    return false;
+  }
+
+  if (_body_stream->is_closed()) {
+    // Done.
+    _download_to_file.close();
+    return false;
+  } else {
+    // More to come.
+    return true;
+  }
+}
+
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::run_download_to_ram
+//       Access: Private
+//  Description: After the headers, etc. have been read, this streams
+//               the download to the specified Ramfile object.
+////////////////////////////////////////////////////////////////////
+bool HTTPChannel::
+run_download_to_ram() {
+  nassertr(_body_stream != (ISocketStream *)NULL, false);
+  nassertr(_download_to_ramfile != (Ramfile *)NULL, false);
+
+  int ch = _body_stream->get();
+  while (!_body_stream->eof() && !_body_stream->fail()) {
+    _download_to_ramfile->_data += (char)ch;
+    ch = _body_stream->get();
+  }
+
+  if (_body_stream->is_closed()) {
+    // Done.
+    return false;
+  } else {
+    // More to come.
+    return true;
+  }
+}
+
 
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::begin_request
@@ -953,6 +1164,7 @@ run_read_trailer() {
 void HTTPChannel::
 begin_request(const string &method, const URLSpec &url,
               const string &body, bool nonblocking) {
+  reset_download_to();
   _status_code = 0;
   _status_string = string();
   _redirect_trail.clear();
@@ -1026,11 +1238,20 @@ http_getline(string &str) {
     switch (ch) {
     case '\n':
       // end-of-line character, we're done.
-      if (downloader_cat.is_spam()) {
-        downloader_cat.spam() << "recv: " << _working_getline << "\n";
-      }
       str = _working_getline;
       _working_getline = string();
+      {
+        // Trim trailing whitespace.  We're not required to do this per the
+        // HTTP spec, but let's be generous.  (isspace() wants unsigned char.)
+        size_t p = str.length();
+        while (p > 0 && isspace((unsigned char)str[p - 1])) {
+          --p;
+        }
+        str = str.substr(0, p);
+      }
+      if (downloader_cat.is_spam()) {
+        downloader_cat.spam() << "recv: " << str << "\n";
+      }
       return true;
 
     case '\r':
@@ -1876,6 +2097,21 @@ free_bio() {
   _state = S_new;
 }
 
+////////////////////////////////////////////////////////////////////
+//     Function: HTTPChannel::reset_download_to
+//       Access: Private
+//  Description: Resets the indication of how the document will be
+//               downloaded.  This must be re-specified after each
+//               get_document() (or related) call.
+////////////////////////////////////////////////////////////////////
+void HTTPChannel::
+reset_download_to() {
+  _started_download = false;
+  _download_to_file.close();
+  _download_to_ramfile = (Ramfile *)NULL;
+  _download_dest = DD_none;
+}
+
 ////////////////////////////////////////////////////////////////////
 //     Function: HTTPChannel::get_basic_authorization
 //       Access: Private
diff --git a/panda/src/downloader/httpChannel.h b/panda/src/downloader/httpChannel.h
index 7a22ab6d5b..60a5741544 100644
--- a/panda/src/downloader/httpChannel.h
+++ b/panda/src/downloader/httpChannel.h
@@ -36,8 +36,10 @@
 #include "pmap.h"
 #include "pointerTo.h"
 #include "config_downloader.h"
+#include "filename.h"
 #include <openssl/ssl.h>
 
+class Ramfile;
 class HTTPClient;
 
 ////////////////////////////////////////////////////////////////////
@@ -98,8 +100,13 @@ PUBLISHED:
   bool run();
 
   ISocketStream *read_body();
+  bool download_to_file(const Filename &filename);
+  bool download_to_ram(Ramfile *ramfile);
+
+  INLINE bool is_download_complete() const;
 
 private:
+  bool reached_done_state();
   bool run_connecting();
   bool run_proxy_ready();
   bool run_proxy_request_sent();
@@ -115,6 +122,9 @@ private:
   bool run_read_body();
   bool run_read_trailer();
 
+  bool run_download_to_file();
+  bool run_download_to_ram();
+
   void begin_request(const string &method, const URLSpec &url,
                      const string &body, bool nonblocking);
 
@@ -148,6 +158,7 @@ private:
 #endif
 
   void free_bio();
+  void reset_download_to();
 
   HTTPClient *_client;
   URLSpec _proxy;
@@ -161,6 +172,16 @@ private:
   string _header;
   string _body;
 
+  enum DownloadDest {
+    DD_none,
+    DD_file,
+    DD_ram,
+  };
+  DownloadDest _download_dest;
+  Filename _download_to_filename;
+  ofstream _download_to_file;
+  Ramfile *_download_to_ramfile;
+
   int _read_index;
 
   HTTPClient::HTTPVersion _http_version;
@@ -201,6 +222,7 @@ private:
   };
   State _state;
   State _done_state;
+  bool _started_download;
   string _proxy_header;
   string _proxy_request_text;
   bool _proxy_tunnel;
diff --git a/panda/src/downloader/identityStreamBuf.cxx b/panda/src/downloader/identityStreamBuf.cxx
index ca45ba320f..f4b7ffe393 100644
--- a/panda/src/downloader/identityStreamBuf.cxx
+++ b/panda/src/downloader/identityStreamBuf.cxx
@@ -141,6 +141,18 @@ read_chars(char *start, size_t length) {
       // file.
       (*_source)->read(start, length);
       read_count = (*_source)->gcount();
+
+      if (read_count == 0) {
+        if ((*_source)->is_closed()) {
+          // socket closed; we're done.
+          if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+            // An IdentityStreamBuf doesn't have a trailer, so we've already
+            // "read" it.
+            _doc->_state = HTTPChannel::S_read_trailer;
+          }
+        }
+        return 0;
+      }
 
     } else {
       // Extract some of the bytes remaining in the chunk.
@@ -150,6 +162,16 @@ read_chars(char *start, size_t length) {
       (*_source)->read(start, length);
       read_count = (*_source)->gcount();
       _bytes_remaining -= read_count;
+
+      if (read_count == 0) {
+        if ((*_source)->is_closed()) {
+          // socket closed unexpectedly; problem.
+          if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
+            _doc->_state = HTTPChannel::S_failure;
+          }
+        }
+        return 0;
+      }
     }
 
     if (_bytes_remaining == 0) {
@@ -161,18 +183,6 @@ read_chars(char *start, size_t length) {
       }
     }
   }
-
-  if (read_count == 0) {
-    if ((*_source)->is_closed()) {
-      // socket closed; we're done.
-      if (_doc != (HTTPChannel *)NULL && _read_index == _doc->_read_index) {
-        // An IdentityStreamBuf doesn't have a trailer, so we've already
-        // "read" it.
-        _doc->_state = HTTPChannel::S_read_trailer;
-      }
-    }
-    return 0;
-  }
 
   return read_count;
 }