httpbackup

This commit is contained in:
David Rose 2003-01-29 20:40:10 +00:00
parent 61a41165bf
commit f0b13035cc
6 changed files with 864 additions and 0 deletions

View File

@ -0,0 +1,16 @@
#begin bin_target
#define TARGET httpbackup
#define LOCAL_LIBS pandaappbase
#define USE_PACKAGES ssl
#define OTHER_LIBS \
progbase \
express:c downloader:c pandaexpress:m \
net:c panda:m
#define SOURCES \
backupCatalog.I backupCatalog.cxx backupCatalog.h \
httpBackup.cxx httpBackup.h
#end bin_target

View File

@ -0,0 +1,63 @@
// Filename: backupCatalog.I
// Created by: drose (29Jan03)
//
////////////////////////////////////////////////////////////////////
//
// PANDA 3D SOFTWARE
// Copyright (c) 2001, Disney Enterprises, Inc. All rights reserved
//
// All use of this software is subject to the terms of the Panda 3d
// Software license. You should have received a copy of this license
// along with this source code; you will also find a current copy of
// the license at http://www.panda3d.org/license.txt .
//
// To contact the maintainers of this program write to
// panda3d@yahoogroups.com .
//
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Entry::Constructor
// Access: Public
// Description: Creates an empty Entry. The body does nothing, so
// every member is simply default-constructed; callers
// are expected to fill in the fields afterwards (or to
// populate the entry via input()).
////////////////////////////////////////////////////////////////////
INLINE BackupCatalog::Entry::
Entry() {
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Entry::operator <
// Access: Public
// Description: Orders two entries chronologically, comparing the
// dates reported by get_date(). An entry sorts before
// another when its date is earlier.
////////////////////////////////////////////////////////////////////
INLINE bool BackupCatalog::Entry::
operator < (const BackupCatalog::Entry &other) const {
  const HTTPDate &this_date = get_date();
  const HTTPDate &other_date = other.get_date();
  return this_date < other_date;
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Entry::get_date
// Access: Public
// Description: Returns the date by which this entry is ordered and
// aged. When the document spec carries a date (the
// last-modified date supplied by the server) that date
// is returned; otherwise we fall back to the date on
// which the document was downloaded.
////////////////////////////////////////////////////////////////////
INLINE const HTTPDate &BackupCatalog::Entry::
get_date() const {
  if (_document_spec.has_date()) {
    return _document_spec.get_date();
  }
  return _download_date;
}
////////////////////////////////////////////////////////////////////
// Function: operator >> (BackupCatalog::Entry)
// Access: Public
// Description: Stream extraction for a catalog entry. Simply
// forwards to Entry::input() and returns the stream so
// that extractions may be chained.
////////////////////////////////////////////////////////////////////
INLINE istream &
operator >> (istream &in, BackupCatalog::Entry &entry) {
entry.input(in);
return in;
}
////////////////////////////////////////////////////////////////////
// Function: operator << (BackupCatalog::Entry)
// Access: Public
// Description: Stream insertion for a catalog entry. Simply
// forwards to Entry::output() (the single-line form)
// and returns the stream so that insertions may be
// chained.
////////////////////////////////////////////////////////////////////
INLINE ostream &
operator << (ostream &out, const BackupCatalog::Entry &entry) {
entry.output(out);
return out;
}

View File

@ -0,0 +1,192 @@
// Filename: backupCatalog.cxx
// Created by: drose (29Jan03)
//
////////////////////////////////////////////////////////////////////
//
// PANDA 3D SOFTWARE
// Copyright (c) 2001, Disney Enterprises, Inc. All rights reserved
//
// All use of this software is subject to the terms of the Panda 3d
// Software license. You should have received a copy of this license
// along with this source code; you will also find a current copy of
// the license at http://www.panda3d.org/license.txt .
//
// To contact the maintainers of this program write to
// panda3d@yahoogroups.com .
//
////////////////////////////////////////////////////////////////////
#include "backupCatalog.h"
#include "indirectLess.h"
#include <algorithm>
// These are the characters that are not escaped when we write a
// filename out to the catalog. Really, the only reason we use
// URLSpec::quote() to protect the filenames is to escape spaces.
static const char * const acceptable_chars = "~/:";
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Constructor
// Access: Public
// Description: Creates an empty catalog. The body does nothing; the
// table and the filename set are default-constructed.
////////////////////////////////////////////////////////////////////
BackupCatalog::
BackupCatalog() {
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Destructor
// Access: Public
// Description: Releases the catalog's contents by calling clear(),
// which deletes every Entry object referenced from
// the table.
////////////////////////////////////////////////////////////////////
BackupCatalog::
~BackupCatalog() {
clear();
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::read
// Access: Public
// Description: Reads the catalog from the named file. Returns true
// on success, false on failure. On a false return, the
// catalog may have been partially read in.
////////////////////////////////////////////////////////////////////
bool BackupCatalog::
read(const Filename &filename) {
clear();
ifstream file;
if (!filename.open_read(file)) {
nout << "Unable to read: " << filename << "\n";
return false;
}
Entry *entry = new Entry;
file >> (*entry);
while (!file.fail() && !file.eof()) {
_table[entry->_document_name].push_back(entry);
_filenames.insert(entry->_filename);
entry = new Entry;
file >> (*entry);
}
// Delete the last Entry that we didn't use.
delete entry;
if (!file.eof()) {
// Oops, we had an error on one of the entries.
return false;
}
// Now sort all of the entries by date.
Table::iterator ti;
for (ti = _table.begin(); ti != _table.end(); ++ti) {
Entries &entries = (*ti).second;
sort(entries.begin(), entries.end(), IndirectLess<Entry>());
}
return true;
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::write
// Access: Public
// Description: Rewrites the catalog to the named file. Returns true
// on success, false on failure.
////////////////////////////////////////////////////////////////////
bool BackupCatalog::
write(const Filename &filename) const {
ofstream file;
if (!filename.open_write(file)) {
nout << "Unable to write: " << filename << "\n";
return false;
}
Table::const_iterator ti;
for (ti = _table.begin(); ti != _table.end(); ++ti) {
const Entries &entries = (*ti).second;
Entries::const_iterator ei;
for (ei = entries.begin(); ei != entries.end(); ++ei) {
(*ei)->write(file);
file << "\n";
}
}
return file.good();
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::clear
// Access: Public
// Description: Completely empties the contents of the catalog.
////////////////////////////////////////////////////////////////////
void BackupCatalog::
clear() {
Table::iterator ti;
for (ti = _table.begin(); ti != _table.end(); ++ti) {
Entries &entries = (*ti).second;
Entries::iterator ei;
for (ei = entries.begin(); ei != entries.end(); ++ei) {
delete (*ei);
}
}
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Entry::delete_file
// Access: Public
// Description: Deletes the file named by the entry, echoing the
// indicated reason to nout.
////////////////////////////////////////////////////////////////////
void BackupCatalog::Entry::
delete_file(const Filename &dirname, const string &reason) {
Filename pathname(dirname, _filename);
if (pathname.exists()) {
nout << "Deleting " << _filename << " (" << reason << ").\n";
if (!pathname.unlink()) {
nout << "unable to delete.\n";
}
} else {
nout << "Tried to delete " << _filename << " but it's already gone!\n";
}
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Entry::input
// Access: Public
// Description: Reads the entry from a stream that was produced by
// either output() or write(). The four fields are
// extracted in order (document name, filename,
// document spec, download date), and the two names are
// then unquoted to undo the escaping applied when they
// were written.
////////////////////////////////////////////////////////////////////
void BackupCatalog::Entry::
input(istream &in) {
  in >> _document_name;
  in >> _filename;
  in >> _document_spec;
  in >> _download_date;

  // The names were written with URLSpec::quote(); reverse that now.
  _document_name = URLSpec::unquote(_document_name);
  _filename = URLSpec::unquote(_filename);
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Entry::output
// Access: Public
// Description: Writes the entry on a single line: the quoted
// document name, the quoted filename, the document
// spec, and the download date, separated by single
// spaces. No trailing newline is written.
////////////////////////////////////////////////////////////////////
void BackupCatalog::Entry::
output(ostream &out) const {
  // Quote the names so that embedded spaces do not break the
  // whitespace-delimited format.
  const string quoted_name = URLSpec::quote(_document_name, acceptable_chars);
  const string quoted_filename = URLSpec::quote(_filename, acceptable_chars);

  out << quoted_name << " " << quoted_filename << " ";
  out << _document_spec << " ";
  out << _download_date;
}
////////////////////////////////////////////////////////////////////
// Function: BackupCatalog::Entry::write
// Access: Public
// Description: Writes the entry in the multi-line form used by the
// catalog file: the quoted document name and filename
// on the first line, then the document spec written
// via DocumentSpec::write() with an argument of 2, and
// finally the download date on its own line preceded
// by two spaces.
////////////////////////////////////////////////////////////////////
void BackupCatalog::Entry::
write(ostream &out) const {
  const string quoted_name = URLSpec::quote(_document_name, acceptable_chars);
  const string quoted_filename = URLSpec::quote(_filename, acceptable_chars);

  out << quoted_name << " " << quoted_filename << "\n";
  _document_spec.write(out, 2);
  out << "  ";
  out << _download_date << "\n";
}

View File

@ -0,0 +1,76 @@
// Filename: backupCatalog.h
// Created by: drose (29Jan03)
//
////////////////////////////////////////////////////////////////////
//
// PANDA 3D SOFTWARE
// Copyright (c) 2001, Disney Enterprises, Inc. All rights reserved
//
// All use of this software is subject to the terms of the Panda 3d
// Software license. You should have received a copy of this license
// along with this source code; you will also find a current copy of
// the license at http://www.panda3d.org/license.txt .
//
// To contact the maintainers of this program write to
// panda3d@yahoogroups.com .
//
////////////////////////////////////////////////////////////////////
#ifndef BACKUPCATALOG_H
#define BACKUPCATALOG_H
#include "pandaappbase.h"
#include "documentSpec.h"
#include "filename.h"
#include "pvector.h"
#include "pmap.h"
#include "pset.h"
////////////////////////////////////////////////////////////////////
// Class : BackupCatalog
// Description : This is the list of previous versions of this file
// (and possibly other files) stored in the "catalog", a
// text file within the download directory.
//
// The catalog owns the Entry objects referenced from
// _table: they are deleted by clear() and by the
// destructor. For that reason the class is not
// copyable.
////////////////////////////////////////////////////////////////////
class BackupCatalog {
public:
  BackupCatalog();
  ~BackupCatalog();

  bool read(const Filename &filename);
  bool write(const Filename &filename) const;
  void clear();

  // One stored version of one document.
  class Entry {
  public:
    INLINE Entry();
    INLINE bool operator < (const Entry &other) const;
    INLINE const HTTPDate &get_date() const;

    void delete_file(const Filename &dirname, const string &reason);
    void input(istream &in);
    void output(ostream &out) const;
    void write(ostream &out) const;

    // The name under which the document is recorded in the catalog.
    string _document_name;
    // The name of the file holding this version, relative to the
    // download directory.
    string _filename;
    // The spec reported for this version when it was fetched.
    DocumentSpec _document_spec;
    // When this version was downloaded.
    HTTPDate _download_date;
  };

  // All of the versions of a single document.  read() sorts these by
  // date, oldest first.
  typedef pvector<Entry *> Entries;

  // Maps a document name to its list of versions.
  typedef pmap<string, Entries> Table;
  Table _table;

  // The filenames of all entries, used to keep new filenames unique.
  typedef pset<string> Filenames;
  Filenames _filenames;

private:
  // Copying would leave two catalogs deleting the same Entry
  // pointers, so the copy constructor and assignment operator are
  // declared private and intentionally left unimplemented.
  BackupCatalog(const BackupCatalog &copy);
  void operator = (const BackupCatalog &copy);
};
INLINE istream &operator >> (istream &in, BackupCatalog::Entry &entry);
INLINE ostream &operator << (ostream &out, const BackupCatalog::Entry &entry);
#include "backupCatalog.I"
#endif

View File

@ -0,0 +1,431 @@
// Filename: httpBackup.cxx
// Created by: drose (29Jan03)
//
////////////////////////////////////////////////////////////////////
//
// PANDA 3D SOFTWARE
// Copyright (c) 2001, Disney Enterprises, Inc. All rights reserved
//
// All use of this software is subject to the terms of the Panda 3d
// Software license. You should have received a copy of this license
// along with this source code; you will also find a current copy of
// the license at http://www.panda3d.org/license.txt .
//
// To contact the maintainers of this program write to
// panda3d@yahoogroups.com .
//
////////////////////////////////////////////////////////////////////
#include "httpBackup.h"
#include "httpChannel.h"
static const int seconds_per_day = 60 * 60 * 24;
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::Constructor
// Access: Public
// Description: Sets up the command-line interface for the program:
// the usage line, the program description, and one
// add_option() registration per supported option. It
// then assigns the default values for the options that
// have defaults.
////////////////////////////////////////////////////////////////////
HTTPBackup::
HTTPBackup() {
// Replace whatever usage lines are already present with our own.
clear_runlines();
add_runline("[opts] url");
set_program_description
("This program is designed to run periodically as a "
"background task, e.g. via a cron job. It fetches the "
"latest copy of a document from an HTTP server and "
"stores it, along with an optional number of previous "
"versions, in a local directory so that it may be "
"backed up to tape.\n\n"
"If the copy on disk is already the same as the latest "
"copy available on the HTTP server, this program generally "
"does nothing (although it may delete old versions if they "
"have expired past the maximum age specified on the command "
"line).");
// -p: proxy URL, parsed by our own dispatch_url().
add_option
("p", "url", 0,
"Specifies the URL of the HTTP proxy server, if one is required.",
&HTTPBackup::dispatch_url, &_got_proxy, &_proxy);
// -a: a flag only; there is no value to store.
add_option
("a", "", 0,
"If this option is specified, the document is always downloaded every "
"time httpbackup runs, even if the document does not appear to have "
"been changed since last time.",
&HTTPBackup::dispatch_none, &_always_download, NULL);
// -d: the download directory.
add_option
("d", "dirname", 0,
"Specifies the name of the directory in which to store the backup "
"versions of the document. The default is '.', the current "
"directory.",
&HTTPBackup::dispatch_filename, NULL, &_dirname);
// -c: the catalog file; made relative to -d in post_command_line().
add_option
("c", "filename", 0,
"Specifies the name of the catalog file that httpbackup uses to "
"record the HTTP entity tags, etc., downloaded from previous "
"versions. If a relative filename is given, it is relative to "
"the directory specified by -d. The default is 'Catalog'.",
&HTTPBackup::dispatch_filename, NULL, &_catalog_name);
// -n: the document name; defaulted from the URL in
// post_command_line() when left empty.
add_option
("n", "filename", 0,
"Specifies the name of the document that is being retrieved. This "
"name is written to the catalog file to identify entries for this "
"document, and is used to generate the filename to store the "
"backup versions. The default if this is omitted or empty is to use "
"the basename of the URL.",
&HTTPBackup::dispatch_string, NULL, &_document_name);
// -s: the strftime() pattern appended to each version's filename.
add_option
("s", "string", 0,
"Specifies how the date is appended onto the filename (see -n) for "
"each version of the file. This string should contain the sequence "
"of characters from strftime() that correspond to the desired date "
"format to append to the filename. The default is '.%Y-%m-%d.%H:%M', "
"or the year, month, day, hour, and minute. (The date is always "
"represented in GMT, according to HTTP convention.)",
&HTTPBackup::dispatch_string, NULL, &_version_append);
// -maxage / -minage / -maxver / -minver: the retention policy applied
// by cleanup_old().
add_option
("maxage", "days", 0,
"Specifies the maximum age, in days, to keep an old version of the "
"file around. If unspecified, the default is no limit. This may "
"be a floating-point number.",
&HTTPBackup::dispatch_double, &_got_max_keep_days, &_max_keep_days);
add_option
("minage", "days", 0,
"Specifies the minimum age, in days, an old version of the file must "
"have before it is automatically deleted due to exceeding -maxver. "
"The default is 0. This may be a floating-point number.",
&HTTPBackup::dispatch_double, NULL, &_min_keep_days);
add_option
("maxver", "count", 0,
"Specifies the maximum number of old versions of the file to keep "
"around. If unspecified, the default is no limit.",
&HTTPBackup::dispatch_int, &_got_max_keep_versions, &_max_keep_versions);
add_option
("minver", "count", 0,
"Specifies the minimum number of old versions to keep after "
"deleting versions older than -maxage. The default is 1.",
&HTTPBackup::dispatch_int, NULL, &_min_keep_versions);
// Defaults, matching the values advertised in the option
// descriptions above. The two "max" values are only placeholders:
// the rest of the program consults them only when the corresponding
// _got_max_* flag is set.
_dirname = ".";
_catalog_name = "Catalog";
_version_append = ".%Y-%m-%d.%H:%M";
_max_keep_days = 0.0;
_min_keep_days = 0.0;
_max_keep_versions = 0;
_min_keep_versions = 1;
}
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::handle_args
// Access: Protected, Virtual
// Description: Processes the arguments that remain on the command
// line after all of the -options have been parsed.
// Exactly one argument is expected: the URL of the
// document, which must name both a server and a path.
// Returns true if the argument is acceptable, false
// (after reporting the problem to nout) otherwise.
////////////////////////////////////////////////////////////////////
bool HTTPBackup::
handle_args(ProgramBase::Args &args) {
  if (args.size() == 1) {
    _url = URLSpec(args[0]);
    if (_url.has_server() && _url.has_path()) {
      return true;
    }

    nout << "Invalid URL specification: " << args[0] << "\n";
    return false;
  }

  nout << "You must specify the URL of the document to download "
       << "on the command line.\n\n";
  return false;
}
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::post_command_line
// Access: Protected, Virtual
// Description: This is called after the command line has been
// completely processed, and it gives the program a
// chance to do some last-minute processing and
// validation of the options and arguments. It should
// return true if everything is fine, false if there is
// an error.
////////////////////////////////////////////////////////////////////
bool HTTPBackup::
post_command_line() {
if (_got_proxy) {
_http.set_proxy(_proxy);
}
if (!_catalog_name.is_fully_qualified()) {
_catalog_name = Filename(_dirname, _catalog_name);
}
if (_document_name.empty()) {
Filename pathname = _url.get_path();
_document_name = pathname.get_basename();
}
if (_min_keep_days < 0.0) {
nout << "Invalid -minage " << _min_keep_days << "\n";
return false;
}
if (_min_keep_versions < 0) {
nout << "Invalid -minver " << _min_keep_versions << "\n";
return false;
}
if (_got_max_keep_days) {
if (_max_keep_days < _min_keep_days) {
nout
<< "-maxage " << _max_keep_days << " is less than -minage "
<< _min_keep_days << "\n";
return false;
}
}
if (_got_max_keep_versions) {
if (_max_keep_versions < _min_keep_versions) {
nout
<< "-maxver " << _max_keep_versions << " is less than -minver "
<< _min_keep_versions << "\n";
return false;
}
}
_now = HTTPDate::now();
if (_got_max_keep_days) {
_max_keep_date = _now - (time_t)(_max_keep_days * seconds_per_day);
}
_min_keep_date = _now - (time_t)(_min_keep_days * seconds_per_day);
return true;
}
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::dispatch_url
// Access: Protected, Static
// Description: Dispatch function for a URL parameter.
////////////////////////////////////////////////////////////////////
bool HTTPBackup::
dispatch_url(const string &opt, const string &arg, void *var) {
URLSpec *up = (URLSpec *)var;
(*up) = URLSpec(arg);
return true;
}
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::run
// Access: Public
// Description: Performs the backup: loads the existing catalog (if
// there is one), fetches the latest version of the
// document, removes versions that are no longer wanted,
// and rewrites the catalog. Exits the program with
// status 1 if the catalog cannot be read or written or
// if the fetch fails; a failure while cleaning up old
// versions is only reported.
////////////////////////////////////////////////////////////////////
void HTTPBackup::
run() {
  _catalog_name.set_text();

  // Step 1: load the catalog.  Not having one yet is fine.
  if (_catalog_name.exists()) {
    if (!_catalog.read(_catalog_name)) {
      nout << "Unable to read " << _catalog_name << ".\n";
      exit(1);
    }
  } else {
    nout << _catalog_name << " does not yet exist.\n";
  }

  // Step 2: get the newest version of the document.
  const bool fetched = fetch_latest();
  if (!fetched) {
    nout << "Errors while processing latest.\n";
    exit(1);
  }

  // Step 3: expire old versions.  A problem here is not worth
  // abandoning the run for.
  const bool cleaned = cleanup_old();
  if (!cleaned) {
    nout << "Errors while cleaning up old versions.\n";
  }

  // Step 4: save the updated catalog.
  nout << "Writing " << _catalog_name << "\n";
  _catalog_name.make_dir();
  const bool written = _catalog.write(_catalog_name);
  if (!written) {
    nout << "Unable to rewrite " << _catalog_name << ".\n";
    exit(1);
  }
}
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::fetch_latest
// Access: Private
// Description: Tries to get the latest version of the document from
// the server, if there is one available. On a
// successful download a new Entry is appended to the
// document's list in the catalog. Returns true on
// success (even if the most recent document hasn't
// changed), or false if there was some error.
////////////////////////////////////////////////////////////////////
bool HTTPBackup::
fetch_latest() {
  // Check the most recent version of this document.
  BackupCatalog::Entries &entries = _catalog._table[_document_name];

  DocumentSpec document_spec(_url);
  if (!entries.empty()) {
    // Start from the spec recorded for the newest version we hold, so
    // that (unless -a was given) the request only asks for something
    // newer than that.
    BackupCatalog::Entry *latest = entries[entries.size() - 1];
    document_spec = latest->_document_spec;
    document_spec.set_url(_url);
    if (!_always_download) {
      document_spec.set_request_mode(DocumentSpec::RM_newer);
    }
  }

  nout << "Fetching " << document_spec.get_url() << "\n";
  PT(HTTPChannel) channel = _http.make_channel(true);
  if (!channel->get_document(document_spec)) {
    if (channel->get_status_code() == 304) {
      nout << "Document has not been modified.\n";
      // This is considered a success condition.
      return true;
    }
    nout << "Error fetching document: " << channel->get_status_code()
         << " " << channel->get_status_string() << "\n";
    return false;
  }

  // The document is available.  Create an Entry for it.
  BackupCatalog::Entry *entry = new BackupCatalog::Entry;
  entry->_document_name = _document_name;
  entry->_document_spec = channel->get_document_spec();
  entry->_download_date = _now;

  // Generate a filename based on the last-modified date or the
  // download date.
  time_t timestamp = entry->get_date().get_time();
  struct tm *tp = gmtime(&timestamp);
  static const int buffer_size = 512;
  char buffer[buffer_size];
  // gmtime() returns NULL for a time it cannot represent; since the
  // date may have come from the server, guard against that rather
  // than handing strftime() a null pointer.  In that case, as when
  // strftime() itself fails, nothing is appended to the name.
  if (tp == (struct tm *)NULL ||
      strftime(buffer, buffer_size, _version_append.c_str(), tp) == 0) {
    buffer[0] = '\0';
  }

  string filename = _document_name + string(buffer);

  // Check the filename for uniqueness, just for good measure.  This
  // also records the (possibly adjusted) name in the catalog's set of
  // filenames.
  check_unique(filename);
  entry->_filename = filename;

  // Download to the indicated filename.
  Filename pathname(_dirname, filename);
  nout << "Downloading to " << pathname << "\n";
  pathname.make_dir();
  if (!channel->download_to_file(pathname)) {
    nout << "Error while downloading.\n";
    // The entry is not going into the catalog after all, so release
    // the filename that check_unique() reserved for it.
    _catalog._filenames.erase(filename);
    delete entry;
    return false;
  }

  // The file is successfully downloaded; save the entry.
  entries.push_back(entry);
  return true;
}
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::cleanup_old
// Access: Private
// Description: Removes old versions that are no longer needed.
////////////////////////////////////////////////////////////////////
bool HTTPBackup::
cleanup_old() {
BackupCatalog::Entries &entries = _catalog._table[_document_name];
if (_got_max_keep_versions &&
(int)entries.size() > _max_keep_versions) {
// Too many versions; delete the oldest ones, except those newer
// than min_keep_date.
int num_delete = entries.size() - _max_keep_versions;
while (num_delete > 0 && entries[num_delete - 1]->get_date() > _min_keep_date) {
num_delete--;
}
for (int i = 0; i < num_delete; i++) {
entries[i]->delete_file(_dirname, "too many old versions");
delete entries[i];
}
entries.erase(entries.begin(), entries.begin() + num_delete);
}
if (_got_max_keep_days &&
(int)entries.size() > _min_keep_versions &&
entries[0]->get_date() < _max_keep_date) {
// The oldest version is too old; delete all the oldest ones,
// but keep at least min_keep_versions of them around.
int num_delete = 1;
while (num_delete < (int)entries.size() - _min_keep_versions &&
entries[num_delete]->get_date() < _max_keep_date) {
num_delete++;
}
for (int i = 0; i < num_delete; i++) {
entries[i]->delete_file(_dirname, "too old");
delete entries[i];
}
entries.erase(entries.begin(), entries.begin() + num_delete);
}
return true;
}
////////////////////////////////////////////////////////////////////
// Function: HTTPBackup::check_unique
// Access: Private
// Description: Makes sure the given filename does not collide with
// any filename already known to the catalog, and
// records it in the catalog's set of filenames. If
// the name is already taken, it is modified in place
// by appending a '.' and a short run of lowercase
// letters, trying successive suffixes until an unused
// name is found.
////////////////////////////////////////////////////////////////////
void HTTPBackup::
check_unique(string &filename) {
  if (_catalog._filenames.insert(filename).second) {
    // Nobody else has this name; nothing more to do.
    return;
  }

  // Conflict.  Try numbered variations of the name, with the number
  // spelled out in letters (base 26, least-significant digit first).
  const string stem = filename + '.';
  for (unsigned int attempt = 1; ; ++attempt) {
    string candidate = stem;
    for (unsigned int rest = attempt; rest > 0; rest /= 26) {
      candidate += (char)((rest % 26) + 'a');
    }

    filename = candidate;
    if (_catalog._filenames.insert(filename).second) {
      return;
    }
  }
}
int
main(int argc, char *argv[]) {
HTTPBackup prog;
prog.parse_command_line(argc, argv);
prog.run();
return 0;
}

View File

@ -0,0 +1,86 @@
// Filename: httpBackup.h
// Created by: drose (29Jan03)
//
////////////////////////////////////////////////////////////////////
//
// PANDA 3D SOFTWARE
// Copyright (c) 2001, Disney Enterprises, Inc. All rights reserved
//
// All use of this software is subject to the terms of the Panda 3d
// Software license. You should have received a copy of this license
// along with this source code; you will also find a current copy of
// the license at http://www.panda3d.org/license.txt .
//
// To contact the maintainers of this program write to
// panda3d@yahoogroups.com .
//
////////////////////////////////////////////////////////////////////
#ifndef HTTPBACKUP_H
#define HTTPBACKUP_H
#include "pandaappbase.h"
#include "programBase.h"
#include "backupCatalog.h"
#include "httpClient.h"
#include "urlSpec.h"
////////////////////////////////////////////////////////////////////
// Class : HTTPBackup
// Description : This program is designed to run periodically as a
// background task, e.g. via a cron job. It fetches the
// latest copy of a document from an HTTP server and
// stores it, along with an optional number of previous
// versions, in a local directory so that it may be
// backed up to tape.
//
// If the copy on disk is already the same as the latest
// copy available on the HTTP server, nothing new is
// downloaded; old versions may still be deleted
// according to the -maxage and -maxver options, and the
// catalog is rewritten.
////////////////////////////////////////////////////////////////////
class HTTPBackup : public ProgramBase {
public:
HTTPBackup();
protected:
// Command-line processing hooks.
virtual bool handle_args(Args &args);
virtual bool post_command_line();
// Option dispatcher that stores its argument into a URLSpec.
static bool dispatch_url(const string &opt, const string &arg, void *var);
public:
// Does the actual work; call after the command line has been parsed.
void run();
private:
bool fetch_latest();
bool cleanup_old();
void check_unique(string &filename);
private:
// -p: the proxy to use, and whether one was specified.
URLSpec _proxy;
bool _got_proxy;
// The URL of the document, from the command line.
URLSpec _url;
// -d: the directory in which the versions are stored.
Filename _dirname;
// -c: the catalog file.
Filename _catalog_name;
// -n: the name identifying the document in the catalog, and the
// stem of each version's filename.
string _document_name;
// -s: the strftime() pattern appended to the stem.
string _version_append;
// -a: download even if the document appears unchanged.
bool _always_download;
// -maxage / -minage, in days. _max_keep_days is meaningful only
// when _got_max_keep_days is set.
double _max_keep_days;
bool _got_max_keep_days;
double _min_keep_days;
// -maxver / -minver. _max_keep_versions is meaningful only when
// _got_max_keep_versions is set.
int _max_keep_versions;
bool _got_max_keep_versions;
int _min_keep_versions;
// The age limits converted to absolute dates by post_command_line().
// _max_keep_date is assigned only when -maxage was given.
HTTPDate _max_keep_date;
HTTPDate _min_keep_date;
// The time at which post_command_line() ran; also recorded as the
// download date of a newly fetched version.
HTTPDate _now;
HTTPClient _http;
BackupCatalog _catalog;
};
#endif