add support for text/binary distinction within multifile subfiles

This commit is contained in:
David Rose 2011-07-27 18:16:44 +00:00
parent 49c1710712
commit 5055403995
5 changed files with 179 additions and 26 deletions

View File

@ -48,11 +48,15 @@ Filename chdir_to; // -C
bool got_chdir_to = false;
size_t scale_factor = 0; // -F
pset<string> dont_compress; // -Z
pset<string> text_ext; // -X
vector_string sign_params; // -S
// Default extensions not to compress. May be overridden with -Z.
string dont_compress_str = "jpg,png,mp3,ogg";
// Default text extensions. May be overridden with -X.
string text_ext_str = "txt";
bool got_record_timestamp_flag = false;
bool record_timestamp_flag = true;
@ -210,6 +214,12 @@ help() {
" files that are not to be compressed. The default if this is omitted is\n"
" \"" << dont_compress_str << "\". Specify -Z \"\" (be sure to include the space) to allow\n"
" all files to be compressed.\n\n"
" -X <extension_list>\n"
" Specify a comma-separated list of filename extensions that represent\n"
" text files. These files are opened and read in text mode, and added to\n"
" the multifile with the text flag set. The default if this is omitted is\n"
" \"" << text_ext_str << "\". Specify -X \"\" (be sure to include the space) to record\n"
" all files in binary mode.\n\n"
" -T <flag>\n"
" Enable or disable the recording of file timestamps within the multifile.\n"
@ -271,6 +281,20 @@ is_named(const string &subfile_name, const vector_string &params) {
return false;
}
bool
is_text(const Filename &subfile_name) {
// Returns true if this filename should be read as a text file,
// false otherwise.
string ext = subfile_name.get_extension();
if (text_ext.find(ext) != text_ext.end()) {
// This extension is listed on the -X parameter list; it's a text file.
return true;
}
return false;
}
int
get_compression_level(const Filename &subfile_name) {
// Returns the appropriate compression level for the named file.
@ -317,7 +341,7 @@ do_add_files(Multifile *multifile, const pvector<Filename> &filenames) {
bool okflag = true;
pvector<Filename>::const_iterator fi;
for (fi = filenames.begin(); fi != filenames.end(); ++fi) {
const Filename &subfile_name = (*fi);
Filename subfile_name = (*fi);
if (subfile_name.is_directory()) {
if (!do_add_directory(multifile, subfile_name)) {
@ -329,6 +353,12 @@ do_add_files(Multifile *multifile, const pvector<Filename> &filenames) {
okflag = false;
} else {
if (is_text(subfile_name)) {
subfile_name.set_text();
} else {
subfile_name.set_binary();
}
string new_subfile_name;
if (update) {
new_subfile_name = multifile->update_subfile
@ -623,6 +653,10 @@ list_files(const vector_string &params) {
if (multifile->is_subfile_encrypted(i)) {
encrypted_symbol = 'e';
}
char text_symbol = ' ';
if (multifile->is_subfile_text(i)) {
text_symbol = 't';
}
if (multifile->is_subfile_compressed(i)) {
size_t orig_length = multifile->get_subfile_length(i);
size_t internal_length = multifile->get_subfile_internal_length(i);
@ -631,25 +665,25 @@ list_files(const vector_string &params) {
ratio = (double)internal_length / (double)orig_length;
}
if (ratio > 1.0) {
printf("%12d worse %c %s %s\n",
printf("%12d worse %c%c %s %s\n",
(int)multifile->get_subfile_length(i),
encrypted_symbol,
encrypted_symbol, text_symbol,
format_timestamp(multifile->get_record_timestamp(),
multifile->get_subfile_timestamp(i)),
subfile_name.c_str());
} else {
printf("%12d %3.0f%% %c %s %s\n",
printf("%12d %3.0f%% %c%c %s %s\n",
(int)multifile->get_subfile_length(i),
100.0 - ratio * 100.0,
encrypted_symbol,
encrypted_symbol, text_symbol,
format_timestamp(multifile->get_record_timestamp(),
multifile->get_subfile_timestamp(i)),
subfile_name.c_str());
}
} else {
printf("%12d %c %s %s\n",
printf("%12d %c%c %s %s\n",
(int)multifile->get_subfile_length(i),
encrypted_symbol,
encrypted_symbol, text_symbol,
format_timestamp(multifile->get_record_timestamp(),
multifile->get_subfile_timestamp(i)),
subfile_name.c_str());
@ -736,7 +770,7 @@ main(int argc, char *argv[]) {
extern char *optarg;
extern int optind;
static const char *optflags = "crutxkvz123456789Z:T:S:f:OC:ep:P:F:h";
static const char *optflags = "crutxkvz123456789Z:T:X:S:f:OC:ep:P:F:h";
int flag = getopt(argc, argv, optflags);
Filename rel_path;
while (flag != EOF) {
@ -804,6 +838,9 @@ main(int argc, char *argv[]) {
case 'Z':
dont_compress_str = optarg;
break;
case 'X':
text_ext_str = optarg;
break;
case 'S':
sign_params.push_back(optarg);
break;
@ -890,6 +927,9 @@ main(int argc, char *argv[]) {
// Split out the extensions named by -Z into different words.
tokenize_extensions(dont_compress_str, dont_compress);
// Ditto for -X.
tokenize_extensions(text_ext_str, text_ext);
// Build a list of remaining parameters.
vector_string params;
params.reserve(argc - 1);

View File

@ -61,6 +61,15 @@ ConfigVariableBool keep_temporary_files
"default) to delete these. Mainly useful for debugging "
"when the process goes wrong."));
// Transitional configuration switch, exposed under the PRC name
// "multifile-always-binary" and defaulting to false.  It selects
// between the old multifile behavior (every subfile handled in binary
// mode) and the new behavior (binary or text mode taken from the
// Filename); the description string below spells out both settings.
ConfigVariableBool multifile_always_binary
("multifile-always-binary", false,
PRC_DESC("This is a temporary transition variable. Set this true "
"to enable the old behavior for multifiles: all subfiles are "
"always added to and extracted from the multifile in binary mode. "
"Set it false to enable the new behavior: subfiles may be added "
"or extracted in either binary or text mode, according to the "
"set_binary() or set_text() flag on the Filename."));
ConfigVariableBool collect_tcp
("collect-tcp", false,
PRC_DESC("Set this true to enable accumulation of several small consecutive "

View File

@ -50,6 +50,7 @@ extern ConfigVariableInt patchfile_buffer_size;
extern ConfigVariableInt patchfile_zone_size;
extern ConfigVariableBool keep_temporary_files;
extern ConfigVariableBool multifile_always_binary;
extern EXPCL_PANDAEXPRESS ConfigVariableBool collect_tcp;
extern EXPCL_PANDAEXPRESS ConfigVariableDouble collect_tcp_interval;

View File

@ -467,6 +467,11 @@ set_scale_factor(size_t scale_factor) {
// is replaced without examining its contents (but see
// also update_subfile).
//
// Filename::set_binary() or set_text() must have been
// called previously to specify the nature of the source
// file. If set_text() was called, the text flag will
// be set on the subfile.
//
// Returns the subfile name on success (it might have
// been modified slightly), or empty string on failure.
////////////////////////////////////////////////////////////////////
@ -475,15 +480,24 @@ add_subfile(const string &subfile_name, const Filename &filename,
int compression_level) {
nassertr(is_write_valid(), string());
if (!filename.exists()) {
Filename fname = filename;
if (multifile_always_binary) {
fname.set_binary();
}
nassertr(fname.is_binary_or_text(), string());
if (!fname.exists()) {
return string();
}
string name = standardize_subfile_name(subfile_name);
if (!name.empty()) {
Subfile *subfile = new Subfile;
subfile->_name = name;
subfile->_source_filename = filename;
subfile->_source_filename.set_binary();
subfile->_source_filename = fname;
if (fname.is_text()) {
subfile->_flags |= SF_text;
}
add_new_subfile(subfile, compression_level);
}
@ -500,6 +514,7 @@ add_subfile(const string &subfile_name, const Filename &filename,
// Description: Adds a file from a stream as a subfile to the Multifile.
// The indicated istream will be read and its contents
// added to the Multifile at the next call to flush().
// The file will be added as a binary subfile.
//
// Note that the istream must remain untouched and
// unused by any other code until flush() is called. At
@ -538,13 +553,25 @@ add_subfile(const string &subfile_name, istream *subfile_data,
// compared byte-for-byte to the disk file, and it is
// replaced only if it is different; otherwise, the
// multifile is left unchanged.
//
// Filename::set_binary() or set_text() must have been
// called previously to specify the nature of the source
// file. If set_text() was called, the text flag will
// be set on the subfile.
////////////////////////////////////////////////////////////////////
string Multifile::
update_subfile(const string &subfile_name, const Filename &filename,
int compression_level) {
nassertr(is_write_valid(), string());
if (!filename.exists()) {
Filename fname = filename;
if (multifile_always_binary) {
fname.set_binary();
}
nassertr(fname.is_binary_or_text(), string());
if (!fname.exists()) {
return string();
}
string name = standardize_subfile_name(subfile_name);
@ -552,7 +579,7 @@ update_subfile(const string &subfile_name, const Filename &filename,
int index = find_subfile(name);
if (index >= 0) {
// The subfile already exists; compare it to the source file.
if (compare_subfile(index, filename)) {
if (compare_subfile(index, fname)) {
// The files are identical; do nothing.
return name;
}
@ -562,8 +589,10 @@ update_subfile(const string &subfile_name, const Filename &filename,
// source file. Add the new source file.
Subfile *subfile = new Subfile;
subfile->_name = name;
subfile->_source_filename = filename;
subfile->_source_filename.set_binary();
subfile->_source_filename = fname;
if (fname.is_text()) {
subfile->_flags |= SF_text;
}
add_new_subfile(subfile, compression_level);
}
@ -1644,6 +1673,23 @@ is_subfile_encrypted(int index) const {
return (_subfiles[index]->_flags & SF_encrypted) != 0;
}
////////////////////////////////////////////////////////////////////
//     Function: Multifile::is_subfile_text
//       Access: Published
//  Description: Reports whether the subfile at the given index has
//               the SF_text flag set.  Returns true for a subfile
//               that holds text data and false for one that holds
//               binary data.  A text subfile may have been run
//               through end-of-line conversion at the time it was
//               added; the bytes actually stored in the multifile
//               follow the standard Unix end-of-line convention
//               (\n rather than \r\n).
////////////////////////////////////////////////////////////////////
bool Multifile::
is_subfile_text(int index) const {
  // An out-of-range index is a caller error; report it and answer
  // false.
  nassertr(index >= 0 && index < (int)_subfiles.size(), false);

  if ((_subfiles[index]->_flags & SF_text) == 0) {
    // The text bit is clear, so this is a binary subfile.
    return false;
  }
  return true;
}
////////////////////////////////////////////////////////////////////
// Function: Multifile::get_index_end
// Access: Published
@ -1776,7 +1822,20 @@ extract_subfile(int index, const Filename &filename) {
nassertr(index >= 0 && index < (int)_subfiles.size(), false);
Filename fname = filename;
if (multifile_always_binary) {
fname.set_binary();
}
nassertr(fname.is_binary_or_text(), false);
if (!fname.is_binary_or_text()) {
// If we haven't specified binary or text, infer it from the type
// of the subfile.
if ((_subfiles[index]->_flags & SF_text) != 0) {
fname.set_text();
} else {
fname.set_binary();
}
}
fname.make_dir();
pofstream out;
if (!fname.open_write(out, true)) {
@ -1828,6 +1887,13 @@ extract_subfile_to(int index, ostream &out) {
// file on disk with the nth subfile. Returns true if
// the files are equivalent, or false if they are
// different (or the file is missing).
//
// If Filename::set_binary() or set_text() has already
// been called, it specifies the nature of the source
// file. If this is different from the text flag of the
// subfile, the comparison will always return false.
// If this has not been specified, it will be set from
// the text flag of the subfile.
////////////////////////////////////////////////////////////////////
bool Multifile::
compare_subfile(int index, const Filename &filename) {
@ -1840,19 +1906,53 @@ compare_subfile(int index, const Filename &filename) {
return false;
}
Filename fname = filename;
if (fname.is_binary()) {
// If we've specified a binary file, it had better be a binary
// subfile.
if ((_subfiles[index]->_flags & SF_text) != 0) {
if (express_cat.is_debug()) {
express_cat.debug()
<< "File is not binary: " << filename << "\n";
}
return false;
}
} else if (fname.is_text()) {
// If we've specified a text file, it had better be a text
// subfile.
if ((_subfiles[index]->_flags & SF_text) == 0) {
if (express_cat.is_debug()) {
express_cat.debug()
<< "File is not text: " << filename << "\n";
}
return false;
}
} else {
// If we haven't specified binary or text, infer it from the type
// of the subfile.
if ((_subfiles[index]->_flags & SF_text) != 0) {
fname.set_text();
} else {
fname.set_binary();
}
}
istream *in1 = open_read_subfile(index);
if (in1 == (istream *)NULL) {
return false;
}
pifstream in2;
Filename bin_filename = Filename::binary_filename(filename);
if (!bin_filename.open_read(in2)) {
if (!fname.open_read(in2)) {
express_cat.info()
<< "Cannot read " << filename << "\n";
return false;
}
if (fname.is_binary()) {
// Check the file size.
in2.seekg(0, ios::end);
streampos file_size = in2.tellg();
@ -1862,6 +1962,7 @@ compare_subfile(int index, const Filename &filename) {
close_read_subfile(in1);
return false;
}
}
// Check the file data, byte-for-byte.
in2.seekg(0);

View File

@ -129,6 +129,7 @@ PUBLISHED:
time_t get_subfile_timestamp(int index) const;
bool is_subfile_compressed(int index) const;
bool is_subfile_encrypted(int index) const;
bool is_subfile_text(int index) const;
streampos get_index_end() const;
streampos get_subfile_internal_start(int index) const;
@ -161,6 +162,7 @@ private:
SF_compressed = 0x0008,
SF_encrypted = 0x0010,
SF_signature = 0x0020,
SF_text = 0x0040,
};
class Subfile {