add support for text/binary distinction within multifile subfiles

This commit is contained in:
David Rose 2011-07-27 18:16:44 +00:00
parent 49c1710712
commit 5055403995
5 changed files with 179 additions and 26 deletions

View File

@ -48,11 +48,15 @@ Filename chdir_to; // -C
bool got_chdir_to = false; bool got_chdir_to = false;
size_t scale_factor = 0; // -F size_t scale_factor = 0; // -F
pset<string> dont_compress; // -Z pset<string> dont_compress; // -Z
pset<string> text_ext; // -X
vector_string sign_params; // -S vector_string sign_params; // -S
// Default extensions not to compress. May be overridden with -Z. // Default extensions not to compress. May be overridden with -Z.
string dont_compress_str = "jpg,png,mp3,ogg"; string dont_compress_str = "jpg,png,mp3,ogg";
// Default text extensions. May be overridden with -X.
string text_ext_str = "txt";
bool got_record_timestamp_flag = false; bool got_record_timestamp_flag = false;
bool record_timestamp_flag = true; bool record_timestamp_flag = true;
@ -210,6 +214,12 @@ help() {
" files that are not to be compressed. The default if this is omitted is\n" " files that are not to be compressed. The default if this is omitted is\n"
" \"" << dont_compress_str << "\". Specify -Z \"\" (be sure to include the space) to allow\n" " \"" << dont_compress_str << "\". Specify -Z \"\" (be sure to include the space) to allow\n"
" all files to be compressed.\n\n" " all files to be compressed.\n\n"
" -X <extension_list>\n"
" Specify a comma-separated list of filename extensions that represent\n"
" text files. These files are opened and read in text mode, and added to\n"
" the multifile with the text flag set. The default if this is omitted is\n"
" \"" << text_ext_str << "\". Specify -X \"\" (be sure to include the space) to record\n"
" all files in binary mode.\n\n"
" -T <flag>\n" " -T <flag>\n"
" Enable or disable the recording of file timestamps within the multifile.\n" " Enable or disable the recording of file timestamps within the multifile.\n"
@ -271,6 +281,20 @@ is_named(const string &subfile_name, const vector_string &params) {
return false; return false;
} }
bool
is_text(const Filename &subfile_name) {
// Returns true if this filename should be read as a text file,
// false otherwise.
string ext = subfile_name.get_extension();
if (text_ext.find(ext) != text_ext.end()) {
// This extension is listed on the -X parameter list; it's a text file.
return true;
}
return false;
}
int int
get_compression_level(const Filename &subfile_name) { get_compression_level(const Filename &subfile_name) {
// Returns the appropriate compression level for the named file. // Returns the appropriate compression level for the named file.
@ -317,7 +341,7 @@ do_add_files(Multifile *multifile, const pvector<Filename> &filenames) {
bool okflag = true; bool okflag = true;
pvector<Filename>::const_iterator fi; pvector<Filename>::const_iterator fi;
for (fi = filenames.begin(); fi != filenames.end(); ++fi) { for (fi = filenames.begin(); fi != filenames.end(); ++fi) {
const Filename &subfile_name = (*fi); Filename subfile_name = (*fi);
if (subfile_name.is_directory()) { if (subfile_name.is_directory()) {
if (!do_add_directory(multifile, subfile_name)) { if (!do_add_directory(multifile, subfile_name)) {
@ -329,6 +353,12 @@ do_add_files(Multifile *multifile, const pvector<Filename> &filenames) {
okflag = false; okflag = false;
} else { } else {
if (is_text(subfile_name)) {
subfile_name.set_text();
} else {
subfile_name.set_binary();
}
string new_subfile_name; string new_subfile_name;
if (update) { if (update) {
new_subfile_name = multifile->update_subfile new_subfile_name = multifile->update_subfile
@ -623,6 +653,10 @@ list_files(const vector_string &params) {
if (multifile->is_subfile_encrypted(i)) { if (multifile->is_subfile_encrypted(i)) {
encrypted_symbol = 'e'; encrypted_symbol = 'e';
} }
char text_symbol = ' ';
if (multifile->is_subfile_text(i)) {
text_symbol = 't';
}
if (multifile->is_subfile_compressed(i)) { if (multifile->is_subfile_compressed(i)) {
size_t orig_length = multifile->get_subfile_length(i); size_t orig_length = multifile->get_subfile_length(i);
size_t internal_length = multifile->get_subfile_internal_length(i); size_t internal_length = multifile->get_subfile_internal_length(i);
@ -631,25 +665,25 @@ list_files(const vector_string &params) {
ratio = (double)internal_length / (double)orig_length; ratio = (double)internal_length / (double)orig_length;
} }
if (ratio > 1.0) { if (ratio > 1.0) {
printf("%12d worse %c %s %s\n", printf("%12d worse %c%c %s %s\n",
(int)multifile->get_subfile_length(i), (int)multifile->get_subfile_length(i),
encrypted_symbol, encrypted_symbol, text_symbol,
format_timestamp(multifile->get_record_timestamp(), format_timestamp(multifile->get_record_timestamp(),
multifile->get_subfile_timestamp(i)), multifile->get_subfile_timestamp(i)),
subfile_name.c_str()); subfile_name.c_str());
} else { } else {
printf("%12d %3.0f%% %c %s %s\n", printf("%12d %3.0f%% %c%c %s %s\n",
(int)multifile->get_subfile_length(i), (int)multifile->get_subfile_length(i),
100.0 - ratio * 100.0, 100.0 - ratio * 100.0,
encrypted_symbol, encrypted_symbol, text_symbol,
format_timestamp(multifile->get_record_timestamp(), format_timestamp(multifile->get_record_timestamp(),
multifile->get_subfile_timestamp(i)), multifile->get_subfile_timestamp(i)),
subfile_name.c_str()); subfile_name.c_str());
} }
} else { } else {
printf("%12d %c %s %s\n", printf("%12d %c%c %s %s\n",
(int)multifile->get_subfile_length(i), (int)multifile->get_subfile_length(i),
encrypted_symbol, encrypted_symbol, text_symbol,
format_timestamp(multifile->get_record_timestamp(), format_timestamp(multifile->get_record_timestamp(),
multifile->get_subfile_timestamp(i)), multifile->get_subfile_timestamp(i)),
subfile_name.c_str()); subfile_name.c_str());
@ -736,7 +770,7 @@ main(int argc, char *argv[]) {
extern char *optarg; extern char *optarg;
extern int optind; extern int optind;
static const char *optflags = "crutxkvz123456789Z:T:S:f:OC:ep:P:F:h"; static const char *optflags = "crutxkvz123456789Z:T:X:S:f:OC:ep:P:F:h";
int flag = getopt(argc, argv, optflags); int flag = getopt(argc, argv, optflags);
Filename rel_path; Filename rel_path;
while (flag != EOF) { while (flag != EOF) {
@ -804,6 +838,9 @@ main(int argc, char *argv[]) {
case 'Z': case 'Z':
dont_compress_str = optarg; dont_compress_str = optarg;
break; break;
case 'X':
text_ext_str = optarg;
break;
case 'S': case 'S':
sign_params.push_back(optarg); sign_params.push_back(optarg);
break; break;
@ -890,6 +927,9 @@ main(int argc, char *argv[]) {
// Split out the extensions named by -Z into different words. // Split out the extensions named by -Z into different words.
tokenize_extensions(dont_compress_str, dont_compress); tokenize_extensions(dont_compress_str, dont_compress);
// Ditto for -X.
tokenize_extensions(text_ext_str, text_ext);
// Build a list of remaining parameters. // Build a list of remaining parameters.
vector_string params; vector_string params;
params.reserve(argc - 1); params.reserve(argc - 1);

View File

@ -61,6 +61,15 @@ ConfigVariableBool keep_temporary_files
"default) to delete these. Mainly useful for debugging " "default) to delete these. Mainly useful for debugging "
"when the process goes wrong.")); "when the process goes wrong."));
// Transition switch for the text/binary subfile distinction.  The
// Multifile code in this change consults it when adding, updating and
// extracting subfiles: when it is true, the Filename is forced to
// binary mode before use.  The default is false.
ConfigVariableBool multifile_always_binary
("multifile-always-binary", false,
PRC_DESC("This is a temporary transition variable. Set this true "
"to enable the old behavior for multifiles: all subfiles are "
"always added to and extracted from the multifile in binary mode. "
"Set it false to enable the new behavior: subfiles may be added "
"or extracted in either binary or text mode, according to the "
"set_binary() or set_text() flag on the Filename."));
ConfigVariableBool collect_tcp ConfigVariableBool collect_tcp
("collect-tcp", false, ("collect-tcp", false,
PRC_DESC("Set this true to enable accumulation of several small consecutive " PRC_DESC("Set this true to enable accumulation of several small consecutive "

View File

@ -50,6 +50,7 @@ extern ConfigVariableInt patchfile_buffer_size;
extern ConfigVariableInt patchfile_zone_size; extern ConfigVariableInt patchfile_zone_size;
extern ConfigVariableBool keep_temporary_files; extern ConfigVariableBool keep_temporary_files;
extern ConfigVariableBool multifile_always_binary;
extern EXPCL_PANDAEXPRESS ConfigVariableBool collect_tcp; extern EXPCL_PANDAEXPRESS ConfigVariableBool collect_tcp;
extern EXPCL_PANDAEXPRESS ConfigVariableDouble collect_tcp_interval; extern EXPCL_PANDAEXPRESS ConfigVariableDouble collect_tcp_interval;

View File

@ -467,6 +467,11 @@ set_scale_factor(size_t scale_factor) {
// is replaced without examining its contents (but see // is replaced without examining its contents (but see
// also update_subfile). // also update_subfile).
// //
// Filename::set_binary() or set_text() must have been
// called previously to specify the nature of the source
// file. If set_text() was called, the text flag will
// be set on the subfile.
//
// Returns the subfile name on success (it might have // Returns the subfile name on success (it might have
// been modified slightly), or empty string on failure. // been modified slightly), or empty string on failure.
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
@ -475,15 +480,24 @@ add_subfile(const string &subfile_name, const Filename &filename,
int compression_level) { int compression_level) {
nassertr(is_write_valid(), string()); nassertr(is_write_valid(), string());
if (!filename.exists()) { Filename fname = filename;
if (multifile_always_binary) {
fname.set_binary();
}
nassertr(fname.is_binary_or_text(), string());
if (!fname.exists()) {
return string(); return string();
} }
string name = standardize_subfile_name(subfile_name); string name = standardize_subfile_name(subfile_name);
if (!name.empty()) { if (!name.empty()) {
Subfile *subfile = new Subfile; Subfile *subfile = new Subfile;
subfile->_name = name; subfile->_name = name;
subfile->_source_filename = filename; subfile->_source_filename = fname;
subfile->_source_filename.set_binary(); if (fname.is_text()) {
subfile->_flags |= SF_text;
}
add_new_subfile(subfile, compression_level); add_new_subfile(subfile, compression_level);
} }
@ -500,6 +514,7 @@ add_subfile(const string &subfile_name, const Filename &filename,
// Description: Adds a file from a stream as a subfile to the Multifile. // Description: Adds a file from a stream as a subfile to the Multifile.
// The indicated istream will be read and its contents // The indicated istream will be read and its contents
// added to the Multifile at the next call to flush(). // added to the Multifile at the next call to flush().
// The file will be added as a binary subfile.
// //
// Note that the istream must remain untouched and // Note that the istream must remain untouched and
// unused by any other code until flush() is called. At // unused by any other code until flush() is called. At
@ -538,13 +553,25 @@ add_subfile(const string &subfile_name, istream *subfile_data,
// compared byte-for-byte to the disk file, and it is // compared byte-for-byte to the disk file, and it is
// replaced only if it is different; otherwise, the // replaced only if it is different; otherwise, the
// multifile is left unchanged. // multifile is left unchanged.
//
// Filename::set_binary() or set_text() must have been
// called previously to specify the nature of the source
// file. If set_text() was called, the text flag will
// be set on the subfile.
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
string Multifile:: string Multifile::
update_subfile(const string &subfile_name, const Filename &filename, update_subfile(const string &subfile_name, const Filename &filename,
int compression_level) { int compression_level) {
nassertr(is_write_valid(), string()); nassertr(is_write_valid(), string());
if (!filename.exists()) { Filename fname = filename;
if (multifile_always_binary) {
fname.set_binary();
}
nassertr(fname.is_binary_or_text(), string());
if (!fname.exists()) {
return string(); return string();
} }
string name = standardize_subfile_name(subfile_name); string name = standardize_subfile_name(subfile_name);
@ -552,7 +579,7 @@ update_subfile(const string &subfile_name, const Filename &filename,
int index = find_subfile(name); int index = find_subfile(name);
if (index >= 0) { if (index >= 0) {
// The subfile already exists; compare it to the source file. // The subfile already exists; compare it to the source file.
if (compare_subfile(index, filename)) { if (compare_subfile(index, fname)) {
// The files are identical; do nothing. // The files are identical; do nothing.
return name; return name;
} }
@ -562,8 +589,10 @@ update_subfile(const string &subfile_name, const Filename &filename,
// source file. Add the new source file. // source file. Add the new source file.
Subfile *subfile = new Subfile; Subfile *subfile = new Subfile;
subfile->_name = name; subfile->_name = name;
subfile->_source_filename = filename; subfile->_source_filename = fname;
subfile->_source_filename.set_binary(); if (fname.is_text()) {
subfile->_flags |= SF_text;
}
add_new_subfile(subfile, compression_level); add_new_subfile(subfile, compression_level);
} }
@ -1644,6 +1673,23 @@ is_subfile_encrypted(int index) const {
return (_subfiles[index]->_flags & SF_encrypted) != 0; return (_subfiles[index]->_flags & SF_encrypted) != 0;
} }
////////////////////////////////////////////////////////////////////
//     Function: Multifile::is_subfile_text
//       Access: Published
//  Description: Reports whether the subfile at the given index has
//               the SF_text flag set.  Returns true for a text
//               subfile and false for a binary one; an out-of-range
//               index trips the assertion and yields false.
//
//               A text subfile may have gone through end-of-line
//               conversion at the time it was added, but the bytes
//               stored in the multifile follow the standard Unix
//               convention (\n rather than \r\n).
////////////////////////////////////////////////////////////////////
bool Multifile::
is_subfile_text(int index) const {
  nassertr(index >= 0 && index < (int)_subfiles.size(), false);

  // Test the single text bit in the subfile's flag word.
  const bool has_text_flag = ((_subfiles[index]->_flags & SF_text) != 0);
  return has_text_flag;
}
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Function: Multifile::get_index_end // Function: Multifile::get_index_end
// Access: Published // Access: Published
@ -1776,7 +1822,20 @@ extract_subfile(int index, const Filename &filename) {
nassertr(index >= 0 && index < (int)_subfiles.size(), false); nassertr(index >= 0 && index < (int)_subfiles.size(), false);
Filename fname = filename; Filename fname = filename;
fname.set_binary(); if (multifile_always_binary) {
fname.set_binary();
}
nassertr(fname.is_binary_or_text(), false);
if (!fname.is_binary_or_text()) {
// If we haven't specified binary or text, infer it from the type
// of the subfile.
if ((_subfiles[index]->_flags & SF_text) != 0) {
fname.set_text();
} else {
fname.set_binary();
}
}
fname.make_dir(); fname.make_dir();
pofstream out; pofstream out;
if (!fname.open_write(out, true)) { if (!fname.open_write(out, true)) {
@ -1828,6 +1887,13 @@ extract_subfile_to(int index, ostream &out) {
// file on disk with the nth subfile. Returns true if // file on disk with the nth subfile. Returns true if
// the files are equivalent, or false if they are // the files are equivalent, or false if they are
// different (or the file is missing). // different (or the file is missing).
//
// If Filename::set_binary() or set_text() has already
// been called, it specifies the nature of the source
// file. If this is different from the text flag of the
// subfile, the comparison will always return false.
// If this has not been specified, it will be set from
// the text flag of the subfile.
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
bool Multifile:: bool Multifile::
compare_subfile(int index, const Filename &filename) { compare_subfile(int index, const Filename &filename) {
@ -1840,27 +1906,62 @@ compare_subfile(int index, const Filename &filename) {
return false; return false;
} }
Filename fname = filename;
if (fname.is_binary()) {
// If we've specified a binary file, it had better be a binary
// subfile.
if ((_subfiles[index]->_flags & SF_text) != 0) {
if (express_cat.is_debug()) {
express_cat.debug()
<< "File is not binary: " << filename << "\n";
}
return false;
}
} else if (fname.is_text()) {
// If we've specified a text file, it had better be a text
// subfile.
if ((_subfiles[index]->_flags & SF_text) == 0) {
if (express_cat.is_debug()) {
express_cat.debug()
<< "File is not text: " << filename << "\n";
}
return false;
}
} else {
// If we haven't specified binary or text, infer it from the type
// of the subfile.
if ((_subfiles[index]->_flags & SF_text) != 0) {
fname.set_text();
} else {
fname.set_binary();
}
}
istream *in1 = open_read_subfile(index); istream *in1 = open_read_subfile(index);
if (in1 == (istream *)NULL) { if (in1 == (istream *)NULL) {
return false; return false;
} }
pifstream in2; pifstream in2;
Filename bin_filename = Filename::binary_filename(filename);
if (!bin_filename.open_read(in2)) { if (!fname.open_read(in2)) {
express_cat.info() express_cat.info()
<< "Cannot read " << filename << "\n"; << "Cannot read " << filename << "\n";
return false; return false;
} }
// Check the file size. if (fname.is_binary()) {
in2.seekg(0, ios::end); // Check the file size.
streampos file_size = in2.tellg(); in2.seekg(0, ios::end);
streampos file_size = in2.tellg();
if (file_size != (streampos)get_subfile_length(index)) {
// The files have different sizes. if (file_size != (streampos)get_subfile_length(index)) {
close_read_subfile(in1); // The files have different sizes.
return false; close_read_subfile(in1);
return false;
}
} }
// Check the file data, byte-for-byte. // Check the file data, byte-for-byte.

View File

@ -129,6 +129,7 @@ PUBLISHED:
time_t get_subfile_timestamp(int index) const; time_t get_subfile_timestamp(int index) const;
bool is_subfile_compressed(int index) const; bool is_subfile_compressed(int index) const;
bool is_subfile_encrypted(int index) const; bool is_subfile_encrypted(int index) const;
bool is_subfile_text(int index) const;
streampos get_index_end() const; streampos get_index_end() const;
streampos get_subfile_internal_start(int index) const; streampos get_subfile_internal_start(int index) const;
@ -161,6 +162,7 @@ private:
SF_compressed = 0x0008, SF_compressed = 0x0008,
SF_encrypted = 0x0010, SF_encrypted = 0x0010,
SF_signature = 0x0020, SF_signature = 0x0020,
SF_text = 0x0040,
}; };
class Subfile { class Subfile {