From 5055403995b2c021e9b72cbb7606375630aac18e Mon Sep 17 00:00:00 2001 From: David Rose Date: Wed, 27 Jul 2011 18:16:44 +0000 Subject: [PATCH] add support for text/binary distinction within multifile subfiles --- panda/src/downloadertools/multify.cxx | 56 +++++++++-- panda/src/express/config_express.cxx | 9 ++ panda/src/express/config_express.h | 1 + panda/src/express/multifile.cxx | 137 ++++++++++++++++++++++---- panda/src/express/multifile.h | 2 + 5 files changed, 179 insertions(+), 26 deletions(-) diff --git a/panda/src/downloadertools/multify.cxx b/panda/src/downloadertools/multify.cxx index 66aabb94c3..eb64e0ba94 100644 --- a/panda/src/downloadertools/multify.cxx +++ b/panda/src/downloadertools/multify.cxx @@ -48,11 +48,15 @@ Filename chdir_to; // -C bool got_chdir_to = false; size_t scale_factor = 0; // -F pset dont_compress; // -Z +pset text_ext; // -X vector_string sign_params; // -S // Default extensions not to compress. May be overridden with -Z. string dont_compress_str = "jpg,png,mp3,ogg"; +// Default text extensions. May be overridden with -X. +string text_ext_str = "txt"; + bool got_record_timestamp_flag = false; bool record_timestamp_flag = true; @@ -210,6 +214,12 @@ help() { " files that are not to be compressed. The default if this is omitted is\n" " \"" << dont_compress_str << "\". Specify -Z \"\" (be sure to include the space) to allow\n" " all files to be compressed.\n\n" + " -X \n" + " Specify a comma-separated list of filename extensions that represent\n" + " text files. These files are opened and read in text mode, and added to\n" + " the multifile with the text flag set. The default if this is omitted is\n" + " \"" << text_ext_str << "\". 
Specify -X \"\" (be sure to include the space) to record\n" + " all files in binary mode.\n\n" " -T \n" " Enable or disable the recording of file timestamps within the multifile.\n" @@ -271,6 +281,20 @@ is_named(const string &subfile_name, const vector_string &params) { return false; } +bool +is_text(const Filename &subfile_name) { + // Returns true if this filename should be read as a text file, + // false otherwise. + + string ext = subfile_name.get_extension(); + if (text_ext.find(ext) != text_ext.end()) { + // This extension is listed on the -X parameter list; it's a text file. + return true; + } + + return false; +} + int get_compression_level(const Filename &subfile_name) { // Returns the appropriate compression level for the named file. @@ -317,7 +341,7 @@ do_add_files(Multifile *multifile, const pvector &filenames) { bool okflag = true; pvector::const_iterator fi; for (fi = filenames.begin(); fi != filenames.end(); ++fi) { - const Filename &subfile_name = (*fi); + Filename subfile_name = (*fi); if (subfile_name.is_directory()) { if (!do_add_directory(multifile, subfile_name)) { @@ -329,6 +353,12 @@ do_add_files(Multifile *multifile, const pvector &filenames) { okflag = false; } else { + if (is_text(subfile_name)) { + subfile_name.set_text(); + } else { + subfile_name.set_binary(); + } + string new_subfile_name; if (update) { new_subfile_name = multifile->update_subfile @@ -623,6 +653,10 @@ list_files(const vector_string &params) { if (multifile->is_subfile_encrypted(i)) { encrypted_symbol = 'e'; } + char text_symbol = ' '; + if (multifile->is_subfile_text(i)) { + text_symbol = 't'; + } if (multifile->is_subfile_compressed(i)) { size_t orig_length = multifile->get_subfile_length(i); size_t internal_length = multifile->get_subfile_internal_length(i); @@ -631,25 +665,25 @@ list_files(const vector_string &params) { ratio = (double)internal_length / (double)orig_length; } if (ratio > 1.0) { - printf("%12d worse %c %s %s\n", + printf("%12d worse %c%c %s %s\n", 
(int)multifile->get_subfile_length(i), - encrypted_symbol, + encrypted_symbol, text_symbol, format_timestamp(multifile->get_record_timestamp(), multifile->get_subfile_timestamp(i)), subfile_name.c_str()); } else { - printf("%12d %3.0f%% %c %s %s\n", + printf("%12d %3.0f%% %c%c %s %s\n", (int)multifile->get_subfile_length(i), 100.0 - ratio * 100.0, - encrypted_symbol, + encrypted_symbol, text_symbol, format_timestamp(multifile->get_record_timestamp(), multifile->get_subfile_timestamp(i)), subfile_name.c_str()); } } else { - printf("%12d %c %s %s\n", + printf("%12d %c%c %s %s\n", (int)multifile->get_subfile_length(i), - encrypted_symbol, + encrypted_symbol, text_symbol, format_timestamp(multifile->get_record_timestamp(), multifile->get_subfile_timestamp(i)), subfile_name.c_str()); @@ -736,7 +770,7 @@ main(int argc, char *argv[]) { extern char *optarg; extern int optind; - static const char *optflags = "crutxkvz123456789Z:T:S:f:OC:ep:P:F:h"; + static const char *optflags = "crutxkvz123456789Z:T:X:S:f:OC:ep:P:F:h"; int flag = getopt(argc, argv, optflags); Filename rel_path; while (flag != EOF) { @@ -804,6 +838,9 @@ main(int argc, char *argv[]) { case 'Z': dont_compress_str = optarg; break; + case 'X': + text_ext_str = optarg; + break; case 'S': sign_params.push_back(optarg); break; @@ -890,6 +927,9 @@ main(int argc, char *argv[]) { // Split out the extensions named by -Z into different words. tokenize_extensions(dont_compress_str, dont_compress); + // Ditto for -X. + tokenize_extensions(text_ext_str, text_ext); + // Build a list of remaining parameters. vector_string params; params.reserve(argc - 1); diff --git a/panda/src/express/config_express.cxx b/panda/src/express/config_express.cxx index 626912518d..718debde33 100644 --- a/panda/src/express/config_express.cxx +++ b/panda/src/express/config_express.cxx @@ -61,6 +61,15 @@ ConfigVariableBool keep_temporary_files "default) to delete these. 
Mainly useful for debugging " "when the process goes wrong.")); +ConfigVariableBool multifile_always_binary +("multifile-always-binary", false, + PRC_DESC("This is a temporary transition variable. Set this true " + "to enable the old behavior for multifiles: all subfiles are " + "always added to and extracted from the multifile in binary mode. " + "Set it false to enable the new behavior: subfiles may be added " + "or extracted in either binary or text mode, according to the " + "set_binary() or set_text() flag on the Filename.")); + ConfigVariableBool collect_tcp ("collect-tcp", false, PRC_DESC("Set this true to enable accumulation of several small consecutive " diff --git a/panda/src/express/config_express.h b/panda/src/express/config_express.h index cc44eeff1e..9964bd678b 100644 --- a/panda/src/express/config_express.h +++ b/panda/src/express/config_express.h @@ -50,6 +50,7 @@ extern ConfigVariableInt patchfile_buffer_size; extern ConfigVariableInt patchfile_zone_size; extern ConfigVariableBool keep_temporary_files; +extern ConfigVariableBool multifile_always_binary; extern EXPCL_PANDAEXPRESS ConfigVariableBool collect_tcp; extern EXPCL_PANDAEXPRESS ConfigVariableDouble collect_tcp_interval; diff --git a/panda/src/express/multifile.cxx b/panda/src/express/multifile.cxx index 724bffb26c..bfd5004a7f 100644 --- a/panda/src/express/multifile.cxx +++ b/panda/src/express/multifile.cxx @@ -467,6 +467,11 @@ set_scale_factor(size_t scale_factor) { // is replaced without examining its contents (but see // also update_subfile). // +// Filename::set_binary() or set_text() must have been +// called previously to specify the nature of the source +// file. If set_text() was called, the text flag will +// be set on the subfile. +// // Returns the subfile name on success (it might have // been modified slightly), or empty string on failure. 
//////////////////////////////////////////////////////////////////// @@ -475,15 +480,24 @@ add_subfile(const string &subfile_name, const Filename &filename, int compression_level) { nassertr(is_write_valid(), string()); - if (!filename.exists()) { + Filename fname = filename; + if (multifile_always_binary) { + fname.set_binary(); + } + + nassertr(fname.is_binary_or_text(), string()); + + if (!fname.exists()) { return string(); } string name = standardize_subfile_name(subfile_name); if (!name.empty()) { Subfile *subfile = new Subfile; subfile->_name = name; - subfile->_source_filename = filename; - subfile->_source_filename.set_binary(); + subfile->_source_filename = fname; + if (fname.is_text()) { + subfile->_flags |= SF_text; + } add_new_subfile(subfile, compression_level); } @@ -500,6 +514,7 @@ add_subfile(const string &subfile_name, const Filename &filename, // Description: Adds a file from a stream as a subfile to the Multifile. // The indicated istream will be read and its contents // added to the Multifile at the next call to flush(). +// The file will be added as a binary subfile. // // Note that the istream must remain untouched and // unused by any other code until flush() is called. At @@ -538,13 +553,25 @@ add_subfile(const string &subfile_name, istream *subfile_data, // compared byte-for-byte to the disk file, and it is // replaced only if it is different; otherwise, the // multifile is left unchanged. +// +// Filename::set_binary() or set_text() must have been +// called previously to specify the nature of the source +// file. If set_text() was called, the text flag will +// be set on the subfile. 
//////////////////////////////////////////////////////////////////// string Multifile:: update_subfile(const string &subfile_name, const Filename &filename, int compression_level) { nassertr(is_write_valid(), string()); - if (!filename.exists()) { + Filename fname = filename; + if (multifile_always_binary) { + fname.set_binary(); + } + + nassertr(fname.is_binary_or_text(), string()); + + if (!fname.exists()) { return string(); } string name = standardize_subfile_name(subfile_name); @@ -552,7 +579,7 @@ update_subfile(const string &subfile_name, const Filename &filename, int index = find_subfile(name); if (index >= 0) { // The subfile already exists; compare it to the source file. - if (compare_subfile(index, filename)) { + if (compare_subfile(index, fname)) { // The files are identical; do nothing. return name; } @@ -562,8 +589,10 @@ update_subfile(const string &subfile_name, const Filename &filename, // source file. Add the new source file. Subfile *subfile = new Subfile; subfile->_name = name; - subfile->_source_filename = filename; - subfile->_source_filename.set_binary(); + subfile->_source_filename = fname; + if (fname.is_text()) { + subfile->_flags |= SF_text; + } add_new_subfile(subfile, compression_level); } @@ -1644,6 +1673,23 @@ is_subfile_encrypted(int index) const { return (_subfiles[index]->_flags & SF_encrypted) != 0; } +//////////////////////////////////////////////////////////////////// +// Function: Multifile::is_subfile_text +// Access: Published +// Description: Returns true if the indicated subfile represents text +// data, or false if it represents binary data. If the +// file is text data, it may have been processed by +// end-of-line conversion when it was added. (But the +// actual bits in the multifile will represent the +// standard Unix end-of-line convention, e.g. \n instead +// of \r\n.) 
+//////////////////////////////////////////////////////////////////// +bool Multifile:: +is_subfile_text(int index) const { + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + return (_subfiles[index]->_flags & SF_text) != 0; +} + //////////////////////////////////////////////////////////////////// // Function: Multifile::get_index_end // Access: Published @@ -1776,7 +1822,20 @@ extract_subfile(int index, const Filename &filename) { nassertr(index >= 0 && index < (int)_subfiles.size(), false); Filename fname = filename; - fname.set_binary(); + if (multifile_always_binary) { + fname.set_binary(); + } + + nassertr(fname.is_binary_or_text(), false); + if (!fname.is_binary_or_text()) { + // If we haven't specified binary or text, infer it from the type + // of the subfile. + if ((_subfiles[index]->_flags & SF_text) != 0) { + fname.set_text(); + } else { + fname.set_binary(); + } + } fname.make_dir(); pofstream out; if (!fname.open_write(out, true)) { @@ -1828,6 +1887,13 @@ extract_subfile_to(int index, ostream &out) { // file on disk with the nth subfile. Returns true if // the files are equivalent, or false if they are // different (or the file is missing). +// +// If Filename::set_binary() or set_text() has already +// been called, it specifies the nature of the source +// file. If this is different from the text flag of the +// subfile, the comparison will always return false. +// If this has not been specified, it will be set from +// the text flag of the subfile. //////////////////////////////////////////////////////////////////// bool Multifile:: compare_subfile(int index, const Filename &filename) { @@ -1840,27 +1906,62 @@ compare_subfile(int index, const Filename &filename) { return false; } + Filename fname = filename; + if (fname.is_binary()) { + // If we've specified a binary file, it had better be a binary + // subfile. 
+ if ((_subfiles[index]->_flags & SF_text) != 0) { + if (express_cat.is_debug()) { + express_cat.debug() + << "File is not binary: " << filename << "\n"; + } + return false; + } + + } else if (fname.is_text()) { + // If we've specified a text file, it had better be a text + // subfile. + if ((_subfiles[index]->_flags & SF_text) == 0) { + if (express_cat.is_debug()) { + express_cat.debug() + << "File is not text: " << filename << "\n"; + } + return false; + } + + } else { + // If we haven't specified binary or text, infer it from the type + // of the subfile. + if ((_subfiles[index]->_flags & SF_text) != 0) { + fname.set_text(); + } else { + fname.set_binary(); + } + } + istream *in1 = open_read_subfile(index); if (in1 == (istream *)NULL) { return false; } pifstream in2; - Filename bin_filename = Filename::binary_filename(filename); - if (!bin_filename.open_read(in2)) { + + if (!fname.open_read(in2)) { express_cat.info() << "Cannot read " << filename << "\n"; return false; } - // Check the file size. - in2.seekg(0, ios::end); - streampos file_size = in2.tellg(); - - if (file_size != (streampos)get_subfile_length(index)) { - // The files have different sizes. - close_read_subfile(in1); - return false; + if (fname.is_binary()) { + // Check the file size. + in2.seekg(0, ios::end); + streampos file_size = in2.tellg(); + + if (file_size != (streampos)get_subfile_length(index)) { + // The files have different sizes. + close_read_subfile(in1); + return false; + } } // Check the file data, byte-for-byte. 
diff --git a/panda/src/express/multifile.h b/panda/src/express/multifile.h index 65a73e94b8..4c2a77e84a 100644 --- a/panda/src/express/multifile.h +++ b/panda/src/express/multifile.h @@ -129,6 +129,7 @@ PUBLISHED: time_t get_subfile_timestamp(int index) const; bool is_subfile_compressed(int index) const; bool is_subfile_encrypted(int index) const; + bool is_subfile_text(int index) const; streampos get_index_end() const; streampos get_subfile_internal_start(int index) const; @@ -161,6 +162,7 @@ private: SF_compressed = 0x0008, SF_encrypted = 0x0010, SF_signature = 0x0020, + SF_text = 0x0040, }; class Subfile {