diff --git a/dtool/src/dtoolutil/textEncoder.cxx b/dtool/src/dtoolutil/textEncoder.cxx index 1065f21dcb..1c9bca49a3 100644 --- a/dtool/src/dtoolutil/textEncoder.cxx +++ b/dtool/src/dtoolutil/textEncoder.cxx @@ -21,6 +21,42 @@ using std::ostream; using std::string; using std::wstring; +// Maps cp437 characters to Unicode codepoints. +static char16_t cp437_table[256] = { + 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, + 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c, + 0x25ba, 0x25c4, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x25ac, 0x21a8, + 0x2191, 0x2193, 0x2192, 0x2190, 0x221f, 0x2194, 0x25b2, 0x25bc, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x2302, + 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, + 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, + 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, + 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, + 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, + 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 
0x253c, 0x255e, 0x255f,
+  0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
+  0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
+  0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
+  0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4,
+  0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229,
+  0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
+  0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0,
+};
+
 TextEncoder::Encoding TextEncoder::_default_encoding = TextEncoder::E_utf8;
 
 /**
@@ -177,6 +213,20 @@ encode_wchar(char32_t ch, TextEncoder::Encoding encoding) {
       };
       return string(encoded, 4);
     }
+
+  case E_cp437:
+    if ((ch & ~0x7f) == 0) {
+      return string(1, (char)ch);
+    }
+    else if (ch <= 0x266b) {
+      // Linear reverse lookup; 0x266b is the highest codepoint in cp437_table, so the bound must be inclusive. Not optimized, because we don't really need it right now.
+      for (int i = 0; i < 256; ++i) {
+        if (cp437_table[i] == ch) {
+          return std::string(1, (char)i);
+        }
+      }
+    }
+    return ".";
   }
 
   return "";
@@ -233,6 +283,15 @@ decode_text(const string &text, TextEncoder::Encoding encoding) {
       return decode_text_impl(decoder);
     }
 
+  case E_cp437:
+    {
+      std::wstring result(text.size(), 0);
+      for (size_t i = 0; i < result.size(); ++i) {
+        result[i] = cp437_table[(uint8_t)text[i]];
+      }
+      return result;
+    }
+
   case E_iso8859:
   default:
     {
@@ -382,6 +441,9 @@ operator << (ostream &out, TextEncoder::Encoding encoding) {
 
   case TextEncoder::E_utf16be:
     return out << "utf16be";
+
+  case TextEncoder::E_cp437:
+    return out << "cp437";
   };
 
   return out << "**invalid TextEncoder::Encoding(" << (int)encoding << ")**";
@@ -402,6 +464,8 @@ operator >> (istream &in, TextEncoder::Encoding &encoding) {
   } else if (word == "unicode" || word == "utf16be" || word == "utf-16be" ||
                                   word == "utf16-be" || word == "utf-16-be") {
     encoding = TextEncoder::E_utf16be;
+  } else if (word == "cp437") {
+    encoding = TextEncoder::E_cp437;
   } else {
     ostream *notify_ptr = StringDecoder::get_notify_ptr();
     if (notify_ptr !=
nullptr) { diff --git a/dtool/src/dtoolutil/textEncoder.h b/dtool/src/dtoolutil/textEncoder.h index 217f52d271..faa50715be 100644 --- a/dtool/src/dtoolutil/textEncoder.h +++ b/dtool/src/dtoolutil/textEncoder.h @@ -36,6 +36,7 @@ PUBLISHED: E_iso8859, E_utf8, E_utf16be, + E_cp437, // Deprecated alias for E_utf16be E_unicode = E_utf16be, diff --git a/panda/src/express/CMakeLists.txt b/panda/src/express/CMakeLists.txt index b45a55d11c..fa4b8d6e2c 100644 --- a/panda/src/express/CMakeLists.txt +++ b/panda/src/express/CMakeLists.txt @@ -56,6 +56,7 @@ set(P3EXPRESS_HEADERS virtualFileMountMultifile.h virtualFileMountMultifile.I virtualFileMountRamdisk.h virtualFileMountRamdisk.I virtualFileMountSystem.h virtualFileMountSystem.I + virtualFileMountZip.h virtualFileMountZip.I virtualFileSimple.h virtualFileSimple.I virtualFileSystem.h virtualFileSystem.I weakPointerCallback.I weakPointerCallback.h @@ -64,6 +65,7 @@ set(P3EXPRESS_HEADERS weakPointerToVoid.I weakPointerToVoid.h weakReferenceList.I weakReferenceList.h windowsRegistry.h + zipArchive.I zipArchive.h zStream.I zStream.h zStreamBuf.h ) @@ -110,6 +112,7 @@ set(P3EXPRESS_SOURCES virtualFileMountMultifile.cxx virtualFileMountRamdisk.cxx virtualFileMountSystem.cxx + virtualFileMountZip.cxx virtualFileSimple.cxx virtualFileSystem.cxx weakPointerCallback.cxx weakPointerTo.cxx @@ -117,6 +120,7 @@ set(P3EXPRESS_SOURCES weakPointerToVoid.cxx weakReferenceList.cxx windowsRegistry.cxx + zipArchive.cxx zStream.cxx zStreamBuf.cxx ) diff --git a/panda/src/express/p3express_composite2.cxx b/panda/src/express/p3express_composite2.cxx index 1d7a081996..9f097f3a52 100644 --- a/panda/src/express/p3express_composite2.cxx +++ b/panda/src/express/p3express_composite2.cxx @@ -20,6 +20,7 @@ #include "virtualFileMountMultifile.cxx" #include "virtualFileMountRamdisk.cxx" #include "virtualFileMountSystem.cxx" +#include "virtualFileMountZip.cxx" #include "virtualFileSimple.cxx" #include "virtualFileSystem.cxx" #include "weakPointerCallback.cxx" 
@@ -30,3 +31,4 @@ #include "windowsRegistry.cxx" #include "zStream.cxx" #include "zStreamBuf.cxx" +#include "zipArchive.cxx" diff --git a/panda/src/express/virtualFileMountZip.I b/panda/src/express/virtualFileMountZip.I new file mode 100644 index 0000000000..7fceee74dd --- /dev/null +++ b/panda/src/express/virtualFileMountZip.I @@ -0,0 +1,30 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file virtualFileMountZip.I + * @author rdb + * @date 2019-11-07 + */ + +/** + * + */ +INLINE VirtualFileMountZip:: +VirtualFileMountZip(ZipArchive *archive, const Filename &directory) : + _archive(archive), + _directory(directory) +{ +} + +/** + * Returns the ZipArchive pointer that this mount object is based on. + */ +INLINE ZipArchive *VirtualFileMountZip:: +get_archive() const { + return _archive; +} diff --git a/panda/src/express/virtualFileMountZip.cxx b/panda/src/express/virtualFileMountZip.cxx new file mode 100644 index 0000000000..70702be37f --- /dev/null +++ b/panda/src/express/virtualFileMountZip.cxx @@ -0,0 +1,204 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file virtualFileMountZip.cxx + * @author drose + * @date 2002-08-03 + */ + +#include "virtualFileMountZip.h" +#include "virtualFileSystem.h" + +TypeHandle VirtualFileMountZip::_type_handle; + + +/** + * + */ +VirtualFileMountZip:: +~VirtualFileMountZip() { +} + + +/** + * Returns true if the indicated file exists within the mount system. 
+ */
+bool VirtualFileMountZip::
+has_file(const Filename &file) const {
+  Filename path(_directory, file);
+  return (path.empty() ||
+          _archive->find_subfile(path) >= 0 ||
+          _archive->has_directory(path));
+}
+
+/**
+ * Returns true if the indicated file exists within the mount system and is a
+ * directory.  The lookup uses the name prefixed with _directory, as has_file() does.
+ */
+bool VirtualFileMountZip::
+is_directory(const Filename &file) const {
+  Filename path(_directory, file);
+  return (path.empty() || _archive->has_directory(path));
+}
+
+/**
+ * Returns true if the indicated file exists within the mount system and is a
+ * regular file.
+ */
+bool VirtualFileMountZip::
+is_regular_file(const Filename &file) const {
+  Filename path(_directory, file);
+  return (_archive->find_subfile(path) >= 0);
+}
+
+/**
+ * Fills up the indicated pvector with the contents of the file, if it is a
+ * regular file.  Returns true on success, false otherwise.
+ */
+bool VirtualFileMountZip::
+read_file(const Filename &file, bool do_uncompress,
+          vector_uchar &result) const {
+
+  Filename path(_directory, file);
+  if (do_uncompress) {
+    // Decompression is handled by the higher-level implementation.  Pass it
+    // the unprefixed name: it is expected to call back into our own
+    // open_read_file(), which applies _directory itself.
+    return VirtualFileMount::read_file(file, do_uncompress, result);
+  }
+
+  // But if we're just reading a straight file, let the ZipArchive do the
+  // reading, which avoids a few levels of buffer copies.
+
+  int subfile_index = _archive->find_subfile(path);
+  if (subfile_index < 0) {
+    express_cat.info()
+      << "Unable to read " << path << "\n";
+    return false;
+  }
+
+  return _archive->read_subfile(subfile_index, result);
+}
+
+/**
+ * Opens the file for reading, if it exists.  Returns a newly allocated
+ * istream on success (which you should eventually delete when you are done
+ * reading).  Returns NULL on failure.
+ */ +std::istream *VirtualFileMountZip:: +open_read_file(const Filename &file) const { + Filename path(_directory, file); + int subfile_index = _archive->find_subfile(path); + if (subfile_index < 0) { + return nullptr; + } + + // The caller will eventually pass this pointer to + // VirtualFileSystem::close_read_file(), not to + // Multifile::close_read_subfile(). Fortunately, these two methods do the + // same thing, so that doesn't matter. + return _archive->open_read_subfile(subfile_index); +} + +/** + * Returns the current size on disk (or wherever it is) of the already-open + * file. Pass in the stream that was returned by open_read_file(); some + * implementations may require this stream to determine the size. + */ +std::streamsize VirtualFileMountZip:: +get_file_size(const Filename &file, std::istream *) const { + Filename path(_directory, file); + int subfile_index = _archive->find_subfile(path); + if (subfile_index < 0) { + return 0; + } + return _archive->get_subfile_length(subfile_index); +} + +/** + * Returns the current size on disk (or wherever it is) of the file before it + * has been opened. + */ +std::streamsize VirtualFileMountZip:: +get_file_size(const Filename &file) const { + Filename path(_directory, file); + int subfile_index = _archive->find_subfile(path); + if (subfile_index < 0) { + return 0; + } + return _archive->get_subfile_length(subfile_index); +} + +/** + * Returns a time_t value that represents the time the file was last modified, + * to within whatever precision the operating system records this information + * (on a Windows95 system, for instance, this may only be accurate to within 2 + * seconds). + * + * If the timestamp cannot be determined, either because it is not supported + * by the operating system or because there is some error (such as file not + * found), returns 0. 
+ */ +time_t VirtualFileMountZip:: +get_timestamp(const Filename &file) const { + Filename path(_directory, file); + int subfile_index = _archive->find_subfile(path); + if (subfile_index < 0) { + return 0; + } + return _archive->get_subfile_timestamp(subfile_index); +} + +/** + * Populates the SubfileInfo structure with the data representing where the + * file actually resides on disk, if this is knowable. Returns true if the + * file might reside on disk, and the info is populated, or false if it might + * not (or it is not known where the file resides), in which case the info is + * meaningless. + */ +bool VirtualFileMountZip:: +get_system_info(const Filename &file, SubfileInfo &info) { + Filename path(_directory, file); + Filename filename = _archive->get_filename(); + if (filename.empty()) { + return false; + } + int subfile_index = _archive->find_subfile(path); + if (subfile_index < 0) { + return false; + } + if (_archive->is_subfile_compressed(subfile_index) || + _archive->is_subfile_encrypted(subfile_index)) { + return false; + } + + std::streampos start = _archive->get_subfile_internal_start(subfile_index); + size_t length = _archive->get_subfile_internal_length(subfile_index); + + info = SubfileInfo(filename, start, length); + return true; +} + +/** + * Fills the given vector up with the list of filenames that are local to this + * directory, if the filename is a directory. Returns true if successful, or + * false if the file is not a directory or cannot be read. 
+ */ +bool VirtualFileMountZip:: +scan_directory(vector_string &contents, const Filename &dir) const { + Filename path(_directory, dir); + return _archive->scan_directory(contents, path); +} + +/** + * + */ +void VirtualFileMountZip:: +output(std::ostream &out) const { + out << _archive->get_filename(); +} diff --git a/panda/src/express/virtualFileMountZip.h b/panda/src/express/virtualFileMountZip.h new file mode 100644 index 0000000000..8d2ead6764 --- /dev/null +++ b/panda/src/express/virtualFileMountZip.h @@ -0,0 +1,77 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file virtualFileMountZip.h + * @author drose + * @date 2002-08-03 + */ + +#ifndef VIRTUALFILEMOUNTZIP_H +#define VIRTUALFILEMOUNTZIP_H + +#include "pandabase.h" + +#include "virtualFileMount.h" +#include "zipArchive.h" +#include "pointerTo.h" + +/** + * Maps a .zip archive into the VirtualFileSystem. 
+ */ +class EXPCL_PANDA_EXPRESS VirtualFileMountZip : public VirtualFileMount { +PUBLISHED: + INLINE VirtualFileMountZip(ZipArchive *archive, + const Filename &directory = Filename()); + virtual ~VirtualFileMountZip(); + + INLINE ZipArchive *get_archive() const; + +public: + virtual bool has_file(const Filename &file) const; + virtual bool is_directory(const Filename &file) const; + virtual bool is_regular_file(const Filename &file) const; + + virtual bool read_file(const Filename &file, bool do_uncompress, + vector_uchar &result) const; + + virtual std::istream *open_read_file(const Filename &file) const; + virtual std::streamsize get_file_size(const Filename &file, std::istream *stream) const; + virtual std::streamsize get_file_size(const Filename &file) const; + virtual time_t get_timestamp(const Filename &file) const; + virtual bool get_system_info(const Filename &file, SubfileInfo &info); + + virtual bool scan_directory(vector_string &contents, + const Filename &dir) const; + + virtual void output(std::ostream &out) const; + +private: + PT(ZipArchive) _archive; + Filename _directory; + +public: + virtual TypeHandle get_type() const { + return get_class_type(); + } + virtual TypeHandle force_init_type() {init_type(); return get_class_type();} + static TypeHandle get_class_type() { + return _type_handle; + } + static void init_type() { + VirtualFileMount::init_type(); + register_type(_type_handle, "VirtualFileMountZip", + VirtualFileMount::get_class_type()); + } + +private: + static TypeHandle _type_handle; +}; + +#include "virtualFileMountZip.I" + +#endif diff --git a/panda/src/express/virtualFileSystem.cxx b/panda/src/express/virtualFileSystem.cxx index cbd4344cc6..3d02eed6a8 100644 --- a/panda/src/express/virtualFileSystem.cxx +++ b/panda/src/express/virtualFileSystem.cxx @@ -18,6 +18,7 @@ #include "virtualFileMountMultifile.h" #include "virtualFileMountRamdisk.h" #include "virtualFileMountSystem.h" +#include "virtualFileMountZip.h" #include "streamWrapper.h" 
#include "dSearchPath.h"
 #include "dcast.h"
@@ -93,6 +94,16 @@ mount(Multifile *multifile, const Filename &mount_point, int flags) {
   return mount(new_mount, mount_point, flags);
 }
 
+/**
+ * Mounts the indicated ZipArchive at the given mount point.
+ */
+bool VirtualFileSystem::
+mount(ZipArchive *archive, const Filename &mount_point, int flags) {
+  PT(VirtualFileMountZip) new_mount =
+    new VirtualFileMountZip(archive);
+  return mount(new_mount, mount_point, flags);
+}
+
 /**
  * Mounts the indicated system file or directory at the given mount point. If
  * the named file is a directory, mounts the directory. If the named file is
@@ -126,20 +137,70 @@ mount(const Filename &physical_filename, const Filename &mount_point,
     PT(VirtualFileMountSystem) new_mount =
       new VirtualFileMountSystem(physical_filename);
     return mount(new_mount, mount_point, flags);
-  } else {
-    // It's not a directory; it must be a Multifile.
-    PT(Multifile) multifile = new Multifile;
-    multifile->set_encryption_password(password);
+  }
 
-    // For now these are always opened read only. Maybe later we'll support
-    // read-write on Multifiles.
-    flags |= MF_read_only;
-    if (!multifile->open_read(physical_filename)) {
-      return false;
+  // It's not a directory; it must be a multifile or .zip file.
+  Filename fname = physical_filename;
+  fname.set_binary();
+
+  PT(VirtualFile) vfile = get_file(fname);
+  if (vfile == nullptr) {
+    return false;
+  }
+  istream *stream = vfile->open_read_file(false);
+  if (stream == nullptr) {
+    return false;
+  }
+
+  // For now these are always opened read only. Maybe later we'll support
+  // read-write on multifiles and .zip files.
+  flags |= MF_read_only;
+
+  int ch = stream->get();  // int, not char, so EOF stays distinct from every data byte
+  if (ch == '#' || ch == 'p') {
+    // It *might* be a multifile.
+    while (ch == '#') {
+      // Skip to the end of the line.
+      while (ch != EOF && ch != '\n') {
+        ch = stream->get();
+      }
+      // Skip to the first non-whitespace character of the line.
+      while (ch != EOF && (isspace(ch) || ch == '\r')) {
+        ch = stream->get();
+      }
     }
 
-    return mount(multifile, mount_point, flags);
+    // Now read the actual Multifile header.
+    char this_header[6];
+    this_header[0] = (char)ch;
+    stream->read(this_header + 1, 6 - 1);
+    if (!stream->fail() && stream->gcount() == 6 - 1 &&
+        memcmp(this_header, "pmf\0\n\r", 6) == 0) {
+      // Looks like a multifile all right. Reopen it.
+      close_read_file(stream);
+
+      PT(Multifile) multifile = new Multifile;
+      multifile->set_encryption_password(password);
+
+      if (!multifile->open_read(physical_filename)) {
+        return false;
+      }
+
+      return mount(multifile, mount_point, flags);
+    }
   }
+
+  // It must be a ZIP file. Note that ZipArchive does not require rewinding
+  // the stream back to 0.
+  IStreamWrapper *read = new IStreamWrapper(stream, true);
+
+  PT(ZipArchive) archive = new ZipArchive;
+  if (!archive->open_read(read, true)) {
+    return false;
+  }
+
+  archive->set_filename(physical_filename);
+  return mount(archive, mount_point, flags);
 }
 
 /**
@@ -245,6 +306,48 @@ unmount(Multifile *multifile) {
   return num_removed;
 }
 
+/**
+ * Unmounts all appearances of the indicated ZipArchive from the file system.
+ * Returns the number of appearances unmounted.
+ */
+int VirtualFileSystem::
+unmount(ZipArchive *archive) {
+  _lock.lock();
+  Mounts::iterator ri, wi;
+  wi = ri = _mounts.begin();
+  while (ri != _mounts.end()) {
+    VirtualFileMount *mount = (*ri);
+    (*wi) = mount;
+
+    if (mount->is_exact_type(VirtualFileMountZip::get_class_type())) {
+      VirtualFileMountZip *zip_mount =
+        DCAST(VirtualFileMountZip, mount);
+      if (zip_mount->get_archive() == archive) {
+        // Remove this one. Don't increment wi.
+        if (express_cat->is_debug()) {
+          express_cat->debug()
+            << "unmount " << *mount << " from " << mount->get_mount_point() << "\n";
+        }
+        mount->_file_system = nullptr;
+
+      } else {
+        // Don't remove this one.
+        ++wi;
+      }
+    } else {
+      // Don't remove this one.
+ ++wi; + } + ++ri; + } + + int num_removed = _mounts.end() - wi; + _mounts.erase(wi, _mounts.end()); + ++_mount_seq; + _lock.unlock(); + return num_removed; +} + /** * Unmounts all appearances of the indicated directory name or multifile name * from the file system. Returns the number of appearances unmounted. diff --git a/panda/src/express/virtualFileSystem.h b/panda/src/express/virtualFileSystem.h index 3b1d3b3ed9..775f2ddd44 100644 --- a/panda/src/express/virtualFileSystem.h +++ b/panda/src/express/virtualFileSystem.h @@ -25,6 +25,7 @@ #include "config_express.h" #include "mutexImpl.h" #include "pvector.h" +#include "zipArchive.h" class Multifile; class VirtualFileComposite; @@ -47,12 +48,14 @@ PUBLISHED: }; BLOCKING bool mount(Multifile *multifile, const Filename &mount_point, int flags); + BLOCKING bool mount(ZipArchive *archive, const Filename &mount_point, int flags); BLOCKING bool mount(const Filename &physical_filename, const Filename &mount_point, int flags, const std::string &password = ""); BLOCKING bool mount_loop(const Filename &virtual_filename, const Filename &mount_point, int flags, const std::string &password = ""); bool mount(VirtualFileMount *mount, const Filename &mount_point, int flags); BLOCKING int unmount(Multifile *multifile); + BLOCKING int unmount(ZipArchive *archive); BLOCKING int unmount(const Filename &physical_filename); int unmount(VirtualFileMount *mount); BLOCKING int unmount_point(const Filename &mount_point); diff --git a/panda/src/express/zStream.I b/panda/src/express/zStream.I index 6442d03037..4f311567fb 100644 --- a/panda/src/express/zStream.I +++ b/panda/src/express/zStream.I @@ -22,17 +22,17 @@ IDecompressStream() : std::istream(&_buf) { * */ INLINE IDecompressStream:: -IDecompressStream(std::istream *source, bool owns_source) : std::istream(&_buf) { - open(source, owns_source); +IDecompressStream(std::istream *source, bool owns_source, std::streamsize source_length, bool header) : std::istream(&_buf) { + open(source, 
owns_source, source_length, header); } /** * */ INLINE IDecompressStream &IDecompressStream:: -open(std::istream *source, bool owns_source) { +open(std::istream *source, bool owns_source, std::streamsize source_length, bool header) { clear((ios_iostate)0); - _buf.open_read(source, owns_source); + _buf.open_read(source, owns_source, source_length, header); return *this; } @@ -58,19 +58,19 @@ OCompressStream() : std::ostream(&_buf) { * */ INLINE OCompressStream:: -OCompressStream(std::ostream *dest, bool owns_dest, int compression_level) : +OCompressStream(std::ostream *dest, bool owns_dest, int compression_level, bool header) : std::ostream(&_buf) { - open(dest, owns_dest, compression_level); + open(dest, owns_dest, compression_level, header); } /** * */ INLINE OCompressStream &OCompressStream:: -open(std::ostream *dest, bool owns_dest, int compression_level) { +open(std::ostream *dest, bool owns_dest, int compression_level, bool header) { clear((ios_iostate)0); - _buf.open_write(dest, owns_dest, compression_level); + _buf.open_write(dest, owns_dest, compression_level, header); return *this; } diff --git a/panda/src/express/zStream.h b/panda/src/express/zStream.h index b05a75158c..f90eae346f 100644 --- a/panda/src/express/zStream.h +++ b/panda/src/express/zStream.h @@ -34,13 +34,17 @@ class EXPCL_PANDA_EXPRESS IDecompressStream : public std::istream { PUBLISHED: INLINE IDecompressStream(); - INLINE explicit IDecompressStream(std::istream *source, bool owns_source); + INLINE explicit IDecompressStream(std::istream *source, bool owns_source, + std::streamsize source_length = -1, + bool header=true); #if _MSC_VER >= 1800 INLINE IDecompressStream(const IDecompressStream &copy) = delete; #endif - INLINE IDecompressStream &open(std::istream *source, bool owns_source); + INLINE IDecompressStream &open(std::istream *source, bool owns_source, + std::streamsize source_length = -1, + bool header=true); INLINE IDecompressStream &close(); private: @@ -61,14 +65,16 @@ class
EXPCL_PANDA_EXPRESS OCompressStream : public std::ostream { PUBLISHED: INLINE OCompressStream(); INLINE explicit OCompressStream(std::ostream *dest, bool owns_dest, - int compression_level = 6); + int compression_level = 6, + bool header=true); #if _MSC_VER >= 1800 INLINE OCompressStream(const OCompressStream &copy) = delete; #endif INLINE OCompressStream &open(std::ostream *dest, bool owns_dest, - int compression_level = 6); + int compression_level = 6, + bool header=true); INLINE OCompressStream &close(); private: diff --git a/panda/src/express/zStreamBuf.cxx b/panda/src/express/zStreamBuf.cxx index 57a64bed7a..3940716004 100644 --- a/panda/src/express/zStreamBuf.cxx +++ b/panda/src/express/zStreamBuf.cxx @@ -73,8 +73,9 @@ ZStreamBuf:: * */ void ZStreamBuf:: -open_read(std::istream *source, bool owns_source) { +open_read(std::istream *source, bool owns_source, std::streamsize source_length, bool header) { _source = source; + _source_bytes_left = source_length; _owns_source = owns_source; _z_source.next_in = Z_NULL; @@ -91,7 +92,7 @@ open_read(std::istream *source, bool owns_source) { _z_source.opaque = Z_NULL; _z_source.msg = (char *)"no error message"; - int result = inflateInit2(&_z_source, 32 + 15); + int result = inflateInit2(&_z_source, header ?
32 + 15 : -15); if (result < 0) { show_zlib_error("inflateInit2", result, _z_source); close_read(); @@ -104,6 +105,8 @@ open_read(std::istream *source, bool owns_source) { */ void ZStreamBuf:: close_read() { + _source_bytes_left = 0; + if (_source != nullptr) { int result = inflateEnd(&_z_source); @@ -124,7 +127,7 @@ close_read() { * */ void ZStreamBuf:: -open_write(std::ostream *dest, bool owns_dest, int compression_level) { +open_write(std::ostream *dest, bool owns_dest, int compression_level, bool header) { _dest = dest; _owns_dest = owns_dest; @@ -142,9 +145,10 @@ open_write(std::ostream *dest, bool owns_dest, int compression_level) { _z_dest.opaque = Z_NULL; _z_dest.msg = (char *)"no error message"; - int result = deflateInit(&_z_dest, compression_level); + int result = deflateInit2(&_z_dest, compression_level, Z_DEFLATED, + header ? 15 : -15, 8, Z_DEFAULT_STRATEGY); if (result < 0) { - show_zlib_error("deflateInit", result, _z_dest); + show_zlib_error("deflateInit2", result, _z_dest); close_write(); } thread_consider_yield(); @@ -310,13 +314,22 @@ read_chars(char *start, size_t length) { _z_source.next_out = (Bytef *)start; _z_source.avail_out = length; - bool eof = (_source->eof() || _source->fail()); + bool eof = (_source_bytes_left == 0 || _source->eof() || _source->fail()); int flush = 0; while (_z_source.avail_out > 0) { if (_z_source.avail_in == 0 && !eof) { - _source->read(decompress_buffer, decompress_buffer_size); - size_t read_count = _source->gcount(); + size_t read_count = 0; + if (_source_bytes_left >= 0) { + // Don't read more than the specified limit. 
+ _source->read(decompress_buffer, + std::min(_source_bytes_left, (std::streamsize)decompress_buffer_size)); + read_count = _source->gcount(); + _source_bytes_left -= read_count; + } else { + _source->read(decompress_buffer, decompress_buffer_size); + read_count = _source->gcount(); + } eof = (read_count == 0 || _source->eof() || _source->fail()); _z_source.next_in = (Bytef *)decompress_buffer; diff --git a/panda/src/express/zStreamBuf.h b/panda/src/express/zStreamBuf.h index 3dadd1543d..49c9a506ee 100644 --- a/panda/src/express/zStreamBuf.h +++ b/panda/src/express/zStreamBuf.h @@ -29,10 +29,10 @@ public: ZStreamBuf(); virtual ~ZStreamBuf(); - void open_read(std::istream *source, bool owns_source); + void open_read(std::istream *source, bool owns_source, std::streamsize source_length=-1, bool header=true); void close_read(); - void open_write(std::ostream *dest, bool owns_dest, int compression_level); + void open_write(std::ostream *dest, bool owns_dest, int compression_level, bool header=true); void close_write(); virtual std::streampos seekoff(std::streamoff off, ios_seekdir dir, ios_openmode which); @@ -50,6 +50,7 @@ private: private: std::istream *_source; + std::streamsize _source_bytes_left = -1; bool _owns_source; std::ostream *_dest; diff --git a/panda/src/express/zipArchive.I b/panda/src/express/zipArchive.I new file mode 100644 index 0000000000..ae8c25a85a --- /dev/null +++ b/panda/src/express/zipArchive.I @@ -0,0 +1,147 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file zipArchive.I + * @author rdb + * @date 2019-10-23 + */ + +/** + * Returns the filename of the ZipArchive, if it is available. 
+ */ +INLINE const Filename &ZipArchive:: +get_filename() const { + return _filename; +} + +/** + * Replaces the filename of the ZipArchive. This is primarily used for + * documentation purposes only; changing this name does not open the indicated + * file. See open_read() or open_write() for that. + */ +INLINE void ZipArchive:: +set_filename(const Filename &filename) { + _filename = filename; +} + +/** + * Returns true if the ZipArchive has been opened for read mode and there have + * been no errors, and individual Subfile contents may be extracted. + */ +INLINE bool ZipArchive:: +is_read_valid() const { + return (_read != nullptr); +} + +/** + * Returns true if the ZipArchive has been opened for write mode and there have + * been no errors, and Subfiles may be added or removed from the ZipArchive. + */ +INLINE bool ZipArchive:: +is_write_valid() const { + return (_write != nullptr && !_write->fail()); +} + +/** + * Returns true if the ZipArchive index is suboptimal and should be repacked. + * Call repack() to achieve this. It is not done automatically. + */ +INLINE bool ZipArchive:: +needs_repack() const { + return _needs_repack; +} + +/** + * Sets the flag indicating whether timestamps should be recorded within the + * ZipArchive or not. The default is true, indicating the ZipArchive will + * record timestamps for each subfile that is added. + * + * If this is false, the ZipArchive will not record timestamps internally. In + * this case, the return value from get_subfile_timestamp() will be zero. + * + * You may want to set this false to minimize the bitwise difference between + * independently-generated ZipArchives. + */ +INLINE void ZipArchive:: +set_record_timestamp(bool flag) { + _record_timestamp = flag; +} + +/** + * Returns the flag indicating whether timestamps should be recorded within + * the ZipArchive or not. See set_record_timestamp(). 
+ */ +INLINE bool ZipArchive:: +get_record_timestamp() const { + return _record_timestamp; +} + +/** + * Removes the named subfile from the ZipArchive, if it exists; returns true if + * successfully removed, or false if it did not exist in the first place. The + * file will not actually be removed from the disk until the next call to + * flush(). + * + * Note that this does not actually remove the data from the indicated + * subfile; it simply removes it from the index. The ZipArchive will not be + * reduced in size after this operation, until the next call to repack(). + */ +INLINE bool ZipArchive:: +remove_subfile(const std::string &subfile_name) { + int index = find_subfile(subfile_name); + if (index >= 0) { + remove_subfile(index); + return true; + } + return false; +} + +/** + * Returns a vector_uchar that contains the entire contents of the indicated + * subfile. + */ +INLINE vector_uchar ZipArchive:: +read_subfile(int index) { + vector_uchar result; + read_subfile(index, result); + return result; +} + +/** + * Returns the comment string that was at the end of the ZIP end-of-directory + * record, if any. + * See set_comment(). + */ +INLINE const std::string &ZipArchive:: +get_comment() const { + return _comment; +} + +/** + * Compares two Subfiles for proper sorting within the index. + */ +INLINE bool ZipArchive::Subfile:: +operator < (const ZipArchive::Subfile &other) const { + return _name < other._name; +} + +/** + * Returns true if this Subfile is compressed. + */ +INLINE bool ZipArchive::Subfile:: +is_compressed() const { + return _compression_method != CM_store; +} + +/** + * Returns true if this Subfile is encrypted. 
+ */ +INLINE bool ZipArchive::Subfile:: +is_encrypted() const { + return (_flags & SF_encrypted) != 0; +} diff --git a/panda/src/express/zipArchive.cxx b/panda/src/express/zipArchive.cxx new file mode 100644 index 0000000000..4c0e838c5c --- /dev/null +++ b/panda/src/express/zipArchive.cxx @@ -0,0 +1,1989 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file zipArchive.cxx + * @author rdb + * @date 2019-01-20 + */ + +#include "zipArchive.h" + +#include "config_express.h" +#include "streamWriter.h" +#include "streamReader.h" +#include "datagram.h" +#include "zStream.h" +#include "encryptStream.h" +#include "virtualFileSystem.h" +#include "virtualFile.h" + +#include +#include +#include + +#include "openSSLWrapper.h" + +using std::streamoff; +using std::streampos; +using std::streamsize; +using std::stringstream; +using std::string; + +// 1980-01-01 00:00:00 +static const time_t dos_epoch = 315532800; + +/** + * + */ +ZipArchive:: +ZipArchive() : + _read_filew(_read_file), + _read_write_filew(_read_write_file) +{ + _read = nullptr; + _write = nullptr; + _owns_stream = false; + _index_changed = false; + _needs_repack = false; + _record_timestamp = true; +} + +/** + * + */ +ZipArchive:: +~ZipArchive() { + close(); +} + +/** + * Opens the named ZipArchive on disk for reading. The ZipArchive index is read + * in, and the list of subfiles becomes available; individual subfiles may + * then be extracted or read, but the list of subfiles may not be modified. + * + * Also see the version of open_read() which accepts an istream. Returns true + * on success, false on failure. 
+ */ +bool ZipArchive:: +open_read(const Filename &filename) { + close(); + Filename fname = filename; + fname.set_binary(); + + VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr(); + PT(VirtualFile) vfile = vfs->get_file(fname); + if (vfile == nullptr) { + return false; + } + std::istream *stream = vfile->open_read_file(false); + if (stream == nullptr) { + return false; + } + + _read = new IStreamWrapper(stream, true); + _owns_stream = true; + _filename = filename; + return read_index(); +} + +/** + * Opens an anonymous ZipArchive for reading using an istream. There must be + * seek functionality via seekg() and tellg() on the istream. + * + * If owns_pointer is true, then the ZipArchive assumes ownership of the stream + * pointer and will delete it when the ZIP file is closed, including if this + * function returns false. + * + * The given stream must be seekable. + */ +bool ZipArchive:: +open_read(IStreamWrapper *stream, bool owns_pointer) { + close(); + _read = stream; + _owns_stream = owns_pointer; + return read_index(); +} + +/** + * Opens the named ZipArchive on disk for writing. If there already exists a + * file by that name, it is truncated. The ZipArchive is then prepared for + * accepting a brand new set of subfiles, which will be written to the + * indicated filename. Individual subfiles may not be extracted or read. + * + * Also see the version of open_write() which accepts an ostream. Returns + * true on success, false on failure. + */ +bool ZipArchive:: +open_write(const Filename &filename) { + close(); + Filename fname = filename; + fname.set_binary(); + if (!fname.open_write(_write_file, true)) { + return false; + } + _write = &_write_file; + _filename = filename; + _index_start = 0; + _file_end = 0; + _index_changed = true; + return true; +} + +/** + * Opens an anonymous ZipArchive for writing using an ostream. 
+ * + * If owns_pointer is true, then the ZipArchive assumes ownership of the stream + * pointer and will delete it when the ZIP file is closed, including if this + * function returns false. + */ +bool ZipArchive:: +open_write(std::ostream *stream, bool owns_pointer) { + close(); + _write = stream; + _owns_stream = owns_pointer; + _write->seekp(0, ios::beg); + _index_start = 0; + _file_end = 0; + _index_changed = true; + return true; +} + +/** + * Opens the named ZipArchive on disk for reading and writing. If there + * already exists a file by that name, its index is read. Subfiles may be + * added or removed, and the resulting changes will be written to the named + * file. + * + * Also see the version of open_read_write() which accepts an iostream. + * Returns true on success, false on failure. + */ +bool ZipArchive:: +open_read_write(const Filename &filename) { + close(); + Filename fname = filename; + fname.set_binary(); + bool exists = fname.exists(); + if (!fname.open_read_write(_read_write_file)) { + return false; + } + _read = &_read_write_filew; + _write = &_read_write_file; + _filename = filename; + + if (exists) { + return read_index(); + } else { + _index_start = 0; + _file_end = 0; + _index_changed = true; + return true; + } +} + +/** + * Opens an anonymous ZipArchive for reading and writing using an iostream. + * There must be seek functionality via seekg()/seekp() and tellg()/tellp() on + * the iostream. + * + * If owns_pointer is true, then the ZipArchive assumes ownership of the stream + * pointer and will delete it when the ZIP file is closed, including if this + * function returns false. + */ +bool ZipArchive:: +open_read_write(std::iostream *stream, bool owns_pointer) { + close(); + + // We don't support locking when opening a file in read-write mode, because + // we don't bother with locking on write. But we need to have an + // IStreamWrapper to assign to the _read member, so we create one on-the-fly + // here. 
+ _read = new StreamWrapper(stream, owns_pointer); + _write = stream; + _owns_stream = true; // Because we own the StreamWrapper, above. + _write->seekp(0, ios::beg); + + // Check whether the read stream is empty. + stream->seekg(0, ios::end); + if (stream->tellg() == (streampos)0) { + // The read stream is empty, which is always valid. + _index_changed = true; + return true; + } + + // The read stream is not empty, so we'd better have a valid ZipArchive. + return read_index(); +} + +/** + * Verifies the integrity of the contents of the ZIP archive. + */ +bool ZipArchive:: +verify() { + nassertr_always(is_read_valid(), false); + + _read->acquire(); + std::istream *read = _read->get_istream(); + + bool passes = true; + + for (Subfile *subfile : _subfiles) { + if (!subfile->read_header(*read) || + !subfile->verify_data(*read)) { + passes = false; + } + } + + _read->release(); + return passes; +} + +/** + * Closes the ZipArchive if it is open. All changes are flushed to disk, and + * the file becomes invalid for further operations until the next call to + * open(). + */ +void ZipArchive:: +close() { + flush(); + + if (_owns_stream) { + // We prefer to delete the IStreamWrapper over the ostream, if possible. + if (_read != nullptr) { + // Only delete it if no SubStream is still referencing it. + if (!_read->unref()) { + delete _read; + } + } else if (_write != nullptr) { + delete _write; + } + } + + _read = nullptr; + _write = nullptr; + _owns_stream = false; + _index_start = 0; + _file_end = 0; + _index_changed = false; + _needs_repack = false; + + _read_file.close(); + _write_file.close(); + _read_write_file.close(); + _filename = Filename(); + + clear_subfiles(); +} + +/** + * Adds a file on disk as a subfile to the ZipArchive. The file named by + * filename will be read and added to the ZipArchive immediately, but the index + * will not be updated until you call flush(). 
If there already exists a + * subfile with the indicated name, it is replaced without examining its + * contents (but see also update_subfile). + * + * Returns the subfile name on success (it might have been modified slightly), + * or empty string on failure. + */ +std::string ZipArchive:: +add_subfile(const std::string &subfile_name, const Filename &filename, + int compression_level) { + nassertr(is_write_valid(), std::string()); + +#ifndef HAVE_ZLIB + express_cat.warning() + << "zlib not compiled in; unable to modify ZIP file.\n"; + return std::string(); +#endif + + VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr(); + PT(VirtualFile) vfile = vfs->get_file(filename); + if (vfile == nullptr) { + return std::string(); + } + + std::istream *in = vfs->open_read_file(filename, false); + if (in == nullptr) { + return std::string(); + } + + std::string name = add_subfile(subfile_name, in, compression_level); + vfs->close_read_file(in); + return name; +} + +/** + * Adds a file from a stream as a subfile to the ZipArchive. The indicated + * istream will be read and its contents added to the end of the current ZIP + * file immediately. + * + * Note that the istream must remain untouched and unused by any other code + * until flush() is called. At that time, the index of the ZIP archive will be + * rewritten to the end of the file. + * + * Returns the subfile name on success (it might have been modified slightly), + * or empty string on failure. 
+ */ +std::string ZipArchive:: +add_subfile(const std::string &subfile_name, std::istream *subfile_data, + int compression_level) { + nassertr(is_write_valid(), string()); + +#ifndef HAVE_ZLIB + express_cat.warning() + << "zlib not compiled in; unable to modify ZIP file.\n"; + return std::string(); +#endif + + std::string name = standardize_subfile_name(subfile_name); + if (!name.empty()) { + Subfile *subfile = new Subfile(subfile_name, compression_level); + + // Write it straight away, overwriting the index at the end of the file. + // This index will be rewritten at the next call to flush() or close(). + std::streampos fpos = _index_start; + _write->seekp(fpos); + + if (!subfile->write_header(*_write, fpos)) { + delete subfile; + return ""; + } + + if (!subfile->write_data(*_write, subfile_data, fpos, compression_level)) { + // Failed to write the data. + delete subfile; + return ""; + } + + if (fpos > _index_start) { + _index_start = fpos; + } + add_new_subfile(subfile, compression_level); + } + + return name; +} + +/** + * Adds a file on disk to the subfile. If a subfile already exists with the + * same name, its contents are compared byte-for-byte to the disk file, and it + * is replaced only if it is different; otherwise, the ZIP file is left + * unchanged. + * + * Either Filename:::set_binary() or set_text() must have been called + * previously to specify the nature of the source file. If set_text() was + * called, the text flag will be set on the subfile. 
+ */ +string ZipArchive:: +update_subfile(const std::string &subfile_name, const Filename &filename, + int compression_level) { + nassertr(is_write_valid(), string()); + +#ifndef HAVE_ZLIB + express_cat.warning() + << "zlib not compiled in; unable to modify ZIP file.\n"; + return std::string(); +#endif + + if (!filename.exists()) { + return string(); + } + std::string name = standardize_subfile_name(subfile_name); + if (!name.empty()) { + int index = find_subfile(name); + if (index >= 0) { + // The subfile already exists; compare it to the source file. + if (compare_subfile(index, filename)) { + // The files are identical; do nothing. + return name; + } + } + + // The subfile does not already exist or it is different from the source + // file. Add the new source file. + Subfile *subfile = new Subfile(name, compression_level); + add_new_subfile(subfile, compression_level); + } + + return name; +} + +/** + * Ensures that any changes made to the ZIP archive have been synchronized to + * disk. In particular, this causes the central directory to be rewritten at + * the end of the file. + * + * This may result in a suboptimal packing in the ZIP file, especially if + * existing files were changed or files were removed. To guarantee that the + * file is as compact as it can be, call repack() instead of flush(). + * + * It is not necessary to call flush() explicitly unless you are concerned + * about reading the recently-added subfiles immediately. + * + * Returns true on success, false on failure. + */ +bool ZipArchive:: +flush() { + if (!is_write_valid()) { + return false; + } + + nassertr(_write != nullptr, false); + + // First, mark out all of the removed subfiles. 
+ for (Subfile *subfile : _removed_subfiles) { + delete subfile; + } + _removed_subfiles.clear(); + + if (_index_changed) { + std::streampos fpos = _index_start; + _write->seekp(fpos); + if (!write_index(*_write, fpos)) { + express_cat.info() + << "Unable to write updated central directory to ZIP archive " << _filename << ".\n"; + close(); + return false; + } + _index_changed = false; + } + + _write->flush(); + return true; +} + +/** + * Forces a complete rewrite of the ZipArchive and all of its contents, so that + * the files are tightly packed in the file without any gaps. This is useful to + * do after removing files, to ensure that the file size is minimized. + * + * It is only valid to call this if the ZipArchive was opened using + * open_read_write() and an explicit filename, rather than an iostream. Also, + * we must have write permission to the directory containing the ZipArchive. + * + * Returns true on success, false on failure. + */ +bool ZipArchive:: +repack() { + nassertr(is_write_valid() && is_read_valid(), false); + nassertr(!_filename.empty(), false); + + // First, we open a temporary filename to copy the ZipArchive to. + Filename dirname = _filename.get_dirname(); + if (dirname.empty()) { + dirname = "."; + } + Filename temp_filename = Filename::temporary(dirname, "ziptemp"); + temp_filename.set_binary(); + pofstream temp; + if (!temp_filename.open_write(temp)) { + express_cat.info() + << "Unable to open temporary file " << temp_filename << "\n"; + return false; + } + + // Now we scrub our internal structures so it looks like we're a brand new + // ZipArchive. + for (Subfile *subfile : _removed_subfiles) { + delete subfile; + } + _removed_subfiles.clear(); + + // And we write our contents to our new temporary file. + //_write = &temp; + + bool success = true; + + _read->acquire(); + std::istream &read = *_read->get_istream(); + + // Copy over all of the subfiles. 
+ std::streampos fpos = 0; + + for (Subfile *subfile : _subfiles) { + if (!subfile->read_header(read)) { + success = false; + continue; + } + + // We don't need to write a data descriptor, since at this point we know + // the checksum and sizes. + subfile->_flags &= ~SF_data_descriptor; + + if (!subfile->write_header(temp, fpos)) { + success = false; + continue; + } + + static const size_t buffer_size = 4096; + char buffer[buffer_size]; + uint64_t num_bytes = subfile->_data_length; + fpos += num_bytes; + + while (num_bytes >= buffer_size) { + read.read(buffer, buffer_size); + temp.write(buffer, buffer_size); + num_bytes -= buffer_size; + } + if (num_bytes > 0) { + read.read(buffer, num_bytes); + temp.write(buffer, num_bytes); + } + } + _read->release(); + + // Write the central directory at the end of the file. + success = success && write_index(temp, fpos); + + if (!success) { + express_cat.error() + << "Failed to write repacked archive to " << temp_filename << ".\n"; + temp.close(); + temp_filename.unlink(); + return false; + } + + // Now close everything, and move the temporary file back over our original + // file. + Filename orig_name = _filename; + temp.close(); + close(); + orig_name.unlink(); + if (!temp_filename.rename_to(orig_name)) { + express_cat.info() + << "Unable to rename temporary file " << temp_filename << " to " + << orig_name << ".\n"; + return false; + } + + if (!open_read_write(orig_name)) { + express_cat.info() + << "Unable to read newly repacked " << _filename + << ".\n"; + return false; + } + + return true; +} + +/** + * Returns the number of subfiles within the ZipArchive. The subfiles may be + * accessed in alphabetical order by iterating through [0 .. + * get_num_subfiles()). + */ +int ZipArchive:: +get_num_subfiles() const { + return _subfiles.size(); +} + +/** + * Returns the index of the subfile with the indicated name, or -1 if the + * named subfile is not within the ZipArchive. 
+ */ +int ZipArchive:: +find_subfile(const std::string &subfile_name) const { + Subfile find_subfile; + find_subfile._name = standardize_subfile_name(subfile_name); + Subfiles::const_iterator fi; + fi = _subfiles.find(&find_subfile); + if (fi == _subfiles.end()) { + // Not present. + return -1; + } + return (fi - _subfiles.begin()); +} + +/** + * Returns true if the indicated subfile name is the directory prefix to one + * or more files within the ZipArchive. That is, the ZipArchive contains at + * least one file named "subfile_name/...". + */ +bool ZipArchive:: +has_directory(const std::string &subfile_name) const { + string prefix = subfile_name; + if (!prefix.empty()) { + prefix += '/'; + } + Subfile find_subfile; + find_subfile._name = prefix; + Subfiles::const_iterator fi; + fi = _subfiles.upper_bound(&find_subfile); + if (fi == _subfiles.end()) { + // Not present. + return false; + } + + // At least one subfile exists whose name sorts after prefix. If it + // contains prefix as the initial substring, then we have a match. + Subfile *subfile = (*fi); + return (subfile->_name.length() > prefix.length() && + subfile->_name.substr(0, prefix.length()) == prefix); +} + +/** + * Considers subfile_name to be the name of a subdirectory within the + * ZipArchive, but not a file itself; fills the given vector up with the sorted + * list of subdirectories or files within the named directory. + * + * Note that directories do not exist explicitly within a ZipArchive; this just + * checks for the existence of files with the given initial prefix. + * + * Returns true if successful, false otherwise. 
+ */ +bool ZipArchive:: +scan_directory(vector_string &contents, const std::string &subfile_name) const { + string prefix = subfile_name; + if (!prefix.empty()) { + prefix += '/'; + } + Subfile find_subfile; + find_subfile._name = prefix; + Subfiles::const_iterator fi; + fi = _subfiles.upper_bound(&find_subfile); + + string previous = ""; + while (fi != _subfiles.end()) { + Subfile *subfile = (*fi); + if (!(subfile->_name.length() > prefix.length() && + subfile->_name.substr(0, prefix.length()) == prefix)) { + // We've reached the end of the list of subfiles beneath the indicated + // directory prefix. + return true; + } + + size_t slash = subfile->_name.find('/', prefix.length()); + string basename = subfile->_name.substr(prefix.length(), slash - prefix.length()); + if (basename != previous) { + contents.push_back(basename); + previous = basename; + } + ++fi; + } + + return true; +} + +/** + * Removes the nth subfile from the ZipArchive. This will cause all subsequent + * index numbers to decrease by one. The file will not actually be removed + * from the disk until the next call to flush(). + * + * Note that this does not actually remove the data from the indicated + * subfile; it simply removes it from the index. The ZipArchive will not be + * reduced in size after this operation, until the next call to repack(). + */ +void ZipArchive:: +remove_subfile(int index) { + nassertv(is_write_valid()); + nassertv(index >= 0 && index < (int)_subfiles.size()); + Subfile *subfile = _subfiles[index]; + //subfile->_flags |= SF_deleted; + _removed_subfiles.push_back(subfile); + _subfiles.erase(_subfiles.begin() + index); + + // We'll need to rewrite the index to remove it. The packing is also + // suboptimal now, so a repack would be good. + _index_changed = true; + _needs_repack = true; +} + +/** + * Returns the name of the nth subfile. 
+ */ +const string &ZipArchive:: +get_subfile_name(int index) const { +#ifndef NDEBUG + static string empty_string; + nassertr(index >= 0 && index < (int)_subfiles.size(), empty_string); +#endif + return _subfiles[index]->_name; +} + +/** + * Returns the uncompressed data length of the nth subfile. + */ +size_t ZipArchive:: +get_subfile_length(int index) const { + nassertr(index >= 0 && index < (int)_subfiles.size(), 0); + return _subfiles[index]->_uncompressed_length; +} + +/** + * Returns the modification time of the nth subfile. If this is called on an + * older .zip file, which did not store individual timestamps in the file (or + * if get_record_timestamp() is false), this will return the modification time + * of the overall ZIP file. + */ +time_t ZipArchive:: +get_subfile_timestamp(int index) const { + nassertr(index >= 0 && index < (int)_subfiles.size(), 0); + return _subfiles[index]->_timestamp; +} + +/** + * Returns true if the indicated subfile has been compressed when stored + * within the archive, false otherwise. + */ +bool ZipArchive:: +is_subfile_compressed(int index) const { + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + return _subfiles[index]->is_compressed(); +} + +/** + * Returns true if the indicated subfile has been encrypted when stored within + * the archive, false otherwise. + */ +bool ZipArchive:: +is_subfile_encrypted(int index) const { + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + return _subfiles[index]->is_encrypted(); +} + +/** + * Returns the starting byte position within the ZipArchive at which the + * indicated subfile begins. This may be used, with + * get_subfile_internal_length(), for low-level access to the subfile, but + * usually it is better to use open_read_subfile() instead (which + * automatically decrypts and/or uncompresses the subfile data). 
+ */ +streampos ZipArchive:: +get_subfile_internal_start(int index) const { + nassertr(index >= 0 && index < (int)_subfiles.size(), 0); + _read->acquire(); + _subfiles[index]->read_header(*_read->get_istream()); + std::streampos data_start = _read->get_istream()->tellg(); + _read->release(); + return data_start; +} + +/** + * Returns the number of bytes the indicated subfile consumes within the + * archive. For compressed subfiles, this will generally be smaller than + * get_subfile_length(); for encrypted (but noncompressed) subfiles, it may be + * slightly different, for noncompressed and nonencrypted subfiles, it will be + * equal. + */ +size_t ZipArchive:: +get_subfile_internal_length(int index) const { + nassertr(index >= 0 && index < (int)_subfiles.size(), 0); + return _subfiles[index]->_data_length; +} + +/** + * Returns an istream that may be used to read the indicated subfile. You may + * seek() within this istream to your heart's content; even though it will be + * a reference to the already-opened pfstream of the ZipArchive itself, byte 0 + * appears to be the beginning of the subfile and EOF appears to be the end of + * the subfile. + * + * The returned istream will have been allocated via new; you should pass the + * pointer to close_read_subfile() when you are finished with it to delete it + * and release its resources. + * + * Any future calls to repack() or close() (or the ZipArchive destructor) will + * invalidate all currently open subfile pointers. + * + * The return value will be NULL if the stream cannot be opened for some + * reason. + */ +std::istream *ZipArchive:: +open_read_subfile(int index) { + nassertr(is_read_valid(), nullptr); + nassertr(index >= 0 && index < (int)_subfiles.size(), nullptr); + Subfile *subfile = _subfiles[index]; + + return open_read_subfile(subfile); +} + +/** + * Closes a file opened by a previous call to open_read_subfile(). 
This + * really just deletes the istream pointer, but it is recommended to use this + * interface instead of deleting it explicitly, to help work around compiler + * issues. + */ +void ZipArchive:: +close_read_subfile(std::istream *stream) { + if (stream != nullptr) { + // For some reason--compiler bug in gcc 3.2?--explicitly deleting the + // stream pointer does not call the appropriate global delete function; + // instead apparently calling the system delete function. So we call the + // delete function by hand instead. +#if !defined(WIN32_VC) && !defined(USE_MEMORY_NOWRAPPERS) && defined(REDEFINE_GLOBAL_OPERATOR_NEW) + stream->~istream(); + (*global_operator_delete)(stream); +#else + delete stream; +#endif + } +} + +/** + * Extracts the nth subfile into a file with the given name. + */ +bool ZipArchive:: +extract_subfile(int index, const Filename &filename) { + nassertr(is_read_valid(), false); + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + + filename.make_dir(); + + Filename fname = filename; + if (!filename.is_text()) { + fname.set_binary(); + } + + pofstream out; + if (!fname.open_write(out, true)) { + express_cat.info() + << "Unable to write to file " << filename << "\n"; + return false; + } + + return extract_subfile_to(index, out); +} + +/** + * Extracts the nth subfile to the indicated ostream. 
+ */ +bool ZipArchive:: +extract_subfile_to(int index, std::ostream &out) { + nassertr(is_read_valid(), false); + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + + std::istream *in = open_read_subfile(index); + if (in == nullptr) { + return false; + } + + static const size_t buffer_size = 4096; + char buffer[buffer_size]; + + in->read(buffer, buffer_size); + size_t count = in->gcount(); + while (count != 0) { + out.write(buffer, count); + in->read(buffer, buffer_size); + count = in->gcount(); + } + + bool failed = (in->fail() && !in->eof()); + close_read_subfile(in); + nassertr(!failed, false); + + return (!out.fail()); +} + +/** + * Performs a byte-for-byte comparison of the indicated file on disk with the + * nth subfile. Returns true if the files are equivalent, or false if they + * are different (or the file is missing). + * + * If Filename::set_binary() or set_text() has already been called, it + * specifies the nature of the source file. If this is different from the + * text flag of the subfile, the comparison will always return false. If this + * has not been specified, it will be set from the text flag of the subfile. + */ +bool ZipArchive:: +compare_subfile(int index, const Filename &filename) { + nassertr(is_read_valid(), false); + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + + if (!filename.exists()) { + express_cat.info() + << "File is missing: " << filename << "\n"; + return false; + } + + std::istream *in1 = open_read_subfile(index); + if (in1 == nullptr) { + return false; + } + + pifstream in2; + + if (!filename.open_read(in2)) { + express_cat.info() + << "Cannot read " << filename << "\n"; + return false; + } + + if (filename.is_binary()) { + // Check the file size. + in2.seekg(0, ios::end); + streampos file_size = in2.tellg(); + + if (file_size != (streampos)get_subfile_length(index)) { + // The files have different sizes. 
+ close_read_subfile(in1); + return false; + } + } + + // Check the file data, byte-for-byte. + in2.seekg(0); + int byte1 = in1->get(); + int byte2 = in2.get(); + while (!in1->fail() && !in2.fail()) { + if (byte1 != byte2) { + close_read_subfile(in1); + return false; + } + byte1 = in1->get(); + byte2 = in2.get(); + } + + bool failed = (in1->fail() && !in1->eof()) || (in2.fail() && !in2.eof()); + close_read_subfile(in1); + + nassertr(!failed, false); + + return true; +} + +/** + * + */ +void ZipArchive:: +output(std::ostream &out) const { + out << "ZipArchive " << _filename << ", " << get_num_subfiles() + << " subfiles.\n"; +} + +/** + * Shows a list of all subfiles within the ZipArchive. + */ +void ZipArchive:: +ls(std::ostream &out) const { + int num_subfiles = get_num_subfiles(); + for (int i = 0; i < num_subfiles; i++) { + string subfile_name = get_subfile_name(i); + out << subfile_name << "\n"; + } +} + +/** + * Sets the string which is appended to the very end of the ZIP archive. + * This string may not be longer than 65535 characters. + */ +void ZipArchive:: +set_comment(const std::string &comment) { + nassertv(comment.size() <= 65535); + + if (_comment != comment) { + _comment = comment; + _index_changed = true; + } +} + +/** + * Fills a string with the entire contents of the indicated subfile. + */ +bool ZipArchive:: +read_subfile(int index, string &result) { + result = string(); + + // We use a temporary pvector, because dynamic accumulation of a pvector + // seems to be many times faster than that of a string, at least on the + // Windows implementation of STL. + vector_uchar pv; + if (!read_subfile(index, pv)) { + return false; + } + + if (!pv.empty()) { + result.append((const char *)&pv[0], pv.size()); + } + + return true; +} + +/** + * Fills a pvector with the entire contents of the indicated subfile. 
+ */ +bool ZipArchive:: +read_subfile(int index, vector_uchar &result) { + nassertr(is_read_valid(), false); + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + result.clear(); + + // Now look up the particular Subfile we are reading. + nassertr(is_read_valid(), false); + nassertr(index >= 0 && index < (int)_subfiles.size(), false); + Subfile *subfile = _subfiles[index]; + + result.reserve(subfile->_uncompressed_length); + + bool success = true; + if (subfile->is_compressed() || subfile->is_encrypted()) { + // If the subfile is encrypted or compressed, we can't read it directly. + // Fall back to the generic implementation. + std::istream *in = open_read_subfile(index); + if (in == nullptr) { + return false; + } + + success = VirtualFile::simple_read_file(in, result); + close_read_subfile(in); + + } else { + // But if the subfile is just a plain file, we can just read the data + // directly from the ZipArchive, without paying the cost of an ISubStream. + static const size_t buffer_size = 4096; + char buffer[buffer_size]; + + _read->acquire(); + if (!subfile->read_header(*_read->get_istream())) { + _read->release(); + express_cat.error() + << "Failed to read local header of " + << _filename << "/" << subfile->_name << "\n"; + return false; + } + std::istream &read = *_read->get_istream(); + /*std::streampos data_start =*/ read.tellg(); + + size_t bytes_to_go = subfile->_uncompressed_length; + read.read(buffer, std::min(bytes_to_go, buffer_size)); + size_t read_bytes = read.gcount(); + + while (read_bytes > 0) { + result.insert(result.end(), buffer, buffer + read_bytes); + + bytes_to_go -= read_bytes; + if (bytes_to_go == 0) { + break; + } + + read.read(buffer, std::min(bytes_to_go, buffer_size)); + read_bytes = read.gcount(); + } + + _read->release(); + success = (bytes_to_go == 0); + } + + if (!success) { + std::ostringstream message; + message << "I/O error reading from " << get_filename() << " at " + << get_subfile_name(index); + 
nassert_raise(message.str()); + return false; + } + + return true; +} + +/** + * Adds a newly-allocated Subfile pointer to the ZipArchive. + */ +void ZipArchive:: +add_new_subfile(Subfile *subfile, int compression_level) { + // We'll need to rewrite the index after this. + _index_changed = true; + + std::pair insert_result = _subfiles.insert(subfile); + if (!insert_result.second) { + // Hmm, unable to insert. There must already be a subfile by that name. + // Add it to the _removed_subfiles list, so we can remove the old one. + std::swap(subfile, *insert_result.first); + _removed_subfiles.push_back(subfile); + + // Since we're removing a subfile and adding the new one at the end, we've + // got empty space. A repack would be good. + _needs_repack = true; + } +} + +/** + * This variant of open_read_subfile() is used internally only, and accepts a + * pointer to the internal Subfile object, which is assumed to be valid and + * written to the multifile. + */ +std::istream *ZipArchive:: +open_read_subfile(Subfile *subfile) { + // Read the header first. + _read->acquire(); + if (!subfile->read_header(*_read->get_istream())) { + _read->release(); + express_cat.error() + << "Failed to read local header of " + << _filename << "/" << subfile->_name << "\n"; + return nullptr; + } + std::streampos data_start = _read->get_istream()->tellg(); + _read->release(); + + // Return an ISubStream object that references into the open ZipArchive + // istream. + nassertr(data_start != (streampos)0, nullptr); + std::istream *stream = + new ISubStream(_read, data_start, + data_start + (streampos)subfile->_data_length); + + if (subfile->is_compressed()) { +#ifndef HAVE_ZLIB + express_cat.error() + << "zlib not compiled in; cannot read compressed multifiles.\n"; + return nullptr; +#else // HAVE_ZLIB + // Oops, the subfile is compressed. So actually, return an + // IDecompressStream that wraps around the ISubStream. 
+ IDecompressStream *wrapper = new IDecompressStream(stream, true, -1, false); + stream = wrapper; +#endif // HAVE_ZLIB + } + + if (stream->fail()) { + // Hmm, some inexplicable problem. + delete stream; + return nullptr; + } + + return stream; +} + +/** + * Returns the standard form of the subfile name. + */ +string ZipArchive:: +standardize_subfile_name(const std::string &subfile_name) const { + Filename name = subfile_name; + name.standardize(); + if (name.empty() || name == "/") { + // Invalid empty name. + return string(); + } + + if (name[0] == '/') { + return name.get_fullpath().substr(1); + } else if (name.length() > 2 && name[0] == '.' && name[1] == '/') { + return name.get_fullpath().substr(2); + } else { + return name.get_fullpath(); + } +} + +/** + * Removes the set of subfiles from the tables and frees their associated + * memory. + */ +void ZipArchive:: +clear_subfiles() { + for (Subfile *subfile : _removed_subfiles) { + delete subfile; + } + _removed_subfiles.clear(); + + for (Subfile *subfile : _subfiles) { + delete subfile; + } + _subfiles.clear(); +} + +/** + * Reads the ZipArchive header and index. Returns true if successful, false if + * the ZipArchive is not valid. + * + * Assumes that the get pointer is at the end of the file. + */ +bool ZipArchive:: +read_index() { + nassertr(_read != nullptr, false); + + // We acquire the IStreamWrapper lock for the duration of this method. + _read->acquire(); + std::istream *read = _read->get_istream(); + + // ZIP files need to be read from the end. 
+ read->seekg(-2, std::ios::end); + if (read->fail()) { + express_cat.info() + << "Unable to seek ZIP archive " << _filename << ".\n"; + _read->release(); + close(); + return false; + } + + std::streampos fpos = read->tellg(); + _file_end = fpos + (std::streamoff)2; + + uint64_t cdir_entries = 0; + uint64_t cdir_offset = 0; + uint64_t cdir_size = 0; + uint32_t comment_length = 0; + std::streampos eocd_offset = 0; + bool found = false; + + // Seek backwards until we have found the the end-of-directory record. + StreamReader reader(read, false); + while (comment_length <= 0xffff && fpos >= 20) { + if (reader.get_uint16() == comment_length) { + // This field references the distance to the end of the .zip file, so it + // could be the comment length field at the end of the record. Skip to the + // beginning of the record to see if the signature matches. + read->seekg(-22, std::ios::cur); + if (reader.get_uint32() == 0x06054b50) { + // Yes, got it. + eocd_offset = read->tellg() - (std::streamoff)4; + reader.skip_bytes(6); + cdir_entries = reader.get_uint16(); + cdir_size = reader.get_uint32(); + cdir_offset = reader.get_uint32(); + if (comment_length > 0) { + _comment = reader.get_fixed_string(comment_length); + } else { + _comment.clear(); + } + found = true; + break; + } + } + + comment_length += 2; + read->seekg(-2, std::ios::cur); + fpos -= 2; + } + + if (!found) { + express_cat.info() + << "Unable to find end-of-directory record in ZIP archive " << _filename << ".\n"; + _read->release(); + close(); + return false; + } + + // Now look for a ZIP64 end-of-central-directory locator. 
+ if (eocd_offset >= 20) { + uint64_t eocd64_offset = 0; + read->seekg(eocd_offset - (std::streamoff)20); + if (reader.get_uint32() == 0x07064b50) { + reader.skip_bytes(4); // disk no + eocd64_offset = reader.get_uint64(); + reader.skip_bytes(4); // disk count + + read->seekg(eocd64_offset); + if (reader.get_uint32() == 0x06064b50) { + reader.skip_bytes(20); + cdir_entries = reader.get_uint64(); + cdir_size = reader.get_uint64(); + cdir_offset = reader.get_uint64(); + } else { + express_cat.info() + << "Unable to read ZIP64 end-of-directory record in ZIP archive " + << _filename << ".\n"; + } + } + } + + _index_start = cdir_offset; + + // Find the central directory. + read->seekg((std::streampos)cdir_offset); + if (read->fail()) { + express_cat.info() + << "Unable to locate central directory in ZIP archive " << _filename << ".\n"; + _read->release(); + close(); + return false; + } + + _record_timestamp = false; + + for (size_t i = 0; i < cdir_entries; ++i) { + Subfile *subfile = new Subfile; + if (!subfile->read_index(*read)) { + express_cat.info() + << "Failed to read central directory for " << _filename << ".\n"; + _read->release(); + close(); + return false; + } + + if (subfile->_timestamp != dos_epoch) { + // If all subfiles have the timestamp set to the DOS epoch, we apparently + // don't care about preserving timestamps. + _record_timestamp = true; + } + _subfiles.push_back(subfile); + } + + // Sort the subfiles. + size_t before_size = _subfiles.size(); + _subfiles.sort(); + size_t after_size = _subfiles.size(); + + // If these don't match, the same filename appeared twice in the index, + // which shouldn't be possible. + nassertr(before_size == after_size, false); + + _read->release(); + return true; +} + +/** + * Writes the index of the ZIP archive at the current put position. 
+ */ +bool ZipArchive:: +write_index(std::ostream &write, std::streampos &fpos) { + nassertr(write.tellp() == fpos, false); + + _index_start = fpos; + + for (Subfile *subfile : _subfiles) { + if (!subfile->write_index(write, fpos)) { + express_cat.info() + << "Failed to write central directory entry for " + << _filename << "/" << subfile->_name << ".\n"; + _read->release(); + close(); + return false; + } + } + + size_t cdir_entries = _subfiles.size(); + std::streamoff cdir_size = fpos - _index_start; + + StreamWriter writer(write); + + if (_index_start >= 0xffffffff || + cdir_size >= 0xffffffff || + cdir_entries >= 0xffff) { + // Write a ZIP64 end-of-central-directory record. + writer.add_uint32(0x06064b50); + writer.add_uint64(44); // size of the rest of the record (w/o first 12 bytes) + writer.add_uint16(45); // version number that produced this file + writer.add_uint16(45); // version number needed to read this file + writer.add_uint32(0); + writer.add_uint32(0); + writer.add_uint64(cdir_entries); + writer.add_uint64(cdir_entries); + writer.add_uint64(cdir_size); + writer.add_uint64(_index_start); + nassertr(write.tellp() == fpos + std::streamoff(12 + 44), false); + + // And write the ZIP64 end-of-central-directory-record locator. + writer.add_uint32(0x07064b50); + writer.add_uint32(0); + writer.add_uint64(fpos); + writer.add_uint32(1); // number of disks + } + + // Write the end of central directory record. 
+ writer.add_uint32(0x06054b50); + writer.add_uint16(0); + writer.add_uint16(0); + writer.add_uint16(std::min((size_t)0xffffu, cdir_entries)); + writer.add_uint16(std::min((size_t)0xffffu, cdir_entries)); + writer.add_uint32(std::min((std::streamoff)0xffffffffu, cdir_size)); + writer.add_uint32(std::min((std::streampos)0xffffffffu, _index_start)); + writer.add_uint16(_comment.size()); + writer.append_data(_comment); + + if (write.fail()) { + express_cat.warning() + << "Unable to write central directory for " << _filename << ".\n"; + close(); + return false; + } + + fpos = write.tellp(); + if (fpos < _file_end) { + // We didn't hit the end of the file writing the index. This is a problem + // because readers start looking for the EOCD record at the end of the file. + // We'll have to shift the whole index forwards. Unfortunately it's hard to + // anticipate having to do this ahead of time. + fpos = _index_start + (_file_end - fpos); + _needs_repack = true; + write.seekp(fpos); + return write_index(write, fpos); + } + + _file_end = fpos; + return true; +} + +/** + * Creates a new subfile record. + */ +ZipArchive::Subfile:: +Subfile(const std::string &name, int compression_level) : + _name(name), + _timestamp(dos_epoch), + _compression_method((compression_level > 0) ? CM_deflate : CM_store) +{ + // If the name contains any non-ASCII characters, we set the UTF-8 flag. + for (char c : name) { + if (c & ~0x7f) { + _flags |= SF_utf8_encoding; + break; + } + } + + if (compression_level > 6) { + _flags |= SF_deflate_best; + } else if (compression_level > 1 && compression_level < 6) { + _flags |= SF_deflate_fast; + } else if (compression_level == 1) { + _flags |= SF_deflate_fastest; + } +} + +/** + * Reads the index record for the Subfile from the indicated istream. Assumes + * the istream has already been positioned to the indicated stream position, + * fpos, the start of the index record. Returns true on success. 
+ */ +bool ZipArchive::Subfile:: +read_index(std::istream &read) { + StreamReader reader(read); + + if (reader.get_uint32() != 0x02014b50) { + return false; + } + + uint16_t version = reader.get_uint8(); + _system = reader.get_uint8(); + uint16_t min_version = reader.get_uint16(); + _flags = reader.get_uint16(); + _compression_method = (CompressionMethod)reader.get_uint16(); + { + // Convert from DOS/FAT timestamp to UNIX timestamp. + uint16_t mtime = reader.get_uint16(); + uint16_t mdate = reader.get_uint16(); + + struct tm time = {}; + time.tm_sec = (mtime & 0b0000000000011111u) << 1; + time.tm_min = (mtime & 0b0000011111100000u) >> 5; + time.tm_hour = (mtime & 0b1111100000000000u) >> 11; + time.tm_mday = (mdate & 0b0000000000011111u); + time.tm_mon = ((mdate & 0b0000000111100000u) >> 5) - 1; + time.tm_year = ((mdate & 0b1111111000000000u) >> 9) + 80; + time.tm_isdst = -1; + _timestamp = mktime(&time); + } + _checksum = reader.get_uint32(); + _data_length = reader.get_uint32(); + _uncompressed_length = reader.get_uint32(); + size_t name_length = reader.get_uint16(); + size_t extra_length = reader.get_uint16(); + size_t comment_length = reader.get_uint16(); + /*size_t disk_number =*/ reader.get_uint16(); + _internal_attribs = reader.get_uint16(); + _external_attribs = reader.get_uint32(); + _header_start = (std::streampos)reader.get_uint32(); + + std::string name = reader.get_fixed_string(name_length); + + // Read the extra fields, which may include a UNIX timestamp, which can be + // specified with greater precision than a DOS timestamp. + while (extra_length >= 4) { + uint16_t const tag = reader.get_uint16(); + uint16_t const size = reader.get_uint16(); + if (tag == 0x0001) { + // ZIP64 extended info. 
+ int size_left = size; + if (_uncompressed_length == 0xffffffffu && size_left >= 8) { + _uncompressed_length = reader.get_uint64(); + size_left -= 8; + } + if (_data_length == 0xffffffffu && size_left >= 8) { + _data_length = reader.get_uint64(); + size_left -= 8; + } + if (_header_start == 0xffffffffu && size_left >= 8) { + _header_start = reader.get_uint64(); + size_left -= 8; + } + reader.skip_bytes(size_left); + } else if (tag == 0x5455 && size == 5) { + reader.skip_bytes(1); + _timestamp = reader.get_uint32(); + } else { + reader.skip_bytes(size); + } + extra_length -= 4 + size; + } + // Skip leftover bytes in the extra field not large enough to contain a proper + // extra tag. This may be the case for Android .apk files processed with + // zipalign, which uses this for alignment. + reader.skip_bytes(extra_length); + + std::string comment = reader.get_fixed_string(comment_length); + + if (_flags & SF_utf8_encoding) { + _name = std::move(name); + _comment = std::move(comment); + } else { + _name = TextEncoder::reencode_text(name, TextEncoder::E_cp437, TextEncoder::E_utf8); + _comment = TextEncoder::reencode_text(comment, TextEncoder::E_cp437, TextEncoder::E_utf8); + } + + return true; +} + +/** + * Reads the header record for the Subfile from the indicated istream. + */ +bool ZipArchive::Subfile:: +read_header(std::istream &read) { + read.seekg(_header_start); + if (read.fail()) { + return false; + } + + // First, get the next stream position. We do this separately, because if + // it is zero, we don't get anything else. + StreamReader reader(read); + + uint32_t signature = reader.get_uint32(); + if (signature != 0x04034b50) { + //0x02014b50 + express_cat.warning() + << "ZIP subfile " << _name << " header does not contain expected signature\n"; + return false; + } + + // We skip most of the stuff in the local file header, since most of this is + // duplicated in the central directory. 
+ reader.get_uint16(); + int flags = reader.get_uint16(); + + if (flags != _flags) { + express_cat.warning() + << "ZIP subfile " << _name << " flags mismatch between file header and index record\n"; + } + _flags = flags; + + if (reader.get_uint16() != (uint16_t)_compression_method) { + express_cat.warning() + << "ZIP subfile " << _name << " compression method mismatch between file header and index record\n"; + return false; + } + + reader.get_uint32(); + + if (flags & SF_data_descriptor) { + // Ignore these fields, the real values will follow the file. + reader.skip_bytes(4 * 3); + } else { + if (reader.get_uint32() != _checksum) { + express_cat.warning() + << "ZIP subfile " << _name << " CRC32 mismatch between file header and index record\n"; + return false; + } + + // Compressed and uncompressed size + uint32_t data_length = reader.get_uint32(); + uint32_t uncompressed_length = reader.get_uint32(); + + if ((data_length != 0xffffffffu && data_length != _data_length) || + (uncompressed_length != 0xffffffffu && uncompressed_length != _uncompressed_length)) { + express_cat.warning() + << "ZIP subfile " << _name << " length mismatch between file header and index record\n"; + return false; + } + } + + size_t name_length = reader.get_uint16(); + size_t extra_length = reader.get_uint16(); + + std::string name = reader.get_fixed_string(name_length); + if ((flags & SF_utf8_encoding) == 0) { + name = TextEncoder::reencode_text(name, TextEncoder::E_cp437, TextEncoder::E_utf8); + } + + if (extra_length < 4) { + reader.skip_bytes(extra_length); + } else if (extra_length > 0) { + for (int i = 0; i < extra_length;) { + size_t length = reader.get_uint16(); + i += 4; + reader.skip_bytes(length); + i += length; + } + } + + if (name != _name) { + express_cat.warning() + << "Name of ZIP subfile \"" << _name << "\" in index record does not match " + "name in file header \"" << name << "\"\n"; + return false; + } + + //_data_start = read.tellg(); + return true; +} + +/** + * Called 
after read_header to verify the integrity of the data. + * If ZLib support is not enabled, this does not verify the checksum or the + * compression. + */ +bool ZipArchive::Subfile:: +verify_data(std::istream &read) { + //nassertr(read.tellg() == _data_start, false); + + static const size_t buffer_size = 4096; + char buffer[buffer_size]; + +#ifdef HAVE_ZLIB + unsigned long crc = crc32(0L, Z_NULL, 0); + IDecompressStream wrapper; + + std::istream *data_stream; + if (_compression_method == CM_store) { + data_stream = &read; + } + else if (_compression_method == CM_deflate) { + wrapper.open(&read, false, _data_length, false); + data_stream = &wrapper; + } + else { + express_cat.warning() + << "Unable to verify ZIP subfile \"" << _name << "\": compression method " + << (int)_compression_method << " not supported.\n"; + return false; + } + + size_t bytes_to_go = _uncompressed_length; + data_stream->read(buffer, std::min(bytes_to_go, buffer_size)); + size_t read_bytes = data_stream->gcount(); + + while (read_bytes > 0) { + crc = crc32(crc, (unsigned char *)buffer, read_bytes); + + bytes_to_go -= read_bytes; + if (bytes_to_go == 0) { + break; + } + + data_stream->read(buffer, std::min(bytes_to_go, buffer_size)); + read_bytes = data_stream->gcount(); + } + + if (data_stream == &wrapper) { + wrapper.close(); + } + + if (bytes_to_go > 0) { + express_cat.warning() + << "Reached end of compressed data verifying ZIP subfile " << _name << ".\n"; + return false; + } + + if (crc != _checksum) { + express_cat.warning() + << "ZIP file member " << _name << " is corrupted.\n"; + return false; + } +#else + read.ignore(_data_length); + + if (read.eof()) { + express_cat.warning() + << "Reached EOF verifying ZIP subfile " << _name << ".\n"; + return false; + } +#endif + + // If we are expecting a data descriptor, verify that it matches what is in + // the index entry. 
+ if (_flags & SF_data_descriptor) { + StreamReader reader(read); + uint32_t checksum = reader.get_uint32(); + if (checksum == 0x08074b50) { + // There is an optional data descriptor signature. + if (_checksum == 0x08074b50) { + // The CRC32 happens to match the data descriptor signature by accident. + // Since the data descriptor signature is optional, we can't know for + // sure which is which, so let's just not bother validating this. + return true; + } + checksum = reader.get_uint32(); + } + uint32_t data_length = reader.get_uint32(); + uint32_t uncompressed_length = reader.get_uint32(); + if (checksum != _checksum || + data_length != _data_length || + uncompressed_length != _uncompressed_length) { + express_cat.warning() + << "ZIP file member " << _name << " has mismatched data descriptor.\n"; + return false; + } + } + + return true; +} + +/** + * Writes the index record for the Subfile to the indicated ostream. Assumes + * the istream has already been positioned to the indicated stream position, + * fpos, the start of the index record, and that this is the effective end of + * the file. Returns true on success. + * + * The _index_start member is updated by this operation. + */ +bool ZipArchive::Subfile:: +write_index(std::ostream &write, streampos &fpos) { + nassertr(write.tellp() == fpos, false); + + StreamWriter writer(write); + writer.add_uint32(0x02014b50); + + bool zip64_length = + (_data_length >= 0xffffffffu || _uncompressed_length >= 0xffffffffu); + bool zip64_offset = (_header_start >= 0xffffffffu); + size_t extra_length = zip64_length * 16 + zip64_offset * 8; + + if (zip64_length || zip64_offset) { + writer.add_uint8(45); + writer.add_uint8(_system); + writer.add_uint16((_flags & SF_strong_encryption) ? 50 : 45); + } else { + // We just write 2.0 if it we support DEFLATE compression, 1.0 otherwise. +#ifdef HAVE_ZLIB + writer.add_uint8(20); + writer.add_uint8(_system); + writer.add_uint16((_flags & SF_strong_encryption) ? 50 : (is_compressed() ? 
20 : 10)); +#else + writer.add_uint8(10); + writer.add_uint8(_system); + writer.add_uint16((_flags & SF_strong_encryption) ? 50 : 10); +#endif + } + + writer.add_uint16(_flags); + writer.add_uint16((uint16_t)_compression_method); + + if (_timestamp > dos_epoch) { + // Convert from UNIX timestamp to DOS/FAT timestamp. + struct tm *time = localtime(&_timestamp); + writer.add_uint16((time->tm_sec >> 1) + | (time->tm_min << 5) + | (time->tm_hour << 11)); + writer.add_uint16(time->tm_mday + | ((time->tm_min + 1) << 5) + | ((time->tm_year - 1980) << 9)); + } else { + // January 1, 1980 + writer.add_uint16(0); + writer.add_uint16(33); + } + + std::string encoded_name; + std::string encoded_comment; + if (_flags & SF_utf8_encoding) { + encoded_name = _name; + encoded_comment = _comment; + } else { + encoded_name = TextEncoder::reencode_text(_name, TextEncoder::E_utf8, TextEncoder::E_cp437); + encoded_comment = TextEncoder::reencode_text(_comment, TextEncoder::E_utf8, TextEncoder::E_cp437); + } + + writer.add_uint32(_checksum); + if (zip64_length) { + writer.add_uint32(0xffffffffu); + writer.add_uint32(0xffffffffu); + } else { + writer.add_uint32(_data_length); + writer.add_uint32(_uncompressed_length); + } + writer.add_uint16(encoded_name.size()); + writer.add_uint16(extra_length); + writer.add_uint16(encoded_comment.size()); + writer.add_uint16(0); // disk number start + writer.add_uint16(_internal_attribs); + writer.add_uint32(_external_attribs); + writer.add_uint32(zip64_offset ? 0xffffffffu : (uint32_t)_header_start); + + writer.append_data(encoded_name); + + // Write any extra fields. 
+ if (zip64_length || zip64_offset) { + writer.add_uint16(0x0001); + writer.add_uint16(zip64_length * 16 + zip64_offset * 8); + if (zip64_length) { + writer.add_uint64(_data_length); + writer.add_uint64(_uncompressed_length); + } + if (zip64_offset) { + writer.add_uint64(_header_start); + } + } + + writer.append_data(encoded_comment); + + fpos += 46 + extra_length + encoded_name.size() + encoded_comment.size(); + nassertr(write.tellp() == fpos, false); + return !write.fail(); +} + +/** + * Writes the local file header to the indicated ostream. This immediately + * precedes the data, so should be followed up by a call to write_data. + * + * Assumes that the file is currently positioned at the given fpos pointer, and + * advances it by the amount of bytes written to the output (which may be longer + * than the actual size of the subfile). + */ +bool ZipArchive::Subfile:: +write_header(std::ostream &write, std::streampos &fpos) { + nassertr(write.tellp() == fpos, false); + + std::string encoded_name; + if (_flags & SF_utf8_encoding) { + encoded_name = _name; + } else { + encoded_name = TextEncoder::reencode_text(_name, TextEncoder::E_utf8, TextEncoder::E_cp437); + } + + std::streamoff header_size = 30 + encoded_name.size(); + + StreamWriter writer(write); + int modulo = (fpos + header_size) % 4; + if (!is_compressed() && modulo != 0) { + // Align uncompressed files to 4-byte boundary. We don't really need to do + // this, but it's needed when producing .apk files, and it doesn't really + // cause harm to do it in other cases as well. + writer.pad_bytes(4 - modulo); + fpos += (4 - modulo); + } + + _header_start = fpos; + + writer.add_uint32(0x04034b50); + writer.add_uint16((_flags & SF_strong_encryption) ? 50 : (is_compressed() ? 20 : 10)); + + writer.add_uint16(_flags); + writer.add_uint16((uint16_t)_compression_method); + + if (_timestamp > 315532800) { + // Convert from UNIX timestamp to DOS/FAT timestamp. 
+ struct tm *time = localtime(&_timestamp); + writer.add_uint16((time->tm_sec >> 1) + | (time->tm_min << 5) + | (time->tm_hour << 11)); + writer.add_uint16(time->tm_mday + | ((time->tm_min + 1) << 5) + | ((time->tm_year - 1980) << 9)); + } else { + // January 1, 1980 + writer.add_uint16(0); + writer.add_uint16(33); + } + + // This flag is set if we don't yet have the checksum or lengths. We will + // write a data descriptor after the actual data containing these values. + if (_flags & SF_data_descriptor) { + writer.add_uint32(0); + writer.add_uint32(0); + writer.add_uint32(0); + } else { + writer.add_uint32(_checksum); + writer.add_uint32(_data_length); + writer.add_uint32(_uncompressed_length); + } + + writer.add_uint16(encoded_name.size()); + writer.add_uint16(0); // We don't write extras for now. + writer.append_data(encoded_name); + + fpos += header_size; + nassertr(write.tellp() == fpos, false); + return !write.fail(); +} + +/** + * Writes the data record for the Subfile to the indicated ostream: the actual + * contents of the Subfile. Assumes the istream has already been positioned + * to the indicated stream position, fpos, the start of the data record, and + * that this is the effective end of the file. Returns the position within + * the file of the next data record. + * + * The _data_start, _data_length, and _uncompressed_length members are updated + * by this operation. + * + * If the "read" pointer is non-NULL, it is the readable istream of a + * ZipArchive in which the Subfile might already be packed. This is used for + * reading the contents of the Subfile during a repack() operation. 
+ */ +bool ZipArchive::Subfile:: +write_data(std::ostream &write, std::istream *read, std::streampos &fpos, int compression_level) { + nassertr(write.tellp() == fpos, false); + + if (!is_compressed()) { + nassertr((fpos % 4) == 0, false); + } + + //_data_start = fpos; + + std::ostream *putter = &write; + bool delete_putter = false; + +#ifndef HAVE_ZLIB + // Without ZLIB, we can't support compression. The flag had better not be + // set. + nassertr(!is_compressed(), false); +#else // HAVE_ZLIB + if (is_compressed()) { + // Write it compressed. + putter = new OCompressStream(putter, delete_putter, compression_level, false); + delete_putter = true; + } +#endif // HAVE_ZLIB + + static const size_t buffer_size = 4096; + char buffer[buffer_size]; + size_t total_count = 0; + +#ifdef HAVE_ZLIB + unsigned long crc = crc32(0L, Z_NULL, 0); +#endif + + read->read(buffer, buffer_size); + size_t count = read->gcount(); + while (count != 0) { +#ifdef HAVE_ZLIB + crc = crc32(crc, (unsigned char *)buffer, count); +#endif + total_count += count; + putter->write(buffer, count); + read->read(buffer, buffer_size); + count = read->gcount(); + } + + if (delete_putter) { + delete putter; + } + + if (is_compressed()) { + std::streampos write_end = write.tellp(); + _data_length = (size_t)(write_end - fpos); + fpos = write_end; + } else { + _data_length = total_count; + fpos += total_count; + } + _uncompressed_length = total_count; +#ifdef HAVE_ZLIB + _checksum = crc; +#endif + + //TODO: what if we need a zip64 data descriptor? 
+ if (_flags & SF_data_descriptor) { + StreamWriter writer(write); + writer.add_uint32(0x08074b50); + writer.add_uint32(_checksum); + writer.add_uint32(_data_length); + writer.add_uint32(_uncompressed_length); + fpos += 16; + } + + nassertr(write.tellp() == fpos, false); + return !write.fail(); +} diff --git a/panda/src/express/zipArchive.h b/panda/src/express/zipArchive.h new file mode 100644 index 0000000000..3a97cbcd21 --- /dev/null +++ b/panda/src/express/zipArchive.h @@ -0,0 +1,205 @@ +/** + * PANDA 3D SOFTWARE + * Copyright (c) Carnegie Mellon University. All rights reserved. + * + * All use of this software is subject to the terms of the revised BSD + * license. You should have received a copy of this license along + * with this source code in a file named "LICENSE." + * + * @file zipArchive.h + * @author rdb + * @date 2019-01-20 + */ + +#ifndef ZIPARCHIVE_H +#define ZIPARCHIVE_H + +#include "pandabase.h" + +#include "config_express.h" +#include "streamWrapper.h" +#include "subStream.h" +#include "filename.h" +#include "ordered_vector.h" +#include "indirectLess.h" +#include "referenceCount.h" +#include "pvector.h" +#include "vector_uchar.h" + +/** + * A file that contains a set of files. 
+ */ +class EXPCL_PANDA_EXPRESS ZipArchive : public ReferenceCount { +PUBLISHED: + ZipArchive(); + ZipArchive(const ZipArchive ©) = delete; + ~ZipArchive(); + + ZipArchive &operator = (const ZipArchive ©) = delete; + +PUBLISHED: + BLOCKING bool open_read(const Filename &filename); + BLOCKING bool open_read(IStreamWrapper *stream, bool owns_pointer = false); + BLOCKING bool open_write(const Filename &filename); + BLOCKING bool open_write(std::ostream *stream, bool owns_pointer = false); + BLOCKING bool open_read_write(const Filename &filename); + BLOCKING bool open_read_write(std::iostream *stream, bool owns_pointer = false); + BLOCKING bool verify(); + BLOCKING void close(); + + INLINE const Filename &get_filename() const; + INLINE void set_filename(const Filename &filename); + + INLINE bool is_read_valid() const; + INLINE bool is_write_valid() const; + INLINE bool needs_repack() const; + + INLINE void set_record_timestamp(bool record_timestamp); + INLINE bool get_record_timestamp() const; + + std::string add_subfile(const std::string &subfile_name, const Filename &filename, + int compression_level); + std::string add_subfile(const std::string &subfile_name, std::istream *subfile_data, + int compression_level); + std::string update_subfile(const std::string &subfile_name, const Filename &filename, + int compression_level); + + BLOCKING bool flush(); + BLOCKING bool repack(); + + int get_num_subfiles() const; + int find_subfile(const std::string &subfile_name) const; + bool has_directory(const std::string &subfile_name) const; + bool scan_directory(vector_string &contents, + const std::string &subfile_name) const; + void remove_subfile(int index); + INLINE bool remove_subfile(const std::string &subfile_name); + const std::string &get_subfile_name(int index) const; + MAKE_SEQ(get_subfile_names, get_num_subfiles, get_subfile_name); + size_t get_subfile_length(int index) const; + time_t get_subfile_timestamp(int index) const; + bool is_subfile_compressed(int index) 
const; + bool is_subfile_encrypted(int index) const; + + std::streampos get_subfile_internal_start(int index) const; + size_t get_subfile_internal_length(int index) const; + + BLOCKING INLINE vector_uchar read_subfile(int index); + BLOCKING std::istream *open_read_subfile(int index); + BLOCKING static void close_read_subfile(std::istream *stream); + BLOCKING bool extract_subfile(int index, const Filename &filename); + BLOCKING bool extract_subfile_to(int index, std::ostream &out); + BLOCKING bool compare_subfile(int index, const Filename &filename); + + void output(std::ostream &out) const; + void ls(std::ostream &out = std::cout) const; + + void set_comment(const std::string &comment); + INLINE const std::string &get_comment() const; + +public: + bool read_subfile(int index, std::string &result); + bool read_subfile(int index, vector_uchar &result); + +private: + enum SubfileFlags : uint16_t { + SF_encrypted = (1 << 0), + SF_deflate_best = (1 << 1), + SF_deflate_fast = (1 << 2), + SF_deflate_fastest = SF_deflate_best | SF_deflate_fast, + SF_data_descriptor = (1 << 3), + SF_strong_encryption = (1 << 6), + SF_utf8_encoding = (1 << 11), + }; + + enum CompressionMethod : uint16_t { + CM_store = 0, + CM_shrink = 1, + CM_reduce1 = 2, + CM_reduce2 = 3, + CM_reduce3 = 4, + CM_reduce4 = 5, + CM_implode = 6, + CM_tokenize = 7, + CM_deflate = 8, + CM_deflate64 = 9, + CM_bzip2 = 12, + CM_lzma = 14, + CM_terse = 18, + CM_lz77 = 19, + CM_wavpack = 97, + CM_ppmd = 98, + }; + + class Subfile { + public: + Subfile() = default; + Subfile(const std::string &name, int compression_level); + + INLINE bool operator < (const Subfile &other) const; + + bool read_index(std::istream &read); + bool read_header(std::istream &read); + bool verify_data(std::istream &read); + bool write_index(std::ostream &write, std::streampos &fpos); + bool write_header(std::ostream &write, std::streampos &fpos); + bool write_data(std::ostream &write, std::istream *read, + std::streampos &fpos, int 
compression_level); + INLINE bool is_compressed() const; + INLINE bool is_encrypted() const; + INLINE std::streampos get_last_byte_pos() const; + + std::string _name; + uint8_t _system = 0; + size_t _index_length = 0; + uint32_t _checksum = 0; + uint64_t _data_length = 0; + uint64_t _uncompressed_length = 0; + time_t _timestamp = 0; + std::streampos _header_start = 0; + uint16_t _internal_attribs = 0; + uint32_t _external_attribs = 0; + std::string _comment; + int _flags = SF_data_descriptor; + CompressionMethod _compression_method = CM_store; + }; + + void add_new_subfile(Subfile *subfile, int compression_level); + std::istream *open_read_subfile(Subfile *subfile); + std::string standardize_subfile_name(const std::string &subfile_name) const; + + void clear_subfiles(); + bool read_index(); + bool write_index(std::ostream &write, std::streampos &fpos); + + typedef ov_set > Subfiles; + Subfiles _subfiles; + typedef pvector PendingSubfiles; + PendingSubfiles _removed_subfiles; + + std::streampos _offset; + IStreamWrapper *_read; + std::ostream *_write; + bool _owns_stream; + std::streampos _index_start = 0; + std::streampos _file_end = 0; + + bool _index_changed; + bool _needs_repack; + bool _record_timestamp; + + pifstream _read_file; + IStreamWrapper _read_filew; + pofstream _write_file; + pfstream _read_write_file; + StreamWrapper _read_write_filew; + Filename _filename; + std::string _header_prefix; + std::string _comment; + + friend class Subfile; +}; + +#include "zipArchive.I" + +#endif
diff --git a/tests/express/test_zip.py b/tests/express/test_zip.py
new file mode 100644
index 0000000000..89b869ff69
--- /dev/null
+++ b/tests/express/test_zip.py
@@ -0,0 +1,197 @@
+"""Tests for ZipArchive, cross-checked against Python's zipfile module."""
+
+from panda3d.core import ZipArchive, IStreamWrapper, StringStream, Filename
+from direct.stdpy.file import StreamIOWrapper
+import zipfile
+from io import BytesIO
+
+
+# A zip archive without entries consists solely of the end of central
+# directory record: its 4-byte signature followed by 18 zero bytes.
+EMPTY_ZIP = b'PK\x05\x06' + b'\x00' * 18
+
+
+def test_zip_read_empty():
+    """Opening an empty archive for reading yields no subfiles."""
+    stream = StringStream(EMPTY_ZIP)
+    wrapper = IStreamWrapper(stream)
+
+    archive = ZipArchive()
+    archive.open_read(wrapper)
+
+    assert archive.is_read_valid()
+    assert not archive.is_write_valid()
+    assert not archive.needs_repack()
+
+    assert archive.get_num_subfiles() == 0
+
+    archive.close()
+
+
+def test_zip_write_empty():
+    """Closing a write-only archive with no subfiles emits EMPTY_ZIP."""
+    stream = StringStream()
+    archive = ZipArchive()
+    archive.open_write(stream)
+
+    assert not archive.is_read_valid()
+    assert archive.is_write_valid()
+    assert not archive.needs_repack()
+
+    archive.close()
+
+    assert stream.data == EMPTY_ZIP
+    with zipfile.ZipFile(StreamIOWrapper(stream), 'r') as zf:
+        assert zf.testzip() is None
+
+
+def test_zip_read_extract(tmp_path):
+    """Subfiles written by zipfile can be found, read and extracted."""
+    stream = StringStream()
+    with zipfile.ZipFile(StreamIOWrapper(stream), mode='w',
+                         allowZip64=True) as zf:
+        zf.writestr("test.txt", b"test stored",
+                    compress_type=zipfile.ZIP_STORED)
+        zf.writestr("test2.txt", b"test deflated",
+                    compress_type=zipfile.ZIP_DEFLATED)
+        zf.writestr("dir/dtest.txt", b"test in dir")
+        zf.writestr("dir1/dir2/test.txt", b"test nested dir")
+        zf.writestr("emptydir/", b"", compress_type=zipfile.ZIP_STORED)
+
+    wrapper = IStreamWrapper(stream)
+    archive = ZipArchive()
+    archive.open_read(wrapper)
+
+    assert archive.is_read_valid()
+    assert not archive.is_write_valid()
+    assert not archive.needs_repack()
+
+    assert archive.verify()
+
+    assert archive.find_subfile("nonexistent.txt") == -1
+
+    for name, expected in (("test.txt", b"test stored"),
+                           ("test2.txt", b"test deflated")):
+        sf = archive.find_subfile(name)
+        assert sf >= 0
+        assert archive.read_subfile(sf) == expected
+        assert archive.extract_subfile(sf, tmp_path / name)
+        # read_bytes() closes the file, unlike a bare open(...).read().
+        assert (tmp_path / name).read_bytes() == expected
+
+    archive.close()
+
+
+def test_zip_write():
+    """Subfiles added with add_subfile() are readable through zipfile."""
+    stream = StringStream()
+    archive = ZipArchive()
+    archive.open_write(stream)
+    archive.add_subfile("test.txt", StringStream(b"test deflated"), 6)
+    archive.add_subfile("test2.txt", StringStream(b"test stored"), 0)
+    archive.close()
+
+    with zipfile.ZipFile(StreamIOWrapper(stream), 'r') as zf:
+        assert zf.testzip() is None
+        assert tuple(sorted(zf.namelist())) == ("test.txt", "test2.txt")
+        assert zf.read("test.txt") == b"test deflated"
+        assert zf.read("test2.txt") == b"test stored"
+
+
+def test_zip_replace_subfile(tmp_path):
+    """Adding under an existing name replaces that subfile's contents."""
+    stream = StringStream()
+    with zipfile.ZipFile(StreamIOWrapper(stream), mode='w',
+                         allowZip64=True) as zf:
+        zf.writestr("test1.txt", b"contents of first file")
+        zf.writestr("test2.txt", b"")
+        zf.writestr("test3.txt", b"contents of third file")
+
+    archive = ZipArchive()
+    archive.open_read_write(stream)
+
+    assert archive.is_read_valid()
+    assert archive.is_write_valid()
+    assert not archive.needs_repack()
+
+    assert archive.verify()
+
+    sf = archive.find_subfile("test2.txt")
+    assert sf >= 0
+    archive.add_subfile("test2.txt",
+                        StringStream(b"contents of second file"), 6)
+    archive.close()
+
+    with zipfile.ZipFile(StreamIOWrapper(stream), 'r') as zf:
+        assert zf.testzip() is None
+        assert zf.read("test1.txt") == b"contents of first file"
+        assert zf.read("test2.txt") == b"contents of second file"
+        assert zf.read("test3.txt") == b"contents of third file"
+
+
+def test_zip_remove_subfile(tmp_path):
+    """A removed subfile is gone after close(); the others remain."""
+    stream = StringStream()
+    with zipfile.ZipFile(StreamIOWrapper(stream), mode='w',
+                         allowZip64=True) as zf:
+        zf.writestr("test1.txt", b"contents of first file")
+        zf.writestr("test2.txt", b"contents of second file")
+        zf.writestr("test3.txt", b"contents of third file")
+
+    archive = ZipArchive()
+    archive.open_read_write(stream)
+
+    assert archive.is_read_valid()
+    assert archive.is_write_valid()
+    assert not archive.needs_repack()
+
+    assert archive.verify()
+
+    removed = archive.remove_subfile("test2.txt")
+    assert removed
+    archive.close()
+
+    with zipfile.ZipFile(StreamIOWrapper(stream), 'r') as zf:
+        assert zf.testzip() is None
+        names = zf.namelist()
+        assert "test1.txt" in names
+        assert "test2.txt" not in names
+        assert "test3.txt" in names
+
+
+def test_zip_repack(tmp_path):
+    """After a removal and an addition, repack() clears needs_repack()."""
+    zip_path = tmp_path / "test_zip_repack.zip"
+    with zipfile.ZipFile(zip_path, mode='w', allowZip64=True) as zf:
+        zf.writestr("test1.txt", b"contents of first file")
+        zf.writestr("test2.txt", b"contents of second file")
+
+    archive = ZipArchive()
+    archive.open_read_write(zip_path)
+
+    assert archive.is_read_valid()
+    assert archive.is_write_valid()
+    assert not archive.needs_repack()
+
+    assert archive.verify()
+
+    removed = archive.remove_subfile("test2.txt")
+    assert removed
+
+    archive.add_subfile("test3.txt",
+                        StringStream(b"contents of third file"), 6)
+
+    assert archive.needs_repack()
+    result = archive.repack()
+    assert result
+    assert not archive.needs_repack()
+
+    assert archive.verify()
+
+    archive.close()
+
+    with zipfile.ZipFile(zip_path, 'r') as zf:
+        assert zf.testzip() is None
+        assert zf.read("test1.txt") == b"contents of first file"
+        assert "test2.txt" not in zf.namelist()
+        assert zf.read("test3.txt") == b"contents of third file"