Roger's patchfile generation speed optimizations

This commit is contained in:
Darren Ranalli 2004-05-26 22:30:21 +00:00
parent bbea3902db
commit d0acfcc4ac
4 changed files with 40 additions and 16 deletions

View File

@ -28,7 +28,8 @@
int
main(int argc, char *argv[]) {
if (argc < 3) {
cerr << "Usage: apply_patch <patch> <file>" << endl;
cerr << "Usage: apply_patch <patch_file> <old_file>" << endl;
cerr << "Will overwrite old_file" << endl;
return 1;
}

View File

@ -28,7 +28,7 @@
int
main(int argc, char *argv[]) {
if (argc < 3) {
cerr << "Usage: build_patch <src_file> <dest_file>" << endl;
cerr << "Usage: build_patch <old_file> <new_file>" << endl;
return 1;
}

View File

@ -94,10 +94,11 @@ const PN_uint32 Patchfile::_magic_number = 0xfeebfaac;
// To version 2 on 11/2/02 to store copy offsets as relative.
const PN_uint16 Patchfile::_current_version = 2;
const PN_uint32 Patchfile::_HASHTABLESIZE = PN_uint32(1) << 16;
const PN_uint32 Patchfile::_HASHTABLESIZE = PN_uint32(1) << 24;
const PN_uint32 Patchfile::_DEFAULT_FOOTPRINT_LENGTH = 9; // this produced the smallest patch file for libpanda.dll when tested, 12/20/2000
const PN_uint32 Patchfile::_NULL_VALUE = PN_uint32(0) - 1;
const PN_uint32 Patchfile::_MAX_RUN_LENGTH = (PN_uint32(1) << 16) - 1;
const PN_uint32 Patchfile::_HASH_MASK = (PN_uint32(1) << 24) - 1;
////////////////////////////////////////////////////////////////////
// Function: Patchfile::Constructor
@ -527,7 +528,7 @@ internal_read_header(const Filename &patch_file) {
// Access: Private
// Description:
////////////////////////////////////////////////////////////////////
PN_uint16 Patchfile::
PN_uint32 Patchfile::
calc_hash(const char *buffer) {
#ifdef USE_MD5_FOR_HASHTABLE_INDEX_VALUES
HashVal hash;
@ -537,18 +538,18 @@ calc_hash(const char *buffer) {
return PN_uint16(hash.get_value(0));
#else
PN_uint16 hash_value = 0;
PN_uint32 hash_value = 0;
for(int i = 0; i < (int)_footprint_length; i++) {
// this is probably not such a good hash. to be replaced
/// --> TRIED MD5, was not worth it for the execution-time hit on 800Mhz PC
hash_value ^= (*buffer) << (i % 8);
hash_value ^= (*buffer) << ((i % _footprint_length) * 2);
buffer++;
}
//cout << hash_value << " ";
return hash_value;
return hash_value & _HASH_MASK;
#endif
}
@ -609,7 +610,7 @@ build_hash_link_tables(const char *buffer_orig, PN_uint32 length_orig,
double t = GET_PROFILE_TIME();
#endif
PN_uint16 hash_value = calc_hash(&buffer_orig[i]);
PN_uint32 hash_value = calc_hash(&buffer_orig[i]);
#ifdef PROFILE_PATCH_BUILD
hashCalc += GET_PROFILE_TIME() - t;
@ -673,7 +674,19 @@ build_hash_link_tables(const char *buffer_orig, PN_uint32 length_orig,
// two strings of bytes
////////////////////////////////////////////////////////////////////
PN_uint32 Patchfile::
calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length) {
calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length,
PN_uint32 min_length) {
// early out: look ahead and sample the end of the minimum range
if (min_length > 2) {
if (min_length >= max_length)
return 0;
if (buf1[min_length] != buf2[min_length] ||
buf1[min_length-1] != buf2[min_length-1] ||
buf1[min_length-2] != buf2[min_length-2]) {
return 0;
}
}
PN_uint32 length = 0;
while ((length < max_length) && (*buf1 == *buf2)) {
buf1++, buf2++, length++;
@ -697,7 +710,7 @@ find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_lengt
copy_length = 0;
// get offset of matching string (in orig file) from hash table
PN_uint16 hash_value = calc_hash(&buffer_new[new_pos]);
PN_uint32 hash_value = calc_hash(&buffer_new[new_pos]);
// if no match, bail
if (_NULL_VALUE == hash_table[hash_value])
@ -706,8 +719,12 @@ find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_lengt
copy_pos = hash_table[hash_value];
// calc match length
copy_length = (PN_uint16)calc_match_length(&buffer_new[new_pos], &buffer_orig[copy_pos],
min(min((length_new - new_pos),(length_orig - copy_pos)), _MAX_RUN_LENGTH));
copy_length = (PN_uint16)calc_match_length(&buffer_new[new_pos],
&buffer_orig[copy_pos],
min(min((length_new - new_pos),
(length_orig - copy_pos)),
_MAX_RUN_LENGTH),
0);
// run through link table, see if we find any longer matches
PN_uint32 match_offset;
@ -715,8 +732,12 @@ find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_lengt
match_offset = link_table[copy_pos];
while (match_offset != _NULL_VALUE) {
match_length = (PN_uint16)calc_match_length(&buffer_new[new_pos], &buffer_orig[match_offset],
min(min((length_new - new_pos),(length_orig - match_offset)), _MAX_RUN_LENGTH));
match_length = (PN_uint16)calc_match_length(&buffer_new[new_pos],
&buffer_orig[match_offset],
min(min((length_new - new_pos),
(length_orig - match_offset)),
_MAX_RUN_LENGTH),
copy_length);
// have we found a longer match?
if (match_length > copy_length) {

View File

@ -83,11 +83,12 @@ private:
// stuff for the build operation
void build_hash_link_tables(const char *buffer_orig, PN_uint32 length_orig,
PN_uint32 *hash_table, PN_uint32 *link_table);
PN_uint16 calc_hash(const char *buffer);
PN_uint32 calc_hash(const char *buffer);
void find_longest_match(PN_uint32 new_pos, PN_uint32 &copy_pos, PN_uint16 &copy_length,
PN_uint32 *hash_table, PN_uint32 *link_table, const char* buffer_orig,
PN_uint32 length_orig, const char* buffer_new, PN_uint32 length_new);
PN_uint32 calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length);
PN_uint32 calc_match_length(const char* buf1, const char* buf2, PN_uint32 max_length,
PN_uint32 min_length);
void emit_ADD(ofstream &write_stream, PN_uint16 length, const char* buffer,
PN_uint32 ADD_pos);
@ -99,6 +100,7 @@ private:
static const PN_uint32 _DEFAULT_FOOTPRINT_LENGTH;
static const PN_uint32 _NULL_VALUE;
static const PN_uint32 _MAX_RUN_LENGTH;
static const PN_uint32 _HASH_MASK;
PN_uint32 _footprint_length;