interrogate: Tidy up hash_string function

The main motivation behind this change is to get rid of a
signed integer overflow that sometimes happens in the prime
multiplication step, which is (per the C++ spec) undefined
behavior. Beyond that, it's probably for the best to use
only unsigned int throughout, since the function is clearly
trying to avoid negative values.

Not that I suspect it matters much, but I have also heavily
tested that the output of the function is unchanged (at
least on PC hardware - signed integer overflow doesn't
behave portably). It may now be slightly faster as well,
because I have removed the floating-point math.
This commit is contained in:
Sam Edwards 2018-12-30 04:20:50 -07:00
parent 952031455c
commit 818fdbd232

View File

@ -656,13 +656,13 @@ get_preferred_name(CPPType *type) {
*/ */
string InterrogateBuilder:: string InterrogateBuilder::
hash_string(const string &name, int shift_offset) { hash_string(const string &name, int shift_offset) {
int hash = 0; unsigned int hash = 0;
int shift = 0; unsigned int shift = 0;
string::const_iterator ni; string::const_iterator ni;
for (ni = name.begin(); ni != name.end(); ++ni) { for (ni = name.begin(); ni != name.end(); ++ni) {
int c = (int)(unsigned char)(*ni); unsigned int c = (unsigned char)*ni;
int shifted_c = (c << shift) & 0xffffff; unsigned int shifted_c = (c << shift) & 0xffffff;
if (shift > 16) { if (shift > 16) {
// We actually want a circular shift, not an arithmetic shift. // We actually want a circular shift, not an arithmetic shift.
shifted_c |= ((c >> (24 - shift)) & 0xff) ; shifted_c |= ((c >> (24 - shift)) & 0xff) ;
@ -675,10 +675,9 @@ hash_string(const string &name, int shift_offset) {
// bits back at the bottom, to scramble up the bits a bit. This helps // bits back at the bottom, to scramble up the bits a bit. This helps
// reduce hash conflicts from names that are similar to each other, by // reduce hash conflicts from names that are similar to each other, by
// separating adjacent hash codes. // separating adjacent hash codes.
int prime = 4999; const unsigned int prime = 4999;
int low_order = (hash * prime) & 0xffffff; unsigned long long product = (unsigned long long)hash * prime;
int high_order = (int)((double)hash * (double)prime / (double)(1 << 24)); hash = (product ^ (product >> 24)) & 0xffffff;
hash = low_order ^ high_order;
// Also add in the additional_number, times some prime factor. hash = (hash // Also add in the additional_number, times some prime factor. hash = (hash
// + additional_number * 1657) & 0xffffff; // + additional_number * 1657) & 0xffffff;
@ -690,10 +689,9 @@ hash_string(const string &name, int shift_offset) {
// deal, since we have to resolve hash conflicts anyway. // deal, since we have to resolve hash conflicts anyway.
string result; string result;
int extract_h = hash;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
int value = (extract_h & 0x3f); unsigned int value = (hash & 0x3f);
extract_h >>= 6; hash >>= 6;
if (value < 26) { if (value < 26) {
result += (char)('A' + value); result += (char)('A' + value);