This repository has been archived on 2024-06-13. You can view files and clone it, but cannot push or open issues or pull requests.
2020-08-04 13:13:01 -04:00

459 lines
16 KiB
C++

//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: Defines a large symbol table (intp sized handles, can store more
// than 64k strings)
//
// $Header: $
// $NoKeywords: $
//===========================================================================//
#ifndef UTLSYMBOLLARGE_H
#define UTLSYMBOLLARGE_H
#ifdef _WIN32
#pragma once
#endif
#include "tier0/threadtools.h"
#include "tier0/vprof.h"
#include "tier1/stringpool.h"
#include "tier1/utltshash.h"
//-----------------------------------------------------------------------------
// CUtlSymbolTableLarge:
// description:
// This class defines a symbol table, which allows us to perform mappings
// of strings to symbols and back.
//
// This class stores the strings in a series of string pools. The returned
// CUtlSymbolLarge is just a pointer
// to the string data, the hash precedes it in memory and is used to speed
// up searching, etc.
//-----------------------------------------------------------------------------
typedef intp UtlSymLargeId_t;
#define UTL_INVAL_SYMBOL_LARGE ((UtlSymLargeId_t)~0)
class CUtlSymbolLarge {
public:
// constructor, destructor
CUtlSymbolLarge() { u.m_Id = UTL_INVAL_SYMBOL_LARGE; }
CUtlSymbolLarge(UtlSymLargeId_t id) { u.m_Id = id; }
CUtlSymbolLarge(CUtlSymbolLarge const &sym) { u.m_Id = sym.u.m_Id; }
// operator=
CUtlSymbolLarge &operator=(CUtlSymbolLarge const &src) {
u.m_Id = src.u.m_Id;
return *this;
}
// operator==
bool operator==(CUtlSymbolLarge const &src) const {
return u.m_Id == src.u.m_Id;
}
// operator==
bool operator==(UtlSymLargeId_t const &src) const { return u.m_Id == src; }
// operator==
bool operator!=(CUtlSymbolLarge const &src) const {
return u.m_Id != src.u.m_Id;
}
// operator==
bool operator!=(UtlSymLargeId_t const &src) const { return u.m_Id != src; }
// Gets at the symbol
operator UtlSymLargeId_t const() const { return u.m_Id; }
// Gets the string associated with the symbol
inline const char *String() const {
if (u.m_Id == UTL_INVAL_SYMBOL_LARGE) return "";
return u.m_pAsString;
}
inline bool IsValid() const {
return u.m_Id != UTL_INVAL_SYMBOL_LARGE ? true : false;
}
private:
// Disallowed
CUtlSymbolLarge(const char *pStr); // they need to go through the table to
// assign the ptr
bool operator==(const char *pStr)
const; // disallow since we don't know if the table this is from was
// case sensitive or not... maybe we don't care
union {
UtlSymLargeId_t m_Id;
char const *m_pAsString;
} u;
};
#define MIN_STRING_POOL_SIZE 2048
inline uint32 CUtlSymbolLarge_Hash(bool CASEINSENSITIVE, const char *pString,
int len) {
return (CASEINSENSITIVE ? HashStringCaseless(pString)
: HashString(pString));
}
typedef uint32 LargeSymbolTableHashDecoration_t;
// The structure consists of the hash immediately followed by the string data
struct CUtlSymbolTableLargeBaseTreeEntry_t {
LargeSymbolTableHashDecoration_t m_Hash;
// Variable length string data
char m_String[1];
bool IsEmpty() const { return ((m_Hash == 0) && (0 == m_String[0])); }
char const *String() const { return (const char *)&m_String[0]; }
CUtlSymbolLarge ToSymbol() const {
return reinterpret_cast<UtlSymLargeId_t>(String());
}
LargeSymbolTableHashDecoration_t HashValue() const { return m_Hash; }
};
template <class TreeType, bool CASEINSENSITIVE>
class CTreeEntryLess {
public:
CTreeEntryLess(int ignored = 0) {
} // permits default initialization to NULL in CUtlRBTree
bool operator!() const { return false; }
bool operator()(CUtlSymbolTableLargeBaseTreeEntry_t *const &left,
CUtlSymbolTableLargeBaseTreeEntry_t *const &right) const {
// compare the hashes
if (left->m_Hash == right->m_Hash) {
// if the hashes match compare the strings
if (!CASEINSENSITIVE)
return strcmp(left->String(), right->String()) < 0;
else
return V_stricmp(left->String(), right->String()) < 0;
} else {
return left->m_Hash < right->m_Hash;
}
}
};
// For non-threaded versions, simply index into CUtlRBTree
template <bool CASEINSENSITIVE>
class CNonThreadsafeTree
: public CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp,
CTreeEntryLess<CNonThreadsafeTree<CASEINSENSITIVE>,
CASEINSENSITIVE> > {
public:
typedef CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp,
CTreeEntryLess<CNonThreadsafeTree, CASEINSENSITIVE> >
CNonThreadsafeTreeType;
CNonThreadsafeTree() : CNonThreadsafeTreeType(0, 16) {}
inline void Commit() {
// Nothing, only matters for thread-safe tables
}
inline int Insert(CUtlSymbolTableLargeBaseTreeEntry_t *entry) {
return CNonThreadsafeTreeType::Insert(entry);
}
inline int Find(CUtlSymbolTableLargeBaseTreeEntry_t *entry) const {
return CNonThreadsafeTreeType::Find(entry);
}
inline int InvalidIndex() const {
return CNonThreadsafeTreeType::InvalidIndex();
}
inline int GetElements(int nFirstElement, int nCount,
CUtlSymbolLarge *pElements) const {
CUtlVector<CUtlSymbolTableLargeBaseTreeEntry_t *> list;
list.EnsureCount(nCount);
for (int i = 0; i < nCount; ++i) {
pElements[i] = CNonThreadsafeTreeType::Element(i)->ToSymbol();
}
return nCount;
}
};
// Since CUtlSymbolTableLargeBaseTreeEntry_t already has the hash
// contained inside of it, don't need to recompute a hash here
template <int BUCKET_COUNT, class KEYTYPE, bool CASEINSENSITIVE>
class CCThreadsafeTreeHashMethod {
public:
static int Hash(const KEYTYPE &key, int nBucketMask) {
uint32 nHash = key->HashValue();
return (nHash & nBucketMask);
}
static bool Compare(CUtlSymbolTableLargeBaseTreeEntry_t *const &lhs,
CUtlSymbolTableLargeBaseTreeEntry_t *const &rhs) {
if (lhs->m_Hash != rhs->m_Hash) return false;
if (!CASEINSENSITIVE) {
return (!Q_strcmp(lhs->String(), rhs->String()) ? true : false);
}
return (!Q_stricmp(lhs->String(), rhs->String()) ? true : false);
}
};
/*
NOTE: So the only crappy thing about using a CUtlTSHash here is that the
KEYTYPE is a CUtlSymbolTableLargeBaseTreeEntry_t ptr which has both the hash
and the string since with strings there is a good chance of hash collision
after you have a fair number of strings so we have to implement a Compare
method (above) which falls back to strcmp/stricmp if the hashes are equal.
This means that all of the data is in the KEYTYPE of the hash and the payload
doesn't matter. So I made the payload also be a pointer to a
CUtlSymbolTableLargeBaseTreeEntry_t since that makes using the API more
convenient
TODO: If we have a CUtlTSHash that was all about the existence of the KEYTYPE
and didn't require a payload (or template on 'void') then we could eliminate
50% of the pointer overhead used for this data structure.
*/
// Thread safe version is based on the
template <bool CASEINSENSITIVE>
class CThreadsafeTree
: public CUtlTSHash<
CUtlSymbolTableLargeBaseTreeEntry_t *, 2048,
CUtlSymbolTableLargeBaseTreeEntry_t *,
CCThreadsafeTreeHashMethod<
2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE> > {
public:
typedef CUtlTSHash<
CUtlSymbolTableLargeBaseTreeEntry_t *, 2048,
CUtlSymbolTableLargeBaseTreeEntry_t *,
CCThreadsafeTreeHashMethod<2048, CUtlSymbolTableLargeBaseTreeEntry_t *,
CASEINSENSITIVE> >
CThreadsafeTreeType;
CThreadsafeTree() : CThreadsafeTreeType(32) {}
inline void Commit() { CThreadsafeTreeType::Commit(); }
inline int Insert(CUtlSymbolTableLargeBaseTreeEntry_t *entry) {
return CThreadsafeTreeType::Insert(entry, entry);
}
inline int Find(CUtlSymbolTableLargeBaseTreeEntry_t *entry) {
return CThreadsafeTreeType::Find(entry);
}
inline int InvalidIndex() const {
return CThreadsafeTreeType::InvalidHandle();
}
inline int GetElements(int nFirstElement, int nCount,
CUtlSymbolLarge *pElements) const {
CUtlVector<UtlTSHashHandle_t> list;
list.EnsureCount(nCount);
int c = CThreadsafeTreeType::GetElements(nFirstElement, nCount,
list.Base());
for (int i = 0; i < c; ++i) {
pElements[i] = CThreadsafeTreeType::Element(list[i])->ToSymbol();
}
return c;
}
};
// Base Class for threaded and non-threaded types
template <class TreeType, bool CASEINSENSITIVE,
size_t POOL_SIZE = MIN_STRING_POOL_SIZE>
class CUtlSymbolTableLargeBase {
public:
// constructor, destructor
CUtlSymbolTableLargeBase();
~CUtlSymbolTableLargeBase();
// Finds and/or creates a symbol based on the string
CUtlSymbolLarge AddString(const char *pString);
// Finds the symbol for pString
CUtlSymbolLarge Find(const char *pString) const;
// Remove all symbols in the table.
void RemoveAll();
int GetNumStrings(void) const { return m_Lookup.Count(); }
void Commit() { m_Lookup.Commit(); }
// Returns elements in the table
int GetElements(int nFirstElement, int nCount,
CUtlSymbolLarge *pElements) const {
return m_Lookup.GetElements(nFirstElement, nCount, pElements);
}
uint64 GetMemoryUsage() const {
uint64 unBytesUsed = 0u;
for (int i = 0; i < m_StringPools.Count(); i++) {
StringPool_t *pPool = m_StringPools[i];
unBytesUsed += (uint64)pPool->m_TotalLen;
}
return unBytesUsed;
}
protected:
struct StringPool_t {
int m_TotalLen; // How large is
int m_SpaceUsed;
char m_Data[1];
};
TreeType m_Lookup;
// stores the string data
CUtlVector<StringPool_t *> m_StringPools;
private:
int FindPoolWithSpace(int len) const;
};
//-----------------------------------------------------------------------------
// constructor, destructor
//-----------------------------------------------------------------------------
template <class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE>
inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE,
POOL_SIZE>::CUtlSymbolTableLargeBase()
: m_StringPools(8) {}
template <class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE>
inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE,
POOL_SIZE>::~CUtlSymbolTableLargeBase() {
// Release the stringpool string data
RemoveAll();
}
template <class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE>
inline CUtlSymbolLarge
CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::Find(
const char *pString) const {
VPROF("CUtlSymbolLarge::Find");
if (!pString) return CUtlSymbolLarge();
// Passing this special invalid symbol makes the comparison function
// use the string passed in the context
int len = Q_strlen(pString) + 1;
CUtlSymbolTableLargeBaseTreeEntry_t *search =
(CUtlSymbolTableLargeBaseTreeEntry_t *)_alloca(
len + sizeof(LargeSymbolTableHashDecoration_t));
search->m_Hash = CUtlSymbolLarge_Hash(CASEINSENSITIVE, pString, len);
Q_memcpy((char *)&search->m_String[0], pString, len);
int idx = const_cast<TreeType &>(m_Lookup).Find(search);
if (idx == m_Lookup.InvalidIndex()) return UTL_INVAL_SYMBOL_LARGE;
const CUtlSymbolTableLargeBaseTreeEntry_t *entry = m_Lookup[idx];
return entry->ToSymbol();
}
template <class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE>
inline int CUtlSymbolTableLargeBase<
TreeType, CASEINSENSITIVE, POOL_SIZE>::FindPoolWithSpace(int len) const {
for (int i = 0; i < m_StringPools.Count(); i++) {
StringPool_t *pPool = m_StringPools[i];
if ((pPool->m_TotalLen - pPool->m_SpaceUsed) >= len) {
return i;
}
}
return -1;
}
//-----------------------------------------------------------------------------
// Finds and/or creates a symbol based on the string
//-----------------------------------------------------------------------------
template <class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE>
inline CUtlSymbolLarge
CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::AddString(
const char *pString) {
VPROF("CUtlSymbolLarge::AddString");
if (!pString) return UTL_INVAL_SYMBOL_LARGE;
CUtlSymbolLarge id = Find(pString);
if (id != UTL_INVAL_SYMBOL_LARGE) return id;
int lenString = Q_strlen(pString) + 1; // length of just the string
int lenDecorated =
lenString +
sizeof(
LargeSymbolTableHashDecoration_t); // and with its hash decoration
// make sure that all strings are aligned on 2-byte boundaries so the hashes
// will read correctly This assert seems to be invalid because
// LargeSymbolTableHashDecoration_t is always a uint32, by design.
// COMPILE_TIME_ASSERT(sizeof(LargeSymbolTableHashDecoration_t) ==
// sizeof(intp));
lenDecorated = ALIGN_VALUE(lenDecorated, sizeof(intp));
// Find a pool with space for this string, or allocate a new one.
int iPool = FindPoolWithSpace(lenDecorated);
if (iPool == -1) {
// Add a new pool.
int newPoolSize = MAX(lenDecorated + sizeof(StringPool_t), POOL_SIZE);
StringPool_t *pPool = (StringPool_t *)malloc(newPoolSize);
pPool->m_TotalLen = newPoolSize - sizeof(StringPool_t);
pPool->m_SpaceUsed = 0;
iPool = m_StringPools.AddToTail(pPool);
}
// Compute a hash
LargeSymbolTableHashDecoration_t hash =
CUtlSymbolLarge_Hash(CASEINSENSITIVE, pString, lenString);
// Copy the string in.
StringPool_t *pPool = m_StringPools[iPool];
// Assert( pPool->m_SpaceUsed < 0xFFFF ); // Pool could be bigger than 2k
// This should never happen, because if we had a string > 64k, it
// would have been given its entire own pool.
CUtlSymbolTableLargeBaseTreeEntry_t *entry =
(CUtlSymbolTableLargeBaseTreeEntry_t *)&pPool
->m_Data[pPool->m_SpaceUsed];
pPool->m_SpaceUsed += lenDecorated;
entry->m_Hash = hash;
char *pText = (char *)&entry->m_String[0];
Q_memcpy(pText, pString, lenString);
// insert the string into the database
MEM_ALLOC_CREDIT();
int idx = m_Lookup.Insert(entry);
return m_Lookup.Element(idx)->ToSymbol();
}
//-----------------------------------------------------------------------------
// Remove all symbols in the table.
//-----------------------------------------------------------------------------
template <class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE>
inline void
CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::RemoveAll() {
m_Lookup.Purge();
for (int i = 0; i < m_StringPools.Count(); i++) {
StringPool_t *pString = m_StringPools[i];
free(pString);
}
m_StringPools.RemoveAll();
}
// Case-sensitive
typedef CUtlSymbolTableLargeBase<CNonThreadsafeTree<false>, false>
CUtlSymbolTableLarge;
// Case-insensitive
typedef CUtlSymbolTableLargeBase<CNonThreadsafeTree<true>, true>
CUtlSymbolTableLarge_CI;
// Multi-threaded case-sensitive
typedef CUtlSymbolTableLargeBase<CThreadsafeTree<false>, false>
CUtlSymbolTableLargeMT;
// Multi-threaded case-insensitive
typedef CUtlSymbolTableLargeBase<CThreadsafeTree<true>, true>
CUtlSymbolTableLargeMT_CI;
#endif // UTLSYMBOLLARGE_H