//========= Copyright Valve Corporation, All rights reserved. ============// // // Purpose: Defines a large symbol table (intp sized handles, can store more // than 64k strings) // // $Header: $ // $NoKeywords: $ //===========================================================================// #ifndef UTLSYMBOLLARGE_H #define UTLSYMBOLLARGE_H #ifdef _WIN32 #pragma once #endif #include "tier0/threadtools.h" #include "tier0/vprof.h" #include "tier1/stringpool.h" #include "tier1/utltshash.h" //----------------------------------------------------------------------------- // CUtlSymbolTableLarge: // description: // This class defines a symbol table, which allows us to perform mappings // of strings to symbols and back. // // This class stores the strings in a series of string pools. The returned // CUtlSymbolLarge is just a pointer // to the string data, the hash precedes it in memory and is used to speed // up searching, etc. //----------------------------------------------------------------------------- typedef intp UtlSymLargeId_t; #define UTL_INVAL_SYMBOL_LARGE ((UtlSymLargeId_t)~0) class CUtlSymbolLarge { public: // constructor, destructor CUtlSymbolLarge() { u.m_Id = UTL_INVAL_SYMBOL_LARGE; } CUtlSymbolLarge(UtlSymLargeId_t id) { u.m_Id = id; } CUtlSymbolLarge(CUtlSymbolLarge const &sym) { u.m_Id = sym.u.m_Id; } // operator= CUtlSymbolLarge &operator=(CUtlSymbolLarge const &src) { u.m_Id = src.u.m_Id; return *this; } // operator== bool operator==(CUtlSymbolLarge const &src) const { return u.m_Id == src.u.m_Id; } // operator== bool operator==(UtlSymLargeId_t const &src) const { return u.m_Id == src; } // operator== bool operator!=(CUtlSymbolLarge const &src) const { return u.m_Id != src.u.m_Id; } // operator== bool operator!=(UtlSymLargeId_t const &src) const { return u.m_Id != src; } // Gets at the symbol operator UtlSymLargeId_t const() const { return u.m_Id; } // Gets the string associated with the symbol inline const char *String() const { if (u.m_Id == UTL_INVAL_SYMBOL_LARGE) return ""; return u.m_pAsString; } inline bool IsValid() const { return u.m_Id != UTL_INVAL_SYMBOL_LARGE ? true : false; } private: // Disallowed CUtlSymbolLarge(const char *pStr); // they need to go through the table to // assign the ptr bool operator==(const char *pStr) const; // disallow since we don't know if the table this is from was // case sensitive or not... maybe we don't care union { UtlSymLargeId_t m_Id; char const *m_pAsString; } u; }; #define MIN_STRING_POOL_SIZE 2048 inline uint32 CUtlSymbolLarge_Hash(bool CASEINSENSITIVE, const char *pString, int len) { return (CASEINSENSITIVE ? HashStringCaseless(pString) : HashString(pString)); } typedef uint32 LargeSymbolTableHashDecoration_t; // The structure consists of the hash immediately followed by the string data struct CUtlSymbolTableLargeBaseTreeEntry_t { LargeSymbolTableHashDecoration_t m_Hash; // Variable length string data char m_String[1]; bool IsEmpty() const { return ((m_Hash == 0) && (0 == m_String[0])); } char const *String() const { return (const char *)&m_String[0]; } CUtlSymbolLarge ToSymbol() const { return reinterpret_cast(String()); } LargeSymbolTableHashDecoration_t HashValue() const { return m_Hash; } }; template class CTreeEntryLess { public: CTreeEntryLess(int ignored = 0) { } // permits default initialization to NULL in CUtlRBTree bool operator!() const { return false; } bool operator()(CUtlSymbolTableLargeBaseTreeEntry_t *const &left, CUtlSymbolTableLargeBaseTreeEntry_t *const &right) const { // compare the hashes if (left->m_Hash == right->m_Hash) { // if the hashes match compare the strings if (!CASEINSENSITIVE) return strcmp(left->String(), right->String()) < 0; else return V_stricmp(left->String(), right->String()) < 0; } else { return left->m_Hash < right->m_Hash; } } }; // For non-threaded versions, simply index into CUtlRBTree template class CNonThreadsafeTree : public CUtlRBTree, CASEINSENSITIVE> > { public: typedef CUtlRBTree > CNonThreadsafeTreeType; CNonThreadsafeTree() : CNonThreadsafeTreeType(0, 16) {} inline void Commit() { // Nothing, only matters for thread-safe tables } inline int Insert(CUtlSymbolTableLargeBaseTreeEntry_t *entry) { return CNonThreadsafeTreeType::Insert(entry); } inline int Find(CUtlSymbolTableLargeBaseTreeEntry_t *entry) const { return CNonThreadsafeTreeType::Find(entry); } inline int InvalidIndex() const { return CNonThreadsafeTreeType::InvalidIndex(); } inline int GetElements(int nFirstElement, int nCount, CUtlSymbolLarge *pElements) const { CUtlVector list; list.EnsureCount(nCount); for (int i = 0; i < nCount; ++i) { pElements[i] = CNonThreadsafeTreeType::Element(i)->ToSymbol(); } return nCount; } }; // Since CUtlSymbolTableLargeBaseTreeEntry_t already has the hash // contained inside of it, don't need to recompute a hash here template class CCThreadsafeTreeHashMethod { public: static int Hash(const KEYTYPE &key, int nBucketMask) { uint32 nHash = key->HashValue(); return (nHash & nBucketMask); } static bool Compare(CUtlSymbolTableLargeBaseTreeEntry_t *const &lhs, CUtlSymbolTableLargeBaseTreeEntry_t *const &rhs) { if (lhs->m_Hash != rhs->m_Hash) return false; if (!CASEINSENSITIVE) { return (!Q_strcmp(lhs->String(), rhs->String()) ? true : false); } return (!Q_stricmp(lhs->String(), rhs->String()) ? true : false); } }; /* NOTE: So the only crappy thing about using a CUtlTSHash here is that the KEYTYPE is a CUtlSymbolTableLargeBaseTreeEntry_t ptr which has both the hash and the string since with strings there is a good chance of hash collision after you have a fair number of strings so we have to implement a Compare method (above) which falls back to strcmp/stricmp if the hashes are equal. This means that all of the data is in the KEYTYPE of the hash and the payload doesn't matter. So I made the payload also be a pointer to a CUtlSymbolTableLargeBaseTreeEntry_t since that makes using the API more convenient TODO: If we have a CUtlTSHash that was all about the existence of the KEYTYPE and didn't require a payload (or template on 'void') then we could eliminate 50% of the pointer overhead used for this data structure. */ // Thread safe version is based on the template class CThreadsafeTree : public CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE> > { public: typedef CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod<2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE> > CThreadsafeTreeType; CThreadsafeTree() : CThreadsafeTreeType(32) {} inline void Commit() { CThreadsafeTreeType::Commit(); } inline int Insert(CUtlSymbolTableLargeBaseTreeEntry_t *entry) { return CThreadsafeTreeType::Insert(entry, entry); } inline int Find(CUtlSymbolTableLargeBaseTreeEntry_t *entry) { return CThreadsafeTreeType::Find(entry); } inline int InvalidIndex() const { return CThreadsafeTreeType::InvalidHandle(); } inline int GetElements(int nFirstElement, int nCount, CUtlSymbolLarge *pElements) const { CUtlVector list; list.EnsureCount(nCount); int c = CThreadsafeTreeType::GetElements(nFirstElement, nCount, list.Base()); for (int i = 0; i < c; ++i) { pElements[i] = CThreadsafeTreeType::Element(list[i])->ToSymbol(); } return c; } }; // Base Class for threaded and non-threaded types template class CUtlSymbolTableLargeBase { public: // constructor, destructor CUtlSymbolTableLargeBase(); ~CUtlSymbolTableLargeBase(); // Finds and/or creates a symbol based on the string CUtlSymbolLarge AddString(const char *pString); // Finds the symbol for pString CUtlSymbolLarge Find(const char *pString) const; // Remove all symbols in the table. void RemoveAll(); int GetNumStrings(void) const { return m_Lookup.Count(); } void Commit() { m_Lookup.Commit(); } // Returns elements in the table int GetElements(int nFirstElement, int nCount, CUtlSymbolLarge *pElements) const { return m_Lookup.GetElements(nFirstElement, nCount, pElements); } uint64 GetMemoryUsage() const { uint64 unBytesUsed = 0u; for (int i = 0; i < m_StringPools.Count(); i++) { StringPool_t *pPool = m_StringPools[i]; unBytesUsed += (uint64)pPool->m_TotalLen; } return unBytesUsed; } protected: struct StringPool_t { int m_TotalLen; // How large is int m_SpaceUsed; char m_Data[1]; }; TreeType m_Lookup; // stores the string data CUtlVector m_StringPools; private: int FindPoolWithSpace(int len) const; }; //----------------------------------------------------------------------------- // constructor, destructor //----------------------------------------------------------------------------- template inline CUtlSymbolTableLargeBase::CUtlSymbolTableLargeBase() : m_StringPools(8) {} template inline CUtlSymbolTableLargeBase::~CUtlSymbolTableLargeBase() { // Release the stringpool string data RemoveAll(); } template inline CUtlSymbolLarge CUtlSymbolTableLargeBase::Find( const char *pString) const { VPROF("CUtlSymbolLarge::Find"); if (!pString) return CUtlSymbolLarge(); // Passing this special invalid symbol makes the comparison function // use the string passed in the context int len = Q_strlen(pString) + 1; CUtlSymbolTableLargeBaseTreeEntry_t *search = (CUtlSymbolTableLargeBaseTreeEntry_t *)_alloca( len + sizeof(LargeSymbolTableHashDecoration_t)); search->m_Hash = CUtlSymbolLarge_Hash(CASEINSENSITIVE, pString, len); Q_memcpy((char *)&search->m_String[0], pString, len); int idx = const_cast(m_Lookup).Find(search); if (idx == m_Lookup.InvalidIndex()) return UTL_INVAL_SYMBOL_LARGE; const CUtlSymbolTableLargeBaseTreeEntry_t *entry = m_Lookup[idx]; return entry->ToSymbol(); } template inline int CUtlSymbolTableLargeBase< TreeType, CASEINSENSITIVE, POOL_SIZE>::FindPoolWithSpace(int len) const { for (int i = 0; i < m_StringPools.Count(); i++) { StringPool_t *pPool = m_StringPools[i]; if ((pPool->m_TotalLen - pPool->m_SpaceUsed) >= len) { return i; } } return -1; } //----------------------------------------------------------------------------- // Finds and/or creates a symbol based on the string //----------------------------------------------------------------------------- template inline CUtlSymbolLarge CUtlSymbolTableLargeBase::AddString( const char *pString) { VPROF("CUtlSymbolLarge::AddString"); if (!pString) return UTL_INVAL_SYMBOL_LARGE; CUtlSymbolLarge id = Find(pString); if (id != UTL_INVAL_SYMBOL_LARGE) return id; int lenString = Q_strlen(pString) + 1; // length of just the string int lenDecorated = lenString + sizeof( LargeSymbolTableHashDecoration_t); // and with its hash decoration // make sure that all strings are aligned on 2-byte boundaries so the hashes // will read correctly This assert seems to be invalid because // LargeSymbolTableHashDecoration_t is always a uint32, by design. // COMPILE_TIME_ASSERT(sizeof(LargeSymbolTableHashDecoration_t) == // sizeof(intp)); lenDecorated = ALIGN_VALUE(lenDecorated, sizeof(intp)); // Find a pool with space for this string, or allocate a new one. int iPool = FindPoolWithSpace(lenDecorated); if (iPool == -1) { // Add a new pool. int newPoolSize = MAX(lenDecorated + sizeof(StringPool_t), POOL_SIZE); StringPool_t *pPool = (StringPool_t *)malloc(newPoolSize); pPool->m_TotalLen = newPoolSize - sizeof(StringPool_t); pPool->m_SpaceUsed = 0; iPool = m_StringPools.AddToTail(pPool); } // Compute a hash LargeSymbolTableHashDecoration_t hash = CUtlSymbolLarge_Hash(CASEINSENSITIVE, pString, lenString); // Copy the string in. StringPool_t *pPool = m_StringPools[iPool]; // Assert( pPool->m_SpaceUsed < 0xFFFF ); // Pool could be bigger than 2k // This should never happen, because if we had a string > 64k, it // would have been given its entire own pool. CUtlSymbolTableLargeBaseTreeEntry_t *entry = (CUtlSymbolTableLargeBaseTreeEntry_t *)&pPool ->m_Data[pPool->m_SpaceUsed]; pPool->m_SpaceUsed += lenDecorated; entry->m_Hash = hash; char *pText = (char *)&entry->m_String[0]; Q_memcpy(pText, pString, lenString); // insert the string into the database MEM_ALLOC_CREDIT(); int idx = m_Lookup.Insert(entry); return m_Lookup.Element(idx)->ToSymbol(); } //----------------------------------------------------------------------------- // Remove all symbols in the table. //----------------------------------------------------------------------------- template inline void CUtlSymbolTableLargeBase::RemoveAll() { m_Lookup.Purge(); for (int i = 0; i < m_StringPools.Count(); i++) { StringPool_t *pString = m_StringPools[i]; free(pString); } m_StringPools.RemoveAll(); } // Case-sensitive typedef CUtlSymbolTableLargeBase, false> CUtlSymbolTableLarge; // Case-insensitive typedef CUtlSymbolTableLargeBase, true> CUtlSymbolTableLarge_CI; // Multi-threaded case-sensitive typedef CUtlSymbolTableLargeBase, false> CUtlSymbolTableLargeMT; // Multi-threaded case-insensitive typedef CUtlSymbolTableLargeBase, true> CUtlSymbolTableLargeMT_CI; #endif // UTLSYMBOLLARGE_H