pstats: Optimize PStatFrameData (de-)serialization

Serialization can be slow (hundreds of µs, up to multiple ms) with large numbers of data points; this change makes it an order of magnitude more efficient.
This commit is contained in:
rdb 2022-12-10 19:08:49 +01:00
parent d48e23f234
commit 3254c6d329

View File

@ -34,7 +34,6 @@ sort_time() {
*/ */
bool PStatFrameData:: bool PStatFrameData::
write_datagram(Datagram &destination, PStatClient *client) const { write_datagram(Datagram &destination, PStatClient *client) const {
Data::const_iterator di;
if (_time_data.size() >= 65536 || _level_data.size() >= 65536) { if (_time_data.size() >= 65536 || _level_data.size() >= 65536) {
pstats_cat.info() pstats_cat.info()
<< "Dropping frame with " << _time_data.size() << "Dropping frame with " << _time_data.size()
@ -43,16 +42,62 @@ write_datagram(Datagram &destination, PStatClient *client) const {
return false; return false;
} }
#if !defined(WORDS_BIGENDIAN) || defined(__GNUC__)
// Hand-roll this, significantly more efficient for many data points
size_t size = (_time_data.size() + _level_data.size()) * 6 + 4;
PTA_uchar array = destination.modify_array();
size_t offset = array.size();
array.resize(offset + size);
unsigned char *data = &array[0] + offset;
uint16_t *ptr = (uint16_t *)data;
#ifdef WORDS_BIGENDIAN
*ptr++ = __builtin_bswap16(_time_data.size());
for (const DataPoint &dp : _time_data) {
*ptr++ = __builtin_bswap16(dp._index);
PN_float32 v = (PN_float32)dp._value;
*(uint32_t *)ptr = __builtin_bswap32(reinterpret_cast<uint32_t &>(v));
ptr += 2;
}
*ptr++ = __builtin_bswap16(_level_data.size());
for (const DataPoint &dp : _level_data) {
*ptr++ = __builtin_bswap16(dp._index);
PN_float32 v = (PN_float32)dp._value;
*(uint32_t *)ptr = __builtin_bswap32(reinterpret_cast<uint32_t &>(v));
ptr += 2;
}
#else
*ptr++ = _time_data.size();
for (const DataPoint &dp : _time_data) {
*ptr++ = dp._index;
*(PN_float32 *)ptr = dp._value;
ptr += 2;
}
*ptr++ = _level_data.size();
for (const DataPoint &dp : _level_data) {
*ptr++ = dp._index;
*(PN_float32 *)ptr = dp._value;
ptr += 2;
}
#endif
#else
destination.add_uint16(_time_data.size()); destination.add_uint16(_time_data.size());
for (di = _time_data.begin(); di != _time_data.end(); ++di) { for (const DataPoint &dp : _time_data) {
destination.add_uint16((*di)._index); destination.add_uint16(dp._index);
destination.add_float32((*di)._value); destination.add_float32(dp._value);
} }
destination.add_uint16(_level_data.size()); destination.add_uint16(_level_data.size());
for (di = _level_data.begin(); di != _level_data.end(); ++di) { for (const DataPoint &dp : _level_data) {
destination.add_uint16((*di)._index); destination.add_uint16(dp._index);
destination.add_float32((*di)._value); destination.add_float32(dp._value);
} }
#endif
return true; return true;
} }
@ -64,22 +109,25 @@ void PStatFrameData::
read_datagram(DatagramIterator &source, PStatClientVersion *) { read_datagram(DatagramIterator &source, PStatClientVersion *) {
clear(); clear();
int i; {
int time_size = source.get_uint16(); size_t time_size = source.get_uint16();
for (i = 0; i < time_size; i++) { _time_data.resize(time_size);
nassertv(source.get_remaining_size() > 0); for (DataPoint &dp : _time_data) {
DataPoint dp; nassertv(source.get_remaining_size() > 0);
dp._index = source.get_uint16(); dp._index = source.get_uint16();
dp._value = source.get_float32(); dp._value = source.get_float32();
_time_data.push_back(dp); }
} }
int level_size = source.get_uint16();
for (i = 0; i < level_size; i++) { {
nassertv(source.get_remaining_size() > 0); size_t level_size = source.get_uint16();
DataPoint dp; _level_data.resize(level_size);
dp._index = source.get_uint16(); for (DataPoint &dp : _level_data) {
dp._value = source.get_float32(); nassertv(source.get_remaining_size() > 0);
_level_data.push_back(dp); dp._index = source.get_uint16();
dp._value = source.get_float32();
}
} }
nassertv(source.get_remaining_size() == 0); nassertv(source.get_remaining_size() == 0);
} }