From ad7669e12ace60a6b7470617c51054ea259d3ab3 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Fri, 16 Feb 2018 04:42:31 -0700 Subject: [PATCH] mathutil: Update FFTCompressor for FFTW3 This has been due for a while. The last FFTW 2.x release was in 1999. Note that this does change some of the loops; this has two benefits: 1) The halfcomplex storage order is now explained with a comment. 2) It fixed the special case "don't break a run of bytes for a zero" which was never triggering due to the value not being *exactly* 0.0. I have tested these changes against older FFT-compressed animation .bams and no noticeable decompression changes are present, so a .bam version bump is not necessary. --- makepanda/makepanda.py | 5 +- panda/src/mathutil/fftCompressor.cxx | 124 +++++++++++++++++++-------- 2 files changed, 91 insertions(+), 38 deletions(-) diff --git a/makepanda/makepanda.py b/makepanda/makepanda.py index 3f75af73e4..21c0181159 100755 --- a/makepanda/makepanda.py +++ b/makepanda/makepanda.py @@ -648,8 +648,7 @@ if (COMPILER == "MSVC"): if (PkgSkip("HARFBUZZ")==0): LibName("HARFBUZZ", GetThirdpartyDir() + "harfbuzz/lib/harfbuzz.lib") IncDirectory("HARFBUZZ", GetThirdpartyDir() + "harfbuzz/include/harfbuzz") - if (PkgSkip("FFTW")==0): LibName("FFTW", GetThirdpartyDir() + "fftw/lib/rfftw.lib") - if (PkgSkip("FFTW")==0): LibName("FFTW", GetThirdpartyDir() + "fftw/lib/fftw.lib") + if (PkgSkip("FFTW")==0): LibName("FFTW", GetThirdpartyDir() + "fftw/lib/fftw3.lib") if (PkgSkip("ARTOOLKIT")==0):LibName("ARTOOLKIT",GetThirdpartyDir() + "artoolkit/lib/libAR.lib") if (PkgSkip("OPENCV")==0): LibName("OPENCV", GetThirdpartyDir() + "opencv/lib/cv.lib") if (PkgSkip("OPENCV")==0): LibName("OPENCV", GetThirdpartyDir() + "opencv/lib/highgui.lib") @@ -816,7 +815,7 @@ if (COMPILER=="GCC"): SmartPkgEnable("FFMPEG", ffmpeg_libs, ffmpeg_libs, ("libavformat/avformat.h", "libavcodec/avcodec.h", "libavutil/avutil.h")) SmartPkgEnable("SWSCALE", "libswscale", "libswscale", ("libswscale/swscale.h"), target_pkg = "FFMPEG", thirdparty_dir = "ffmpeg") SmartPkgEnable("SWRESAMPLE","libswresample", "libswresample", ("libswresample/swresample.h"), target_pkg = "FFMPEG", thirdparty_dir = "ffmpeg") - SmartPkgEnable("FFTW", "", ("rfftw", "fftw"), ("fftw.h", "rfftw.h")) + SmartPkgEnable("FFTW", "", ("fftw3"), ("fftw.h")) SmartPkgEnable("FMODEX", "", ("fmodex"), ("fmodex", "fmodex/fmod.h")) SmartPkgEnable("FREETYPE", "freetype2", ("freetype"), ("freetype2", "freetype2/freetype/freetype.h")) SmartPkgEnable("HARFBUZZ", "harfbuzz", ("harfbuzz"), ("harfbuzz", "harfbuzz/hb-ft.h")) diff --git a/panda/src/mathutil/fftCompressor.cxx b/panda/src/mathutil/fftCompressor.cxx index f96c6f5f7c..0a5b7425e1 100644 --- a/panda/src/mathutil/fftCompressor.cxx +++ b/panda/src/mathutil/fftCompressor.cxx @@ -29,18 +29,14 @@ #undef howmany #endif -#ifdef PHAVE_DRFFTW_H - #include "drfftw.h" -#else - #include "rfftw.h" -#endif +#include "fftw3.h" // These FFTW support objects can only be defined if we actually have the FFTW // library available. -static rfftw_plan get_real_compress_plan(int length); -static rfftw_plan get_real_decompress_plan(int length); +static fftw_plan get_real_compress_plan(int length); +static fftw_plan get_real_decompress_plan(int length); -typedef pmap RealPlans; +typedef pmap RealPlans; static RealPlans _real_compress_plans; static RealPlans _real_decompress_plans; @@ -262,20 +258,31 @@ write_reals(Datagram &datagram, const PN_stdfloat *array, int length) { } // Now generate the Fourier transform. - double *data = (double *)alloca(length * sizeof(double)); + int fft_length = length / 2 + 1; + fftw_complex *fft_bins = (fftw_complex *)alloca(fft_length * sizeof(fftw_complex)); + + // This is for an in-place transform. It doesn't violate strict aliasing + // rules because &fft_bins[0][0] is still a double pointer. This saves on + // precious stack space. + double *data = &fft_bins[0][0]; int i; for (i = 0; i < length; i++) { data[i] = array[i]; } - double *half_complex = (double *)alloca(length * sizeof(double)); - - rfftw_plan plan = get_real_compress_plan(length); - rfftw_one(plan, data, half_complex); + // Note: This is an in-place DFT. `data` and `fft_bins` are aliases. + fftw_plan plan = get_real_compress_plan(length); + fftw_execute_dft_r2c(plan, data, fft_bins); // Now encode the numbers, run-length encoded by size, so we only write out // the number of bits we need for each number. + // Note that Panda3D has conventionally always used FFTW2's halfcomplex + // format for serializing the bins. In short, this means that for an n-length + // FFT, it stores: + // 1) The real components for bins 0 through floor(n/2), followed by... + // 2) The imaginary components for bins floor((n+1)/2)-1 through 1. + // (Imaginary component for bin 0 is never stored, as that's always zero.) vector_double run; RunWidth run_width = RW_invalid; @@ -286,8 +293,18 @@ write_reals(Datagram &datagram, const PN_stdfloat *array, int length) { static const double max_range_16 = 32767.0; static const double max_range_8 = 127.0; - double scale_factor = get_scale_factor(i, length); - double num = cfloor(half_complex[i] / scale_factor + 0.5); + int bin; // which FFT bin we're storing + int j; // 0=real; 1=imag + if (i < fft_length) { + bin = i; + j = 0; + } else { + bin = length - i; + j = 1; + } + + double scale_factor = get_scale_factor(bin, fft_length); + double num = cfloor(fft_bins[bin][j] / scale_factor + 0.5); // How many bits do we need to encode this integer? double a = fabs(num); @@ -313,16 +330,30 @@ write_reals(Datagram &datagram, const PN_stdfloat *array, int length) { // across a single intervening zero, don't interrupt the run just for // that. if (run_width == RW_8 && num_width == RW_0) { - if (i + 1 >= length || half_complex[i + 1] != 0.0) { + if (run.back() != 0) { num_width = RW_8; } } if (num_width != run_width) { // Now we need to flush the last run. + + // First, however, take care of the special case above: if we're + // switching from RW_8 to RW_0, there could be a zero at the end, which + // should be reclaimed into the RW_0 run. + bool reclaimed_zero = (run_width == RW_8 && num_width == RW_0 && + run.back() == 0); + if (reclaimed_zero) { + run.pop_back(); + } + num_written += write_run(datagram, run_width, run); run.clear(); run_width = num_width; + + if (reclaimed_zero) { + run.push_back(0); + } } run.push_back(num); @@ -595,14 +626,36 @@ read_reals(DatagramIterator &di, vector_stdfloat &array) { nassertr(num_read == length, false); nassertr((int)half_complex.size() == length, false); + int fft_length = length / 2 + 1; + fftw_complex *fft_bins = (fftw_complex *)alloca(fft_length * sizeof(fftw_complex)); + int i; - for (i = 0; i < length; i++) { - half_complex[i] *= get_scale_factor(i, length); + for (i = 0; i < fft_length; i++) { + double scale_factor = get_scale_factor(i, fft_length); + + // For an explanation of this, see the compression code's comment about the + // halfcomplex format. + + fft_bins[i][0] = half_complex[i] * scale_factor; + if (i == 0) { + // First bin doesn't store imaginary component + fft_bins[i][1] = 0.0; + } else if ((i == fft_length - 1) && !(length & 1)) { + // Last bin doesn't store imaginary component with even lengths + fft_bins[i][1] = 0.0; + } else { + fft_bins[i][1] = half_complex[length - i] * scale_factor; + } } - double *data = (double *)alloca(length * sizeof(double)); - rfftw_plan plan = get_real_decompress_plan(length); - rfftw_one(plan, &half_complex[0], data); + // This is for an in-place transform. It doesn't violate strict aliasing + // rules because &fft_bins[0][0] is still a double pointer. This saves on + // precious stack space. + double *data = &fft_bins[0][0]; + + // Note: This is an in-place DFT. `data` and `fft_bins` are aliases. + fftw_plan plan = get_real_decompress_plan(length); + fftw_execute_dft_c2r(plan, fft_bins, data); double scale = 1.0 / (double)length; array.reserve(array.size() + length); @@ -770,14 +823,14 @@ free_storage() { for (pi = _real_compress_plans.begin(); pi != _real_compress_plans.end(); ++pi) { - rfftw_destroy_plan((*pi).second); + fftw_destroy_plan((*pi).second); } _real_compress_plans.clear(); for (pi = _real_decompress_plans.begin(); pi != _real_decompress_plans.end(); ++pi) { - rfftw_destroy_plan((*pi).second); + fftw_destroy_plan((*pi).second); } _real_decompress_plans.clear(); #endif @@ -933,17 +986,18 @@ read_run(DatagramIterator &di, vector_double &run) { } /** - * Returns the appropriate scaling for the given position within the - * halfcomplex array. + * Returns the appropriate scaling for the given bin in the FFT output. + * + * The scale factor is the value of one integer in the quantized data. As such, + * greater bins (higher, more noticeable frequencies) have *lower* scaling + * factors, which means greater precision. */ double FFTCompressor:: get_scale_factor(int i, int length) const { - int m = (length / 2) + 1; - int k = (i < m) ? i : length - i; - nassertr(k >= 0 && k < m, 1.0); + nassertr(i < length, 1.0); return _fft_offset + - _fft_factor * pow((double)(m-1 - k) / (double)(m-1), _fft_exponent); + _fft_factor * pow((double)(length - i) / (double)(length), _fft_exponent); } /** @@ -997,7 +1051,7 @@ get_compressability(const PN_stdfloat *data, int length) const { * Returns a FFTW plan suitable for compressing a float array of the indicated * length. */ -static rfftw_plan +static fftw_plan get_real_compress_plan(int length) { RealPlans::iterator pi; pi = _real_compress_plans.find(length); @@ -1005,8 +1059,8 @@ get_real_compress_plan(int length) { return (*pi).second; } - rfftw_plan plan; - plan = rfftw_create_plan(length, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE); + fftw_plan plan; + plan = fftw_plan_dft_r2c_1d(length, NULL, NULL, FFTW_ESTIMATE); _real_compress_plans.insert(RealPlans::value_type(length, plan)); return plan; @@ -1016,7 +1070,7 @@ get_real_compress_plan(int length) { * Returns a FFTW plan suitable for decompressing a float array of the * indicated length. */ -static rfftw_plan +static fftw_plan get_real_decompress_plan(int length) { RealPlans::iterator pi; pi = _real_decompress_plans.find(length); @@ -1024,8 +1078,8 @@ get_real_decompress_plan(int length) { return (*pi).second; } - rfftw_plan plan; - plan = rfftw_create_plan(length, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE); + fftw_plan plan; + plan = fftw_plan_dft_c2r_1d(length, NULL, NULL, FFTW_ESTIMATE); _real_decompress_plans.insert(RealPlans::value_type(length, plan)); return plan;