From 1c6f4c29c4e9b31798d63d48dcc162a550d38e51 Mon Sep 17 00:00:00 2001 From: rdb Date: Thu, 28 Apr 2016 11:58:46 +0200 Subject: [PATCH] Add support for native FLAC reading --- panda/src/movies/config_movies.cxx | 5 + panda/src/movies/dr_flac.h | 2976 ++++++++++++++++++++++ panda/src/movies/flacAudio.I | 12 + panda/src/movies/flacAudio.cxx | 64 + panda/src/movies/flacAudio.h | 54 + panda/src/movies/flacAudioCursor.I | 12 + panda/src/movies/flacAudioCursor.cxx | 120 + panda/src/movies/flacAudioCursor.h | 65 + panda/src/movies/p3movies_composite1.cxx | 2 + 9 files changed, 3310 insertions(+) create mode 100644 panda/src/movies/dr_flac.h create mode 100644 panda/src/movies/flacAudio.I create mode 100644 panda/src/movies/flacAudio.cxx create mode 100644 panda/src/movies/flacAudio.h create mode 100644 panda/src/movies/flacAudioCursor.I create mode 100644 panda/src/movies/flacAudioCursor.cxx create mode 100644 panda/src/movies/flacAudioCursor.h diff --git a/panda/src/movies/config_movies.cxx b/panda/src/movies/config_movies.cxx index db5d6be074..fbd80e275d 100644 --- a/panda/src/movies/config_movies.cxx +++ b/panda/src/movies/config_movies.cxx @@ -13,6 +13,8 @@ #include "config_movies.h" #include "dconfig.h" +#include "flacAudio.h" +#include "flacAudioCursor.h" #include "inkblotVideo.h" #include "inkblotVideoCursor.h" #include "microphoneAudio.h" @@ -75,6 +77,8 @@ init_libmovies() { } initialized = true; + FlacAudio::init_type(); + FlacAudioCursor::init_type(); InkblotVideo::init_type(); InkblotVideoCursor::init_type(); MicrophoneAudio::init_type(); @@ -93,6 +97,7 @@ init_libmovies() { #endif MovieTypeRegistry *reg = MovieTypeRegistry::get_global_ptr(); + reg->register_audio_type(&FlacAudio::make, "flac"); reg->register_audio_type(&WavAudio::make, "wav wave"); #ifdef HAVE_VORBIS diff --git a/panda/src/movies/dr_flac.h b/panda/src/movies/dr_flac.h new file mode 100644 index 0000000000..7043ec66f1 --- /dev/null +++ b/panda/src/movies/dr_flac.h @@ -0,0 +1,2976 @@ +// Public 
domain. See "unlicense" statement at the end of this file. +//NB: modified by rdb to use 16-bit instead of 32-bit samples. + +// ABOUT +// +// This is a simple library for decoding FLAC files. +// +// +// +// USAGE +// +// This is a single-file library. To use it, do something like the following in one .c file. +// #define DR_FLAC_IMPLEMENTATION +// #include "dr_flac.h" +// +// You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, +// do something like the following: +// +// drflac* pFlac = drflac_open_file("MySong.flac"); +// if (pFlac == NULL) { +// ... Failed to open FLAC file ... +// } +// +// int16_t* pSamples = malloc(pFlac->totalSampleCount * sizeof(int16_t)); +// uint64_t numberOfSamplesActuallyRead = drflac_read_s16(pFlac, pFlac->totalSampleCount, pSamples); +// +// ... pSamples now contains the decoded samples as interleaved signed 16-bit PCM ... +// +// The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of +// channels and the bits per sample, should be directly accessible - just make sure you don't change their values. +// +// You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and +// the decoder will give you as many samples as it can, up to the amount requested. Later on when you need the next batch of +// samples, just call it again. Example: +// +// while (drflac_read_s16(pFlac, chunkSize, pChunkSamples) > 0) { +// do_something(); +// } +// +// You can seek to a specific sample with drflac_seek_to_sample(). The given sample is based on interleaving. So for example, +// if you were to seek to the sample at index 0 in a stereo stream, you'll be seeking to the first sample of the left channel. +// The sample at index 1 will be the first sample of the right channel. The sample at index 2 will be the second sample of the +// left channel, etc. 
+// +// +// +// OPTIONS +// #define these options before including this file. +// +// #define DR_FLAC_NO_STDIO +// Disable drflac_open_file(). +// +// #define DR_FLAC_NO_WIN32_IO +// Don't use the Win32 API internally for drflac_open_file(). Setting this will force stdio FILE APIs instead. This is +// mainly for testing, but it's left here in case somebody might find use for it. dr_flac will use the Win32 API by +// default. Ignored when DR_FLAC_NO_STDIO is #defined. +// +// #define DR_FLAC_BUFFER_SIZE +// Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls +// back to the client for more data. Larger values mean more memory, but better performance. My tests show diminishing +// returns after about 4KB (which is the default). Consider reducing this if you have a very efficient implementation of +// onRead(), or increase it if it's very inefficient. +// +// +// +// QUICK NOTES +// +// - Based on my own tests, the 32-bit build is about 1.1x-1.25x slower than the reference implementation. The 64-bit +// build is at about parity. +// - This should work fine with valid native FLAC files, but it won't work very well when the STREAMINFO block is unavailable +// and when a stream starts in the middle of a frame. This is something I plan on addressing. +// - Audio data is retrieved as signed 16-bit PCM, regardless of the bits per sample the FLAC stream is encoded as. +// - This has not been tested on big-endian architectures. +// - Rice codes in unencoded binary form (see https://xiph.org/flac/format.html#rice_partition) have not been tested. If anybody +// knows where I can find some test files for this, let me know. +// - Perverse and erroneous files have not been tested. Again, if you know where I can get some test files let me know. +// - dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
+// - dr_flac does not currently do any CRC checks. +// - Ogg encapsulation is not supported, but I want to add it at some point. +// +// +// +// TODO +// - Implement a proper test suite. +// - Add support for initializing the decoder without a STREAMINFO block. Build a synthethic test to get support working at at least +// a basic level. +// - Add support for retrieving metadata blocks so applications can retrieve the album art or whatnot. +// - Add support for Ogg encapsulation. + +#ifndef dr_flac_h +#define dr_flac_h + +#include +#include +#include + +// As data is read from the client it is placed into an internal buffer for fast access. This controls the +// size of that buffer. Larger values means more speed, but also more memory. In my testing there is diminishing +// returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8. +#ifndef DR_FLAC_BUFFER_SIZE +#define DR_FLAC_BUFFER_SIZE 4096 +#endif + +// Check if we can enable 64-bit optimizations. +#if defined(_WIN64) +#define DRFLAC_64BIT +#endif + +#if defined(__GNUC__) +#if defined(__x86_64__) || defined(__ppc64__) +#define DRFLAC_64BIT +#endif +#endif + +#ifdef DRFLAC_64BIT +typedef uint64_t drflac_cache_t; +#else +typedef uint32_t drflac_cache_t; +#endif + + + +// Callback for when data is read. Return value is the number of bytes actually read. +typedef size_t (* drflac_read_proc)(void* userData, void* bufferOut, size_t bytesToRead); + +// Callback for when data needs to be seeked. Offset is always relative to the current position. Return value is false on failure, true success. +typedef bool (* drflac_seek_proc)(void* userData, int offset); + + +typedef struct +{ + // The absolute position of the first byte of the data of the block. This is just past the block's header. + long long pos; + + // The size in bytes of the block's data. 
+ unsigned int sizeInBytes; + +} drflac_block; + +typedef struct +{ + // The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. + unsigned char subframeType; + + // The number of wasted bits per sample as specified by the sub-frame header. + unsigned char wastedBitsPerSample; + + // The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. + unsigned char lpcOrder; + + // The number of bits per sample for this subframe. This is not always equal to the current frame's bit per sample because + // an extra bit is required for side channels when interchannel decorrelation is being used. + int bitsPerSample; + + // A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pHeap, or + // NULL if the heap is not being used. Note that it's a signed 32-bit integer for each value. + int32_t* pDecodedSamples; + +} drflac_subframe; + +typedef struct +{ + // If the stream uses variable block sizes, this will be set to the index of the first sample. If fixed block sizes are used, this will + // always be set to 0. + unsigned long long sampleNumber; + + // If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. + unsigned int frameNumber; + + // The sample rate of this frame. + unsigned int sampleRate; + + // The number of samples in each sub-frame within this frame. + unsigned short blockSize; + + // The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this + // will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE. + unsigned char channelAssignment; + + // The number of bits per sample within this frame. + unsigned char bitsPerSample; + + // The frame's CRC. This is set, but unused at the moment. 
+ unsigned char crc8; + + // The number of samples left to be read in this frame. This is initially set to the block size multiplied by the channel count. As samples + // are read, this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame. + unsigned int samplesRemaining; + + // The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. + drflac_subframe subframes[8]; + +} drflac_frame; + +typedef struct +{ + // The function to call when more data needs to be read. This is set by drflac_open(). + drflac_read_proc onRead; + + // The function to call when the current read position needs to be moved. + drflac_seek_proc onSeek; + + // The user data to pass around to onRead and onSeek. + void* pUserData; + + + // The sample rate. Will be set to something like 44100. + unsigned int sampleRate; + + // The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the + // value specified in the STREAMINFO block. + unsigned char channels; + + // The bits per sample. Will be set to somthing like 16, 24, etc. + unsigned char bitsPerSample; + + // The maximum block size, in samples. This number represents the number of samples in each channel (not combined). + unsigned short maxBlockSize; + + // The total number of samples making up the stream. This includes every channel. For example, if the stream has 2 channels, + // with each channel having a total of 4096, this value will be set to 2*4096 = 8192. + uint64_t totalSampleCount; + + + // The location and size of the APPLICATION block. + drflac_block applicationBlock; + + // The location and size of the SEEKTABLE block. + drflac_block seektableBlock; + + // The location and size of the VORBIS_COMMENT block. + drflac_block vorbisCommentBlock; + + // The location and size of the CUESHEET block. 
+ drflac_block cuesheetBlock; + + // The location and size of the PICTURE block. + drflac_block pictureBlock; + + + // Information about the frame the decoder is currently sitting on. + drflac_frame currentFrame; + + // The position of the first frame in the stream. This is only ever used for seeking. + unsigned long long firstFramePos; + + + + // The current byte position in the client's data stream. + uint64_t currentBytePos; + + // The index of the next valid cache line in the "L2" cache. + size_t nextL2Line; + + // The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. + size_t consumedBits; + + // Unused L2 lines. This will always be 0 until the end of the stream is hit. Used for correctly calculating the current byte + // position of the read pointer in the stream. + size_t unusedL2Lines; + + // The cached data which was most recently read from the client. When data is read from the client, it is placed within this + // variable. As data is read, it's bit-shifted such that the next valid bit is sitting on the most significant bit. + drflac_cache_t cache; + drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)]; + + + // A pointer to the decoded sample data. This is an offset of pExtraData. + int32_t* pDecodedSamples; + + // Variable length extra data. We attach this to the end of the object so we avoid unnecessary mallocs. + char pExtraData[1]; + +} drflac; + + + + +// Opens a FLAC decoder. +// +// This is the lowest level function for opening a FLAC stream. You can also use drflac_open_file() and drflac_open_memory() +// to open the stream from a file or from a block of memory respectively. +// +// At the moment the STREAMINFO block must be present for this to succeed. +// +// The onRead and onSeek callbacks are used to read and seek data provided by the client. 
+static drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData); + +// Closes the given FLAC decoder. +static void drflac_close(drflac* pFlac); + +// Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM. +// +// Returns the number of samples actually read. +static uint64_t drflac_read_s16(drflac* pFlac, uint64_t samplesToRead, int16_t* pBufferOut); + +// Seeks to the sample at the given index. +static bool drflac_seek_to_sample(drflac* pFlac, uint64_t sampleIndex); + + + +#ifndef DR_FLAC_NO_STDIO +// Opens a flac decoder from the file at the given path. +static drflac* drflac_open_file(const char* pFile); +#endif + +// Helper for opening a file from a pre-allocated memory buffer. +// +// This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for +// the lifetime of the decoder. +static drflac* drflac_open_memory(const void* data, size_t dataSize); + +#endif //dr_flac_h + + +/////////////////////////////////////////////////////////////////////////////// +// +// IMPLEMENTATION +// +/////////////////////////////////////////////////////////////////////////////// +#ifdef DR_FLAC_IMPLEMENTATION +#include +#include +#include + +#ifdef _MSC_VER +#include // For _byteswap_ulong and _byteswap_uint64 +#endif + +#ifdef __linux__ +#define _BSD_SOURCE +#include +#endif + +#ifdef _MSC_VER +#define DRFLAC_INLINE __forceinline +#else +#define DRFLAC_INLINE inline +#endif + +#define DRFLAC_BLOCK_TYPE_STREAMINFO 0 +#define DRFLAC_BLOCK_TYPE_PADDING 1 +#define DRFLAC_BLOCK_TYPE_APPLICATION 2 +#define DRFLAC_BLOCK_TYPE_SEEKTABLE 3 +#define DRFLAC_BLOCK_TYPE_VORBIS_COMMENT 4 +#define DRFLAC_BLOCK_TYPE_CUESHEET 5 +#define DRFLAC_BLOCK_TYPE_PICTURE 6 +#define DRFLAC_BLOCK_TYPE_INVALID 127 + +#define DRFLAC_SUBFRAME_CONSTANT 0 +#define DRFLAC_SUBFRAME_VERBATIM 1 +#define DRFLAC_SUBFRAME_FIXED 8 +#define DRFLAC_SUBFRAME_LPC 32 +#define DRFLAC_SUBFRAME_RESERVED 255 + +#define 
DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE 0 +#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1 + +#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT 0 +#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE 8 +#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9 +#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10 + +typedef struct +{ + uint64_t firstSample; + uint64_t frameOffset; // The offset from the first byte of the header of the first frame. + uint16_t sampleCount; +} drflac_seekpoint; + +#ifndef DR_FLAC_NO_STDIO +#if defined(DR_FLAC_NO_WIN32_IO) || !defined(_WIN32) +#include + +static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData); +} + +static bool drflac__on_seek_stdio(void* pUserData, int offset) +{ + return fseek((FILE*)pUserData, offset, SEEK_CUR) == 0; +} + +static drflac* drflac_open_file(const char* filename) +{ + FILE* pFile; +#ifdef _MSC_VER + if (fopen_s(&pFile, filename, "rb") != 0) { + return NULL; + } +#else + pFile = fopen(filename, "rb"); + if (pFile == NULL) { + return NULL; + } +#endif + + return drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, pFile); +} +#else +#include + +static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + assert(bytesToRead < 0xFFFFFFFF); // dr_flac will never request huge amounts of data at a time. This is a safe assertion.
+ + DWORD bytesRead; + ReadFile((HANDLE)pUserData, bufferOut, (DWORD)bytesToRead, &bytesRead, NULL); + + return (size_t)bytesRead; +} + +static bool drflac__on_seek_stdio(void* pUserData, int offset) +{ + return SetFilePointer((HANDLE)pUserData, offset, NULL, FILE_CURRENT) != INVALID_SET_FILE_POINTER; +} + +static drflac* drflac_open_file(const char* filename) +{ + HANDLE hFile = CreateFileA(filename, FILE_GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + return NULL; + } + + return drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)hFile); +} +#endif +#endif //DR_FLAC_NO_STDIO + + +typedef struct +{ + /// A pointer to the beginning of the data. We use a char as the type here for easy offsetting. + const unsigned char* data; + + /// The size of the data. + size_t dataSize; + + /// The position we're currently sitting at. + size_t currentReadPos; + +} drflac_memory; + +static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + drflac_memory* memory = (drflac_memory*)pUserData; + assert(memory != NULL); + assert(memory->dataSize >= memory->currentReadPos); + + size_t bytesRemaining = memory->dataSize - memory->currentReadPos; + if (bytesToRead > bytesRemaining) { + bytesToRead = bytesRemaining; + } + + if (bytesToRead > 0) { + memcpy(bufferOut, memory->data + memory->currentReadPos, bytesToRead); + memory->currentReadPos += bytesToRead; + } + + return bytesToRead; +} + +static bool drflac__on_seek_memory(void* pUserData, int offset) +{ + drflac_memory* memory = (drflac_memory*)pUserData; + assert(memory != NULL); + + if (offset > 0) { + if (memory->currentReadPos + offset > memory->dataSize) { + offset = (int)(memory->dataSize - memory->currentReadPos); // Trying to seek too far forward. + } + } else { + if (memory->currentReadPos < (size_t)-offset) { + offset = -(int)memory->currentReadPos; // Trying to seek too far backwards.
+ } + } + + // This will never underflow thanks to the clamps above. + memory->currentReadPos += offset; + + return 1; +} + +static drflac* drflac_open_memory(const void* data, size_t dataSize) +{ + drflac_memory* pUserData = (drflac_memory*)malloc(sizeof(*pUserData)); + if (pUserData == NULL) { + return NULL; + } + + pUserData->data = (const unsigned char*)data; + pUserData->dataSize = dataSize; + pUserData->currentReadPos = 0; + return drflac_open(drflac__on_read_memory, drflac__on_seek_memory, pUserData); +} + + +//// Endian Management //// +static DRFLAC_INLINE bool drflac__is_little_endian() +{ + int n = 1; + return (*(char*)&n) == 1; +} + +static DRFLAC_INLINE uint32_t drflac__swap_endian_uint32(uint32_t n) +{ +#ifdef _MSC_VER + return _byteswap_ulong(n); +#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + return __builtin_bswap32(n); +#else + return ((n & 0xFF000000) >> 24) | + ((n & 0x00FF0000) >> 8) | + ((n & 0x0000FF00) << 8) | + ((n & 0x000000FF) << 24); +#endif +} + +static DRFLAC_INLINE uint64_t drflac__swap_endian_uint64(uint64_t n) +{ +#ifdef _MSC_VER + return _byteswap_uint64(n); +#elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + return __builtin_bswap64(n); +#else + return ((n & 0xFF00000000000000ULL) >> 56) | + ((n & 0x00FF000000000000ULL) >> 40) | + ((n & 0x0000FF0000000000ULL) >> 24) | + ((n & 0x000000FF00000000ULL) >> 8) | + ((n & 0x00000000FF000000ULL) << 8) | + ((n & 0x0000000000FF0000ULL) << 24) | + ((n & 0x000000000000FF00ULL) << 40) | + ((n & 0x00000000000000FFULL) << 56); +#endif +} + + +static DRFLAC_INLINE uint32_t drflac__be2host_32(uint32_t n) +{ +#ifdef __linux__ + return be32toh(n); +#else + if (drflac__is_little_endian()) { + return drflac__swap_endian_uint32(n); + } + + return n; +#endif +} + +static DRFLAC_INLINE uint64_t drflac__be2host_64(uint64_t n) +{ +#ifdef __linux__ + return be64toh(n); +#else + if (drflac__is_little_endian()) { + return
drflac__swap_endian_uint64(n); + } + + return n; +#endif +} + +#ifdef DRFLAC_64BIT +#define drflac__be2host__cache_line drflac__be2host_64 +#else +#define drflac__be2host__cache_line drflac__be2host_32 +#endif + + +// BIT READING ATTEMPT #2 +// +// This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting +// on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache +// is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an +// array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data +// from onRead() is read into. +#define DRFLAC_CACHE_L1_SIZE_BYTES (sizeof(pFlac->cache)) +#define DRFLAC_CACHE_L1_SIZE_BITS (sizeof(pFlac->cache)*8) +#define DRFLAC_CACHE_L1_BITS_REMAINING (DRFLAC_CACHE_L1_SIZE_BITS - (pFlac->consumedBits)) +#ifdef DRFLAC_64BIT +#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount) (~(((uint64_t)-1LL) >> (_bitCount))) +#else +#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount) (~(((uint32_t)-1) >> (_bitCount))) +#endif +#define DRFLAC_CACHE_L1_SELECTION_SHIFT(_bitCount) (DRFLAC_CACHE_L1_SIZE_BITS - (_bitCount)) +#define DRFLAC_CACHE_L1_SELECT(_bitCount) ((pFlac->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)) +#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(_bitCount) (DRFLAC_CACHE_L1_SELECT(_bitCount) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(_bitCount)) +#define DRFLAC_CACHE_L2_SIZE_BYTES (sizeof(pFlac->cacheL2)) +#define DRFLAC_CACHE_L2_LINE_COUNT (DRFLAC_CACHE_L2_SIZE_BYTES / sizeof(pFlac->cacheL2[0])) +#define DRFLAC_CACHE_L2_LINES_REMAINING (DRFLAC_CACHE_L2_LINE_COUNT - pFlac->nextL2Line) + +static DRFLAC_INLINE bool drflac__reload_l1_cache_from_l2(drflac* pFlac) +{ + // Fast path. Try loading straight from L2. 
+ if (pFlac->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT) { + pFlac->cache = pFlac->cacheL2[pFlac->nextL2Line++]; + return true; + } + + // If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client. + size_t bytesRead = pFlac->onRead(pFlac->pUserData, pFlac->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES); + pFlac->currentBytePos += bytesRead; + + pFlac->nextL2Line = 0; + if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES) { + pFlac->cache = pFlac->cacheL2[pFlac->nextL2Line++]; + return true; + } + + + // If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably + // means we've just reached the end of the file. We need to move the valid data down to the end of the buffer + // and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to + // the size of the L1 so we'll need to seek backwards by any misaligned bytes. + size_t alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES; + if (alignedL1LineCount > 0) + { + size_t offset = DRFLAC_CACHE_L2_LINE_COUNT - alignedL1LineCount; + for (size_t i = alignedL1LineCount; i > 0; --i) { + pFlac->cacheL2[i-1 + offset] = pFlac->cacheL2[i-1]; + } + + pFlac->nextL2Line = offset; + pFlac->unusedL2Lines = offset; + + // At this point there may be some leftover unaligned bytes. We need to seek backwards so we don't lose + // those bytes. + size_t unalignedBytes = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES); + if (unalignedBytes > 0) { + pFlac->onSeek(pFlac->pUserData, -(int)unalignedBytes); + pFlac->currentBytePos -= unalignedBytes; + } + + pFlac->cache = pFlac->cacheL2[pFlac->nextL2Line++]; + return true; + } + else + { + // If we get into this branch it means we weren't able to load any L1-aligned data. We just need to seek + // backwards by the leftover bytes and return false. 
+ if (bytesRead > 0) { + pFlac->onSeek(pFlac->pUserData, -(int)bytesRead); + pFlac->currentBytePos -= bytesRead; + } + + pFlac->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT; + return false; + } +} + +static bool drflac__reload_cache(drflac* pFlac) +{ + // Fast path. Try just moving the next value in the L2 cache to the L1 cache. + if (drflac__reload_l1_cache_from_l2(pFlac)) { + pFlac->cache = drflac__be2host__cache_line(pFlac->cache); + pFlac->consumedBits = 0; + return true; + } + + // Slow path. + + // If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last + // few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the + // data straight from the client into the L1 cache. This should only really happen once per stream so efficiency is not important. + size_t bytesRead = pFlac->onRead(pFlac->pUserData, &pFlac->cache, DRFLAC_CACHE_L1_SIZE_BYTES); + if (bytesRead == 0) { + return false; + } + + pFlac->currentBytePos += bytesRead; + + assert(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES); + pFlac->consumedBits = (DRFLAC_CACHE_L1_SIZE_BYTES - bytesRead) * 8; + + pFlac->cache = drflac__be2host__cache_line(pFlac->cache); + pFlac->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_SIZE_BITS - pFlac->consumedBits); // <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. + return true; +} + +static bool drflac__seek_bits(drflac* pFlac, size_t bitsToSeek) +{ + if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING) { + pFlac->consumedBits += bitsToSeek; + pFlac->cache <<= bitsToSeek; + return true; + } else { + // It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. 
+ bitsToSeek -= DRFLAC_CACHE_L1_BITS_REMAINING; + pFlac->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING; + pFlac->cache = 0; + + size_t wholeBytesRemaining = bitsToSeek/8; + if (wholeBytesRemaining > 0) + { + // The next bytes to seek will be located in the L2 cache. The problem is that the L2 cache is not byte aligned, + // but rather DRFLAC_CACHE_L1_SIZE_BYTES aligned (usually 4 or 8). If, for example, the number of bytes to seek is + // 3, we'll need to handle it in a special way. + size_t wholeCacheLinesRemaining = wholeBytesRemaining / DRFLAC_CACHE_L1_SIZE_BYTES; + if (wholeCacheLinesRemaining < DRFLAC_CACHE_L2_LINES_REMAINING) + { + wholeBytesRemaining -= wholeCacheLinesRemaining * DRFLAC_CACHE_L1_SIZE_BYTES; + bitsToSeek -= wholeCacheLinesRemaining * DRFLAC_CACHE_L1_SIZE_BITS; + pFlac->nextL2Line += wholeCacheLinesRemaining; + } + else + { + wholeBytesRemaining -= DRFLAC_CACHE_L2_LINES_REMAINING * DRFLAC_CACHE_L1_SIZE_BYTES; + bitsToSeek -= DRFLAC_CACHE_L2_LINES_REMAINING * DRFLAC_CACHE_L1_SIZE_BITS; + pFlac->nextL2Line += DRFLAC_CACHE_L2_LINES_REMAINING; + + pFlac->onSeek(pFlac->pUserData, (int)wholeBytesRemaining); + pFlac->currentBytePos += wholeBytesRemaining; + bitsToSeek -= wholeBytesRemaining*8; + } + } + + + if (bitsToSeek > 0) { + if (!drflac__reload_cache(pFlac)) { + return false; + } + + return drflac__seek_bits(pFlac, bitsToSeek); + } + + return true; + } +} + +static bool drflac__read_uint32(drflac* pFlac, unsigned int bitCount, uint32_t* pResultOut) +{ + assert(pFlac != NULL); + assert(pResultOut != NULL); + assert(bitCount > 0); + assert(bitCount <= 32); + + if (pFlac->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS) { + if (!drflac__reload_cache(pFlac)) { + return false; + } + } + + if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING) { + if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS) { + *pResultOut = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bitCount); + pFlac->consumedBits += bitCount; + pFlac->cache <<= bitCount; + } else { + *pResultOut = (uint32_t)pFlac->cache; 
// NOTE(review): everything up to the first closing brace in column 0 below is the tail of a function whose beginning
// lies above this chunk (presumably drflac__read_uint32()). It is carried over unchanged.
            pFlac->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS;
            pFlac->cache = 0;
        }
        return true;
    } else {
        // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them.
        size_t bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING;
        size_t bitCountLo = bitCount - bitCountHi;
        uint32_t resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bitCountHi);

        if (!drflac__reload_cache(pFlac)) {
            return false;
        }

        *pResultOut = (resultHi << bitCountLo) | DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bitCountLo);
        pFlac->consumedBits += bitCountLo;
        pFlac->cache <<= bitCountLo;
        return true;
    }
}

// Reads a signed, two's-complement integer that is <bitCount> bits wide (1..32) and sign-extends it to 32 bits.
//
// Returns false if the underlying unsigned read fails; *pResult is left untouched in that case.
static bool drflac__read_int32(drflac* pFlac, unsigned int bitCount, int32_t* pResult)
{
    assert(pFlac != NULL);
    assert(pResult != NULL);
    assert(bitCount > 0);
    assert(bitCount <= 32);

    uint32_t result;
    if (!drflac__read_uint32(pFlac, bitCount, &result)) {
        return false;
    }

    // Sign extension. A full 32-bit read needs none, and shifting by 32 would be undefined, so it is skipped. The
    // arithmetic is done on unsigned values: (~signBit + 1) is 0 when the sign bit is clear and all ones when it is set.
    if (bitCount < 32) {
        uint32_t signBit = (result >> (bitCount - 1)) & 0x01;
        result |= (~signBit + 1) << bitCount;
    }

    *pResult = (int32_t)result;
    return true;
}

// Reads an unsigned integer that is between 33 and 64 bits wide. The value is read as a high part of
// (bitCount - 32) bits followed by a low part of exactly 32 bits.
static bool drflac__read_uint64(drflac* pFlac, unsigned int bitCount, uint64_t* pResultOut)
{
    assert(bitCount <= 64);
    assert(bitCount >  32);

    uint32_t resultHi;
    if (!drflac__read_uint32(pFlac, bitCount - 32, &resultHi)) {
        return false;
    }

    uint32_t resultLo;
    if (!drflac__read_uint32(pFlac, 32, &resultLo)) {
        return false;
    }

    *pResultOut = (((uint64_t)resultHi) << 32) | ((uint64_t)resultLo);
    return true;
}

// Reads a signed, two's-complement integer that is <bitCount> bits wide and sign-extends it to 64 bits. The read is
// delegated to drflac__read_uint64(), so the same 33..64 bit range applies.
static bool drflac__read_int64(drflac* pFlac, unsigned int bitCount, int64_t* pResultOut)
{
    assert(bitCount <= 64);

    uint64_t result;
    if (!drflac__read_uint64(pFlac, bitCount, &result)) {
        return false;
    }

    // Same unsigned sign-extension trick as drflac__read_int32(); a shift by the full 64 bits is never performed.
    if (bitCount < 64) {
        uint64_t signBit = (result >> (bitCount - 1)) & 0x01;
        result |= (~signBit + 1) << bitCount;
    }

    *pResultOut = (int64_t)result;
    return true;
}

// Reads an unsigned integer of 1..16 bits.
static bool drflac__read_uint16(drflac* pFlac, unsigned int bitCount, uint16_t* pResult)
{
    assert(pFlac != NULL);
    assert(pResult != NULL);
    assert(bitCount > 0);
    assert(bitCount <= 16);

    uint32_t result;
    if (!drflac__read_uint32(pFlac, bitCount, &result)) {
        return false;
    }

    *pResult = (uint16_t)result;
    return true;
}

// Reads a signed integer of 1..16 bits (sign-extended by drflac__read_int32()).
static bool drflac__read_int16(drflac* pFlac, unsigned int bitCount, int16_t* pResult)
{
    assert(pFlac != NULL);
    assert(pResult != NULL);
    assert(bitCount > 0);
    assert(bitCount <= 16);

    int32_t result;
    if (!drflac__read_int32(pFlac, bitCount, &result)) {
        return false;
    }

    *pResult = (int16_t)result;
    return true;
}

// Reads an unsigned integer of 1..8 bits.
static bool drflac__read_uint8(drflac* pFlac, unsigned int bitCount, uint8_t* pResult)
{
    assert(pFlac != NULL);
    assert(pResult != NULL);
    assert(bitCount > 0);
    assert(bitCount <= 8);

    uint32_t result;
    if (!drflac__read_uint32(pFlac, bitCount, &result)) {
        return false;
    }

    *pResult = (uint8_t)result;
    return true;
}

// Reads a signed integer of 1..8 bits (sign-extended by drflac__read_int32()).
static bool drflac__read_int8(drflac* pFlac, unsigned int bitCount, int8_t* pResult)
{
    assert(pFlac != NULL);
    assert(pResult != NULL);
    assert(bitCount > 0);
    assert(bitCount <= 8);

    int32_t result;
    if (!drflac__read_int32(pFlac, bitCount, &result)) {
        return false;
    }

    *pResult = (int8_t)result;
    return true;
}


// Skips forward to just past the next set bit in the stream and reports, through pOffsetOut, how many zero bits
// preceded that set bit. Returns false if the cache could not be reloaded while searching.
static inline bool drflac__seek_past_next_set_bit(drflac* pFlac, unsigned int* pOffsetOut)
{
    unsigned int zeroCounter = 0;
    while (pFlac->cache == 0) {
        zeroCounter += (unsigned int)DRFLAC_CACHE_L1_BITS_REMAINING;
        if (!drflac__reload_cache(pFlac)) {
            return false;
        }
    }

    // At this point the cache should not be zero, in which case we know the first set bit should be somewhere in here. There is
    // no need for us to perform any cache reloading logic here which should make things much faster.
    assert(pFlac->cache != 0);

    // Indexed by the leading 4 bits of the cache: 1 + the position of the first set bit, or 0 when none of the 4 bits
    // is set. Static and const so that it is not rebuilt on every call.
    static const unsigned int bitOffsetTable[] = {
        0,
        4,
        3, 3,
        2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1
    };

    unsigned int setBitOffsetPlus1 = bitOffsetTable[DRFLAC_CACHE_L1_SELECT_AND_SHIFT(4)];
    if (setBitOffsetPlus1 == 0) {
        if (pFlac->cache == 1) {
            // Only the very last bit of the cache line is set, so the whole line is consumed. The cache is cleared
            // explicitly instead of being shifted, because shifting by the full width of the cache is undefined.
            pFlac->consumedBits += DRFLAC_CACHE_L1_SIZE_BITS;
            pFlac->cache = 0;

            *pOffsetOut = zeroCounter + (unsigned int)DRFLAC_CACHE_L1_SIZE_BITS - 1;
            return true;
        }

        setBitOffsetPlus1 = 5;
        for (;;)
        {
            if ((pFlac->cache & DRFLAC_CACHE_L1_SELECT(setBitOffsetPlus1))) {
                break;
            }

            setBitOffsetPlus1 += 1;
        }
    }

    pFlac->consumedBits += setBitOffsetPlus1;
    pFlac->cache <<= setBitOffsetPlus1;

    *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
    return true;
}



// Seeks to an absolute byte offset by issuing relative onSeek() calls. Because onSeek() takes an int, moves larger
// than the int range are split into several calls. The L1 and L2 caches are invalidated afterwards.
//
// Returns the result of the final onSeek() call, or false as soon as one of the intermediate calls fails.
static bool drflac__seek_to_byte(drflac* pFlac, long long offsetFromStart)
{
    assert(pFlac != NULL);

    long long bytesToMove = offsetFromStart - pFlac->currentBytePos;
    if (bytesToMove == 0) {
        return true;
    }

    if (bytesToMove > 0x7FFFFFFF) {
        while (bytesToMove > 0x7FFFFFFF) {
            if (!pFlac->onSeek(pFlac->pUserData, 0x7FFFFFFF)) {
                return false;
            }

            pFlac->currentBytePos += 0x7FFFFFFF;
            bytesToMove -= 0x7FFFFFFF;
        }
    } else {
        while (bytesToMove < (int)0x80000000) {
            if (!pFlac->onSeek(pFlac->pUserData, (int)0x80000000)) {
                return false;
            }

            pFlac->currentBytePos += (int)0x80000000;
            bytesToMove -= (int)0x80000000;
        }
    }

    assert(bytesToMove <= 0x7FFFFFFF && bytesToMove >= (int)0x80000000);

    bool result = pFlac->onSeek(pFlac->pUserData, (int)bytesToMove);    // <-- Safe cast as per the assert above.
    pFlac->currentBytePos += (int)bytesToMove;

    pFlac->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS;
    pFlac->cache = 0;
    pFlac->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT; // <-- This clears the L2 cache.

    return result;
}

// Returns currentBytePos minus the number of bytes that are still sitting unread in the L1 and L2 caches.
static long long drflac__tell(drflac* pFlac)
{
    assert(pFlac != NULL);

    size_t unreadBytesFromL1 = (DRFLAC_CACHE_L1_SIZE_BYTES - (pFlac->consumedBits/8));
    size_t unreadBytesFromL2 = (DRFLAC_CACHE_L2_SIZE_BYTES - ((pFlac->nextL2Line - pFlac->unusedL2Lines)*DRFLAC_CACHE_L1_SIZE_BYTES));

    return pFlac->currentBytePos - unreadBytesFromL1 - unreadBytesFromL2;
}



// Reads a number stored in the extended, UTF-8 style variable-length coding (1 to 7 bytes). On failure *pNumberOut
// is set to 0 and false is returned.
static bool drflac__read_utf8_coded_number(drflac* pFlac, unsigned long long* pNumberOut)
{
    assert(pFlac != NULL);
    assert(pNumberOut != NULL);

    // We should never need to read UTF-8 data while not being aligned to a byte boundary. The bytes are nevertheless
    // fetched one at a time through drflac__read_uint8().
    assert((pFlac->consumedBits & 7) == 0);

    unsigned char utf8[7] = {0};
    if (!drflac__read_uint8(pFlac, 8, utf8)) {
        *pNumberOut = 0;
        return false;
    }

    if ((utf8[0] & 0x80) == 0) {
        *pNumberOut = utf8[0];
        return true;
    }

    // The number of leading 1 bits in the first byte gives the total length of the sequence.
    int byteCount = 1;
    if ((utf8[0] & 0xE0) == 0xC0) {
        byteCount = 2;
    } else if ((utf8[0] & 0xF0) == 0xE0) {
        byteCount = 3;
    } else if ((utf8[0] & 0xF8) == 0xF0) {
        byteCount = 4;
    } else if ((utf8[0] & 0xFC) == 0xF8) {
        byteCount = 5;
    } else if ((utf8[0] & 0xFE) == 0xFC) {
        byteCount = 6;
    } else if ((utf8[0] & 0xFF) == 0xFE) {
        byteCount = 7;
    } else {
        *pNumberOut = 0;
        return false;     // Bad UTF-8 encoding.
    }

    // Read extra bytes.
    assert(byteCount > 1);

    // The first byte contributes the bits below its length prefix; every following byte contributes its low 6 bits.
    unsigned long long result = ((long long)(utf8[0] & (0xFF >> (byteCount + 1))));
    for (int i = 1; i < byteCount; ++i) {
        if (!drflac__read_uint8(pFlac, 8, utf8 + i)) {
            *pNumberOut = 0;
            return false;
        }

        result = (result << 6) | (utf8[i] & 0x3F);
    }

    *pNumberOut = result;
    return true;
}



// Skips a single Rice-coded value without decoding it: the unary part (up to and including the next set bit)
// followed by <m> further bits.
static DRFLAC_INLINE bool drflac__read_and_seek_rice(drflac* pFlac, unsigned char m)
{
    unsigned int unused;
    if (!drflac__seek_past_next_set_bit(pFlac, &unused)) {
        return false;
    }

    if (m > 0) {
        if (!drflac__seek_bits(pFlac, m)) {
            return false;
        }
    }

    return true;
}


// The next two functions are responsible for calculating the prediction.
//
// When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
// safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
//
//
// Optimization Experiment #1
//
// The first optimization experiment I'm trying here is a loop unroll for the most common LPC orders. I've done a little test
// and the results are as follows, in order of most common:
// 1)  order = 8  : 93.1M
// 2)  order = 7  : 36.6M
// 3)  order = 3  : 33.2M
// 4)  order = 6  : 20.9M
// 5)  order = 5  : 18.1M
// 6)  order = 4  : 15.8M
// 7)  order = 12 : 10.8M
// 8)  order = 2  : 9.8M
// 9)  order = 1  : 1.6M
// 10) order = 10 : 1.0M
// 11) order = 9  : 0.8M
// 12) order = 11 : 0.8M
//
// We'll experiment with unrolling the top 8 most common ones. We'll ignore the least common ones since there seems to be a
// large drop off there.
//
// Result: There's a tiny improvement in some cases, but it could just be within margin of error so unsure if it's worthwhile
// just yet.
static DRFLAC_INLINE int32_t drflac__calculate_prediction_32(unsigned int order, int shift, const short* coefficients, int32_t* pDecodedSamples)
{
    assert(order <= 32);

    // 32-bit version.
    //
    // The unrolled if/else variant from experiment #1 was slower on both 32- and 64-bit builds with VC++ and was
    // compiled out with "#if 0"; it has been removed here. The same structure is still used by the 64-bit function
    // below.

    // Experiment #2. See if we can use a switch and let the compiler optimize it to a jump table.
    // Result: VC++ definitely optimizes this to a single jmp as expected. I expect other compilers should do the same, but I've
    // not verified yet.
    int prediction = 0;

    // Every case intentionally falls through to the next one, so entering at <order> accumulates exactly <order> terms.
    switch (order)
    {
    case 32: prediction += coefficients[31] * pDecodedSamples[-32];
    case 31: prediction += coefficients[30] * pDecodedSamples[-31];
    case 30: prediction += coefficients[29] * pDecodedSamples[-30];
    case 29: prediction += coefficients[28] * pDecodedSamples[-29];
    case 28: prediction += coefficients[27] * pDecodedSamples[-28];
    case 27: prediction += coefficients[26] * pDecodedSamples[-27];
    case 26: prediction += coefficients[25] * pDecodedSamples[-26];
    case 25: prediction += coefficients[24] * pDecodedSamples[-25];
    case 24: prediction += coefficients[23] * pDecodedSamples[-24];
    case 23: prediction += coefficients[22] * pDecodedSamples[-23];
    case 22: prediction += coefficients[21] * pDecodedSamples[-22];
    case 21: prediction += coefficients[20] * pDecodedSamples[-21];
    case 20: prediction += coefficients[19] * pDecodedSamples[-20];
    case 19: prediction += coefficients[18] * pDecodedSamples[-19];
    case 18: prediction += coefficients[17] * pDecodedSamples[-18];
    case 17: prediction += coefficients[16] * pDecodedSamples[-17];
    case 16: prediction += coefficients[15] * pDecodedSamples[-16];
    case 15: prediction += coefficients[14] * pDecodedSamples[-15];
    case 14: prediction += coefficients[13] * pDecodedSamples[-14];
    case 13: prediction += coefficients[12] * pDecodedSamples[-13];
    case 12: prediction += coefficients[11] * pDecodedSamples[-12];
    case 11: prediction += coefficients[10] * pDecodedSamples[-11];
    case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
    case  9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
    case  8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
    case  7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
    case  6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
    case  5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
    case  4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
    case  3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
    case  2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
    case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
    }

    return (int32_t)(prediction >> shift);
}

static DRFLAC_INLINE int32_t drflac__calculate_prediction(unsigned int order, int shift, const short* coefficients, int32_t* pDecodedSamples)
{
    assert(order <= 32);

    // 64-bit version.

    // This method is faster on the 32-bit build when compiling with VC++. See note below.
#ifndef DRFLAC_64BIT
    long long prediction;
    if (order == 8)
    {
        prediction  = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1] * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2] * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3] * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4] * (long long)pDecodedSamples[-5];
        prediction += (long long)coefficients[5] * (long long)pDecodedSamples[-6];
        prediction += (long long)coefficients[6] * (long long)pDecodedSamples[-7];
        prediction += (long long)coefficients[7] * (long long)pDecodedSamples[-8];
    }
    else if (order == 7)
    {
        prediction  = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1] * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2] * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3] * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4] * (long long)pDecodedSamples[-5];
        prediction += (long long)coefficients[5] * (long long)pDecodedSamples[-6];
        prediction += (long long)coefficients[6] * (long long)pDecodedSamples[-7];
    }
    else if (order == 3)
    {
        prediction  = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1] * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2] * (long long)pDecodedSamples[-3];
    }
    else if (order == 6)
    {
        prediction  = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1] * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2] * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3] * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4] * (long long)pDecodedSamples[-5];
        prediction += (long long)coefficients[5] * (long long)pDecodedSamples[-6];
    }
    else if (order == 5)
    {
        prediction  = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1] * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2] * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3] * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4] * (long long)pDecodedSamples[-5];
    }
    else if (order == 4)
    {
        prediction  = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1] * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2] * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3] * (long long)pDecodedSamples[-4];
    }
    else if (order == 12)
    {
        prediction  = (long long)coefficients[0]  * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1]  * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2]  * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3]  * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4]  * (long long)pDecodedSamples[-5];
        prediction += (long long)coefficients[5]  * (long long)pDecodedSamples[-6];
        prediction += (long long)coefficients[6]  * (long long)pDecodedSamples[-7];
        prediction += (long long)coefficients[7]  * (long long)pDecodedSamples[-8];
        prediction += (long long)coefficients[8]  * (long long)pDecodedSamples[-9];
        prediction += (long long)coefficients[9]  * (long long)pDecodedSamples[-10];
        prediction += (long long)coefficients[10] * (long long)pDecodedSamples[-11];
        prediction += (long long)coefficients[11] * (long long)pDecodedSamples[-12];
    }
    else if (order == 2)
    {
        prediction  = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1] * (long long)pDecodedSamples[-2];
    }
    else if (order == 1)
    {
        prediction = (long long)coefficients[0] * (long long)pDecodedSamples[-1];
    }
    else if (order == 10)
    {
        prediction  = (long long)coefficients[0]  * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1]  * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2]  * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3]  * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4]  * (long long)pDecodedSamples[-5];
        prediction += (long long)coefficients[5]  * (long long)pDecodedSamples[-6];
        prediction += (long long)coefficients[6]  * (long long)pDecodedSamples[-7];
        prediction += (long long)coefficients[7]  * (long long)pDecodedSamples[-8];
        prediction += (long long)coefficients[8]  * (long long)pDecodedSamples[-9];
        prediction += (long long)coefficients[9]  * (long long)pDecodedSamples[-10];
    }
    else if (order == 9)
    {
        prediction  = (long long)coefficients[0]  * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1]  * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2]  * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3]  * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4]  * (long long)pDecodedSamples[-5];
        prediction += (long long)coefficients[5]  * (long long)pDecodedSamples[-6];
        prediction += (long long)coefficients[6]  * (long long)pDecodedSamples[-7];
        prediction += (long long)coefficients[7]  * (long long)pDecodedSamples[-8];
        prediction += (long long)coefficients[8]  * (long long)pDecodedSamples[-9];
    }
    else if (order == 11)
    {
        prediction  = (long long)coefficients[0]  * (long long)pDecodedSamples[-1];
        prediction += (long long)coefficients[1]  * (long long)pDecodedSamples[-2];
        prediction += (long long)coefficients[2]  * (long long)pDecodedSamples[-3];
        prediction += (long long)coefficients[3]  * (long long)pDecodedSamples[-4];
        prediction += (long long)coefficients[4]  * (long long)pDecodedSamples[-5];
        prediction += (long long)coefficients[5]  * (long long)pDecodedSamples[-6];
        prediction += (long long)coefficients[6]  * (long long)pDecodedSamples[-7];
        prediction += (long long)coefficients[7]  * (long long)pDecodedSamples[-8];
        prediction += (long long)coefficients[8]  * (long long)pDecodedSamples[-9];
        prediction += (long long)coefficients[9]  * (long long)pDecodedSamples[-10];
        prediction += (long long)coefficients[10] * (long long)pDecodedSamples[-11];
    }
    else
    {
        prediction = 0;
        for (int j = 0; j < (int)order; ++j) {
            prediction += (long long)coefficients[j] * (long long)pDecodedSamples[-j-1];
        }
    }
#endif

    // Experiment #2. See if we can use a switch and let the compiler optimize it to a single jmp instruction.
    // Result: VC++ optimizes this to a single jmp on the 64-bit build, but for some reason the 32-bit version compiles to less efficient
    // code. Thus, we use this version on the 64-bit build and the uglier version above for the 32-bit build. If anyone has an idea on how
    // I can get VC++ to generate an efficient jump table for the 32-bit build let me know.
#ifdef DRFLAC_64BIT
    long long prediction = 0;

    // Every case intentionally falls through to the next one.
    switch (order)
    {
    case 32: prediction += (long long)coefficients[31] * (long long)pDecodedSamples[-32];
    case 31: prediction += (long long)coefficients[30] * (long long)pDecodedSamples[-31];
    case 30: prediction += (long long)coefficients[29] * (long long)pDecodedSamples[-30];
    case 29: prediction += (long long)coefficients[28] * (long long)pDecodedSamples[-29];
    case 28: prediction += (long long)coefficients[27] * (long long)pDecodedSamples[-28];
    case 27: prediction += (long long)coefficients[26] * (long long)pDecodedSamples[-27];
    case 26: prediction += (long long)coefficients[25] * (long long)pDecodedSamples[-26];
    case 25: prediction += (long long)coefficients[24] * (long long)pDecodedSamples[-25];
    case 24: prediction += (long long)coefficients[23] * (long long)pDecodedSamples[-24];
    case 23: prediction += (long long)coefficients[22] * (long long)pDecodedSamples[-23];
    case 22: prediction += (long long)coefficients[21] * (long long)pDecodedSamples[-22];
    case 21: prediction += (long long)coefficients[20] * (long long)pDecodedSamples[-21];
    case 20: prediction += (long long)coefficients[19] * (long long)pDecodedSamples[-20];
    case 19: prediction += (long long)coefficients[18] * (long long)pDecodedSamples[-19];
    case 18: prediction += (long long)coefficients[17] * (long long)pDecodedSamples[-18];
    case 17: prediction += (long long)coefficients[16] * (long long)pDecodedSamples[-17];
    case 16: prediction += (long long)coefficients[15] * (long long)pDecodedSamples[-16];
    case 15: prediction += (long long)coefficients[14] * (long long)pDecodedSamples[-15];
    case 14: prediction += (long long)coefficients[13] * (long long)pDecodedSamples[-14];
    case 13: prediction += (long long)coefficients[12] * (long long)pDecodedSamples[-13];
    case 12: prediction += (long long)coefficients[11] * (long long)pDecodedSamples[-12];
    case 11: prediction += (long long)coefficients[10] * (long long)pDecodedSamples[-11];
    case 10: prediction += (long long)coefficients[ 9] * (long long)pDecodedSamples[-10];
    case  9: prediction += (long long)coefficients[ 8] * (long long)pDecodedSamples[- 9];
    case  8: prediction += (long long)coefficients[ 7] * (long long)pDecodedSamples[- 8];
    case  7: prediction += (long long)coefficients[ 6] * (long long)pDecodedSamples[- 7];
    case  6: prediction += (long long)coefficients[ 5] * (long long)pDecodedSamples[- 6];
    case  5: prediction += (long long)coefficients[ 4] * (long long)pDecodedSamples[- 5];
    case  4: prediction += (long long)coefficients[ 3] * (long long)pDecodedSamples[- 4];
    case  3: prediction += (long long)coefficients[ 2] * (long long)pDecodedSamples[- 3];
    case  2: prediction += (long long)coefficients[ 1] * (long long)pDecodedSamples[- 2];
    case  1: prediction += (long long)coefficients[ 0] * (long long)pDecodedSamples[- 1];
    }
#endif

    return (int32_t)(prediction >> shift);
}


// Reads and decodes a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes.
//
// This is the most frequently called function in the library. It does both the Rice decoding and the prediction in a single loop
// iteration.
// Decodes <count> Rice-coded residuals and immediately turns each one into a sample by adding the linear
// prediction computed from the samples that precede it in pSamplesOut.
//
//   riceParam     - number of low-order (binary) bits stored after the unary part of every code.
//   order, shift,
//   coefficients  - predictor description, passed straight through to drflac__calculate_prediction*().
//   pSamplesOut   - destination for the decoded samples; the prediction functions read at negative offsets from
//                   pSamplesOut + i, so at least <order> already-decoded samples must sit in front of it.
//
// Returns false as soon as the bit cache cannot be refilled.
static bool drflac__decode_samples_with_residual__rice(drflac* pFlac, unsigned int count, unsigned char riceParam, unsigned int order, int shift, const short* coefficients, int* pSamplesOut)
{
    assert(pFlac != NULL);
    assert(count > 0);
    assert(pSamplesOut != NULL);

    // Looked up with DRFLAC_CACHE_L1_SELECT_AND_SHIFT(4) (presumably the leading 4 bits of the cache - confirm against
    // the macro definition). Yields 1 + the position of the first set bit, or 0 when none of those bits is set.
    static unsigned int bitOffsetTable[] = {
        0,
        4,
        3, 3,
        2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1
    };

    // Both values depend only on riceParam, so they are computed once outside the loop.
    drflac_cache_t riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
    drflac_cache_t resultHiShift = DRFLAC_CACHE_L1_SIZE_BITS - riceParam;

    for (int i = 0; i < (int)count; ++i)
    {
        // Unary part: count zero bits up to the next set bit, reloading the cache for as long as it is all zeros.
        unsigned int zeroCounter = 0;
        while (pFlac->cache == 0) {
            zeroCounter += (unsigned int)DRFLAC_CACHE_L1_BITS_REMAINING;
            if (!drflac__reload_cache(pFlac)) {
                return false;
            }
        }

        // At this point the cache should not be zero, in which case we know the first set bit should be somewhere in here. There is
        // no need for us to perform any cache reloading logic here which should make things much faster.
        assert(pFlac->cache != 0);
        unsigned int decodedRice;

        // In all three branches decodedRice receives the unary count shifted up by riceParam, leaving room for the
        // binary part that is OR-ed in further down.
        unsigned int setBitOffsetPlus1 = bitOffsetTable[DRFLAC_CACHE_L1_SELECT_AND_SHIFT(4)];
        if (setBitOffsetPlus1 > 0) {
            decodedRice = (zeroCounter + (setBitOffsetPlus1-1)) << riceParam;
        } else {
            if (pFlac->cache == 1) {
                // Only the last bit of the cache is set.
                setBitOffsetPlus1 = DRFLAC_CACHE_L1_SIZE_BITS;
                decodedRice = (zeroCounter + (DRFLAC_CACHE_L1_SIZE_BITS-1)) << riceParam;
            } else {
                // Linear search for the first set bit, starting just past the 4 bits covered by the table.
                setBitOffsetPlus1 = 5;
                for (;;)
                {
                    if ((pFlac->cache & DRFLAC_CACHE_L1_SELECT(setBitOffsetPlus1))) {
                        decodedRice = (zeroCounter + (setBitOffsetPlus1-1)) << riceParam;
                        break;
                    }

                    setBitOffsetPlus1 += 1;
                }
            }
        }


        // Binary part: the riceParam bits that follow the terminating set bit.
        unsigned int bitsLo = 0;
        unsigned int riceLength = setBitOffsetPlus1 + riceParam;
        if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING)
        {
            // Fast path: the whole code is inside the current cache line.
            bitsLo = (unsigned int)((pFlac->cache & (riceParamMask >> setBitOffsetPlus1)) >> (DRFLAC_CACHE_L1_SIZE_BITS - riceLength));

            pFlac->consumedBits += riceLength;
            pFlac->cache <<= riceLength;
        }
        else
        {
            pFlac->consumedBits += riceLength;
            // NOTE(review): when setBitOffsetPlus1 equals DRFLAC_CACHE_L1_SIZE_BITS (the "cache == 1" case above) this
            // looks like a shift by the full width of the cache, which is undefined - confirm and guard if so.
            pFlac->cache <<= setBitOffsetPlus1;

            // It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them.
            size_t bitCountLo = pFlac->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS;
            drflac_cache_t resultHi = pFlac->cache & riceParamMask;    // <-- This mask is OK because all bits after the first bits are always zero.


            // Refill the L1 cache, taking the next L2 line directly when one is available.
            if (pFlac->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT) {
                pFlac->cache = drflac__be2host__cache_line(pFlac->cacheL2[pFlac->nextL2Line++]);
            } else {
                // Slow path. We need to fetch more data from the client.
                if (!drflac__reload_cache(pFlac)) {
                    return false;
                }
            }

            // NOTE(review): with riceParam == 0, resultHiShift equals DRFLAC_CACHE_L1_SIZE_BITS, so the first shift
            // below would again be a full-width shift - confirm whether that case needs a guard.
            bitsLo = (unsigned int)((resultHi >> resultHiShift) | DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bitCountLo));
            pFlac->consumedBits = bitCountLo;
            pFlac->cache <<= bitCountLo;
        }


        // Combine both parts, then undo the sign folding: odd codes become ~(code >> 1), even codes become code >> 1.
        decodedRice |= bitsLo;
        if ((decodedRice & 0x01)) {
            decodedRice = ~(decodedRice >> 1);
        } else {
            decodedRice = (decodedRice >> 1);
        }


        // In order to properly calculate the prediction when the bits per sample is >16 we need to do it using 64-bit arithmetic. We can assume this
        // is probably going to be slower on 32-bit systems so we'll do a more optimized 32-bit version when the bits per sample is low enough.
        if (pFlac->currentFrame.bitsPerSample > 16) {
            pSamplesOut[i] = ((int)decodedRice + drflac__calculate_prediction(order, shift, coefficients, pSamplesOut + i));
        } else {
            pSamplesOut[i] = ((int)decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i));
        }
    }

    return true;
}


// Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes.
static bool drflac__read_and_seek_residual__rice(drflac* pFlac, unsigned int count, unsigned char riceParam)
{
    assert(pFlac != NULL);
    assert(count > 0);

    for (unsigned int i = 0; i < count; ++i) {
        if (!drflac__read_and_seek_rice(pFlac, riceParam)) {
            return false;
        }
    }

    return true;
}

// Decodes <count> residuals of an escaped partition, in which every residual is stored as a plain signed integer of
// <unencodedBitsPerSample> bits, and adds the prediction to each one.
//
// A width of 0 bits means that every residual in the partition is zero; nothing is read from the stream in that case.
static bool drflac__decode_samples_with_residual__unencoded(drflac* pFlac, unsigned int count, unsigned char unencodedBitsPerSample, unsigned int order, int shift, const short* coefficients, int* pSamplesOut)
{
    assert(pFlac != NULL);
    assert(count > 0);
    assert(unencodedBitsPerSample <= 32);
    assert(pSamplesOut != NULL);

    for (unsigned int i = 0; i < count; ++i)
    {
        if (unencodedBitsPerSample > 0) {
            if (!drflac__read_int32(pFlac, unencodedBitsPerSample, pSamplesOut + i)) {
                return false;
            }
        } else {
            pSamplesOut[i] = 0;     // drflac__read_int32() cannot read 0 bits.
        }

        pSamplesOut[i] += drflac__calculate_prediction(order, shift, coefficients, pSamplesOut + i);
    }

    return true;
}


// Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
// when the decoder is sitting at the very start of the RESIDUAL block. The first `order` samples are skipped (they are
// the warm-up samples, which carry no residual). The `blockSize` and `order` parameters are used to determine how many
// residual values need to be decoded.
//
// Returns false on a read failure, on an unsupported residual coding method, or when the partition layout is
// inconsistent with `order`.
static bool drflac__decode_samples_with_residual(drflac* pFlac, unsigned int blockSize, unsigned int order, int shift, const short* coefficients, int* pDecodedSamples)
{
    assert(pFlac != NULL);
    assert(blockSize != 0);
    assert(pDecodedSamples != NULL);       // <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode?

    unsigned char residualMethod;
    if (!drflac__read_uint8(pFlac, 2, &residualMethod)) {
        return false;
    }

    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
        return false;    // Unknown or unsupported residual coding method.
    }

    // Ignore the first `order` values.
    pDecodedSamples += order;


    unsigned char partitionOrder;
    if (!drflac__read_uint8(pFlac, 4, &partitionOrder)) {
        return false;
    }

    // The first partition holds <order> fewer residuals than the others. If a partition is smaller than <order> the
    // stream is malformed, and the unsigned subtraction below would wrap around to a huge sample count.
    unsigned int samplesPerPartition = blockSize / (1 << partitionOrder);
    if (samplesPerPartition < order) {
        return false;
    }

    unsigned int samplesInPartition = samplesPerPartition - order;
    unsigned int partitionsRemaining = (1 << partitionOrder);
    for (;;)
    {
        // The all-ones parameter value (15 for the 4-bit form, 31 for the 5-bit form) is the escape code that marks a
        // partition stored as unencoded binary. It is mapped to 0xFF internally.
        unsigned char riceParam = 0;
        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
            if (!drflac__read_uint8(pFlac, 4, &riceParam)) {
                return false;
            }
            if (riceParam == 15) {
                riceParam = 0xFF;
            }
        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
            if (!drflac__read_uint8(pFlac, 5, &riceParam)) {
                return false;
            }
            if (riceParam == 31) {
                riceParam = 0xFF;
            }
        }

        if (riceParam != 0xFF) {
            if (!drflac__decode_samples_with_residual__rice(pFlac, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
                return false;
            }
        } else {
            unsigned char unencodedBitsPerSample = 0;
            if (!drflac__read_uint8(pFlac, 5, &unencodedBitsPerSample)) {
                return false;
            }

            if (!drflac__decode_samples_with_residual__unencoded(pFlac, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
                return false;
            }
        }

        pDecodedSamples += samplesInPartition;


        if (partitionsRemaining == 1) {
            break;
        }

        partitionsRemaining -= 1;
        samplesInPartition = samplesPerPartition;
    }

    return true;
}

// Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
// when the decoder is sitting at the very start of the RESIDUAL block. The `blockSize` and `order` parameters are used
// to determine how many residual values need to be skipped.
// Skips the RESIDUAL block of the current sub-frame without decoding it. `blockSize` and `order` determine how many
// residual values each partition holds.
//
// Returns false on a read/seek failure, on an unsupported residual coding method, or when the partition layout is
// inconsistent with `order`.
static bool drflac__read_and_seek_residual(drflac* pFlac, unsigned int blockSize, unsigned int order)
{
    assert(pFlac != NULL);
    assert(blockSize != 0);

    unsigned char residualMethod;
    if (!drflac__read_uint8(pFlac, 2, &residualMethod)) {
        return false;
    }

    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
        return false;    // Unknown or unsupported residual coding method.
    }

    unsigned char partitionOrder;
    if (!drflac__read_uint8(pFlac, 4, &partitionOrder)) {
        return false;
    }

    // The first partition holds <order> fewer residuals than the others. If a partition is smaller than <order> the
    // stream is malformed, and the unsigned subtraction below would wrap around to a huge sample count.
    unsigned int samplesPerPartition = blockSize / (1 << partitionOrder);
    if (samplesPerPartition < order) {
        return false;
    }

    unsigned int samplesInPartition = samplesPerPartition - order;
    unsigned int partitionsRemaining = (1 << partitionOrder);
    for (;;)
    {
        // The all-ones parameter value (15 for the 4-bit form, 31 for the 5-bit form) is the escape code that marks a
        // partition stored as unencoded binary. It is mapped to 0xFF internally.
        unsigned char riceParam = 0;
        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
            if (!drflac__read_uint8(pFlac, 4, &riceParam)) {
                return false;
            }
            if (riceParam == 15) {
                riceParam = 0xFF;
            }
        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
            if (!drflac__read_uint8(pFlac, 5, &riceParam)) {
                return false;
            }
            if (riceParam == 31) {
                riceParam = 0xFF;
            }
        }

        if (riceParam != 0xFF) {
            if (!drflac__read_and_seek_residual__rice(pFlac, samplesInPartition, riceParam)) {
                return false;
            }
        } else {
            unsigned char unencodedBitsPerSample = 0;
            if (!drflac__read_uint8(pFlac, 5, &unencodedBitsPerSample)) {
                return false;
            }

            // A width of 0 bits (or an empty partition) leaves nothing to skip. The zero-length seek is avoided in
            // the same way drflac__read_and_seek_rice() avoids it.
            if (unencodedBitsPerSample * samplesInPartition > 0) {
                if (!drflac__seek_bits(pFlac, unencodedBitsPerSample * samplesInPartition)) {
                    return false;
                }
            }
        }


        if (partitionsRemaining == 1) {
            break;
        }

        partitionsRemaining -= 1;
        samplesInPartition = samplesPerPartition;
    }

    return true;
}


// Decodes a CONSTANT sub-frame: one sample value is read and replicated across the whole block.
static bool drflac__decode_samples__constant(drflac* pFlac, drflac_subframe* pSubframe)
{
    // Only a single sample needs to be decoded here.
    int sample;
    if (!drflac__read_int32(pFlac, pSubframe->bitsPerSample, &sample)) {
        return false;
    }

    // We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
    // we'll want to look at a more efficient way.
    for (unsigned int i = 0; i < pFlac->currentFrame.blockSize; ++i) {
        pSubframe->pDecodedSamples[i] = sample;
    }

    return true;
}

// Decodes a VERBATIM sub-frame: every sample of the block is stored as a plain signed integer.
static bool drflac__decode_samples__verbatim(drflac* pFlac, drflac_subframe* pSubframe)
{
    for (unsigned int i = 0; i < pFlac->currentFrame.blockSize; ++i) {
        int sample;
        if (!drflac__read_int32(pFlac, pSubframe->bitsPerSample, &sample)) {
            return false;
        }

        pSubframe->pDecodedSamples[i] = sample;
    }

    return true;
}

// Decodes a FIXED sub-frame: <lpcOrder> warm-up samples followed by residuals that are combined with one of five
// predefined predictors (orders 0 to 4, no shift).
static bool drflac__decode_samples__fixed(drflac* pFlac, drflac_subframe* pSubframe)
{
    // One row per predictor order. Static and const so that the table is not rebuilt on every call.
    static const short lpcCoefficientsTable[5][4] = {
        {0,  0, 0,  0},
        {1,  0, 0,  0},
        {2, -1, 0,  0},
        {3, -3, 1,  0},
        {4, -6, 4, -1}
    };

    // Only orders 0..4 have a row in the table; anything else would index past it.
    if (pSubframe->lpcOrder > 4) {
        return false;
    }

    // Warm up samples and coefficients.
    for (unsigned int i = 0; i < pSubframe->lpcOrder; ++i) {
        int sample;
        if (!drflac__read_int32(pFlac, pSubframe->bitsPerSample, &sample)) {
            return false;
        }

        pSubframe->pDecodedSamples[i] = sample;
    }


    if (!drflac__decode_samples_with_residual(pFlac, pFlac->currentFrame.blockSize, pSubframe->lpcOrder, 0, lpcCoefficientsTable[pSubframe->lpcOrder], pSubframe->pDecodedSamples)) {
        return false;
    }

    return true;
}

// Decodes an LPC sub-frame: <lpcOrder> warm-up samples, the coefficient precision (4 bits, stored minus one), the
// coefficient shift (5 bits, signed), <lpcOrder> coefficients and finally the residual.
static bool drflac__decode_samples__lpc(drflac* pFlac, drflac_subframe* pSubframe)
{
    // The coefficient array below holds 32 entries and the prediction functions assert order <= 32.
    if (pSubframe->lpcOrder > 32) {
        return false;
    }

    // Warm up samples.
    for (unsigned int i = 0; i < pSubframe->lpcOrder; ++i) {
        int sample;
        if (!drflac__read_int32(pFlac, pSubframe->bitsPerSample, &sample)) {
            return false;
        }

        pSubframe->pDecodedSamples[i] = sample;
    }

    unsigned char lpcPrecision;
    if (!drflac__read_uint8(pFlac, 4, &lpcPrecision)) {
        return false;
    }
    if (lpcPrecision == 15) {
        return false;    // Invalid.
    }
    lpcPrecision += 1;


    signed char lpcShift;
    if (!drflac__read_int8(pFlac, 5, &lpcShift)) {
        return false;
    }
    if (lpcShift < 0) {
        // The prediction is computed as (sum >> shift); a negative shift count is undefined, so such streams are rejected.
        return false;
    }


    short coefficients[32];
    for (unsigned int i = 0; i < pSubframe->lpcOrder; ++i) {
        if (!drflac__read_int16(pFlac, lpcPrecision, coefficients + i)) {
            return false;
        }
    }

    if (!drflac__decode_samples_with_residual(pFlac, pFlac->currentFrame.blockSize, pSubframe->lpcOrder, lpcShift, coefficients, pSubframe->pDecodedSamples)) {
        return false;
    }

    return true;
}


// NOTE(review): the function below continues past the end of this chunk; its visible part is carried over unchanged.
static bool drflac__read_next_frame_header(drflac* pFlac)
{
    assert(pFlac != NULL);
    assert(pFlac->onRead != NULL);

    // At the moment the sync code is as a form of basic validation. The CRC is stored, but is unused at the moment. This
    // should probably be handled better in the future.

    const int sampleRateTable[12]   = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
    const uint8_t bitsPerSampleTable[8] = {0, 8, 12, (uint8_t)-1, 16, 20, 24, (uint8_t)-1};   // -1 = reserved.

    unsigned short syncCode = 0;
    if (!drflac__read_uint16(pFlac, 14, &syncCode)) {
        return false;
    }

    if (syncCode != 0x3FFE) {
        // TODO: Try and recover by attempting to seek to and read the next frame?
+ return false; + } + + unsigned char reserved; + if (!drflac__read_uint8(pFlac, 1, &reserved)) { + return false; + } + + unsigned char blockingStrategy = 0; + if (!drflac__read_uint8(pFlac, 1, &blockingStrategy)) { + return false; + } + + + + unsigned char blockSize = 0; + if (!drflac__read_uint8(pFlac, 4, &blockSize)) { + return false; + } + + unsigned char sampleRate = 0; + if (!drflac__read_uint8(pFlac, 4, &sampleRate)) { + return false; + } + + unsigned char channelAssignment = 0; + if (!drflac__read_uint8(pFlac, 4, &channelAssignment)) { + return false; + } + + unsigned char bitsPerSample = 0; + if (!drflac__read_uint8(pFlac, 3, &bitsPerSample)) { + return false; + } + + if (!drflac__read_uint8(pFlac, 1, &reserved)) { + return false; + } + + + unsigned char isVariableBlockSize = blockingStrategy == 1; + if (isVariableBlockSize) { + pFlac->currentFrame.frameNumber = 0; + if (!drflac__read_utf8_coded_number(pFlac, &pFlac->currentFrame.sampleNumber)) { + return false; + } + } else { + unsigned long long frameNumber = 0; + if (!drflac__read_utf8_coded_number(pFlac, &frameNumber)) { + return false; + } + pFlac->currentFrame.frameNumber = (unsigned int)frameNumber; // <-- Safe cast. 
+ pFlac->currentFrame.sampleNumber = 0; + } + + + if (blockSize == 1) { + pFlac->currentFrame.blockSize = 192; + } else if (blockSize >= 2 && blockSize <= 5) { + pFlac->currentFrame.blockSize = 576 * (1 << (blockSize - 2)); + } else if (blockSize == 6) { + if (!drflac__read_uint16(pFlac, 8, &pFlac->currentFrame.blockSize)) { + return false; + } + pFlac->currentFrame.blockSize += 1; + } else if (blockSize == 7) { + if (!drflac__read_uint16(pFlac, 16, &pFlac->currentFrame.blockSize)) { + return false; + } + pFlac->currentFrame.blockSize += 1; + } else { + pFlac->currentFrame.blockSize = 256 * (1 << (blockSize - 8)); + } + + + if (sampleRate <= 11) { + pFlac->currentFrame.sampleRate = sampleRateTable[sampleRate]; + } else if (sampleRate == 12) { + if (!drflac__read_uint32(pFlac, 8, &pFlac->currentFrame.sampleRate)) { + return false; + } + pFlac->currentFrame.sampleRate *= 1000; + } else if (sampleRate == 13) { + if (!drflac__read_uint32(pFlac, 16, &pFlac->currentFrame.sampleRate)) { + return false; + } + } else if (sampleRate == 14) { + if (!drflac__read_uint32(pFlac, 16, &pFlac->currentFrame.sampleRate)) { + return false; + } + pFlac->currentFrame.sampleRate *= 10; + } else { + return false; // Invalid. + } + + + pFlac->currentFrame.channelAssignment = channelAssignment; + + pFlac->currentFrame.bitsPerSample = bitsPerSampleTable[bitsPerSample]; + if (pFlac->currentFrame.bitsPerSample == 0) { + pFlac->currentFrame.bitsPerSample = pFlac->bitsPerSample; + } + + if (drflac__read_uint8(pFlac, 8, &pFlac->currentFrame.crc8) != 1) { + return false; + } + + memset(pFlac->currentFrame.subframes, 0, sizeof(pFlac->currentFrame.subframes)); + + return true; +} + +static bool drflac__read_subframe_header(drflac* pFlac, drflac_subframe* pSubframe) +{ + unsigned char header; + if (!drflac__read_uint8(pFlac, 8, &header)) { + return false; + } + + // First bit should always be 0. 
+ if ((header & 0x80) != 0) { + return false; + } + + int type = (header & 0x7E) >> 1; + if (type == 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT; + } else if (type == 1) { + pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM; + } else { + if ((type & 0x20) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_LPC; + pSubframe->lpcOrder = (type & 0x1F) + 1; + } else if ((type & 0x08) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED; + pSubframe->lpcOrder = (type & 0x07); + if (pSubframe->lpcOrder > 4) { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + pSubframe->lpcOrder = 0; + } + } else { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + } + } + + if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) { + return false; + } + + // Wasted bits per sample. + pSubframe->wastedBitsPerSample = 0; + if ((header & 0x01) == 1) { + unsigned int wastedBitsPerSample; + if (!drflac__seek_past_next_set_bit(pFlac, &wastedBitsPerSample)) { + return false; + } + pSubframe->wastedBitsPerSample = (unsigned char)wastedBitsPerSample + 1; + } + + return true; +} + +static bool drflac__decode_subframe(drflac* pFlac, int subframeIndex) +{ + assert(pFlac != NULL); + + drflac_subframe* pSubframe = pFlac->currentFrame.subframes + subframeIndex; + if (!drflac__read_subframe_header(pFlac, pSubframe)) { + return false; + } + + // Side channels require an extra bit per sample. Took a while to figure that one out... + pSubframe->bitsPerSample = pFlac->currentFrame.bitsPerSample; + if ((pFlac->currentFrame.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || pFlac->currentFrame.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + pSubframe->bitsPerSample += 1; + } else if (pFlac->currentFrame.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + pSubframe->bitsPerSample += 1; + } + + // Need to handle wasted bits per sample. 
+ pSubframe->bitsPerSample -= pSubframe->wastedBitsPerSample; + pSubframe->pDecodedSamples = pFlac->pDecodedSamples + (pFlac->currentFrame.blockSize * subframeIndex); + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + drflac__decode_samples__constant(pFlac, pSubframe); + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + drflac__decode_samples__verbatim(pFlac, pSubframe); + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + drflac__decode_samples__fixed(pFlac, pSubframe); + } break; + + case DRFLAC_SUBFRAME_LPC: + { + drflac__decode_samples__lpc(pFlac, pSubframe); + } break; + + default: return false; + } + + return true; +} + +static bool drflac__seek_subframe(drflac* pFlac, int subframeIndex) +{ + assert(pFlac != NULL); + + drflac_subframe* pSubframe = pFlac->currentFrame.subframes + subframeIndex; + if (!drflac__read_subframe_header(pFlac, pSubframe)) { + return false; + } + + // Side channels require an extra bit per sample. Took a while to figure that one out... + pSubframe->bitsPerSample = pFlac->currentFrame.bitsPerSample; + if ((pFlac->currentFrame.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || pFlac->currentFrame.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + pSubframe->bitsPerSample += 1; + } else if (pFlac->currentFrame.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + pSubframe->bitsPerSample += 1; + } + + // Need to handle wasted bits per sample. 
+ pSubframe->bitsPerSample -= pSubframe->wastedBitsPerSample; + pSubframe->pDecodedSamples = pFlac->pDecodedSamples + (pFlac->currentFrame.blockSize * subframeIndex); + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + if (!drflac__seek_bits(pFlac, pSubframe->bitsPerSample)) { + return false; + } + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + unsigned int bitsToSeek = pFlac->currentFrame.blockSize * pSubframe->bitsPerSample; + if (!drflac__seek_bits(pFlac, bitsToSeek)) { + return false; + } + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + unsigned int bitsToSeek = pSubframe->lpcOrder * pSubframe->bitsPerSample; + if (!drflac__seek_bits(pFlac, bitsToSeek)) { + return false; + } + + if (!drflac__read_and_seek_residual(pFlac, pFlac->currentFrame.blockSize, pSubframe->lpcOrder)) { + return false; + } + } break; + + case DRFLAC_SUBFRAME_LPC: + { + unsigned int bitsToSeek = pSubframe->lpcOrder * pSubframe->bitsPerSample; + if (!drflac__seek_bits(pFlac, bitsToSeek)) { + return false; + } + + unsigned char lpcPrecision; + if (!drflac__read_uint8(pFlac, 4, &lpcPrecision)) { + return false; + } + if (lpcPrecision == 15) { + return false; // Invalid. + } + lpcPrecision += 1; + + + bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5; // +5 for shift. + if (!drflac__seek_bits(pFlac, bitsToSeek)) { + return false; + } + + if (!drflac__read_and_seek_residual(pFlac, pFlac->currentFrame.blockSize, pSubframe->lpcOrder)) { + return false; + } + } break; + + default: return false; + } + + return true; +} + + +static DRFLAC_INLINE int drflac__get_channel_count_from_channel_assignment(int channelAssignment) +{ + assert(channelAssignment <= 10); + + int lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2}; + return lookup[channelAssignment]; +} + +static bool drflac__decode_frame(drflac* pFlac) +{ + // This function should be called while the stream is sitting on the first byte after the frame header. 
+ + int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.channelAssignment); + for (int i = 0; i < channelCount; ++i) + { + if (!drflac__decode_subframe(pFlac, i)) { + return false; + } + } + + // At the end of the frame sits the padding and CRC. We don't use these so we can just seek past. + if (!drflac__seek_bits(pFlac, (DRFLAC_CACHE_L1_BITS_REMAINING & 7) + 16)) { + return false; + } + + + pFlac->currentFrame.samplesRemaining = pFlac->currentFrame.blockSize * channelCount; + + return true; +} + +static bool drflac__seek_frame(drflac* pFlac) +{ + int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.channelAssignment); + for (int i = 0; i < channelCount; ++i) + { + if (!drflac__seek_subframe(pFlac, i)) { + return false; + } + } + + // Padding and CRC. + return drflac__seek_bits(pFlac, (DRFLAC_CACHE_L1_BITS_REMAINING & 7) + 16); +} + +static bool drflac__read_and_decode_next_frame(drflac* pFlac) +{ + assert(pFlac != NULL); + + if (!drflac__read_next_frame_header(pFlac)) { + return false; + } + + return drflac__decode_frame(pFlac); +} + +static unsigned int drflac__read_block_header(drflac* pFlac, unsigned int* pBlockSizeOut, bool* pIsLastBlockOut) // Returns the block type. 
+{ + assert(pFlac != NULL); + + unsigned char isLastBlock = 1; + unsigned char blockType = DRFLAC_BLOCK_TYPE_INVALID; + unsigned int blockSize = 0; + + if (!drflac__read_uint8(pFlac, 1, &isLastBlock)) { + goto done_reading_block_header; + } + + if (!drflac__read_uint8(pFlac, 7, &blockType)) { + goto done_reading_block_header; + } + + if (!drflac__read_uint32(pFlac, 24, &blockSize)) { + goto done_reading_block_header; + } + + +done_reading_block_header: + if (pBlockSizeOut) { + *pBlockSizeOut = blockSize; + } + + if (pIsLastBlockOut) { + *pIsLastBlockOut = isLastBlock; + } + + return blockType; +} + + +static void drflac__get_current_frame_sample_range(drflac* pFlac, uint64_t* pFirstSampleInFrameOut, uint64_t* pLastSampleInFrameOut) +{ + assert(pFlac != NULL); + + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.channelAssignment); + + uint64_t firstSampleInFrame = pFlac->currentFrame.sampleNumber; + if (firstSampleInFrame == 0) { + firstSampleInFrame = pFlac->currentFrame.frameNumber * pFlac->maxBlockSize*channelCount; + } + + uint64_t lastSampleInFrame = firstSampleInFrame + (pFlac->currentFrame.blockSize*channelCount); + if (lastSampleInFrame > 0) { + lastSampleInFrame -= 1; // Needs to be zero based. + } + + + if (pFirstSampleInFrameOut) { + *pFirstSampleInFrameOut = firstSampleInFrame; + } + if (pLastSampleInFrameOut) { + *pLastSampleInFrameOut = lastSampleInFrame; + } +} + +static bool drflac__seek_to_first_frame(drflac* pFlac) +{ + assert(pFlac != NULL); + + bool result = drflac__seek_to_byte(pFlac, (long long)pFlac->firstFramePos); + pFlac->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS; + pFlac->cache = 0; + + memset(&pFlac->currentFrame, 0, sizeof(pFlac->currentFrame)); + + + return result; +} + +static DRFLAC_INLINE bool drflac__seek_to_next_frame(drflac* pFlac) +{ + // This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. 
+ assert(pFlac != NULL); + return drflac__seek_frame(pFlac); +} + +static bool drflac__seek_to_frame_containing_sample(drflac* pFlac, uint64_t sampleIndex) +{ + assert(pFlac != NULL); + + if (!drflac__seek_to_first_frame(pFlac)) { + return false; + } + + uint64_t firstSampleInFrame = 0; + uint64_t lastSampleInFrame = 0; + for (;;) + { + // We need to read the frame's header in order to determine the range of samples it contains. + if (!drflac__read_next_frame_header(pFlac)) { + return false; + } + + drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame); + if (sampleIndex >= firstSampleInFrame && sampleIndex <= lastSampleInFrame) { + break; // The sample is in this frame. + } + + if (!drflac__seek_to_next_frame(pFlac)) { + return false; + } + } + + // If we get here we should be right at the start of the frame containing the sample. + return true; +} + +static bool drflac__seek_to_sample__brute_force(drflac* pFlac, uint64_t sampleIndex) +{ + if (!drflac__seek_to_frame_containing_sample(pFlac, sampleIndex)) { + return false; + } + + // At this point we should be sitting on the first byte of the frame containing the sample. We need to decode every sample up to (but + // not including) the sample we're seeking to. + uint64_t firstSampleInFrame = 0; + drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, NULL); + + assert(firstSampleInFrame <= sampleIndex); + size_t samplesToDecode = (size_t)(sampleIndex - firstSampleInFrame); // <-- Safe cast because the maximum number of samples in a frame is 65535. + if (samplesToDecode == 0) { + return true; + } + + // At this point we are just sitting on the byte after the frame header. We need to decode the frame before reading anything from it. 
+ if (!drflac__decode_frame(pFlac)) { + return false; + } + + return drflac_read_s16(pFlac, samplesToDecode, NULL); +} + +static bool drflac__seek_to_sample__seek_table(drflac* pFlac, uint64_t sampleIndex) +{ + assert(pFlac != NULL); + + if (pFlac->seektableBlock.pos == 0) { + return false; + } + + if (!drflac__seek_to_byte(pFlac, pFlac->seektableBlock.pos)) { + return false; + } + + // The number of seek points is derived from the size of the SEEKTABLE block. + unsigned int seekpointCount = pFlac->seektableBlock.sizeInBytes / 18; // 18 = the size of each seek point. + if (seekpointCount == 0) { + return false; // Would this ever happen? + } + + + drflac_seekpoint closestSeekpoint = {0}; + + unsigned int seekpointsRemaining = seekpointCount; + while (seekpointsRemaining > 0) + { + drflac_seekpoint seekpoint; + if (!drflac__read_uint64(pFlac, 64, &seekpoint.firstSample)) { + break; + } + if (!drflac__read_uint64(pFlac, 64, &seekpoint.frameOffset)) { + break; + } + if (!drflac__read_uint16(pFlac, 16, &seekpoint.sampleCount)) { + break; + } + + if (seekpoint.firstSample * pFlac->channels > sampleIndex) { + break; + } + + closestSeekpoint = seekpoint; + seekpointsRemaining -= 1; + } + + // At this point we should have found the seekpoint closest to our sample. We need to seek to it using basically the same + // technique as we use with the brute force method. + drflac__seek_to_byte(pFlac, pFlac->firstFramePos + closestSeekpoint.frameOffset); + + uint64_t firstSampleInFrame = 0; + uint64_t lastSampleInFrame = 0; + for (;;) + { + // We need to read the frame's header in order to determine the range of samples it contains. + if (!drflac__read_next_frame_header(pFlac)) { + return false; + } + + drflac__get_current_frame_sample_range(pFlac, &firstSampleInFrame, &lastSampleInFrame); + if (sampleIndex >= firstSampleInFrame && sampleIndex <= lastSampleInFrame) { + break; // The sample is in this frame. 
+ } + + if (!drflac__seek_to_next_frame(pFlac)) { + return false; + } + } + + assert(firstSampleInFrame <= sampleIndex); + + // At this point we are just sitting on the byte after the frame header. We need to decode the frame before reading anything from it. + if (!drflac__decode_frame(pFlac)) { + return false; + } + + size_t samplesToDecode = (size_t)(sampleIndex - firstSampleInFrame); // <-- Safe cast because the maximum number of samples in a frame is 65535. + return drflac_read_s16(pFlac, samplesToDecode, NULL) == samplesToDecode; +} + + +static drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData) +{ + if (onRead == NULL || onSeek == NULL) { + return false; + } + + unsigned char id[4]; + if (onRead(pUserData, id, 4) != 4 || id[0] != 'f' || id[1] != 'L' || id[2] != 'a' || id[3] != 'C') { + return false; // Not a FLAC stream. + } + + drflac tempFlac; + memset(&tempFlac, 0, sizeof(tempFlac)); + tempFlac.onRead = onRead; + tempFlac.onSeek = onSeek; + tempFlac.pUserData = pUserData; + tempFlac.currentBytePos = 4; + tempFlac.nextL2Line = sizeof(tempFlac.cacheL2) / sizeof(tempFlac.cacheL2[0]); // <-- Initialize to this to force a client-side data retrieval right from the start. + tempFlac.consumedBits = sizeof(tempFlac.cache)*8; + + // The first metadata block should be the STREAMINFO block. We don't care about everything in here. 
+ unsigned int blockSize; + bool isLastBlock; + int blockType = drflac__read_block_header(&tempFlac, &blockSize, &isLastBlock); + if (blockType != DRFLAC_BLOCK_TYPE_STREAMINFO && blockSize != 34) { + return false; + } + + if (!drflac__seek_bits(&tempFlac, 16)) { // minBlockSize + return false; + } + if (!drflac__read_uint16(&tempFlac, 16, &tempFlac.maxBlockSize)) { + return false; + } + if (!drflac__seek_bits(&tempFlac, 48)) { // minFrameSize + maxFrameSize + return false; + } + if (!drflac__read_uint32(&tempFlac, 20, &tempFlac.sampleRate)) { + return false; + } + if (!drflac__read_uint8(&tempFlac, 3, &tempFlac.channels)) { + return false; + } + if (!drflac__read_uint8(&tempFlac, 5, &tempFlac.bitsPerSample)) { + return false; + } + if (!drflac__read_uint64(&tempFlac, 36, &tempFlac.totalSampleCount)) { + return false; + } + if (!drflac__seek_bits(&tempFlac, 128)) { // MD5 + return false; + } + + tempFlac.channels += 1; + tempFlac.bitsPerSample += 1; + tempFlac.totalSampleCount *= tempFlac.channels; + + while (!isLastBlock) + { + blockType = drflac__read_block_header(&tempFlac, &blockSize, &isLastBlock); + + switch (blockType) + { + case DRFLAC_BLOCK_TYPE_APPLICATION: + { + tempFlac.applicationBlock.pos = drflac__tell(&tempFlac); + tempFlac.applicationBlock.sizeInBytes = blockSize; + } break; + + case DRFLAC_BLOCK_TYPE_SEEKTABLE: + { + tempFlac.seektableBlock.pos = drflac__tell(&tempFlac); + tempFlac.seektableBlock.sizeInBytes = blockSize; + } break; + + case DRFLAC_BLOCK_TYPE_VORBIS_COMMENT: + { + tempFlac.vorbisCommentBlock.pos = drflac__tell(&tempFlac); + tempFlac.vorbisCommentBlock.sizeInBytes = blockSize; + } break; + + case DRFLAC_BLOCK_TYPE_CUESHEET: + { + tempFlac.cuesheetBlock.pos = drflac__tell(&tempFlac); + tempFlac.cuesheetBlock.sizeInBytes = blockSize; + } break; + + case DRFLAC_BLOCK_TYPE_PICTURE: + { + tempFlac.pictureBlock.pos = drflac__tell(&tempFlac); + tempFlac.pictureBlock.sizeInBytes = blockSize; + } break; + + + // These blocks we either don't 
care about or aren't supporting. + case DRFLAC_BLOCK_TYPE_PADDING: + case DRFLAC_BLOCK_TYPE_INVALID: + default: break; + } + + if (!drflac__seek_bits(&tempFlac, blockSize*8)) { + return false; + } + } + + + // At this point we should be sitting right at the start of the very first frame. + tempFlac.firstFramePos = drflac__tell(&tempFlac); + + drflac* pFlac = (drflac*)malloc(sizeof(*pFlac) - sizeof(pFlac->pExtraData) + (tempFlac.maxBlockSize * tempFlac.channels * sizeof(int32_t))); + memcpy(pFlac, &tempFlac, sizeof(tempFlac) - sizeof(pFlac->pExtraData)); + pFlac->pDecodedSamples = (int32_t*)pFlac->pExtraData; + + return pFlac; +} + +static void drflac_close(drflac* pFlac) +{ + if (pFlac == NULL) { + return; + } + +#ifndef DR_FLAC_NO_STDIO + // If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file() + // was used by looking at the callbacks. + if (pFlac->onRead == drflac__on_read_stdio) { +#if defined(DR_OPUS_NO_WIN32_IO) || !defined(_WIN32) + fclose((FILE*)pFlac->pUserData); +#else + CloseHandle((HANDLE)pFlac->pUserData); +#endif + } +#endif + + // If we opened the file with drflac_open_memory() we will want to free() the user data. + if (pFlac->onRead == drflac__on_read_memory) { + free(pFlac->pUserData); + } + + free(pFlac); +} + +static uint64_t drflac__read_s16__misaligned(drflac* pFlac, uint64_t samplesToRead, int16_t* bufferOut) +{ + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.channelAssignment); + + // We should never be calling this when the number of samples to read is >= the sample count. 
+ assert(samplesToRead < channelCount); + assert(pFlac->currentFrame.samplesRemaining > 0 && samplesToRead <= pFlac->currentFrame.samplesRemaining); + + + uint64_t samplesRead = 0; + while (samplesToRead > 0) + { + uint64_t totalSamplesInFrame = pFlac->currentFrame.blockSize * channelCount; + uint64_t samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining; + unsigned int channelIndex = samplesReadFromFrameSoFar % channelCount; + + unsigned long long nextSampleInFrame = samplesReadFromFrameSoFar / channelCount; + + int decodedSample = 0; + switch (pFlac->currentFrame.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + if (channelIndex == 0) { + decodedSample = pFlac->currentFrame.subframes[channelIndex].pDecodedSamples[nextSampleInFrame]; + } else { + int side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame]; + int left = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame]; + decodedSample = left - side; + } + + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + if (channelIndex == 0) { + int side = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame]; + int right = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame]; + decodedSample = side + right; + } else { + decodedSample = pFlac->currentFrame.subframes[channelIndex].pDecodedSamples[nextSampleInFrame]; + } + + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + int mid; + int side; + if (channelIndex == 0) { + mid = pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame]; + side = pFlac->currentFrame.subframes[channelIndex + 1].pDecodedSamples[nextSampleInFrame]; + + mid = (((unsigned int)mid) << 1) | (side & 0x01); + decodedSample = (mid + side) >> 1; + } else { + mid = pFlac->currentFrame.subframes[channelIndex - 1].pDecodedSamples[nextSampleInFrame]; + side = 
pFlac->currentFrame.subframes[channelIndex + 0].pDecodedSamples[nextSampleInFrame]; + + mid = (((unsigned int)mid) << 1) | (side & 0x01); + decodedSample = (mid - side) >> 1; + } + + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + decodedSample = pFlac->currentFrame.subframes[channelIndex].pDecodedSamples[nextSampleInFrame]; + } break; + } + + int shift = (16 - pFlac->bitsPerSample) + pFlac->currentFrame.subframes[channelIndex].wastedBitsPerSample; + if (shift >= 0) { + decodedSample <<= shift; + } else { + decodedSample >>= -shift; + } + + if (bufferOut) { + *bufferOut++ = decodedSample; + } + + samplesRead += 1; + pFlac->currentFrame.samplesRemaining -= 1; + samplesToRead -= 1; + } + + return samplesRead; +} + +static uint64_t drflac__seek_forward_by_samples(drflac* pFlac, uint64_t samplesToRead) +{ + uint64_t samplesRead = 0; + while (samplesToRead > 0) + { + if (pFlac->currentFrame.samplesRemaining == 0) + { + if (!drflac__read_and_decode_next_frame(pFlac)) { + break; // Couldn't read the next frame, so just break from the loop and return. + } + } + else + { + samplesRead += 1; + pFlac->currentFrame.samplesRemaining -= 1; + samplesToRead -= 1; + } + } + + return samplesRead; +} + +static uint64_t drflac_read_s16(drflac* pFlac, uint64_t samplesToRead, int16_t* bufferOut) +{ + // Note that is allowed to be null, in which case this will be treated as something like a seek. + if (pFlac == NULL || samplesToRead == 0) { + return 0; + } + + if (bufferOut == NULL) { + return drflac__seek_forward_by_samples(pFlac, samplesToRead); + } + + + uint64_t samplesRead = 0; + while (samplesToRead > 0) + { + // If we've run out of samples in this frame, go to the next. + if (pFlac->currentFrame.samplesRemaining == 0) + { + if (!drflac__read_and_decode_next_frame(pFlac)) { + break; // Couldn't read the next frame, so just break from the loop and return. + } + } + else + { + // Here is where we grab the samples and interleave them. 
+ + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFrame.channelAssignment); + uint64_t totalSamplesInFrame = pFlac->currentFrame.blockSize * channelCount; + uint64_t samplesReadFromFrameSoFar = totalSamplesInFrame - pFlac->currentFrame.samplesRemaining; + + int misalignedSampleCount = samplesReadFromFrameSoFar % channelCount; + if (misalignedSampleCount > 0) { + uint64_t misalignedSamplesRead = drflac__read_s16__misaligned(pFlac, misalignedSampleCount, bufferOut); + samplesRead += misalignedSamplesRead; + samplesReadFromFrameSoFar += misalignedSamplesRead; + bufferOut += misalignedSamplesRead; + samplesToRead -= misalignedSamplesRead; + } + + + uint64_t alignedSampleCountPerChannel = samplesToRead / channelCount; + if (alignedSampleCountPerChannel > pFlac->currentFrame.samplesRemaining / channelCount) { + alignedSampleCountPerChannel = pFlac->currentFrame.samplesRemaining / channelCount; + } + + uint64_t firstAlignedSampleInFrame = samplesReadFromFrameSoFar / channelCount; + int unusedBitsPerSample = 16 - pFlac->bitsPerSample; + + if (unusedBitsPerSample >= 0) { + int lshift0 = unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int lshift1 = unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + switch (pFlac->currentFrame.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + int left = pDecodedSamples0[i]; + int side = pDecodedSamples1[i]; + int right = left - side; + + bufferOut[i*2+0] = left << lshift0; + bufferOut[i*2+1] = right << lshift1; + } + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + const int* pDecodedSamples0 = 
pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + int side = pDecodedSamples0[i]; + int right = pDecodedSamples1[i]; + int left = right + side; + + bufferOut[i*2+0] = left << lshift0; + bufferOut[i*2+1] = right << lshift1; + } + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + int side = pDecodedSamples1[i]; + int mid = (((uint32_t)pDecodedSamples0[i]) << 1) | (side & 0x01); + + bufferOut[i*2+0] = ((mid + side) >> 1) << lshift0; + bufferOut[i*2+1] = ((mid - side) >> 1) << lshift1; + } + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + if (pFlac->currentFrame.channelAssignment == 1) // 1 = Stereo + { + // Stereo optimized inner loop unroll. + const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + bufferOut[i*2+0] = pDecodedSamples0[i] << lshift0; + bufferOut[i*2+1] = pDecodedSamples1[i] << lshift1; + } + } + else + { + // Generic interleaving. 
+ for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + for (unsigned int j = 0; j < channelCount; ++j) { + bufferOut[(i*channelCount)+j] = (pFlac->currentFrame.subframes[j].pDecodedSamples[firstAlignedSampleInFrame + i]) << (unusedBitsPerSample + pFlac->currentFrame.subframes[j].wastedBitsPerSample); + } + } + } + } break; + } + } else { + int rshift0 = -unusedBitsPerSample + pFlac->currentFrame.subframes[0].wastedBitsPerSample; + int rshift1 = -unusedBitsPerSample + pFlac->currentFrame.subframes[1].wastedBitsPerSample; + + switch (pFlac->currentFrame.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + int left = pDecodedSamples0[i]; + int side = pDecodedSamples1[i]; + int right = left - side; + + bufferOut[i*2+0] = left >> rshift0; + bufferOut[i*2+1] = right >> rshift1; + } + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + int side = pDecodedSamples0[i]; + int right = pDecodedSamples1[i]; + int left = right + side; + + bufferOut[i*2+0] = left >> rshift0; + bufferOut[i*2+1] = right >> rshift1; + } + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + int side = 
pDecodedSamples1[i]; + int mid = (((uint32_t)pDecodedSamples0[i]) << 1) | (side & 0x01); + + bufferOut[i*2+0] = ((mid + side) >> 1) >> rshift0; + bufferOut[i*2+1] = ((mid - side) >> 1) >> rshift1; + } + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + if (pFlac->currentFrame.channelAssignment == 1) // 1 = Stereo + { + // Stereo optimized inner loop unroll. + const int* pDecodedSamples0 = pFlac->currentFrame.subframes[0].pDecodedSamples + firstAlignedSampleInFrame; + const int* pDecodedSamples1 = pFlac->currentFrame.subframes[1].pDecodedSamples + firstAlignedSampleInFrame; + + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + bufferOut[i*2+0] = pDecodedSamples0[i] >> rshift0; + bufferOut[i*2+1] = pDecodedSamples1[i] >> rshift1; + } + } + else + { + // Generic interleaving. + for (uint64_t i = 0; i < alignedSampleCountPerChannel; ++i) { + for (unsigned int j = 0; j < channelCount; ++j) { + bufferOut[(i*channelCount)+j] = (pFlac->currentFrame.subframes[j].pDecodedSamples[firstAlignedSampleInFrame + i]) >> (pFlac->currentFrame.subframes[j].wastedBitsPerSample - unusedBitsPerSample); + } + } + } + } break; + } + } + + uint64_t alignedSamplesRead = alignedSampleCountPerChannel * channelCount; + samplesRead += alignedSamplesRead; + samplesReadFromFrameSoFar += alignedSamplesRead; + bufferOut += alignedSamplesRead; + samplesToRead -= alignedSamplesRead; + pFlac->currentFrame.samplesRemaining -= (unsigned int)alignedSamplesRead; + + + + // At this point we may still have some excess samples left to read. 
+            if (samplesToRead > 0 && pFlac->currentFrame.samplesRemaining > 0)
+            {
+                uint64_t excessSamplesRead = 0;
+                if (samplesToRead < pFlac->currentFrame.samplesRemaining) {
+                    excessSamplesRead = drflac__read_s16__misaligned(pFlac, samplesToRead, bufferOut);
+                } else {
+                    excessSamplesRead = drflac__read_s16__misaligned(pFlac, pFlac->currentFrame.samplesRemaining, bufferOut);
+                }
+
+                samplesRead += excessSamplesRead;
+                samplesReadFromFrameSoFar += excessSamplesRead;
+                bufferOut += excessSamplesRead;
+                samplesToRead -= excessSamplesRead;
+            }
+        }
+    }
+
+    return samplesRead;
+}
+
+// Positions the decoder at the given sample index.
+//
+// Returns false when pFlac is NULL or when the underlying seek fails. Index 0
+// rewinds to the first frame. Any other index is clamped to the last sample
+// and then looked up through the seek table, falling back to a brute force
+// scan if the seek table cannot satisfy the request.
+static bool drflac_seek_to_sample(drflac* pFlac, uint64_t sampleIndex)
+{
+    if (pFlac == NULL) {
+        return false;
+    }
+
+    if (sampleIndex == 0) {
+        return drflac__seek_to_first_frame(pFlac);
+    }
+
+    // Clamp the sample to the end. The clamp is skipped when totalSampleCount
+    // is 0, because "totalSampleCount - 1" would wrap around to UINT64_MAX.
+    // NOTE(review): 0 is presumably the "length unknown" value from the FLAC
+    // stream info - confirm against the header parsing code.
+    if (pFlac->totalSampleCount > 0 && sampleIndex >= pFlac->totalSampleCount) {
+        sampleIndex = pFlac->totalSampleCount - 1;
+    }
+
+
+    // First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower.
+    if (!drflac__seek_to_sample__seek_table(pFlac, sampleIndex)) {
+        return drflac__seek_to_sample__brute_force(pFlac, sampleIndex);
+    }
+
+    return true;
+}
+
+
+#endif //DR_FLAC_IMPLEMENTATION
+
+
+/*
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+*/
diff --git a/panda/src/movies/flacAudio.I b/panda/src/movies/flacAudio.I
new file mode 100644
index 0000000000..0c4a8926db
--- /dev/null
+++ b/panda/src/movies/flacAudio.I
@@ -0,0 +1,12 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University. All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license. You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file flacAudio.I
+ * @author rdb
+ * @date 2016-04-27
+ */
diff --git a/panda/src/movies/flacAudio.cxx b/panda/src/movies/flacAudio.cxx
new file mode 100644
index 0000000000..8bfbd56ad2
--- /dev/null
+++ b/panda/src/movies/flacAudio.cxx
@@ -0,0 +1,64 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University. All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license. You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file flacAudio.cxx
+ * @author rdb
+ * @date 2016-04-27
+ */
+
+#include "flacAudio.h"
+#include "flacAudioCursor.h"
+#include "virtualFileSystem.h"
+#include "dcast.h"
+
+TypeHandle FlacAudio::_type_handle;
+
+/**
+ * Creates a FlacAudio that refers to the file with the given name. The file
+ * itself is not opened until open() is called.
+ */
+FlacAudio::
+FlacAudio(const Filename &name) :
+  MovieAudio(name)
+{
+  _filename = name;
+}
+
+/**
+ * Destroys the FlacAudio. There is nothing to release explicitly.
+ */
+FlacAudio::
+~FlacAudio() {
+}
+
+/**
+ * Opens this audio, returning a MovieAudioCursor that decodes it. Returns
+ * NULL if the file cannot be opened through the VirtualFileSystem or if it
+ * cannot be decoded as FLAC.
+ */
+PT(MovieAudioCursor) FlacAudio::
+open() {
+  VirtualFileSystem *vfs = VirtualFileSystem::get_global_ptr();
+  istream *stream = vfs->open_read_file(_filename, true);
+
+  if (stream == NULL) {
+    return NULL;
+  }
+
+  PT(FlacAudioCursor) cursor = new FlacAudioCursor(this, stream);
+  if (cursor == NULL || !cursor->_is_valid) {
+    // Nobody else will release the stream on this path, so drop the unusable
+    // cursor first and then hand the stream back to the VirtualFileSystem.
+    cursor.clear();
+    vfs->close_read_file(stream);
+    return NULL;
+  }
+
+  return DCAST(MovieAudioCursor, cursor);
+}
+
+/**
+ * Obtains a MovieAudio that references a file.
+ */
+PT(MovieAudio) FlacAudio::
+make(const Filename &name) {
+  return DCAST(MovieAudio, new FlacAudio(name));
+}
diff --git a/panda/src/movies/flacAudio.h b/panda/src/movies/flacAudio.h
new file mode 100644
index 0000000000..4fb9818930
--- /dev/null
+++ b/panda/src/movies/flacAudio.h
@@ -0,0 +1,54 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University. All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license. You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file flacAudio.h
+ * @author rdb
+ * @date 2016-04-27
+ */
+
+#ifndef FLACAUDIO_H
+#define FLACAUDIO_H
+
+#include "pandabase.h"
+#include "movieAudio.h"
+
+class FlacAudioCursor;
+
+/**
+ * Reads FLAC audio files. Ogg-encapsulated FLAC files are not supported.
+ */
+class EXPCL_PANDA_MOVIES FlacAudio : public MovieAudio {
+PUBLISHED:
+  FlacAudio(const Filename &name);  // Refers to the FLAC file called name.
+  virtual ~FlacAudio();
+  virtual PT(MovieAudioCursor) open();  // Returns a decoding cursor, or NULL on failure.
+
+  static PT(MovieAudio) make(const Filename &name);  // Factory: wraps a new FlacAudio(name).
+
+private:
+  friend class FlacAudioCursor;
+
+public:  // Run-time type registration.
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    MovieAudio::init_type();  // The parent type is initialized before this one is registered.
+    register_type(_type_handle, "FlacAudio",
+                  MovieAudio::get_class_type());
+  }
+  virtual TypeHandle get_type() const {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() {init_type(); return get_class_type();}
+
+private:
+  static TypeHandle _type_handle;  // Filled in by init_type().
+};
+
+#endif // FLACAUDIO_H
diff --git a/panda/src/movies/flacAudioCursor.I b/panda/src/movies/flacAudioCursor.I
new file mode 100644
index 0000000000..c01b9a80fa
--- /dev/null
+++ b/panda/src/movies/flacAudioCursor.I
@@ -0,0 +1,12 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University. All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license. You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file flacAudioCursor.I
+ * @author rdb
+ * @date 2013-08-23
+ */
diff --git a/panda/src/movies/flacAudioCursor.cxx b/panda/src/movies/flacAudioCursor.cxx
new file mode 100644
index 0000000000..5618062536
--- /dev/null
+++ b/panda/src/movies/flacAudioCursor.cxx
@@ -0,0 +1,120 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University. All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license. You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file flacAudioCursor.cxx
+ * @author rdb
+ * @date 2013-08-23
+ */
+
+#include "flacAudioCursor.h"
+#include "virtualFileSystem.h"
+
+#define DR_FLAC_IMPLEMENTATION
+#define DR_FLAC_NO_STDIO
+extern "C" {
+  #include "dr_flac.h"
+}
+
+/**
+ * Callback passed to dr_flac to implement file I/O via the VirtualFileSystem.
+ * Reads up to size bytes from the istream given as user data into buffer and
+ * returns the number of bytes that were actually read.
+ */
+static size_t cb_read_proc(void *user, void *buffer, size_t size) {
+  istream *stream = (istream *)user;
+  // This function returns a byte count, so report "nothing read" as 0.
+  nassertr(stream != NULL, 0);
+
+  stream->read((char *)buffer, size);
+
+  if (stream->eof()) {
+    // Gracefully handle EOF: clear the state flags so the stream stays usable
+    // for later seeks and reads.
+    stream->clear();
+  }
+
+  return stream->gcount();
+}
+
+/**
+ * Callback passed to dr_flac to implement file I/O via the VirtualFileSystem.
+ * Moves the read position of the istream given as user data by offset bytes
+ * relative to the current position. Returns true on success.
+ */
+static bool cb_seek_proc(void *user, int offset) {
+  istream *stream = (istream *)user;
+  nassertr(stream != NULL, false);
+
+  stream->seekg(offset, ios::cur);
+  return !stream->fail();
+}
+
+TypeHandle FlacAudioCursor::_type_handle;
+
+/**
+ * Opens the indicated stream as a FLAC file through dr_flac and takes the
+ * length, channel count and sample rate from the decoder. If the stream
+ * cannot be opened, _is_valid is left false and nothing else is initialized.
+ */
+FlacAudioCursor::
+FlacAudioCursor(FlacAudio *src, istream *stream) :
+  MovieAudioCursor(src),
+  _is_valid(false),
+  _drflac(NULL)
+{
+  nassertv(stream != NULL);
+  nassertv(stream->good());
+
+  _drflac = drflac_open(&cb_read_proc, &cb_seek_proc, (void *)stream);
+
+  if (_drflac == NULL) {
+    movies_cat.error()
+      << "Failed to open FLAC file.\n";
+    _is_valid = false;
+    // Bail out here: everything below dereferences _drflac and would end by
+    // marking the cursor as valid.
+    return;
+  }
+
+  // totalSampleCount is divided by the channel count to get the number of
+  // samples per channel before converting to seconds.
+  _length = (_drflac->totalSampleCount / _drflac->channels) / (double)_drflac->sampleRate;
+
+  _audio_channels = _drflac->channels;
+  _audio_rate = _drflac->sampleRate;
+
+  _can_seek = true;
+  _can_seek_fast = _can_seek;
+
+  _is_valid = true;
+}
+
+/**
+ * Closes the dr_flac decoder, if one was opened.
+ */
+FlacAudioCursor::
+~FlacAudioCursor() {
+  if (_drflac != NULL) {
+    drflac_close(_drflac);
+  }
+}
+
+/**
+ * Seeks to a target location. Afterward, the packet_time is guaranteed to be
+ * less than or equal to the specified time.
+ */
+void FlacAudioCursor::
+seek(double t) {
+  t = max(t, 0.0);
+
+  // Index of the target sample within a single channel.
+  uint64_t sample = t * _drflac->sampleRate;
+
+  // The index is multiplied by the channel count before it is passed to
+  // dr_flac.
+  if (drflac_seek_to_sample(_drflac, sample * _drflac->channels)) {
+    _last_seek = sample / (double)_drflac->sampleRate;
+    _samples_read = 0;
+  }
+}
+
+/**
+ * Read audio samples from the stream. N is the number of samples you wish to
+ * read. Your buffer must be equal in size to N * channels. Multiple-channel
+ * audio will be interleaved. If fewer samples than requested could be
+ * decoded, the remainder of the buffer is filled with silence.
+ */
+void FlacAudioCursor::
+read_samples(int n, PN_int16 *data) {
+  if (n <= 0) {
+    // A negative count would turn into a huge unsigned request below.
+    return;
+  }
+
+  int desired = n * _audio_channels;
+  int decoded = (int)drflac_read_s16(_drflac, desired, data);
+
+  // Never hand uninitialized memory back to the caller: pad whatever the
+  // decoder did not fill with silence.
+  for (int i = decoded; i < desired; ++i) {
+    data[i] = 0;
+  }
+
+  _samples_read += decoded / _audio_channels;
+}
diff --git a/panda/src/movies/flacAudioCursor.h b/panda/src/movies/flacAudioCursor.h
new file mode 100644
index 0000000000..2b4633c871
--- /dev/null
+++ b/panda/src/movies/flacAudioCursor.h
@@ -0,0 +1,65 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University. All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license. You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file flacAudioCursor.h
+ * @author rdb
+ * @date 2013-08-23
+ */
+
+#ifndef FLACAUDIOCURSOR_H
+#define FLACAUDIOCURSOR_H
+
+#include "pandabase.h"
+#include "movieAudioCursor.h"
+
+#define DR_FLAC_NO_STDIO
+extern "C" {
+  #include "dr_flac.h"
+}
+
+class FlacAudio;
+
+/**
+ * Interfaces with the dr_flac library to implement decoding of FLAC audio
+ * files.
+ */
+class EXPCL_PANDA_MOVIES FlacAudioCursor : public MovieAudioCursor {
+PUBLISHED:
+  FlacAudioCursor(FlacAudio *src, istream *stream);  // Opens stream with dr_flac.
+  virtual ~FlacAudioCursor();  // Closes the dr_flac decoder, if any.
+  virtual void seek(double offset);  // offset is a time; it is scaled by the sample rate.
+
+public:
+  virtual void read_samples(int n, PN_int16 *data);  // data receives n * channels interleaved samples.
+
+  bool _is_valid;  // Set to true at the end of a successful constructor.
+
+protected:
+  drflac *_drflac;  // dr_flac decoder handle; NULL until drflac_open() succeeds.
+
+public:  // Run-time type registration.
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    MovieAudioCursor::init_type();  // The parent type is initialized before this one is registered.
+    register_type(_type_handle, "FlacAudioCursor",
+                  MovieAudioCursor::get_class_type());
+  }
+  virtual TypeHandle get_type() const {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() {init_type(); return get_class_type();}
+
+private:
+  static TypeHandle _type_handle;  // Filled in by init_type().
+};
+
+#include "flacAudioCursor.I"
+
+#endif // FLACAUDIOCURSOR_H
diff --git a/panda/src/movies/p3movies_composite1.cxx b/panda/src/movies/p3movies_composite1.cxx
index ecb10339a6..ea526c30b1 100644
--- a/panda/src/movies/p3movies_composite1.cxx
+++ b/panda/src/movies/p3movies_composite1.cxx
@@ -1,4 +1,6 @@
 #include "config_movies.cxx"
+#include "flacAudio.cxx"
+#include "flacAudioCursor.cxx"
 #include "inkblotVideo.cxx"
 #include "inkblotVideoCursor.cxx"
 #include "microphoneAudio.cxx"