panda3d/panda/src/ffmpeg/ffmpegAudioCursor.cxx

/**
 * PANDA 3D SOFTWARE
 * Copyright (c) Carnegie Mellon University.  All rights reserved.
 *
 * All use of this software is subject to the terms of the revised BSD
 * license.  You should have received a copy of this license along
 * with this source code in a file named "LICENSE."
 *
 * @file ffmpegAudioCursor.cxx
 * @author jyelon
 * @date 2007-08-01
 */

#include "config_ffmpeg.h"
#include "ffmpegAudioCursor.h"

#include "ffmpegAudio.h"
extern "C" {
  #include "libavutil/dict.h"
  #include "libavutil/opt.h"
  #include "libavcodec/avcodec.h"
  #include "libavformat/avformat.h"
}

#ifdef HAVE_SWRESAMPLE
extern "C" {
  #include "libswresample/swresample.h"
}
#endif

// Definition of the class's static TypeHandle member.
TypeHandle FfmpegAudioCursor::_type_handle;

// When building against libavformat older than major version 53, map
// AVMEDIA_TYPE_AUDIO onto CODEC_TYPE_AUDIO (presumably the name those
// releases used) so the stream search below can use a single spelling.
#if LIBAVFORMAT_VERSION_MAJOR < 53
  #define AVMEDIA_TYPE_AUDIO CODEC_TYPE_AUDIO
#endif

#ifndef AVCODEC_MAX_AUDIO_FRAME_SIZE
// More recent versions of ffmpeg no longer define this.  The constructor
// uses it to size the decode buffer: the buffer holds half this many 16-bit
// samples.
#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000
#endif

/**
 * Opens the file named by the given FfmpegAudio source, locates an audio
 * stream in it, opens a decoder for that stream and allocates the packet,
 * frame and sample buffers used by read_samples().
 *
 * If the decoder does not produce signed 16-bit samples, a swresample context
 * is configured to convert to that format (when HAVE_SWRESAMPLE is defined).
 *
 * On any failure cleanup() is called and the constructor returns early,
 * leaving the cursor in the inactive state that cleanup() produces (all
 * ffmpeg handles null).
 */
FfmpegAudioCursor::
FfmpegAudioCursor(FfmpegAudio *src) :
  MovieAudioCursor(src),
  _filename(src->_filename),
  _packet(0),
  _packet_data(0),
  _format_ctx(0),
  _audio_ctx(0),
#ifdef HAVE_SWRESAMPLE
  _resample_ctx(0),
#endif
  _buffer(0),
  _buffer_alloc(0),
  _frame(0)
{
  // Give the bookkeeping members defined values before the first possible
  // early return, so that an inactive cursor never exposes garbage.
  _audio_index = -1;
  _initial_dts = 0;
  _packet_size = 0;
  _buffer_size = 0;
  _buffer_head = 0;
  _buffer_tail = 0;

  if (!_ffvfile.open_vfs(_filename)) {
    cleanup();
    return;
  }

  _format_ctx = _ffvfile.get_format_context();
  nassertv(_format_ctx != NULL);

#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(53, 6, 0)
  const int stream_info_result = avformat_find_stream_info(_format_ctx, NULL);
#else
  const int stream_info_result = av_find_stream_info(_format_ctx);
#endif
  if (stream_info_result < 0) {
    cleanup();
    return;
  }

  // Find the audio stream.  The loop does not stop at the first match, so if
  // the file carries several audio streams the last one is used.
  for (int i = 0; i < (int)_format_ctx->nb_streams; i++) {
    if (_format_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
      _audio_index = i;
      _audio_ctx = _format_ctx->streams[i]->codec;
      _audio_timebase = av_q2d(_format_ctx->streams[i]->time_base);
      _audio_rate = _audio_ctx->sample_rate;
      _audio_channels = _audio_ctx->channels;
    }
  }

  if (_audio_ctx == 0) {
    cleanup();
    return;
  }

  AVCodec *pAudioCodec = avcodec_find_decoder(_audio_ctx->codec_id);
  if (pAudioCodec == 0) {
    cleanup();
    return;
  }

#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(53, 8, 0)
  // Ask the decoder for signed 16-bit output.  The dictionary has to be
  // handed to avcodec_open2() for the request to have any effect, and it is
  // ours to free afterwards whether or not the open succeeded.
  AVDictionary *opts = NULL;
  av_dict_set(&opts, "request_sample_fmt", "s16", 0);
  const int open_result = avcodec_open2(_audio_ctx, pAudioCodec, &opts);
  av_dict_free(&opts);
#else
  const int open_result = avcodec_open(_audio_ctx, pAudioCodec);
#endif
  if (open_result < 0) {
    cleanup();
    return;
  }

  // Set up the resample context if necessary.
  if (_audio_ctx->sample_fmt != AV_SAMPLE_FMT_S16) {
#ifdef HAVE_SWRESAMPLE
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 25, 0)
    ffmpeg_cat.error()
      << "Codec does not use signed 16-bit sample format.  Upgrade libavcodec to 53.25.0 or higher.\n";
#else
    ffmpeg_cat.debug()
      << "Codec does not use signed 16-bit sample format.  Setting up swresample context.\n";
#endif

    _resample_ctx = swr_alloc();
    if (_resample_ctx == NULL) {
      ffmpeg_cat.error()
        << "Failed to set up resample context.\n";
    } else {
      // Same layout and rate on both sides; only the sample format changes.
      av_opt_set_int(_resample_ctx, "in_channel_layout", _audio_ctx->channel_layout, 0);
      av_opt_set_int(_resample_ctx, "out_channel_layout", _audio_ctx->channel_layout, 0);
      av_opt_set_int(_resample_ctx, "in_sample_rate", _audio_ctx->sample_rate, 0);
      av_opt_set_int(_resample_ctx, "out_sample_rate", _audio_ctx->sample_rate, 0);
      av_opt_set_sample_fmt(_resample_ctx, "in_sample_fmt", _audio_ctx->sample_fmt, 0);
      av_opt_set_sample_fmt(_resample_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);

      if (swr_init(_resample_ctx) != 0) {
        ffmpeg_cat.error()
          << "Failed to set up resample context.\n";
        // Release the half-configured context instead of just dropping the
        // pointer, which would leak it.
        swr_free(&_resample_ctx);
        _resample_ctx = NULL;
      }
    }
#else
    ffmpeg_cat.error()
      << "Codec does not use signed 16-bit sample format, but support for libswresample has not been enabled.\n";
#endif
  }

  _length = (_format_ctx->duration * 1.0) / AV_TIME_BASE;
  _can_seek = true;
  _can_seek_fast = true;

#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(54, 59, 100)
  _frame = av_frame_alloc();
#else
  _frame = avcodec_alloc_frame();
#endif

  // Zero the packet as soon as it exists, so that cleanup() never inspects an
  // uninitialised data pointer.
  _packet = new AVPacket;
  if (_packet != 0) {
    memset(_packet, 0, sizeof(AVPacket));
  }

  // _buffer_size counts 16-bit samples.  The 64 extra elements leave room for
  // the alignment adjustment below.
  _buffer_size = AVCODEC_MAX_AUDIO_FRAME_SIZE / 2;
  _buffer_alloc = new PN_int16[_buffer_size + 64];

  if ((_frame == 0)||(_packet == 0)||(_buffer_alloc == 0)) {
    cleanup();
    return;
  }

  // Align the buffer to a 32-byte boundary.  The ffmpeg codec likes this,
  // because it uses SSE/SSE2.
  _buffer = _buffer_alloc;
  while (((size_t)_buffer) & 31) {
    _buffer += 1;
  }

  // Prime the packet buffer and remember the dts of the first audio packet;
  // seek() uses it as the lowest timestamp it will ask for.
  fetch_packet();
  _initial_dts = _packet->dts;
  _last_seek = 0;
  _samples_read = 0;
  _buffer_head = 0;
  _buffer_tail = 0;
}

/**
 * Destructor.  Hands all teardown work to cleanup(), which releases whatever
 * decoding resources the cursor still holds.
 */
FfmpegAudioCursor::
~FfmpegAudioCursor() {
  this->cleanup();
}

/**
 * Reset to a standard inactive state.
 */
void FfmpegAudioCursor::
cleanup() {
  if (_frame) {
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(55, 45, 101)
    av_frame_free(&_frame);
#elif LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(54, 59, 100)
    avcodec_free_frame(&_frame);
#else
    av_free(&_frame);
#endif
    _frame = NULL;
  }

  if (_packet) {
    if (_packet->data) {
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
      av_packet_unref(_packet);
#else
      av_free_packet(_packet);
#endif
    }
    delete _packet;
    _packet = NULL;
  }

  if (_buffer_alloc) {
    delete[] _buffer_alloc;
    _buffer_alloc = 0;
    _buffer = NULL;
  }

  if ((_audio_ctx)&&(_audio_ctx->codec)) {
    avcodec_close(_audio_ctx);
  }
  _audio_ctx = NULL;

  if (_format_ctx) {
    _ffvfile.close();
    _format_ctx = NULL;
  }

#ifdef HAVE_SWRESAMPLE
  if (_resample_ctx) {
    swr_free(&_resample_ctx);
    _resample_ctx = NULL;
  }
#endif

  _audio_index = -1;
}

/**
 * Fetches an audio packet and stores it in the packet buffer.  Also sets
 * packet_size and packet_data.
 */
void FfmpegAudioCursor::
fetch_packet() {
  if (_packet->data) {
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
    av_packet_unref(_packet);
#else
    av_free_packet(_packet);
#endif
  }
  while (av_read_frame(_format_ctx, _packet) >= 0) {
    if (_packet->stream_index == _audio_index) {
      _packet_size = _packet->size;
      _packet_data = _packet->data;
      return;
    }
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
    av_packet_unref(_packet);
#else
    av_free_packet(_packet);
#endif
  }
  _packet->data = 0;
  _packet_size = 0;
  _packet_data = 0;
}

/**
 * Reloads the audio buffer by decoding audio packets until one of those audio
 * packets finally yields some samples.  If we encounter the end of the
 * stream, we synthesize silence.
 *
 * Returns false if the decoder reports an error or if the cursor has been
 * torn down by cleanup(); returns true otherwise, in which case the samples
 * in _buffer between _buffer_head and _buffer_tail are valid.
 */
bool FfmpegAudioCursor::
reload_buffer() {
  // After cleanup() (failed open or failed seek) there is nothing to decode
  // from and nowhere to decode into.
  if ((_packet == 0)||(_buffer == 0)||(_audio_ctx == 0)) {
    return false;
  }

  while (_buffer_head == _buffer_tail) {
    // If we're out of packets, generate silence.
    if (_packet->data == 0) {
      _buffer_head = 0;
      _buffer_tail = _buffer_size;
      memset(_buffer, 0, _buffer_size * 2);
      return true;
    } else if (_packet_size > 0) {
      int bufsize = _buffer_size * 2;
#if LIBAVCODEC_VERSION_INT < 3349504
      int len = avcodec_decode_audio(_audio_ctx, _buffer, &bufsize,
                                    _packet_data, _packet_size);
      movies_debug("avcodec_decode_audio returned " << len);
#elif LIBAVCODEC_VERSION_INT < 3414272
      int len = avcodec_decode_audio2(_audio_ctx, _buffer, &bufsize,
                                      _packet_data, _packet_size);
      movies_debug("avcodec_decode_audio2 returned " << len);
#elif LIBAVCODEC_VERSION_INT < AV_VERSION_INT(53, 25, 0)
      // We should technically also consider resampling in this case, but
      // whatever.  Just upgrade your ffmpeg version if you get garbage.
      AVPacket pkt;
      av_init_packet(&pkt);
      pkt.data = _packet_data;
      pkt.size = _packet_size;
      int len = avcodec_decode_audio3(_audio_ctx, _buffer, &bufsize, &pkt);
      movies_debug("avcodec_decode_audio3 returned " << len);
      av_free_packet(&pkt);
#else
      int got_frame = 0;
      // Temporary packet that views the not-yet-consumed part of _packet.
      AVPacket pkt;
      av_init_packet(&pkt);
      pkt.data = _packet_data;
      pkt.size = _packet_size;
      int len = avcodec_decode_audio4(_audio_ctx, _frame, &got_frame, &pkt);
      movies_debug("avcodec_decode_audio4 returned " << len);
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(57, 12, 100)
      av_packet_unref(&pkt);
#else
      av_free_packet(&pkt);
#endif

      bufsize = 0;
      if (got_frame) {
#ifdef HAVE_SWRESAMPLE
        if (_resample_ctx) {
          // Resample the data to signed 16-bit sample format.  swr_convert
          // takes the output capacity in samples per channel, so divide the
          // buffer between the channels rather than assuming stereo.
          const int divisor = (_audio_channels > 0) ? _audio_channels : 1;
          const int converted = swr_convert(_resample_ctx, (uint8_t **)&_buffer, _buffer_size / divisor, (const uint8_t**)_frame->extended_data, _frame->nb_samples);
          // A negative result is an error; treat it as "no samples".
          bufsize = (converted > 0) ? (converted * _audio_channels * 2) : 0;
        } else
#endif
        {
          // linesize[0] is the allocated size of the plane, which may include
          // alignment padding past the decoded samples.  Copy no more than
          // the plane holds, the frame's samples occupy, or our buffer can
          // take.
          const int capacity = _buffer_size * 2;
          const int payload = _frame->nb_samples * _audio_ctx->channels * 2;
          bufsize = _frame->linesize[0];
          if (bufsize > payload) {
            bufsize = payload;
          }
          if (bufsize > capacity) {
            bufsize = capacity;
          }
          if (bufsize > 0) {
            memcpy(_buffer, _frame->data[0], bufsize);
          } else {
            bufsize = 0;
          }
        }
      }
#if LIBAVUTIL_VERSION_INT > AV_VERSION_INT(52, 19, 100)
      av_frame_unref(_frame);
#endif
#endif

      if (len < 0) {
        return false;
      }
      if ((len == 0)&&(bufsize <= 0)) {
        // The decoder neither consumed input nor produced output.  Retrying
        // the same bytes would loop forever, so drop the rest of this packet;
        // the next iteration fetches a new one.
        _packet_size = 0;
        continue;
      }
      _packet_data += len;
      _packet_size -= len;
      if (bufsize > 0) {
        _buffer_head = 0;
        _buffer_tail = (bufsize/2);
        return true;
      }
    } else {
      fetch_packet();
    }
  }
  return true;
}

/**
 * Seeks to a target location.  The file is positioned with
 * AVSEEK_FLAG_BACKWARD, the codec is closed and reopened, the next audio
 * packet is fetched, and samples are then read and discarded until the
 * requested time t (in seconds) is reached.
 *
 * If the seek or the codec reopen fails, the cursor is shut down with
 * cleanup().  Calling seek() on a cursor that has already been shut down does
 * nothing.
 */
void FfmpegAudioCursor::
seek(double t) {
  // A previous failure has already torn the cursor down; there is nothing
  // left to seek in.
  if ((_format_ctx == 0)||(_audio_ctx == 0)||(_packet == 0)) {
    return;
  }

  PN_int64 target_ts = (PN_int64)(t / _audio_timebase);
  if (target_ts < (PN_int64)(_initial_dts)) {
    // Attempts to seek before the first packet will fail.
    target_ts = _initial_dts;
  }
  if (av_seek_frame(_format_ctx, _audio_index, target_ts, AVSEEK_FLAG_BACKWARD) < 0) {
    ffmpeg_cat.error() << "Seek failure. Shutting down movie.\n";
    cleanup();
    return;
  }

  // Reopen the codec so decoding resumes from a fresh decoder.
  avcodec_close(_audio_ctx);
  AVCodec *pAudioCodec = avcodec_find_decoder(_audio_ctx->codec_id);
  if(pAudioCodec == 0) {
    cleanup();
    return;
  }
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(53, 8, 0)
  const int open_result = avcodec_open2(_audio_ctx, pAudioCodec, NULL);
#else
  const int open_result = avcodec_open(_audio_ctx, pAudioCodec);
#endif
  if (open_result < 0) {
    cleanup();
    return;
  }

  // Discard whatever was decoded before the seek.
  _buffer_head = 0;
  _buffer_tail = 0;
  fetch_packet();

  // Only a real packet carries a usable dts.  When the fetch ran off the end
  // of the stream there is nothing to skip, and the dts must not be used.
  if (_packet->data != 0) {
    double ts = _packet->dts * _audio_timebase;
    if (t > ts) {
      // The packet starts before the requested time; throw away the samples
      // in between.
      int skip = (int)((t-ts) * _audio_rate);
      read_samples(skip, 0);
    }
  }
  _last_seek = t;
  _samples_read = 0;
}

/**
 * Read audio samples from the stream.  N is the number of samples you wish to
 * read.  Your buffer must be equal in size to N * channels.  Multiple-channel
 * audio will be interleaved.
 *
 * data may be null, in which case the samples are consumed and discarded
 * (seek() uses this to skip ahead).  If decoding fails or the cursor has been
 * shut down, the part of data that could not be filled is set to silence.
 * _samples_read is advanced by n in every case.
 */
void FfmpegAudioCursor::
read_samples(int n, PN_int16 *data) {
  int desired = n * _audio_channels;

  while (desired > 0) {
    if (_buffer == 0) {
      // cleanup() has released the sample buffer; nothing can be read.
      break;
    }
    if (_buffer_head == _buffer_tail) {
      if(!reload_buffer()){
        break;
      }
      movies_debug("read_samples() desired samples: " << desired << " N:" << n);
    }
    int available = _buffer_tail - _buffer_head;
    int ncopy = (desired > available) ? available : desired;
    if (ncopy) {
      if (data != 0) {
        memcpy(data, _buffer + _buffer_head, ncopy * 2);
        data += ncopy;
      }
      desired -= ncopy;
      _buffer_head += ncopy;
    }

  }

  // If we had to give up early, hand back silence rather than leaving the
  // tail of the caller's buffer uninitialised.
  if ((desired > 0)&&(data != 0)) {
    memset(data, 0, desired * 2);
  }
  _samples_read += n;
}