From c10232b4e2cd7a3a78233ddf96704b9fe781ac92 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 20 Apr 2021 19:57:21 +0800 Subject: [PATCH] Bridger: Refine transcoder to support aac2opus and opus2aac. 4.0.94 --- README.md | 1 + trunk/src/app/srs_app_rtc_codec.cpp | 731 +++++++++++--------------- trunk/src/app/srs_app_rtc_codec.hpp | 125 ++--- trunk/src/app/srs_app_rtc_source.cpp | 87 ++- trunk/src/app/srs_app_rtc_source.hpp | 9 +- trunk/src/core/srs_core_version4.hpp | 2 +- trunk/src/kernel/srs_kernel_codec.hpp | 15 +- 7 files changed, 390 insertions(+), 580 deletions(-) diff --git a/README.md b/README.md index 89aa0abb5..c273502fb 100755 --- a/README.md +++ b/README.md @@ -157,6 +157,7 @@ Other important wiki: ## V4 changes +* v5.0, 2021-04-20, Refine transcoder to support aac2opus and opus2aac. 4.0.94 * v4.0, 2021-05-01, Timer: Extract and apply shared FastTimer. 4.0.93 * v4.0, 2021-04-29, RTC: Support AV1 for Chrome M90. 4.0.91 * v4.0, 2021-04-24, Change push-RTSP as deprecated feature. diff --git a/trunk/src/app/srs_app_rtc_codec.cpp b/trunk/src/app/srs_app_rtc_codec.cpp index 2c9ebd2a9..eb2d66613 100644 --- a/trunk/src/app/srs_app_rtc_codec.cpp +++ b/trunk/src/app/srs_app_rtc_codec.cpp @@ -1,4 +1,3 @@ - /** * The MIT License (MIT) * @@ -22,14 +21,13 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include -#include #include -static const int kFrameBufMax = 40960; -static const int kPacketBufMax = 8192; +#include +#include +#include -static const char* id2codec_name(SrsAudioCodecId id) +static const char* id2codec_name(SrsAudioCodecId id) { switch (id) { case SrsAudioCodecIdAAC: @@ -41,506 +39,379 @@ static const char* id2codec_name(SrsAudioCodecId id) } } -SrsAudioDecoder::SrsAudioDecoder(SrsAudioCodecId codec) - : codec_id_(codec) +SrsAudioTranscoder::SrsAudioTranscoder() { - frame_ = NULL; - packet_ = NULL; - codec_ctx_ = NULL; + dec_ = NULL; + dec_frame_ = NULL; + dec_packet_ = NULL; + enc_ = NULL; + enc_frame_ = NULL; + enc_packet_ = NULL; + swr_ = NULL; + swr_data_ = NULL; + fifo_ = NULL; + new_pkt_pts_ = AV_NOPTS_VALUE; + next_out_pts_ = AV_NOPTS_VALUE; } -SrsAudioDecoder::~SrsAudioDecoder() +SrsAudioTranscoder::~SrsAudioTranscoder() { - if (codec_ctx_) { - avcodec_free_context(&codec_ctx_); - codec_ctx_ = NULL; + if (dec_) { + avcodec_free_context(&dec_); } - if (frame_) { - av_frame_free(&frame_); - frame_ = NULL; + + if (dec_frame_) { + av_frame_free(&dec_frame_); } - if (packet_) { - av_packet_free(&packet_); - packet_ = NULL; + + if (dec_packet_) { + av_packet_free(&dec_packet_); + } + + if (swr_) { + swr_free(&swr_); + } + + free_swr_samples(); + + if (enc_) { + avcodec_free_context(&enc_); + } + + if (enc_frame_) { + av_frame_free(&enc_frame_); + } + + if (enc_packet_) { + av_packet_free(&enc_packet_); + } + + if (fifo_) { + av_audio_fifo_free(fifo_); + fifo_ = NULL; } } -srs_error_t SrsAudioDecoder::initialize() +srs_error_t SrsAudioTranscoder::initialize(SrsAudioCodecId src_codec, SrsAudioCodecId dst_codec, int dst_channels, int dst_samplerate, int dst_bit_rate) { srs_error_t err = srs_success; - //check codec name,only support "aac","opus" - if (codec_id_ != SrsAudioCodecIdAAC && codec_id_ != SrsAudioCodecIdOpus) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Invalid codec name %d", codec_id_); + if ((err = init_dec(src_codec)) != 
srs_success) { + return srs_error_wrap(err, "dec init codec:%d", src_codec); } - const char* codec_name = id2codec_name(codec_id_); + if ((err = init_enc(dst_codec, dst_channels, dst_samplerate, dst_bit_rate)) != srs_success) { + return srs_error_wrap(err, "enc init codec:%d, channels:%d, samplerate:%d, bitrate:%d", + dst_codec, dst_channels, dst_samplerate, dst_bit_rate); + } + + if ((err = init_fifo()) != srs_success) { + return srs_error_wrap(err, "fifo init"); + } + + return err; +} + +srs_error_t SrsAudioTranscoder::transcode(SrsAudioFrame *in_pkt, std::vector& out_pkts) +{ + srs_error_t err = srs_success; + + if ((err = decode_and_resample(in_pkt)) != srs_success) { + return srs_error_wrap(err, "decode and resample"); + } + + if ((err = encode(out_pkts)) != srs_success) { + return srs_error_wrap(err, "encode"); + } + + return err; +} + +void SrsAudioTranscoder::free_frames(std::vector& frames) +{ + for (std::vector::iterator it = frames.begin(); it != frames.end(); ++it) { + SrsAudioFrame* p = *it; + + for (int i = 0; i < p->nb_samples; i++) { + char* pa = p->samples[i].bytes; + srs_freepa(pa); + } + + srs_freep(p); + } +} + +void SrsAudioTranscoder::aac_codec_header(uint8_t **data, int *len) +{ + //srs_assert(dst_codec == SrsAudioCodecIdAAC); + *len = enc_->extradata_size; + *data = enc_->extradata; +} + +srs_error_t SrsAudioTranscoder::init_dec(SrsAudioCodecId src_codec) +{ + const char* codec_name = id2codec_name(src_codec); const AVCodec *codec = avcodec_find_decoder_by_name(codec_name); if (!codec) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Codec not found by name %d(%s)", codec_id_, codec_name); + return srs_error_new(ERROR_RTC_RTP_MUXER, "Codec not found by name(%d,%s)", src_codec, codec_name); } - codec_ctx_ = avcodec_alloc_context3(codec); - if (!codec_ctx_) { + dec_ = avcodec_alloc_context3(codec); + if (!dec_) { return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio codec context"); } - if (avcodec_open2(codec_ctx_, codec, NULL) < 
0) { + if (avcodec_open2(dec_, codec, NULL) < 0) { return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not open codec"); } - frame_ = av_frame_alloc(); - if (!frame_) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio frame"); + dec_frame_ = av_frame_alloc(); + if (!dec_frame_) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio decode out frame"); } - packet_ = av_packet_alloc(); - if (!packet_) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio packet"); + dec_packet_ = av_packet_alloc(); + if (!dec_packet_) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio decode in packet"); } - return err; + new_pkt_pts_ = AV_NOPTS_VALUE; + return srs_success; } -srs_error_t SrsAudioDecoder::decode(SrsSample *pkt, char *buf, int &size) +srs_error_t SrsAudioTranscoder::init_enc(SrsAudioCodecId dst_codec, int dst_channels, int dst_samplerate, int dst_bit_rate) { - srs_error_t err = srs_success; - - packet_->data = (uint8_t *)pkt->bytes; - packet_->size = pkt->size; - - int ret = avcodec_send_packet(codec_ctx_, packet_); - if (ret < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Error submitting the packet to the decoder"); - } - - int max = size; - size = 0; - - while (ret >= 0) { - ret = avcodec_receive_frame(codec_ctx_, frame_); - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { - return err; - } else if (ret < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Error during decoding"); - } - - int pcm_size = av_get_bytes_per_sample(codec_ctx_->sample_fmt); - if (pcm_size < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Failed to calculate data size"); - } - - // @see https://github.com/ossrs/srs/pull/2011/files - for (int i = 0; i < codec_ctx_->channels; i++) { - if (size + pcm_size * frame_->nb_samples <= max) { - memcpy(buf + size,frame_->data[i],pcm_size * frame_->nb_samples); - size += pcm_size * frame_->nb_samples; - } - } - } - - return err; -} - -AVCodecContext* 
SrsAudioDecoder::codec_ctx() -{ - return codec_ctx_; -} - -SrsAudioEncoder::SrsAudioEncoder(SrsAudioCodecId codec, int samplerate, int channels) - : channels_(channels), - sampling_rate_(samplerate), - codec_id_(codec), - want_bytes_(0) -{ - codec_ctx_ = NULL; -} - -SrsAudioEncoder::~SrsAudioEncoder() -{ - if (codec_ctx_) { - avcodec_free_context(&codec_ctx_); - } - - if (frame_) { - av_frame_free(&frame_); - } - -} - -srs_error_t SrsAudioEncoder::initialize() -{ - srs_error_t err = srs_success; - - if (codec_id_ != SrsAudioCodecIdAAC && codec_id_ != SrsAudioCodecIdOpus) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Invalid codec name %d", codec_id_); - } - - frame_ = av_frame_alloc(); - if (!frame_) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio frame"); - } - - const char* codec_name = id2codec_name(codec_id_); + const char* codec_name = id2codec_name(dst_codec); const AVCodec *codec = avcodec_find_encoder_by_name(codec_name); if (!codec) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Codec not found by name %d(%s)", codec_id_, codec_name); + return srs_error_new(ERROR_RTC_RTP_MUXER, "Codec not found by name(%d,%s)", dst_codec, codec_name); } - codec_ctx_ = avcodec_alloc_context3(codec); - if (!codec_ctx_) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio codec context"); + enc_ = avcodec_alloc_context3(codec); + if (!enc_) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio codec context(%d,%s)", dst_codec, codec_name); } - codec_ctx_->sample_rate = sampling_rate_; - codec_ctx_->channels = channels_; - codec_ctx_->channel_layout = av_get_default_channel_layout(channels_); - codec_ctx_->bit_rate = 48000; - if (codec_id_ == SrsAudioCodecIdOpus) { - codec_ctx_->sample_fmt = AV_SAMPLE_FMT_S16; + enc_->sample_rate = dst_samplerate; + enc_->channels = dst_channels; + enc_->channel_layout = av_get_default_channel_layout(dst_channels); + enc_->bit_rate = dst_bit_rate; + enc_->sample_fmt = 
codec->sample_fmts[0]; + enc_->time_base = {1, 1000}; + if (dst_codec == SrsAudioCodecIdOpus) { //TODO: for more level setting - codec_ctx_->compression_level = 1; - } else if (codec_id_ == SrsAudioCodecIdAAC) { - codec_ctx_->sample_fmt = AV_SAMPLE_FMT_FLTP; + enc_->compression_level = 1; + } else if (dst_codec == SrsAudioCodecIdAAC) { + enc_->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; } // TODO: FIXME: Show detail error. - if (avcodec_open2(codec_ctx_, codec, NULL) < 0) { + if (avcodec_open2(enc_, codec, NULL) < 0) { return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not open codec"); } - want_bytes_ = codec_ctx_->channels * codec_ctx_->frame_size * av_get_bytes_per_sample(codec_ctx_->sample_fmt); + enc_frame_ = av_frame_alloc(); + if (!enc_frame_) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio encode in frame"); + } - frame_->format = codec_ctx_->sample_fmt; - frame_->nb_samples = codec_ctx_->frame_size; - frame_->channel_layout = codec_ctx_->channel_layout; + enc_frame_->format = enc_->sample_fmt; + enc_frame_->nb_samples = enc_->frame_size; + enc_frame_->channel_layout = enc_->channel_layout; - if (av_frame_get_buffer(frame_, 0) < 0) { + if (av_frame_get_buffer(enc_frame_, 0) < 0) { return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not get audio frame buffer"); } - return err; + enc_packet_ = av_packet_alloc(); + if (!enc_packet_) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate audio encode out packet"); + } + + next_out_pts_ = AV_NOPTS_VALUE; + return srs_success; } -int SrsAudioEncoder::want_bytes() +srs_error_t SrsAudioTranscoder::init_swr(AVCodecContext* decoder) { - return want_bytes_; + swr_ = swr_alloc_set_opts(NULL, enc_->channel_layout, enc_->sample_fmt, enc_->sample_rate, + decoder->channel_layout, decoder->sample_fmt, decoder->sample_rate, 0, NULL); + if (!swr_) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "alloc swr"); + } + + int error; + char err_buf[AV_ERROR_MAX_STRING_SIZE] = {0}; + if ((error = 
swr_init(swr_)) < 0) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "open swr(%d:%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); + } + + /* Allocate as many pointers as there are audio channels. + * Each pointer will later point to the audio samples of the corresponding + * channels (although it may be NULL for interleaved formats). + */ + if (!(swr_data_ = (uint8_t **)calloc(enc_->channels, sizeof(*swr_data_)))) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "alloc swr buffer"); + } + + /* Allocate memory for the samples of all channels in one consecutive + * block for convenience. */ + if ((error = av_samples_alloc(swr_data_, NULL, enc_->channels, enc_->frame_size, enc_->sample_fmt, 0)) < 0) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "alloc swr buffer(%d:%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); + } + + return srs_success; } -srs_error_t SrsAudioEncoder::encode(SrsSample *frame, char *buf, int &size) +srs_error_t SrsAudioTranscoder::init_fifo() +{ + if (!(fifo_ = av_audio_fifo_alloc(enc_->sample_fmt, enc_->channels, 1))) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate FIFO"); + } + return srs_success; +} + +srs_error_t SrsAudioTranscoder::decode_and_resample(SrsAudioFrame *pkt) { srs_error_t err = srs_success; - if (want_bytes_ > 0 && frame->size != want_bytes_) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "invalid frame size %d, should be %d", frame->size, want_bytes_); + dec_packet_->data = (uint8_t *)pkt->samples[0].bytes; + dec_packet_->size = pkt->samples[0].size; + + char err_buf[AV_ERROR_MAX_STRING_SIZE] = {0}; + + int error = avcodec_send_packet(dec_, dec_packet_); + if (error < 0) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "submit to dec(%d,%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); } - // TODO: Directly use frame? 
- memcpy(frame_->data[0], frame->bytes, frame->size); - - /* send the frame for encoding */ - int r0 = avcodec_send_frame(codec_ctx_, frame_); - if (r0 < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Error sending the frame to the encoder, %d", r0); - } - - AVPacket pkt; - av_init_packet(&pkt); - pkt.data = NULL; - pkt.size = 0; - - /* read all the available output packets (in general there may be any - * number of them */ - size = 0; - while (r0 >= 0) { - r0 = avcodec_receive_packet(codec_ctx_, &pkt); - if (r0 == AVERROR(EAGAIN) || r0 == AVERROR_EOF) { - break; - } else if (r0 < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Error during decoding %d", r0); + new_pkt_pts_ = pkt->dts + pkt->cts; + while (error >= 0) { + error = avcodec_receive_frame(dec_, dec_frame_); + if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) { + return err; + } else if (error < 0) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Error during decoding(%d,%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); } - //TODO: fit encoder out more pkt - memcpy(buf, pkt.data, pkt.size); - size = pkt.size; - av_packet_unref(&pkt); + // Decoder is OK now, try to init swr if not initialized. + if (!swr_ && (err = init_swr(dec_)) != srs_success) { + return srs_error_wrap(err, "resample init"); + } - // TODO: FIXME: Refine api, got more than one packets. + int in_samples = dec_frame_->nb_samples; + const uint8_t **in_data = (const uint8_t**)dec_frame_->extended_data; + do { + /* Convert the samples using the resampler. 
*/ + int frame_size = swr_convert(swr_, swr_data_, enc_->frame_size, in_data, in_samples); + if ((error = frame_size) < 0) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not convert input samples(%d,%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); + } + + in_data = NULL; in_samples = 0; + if ((err = add_samples_to_fifo(swr_data_, frame_size)) != srs_success) { + return srs_error_wrap(err, "write samples"); + } + } while (swr_get_out_samples(swr_, in_samples) >= enc_->frame_size); } return err; } -AVCodecContext* SrsAudioEncoder::codec_ctx() +srs_error_t SrsAudioTranscoder::encode(std::vector &pkts) { - return codec_ctx_; -} + char err_buf[AV_ERROR_MAX_STRING_SIZE] = {0}; -SrsAudioResample::SrsAudioResample(int src_rate, int src_layout, enum AVSampleFormat src_fmt, - int src_nb, int dst_rate, int dst_layout, AVSampleFormat dst_fmt) - : src_rate_(src_rate), - src_ch_layout_(src_layout), - src_sample_fmt_(src_fmt), - src_nb_samples_(src_nb), - dst_rate_(dst_rate), - dst_ch_layout_(dst_layout), - dst_sample_fmt_(dst_fmt) -{ - src_nb_channels_ = 0; - dst_nb_channels_ = 0; - src_linesize_ = 0; - dst_linesize_ = 0; - dst_nb_samples_ = 0; - src_data_ = NULL; - dst_data_ = 0; - - max_dst_nb_samples_ = 0; - swr_ctx_ = NULL; -} - -SrsAudioResample::~SrsAudioResample() -{ - if (src_data_) { - av_freep(&src_data_[0]); - av_freep(&src_data_); - src_data_ = NULL; - } - if (dst_data_) { - av_freep(&dst_data_[0]); - av_freep(&dst_data_); - dst_data_ = NULL; - } - if (swr_ctx_) { - swr_free(&swr_ctx_); - swr_ctx_ = NULL; - } -} - -srs_error_t SrsAudioResample::initialize() -{ - srs_error_t err = srs_success; - - swr_ctx_ = swr_alloc(); - if (!swr_ctx_) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate resampler context"); - } - - av_opt_set_int(swr_ctx_, "in_channel_layout", src_ch_layout_, 0); - av_opt_set_int(swr_ctx_, "in_sample_rate", src_rate_, 0); - av_opt_set_sample_fmt(swr_ctx_, "in_sample_fmt", src_sample_fmt_, 0); - - 
av_opt_set_int(swr_ctx_, "out_channel_layout", dst_ch_layout_, 0); - av_opt_set_int(swr_ctx_, "out_sample_rate", dst_rate_, 0); - av_opt_set_sample_fmt(swr_ctx_, "out_sample_fmt", dst_sample_fmt_, 0); - - int ret; - if ((ret = swr_init(swr_ctx_)) < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Failed to initialize the resampling context"); - } - - src_nb_channels_ = av_get_channel_layout_nb_channels(src_ch_layout_); - ret = av_samples_alloc_array_and_samples(&src_data_, &src_linesize_, src_nb_channels_, - src_nb_samples_, src_sample_fmt_, 0); - if (ret < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate source samples"); - } - - max_dst_nb_samples_ = dst_nb_samples_ = - av_rescale_rnd(src_nb_samples_, dst_rate_, src_rate_, AV_ROUND_UP); - - dst_nb_channels_ = av_get_channel_layout_nb_channels(dst_ch_layout_); - ret = av_samples_alloc_array_and_samples(&dst_data_, &dst_linesize_, dst_nb_channels_, - dst_nb_samples_, dst_sample_fmt_, 0); - if (ret < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not allocate destination samples"); - } - - return err; -} - -srs_error_t SrsAudioResample::resample(SrsSample *pcm, char *buf, int &size) -{ - srs_error_t err = srs_success; - - int ret, plane = 1; - if (src_sample_fmt_ == AV_SAMPLE_FMT_FLTP) { - plane = 2; - } - if (src_linesize_ * plane < pcm->size || pcm->size < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "size not ok"); - } - memcpy(src_data_[0], pcm->bytes, pcm->size); - - dst_nb_samples_ = av_rescale_rnd(swr_get_delay(swr_ctx_, src_rate_) + - src_nb_samples_, dst_rate_, src_rate_, AV_ROUND_UP); - if (dst_nb_samples_ > max_dst_nb_samples_) { - av_freep(&dst_data_[0]); - ret = av_samples_alloc(dst_data_, &dst_linesize_, dst_nb_channels_, - dst_nb_samples_, dst_sample_fmt_, 1); - if (ret < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "alloc error"); - } - max_dst_nb_samples_ = dst_nb_samples_; - } - - ret = swr_convert(swr_ctx_, dst_data_, dst_nb_samples_, (const uint8_t 
**)src_data_, src_nb_samples_); - if (ret < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Error while converting"); - } - - int dst_bufsize = av_samples_get_buffer_size(&dst_linesize_, dst_nb_channels_, - ret, dst_sample_fmt_, 1); - if (dst_bufsize < 0) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not get sample buffer size"); - } - - int max = size; - size = 0; - if (max >= dst_bufsize) { - memcpy(buf, dst_data_[0], dst_bufsize); - size = dst_bufsize; - } - - return err; -} - -SrsAudioRecode::SrsAudioRecode(SrsAudioCodecId src_codec, SrsAudioCodecId dst_codec,int channels, int samplerate) - : dst_channels_(channels), - dst_samplerate_(samplerate), - src_codec_(src_codec), - dst_codec_(dst_codec) -{ - size_ = 0; - data_ = NULL; - - dec_ = NULL; - enc_ = NULL; - resample_ = NULL; -} - -SrsAudioRecode::~SrsAudioRecode() -{ - srs_freep(dec_); - srs_freep(enc_); - srs_freep(resample_); - srs_freepa(data_); -} - -srs_error_t SrsAudioRecode::initialize() -{ - srs_error_t err = srs_success; - - dec_ = new SrsAudioDecoder(src_codec_); - if ((err = dec_->initialize()) != srs_success) { - return srs_error_wrap(err, "dec init"); - } - - enc_ = new SrsAudioEncoder(dst_codec_, dst_samplerate_, dst_channels_); - if ((err = enc_->initialize()) != srs_success) { - return srs_error_wrap(err, "enc init"); - } - - enc_want_bytes_ = enc_->want_bytes(); - if (enc_want_bytes_ > 0) { - data_ = new char[enc_want_bytes_]; - srs_assert(data_); - } - - return err; -} - -srs_error_t SrsAudioRecode::transcode(SrsSample *pkt, char **buf, int *buf_len, int &n) -{ - srs_error_t err = srs_success; - - if (!dec_) { - return srs_error_new(ERROR_RTC_RTP_MUXER, "dec_ nullptr"); - } - - int decode_len = kPacketBufMax; - static char decode_buffer[kPacketBufMax]; - if ((err = dec_->decode(pkt, decode_buffer, decode_len)) != srs_success) { - return srs_error_wrap(err, "decode error"); - } - - if (!resample_) { - int channel_layout = av_get_default_channel_layout(dst_channels_); - 
AVCodecContext *codec_ctx = dec_->codec_ctx(); - resample_ = new SrsAudioResample(codec_ctx->sample_rate, (int)codec_ctx->channel_layout, \ - codec_ctx->sample_fmt, codec_ctx->frame_size, dst_samplerate_, channel_layout, \ - enc_->codec_ctx()->sample_fmt); - if ((err = resample_->initialize()) != srs_success) { - return srs_error_wrap(err, "init resample"); + if (next_out_pts_ == AV_NOPTS_VALUE) { + next_out_pts_ = new_pkt_pts_; + } else { + int64_t diff = llabs(new_pkt_pts_ - next_out_pts_); + if (diff > 1000) { + srs_trace("time diff to large=%lld, next out=%lld, new pkt=%lld, set to new pkt", + diff, next_out_pts_, new_pkt_pts_); + next_out_pts_ = new_pkt_pts_; } } - SrsSample pcm; - pcm.bytes = decode_buffer; - pcm.size = decode_len; - int resample_len = kFrameBufMax; - static char resample_buffer[kFrameBufMax]; - static char encode_buffer[kPacketBufMax]; - if ((err = resample_->resample(&pcm, resample_buffer, resample_len)) != srs_success) { - return srs_error_wrap(err, "resample error"); - } - - n = 0; - - // We can encode it in one time. - if (enc_want_bytes_ <= 0) { - int encode_len; - pcm.bytes = (char *)data_; - pcm.size = size_; - if ((err = enc_->encode(&pcm, encode_buffer, encode_len)) != srs_success) { - return srs_error_wrap(err, "encode error"); + int frame_cnt = 0; + while (av_audio_fifo_size(fifo_) >= enc_->frame_size) { + /* Read as many samples from the FIFO buffer as required to fill the frame. + * The samples are stored in the frame temporarily. 
*/ + if (av_audio_fifo_read(fifo_, (void **)enc_frame_->data, enc_->frame_size) < enc_->frame_size) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not read data from FIFO"); + } + /* send the frame for encoding */ + enc_frame_->pts = next_out_pts_ + av_rescale(enc_->frame_size * frame_cnt, 1000, enc_->sample_rate); + ++frame_cnt; + int error = avcodec_send_frame(enc_, enc_frame_); + if (error < 0) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Error sending the frame to the encoder(%d,%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); } - memcpy(buf[n], encode_buffer, encode_len); - buf_len[n] = encode_len; - n++; + av_init_packet(enc_packet_); + enc_packet_->data = NULL; + enc_packet_->size = 0; + /* read all the available output packets (in general there may be any + * number of them */ + while (error >= 0) { + error = avcodec_receive_packet(enc_, enc_packet_); + if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) { + break; + } else if (error < 0) { + free_frames(pkts); + return srs_error_new(ERROR_RTC_RTP_MUXER, "Error during decoding(%d,%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); + } - return err; - } - - // Need to refill the sample to data, because the frame size is not matched to encoder. 
- int data_left = resample_len; - if (size_ + data_left < enc_want_bytes_) { - memcpy(data_ + size_, resample_buffer, data_left); - size_ += data_left; - return err; - } - - int index = 0; - while (1) { - data_left = data_left - (enc_want_bytes_ - size_); - memcpy(data_ + size_, resample_buffer + index, enc_want_bytes_ - size_); - index += enc_want_bytes_ - size_; - size_ += enc_want_bytes_ - size_; - - int encode_len; - pcm.bytes = (char *)data_; - pcm.size = size_; - if ((err = enc_->encode(&pcm, encode_buffer, encode_len)) != srs_success) { - return srs_error_wrap(err, "encode error"); - } - - if (encode_len > 0) { - memcpy(buf[n], encode_buffer, encode_len); - buf_len[n] = encode_len; - n++; - } - - size_ = 0; - if(!data_left) { - break; - } - - if(data_left < enc_want_bytes_) { - memcpy(data_ + size_, resample_buffer + index, data_left); - size_ += data_left; - break; + SrsAudioFrame *out_frame = new SrsAudioFrame; + char *buf = new char[enc_packet_->size]; + memcpy(buf, enc_packet_->data, enc_packet_->size); + out_frame->add_sample(buf, enc_packet_->size); + out_frame->dts = enc_packet_->dts; + out_frame->cts = enc_packet_->pts - enc_packet_->dts; + pkts.push_back(out_frame); } } - return err; + next_out_pts_ += av_rescale(enc_->frame_size * frame_cnt, 1000, enc_->sample_rate); + + return srs_success; } + +srs_error_t SrsAudioTranscoder::add_samples_to_fifo(uint8_t **samples, int frame_size) +{ + char err_buf[AV_ERROR_MAX_STRING_SIZE] = {0}; + + int error; + + /* Make the FIFO as large as it needs to be to hold both, + * the old and the new samples. */ + if ((error = av_audio_fifo_realloc(fifo_, av_audio_fifo_size(fifo_) + frame_size)) < 0) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not reallocate FIFO(%d,%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); + } + + /* Store the new samples in the FIFO buffer. 
*/ + if ((error = av_audio_fifo_write(fifo_, (void **)samples, frame_size)) < frame_size) { + return srs_error_new(ERROR_RTC_RTP_MUXER, "Could not write data to FIFO(%d,%s)", error, + av_make_error_string(err_buf, AV_ERROR_MAX_STRING_SIZE, error)); + } + + return srs_success; +} + +void SrsAudioTranscoder::free_swr_samples() +{ + if (swr_data_) { + av_freep(&swr_data_[0]); + free(swr_data_); + swr_data_ = NULL; + } +} + diff --git a/trunk/src/app/srs_app_rtc_codec.hpp b/trunk/src/app/srs_app_rtc_codec.hpp index b9da7daf5..e38f184a5 100644 --- a/trunk/src/app/srs_app_rtc_codec.hpp +++ b/trunk/src/app/srs_app_rtc_codec.hpp @@ -26,6 +26,8 @@ #include +#include + #include #ifdef __cplusplus @@ -39,98 +41,59 @@ extern "C" { #include #include #include +#include #ifdef __cplusplus } #endif -class SrsSample; - -class SrsAudioDecoder +class SrsAudioTranscoder { private: - AVFrame* frame_; - AVPacket* packet_; - AVCodecContext* codec_ctx_; - SrsAudioCodecId codec_id_; -public: - //Only support "aac","opus" - SrsAudioDecoder(SrsAudioCodecId codec); - virtual ~SrsAudioDecoder(); - srs_error_t initialize(); - virtual srs_error_t decode(SrsSample *pkt, char *buf, int &size); - AVCodecContext* codec_ctx(); -}; + AVCodecContext *dec_; + AVFrame *dec_frame_; + AVPacket *dec_packet_; -class SrsAudioEncoder -{ + AVCodecContext *enc_; + AVFrame *enc_frame_; + AVPacket *enc_packet_; + + SwrContext *swr_; + // Buffer for swr output. + uint8_t **swr_data_; + AVAudioFifo *fifo_; + + int64_t new_pkt_pts_; + int64_t next_out_pts_; +public: + SrsAudioTranscoder(); + virtual ~SrsAudioTranscoder(); +public: + // Initialize the transcoder, which transcodes from one codec to another. + // The channels specifies the number of output channels for encoder, for example, 2. + // The sample_rate specifies the sample rate of encoder, for example, 48000. + // The bit_rate specifies the bitrate of encoder, for example, 48000. 
+ srs_error_t initialize(SrsAudioCodecId from, SrsAudioCodecId to, int channels, int sample_rate, int bit_rate); + // Transcode the input audio frame in, as output audio frames outs. + virtual srs_error_t transcode(SrsAudioFrame* in, std::vector& outs); + // Free the generated audio frames by transcode. + void free_frames(std::vector& frames); +public: + // Get the aac codec header, for example, FLV sequence header. + // @remark User should never free the data, it's managed by this transcoder. + void aac_codec_header(uint8_t** data, int* len); private: - int channels_; - int sampling_rate_; - AVCodecContext* codec_ctx_; - SrsAudioCodecId codec_id_; - int want_bytes_; - AVFrame* frame_; -public: - //Only support "aac","opus" - SrsAudioEncoder(SrsAudioCodecId codec, int samplerate, int channelsy); - virtual ~SrsAudioEncoder(); - srs_error_t initialize(); - //The encoder wanted bytes to call encode, if > 0, caller must feed the same bytes - //Call after initialize successed - int want_bytes(); - virtual srs_error_t encode(SrsSample *frame, char *buf, int &size); - AVCodecContext* codec_ctx(); -}; + srs_error_t init_dec(SrsAudioCodecId from); + srs_error_t init_enc(SrsAudioCodecId to, int channels, int samplerate, int bit_rate); + srs_error_t init_swr(AVCodecContext* decoder); + srs_error_t init_fifo(); -class SrsAudioResample -{ -private: - int src_rate_; - int src_ch_layout_; - int src_nb_channels_; - enum AVSampleFormat src_sample_fmt_; - int src_linesize_; - int src_nb_samples_; - uint8_t **src_data_; + srs_error_t decode_and_resample(SrsAudioFrame* pkt); + srs_error_t encode(std::vector &pkts); - int dst_rate_; - int dst_ch_layout_; - int dst_nb_channels_; - enum AVSampleFormat dst_sample_fmt_; - int dst_linesize_; - int dst_nb_samples_; - uint8_t **dst_data_; - - int max_dst_nb_samples_; - struct SwrContext *swr_ctx_; -public: - SrsAudioResample(int src_rate, int src_layout, enum AVSampleFormat src_fmt, - int src_nb, int dst_rate, int dst_layout, enum 
AVSampleFormat dst_fmt); - virtual ~SrsAudioResample(); - srs_error_t initialize(); - virtual srs_error_t resample(SrsSample *pcm, char *buf, int &size); -}; - -// TODO: FIXME: Rename to Transcoder. -class SrsAudioRecode -{ -private: - SrsAudioDecoder *dec_; - SrsAudioEncoder *enc_; - SrsAudioResample *resample_; - int dst_channels_; - int dst_samplerate_; - int size_; - char *data_; - SrsAudioCodecId src_codec_; - SrsAudioCodecId dst_codec_; - int enc_want_bytes_; -public: - SrsAudioRecode(SrsAudioCodecId src_codec, SrsAudioCodecId dst_codec,int channels, int samplerate); - virtual ~SrsAudioRecode(); - srs_error_t initialize(); - virtual srs_error_t transcode(SrsSample *pkt, char **buf, int *buf_len, int &n); + srs_error_t add_samples_to_fifo(uint8_t** samples, int frame_size); + void free_swr_samples(); }; #endif /* SRS_APP_AUDIO_RECODE_HPP */ + diff --git a/trunk/src/app/srs_app_rtc_source.cpp b/trunk/src/app/srs_app_rtc_source.cpp index a1a3d699d..4e2108d9d 100644 --- a/trunk/src/app/srs_app_rtc_source.cpp +++ b/trunk/src/app/srs_app_rtc_source.cpp @@ -70,11 +70,6 @@ const int kAudioSamplerate = 48000; const int kVideoPayloadType = 102; const int kVideoSamplerate = 90000; -// An AAC packet may be transcoded to many OPUS packets. -const int kMaxOpusPackets = 8; -// The max size for each OPUS packet. 
-const int kMaxOpusPacketSize = 4096; - // The RTP payload max size, reserved some paddings for SRTP as such: // kRtpPacketSize = kRtpMaxPayloadSize + paddings // For example, if kRtpPacketSize is 1500, recommend to set kRtpMaxPayloadSize to 1400, @@ -632,12 +627,11 @@ SrsRtcFromRtmpBridger::SrsRtcFromRtmpBridger(SrsRtcStream* source) req = NULL; source_ = source; format = new SrsRtmpFormat(); - codec = new SrsAudioRecode(SrsAudioCodecIdAAC, SrsAudioCodecIdOpus, kAudioChannel, kAudioSamplerate); + codec_ = new SrsAudioTranscoder(); discard_aac = false; discard_bframe = false; merge_nalus = false; meta = new SrsMetaCache(); - audio_timestamp = 0; audio_sequence = 0; video_sequence = 0; @@ -687,7 +681,7 @@ SrsRtcFromRtmpBridger::SrsRtcFromRtmpBridger(SrsRtcStream* source) SrsRtcFromRtmpBridger::~SrsRtcFromRtmpBridger() { srs_freep(format); - srs_freep(codec); + srs_freep(codec_); srs_freep(meta); } @@ -701,7 +695,8 @@ srs_error_t SrsRtcFromRtmpBridger::initialize(SrsRequest* r) return srs_error_wrap(err, "format initialize"); } - if ((err = codec->initialize()) != srs_success) { + int bitrate = 48000; // The output bitrate in bps. + if ((err = codec_->initialize(SrsAudioCodecIdAAC, SrsAudioCodecIdOpus, kAudioChannel, kAudioSamplerate, bitrate)) != srs_success) { return srs_error_wrap(err, "init codec"); } @@ -779,72 +774,58 @@ srs_error_t SrsRtcFromRtmpBridger::on_audio(SrsSharedPtrMessage* msg) return srs_error_wrap(err, "aac append header"); } - if (adts_audio) { - err = transcode(adts_audio, nn_adts_audio); - srs_freep(adts_audio); + if (!adts_audio) { + return err; } + SrsAudioFrame aac; + aac.dts = format->audio->dts; + aac.cts = format->audio->cts; + if ((err = aac.add_sample(adts_audio, nn_adts_audio)) == srs_success) { + // If OK, transcode the AAC to Opus and consume it. 
+ err = transcode(&aac); + } + + srs_freepa(adts_audio); + return err; } -srs_error_t SrsRtcFromRtmpBridger::transcode(char* adts_audio, int nn_adts_audio) +srs_error_t SrsRtcFromRtmpBridger::transcode(SrsAudioFrame* pkt) { srs_error_t err = srs_success; - // Opus packet cache. - static char* opus_payloads[kMaxOpusPackets]; - - static bool initialized = false; - if (!initialized) { - initialized = true; - - static char opus_packets_cache[kMaxOpusPackets][kMaxOpusPacketSize]; - opus_payloads[0] = &opus_packets_cache[0][0]; - for (int i = 1; i < kMaxOpusPackets; i++) { - opus_payloads[i] = opus_packets_cache[i]; - } - } - - // Transcode an aac packet to many opus packets. - SrsSample aac; - aac.bytes = adts_audio; - aac.size = nn_adts_audio; - - int nn_opus_packets = 0; - int opus_sizes[kMaxOpusPackets]; - if ((err = codec->transcode(&aac, opus_payloads, opus_sizes, nn_opus_packets)) != srs_success) { + std::vector out_pkts; + if ((err = codec_->transcode(pkt, out_pkts)) != srs_success) { return srs_error_wrap(err, "recode error"); } // Save OPUS packets in shared message. - if (nn_opus_packets <= 0) { + if (out_pkts.empty()) { return err; } - int nn_max_extra_payload = 0; - for (int i = 0; i < nn_opus_packets; i++) { - char* data = (char*)opus_payloads[i]; - int size = (int)opus_sizes[i]; - - // TODO: FIXME: Use it to padding audios. 
- nn_max_extra_payload = srs_max(nn_max_extra_payload, size); - + for (std::vector::iterator it = out_pkts.begin(); it != out_pkts.end(); ++it) { SrsRtpPacketCacheHelper* helper = new SrsRtpPacketCacheHelper(); SrsAutoFree(SrsRtpPacketCacheHelper, helper); - if ((err = package_opus(data, size, helper)) != srs_success) { - return srs_error_wrap(err, "package opus"); + if ((err = package_opus(*it, helper)) != srs_success) { + err = srs_error_wrap(err, "package opus"); + break; } if ((err = source_->on_rtp(helper->pkt)) != srs_success) { - return srs_error_wrap(err, "consume opus"); + err = srs_error_wrap(err, "consume opus"); + break; } } + codec_->free_frames(out_pkts); + return err; } -srs_error_t SrsRtcFromRtmpBridger::package_opus(char* data, int size, SrsRtpPacketCacheHelper* helper) +srs_error_t SrsRtcFromRtmpBridger::package_opus(SrsAudioFrame* audio, SrsRtpPacketCacheHelper* helper) { srs_error_t err = srs_success; @@ -854,16 +835,14 @@ srs_error_t SrsRtcFromRtmpBridger::package_opus(char* data, int size, SrsRtpPack pkt->frame_type = SrsFrameTypeAudio; pkt->header.set_marker(true); pkt->header.set_sequence(audio_sequence++); - pkt->header.set_timestamp(audio_timestamp); - - // TODO: FIXME: Why 960? Need Refactoring? 
- audio_timestamp += 960; + pkt->header.set_timestamp(audio->dts * 48); SrsRtpRawPayload* raw = _srs_rtp_raw_cache->allocate(); pkt->set_payload(raw, SrsRtpPacketPayloadTypeRaw); - raw->payload = pkt->wrap(data, size); - raw->nn_payload = size; + srs_assert(audio->nb_samples == 1); + raw->payload = pkt->wrap(audio->samples[0].bytes, audio->samples[0].size); + raw->nn_payload = audio->samples[0].size; return err; } diff --git a/trunk/src/app/srs_app_rtc_source.hpp b/trunk/src/app/srs_app_rtc_source.hpp index e29711feb..1495f3eaf 100644 --- a/trunk/src/app/srs_app_rtc_source.hpp +++ b/trunk/src/app/srs_app_rtc_source.hpp @@ -45,7 +45,7 @@ class SrsCommonMessage; class SrsMessageArray; class SrsRtcStream; class SrsRtcFromRtmpBridger; -class SrsAudioRecode; +class SrsAudioTranscoder; class SrsRtpPacket2; class SrsRtpPacketCacheHelper; class SrsSample; @@ -263,10 +263,9 @@ private: SrsMetaCache* meta; private: bool discard_aac; - SrsAudioRecode* codec; + SrsAudioTranscoder* codec_; bool discard_bframe; bool merge_nalus; - uint32_t audio_timestamp; uint16_t audio_sequence; uint16_t video_sequence; uint32_t audio_ssrc; @@ -280,8 +279,8 @@ public: virtual void on_unpublish(); virtual srs_error_t on_audio(SrsSharedPtrMessage* msg); private: - srs_error_t transcode(char* adts_audio, int nn_adts_audio); - srs_error_t package_opus(char* data, int size, SrsRtpPacketCacheHelper* helper); + srs_error_t transcode(SrsAudioFrame* audio); + srs_error_t package_opus(SrsAudioFrame* audio, SrsRtpPacketCacheHelper* helper); public: virtual srs_error_t on_video(SrsSharedPtrMessage* msg); private: diff --git a/trunk/src/core/srs_core_version4.hpp b/trunk/src/core/srs_core_version4.hpp index 25dcf6fdb..a4c3312f9 100644 --- a/trunk/src/core/srs_core_version4.hpp +++ b/trunk/src/core/srs_core_version4.hpp @@ -26,6 +26,6 @@ #define VERSION_MAJOR 4 #define VERSION_MINOR 0 -#define VERSION_REVISION 93 +#define VERSION_REVISION 94 #endif diff --git a/trunk/src/kernel/srs_kernel_codec.hpp 
b/trunk/src/kernel/srs_kernel_codec.hpp index 56b0bf1d5..a4c0b3284 100644 --- a/trunk/src/kernel/srs_kernel_codec.hpp +++ b/trunk/src/kernel/srs_kernel_codec.hpp @@ -650,9 +650,8 @@ public: virtual bool is_avc_codec_ok(); }; -/** - * A frame, consists of a codec and a group of samples. - */ +// A frame consists of a codec and a group of samples. +// TODO: FIXME: Rename to packet to follow names of FFmpeg, which means before decoding or after encoding. class SrsFrame { public: @@ -677,9 +676,8 @@ public: virtual srs_error_t add_sample(char* bytes, int size); }; -/** - * A audio frame, besides a frame, contains the audio frame info, such as frame type. - */ +// An audio frame, besides a frame, contains the audio frame info, such as frame type. +// TODO: FIXME: Rename to packet to follow names of FFmpeg, which means before decoding or after encoding. class SrsAudioFrame : public SrsFrame { public: @@ -691,9 +689,8 @@ public: virtual SrsAudioCodecConfig* acodec(); }; -/** - * A video frame, besides a frame, contains the video frame info, such as frame type. - */ +// A video frame, besides a frame, contains the video frame info, such as frame type. +// TODO: FIXME: Rename to packet to follow names of FFmpeg, which means before decoding or after encoding. class SrsVideoFrame : public SrsFrame { public: