From 95defe6dad9cc5a206b02e628a931a1974f00d6c Mon Sep 17 00:00:00 2001 From: winlin Date: Sun, 25 Dec 2022 21:11:08 +0800 Subject: [PATCH] MP3: Support dump stream information. v5.0.117 (#296) (#3339) --- trunk/doc/CHANGELOG.md | 1 + trunk/src/app/srs_app_dvr.cpp | 2 +- trunk/src/app/srs_app_source.cpp | 137 ++++++++++++++------------ trunk/src/app/srs_app_source.hpp | 4 +- trunk/src/core/srs_core_version5.hpp | 2 +- trunk/src/kernel/srs_kernel_codec.cpp | 15 ++- trunk/src/kernel/srs_kernel_codec.hpp | 12 ++- trunk/src/utest/srs_utest_kernel.cpp | 4 +- 8 files changed, 100 insertions(+), 77 deletions(-) diff --git a/trunk/doc/CHANGELOG.md b/trunk/doc/CHANGELOG.md index 256dc2b4a..8439f7999 100644 --- a/trunk/doc/CHANGELOG.md +++ b/trunk/doc/CHANGELOG.md @@ -8,6 +8,7 @@ The changelog for SRS. ## SRS 5.0 Changelog +* v5.0, 2022-12-25, For [#296](https://github.com/ossrs/srs/issues/296): MP3: Support dump stream information. v5.0.117 * v5.0, 2022-12-25, For [#296](https://github.com/ossrs/srs/issues/296): MP3: Support mp3 for RTMP/HLS/HTTP-FLV/HTTP-TS/HLS etc. v5.0.116 * v5.0, 2022-12-24, Fix [#3328](https://github.com/ossrs/srs/issues/3328): Docker: Avoiding duplicated copy files. v5.0.115 * v5.0, 2022-12-20, Merge [#3321](https://github.com/ossrs/srs/pull/3321): GB: Refine lazy object GC. v5.0.114 diff --git a/trunk/src/app/srs_app_dvr.cpp b/trunk/src/app/srs_app_dvr.cpp index 0098a722a..db0048869 100644 --- a/trunk/src/app/srs_app_dvr.cpp +++ b/trunk/src/app/srs_app_dvr.cpp @@ -462,7 +462,7 @@ srs_error_t SrsDvrMp4Segmenter::encode_audio(SrsSharedPtrMessage* audio, SrsForm SrsAudioChannels channels = format->acodec->sound_type; SrsAudioAacFrameTrait ct = format->audio->aac_packet_type; - if (ct == SrsAudioAacFrameTraitSequenceHeader || ct == SrsAudioMp3FrameTrait) { + if (ct == SrsAudioAacFrameTraitSequenceHeader || ct == SrsAudioMp3FrameTraitSequenceHeader) { enc->acodec = sound_format; enc->sample_rate = sound_rate; enc->sound_bits = sound_size; diff --git a/trunk/src/app/srs_app_source.cpp b/trunk/src/app/srs_app_source.cpp index d52eed4e4..967c4f793 100755 --- a/trunk/src/app/srs_app_source.cpp +++ b/trunk/src/app/srs_app_source.cpp @@ -832,7 +832,6 @@ SrsOriginHub::SrsOriginHub() hds = new SrsHds(); #endif ng_exec = new SrsNgExec(); - format = new SrsRtmpFormat(); _srs_config->subscribe(this); } @@ -850,8 +849,7 @@ SrsOriginHub::~SrsOriginHub() forwarders.clear(); } srs_freep(ng_exec); - - srs_freep(format); + srs_freep(hls); srs_freep(dash); srs_freep(dvr); @@ -868,13 +866,6 @@ srs_error_t SrsOriginHub::initialize(SrsLiveSource* s, SrsRequest* r) req_ = r; source = s; - if ((err = format->initialize()) != srs_success) { - return srs_error_wrap(err, "format initialize"); - } - - // Setup the SPS/PPS parsing strategy. - format->try_annexb_first = _srs_config->try_annexb_first(r->vhost); - if ((err = hls->initialize(this, req_)) != srs_success) { return srs_error_wrap(err, "hls initialize"); } @@ -920,10 +911,6 @@ srs_error_t SrsOriginHub::on_meta_data(SrsSharedPtrMessage* shared_metadata, Srs { srs_error_t err = srs_success; - if ((err = format->on_metadata(packet)) != srs_success) { - return srs_error_wrap(err, "Format parse metadata"); - } - // copy to all forwarders if (true) { std::vector::iterator it; @@ -947,21 +934,10 @@ srs_error_t SrsOriginHub::on_audio(SrsSharedPtrMessage* shared_audio) srs_error_t err = srs_success; SrsSharedPtrMessage* msg = shared_audio; - - // TODO: FIXME: Support parsing OPUS for RTC. - if ((err = format->on_audio(msg)) != srs_success) { - return srs_error_wrap(err, "format consume audio"); - } - - // Ignore if no format->acodec, it means the codec is not parsed, or unsupport/unknown codec - // such as G.711 codec - if (!format->acodec) { - return err; - } + SrsRtmpFormat* format = source->format_; - // cache the sequence header if aac - // donot cache the sequence header to gop_cache, return here. - if (format->is_aac_sequence_header()) { + // Handle the metadata when got sequence header. + if (format->is_aac_sequence_header() || format->is_mp3_sequence_header()) { srs_assert(format->acodec); SrsAudioCodecConfig* c = format->acodec; @@ -970,15 +946,21 @@ srs_error_t SrsOriginHub::on_audio(SrsSharedPtrMessage* shared_audio) // when got audio stream info. SrsStatistic* stat = SrsStatistic::instance(); - if ((err = stat->on_audio_info(req_, SrsAudioCodecIdAAC, c->sound_rate, c->sound_type, c->aac_object)) != srs_success) { + if ((err = stat->on_audio_info(req_, format->acodec->id, c->sound_rate, c->sound_type, c->aac_object)) != srs_success) { return srs_error_wrap(err, "stat audio"); } - - srs_trace("%dB audio sh, codec(%d, profile=%s, %dchannels, %dkbps, %dHZ), flv(%dbits, %dchannels, %dHZ)", - msg->size, c->id, srs_aac_object2str(c->aac_object).c_str(), c->aac_channels, - c->audio_data_rate / 1000, srs_aac_srates[c->aac_sample_rate], - flv_sample_sizes[c->sound_size], flv_sound_types[c->sound_type], - srs_flv_srates[c->sound_rate]); + + if (format->acodec->id == SrsAudioCodecIdMP3) { + srs_trace("%dB audio sh, codec(%d, %dbits, %dchannels, %dHZ)", + msg->size, c->id, flv_sample_sizes[c->sound_size], flv_sound_types[c->sound_type], + srs_flv_srates[c->sound_rate]); + } else { + srs_trace("%dB audio sh, codec(%d, profile=%s, %dchannels, %dkbps, %dHZ), flv(%dbits, %dchannels, %dHZ)", + msg->size, c->id, srs_aac_object2str(c->aac_object).c_str(), c->aac_channels, + c->audio_data_rate / 1000, srs_aac_srates[c->aac_sample_rate], + flv_sample_sizes[c->sound_size], flv_sound_types[c->sound_type], + srs_flv_srates[c->sound_rate]); + } } if ((err = hls->on_audio(msg, format)) != srs_success) { @@ -1039,22 +1021,7 @@ srs_error_t SrsOriginHub::on_video(SrsSharedPtrMessage* shared_video, bool is_se srs_error_t err = srs_success; SrsSharedPtrMessage* msg = shared_video; - - // user can disable the sps parse to workaround when parse sps failed. - // @see https://github.com/ossrs/srs/issues/474 - if (is_sequence_header) { - format->avc_parse_sps = _srs_config->get_parse_sps(req_->vhost); - } - - if ((err = format->on_video(msg)) != srs_success) { - return srs_error_wrap(err, "format consume video"); - } - - // Ignore if no format->vcodec, it means the codec is not parsed, or unsupport/unknown codec - // such as H.263 codec - if (!format->vcodec) { - return err; - } + SrsRtmpFormat* format = source->format_; // cache the sequence header if h264 // donot cache the sequence header to gop_cache, return here. @@ -1295,6 +1262,8 @@ srs_error_t SrsOriginHub::on_reload_vhost_dash(string vhost) if ((err = dash->on_publish()) != srs_success) { return srs_error_wrap(err, "dash start publish"); } + + SrsRtmpFormat* format = source->format_; SrsSharedPtrMessage* cache_sh_video = source->meta->vsh(); if (cache_sh_video) { @@ -1340,6 +1309,8 @@ srs_error_t SrsOriginHub::on_reload_vhost_hls(string vhost) return srs_error_wrap(err, "hls publish failed"); } srs_trace("vhost %s hls reload success", vhost.c_str()); + + SrsRtmpFormat* format = source->format_; // when publish, don't need to fetch sequence header, which is old and maybe corrupt. // when reload, we must fetch the sequence header from source cache. @@ -1664,8 +1635,10 @@ srs_error_t SrsMetaCache::dumps(SrsLiveConsumer* consumer, bool atc, SrsRtmpJitt // copy sequence header // copy audio sequence first, for hls to fast parse the "right" audio codec. // @see https://github.com/ossrs/srs/issues/301 - if (ds && audio && (err = consumer->enqueue(audio, atc, ag)) != srs_success) { - return srs_error_wrap(err, "enqueue audio sh"); + if (aformat && aformat->acodec && aformat->acodec->id != SrsAudioCodecIdMP3) { + if (ds && audio && (err = consumer->enqueue(audio, atc, ag)) != srs_success) { + return srs_error_wrap(err, "enqueue audio sh"); + } } if (ds && video && (err = consumer->enqueue(video, atc, ag)) != srs_success) { @@ -1952,6 +1925,7 @@ SrsLiveSource::SrsLiveSource() gop_cache = new SrsGopCache(); hub = new SrsOriginHub(); meta = new SrsMetaCache(); + format_ = new SrsRtmpFormat(); is_monotonically_increase = false; last_packet_time = 0; @@ -1967,7 +1941,8 @@ SrsLiveSource::~SrsLiveSource() // never free the consumers, // for all consumers are auto free. consumers.clear(); - + + srs_freep(format_); srs_freep(hub); srs_freep(meta); srs_freep(mix_queue); @@ -2032,6 +2007,13 @@ srs_error_t SrsLiveSource::initialize(SrsRequest* r, ISrsLiveSourceHandler* h) handler = h; req = r->copy(); atc = _srs_config->get_atc(req->vhost); + + if ((err = format_->initialize()) != srs_success) { + return srs_error_wrap(err, "format initialize"); + } + + // Setup the SPS/PPS parsing strategy. + format_->try_annexb_first = _srs_config->try_annexb_first(r->vhost); if ((err = hub->initialize(this, req)) != srs_success) { return srs_error_wrap(err, "hub"); @@ -2200,6 +2182,10 @@ bool SrsLiveSource::can_publish(bool is_edge) srs_error_t SrsLiveSource::on_meta_data(SrsCommonMessage* msg, SrsOnMetaDataPacket* metadata) { srs_error_t err = srs_success; + + if ((err = format_->on_metadata(metadata)) != srs_success) { + return srs_error_wrap(err, "Format parse metadata"); + } // if allow atc_auto and bravo-atc detected, open atc for vhost. SrsAmf0Any* prop = NULL; @@ -2291,10 +2277,21 @@ srs_error_t SrsLiveSource::on_audio(SrsCommonMessage* shared_audio) srs_error_t SrsLiveSource::on_audio_imp(SrsSharedPtrMessage* msg) { srs_error_t err = srs_success; - - bool is_aac_sequence_header = SrsFlvAudio::sh(msg->payload, msg->size); - bool is_sequence_header = is_aac_sequence_header; - + + // TODO: FIXME: Support parsing OPUS for RTC. + if ((err = format_->on_audio(msg)) != srs_success) { + return srs_error_wrap(err, "format consume audio"); + } + + // Ignore if no format->acodec, it means the codec is not parsed, or unsupport/unknown codec + // such as G.711 codec + if (!format_->acodec) { + return err; + } + + // Whether current packet is sequence header. Note that MP3 does not have one, but we use the first packet as it. + bool is_sequence_header = format_->is_aac_sequence_header() || format_->is_mp3_sequence_header(); + // whether consumer should drop for the duplicated sequence header. bool drop_for_reduce = false; if (is_sequence_header && meta->previous_ash() && _srs_config->get_reduce_sequence_header(req->vhost)) { @@ -2324,10 +2321,8 @@ srs_error_t SrsLiveSource::on_audio_imp(SrsSharedPtrMessage* msg) } } - // cache the sequence header of aac, or first packet of mp3. - // for example, the mp3 is used for hls to write the "right" audio codec. - // TODO: FIXME: to refine the stream info system. - if (is_aac_sequence_header || !meta->ash()) { + // Refresh the sequence header in metadata. + if (is_sequence_header || !meta->ash()) { if ((err = meta->update_ash(msg)) != srs_success) { return srs_error_wrap(err, "meta consume audio"); } @@ -2416,8 +2411,24 @@ srs_error_t SrsLiveSource::on_video(SrsCommonMessage* shared_video) srs_error_t SrsLiveSource::on_video_imp(SrsSharedPtrMessage* msg) { srs_error_t err = srs_success; - + bool is_sequence_header = SrsFlvVideo::sh(msg->payload, msg->size); + + // user can disable the sps parse to workaround when parse sps failed. + // @see https://github.com/ossrs/srs/issues/474 + if (is_sequence_header) { + format_->avc_parse_sps = _srs_config->get_parse_sps(req->vhost); + } + + if ((err = format_->on_video(msg)) != srs_success) { + return srs_error_wrap(err, "format consume video"); + } + + // Ignore if no format->vcodec, it means the codec is not parsed, or unsupport/unknown codec + // such as H.263 codec + if (!format_->vcodec) { + return err; + } // whether consumer should drop for the duplicated sequence header. bool drop_for_reduce = false; diff --git a/trunk/src/app/srs_app_source.hpp b/trunk/src/app/srs_app_source.hpp index 9c8b7df6b..da47ec1c1 100644 --- a/trunk/src/app/srs_app_source.hpp +++ b/trunk/src/app/srs_app_source.hpp @@ -318,8 +318,6 @@ private: SrsRequest* req_; bool is_active; private: - // The format, codec information. - SrsRtmpFormat* format; // hls handler. SrsHls* hls; // The DASH encoder. @@ -530,6 +528,8 @@ private: SrsOriginHub* hub; // The metadata cache. SrsMetaCache* meta; + // The format, codec information. + SrsRtmpFormat* format_; private: // Whether source is avaiable for publishing. bool _can_publish; diff --git a/trunk/src/core/srs_core_version5.hpp b/trunk/src/core/srs_core_version5.hpp index da08ee970..eff14fa38 100644 --- a/trunk/src/core/srs_core_version5.hpp +++ b/trunk/src/core/srs_core_version5.hpp @@ -9,6 +9,6 @@ #define VERSION_MAJOR 5 #define VERSION_MINOR 0 -#define VERSION_REVISION 116 +#define VERSION_REVISION 117 #endif diff --git a/trunk/src/kernel/srs_kernel_codec.cpp b/trunk/src/kernel/srs_kernel_codec.cpp index 39d914e3c..a153b1283 100644 --- a/trunk/src/kernel/srs_kernel_codec.cpp +++ b/trunk/src/kernel/srs_kernel_codec.cpp @@ -670,7 +670,8 @@ srs_error_t SrsFormat::on_audio(int64_t timestamp, char* data, int size) if (codec != SrsAudioCodecIdMP3 && codec != SrsAudioCodecIdAAC) { return err; } - + + bool fresh = !acodec; if (!acodec) { acodec = new SrsAudioCodecConfig(); } @@ -686,7 +687,7 @@ srs_error_t SrsFormat::on_audio(int64_t timestamp, char* data, int size) buffer->skip(-1 * buffer->pos()); if (codec == SrsAudioCodecIdMP3) { - return audio_mp3_demux(buffer, timestamp); + return audio_mp3_demux(buffer, timestamp, fresh); } return audio_aac_demux(buffer, timestamp); @@ -755,6 +756,12 @@ bool SrsFormat::is_aac_sequence_header() && audio && audio->aac_packet_type == SrsAudioAacFrameTraitSequenceHeader; } +bool SrsFormat::is_mp3_sequence_header() +{ + return acodec && acodec->id == SrsAudioCodecIdMP3 + && audio && audio->aac_packet_type == SrsAudioMp3FrameTraitSequenceHeader; +} + bool SrsFormat::is_avc_sequence_header() { bool h264 = (vcodec && vcodec->id == SrsVideoCodecIdAVC); @@ -1449,13 +1456,13 @@ srs_error_t SrsFormat::audio_aac_demux(SrsBuffer* stream, int64_t timestamp) return err; } -srs_error_t SrsFormat::audio_mp3_demux(SrsBuffer* stream, int64_t timestamp) +srs_error_t SrsFormat::audio_mp3_demux(SrsBuffer* stream, int64_t timestamp, bool fresh) { srs_error_t err = srs_success; audio->cts = 0; audio->dts = timestamp; - audio->aac_packet_type = SrsAudioMp3FrameTrait; + audio->aac_packet_type = fresh ? SrsAudioMp3FrameTraitSequenceHeader : SrsAudioMp3FrameTraitRawData; // @see: E.4.2 Audio Tags, video_file_format_spec_v10_1.pdf, page 76 int8_t sound_format = stream->read_1bytes(); diff --git a/trunk/src/kernel/srs_kernel_codec.hpp b/trunk/src/kernel/srs_kernel_codec.hpp index e4f781bc4..11926d476 100644 --- a/trunk/src/kernel/srs_kernel_codec.hpp +++ b/trunk/src/kernel/srs_kernel_codec.hpp @@ -159,7 +159,8 @@ enum SrsAudioAacFrameTrait // set to the max value to reserved, for array map. SrsAudioAacFrameTraitReserved = 0xff, SrsAudioAacFrameTraitForbidden = 0xff, - + + // For AAC, we detect the sequence header by content. SrsAudioAacFrameTraitSequenceHeader = 0, SrsAudioAacFrameTraitRawData = 1, @@ -170,8 +171,10 @@ enum SrsAudioAacFrameTrait // 16/32 reserved for g711a/g711u - // For MP3 - SrsAudioMp3FrameTrait = 64, + // For MP3 we assume the first packet is sequence header, while it actually is not the same thing, because we do + // this to simplify the workflow, to make sure we can detect the audio codec from the sequence headers. + SrsAudioMp3FrameTraitSequenceHeader = 63, + SrsAudioMp3FrameTraitRawData = 64, }; /** @@ -750,6 +753,7 @@ public: virtual srs_error_t on_aac_sequence_header(char* data, int size); public: virtual bool is_aac_sequence_header(); + virtual bool is_mp3_sequence_header(); virtual bool is_avc_sequence_header(); private: // Demux the video packet in H.264 codec. @@ -776,7 +780,7 @@ private: // Demux the asc from sequence header. // Demux the sampels from RAW data. virtual srs_error_t audio_aac_demux(SrsBuffer* stream, int64_t timestamp); - virtual srs_error_t audio_mp3_demux(SrsBuffer* stream, int64_t timestamp); + virtual srs_error_t audio_mp3_demux(SrsBuffer* stream, int64_t timestamp, bool fresh); public: // Directly demux the sequence header, without RTMP packet header. virtual srs_error_t audio_aac_sequence_header_demux(char* data, int size); diff --git a/trunk/src/utest/srs_utest_kernel.cpp b/trunk/src/utest/srs_utest_kernel.cpp index e817ebe83..37a61dccd 100644 --- a/trunk/src/utest/srs_utest_kernel.cpp +++ b/trunk/src/utest/srs_utest_kernel.cpp @@ -5612,7 +5612,7 @@ VOID TEST(KernelMP4Test, CoverMP4MultipleAVsWithMp3) // Frames order by dts asc. HELPER_EXPECT_SUCCESS(dec.read_sample(&ht, &ft, &ct, &dts, &pts, &sample, &nb_sample)); - EXPECT_EQ(0, (int)dts); EXPECT_EQ(127, (int)nb_sample); EXPECT_EQ(SrsMp4HandlerTypeVIDE, ht); EXPECT_NE(SrsAudioMp3FrameTrait, ct); + EXPECT_EQ(0, (int)dts); EXPECT_EQ(127, (int)nb_sample); EXPECT_EQ(SrsMp4HandlerTypeVIDE, ht); EXPECT_NE(SrsAudioMp3FrameTraitSequenceHeader, ct); srs_freepa(sample); HELPER_EXPECT_SUCCESS(dec.read_sample(&ht, &ft, &ct, &dts, &pts, &sample, &nb_sample)); @@ -5624,7 +5624,7 @@ VOID TEST(KernelMP4Test, CoverMP4MultipleAVsWithMp3) srs_freepa(sample); HELPER_EXPECT_SUCCESS(dec.read_sample(&ht, &ft, &ct, &dts, &pts, &sample, &nb_sample)); - EXPECT_EQ(40, (int)dts); EXPECT_EQ(40, (int)pts); EXPECT_EQ(127, (int)nb_sample); EXPECT_EQ(SrsMp4HandlerTypeVIDE, ht); EXPECT_NE(SrsAudioMp3FrameTrait, ct); + EXPECT_EQ(40, (int)dts); EXPECT_EQ(40, (int)pts); EXPECT_EQ(127, (int)nb_sample); EXPECT_EQ(SrsMp4HandlerTypeVIDE, ht); EXPECT_NE(SrsAudioMp3FrameTraitSequenceHeader, ct); srs_freepa(sample); } }