//
// Copyright (c) 2013-2025 The SRS Authors
//
// SPDX-License-Identifier: MIT
//

#ifndef SRS_KERNEL_CODEC_HPP
#define SRS_KERNEL_CODEC_HPP

// NOTE(review): the include targets are missing in this copy of the file. The
// declarations below use std::string, std::vector and fixed-width integer
// types, so the matching headers must be restored - confirm against upstream.
#include
#include
#include

// Forward declarations.
class SrsBuffer;
class SrsBitBuffer;
class SrsFormat;

// The NALU header size for AVC(H.264).
// @see: https://datatracker.ietf.org/doc/html/rfc6184#section-1.3
const int SrsAvcNaluHeaderSize = 1;
// The NALU header size for HEVC(H.265).
// @see: https://datatracker.ietf.org/doc/html/rfc7798#section-1.1.4
const int SrsHevcNaluHeaderSize = 2;

/**
 * The video codec id.
 * @doc video_file_format_spec_v10_1.pdf, page78, E.4.3.1 VIDEODATA
 * CodecID UB [4]
 * Codec Identifier. The following values are defined for FLV:
 *      2 = Sorenson H.263
 *      3 = Screen video
 *      4 = On2 VP6
 *      5 = On2 VP6 with alpha channel
 *      6 = Screen video version 2
 *      7 = AVC
 *      12 = HEVC
 * @remark The values 0, 1, 8, 9 and 13 below are not in the list above; they
 *      are defined by this header.
 */
enum SrsVideoCodecId {
    // Zero is the reserved value, for array map.
    // Reserved and Forbidden are two names for the same value.
    SrsVideoCodecIdReserved = 0,
    SrsVideoCodecIdForbidden = 0,
    SrsVideoCodecIdReserved1 = 1,
    SrsVideoCodecIdReserved2 = 9,

    // for user to disable video, for example, use pure audio hls.
    SrsVideoCodecIdDisabled = 8,

    SrsVideoCodecIdSorensonH263 = 2,
    SrsVideoCodecIdScreenVideo = 3,
    SrsVideoCodecIdOn2VP6 = 4,
    SrsVideoCodecIdOn2VP6WithAlphaChannel = 5,
    SrsVideoCodecIdScreenVideoVersion2 = 6,
    SrsVideoCodecIdAVC = 7,
    // See page 79 at @doc https://github.com/CDN-Union/H265/blob/master/Document/video_file_format_spec_v10_1_ksyun_20170615.doc
    SrsVideoCodecIdHEVC = 12,
    // https://mp.weixin.qq.com/s/H3qI7zsON5sdf4oDJ9qlkg
    SrsVideoCodecIdAV1 = 13,
};
// NOTE(review): presumably returns a printable name for the codec id; the
// definition is not part of this header - confirm.
std::string srs_video_codec_id2str(SrsVideoCodecId codec);

/**
 * The video AVC frame trait(characteristic).
 * @doc video_file_format_spec_v10_1.pdf, page79, E.4.3.2 AVCVIDEOPACKET
 * AVCPacketType IF CodecID == 7 UI8
 * The following values are defined:
 *      0 = AVC sequence header
 *      1 = AVC NALU
 *      2 = AVC end of sequence (lower level NALU sequence ender is not required or supported)
 * @remark The HEVC packet types are declared in the same enum; values 0, 1 and
 *      2 are shared between the AVC and the HEVC names.
 */
enum SrsVideoAvcFrameTrait {
    // The value 6 (one past the largest packet type below) is the reserved
    // value, for array map.
    SrsVideoAvcFrameTraitReserved = 6,
    SrsVideoAvcFrameTraitForbidden = 6,

    SrsVideoAvcFrameTraitSequenceHeader = 0,
    SrsVideoAvcFrameTraitNALU = 1,
    SrsVideoAvcFrameTraitSequenceHeaderEOF = 2,

    SrsVideoHEVCFrameTraitPacketTypeSequenceStart = 0,
    SrsVideoHEVCFrameTraitPacketTypeCodedFrames = 1,
    SrsVideoHEVCFrameTraitPacketTypeSequenceEnd = 2,
    // CompositionTime Offset is implied to equal zero. This is
    // an optimization to save putting SI24 composition time value of zero on
    // the wire. See pseudo code below in the VideoTagBody section
    SrsVideoHEVCFrameTraitPacketTypeCodedFramesX = 3,
    // VideoTagBody does not contain video data. VideoTagBody
    // instead contains an AMF encoded metadata. See Metadata Frame
    // section for an illustration of its usage. As an example, the metadata
    // can be HDR information. This is a good way to signal HDR
    // information. This also opens up future ways to express additional
    // metadata that is meant for the next video sequence.
    //
    // note: presence of PacketTypeMetadata means that FrameType
    // flags at the top of this table should be ignored
    SrsVideoHEVCFrameTraitPacketTypeMetadata = 4,
    // Carriage of bitstream in MPEG-2 TS format
    SrsVideoHEVCFrameTraitPacketTypeMPEG2TSSequenceStart = 5,
};

/**
 * The video AVC frame type, such as I/P/B.
 * @doc video_file_format_spec_v10_1.pdf, page78, E.4.3.1 VIDEODATA
 * Frame Type UB [4]
 * Type of video frame. The following values are defined:
 *      1 = key frame (for AVC, a seekable frame)
 *      2 = inter frame (for AVC, a non-seekable frame)
 *      3 = disposable inter frame (H.263 only)
 *      4 = generated key frame (reserved for server use only)
 *      5 = video info/command frame
 */
enum SrsVideoAvcFrameType {
    // Zero is the reserved value, for array map.
    SrsVideoAvcFrameTypeReserved = 0,
    SrsVideoAvcFrameTypeForbidden = 0,
    SrsVideoAvcFrameTypeReserved1 = 6,

    SrsVideoAvcFrameTypeKeyFrame = 1,
    SrsVideoAvcFrameTypeInterFrame = 2,
    SrsVideoAvcFrameTypeDisposableInterFrame = 3,
    SrsVideoAvcFrameTypeGeneratedKeyFrame = 4,
    SrsVideoAvcFrameTypeVideoInfoFrame = 5,
};

/**
 * The audio codec id.
 * @doc video_file_format_spec_v10_1.pdf, page 76, E.4.2 Audio Tags
 * SoundFormat UB [4]
 * Format of SoundData. The following values are defined:
 *     0 = Linear PCM, platform endian
 *     1 = ADPCM
 *     2 = MP3
 *     3 = Linear PCM, little endian
 *     4 = Nellymoser 16 kHz mono
 *     5 = Nellymoser 8 kHz mono
 *     6 = Nellymoser
 *     7 = G.711 A-law logarithmic PCM
 *     8 = G.711 mu-law logarithmic PCM
 *     9 = reserved
 *     10 = AAC
 *     11 = Speex
 *     14 = MP3 8 kHz
 *     15 = Device-specific sound
 * Formats 7, 8, 14, and 15 are reserved.
 * AAC is supported in Flash Player 9,0,115,0 and higher.
 * Speex is supported in Flash Player 10 and higher.
 */
enum SrsAudioCodecId {
    // The value 16 (one past the largest SoundFormat value) is the reserved
    // value, for array map.
    SrsAudioCodecIdReserved1 = 16,
    SrsAudioCodecIdForbidden = 16,

    // for user to disable audio, for example, use pure video hls.
    SrsAudioCodecIdDisabled = 17,

    SrsAudioCodecIdLinearPCMPlatformEndian = 0,
    SrsAudioCodecIdADPCM = 1,
    SrsAudioCodecIdMP3 = 2,
    SrsAudioCodecIdLinearPCMLittleEndian = 3,
    SrsAudioCodecIdNellymoser16kHzMono = 4,
    SrsAudioCodecIdNellymoser8kHzMono = 5,
    SrsAudioCodecIdNellymoser = 6,
    SrsAudioCodecIdReservedG711AlawLogarithmicPCM = 7,
    SrsAudioCodecIdReservedG711MuLawLogarithmicPCM = 8,
    SrsAudioCodecIdReserved = 9,
    SrsAudioCodecIdAAC = 10,
    SrsAudioCodecIdSpeex = 11,
    // For FLV, it's undefined, we define it as Opus for WebRTC.
    SrsAudioCodecIdOpus = 13,
    SrsAudioCodecIdReservedMP3_8kHz = 14,
    SrsAudioCodecIdReservedDeviceSpecificSound = 15,
};
// NOTE(review): presumably returns a printable name for the codec id; the
// definition is not part of this header - confirm.
std::string srs_audio_codec_id2str(SrsAudioCodecId codec);

/**
 * The audio AAC frame trait(characteristic).
* @doc video_file_format_spec_v10_1.pdf, page 77, E.4.2 Audio Tags
 * AACPacketType IF SoundFormat == 10 or 13 UI8
 * The following values are defined:
 *      0 = AAC sequence header
 *      1 = AAC raw
 * @remark The Opus and MP3 traits are declared in the same enum.
 */
enum SrsAudioAacFrameTrait {
    // The value 0xff is the reserved value, for array map.
    SrsAudioAacFrameTraitReserved = 0xff,
    SrsAudioAacFrameTraitForbidden = 0xff,

    // For AAC, we detect the sequence header by content.
    SrsAudioAacFrameTraitSequenceHeader = 0,
    SrsAudioAacFrameTraitRawData = 1,

    // For Opus, a frame may have more than one trait; the values are
    // distinct bits (2, 4, 8).
    SrsAudioOpusFrameTraitRaw = 2,
    SrsAudioOpusFrameTraitSamplingRate = 4,
    SrsAudioOpusFrameTraitAudioLevel = 8,

    // 16/32 reserved for g711a/g711u

    // For MP3 we assume the first packet is sequence header, while it actually is not the same thing, because we do
    // this to simplify the workflow, to make sure we can detect the audio codec from the sequence headers.
    SrsAudioMp3FrameTraitSequenceHeader = 63,
    SrsAudioMp3FrameTraitRawData = 64,
};

/**
 * The audio sample rate.
 * @see srs_flv_srates and srs_aac_srates.
 * @doc video_file_format_spec_v10_1.pdf, page 76, E.4.2 Audio Tags
 *      0 = 5.5 kHz = 5512 Hz
 *      1 = 11 kHz = 11025 Hz
 *      2 = 22 kHz = 22050 Hz
 *      3 = 44 kHz = 44100 Hz
 * However, we can extend this table.
 * @remark Use srs_flv_srates to convert it.
 * @remark The MP4 and Opus names for 12, 24 and 48 share the same values.
 */
enum SrsAudioSampleRate {
    // The value 0xff is the reserved value, for array map.
    SrsAudioSampleRateReserved = 0xff,
    SrsAudioSampleRateForbidden = 0xff,

    // For FLV, only support 5, 11, 22, 44KHz sampling rate.
    SrsAudioSampleRate5512 = 0,
    SrsAudioSampleRate11025 = 1,
    SrsAudioSampleRate22050 = 2,
    SrsAudioSampleRate44100 = 3,

    // For MP4, extra sampling rate to FLV.
    SrsAudioSampleRate12000 = 12,
    SrsAudioSampleRate24000 = 24,
    SrsAudioSampleRate48000 = 48,

    // For Opus, support 8, 12, 16, 24, 48KHz
    // We will write a UINT8 sampling rate after FLV audio tag header.
    // @doc https://tools.ietf.org/html/rfc6716#section-2
    SrsAudioSampleRateNB8kHz = 8,   // NB (narrowband)
    SrsAudioSampleRateMB12kHz = 12, // MB (medium-band)
    SrsAudioSampleRateWB16kHz = 16, // WB (wideband)
    SrsAudioSampleRateSWB24kHz = 24, // SWB (super-wideband)
    SrsAudioSampleRateFB48kHz = 48, // FB (fullband)
};
// Conversions between SrsAudioSampleRate and plain numbers/strings.
// NOTE(review): the exact mapping (for example, what the "guess" variant does
// with values that match no enumerator) is defined outside this header - confirm.
SrsAudioSampleRate srs_audio_sample_rate_from_number(uint32_t v);
SrsAudioSampleRate srs_audio_sample_rate_guess_number(uint32_t v);
uint32_t srs_audio_sample_rate2number(SrsAudioSampleRate v);
std::string srs_audio_sample_rate2str(SrsAudioSampleRate v);

/**
 * The frame type, for example, audio, video or data.
 * @doc video_file_format_spec_v10_1.pdf, page 75, E.4.1 FLV Tag
 */
enum SrsFrameType {
    // Zero is the reserved value, for array map.
    SrsFrameTypeReserved = 0,
    SrsFrameTypeForbidden = 0,

    // 8 = audio
    SrsFrameTypeAudio = 8,
    // 9 = video
    SrsFrameTypeVideo = 9,
    // 18 = script data
    SrsFrameTypeScript = 18,
};

/**
 * Fast checks on the codec of an FLV video packet.
 * All checks are static and take the raw packet as (data, size).
 * @doc video_file_format_spec_v10_1.pdf, page 78, E.4.3 Video Tags
 */
class SrsFlvVideo
{
public:
    SrsFlvVideo();
    virtual ~SrsFlvVideo();
    // The following functions are used to figure out the flv/rtmp packet detail.
public:
    /**
     * only check the frame_type, not check the codec type.
     */
    static bool keyframe(char* data, int size);
    /**
     * check codec h264, keyframe, sequence header
     */
    // TODO: FIXME: Remove it, use SrsFormat instead.
    static bool sh(char* data, int size);
    /**
     * check codec h264.
     */
    static bool h264(char* data, int size);
#ifdef SRS_H265
    // Check whether codec is HEVC(H.265).
    static bool hevc(char* data, int size);
#endif
    /**
     * check the video RTMP/flv header info,
     * @return true if video RTMP/flv header is ok.
     * @remark all type of audio is possible, no need to check audio.
     */
    static bool acceptable(char* data, int size);
};

/**
 * Fast checks on the codec of an FLV audio packet.
* @doc video_file_format_spec_v10_1.pdf, page 76, E.4.2 Audio Tags
 */
class SrsFlvAudio
{
public:
    SrsFlvAudio();
    virtual ~SrsFlvAudio();
    // The following functions are used to figure out the flv/rtmp packet detail.
    // Both are static and take the raw packet as (data, size).
public:
    /**
     * check codec aac, sequence header
     */
    static bool sh(char* data, int size);
    /**
     * check codec aac.
     */
    static bool aac(char* data, int size);
};

/**
 * the public data, even when HLS is disabled, others can use it.
 */
/**
 * the flv sample rate map
 */
extern int srs_flv_srates[];

/**
 * the aac sample rate map
 */
extern int srs_aac_srates[];

// The number of aac samplerates, size for srs_aac_srates.
#define SrsAAcSampleRateNumbers 16

// The impossible aac sample rate index.
#define SrsAacSampleRateUnset 15

// The max number of NALUs in a video, or aac frame in audio packet.
#define SrsMaxNbSamples 256

/**
 * The audio sample size in bits.
 * @doc video_file_format_spec_v10_1.pdf, page 76, E.4.2 Audio Tags
 * Size of each audio sample. This parameter only pertains to
 * uncompressed formats. Compressed formats always decode
 * to 16 bits internally.
 *      0 = 8-bit samples
 *      1 = 16-bit samples
 */
enum SrsAudioSampleBits {
    // The value 2 (one past the largest defined value) is the reserved
    // value, for array map.
    SrsAudioSampleBitsReserved = 2,
    SrsAudioSampleBitsForbidden = 2,

    SrsAudioSampleBits8bit = 0,
    SrsAudioSampleBits16bit = 1,
};
// NOTE(review): presumably returns a printable name for the sample size; the
// definition is not part of this header - confirm.
std::string srs_audio_sample_bits2str(SrsAudioSampleBits v);

/**
 * The audio channels.
 * @doc video_file_format_spec_v10_1.pdf, page 77, E.4.2 Audio Tags
 * Mono or stereo sound
 *      0 = Mono sound
 *      1 = Stereo sound
 */
enum SrsAudioChannels {
    // The value 2 (one past the largest defined value) is the reserved
    // value, for array map.
    SrsAudioChannelsReserved = 2,
    SrsAudioChannelsForbidden = 2,

    SrsAudioChannelsMono = 0,
    SrsAudioChannelsStereo = 1,
};
// NOTE(review): presumably returns a printable name for the channel layout;
// the definition is not part of this header - confirm.
std::string srs_audio_channels2str(SrsAudioChannels v);

/**
 * Table 7-1 - NAL unit type codes, syntax element categories, and NAL unit type classes
 * ISO_IEC_14496-10-AVC-2012.pdf, page 83.
*/ enum SrsAvcNaluType { // Unspecified SrsAvcNaluTypeReserved = 0, SrsAvcNaluTypeForbidden = 0, // Coded slice of a non-IDR picture slice_layer_without_partitioning_rbsp( ) SrsAvcNaluTypeNonIDR = 1, // Coded slice data partition A slice_data_partition_a_layer_rbsp( ) SrsAvcNaluTypeDataPartitionA = 2, // Coded slice data partition B slice_data_partition_b_layer_rbsp( ) SrsAvcNaluTypeDataPartitionB = 3, // Coded slice data partition C slice_data_partition_c_layer_rbsp( ) SrsAvcNaluTypeDataPartitionC = 4, // Coded slice of an IDR picture slice_layer_without_partitioning_rbsp( ) SrsAvcNaluTypeIDR = 5, // Supplemental enhancement information (SEI) sei_rbsp( ) SrsAvcNaluTypeSEI = 6, // Sequence parameter set seq_parameter_set_rbsp( ) SrsAvcNaluTypeSPS = 7, // Picture parameter set pic_parameter_set_rbsp( ) SrsAvcNaluTypePPS = 8, // Access unit delimiter access_unit_delimiter_rbsp( ) SrsAvcNaluTypeAccessUnitDelimiter = 9, // End of sequence end_of_seq_rbsp( ) SrsAvcNaluTypeEOSequence = 10, // End of stream end_of_stream_rbsp( ) SrsAvcNaluTypeEOStream = 11, // Filler data filler_data_rbsp( ) SrsAvcNaluTypeFilterData = 12, // Sequence parameter set extension seq_parameter_set_extension_rbsp( ) SrsAvcNaluTypeSPSExt = 13, // Prefix NAL unit prefix_nal_unit_rbsp( ) SrsAvcNaluTypePrefixNALU = 14, // Subset sequence parameter set subset_seq_parameter_set_rbsp( ) SrsAvcNaluTypeSubsetSPS = 15, // Coded slice of an auxiliary coded picture without partitioning slice_layer_without_partitioning_rbsp( ) SrsAvcNaluTypeLayerWithoutPartition = 19, // Coded slice extension slice_layer_extension_rbsp( ) SrsAvcNaluTypeCodedSliceExt = 20, }; // @see https://datatracker.ietf.org/doc/html/rfc6184#section-1.3 #define SrsAvcNaluTypeParse(code) (SrsAvcNaluType)(code & 0x1F) std::string srs_avc_nalu2str(SrsAvcNaluType nalu_type); #ifdef SRS_H265 /** * The enum NALU type for HEVC * @see Table 7-1 – NAL unit type codes and NAL unit type classes * @doc ITU-T-H.265-2021.pdf, page 86. 
*/ enum SrsHevcNaluType { SrsHevcNaluType_CODED_SLICE_TRAIL_N = 0, SrsHevcNaluType_CODED_SLICE_TRAIL_R, //1 SrsHevcNaluType_CODED_SLICE_TSA_N, //2 SrsHevcNaluType_CODED_SLICE_TLA, //3 SrsHevcNaluType_CODED_SLICE_STSA_N, //4 SrsHevcNaluType_CODED_SLICE_STSA_R, //5 SrsHevcNaluType_CODED_SLICE_RADL_N, //6 SrsHevcNaluType_CODED_SLICE_DLP, //7 SrsHevcNaluType_CODED_SLICE_RASL_N, //8 SrsHevcNaluType_CODED_SLICE_TFD, //9 SrsHevcNaluType_RESERVED_10, SrsHevcNaluType_RESERVED_11, SrsHevcNaluType_RESERVED_12, SrsHevcNaluType_RESERVED_13, SrsHevcNaluType_RESERVED_14, SrsHevcNaluType_RESERVED_15, SrsHevcNaluType_CODED_SLICE_BLA, //16 SrsHevcNaluType_CODED_SLICE_BLANT, //17 SrsHevcNaluType_CODED_SLICE_BLA_N_LP, //18 SrsHevcNaluType_CODED_SLICE_IDR, //19 SrsHevcNaluType_CODED_SLICE_IDR_N_LP, //20 SrsHevcNaluType_CODED_SLICE_CRA, //21 SrsHevcNaluType_RESERVED_22, SrsHevcNaluType_RESERVED_23, SrsHevcNaluType_RESERVED_24, SrsHevcNaluType_RESERVED_25, SrsHevcNaluType_RESERVED_26, SrsHevcNaluType_RESERVED_27, SrsHevcNaluType_RESERVED_28, SrsHevcNaluType_RESERVED_29, SrsHevcNaluType_RESERVED_30, SrsHevcNaluType_RESERVED_31, SrsHevcNaluType_VPS, // 32 SrsHevcNaluType_SPS, // 33 SrsHevcNaluType_PPS, // 34 SrsHevcNaluType_ACCESS_UNIT_DELIMITER, // 35 SrsHevcNaluType_EOS, // 36 SrsHevcNaluType_EOB, // 37 SrsHevcNaluType_FILLER_DATA, // 38 SrsHevcNaluType_SEI , // 39 Prefix SEI SrsHevcNaluType_SEI_SUFFIX, // 40 Suffix SEI SrsHevcNaluType_RESERVED_41, SrsHevcNaluType_RESERVED_42, SrsHevcNaluType_RESERVED_43, SrsHevcNaluType_RESERVED_44, SrsHevcNaluType_RESERVED_45, SrsHevcNaluType_RESERVED_46, SrsHevcNaluType_RESERVED_47, SrsHevcNaluType_UNSPECIFIED_48, SrsHevcNaluType_UNSPECIFIED_49, SrsHevcNaluType_UNSPECIFIED_50, SrsHevcNaluType_UNSPECIFIED_51, SrsHevcNaluType_UNSPECIFIED_52, SrsHevcNaluType_UNSPECIFIED_53, SrsHevcNaluType_UNSPECIFIED_54, SrsHevcNaluType_UNSPECIFIED_55, SrsHevcNaluType_UNSPECIFIED_56, SrsHevcNaluType_UNSPECIFIED_57, SrsHevcNaluType_UNSPECIFIED_58, 
SrsHevcNaluType_UNSPECIFIED_59, SrsHevcNaluType_UNSPECIFIED_60, SrsHevcNaluType_UNSPECIFIED_61, SrsHevcNaluType_UNSPECIFIED_62, SrsHevcNaluType_UNSPECIFIED_63, SrsHevcNaluType_INVALID, }; // @see https://datatracker.ietf.org/doc/html/rfc7798#section-1.1.4 #define SrsHevcNaluTypeParse(code) (SrsHevcNaluType)((code & 0x7E) >> 1) /** * @see Table 7-7 – Name association to slice_type * @doc ITU-T-H.265-2021.pdf, page 96. */ enum SrsHevcSliceType { SrsHevcSliceTypeB = 0, SrsHevcSliceTypeP = 1, SrsHevcSliceTypeI = 2, }; struct SrsHevcNalData { uint16_t nal_unit_length; std::vector nal_unit_data; }; struct SrsHevcHvccNalu { uint8_t array_completeness; uint8_t nal_unit_type; uint16_t num_nalus; std::vector nal_data_vec; }; /** * HEVC Common Max define. * @doc ITU-T-H.265-2021.pdf */ // @see F.7.3.2.1: vps_video_parameter_set_id is u(4). // @doc ITU-T-H.265-2021.pdf, page 464. const int SrsHevcMax_VPS_COUNT = 16; // @see 7.4.3.2.1: sps_seq_parameter_set_id is in [0, 15]. // @doc ITU-T-H.265-2021.pdf, page 95. const int SrsHevcMax_SPS_COUNT = 16; // @see 7.4.3.3.1: pps_pic_parameter_set_id is in [0, 63]. // @doc ITU-T-H.265-2021.pdf, page 102. const int SrsHevcMax_PPS_COUNT = 64; /** * Profile, tier and level * @see 7.3.3 Profile, tier and level syntax * @doc ITU-T-H.265-2021.pdf, page 62. 
*/
// The members mirror the syntax elements of the referenced section: first the
// general_* elements, then the per-sub-layer elements held in vectors.
// NOTE(review): the element types of the std::vector members are missing in
// this copy (template arguments appear to have been stripped); restore them
// from the upstream file.
struct SrsHevcProfileTierLevel {
public:
    uint8_t general_profile_space;
    uint8_t general_tier_flag;
    uint8_t general_profile_idc;
    uint8_t general_profile_compatibility_flag[32];
    uint8_t general_progressive_source_flag;
    uint8_t general_interlaced_source_flag;
    uint8_t general_non_packed_constraint_flag;
    uint8_t general_frame_only_constraint_flag;
    uint8_t general_max_12bit_constraint_flag;
    uint8_t general_max_10bit_constraint_flag;
    uint8_t general_max_8bit_constraint_flag;
    uint8_t general_max_422chroma_constraint_flag;
    uint8_t general_max_420chroma_constraint_flag;
    uint8_t general_max_monochrome_constraint_flag;
    uint8_t general_intra_constraint_flag;
    uint8_t general_one_picture_only_constraint_flag;
    uint8_t general_lower_bit_rate_constraint_flag;
    uint32_t general_max_14bit_constraint_flag;
    uint8_t general_reserved_zero_7bits;
    uint64_t general_reserved_zero_33bits;
    uint64_t general_reserved_zero_34bits;
    uint64_t general_reserved_zero_35bits;
    uint64_t general_reserved_zero_43bits;
    uint8_t general_inbld_flag;
    uint8_t general_reserved_zero_bit;
    uint8_t general_level_idc;
    std::vector sub_layer_profile_present_flag;
    std::vector sub_layer_level_present_flag;
    uint8_t reserved_zero_2bits[8];
    std::vector sub_layer_profile_space;
    std::vector sub_layer_tier_flag;
    std::vector sub_layer_profile_idc;
    std::vector > sub_layer_profile_compatibility_flag;
    std::vector sub_layer_progressive_source_flag;
    std::vector sub_layer_interlaced_source_flag;
    std::vector sub_layer_non_packed_constraint_flag;
    std::vector sub_layer_frame_only_constraint_flag;
    std::vector sub_layer_max_12bit_constraint_flag;
    std::vector sub_layer_max_10bit_constraint_flag;
    std::vector sub_layer_max_8bit_constraint_flag;
    std::vector sub_layer_max_422chroma_constraint_flag;
    std::vector sub_layer_max_420chroma_constraint_flag;
    std::vector sub_layer_max_monochrome_constraint_flag;
    std::vector sub_layer_intra_constraint_flag;
    std::vector sub_layer_one_picture_only_constraint_flag;
    std::vector sub_layer_lower_bit_rate_constraint_flag;
    std::vector sub_layer_reserved_zero_7bits;
    std::vector sub_layer_reserved_zero_33bits;
    std::vector sub_layer_reserved_zero_34bits;
    std::vector sub_layer_reserved_zero_35bits;
    std::vector sub_layer_reserved_zero_43bits;
    std::vector sub_layer_inbld_flag;
    std::vector sub_layer_reserved_zero_bit;
    std::vector sub_layer_level_idc;

public:
    SrsHevcProfileTierLevel();
    virtual ~SrsHevcProfileTierLevel();
};

/**
 * Sub-layer HRD parameters
 * @see E.2.3 Sub-layer HRD parameters syntax
 * @doc ITU-T-H.265-2021.pdf, page 440.
 */
// NOTE(review): the element types of the std::vector members are missing in
// this copy; restore them from the upstream file.
struct SrsHevcSubLayerHrdParameters {
    std::vector bit_rate_value_minus1;
    std::vector cpb_size_value_minus1;
    std::vector cpb_size_du_value_minus1;
    std::vector bit_rate_du_value_minus1;
    std::vector cbr_flag;
};

/**
 * HRD parameters
 * @see E.2.2 HRD parameters syntax
 * @doc ITU-T-H.265-2021.pdf, page 439.
 */
// NOTE(review): the element types of the std::vector members are missing in
// this copy; restore them from the upstream file.
struct SrsHevcHrdParameters {
    uint8_t nal_hrd_parameters_present_flag;
    uint8_t vcl_hrd_parameters_present_flag;
    uint8_t sub_pic_hrd_params_present_flag;
    uint8_t tick_divisor_minus2;
    uint8_t du_cpb_removal_delay_increment_length_minus1;
    uint8_t sub_pic_cpb_params_in_pic_timing_sei_flag;
    uint8_t dpb_output_delay_du_length_minus1;
    uint8_t bit_rate_scale;
    uint8_t cpb_size_scale;
    uint8_t cpb_size_du_scale;
    uint8_t initial_cpb_removal_delay_length_minus1;
    uint8_t au_cpb_removal_delay_length_minus1;
    uint8_t dpb_output_delay_length_minus1;
    std::vector fixed_pic_rate_general_flag;
    std::vector fixed_pic_rate_within_cvs_flag;
    std::vector elemental_duration_in_tc_minus1;
    std::vector low_delay_hrd_flag;
    std::vector cpb_cnt_minus1;
    SrsHevcSubLayerHrdParameters sub_layer_hrd_parameters;   // nal
    SrsHevcSubLayerHrdParameters sub_layer_hrd_parameters_v; // vcl
};

/**
 * Scaling list data
 * @see 7.3.4 Scaling list data syntax
 * @doc ITU-T-H.265-2021.pdf, page 65.
*/
// All tables are indexed [4][6]; ScalingList holds up to 64 values per entry.
// NOTE(review): the index meaning is presumably [sizeId][matrixId] as in the
// referenced syntax section - confirm against the parser.
struct SrsHevcScalingListData {
    uint32_t scaling_list_pred_mode_flag[4][6];
    uint32_t scaling_list_pred_matrix_id_delta[4][6];
    int32_t scaling_list_dc_coef_minus8[4][6];
    uint32_t ScalingList[4][6][64];
    int32_t coefNum;
};

/**
 * Sequence parameter set range extension
 * @see 7.3.2.2.2 Sequence parameter set range extension syntax
 * @doc ITU-T-H.265-2021.pdf, page 57.
 */
struct SrsHevcSpsRangeExtension {
    uint8_t transform_skip_rotation_enabled_flag;
    uint8_t transform_skip_context_enabled_flag;
    uint8_t implicit_rdpcm_enabled_flag;
    uint8_t explicit_rdpcm_enabled_flag;
    uint8_t extended_precision_processing_flag;
    uint8_t intra_smoothing_disabled_flag;
    uint8_t high_precision_offsets_enabled_flag;
    uint8_t persistent_rice_adaptation_enabled_flag;
    uint8_t cabac_bypass_alignment_enabled_flag;
};

/**
 * Picture parameter set RBSP syntax
 * @see 7.3.2.3.1 General picture parameter set RBSP syntax
 * @doc ITU-T-H.265-2021.pdf, page 57.
 */
// NOTE(review): by its name this struct holds the PPS range extension fields,
// while the reference above names the general PPS syntax - confirm the section.
// NOTE(review): the element types of the std::vector members are missing in
// this copy (template arguments appear to have been stripped); restore them
// from the upstream file.
struct SrsHevcPpsRangeExtension {
    uint32_t log2_max_transform_skip_block_size_minus2;
    uint8_t cross_component_prediction_enabled_flag;
    uint8_t chroma_qp_offset_list_enabled_flag;
    uint32_t diff_cu_chroma_qp_offset_depth;
    uint32_t chroma_qp_offset_list_len_minus1;
    std::vector cb_qp_offset_list;
    std::vector cr_qp_offset_list;
    uint32_t log2_sao_offset_scale_luma;
    uint32_t log2_sao_offset_scale_chroma;
};

/**
 * Short-term reference picture set
 * @see 7.3.7 Short-term reference picture set syntax
 * @doc ITU-T-H.265-2021.pdf, page 70.
 */
// NOTE(review): the element types of the std::vector members are missing in
// this copy; restore them from the upstream file.
struct SrsHevcStRefPicSet {
    uint8_t inter_ref_pic_set_prediction_flag;
    int delta_idx_minus1;
    uint8_t delta_rps_sign;
    int abs_delta_rps_minus1;
    std::vector used_by_curr_pic_flag;
    std::vector use_delta_flag;
    int num_negative_pics;
    int num_positive_pics;
    std::vector delta_poc_s0_minus1;
    std::vector used_by_curr_pic_s0_flag;
    std::vector delta_poc_s1_minus1;
    std::vector used_by_curr_pic_s1_flag;
};

/**
 * VUI parameters
 * @see E.2.1 VUI parameters syntax
 * @doc ITU-T-H.265-2021.pdf, page 437.
*/
// The members mirror the syntax elements of the referenced section, in the
// order they are declared here; blank lines only group related elements.
struct SrsHevcVuiParameters {
    // Aspect ratio.
    uint8_t aspect_ratio_info_present_flag;
    uint8_t aspect_ratio_idc;
    int sar_width;
    int sar_height;

    // Overscan.
    uint8_t overscan_info_present_flag;
    uint8_t overscan_appropriate_flag;

    // Video signal type and colour description.
    uint8_t video_signal_type_present_flag;
    uint8_t video_format;
    uint8_t video_full_range_flag;
    uint8_t colour_description_present_flag;
    uint8_t colour_primaries;
    uint8_t transfer_characteristics;
    uint8_t matrix_coeffs;

    // Chroma sample location.
    uint8_t chroma_loc_info_present_flag;
    int chroma_sample_loc_type_top_field;
    int chroma_sample_loc_type_bottom_field;

    uint8_t neutral_chroma_indication_flag;
    uint8_t field_seq_flag;
    uint8_t frame_field_info_present_flag;

    // Default display window.
    uint8_t default_display_window_flag;
    int def_disp_win_left_offset;
    int def_disp_win_right_offset;
    int def_disp_win_top_offset;
    int def_disp_win_bottom_offset;

    // Timing info and HRD parameters.
    uint8_t vui_timing_info_present_flag;
    uint32_t vui_num_units_in_tick;
    uint32_t vui_time_scale;
    uint8_t vui_poc_proportional_to_timing_flag;
    int vui_num_ticks_poc_diff_one_minus1;
    uint8_t vui_hrd_parameters_present_flag;
    SrsHevcHrdParameters hrd_parameters;

    // Bitstream restriction.
    uint8_t bitstream_restriction_flag;
    uint8_t tiles_fixed_structure_flag;
    uint8_t motion_vectors_over_pic_boundaries_flag;
    uint8_t restricted_ref_pic_lists_flag;
    int min_spatial_segmentation_idc;
    int max_bytes_per_pic_denom;
    int max_bits_per_min_cu_denom;
    int log2_max_mv_length_horizontal;
    int log2_max_mv_length_vertical;
};

/**
 * Video Parameter Set
 * @see 7.3.2.1 Video parameter set RBSP syntax
 * @doc ITU-T-H.265-2021.pdf, page 54.
*/
// The members mirror the syntax elements of the referenced section; the
// trailing u(n) comments give the bit width of the element in the bitstream.
// NOTE(review): the element types of the std::vector members are missing in
// this copy (template arguments appear to have been stripped); restore them
// from the upstream file.
struct SrsHevcRbspVps {
    uint8_t vps_video_parameter_set_id;    // u(4)
    uint8_t vps_base_layer_internal_flag;  // u(1)
    uint8_t vps_base_layer_available_flag; // u(1)
    uint8_t vps_max_layers_minus1;         // u(6)
    uint8_t vps_max_sub_layers_minus1;     // u(3)
    uint8_t vps_temporal_id_nesting_flag;  // u(1)
    int vps_reserved_0xffff_16bits;        // u(16)
    SrsHevcProfileTierLevel ptl;
    uint8_t vps_sub_layer_ordering_info_present_flag;
    // Sublayers
    uint32_t vps_max_dec_pic_buffering_minus1[8]; // max u(3)
    uint32_t vps_max_num_reorder_pics[8];
    uint32_t vps_max_latency_increase_plus1[8];
    uint8_t vps_max_layer_id;
    uint32_t vps_num_layer_sets_minus1;
    std::vector< std::vector > layer_id_included_flag;
    uint8_t vps_timing_info_present_flag;
    uint32_t vps_num_units_in_tick;
    uint32_t vps_time_scale;
    uint8_t vps_poc_proportional_to_timing_flag;
    uint32_t vps_num_ticks_poc_diff_one_minus1;
    uint32_t vps_num_hrd_parameters;
    std::vector hrd_layer_set_idx;
    std::vector cprms_present_flag;
    SrsHevcHrdParameters hrd_parameters;
    uint8_t vps_extension_flag;
    uint8_t vps_extension_data_flag;
};

/**
 * Sequence Parameter Set
 * @see 7.3.2.2 Sequence parameter set RBSP syntax
 * @doc ITU-T-H.265-2021.pdf, page 55.
*/
// The members mirror the syntax elements of the referenced section.
// NOTE(review): the element types of the std::vector members are missing in
// this copy (template arguments appear to have been stripped); restore them
// from the upstream file.
struct SrsHevcRbspSps {
    uint8_t sps_video_parameter_set_id;
    uint8_t sps_max_sub_layers_minus1;
    uint8_t sps_temporal_id_nesting_flag;
    SrsHevcProfileTierLevel ptl;
    uint32_t sps_seq_parameter_set_id;
    uint32_t chroma_format_idc;
    uint8_t separate_colour_plane_flag;
    uint32_t pic_width_in_luma_samples;
    uint32_t pic_height_in_luma_samples;
    uint32_t conformance_window_flag;
    uint32_t conf_win_left_offset;
    uint32_t conf_win_right_offset;
    uint32_t conf_win_top_offset;
    uint32_t conf_win_bottom_offset;
    uint32_t bit_depth_luma_minus8;
    uint32_t bit_depth_chroma_minus8;
    uint32_t log2_max_pic_order_cnt_lsb_minus4;
    uint8_t sps_sub_layer_ordering_info_present_flag;
    uint32_t sps_max_dec_pic_buffering_minus1[8]; // max u(3)
    uint32_t sps_max_num_reorder_pics[8];
    uint32_t sps_max_latency_increase_plus1[8];
    uint32_t log2_min_luma_coding_block_size_minus3;
    uint32_t log2_diff_max_min_luma_coding_block_size;
    uint32_t log2_min_luma_transform_block_size_minus2;
    uint32_t log2_diff_max_min_luma_transform_block_size;
    uint32_t max_transform_hierarchy_depth_inter;
    uint32_t max_transform_hierarchy_depth_intra;
    uint8_t scaling_list_enabled_flag;
    uint8_t sps_infer_scaling_list_flag;
    uint32_t sps_scaling_list_ref_layer_id;
    uint32_t sps_scaling_list_data_present_flag;
    SrsHevcScalingListData scaling_list_data;
    uint8_t amp_enabled_flag;
    uint8_t sample_adaptive_offset_enabled_flag;
    uint8_t pcm_enabled_flag;
    uint8_t pcm_sample_bit_depth_luma_minus1;
    uint8_t pcm_sample_bit_depth_chroma_minus1;
    uint32_t log2_min_pcm_luma_coding_block_size_minus3;
    uint32_t log2_diff_max_min_pcm_luma_coding_block_size;
    uint8_t pcm_loop_filter_disabled_flag;
    uint32_t num_short_term_ref_pic_sets;
    std::vector st_ref_pic_set;
    uint8_t long_term_ref_pics_present_flag;
    uint32_t num_long_term_ref_pics_sps;
    uint32_t lt_ref_pic_poc_lsb_sps_bytes;
    std::vector lt_ref_pic_poc_lsb_sps;
    std::vector used_by_curr_pic_lt_sps_flag;
    uint8_t sps_temporal_mvp_enabled_flag;
    uint8_t strong_intra_smoothing_enabled_flag;
    uint8_t vui_parameters_present_flag;
    SrsHevcVuiParameters vui;
    uint8_t sps_extension_present_flag;
    uint8_t sps_range_extension_flag;
    uint8_t sps_multilayer_extension_flag;
    uint8_t sps_3d_extension_flag;
    uint8_t sps_extension_5bits;
    SrsHevcSpsRangeExtension sps_range_extension;
    uint8_t inter_view_mv_vert_constraint_flag; // sps_multilayer_extension_t sps_multilayer_extension;
    // The following elements are not stored in this struct:
    // sps_3d_extension_t sps_3d_extension;
    // int sps_extension_data_flag; // no need
    // rbsp_trailing_bits()...
};

/**
 * Picture Parameter Set
 * @see 7.3.2.3 Picture parameter set RBSP syntax
 * @doc ITU-T-H.265-2021.pdf, page 57.
 */
// NOTE(review): the element types of the std::vector members are missing in
// this copy; restore them from the upstream file.
struct SrsHevcRbspPps {
    uint8_t pps_pic_parameter_set_id;
    uint8_t pps_seq_parameter_set_id;
    uint8_t dependent_slice_segments_enabled_flag;
    uint8_t output_flag_present_flag;
    uint8_t num_extra_slice_header_bits;
    uint8_t sign_data_hiding_enabled_flag;
    uint8_t cabac_init_present_flag;
    uint32_t num_ref_idx_l0_default_active_minus1;
    uint32_t num_ref_idx_l1_default_active_minus1;
    int32_t init_qp_minus26;
    uint8_t constrained_intra_pred_flag;
    uint8_t transform_skip_enabled_flag;
    uint8_t cu_qp_delta_enabled_flag;
    uint32_t diff_cu_qp_delta_depth;
    int32_t pps_cb_qp_offset;
    int32_t pps_cr_qp_offset;
    uint8_t pps_slice_chroma_qp_offsets_present_flag;
    uint8_t weighted_pred_flag;
    uint32_t weighted_bipred_flag;
    uint8_t transquant_bypass_enabled_flag;
    uint8_t tiles_enabled_flag;
    uint8_t entropy_coding_sync_enabled_flag;
    uint32_t num_tile_columns_minus1;
    uint32_t num_tile_rows_minus1;
    uint32_t uniform_spacing_flag;
    std::vector column_width_minus1;
    std::vector row_height_minus1;
    uint8_t loop_filter_across_tiles_enabled_flag;
    uint8_t pps_loop_filter_across_slices_enabled_flag;
    uint8_t deblocking_filter_control_present_flag;
    uint8_t deblocking_filter_override_enabled_flag;
    uint8_t pps_deblocking_filter_disabled_flag;
    int32_t pps_beta_offset_div2;
    int32_t pps_tc_offset_div2;
    uint8_t pps_scaling_list_data_present_flag;
    SrsHevcScalingListData scaling_list_data;
    uint8_t lists_modification_present_flag;
    uint32_t log2_parallel_merge_level_minus2;
    uint8_t slice_segment_header_extension_present_flag;
    uint8_t pps_extension_present_flag;
    uint8_t pps_range_extension_flag;
    uint8_t pps_multilayer_extension_flag;
    uint8_t pps_3d_extension_flag;
    uint8_t pps_scc_extension_flag;
    uint8_t pps_extension_4bits;
    SrsHevcPpsRangeExtension pps_range_extension;
    // The multilayer and 3D extensions are not stored in this struct:
    // pps_multilayer_extension_t pps_multilayer_extension;
    // pps_3d_extension_t pps_3d_extension;
    uint8_t pps_extension_data_flag;
    // rbsp_trailing_bits( ) ...
};

// The HEVC decoder configuration record: the general profile/tier/level and
// stream properties, the list of NALU groups (nalu_vec), and one table slot
// per possible VPS/SPS/PPS id (sized by the SrsHevcMax_*_COUNT constants).
// NOTE(review): the element type of nalu_vec is missing in this copy; restore
// it from the upstream file.
struct SrsHevcDecoderConfigurationRecord {
    uint8_t configuration_version;
    uint8_t general_profile_space;
    uint8_t general_tier_flag;
    uint8_t general_profile_idc;
    uint32_t general_profile_compatibility_flags;
    uint64_t general_constraint_indicator_flags;
    uint8_t general_level_idc;
    uint16_t min_spatial_segmentation_idc;
    uint8_t parallelism_type;
    uint8_t chroma_format;
    uint8_t bit_depth_luma_minus8;
    uint8_t bit_depth_chroma_minus8;
    uint16_t avg_frame_rate;
    uint8_t constant_frame_rate;
    uint8_t num_temporal_layers;
    uint8_t temporal_id_nested;
    uint8_t length_size_minus_one;
    std::vector nalu_vec;

    SrsHevcRbspVps vps_table[SrsHevcMax_VPS_COUNT];
    SrsHevcRbspSps sps_table[SrsHevcMax_SPS_COUNT];
    SrsHevcRbspPps pps_table[SrsHevcMax_PPS_COUNT];
};
#endif

/**
 * Table 7-6 – Name association to slice_type
 * ISO_IEC_14496-10-AVC-2012.pdf, page 105.
 */
enum SrsAvcSliceType {
    SrsAvcSliceTypeP = 0,
    SrsAvcSliceTypeB = 1,
    SrsAvcSliceTypeI = 2,
    SrsAvcSliceTypeSP = 3,
    SrsAvcSliceTypeSI = 4,
    // The values 5..9 repeat the five slice types above, in the same order.
    SrsAvcSliceTypeP1 = 5,
    SrsAvcSliceTypeB1 = 6,
    SrsAvcSliceTypeI1 = 7,
    SrsAvcSliceTypeSP1 = 8,
    SrsAvcSliceTypeSI1 = 9,
};

/**
 * the avc payload format, must be ibmf or annexb format.
 * we guess by annexb first, then ibmf for the first time,
 * and we always use the guessed format for the next time.
*/
enum SrsAvcPayloadFormat {
    // Not decided yet; the format is to be guessed.
    SrsAvcPayloadFormatGuess = 0,
    // The two values below are implicit (1 and 2); keep the order.
    SrsAvcPayloadFormatAnnexb,
    SrsAvcPayloadFormatIbmf,
};

/**
 * the aac profile, for ADTS(HLS/TS)
 * @see https://github.com/ossrs/srs/issues/310
 */
enum SrsAacProfile {
    // The value 3 (one past the largest profile below) is the reserved value.
    SrsAacProfileReserved = 3,

    // @see 7.1 Profiles, ISO_IEC_13818-7-AAC-2004.pdf, page 40
    SrsAacProfileMain = 0,
    SrsAacProfileLC = 1,
    SrsAacProfileSSR = 2,
};
// NOTE(review): presumably returns a printable name for the profile; the
// definition is not part of this header - confirm.
std::string srs_aac_profile2str(SrsAacProfile aac_profile);

/**
 * the aac object type, for RTMP sequence header
 * for AudioSpecificConfig, @see ISO_IEC_14496-3-AAC-2001.pdf, page 33
 * for audioObjectType, @see ISO_IEC_14496-3-AAC-2001.pdf, page 23
 */
enum SrsAacObjectType {
    // Zero is the reserved value.
    // Reserved and Forbidden are two names for the same value.
    SrsAacObjectTypeReserved = 0,
    SrsAacObjectTypeForbidden = 0,

    // Table 1.1 - Audio Object Type definition
    // @see ISO_IEC_14496-3-AAC-2001.pdf, page 23
    SrsAacObjectTypeAacMain = 1,
    SrsAacObjectTypeAacLC = 2,
    SrsAacObjectTypeAacSSR = 3,

    // AAC HE = LC+SBR
    SrsAacObjectTypeAacHE = 5,
    // AAC HEv2 = LC+SBR+PS
    SrsAacObjectTypeAacHEV2 = 29,
};
// NOTE(review): presumably returns a printable name for the object type; the
// definition is not part of this header - confirm.
std::string srs_aac_object2str(SrsAacObjectType aac_object);
// ts/hls/adts audio header profile to RTMP sequence header object type.
SrsAacObjectType srs_aac_ts2rtmp(SrsAacProfile profile);
// RTMP sequence header object type to ts/hls/adts audio header profile.
SrsAacProfile srs_aac_rtmp2ts(SrsAacObjectType object_type);

/**
 * the profile for avc/h.264.
 * @see Annex A Profiles and levels, ISO_IEC_14496-10-AVC-2003.pdf, page 205.
*/
enum SrsAvcProfile {
    SrsAvcProfileReserved = 0,

    // @see ffmpeg, libavcodec/avcodec.h:2713
    SrsAvcProfileBaseline = 66,
    // FF_PROFILE_H264_CONSTRAINED  (1<<9)  // 8+1; constraint_set1_flag
    // FF_PROFILE_H264_CONSTRAINED_BASELINE (66|FF_PROFILE_H264_CONSTRAINED)
    // So 578 == 66 | (1<<9).
    SrsAvcProfileConstrainedBaseline = 578,
    SrsAvcProfileMain = 77,
    SrsAvcProfileExtended = 88,
    SrsAvcProfileHigh = 100,
    SrsAvcProfileHigh10 = 110,
    // The *Intra values equal a base value plus 2048 (1<<11): 2158 == 110 | (1<<11).
    SrsAvcProfileHigh10Intra = 2158,
    SrsAvcProfileHigh422 = 122,
    // 2170 == 122 | (1<<11).
    SrsAvcProfileHigh422Intra = 2170,
    SrsAvcProfileHigh444 = 144,
    SrsAvcProfileHigh444Predictive = 244,
    // 2192 == 144 | (1<<11).
    // NOTE(review): FFmpeg's FF_PROFILE_H264_HIGH_444_INTRA is reportedly
    // (244|FF_PROFILE_H264_INTRA) == 2292 - confirm which value is intended.
    SrsAvcProfileHigh444Intra = 2192,
};
// Get the string form of the AVC profile (defined outside this header).
std::string srs_avc_profile2str(SrsAvcProfile profile);

/**
 * The level for avc/h.264.
 * @see Annex A Profiles and levels, ISO_IEC_14496-10-AVC-2003.pdf, page 207.
 */
enum SrsAvcLevel {
    SrsAvcLevelReserved = 0,

    // Each value is the level number times ten, e.g. SrsAvcLevel_31 (3.1) is 31.
    SrsAvcLevel_1 = 10,
    SrsAvcLevel_11 = 11,
    SrsAvcLevel_12 = 12,
    SrsAvcLevel_13 = 13,
    SrsAvcLevel_2 = 20,
    SrsAvcLevel_21 = 21,
    SrsAvcLevel_22 = 22,
    SrsAvcLevel_3 = 30,
    SrsAvcLevel_31 = 31,
    SrsAvcLevel_32 = 32,
    SrsAvcLevel_4 = 40,
    SrsAvcLevel_41 = 41,
    SrsAvcLevel_5 = 50,
    SrsAvcLevel_51 = 51,
};
// Get the string form of the AVC level (defined outside this header).
std::string srs_avc_level2str(SrsAvcLevel level);

#ifdef SRS_H265

/**
 * The profile for hevc/h.265, Annex A Profiles, tiers and levels
 * @see A.3 Profiles
 * @doc ITU-T-H.265-2021.pdf, page 268.
 */
enum SrsHevcProfile {
    SrsHevcProfileReserved = 0,

    // @see ffmpeg, libavcodec/avcodec.h:2986
    SrsHevcProfileMain = 1,
    SrsHevcProfileMain10 = 2,
    SrsHevcProfileMainStillPicture = 3,
    SrsHevcProfileRext = 4,
};
// Get the string form of the HEVC profile (defined outside this header).
std::string srs_hevc_profile2str(SrsHevcProfile profile);

/**
 * The level for hevc/h.265, Annex A Profiles, tiers and levels
 * @see A.4 Tiers and levels
 * @doc ITU-T-H.265-2021.pdf, page 283.
*/
enum SrsHevcLevel {
    SrsHevcLevelReserved = 0,

    // Each value is the level number times 30, e.g. SrsHevcLevel_21 (2.1) is 63.
    SrsHevcLevel_1 = 30,
    SrsHevcLevel_2 = 60,
    SrsHevcLevel_21 = 63,
    SrsHevcLevel_3 = 90,
    SrsHevcLevel_31 = 93,
    SrsHevcLevel_4 = 120,
    SrsHevcLevel_41 = 123,
    SrsHevcLevel_5 = 150,
    SrsHevcLevel_51 = 153,
    SrsHevcLevel_52 = 156,
    SrsHevcLevel_6 = 180,
    SrsHevcLevel_61 = 183,
    SrsHevcLevel_62 = 186,
};
// Get the string form of the HEVC level (defined outside this header).
std::string srs_hevc_level2str(SrsHevcLevel level);

#endif // SRS_H265

/**
 * A sample is the unit of frame.
 * It's a NALU for H.264, H.265.
 * It's the whole AAC raw data for AAC.
 * @remark Neither SPS/PPS nor ASC is a sample unit; they are the codec sequence header.
 */
class SrsSample {
public:
    // The size of unit.
    int size;
    // The ptr of unit, user must free it.
    char* bytes;
public:
    SrsSample();
    // NOTE(review): presumably stores b and s as bytes and size - confirm in the implementation.
    SrsSample(char* b, int s);
    ~SrsSample();
public:
    // Copy sample, share the bytes pointer.
    SrsSample* copy();
};

/**
 * The codec is the information of encoder,
 * corresponding to the sequence header of FLV,
 * parsed to detail info.
 * It is the common base class of the audio and video codec configs below.
 */
class SrsCodecConfig {
public:
    SrsCodecConfig();
    virtual ~SrsCodecConfig();
};

/**
 * The audio codec info.
 */
class SrsAudioCodecConfig : public SrsCodecConfig {
    // In FLV specification.
public:
    // The audio codec id; for FLV, it's SoundFormat.
    SrsAudioCodecId id;
    // The audio sample rate; for FLV, it's SoundRate.
    SrsAudioSampleRate sound_rate;
    // The audio sample size, such as 16 bits; for FLV, it's SoundSize.
    SrsAudioSampleBits sound_size;
    // The audio number of channels; for FLV, it's SoundType.
    // TODO: FIXME: Rename to sound_channels.
    SrsAudioChannels sound_type;
    int audio_data_rate; // in bps
    // In AAC specification.
public:
    /**
     * audio specified
     * audioObjectType, in 1.6.2.1 AudioSpecificConfig, page 33,
     * 1.5.1.1 Audio object type definition, page 23,
     *           in ISO_IEC_14496-3-AAC-2001.pdf.
     */
    SrsAacObjectType aac_object;
    /**
     * samplingFrequencyIndex
     */
    uint8_t aac_sample_rate;
    /**
     * channelConfiguration
     */
    uint8_t aac_channels;
    // Sequence header payload.
public:
    /**
     * the aac extra data, the AAC sequence header,
     * without the flv codec header,
     * @see: ffmpeg, AVCodecContext::extradata
     */
    // NOTE(review): the element type is not visible here (the template argument
    // looks lost in extraction) - presumably a byte type such as char; confirm.
    std::vector aac_extra_data;
public:
    SrsAudioCodecConfig();
    virtual ~SrsAudioCodecConfig();
public:
    // NOTE(review): presumably reports whether the AAC sequence header has been
    // parsed - confirm in the implementation.
    virtual bool is_aac_codec_ok();
};

/**
 * The video codec info.
 */
class SrsVideoCodecConfig : public SrsCodecConfig {
public:
    SrsVideoCodecId id;
    int video_data_rate; // in bps
    double frame_rate;
    double duration;
    int width;
    int height;
public:
    /**
     * the avc extra data, the AVC sequence header,
     * without the flv codec header,
     * @see: ffmpeg, AVCodecContext::extradata
     */
    // NOTE(review): the element type is not visible here (the template argument
    // looks lost in extraction) - presumably a byte type such as char; confirm.
    std::vector avc_extra_data;
public:
    /**
     * video specified
     */
    // profile_idc, ISO_IEC_14496-10-AVC-2003.pdf, page 45.
    SrsAvcProfile avc_profile;
    // level_idc, ISO_IEC_14496-10-AVC-2003.pdf, page 45.
    SrsAvcLevel avc_level;
#ifdef SRS_H265
    // The profile_idc, ITU-T-H.265-2021.pdf, page 62.
    SrsHevcProfile hevc_profile;
    // The level_idc, ITU-T-H.265-2021.pdf, page 63.
    SrsHevcLevel hevc_level;
#endif
    // lengthSizeMinusOne, ISO_IEC_14496-15-AVC-format-2012.pdf, page 16
    int8_t NAL_unit_length;
    // Note that we may resize the vector, so the under-layer bytes may change.
    // NOTE(review): the element type of both vectors is not visible here (the template
    // argument looks lost in extraction) - presumably char; confirm.
    std::vector sequenceParameterSetNALUnit;
    std::vector pictureParameterSetNALUnit;
public:
    // the avc payload format.
    SrsAvcPayloadFormat payload_format;
#ifdef SRS_H265
public:
    // The HEVC decoder configuration record, only available when SRS_H265 is defined.
    SrsHevcDecoderConfigurationRecord hevc_dec_conf_record_;
#endif
public:
    SrsVideoCodecConfig();
    virtual ~SrsVideoCodecConfig();
public:
    // NOTE(review): presumably reports whether the AVC sequence header has been
    // parsed - confirm in the implementation.
    virtual bool is_avc_codec_ok();
};

// A frame, consists of a codec and a group of samples.
// TODO: FIXME: Rename to packet to follow names of FFmpeg, which means before decoding or after decoding.
class SrsFrame {
public:
    // The DTS/PTS in milliseconds, which is TBN=1000.
    int64_t dts;
    // PTS = DTS + CTS.
    int32_t cts;
public:
    // The codec info of frame.
    SrsCodecConfig* codec;
    // The actual parsed number of samples.
    int nb_samples;
    // The samples cache, a fixed array of SrsMaxNbSamples entries.
    SrsSample samples[SrsMaxNbSamples];
public:
    SrsFrame();
    virtual ~SrsFrame();
public:
    // Initialize the frame, to parse samples.
    virtual srs_error_t initialize(SrsCodecConfig* c);
    // Add a sample to frame.
    virtual srs_error_t add_sample(char* bytes, int size);
};

// An audio frame, besides a frame, contains the audio frame info, such as frame type.
// TODO: FIXME: Rename to packet to follow names of FFmpeg, which means before decoding or after decoding.
class SrsAudioFrame : public SrsFrame {
public:
    SrsAudioAacFrameTrait aac_packet_type;
public:
    SrsAudioFrame();
    virtual ~SrsAudioFrame();
public:
    // NOTE(review): presumably the frame's codec viewed as an audio codec config - confirm.
    virtual SrsAudioCodecConfig* acodec();
};

// A video frame, besides a frame, contains the video frame info, such as frame type.
// TODO: FIXME: Rename to packet to follow names of FFmpeg, which means before decoding or after decoding.
class SrsVideoFrame : public SrsFrame {
public:
    // video specified
    SrsVideoAvcFrameType frame_type;
    SrsVideoAvcFrameTrait avc_packet_type;
    // Whether the samples contain an IDR frame.
    bool has_idr;
    // Whether exists AUD NALU.
    bool has_aud;
    // Whether exists SPS/PPS NALU.
    bool has_sps_pps;
    // The first nalu type.
    SrsAvcNaluType first_nalu_type;
public:
    SrsVideoFrame();
    virtual ~SrsVideoFrame();
public:
    // Initialize the frame, to parse samples.
    virtual srs_error_t initialize(SrsCodecConfig* c);
    // Add the sample without ANNEXB or IBMF header, or RAW AAC or MP3 data.
    // NOTE(review): the mention of AAC/MP3 on a video frame looks carried over from
    // the audio path - confirm against the implementation.
    virtual srs_error_t add_sample(char* bytes, int size);
public:
    // NOTE(review): presumably the frame's codec viewed as a video codec config - confirm.
    virtual SrsVideoCodecConfig* vcodec();
public:
    // Static helpers that inspect one sample and report the result through the
    // reference out-parameter; the return value is the error status.
    static srs_error_t parse_avc_nalu_type(const SrsSample* sample, SrsAvcNaluType& avc_nalu_type);
    static srs_error_t parse_avc_b_frame(const SrsSample* sample, bool& is_b_frame);
    static srs_error_t parse_hevc_nalu_type(const SrsSample* sample, SrsHevcNaluType& hevc_nalu_type);
    static srs_error_t parse_hevc_b_frame(const SrsSample* sample, SrsFormat* format, bool& is_b_frame);
};

/**
 * A codec format, including one or many streams, each stream identified by a frame.
* For example, a typical RTMP stream format consists of a video and an audio frame.
 * Maybe some RTMP stream only has an audio stream, for instance, a radio application.
 */
class SrsFormat {
public:
    // The audio frame and its codec config.
    SrsAudioFrame* audio;
    SrsAudioCodecConfig* acodec;
    // The video frame and its codec config.
    SrsVideoFrame* video;
    SrsVideoCodecConfig* vcodec;
public:
    // NOTE(review): presumably the raw payload pointer and its size in bytes - confirm
    // in the implementation; ownership is not visible from this header.
    char* raw;
    int nb_raw;
public:
    // For the sequence header, whether to parse the H.264 SPS.
    // TODO: FIXME: Refine it.
    bool avc_parse_sps;
    // Whether try to parse in ANNEXB, then by IBMF.
    bool try_annexb_first;
public:
    SrsFormat();
    virtual ~SrsFormat();
public:
    // Initialize the format.
    virtual srs_error_t initialize();
    // When got a parsed audio packet.
    // @param data The data in FLV format.
    virtual srs_error_t on_audio(int64_t timestamp, char* data, int size);
    // When got a parsed video packet.
    // @param data The data in FLV format.
    virtual srs_error_t on_video(int64_t timestamp, char* data, int size);
    // When got an audio AAC sequence header.
    virtual srs_error_t on_aac_sequence_header(char* data, int size);
public:
    // Whether the current packet is a sequence header of the named codec.
    // NOTE(review): exact conditions are in the implementation - confirm there.
    virtual bool is_aac_sequence_header();
    virtual bool is_mp3_sequence_header();
    virtual bool is_avc_sequence_header();
private:
    // Demux the video packet in H.264 codec.
    // The packet is muxed in FLV format, defined in flv specification.
    //          Demux the sps/pps from sequence header.
    //          Demux the samples from NALUs.
    virtual srs_error_t video_avc_demux(SrsBuffer* stream, int64_t timestamp);
#ifdef SRS_H265
private:
    // Demux the HEVC configuration (hvcC) from the stream.
    virtual srs_error_t hevc_demux_hvcc(SrsBuffer* stream);
private:
    // Demux the HEVC VPS/SPS/PPS, from a NALU or from its RBSP bytes.
    virtual srs_error_t hevc_demux_vps_sps_pps(SrsHevcHvccNalu *nal);
    virtual srs_error_t hevc_demux_vps_rbsp(char *rbsp, int nb_rbsp);
    virtual srs_error_t hevc_demux_sps_rbsp(char *rbsp, int nb_rbsp);
    virtual srs_error_t hevc_demux_pps_rbsp(char *rbsp, int nb_rbsp);
    // Demux the profile/tier/level structure from the bit buffer into ptl.
    virtual srs_error_t hevc_demux_rbsp_ptl(SrsBitBuffer* bs, SrsHevcProfileTierLevel* ptl, int profile_present_flag, int max_sub_layers_minus1);
public:
    // Demux a single HEVC VPS/SPS/PPS from the stream.
    virtual srs_error_t hevc_demux_vps(SrsBuffer *stream);
    virtual srs_error_t hevc_demux_sps(SrsBuffer *stream);
    virtual srs_error_t hevc_demux_pps(SrsBuffer *stream);
#endif
private:
    // Parse the H.264 SPS/PPS.
    virtual srs_error_t avc_demux_sps_pps(SrsBuffer* stream);
    virtual srs_error_t avc_demux_sps();
    virtual srs_error_t avc_demux_sps_rbsp(char* rbsp, int nb_rbsp);
private:
    // Parse the H.264 or H.265 NALUs.
    virtual srs_error_t video_nalu_demux(SrsBuffer* stream);
    // Demux the avc NALU in "AnnexB" from ISO_IEC_14496-10-AVC-2003.pdf, page 211.
    virtual srs_error_t avc_demux_annexb_format(SrsBuffer* stream);
    virtual srs_error_t do_avc_demux_annexb_format(SrsBuffer* stream);
    // Demux the avc NALU in "ISO Base Media File Format" from ISO_IEC_14496-15-AVC-format-2012.pdf, page 20
    virtual srs_error_t avc_demux_ibmf_format(SrsBuffer* stream);
    virtual srs_error_t do_avc_demux_ibmf_format(SrsBuffer* stream);
private:
    // Demux the audio packet in AAC codec.
    //          Demux the asc from sequence header.
    //          Demux the samples from RAW data.
    virtual srs_error_t audio_aac_demux(SrsBuffer* stream, int64_t timestamp);
    // Demux the audio packet in MP3 codec.
    // NOTE(review): the meaning of `fresh` is not visible in this header - see the implementation.
    virtual srs_error_t audio_mp3_demux(SrsBuffer* stream, int64_t timestamp, bool fresh);
public:
    // Directly demux the sequence header, without RTMP packet header.
    virtual srs_error_t audio_aac_sequence_header_demux(char* data, int size);
};

#endif // SRS_KERNEL_CODEC_HPP