srs/trunk/src/protocol/srs_raw_avc.cpp

//
// Copyright (c) 2013-2021 Winlin
//
// SPDX-License-Identifier: MIT
//

#include <srs_raw_avc.hpp>

#include <string.h>
using namespace std;

#include <srs_kernel_error.hpp>
#include <srs_kernel_log.hpp>
#include <srs_kernel_buffer.hpp>
#include <srs_kernel_utility.hpp>
#include <srs_core_autofree.hpp>
#include <srs_kernel_codec.hpp>

SrsRawH264Stream::SrsRawH264Stream()
{
}

SrsRawH264Stream::~SrsRawH264Stream()
{
}

srs_error_t SrsRawH264Stream::annexb_demux(SrsBuffer* stream, char** pframe, int* pnb_frame)
{
    srs_error_t err = srs_success;

    *pframe = NULL;
    *pnb_frame = 0;

    while (!stream->empty()) {
        // each frame must prefixed by annexb format.
        // about annexb, @see ISO_IEC_14496-10-AVC-2003.pdf, page 211.
        int pnb_start_code = 0;
        if (!srs_avc_startswith_annexb(stream, &pnb_start_code)) {
            return srs_error_new(ERROR_H264_API_NO_PREFIXED, "annexb start code");
        }
        int start = stream->pos() + pnb_start_code;

        // find the last frame prefixed by annexb format.
        stream->skip(pnb_start_code);
        while (!stream->empty()) {
            if (srs_avc_startswith_annexb(stream, NULL)) {
                break;
            }
            stream->skip(1);
        }

        // demux the frame.
        *pnb_frame = stream->pos() - start;
        *pframe = stream->data() + start;
        break;
    }

    return err;
}

bool SrsRawH264Stream::is_sps(char* frame, int nb_frame)
{
    srs_assert(nb_frame > 0);

    // 5bits, 7.3.1 NAL unit syntax,
    // ISO_IEC_14496-10-AVC-2003.pdf, page 44.
    //  7: SPS, 8: PPS, 5: I Frame, 1: P Frame
    uint8_t nal_unit_type = (char)frame[0] & 0x1f;

    return nal_unit_type == 7;
}

bool SrsRawH264Stream::is_pps(char* frame, int nb_frame)
{
    srs_assert(nb_frame > 0);

    // 5bits, 7.3.1 NAL unit syntax,
    // ISO_IEC_14496-10-AVC-2003.pdf, page 44.
    //  7: SPS, 8: PPS, 5: I Frame, 1: P Frame
    uint8_t nal_unit_type = (char)frame[0] & 0x1f;

    return nal_unit_type == 8;
}

srs_error_t SrsRawH264Stream::sps_demux(char* frame, int nb_frame, string& sps)
{
    srs_error_t err = srs_success;

    // atleast 1bytes for SPS to decode the type, profile, constrain and level.
    if (nb_frame < 4) {
        return err;
    }

    sps = string(frame, nb_frame);

    return err;
}

srs_error_t SrsRawH264Stream::pps_demux(char* frame, int nb_frame, string& pps)
{
    srs_error_t err = srs_success;

    if (nb_frame <= 0) {
        return srs_error_new(ERROR_STREAM_CASTER_AVC_PPS, "no pps");
    }

    pps = string(frame, nb_frame);

    return err;
}

srs_error_t SrsRawH264Stream::mux_sequence_header(string sps, string pps, uint32_t dts, uint32_t pts, string& sh)
{
    srs_error_t err = srs_success;

    // 5bytes sps/pps header:
    //      configurationVersion, AVCProfileIndication, profile_compatibility,
    //      AVCLevelIndication, lengthSizeMinusOne
    // 3bytes size of sps:
    //      numOfSequenceParameterSets, sequenceParameterSetLength(2B)
    // Nbytes of sps.
    //      sequenceParameterSetNALUnit
    // 3bytes size of pps:
    //      numOfPictureParameterSets, pictureParameterSetLength
    // Nbytes of pps:
    //      pictureParameterSetNALUnit
    int nb_packet = 5 + (3 + (int)sps.length()) + (3 + (int)pps.length());
    char* packet = new char[nb_packet];
    SrsAutoFreeA(char, packet);

    // use stream to generate the h264 packet.
    SrsBuffer stream(packet, nb_packet);

    // decode the SPS:
    // @see: 7.3.2.1.1, ISO_IEC_14496-10-AVC-2012.pdf, page 62
    if (true) {
        srs_assert((int)sps.length() >= 4);
        char* frame = (char*)sps.data();

        // @see: Annex A Profiles and levels, ISO_IEC_14496-10-AVC-2003.pdf, page 205
        //      Baseline profile profile_idc is 66(0x42).
        //      Main profile profile_idc is 77(0x4d).
        //      Extended profile profile_idc is 88(0x58).
        uint8_t profile_idc = frame[1];
        //uint8_t constraint_set = frame[2];
        uint8_t level_idc = frame[3];

        // generate the sps/pps header
        // 5.3.4.2.1 Syntax, ISO_IEC_14496-15-AVC-format-2012.pdf, page 16
        // configurationVersion
        stream.write_1bytes(0x01);
        // AVCProfileIndication
        stream.write_1bytes(profile_idc);
        // profile_compatibility
        stream.write_1bytes(0x00);
        // AVCLevelIndication
        stream.write_1bytes(level_idc);
        // lengthSizeMinusOne, or NAL_unit_length, always use 4bytes size,
        // so we always set it to 0x03.
        stream.write_1bytes(0x03);
    }

    // sps
    if (true) {
        // 5.3.4.2.1 Syntax, ISO_IEC_14496-15-AVC-format-2012.pdf, page 16
        // numOfSequenceParameterSets, always 1
        stream.write_1bytes(0x01);
        // sequenceParameterSetLength
        stream.write_2bytes((int16_t)sps.length());
        // sequenceParameterSetNALUnit
        stream.write_string(sps);
    }

    // pps
    if (true) {
        // 5.3.4.2.1 Syntax, ISO_IEC_14496-15-AVC-format-2012.pdf, page 16
        // numOfPictureParameterSets, always 1
        stream.write_1bytes(0x01);
        // pictureParameterSetLength
        stream.write_2bytes((int16_t)pps.length());
        // pictureParameterSetNALUnit
        stream.write_string(pps);
    }

    // TODO: FIXME: for more profile.
    // 5.3.4.2.1 Syntax, ISO_IEC_14496-15-AVC-format-2012.pdf, page 16
    // profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 144

    sh = string(packet, nb_packet);

    return err;
}

srs_error_t SrsRawH264Stream::mux_ipb_frame(char* frame, int nb_frame, string& ibp)
{
    srs_error_t err = srs_success;

    // 4bytes size of nalu:
    //      NALUnitLength
    // Nbytes of nalu.
    //      NALUnit
    int nb_packet = 4 + nb_frame;
    char* packet = new char[nb_packet];
    SrsAutoFreeA(char, packet);

    // use stream to generate the h264 packet.
    SrsBuffer stream(packet, nb_packet);

    // 5.3.4.2.1 Syntax, ISO_IEC_14496-15-AVC-format-2012.pdf, page 16
    // lengthSizeMinusOne, or NAL_unit_length, always use 4bytes size
    uint32_t NAL_unit_length = nb_frame;

    // mux the avc NALU in "ISO Base Media File Format"
    // from ISO_IEC_14496-15-AVC-format-2012.pdf, page 20
    // NALUnitLength
    stream.write_4bytes(NAL_unit_length);
    // NALUnit
    stream.write_bytes(frame, nb_frame);

    ibp = string(packet, nb_packet);

    return err;
}

srs_error_t SrsRawH264Stream::mux_avc2flv(string video, int8_t frame_type, int8_t avc_packet_type, uint32_t dts, uint32_t pts, char** flv, int* nb_flv)
{
    srs_error_t err = srs_success;

    // for h264 in RTMP video payload, there is 5bytes header:
    //      1bytes, FrameType | CodecID
    //      1bytes, AVCPacketType
    //      3bytes, CompositionTime, the cts.
    // @see: E.4.3 Video Tags, video_file_format_spec_v10_1.pdf, page 78
    int size = (int)video.length() + 5;
    char* data = new char[size];
    char* p = data;

    // @see: E.4.3 Video Tags, video_file_format_spec_v10_1.pdf, page 78
    // Frame Type, Type of video frame.
    // CodecID, Codec Identifier.
    // set the rtmp header
    *p++ = (frame_type << 4) | SrsVideoCodecIdAVC;

    // AVCPacketType
    *p++ = avc_packet_type;

    // CompositionTime
    // pts = dts + cts, or
    // cts = pts - dts.
    // where cts is the header in rtmp video packet payload header.
    uint32_t cts = pts - dts;
    char* pp = (char*)&cts;
    *p++ = pp[2];
    *p++ = pp[1];
    *p++ = pp[0];

    // h.264 raw data.
    memcpy(p, video.data(), video.length());

    *flv = data;
    *nb_flv = size;

    return err;
}

SrsRawAacStream::SrsRawAacStream()
{
}

SrsRawAacStream::~SrsRawAacStream()
{
}

srs_error_t SrsRawAacStream::adts_demux(SrsBuffer* stream, char** pframe, int* pnb_frame, SrsRawAacStreamCodec& codec)
{
    srs_error_t err = srs_success;

    while (!stream->empty()) {
        int adts_header_start = stream->pos();

        // decode the ADTS.
        // @see ISO_IEC_13818-7-AAC-2004.pdf, page 26
        //      6.2 Audio Data Transport Stream, ADTS
        // @see https://github.com/ossrs/srs/issues/212#issuecomment-64145885
        // byte_alignment()

        // adts_fixed_header:
        //      12bits syncword,
        //      16bits left.
        // adts_variable_header:
        //      28bits
        //      12+16+28=56bits
        // adts_error_check:
        //      16bits if protection_absent
        //      56+16=72bits
        // if protection_absent:
        //      require(7bytes)=56bits
        // else
        //      require(9bytes)=72bits
        if (!stream->require(7)) {
            return srs_error_new(ERROR_AAC_ADTS_HEADER, "requires 7 only %d bytes", stream->left());
        }

        // for aac, the frame must be ADTS format.
        if (!srs_aac_startswith_adts(stream)) {
            return srs_error_new(ERROR_AAC_REQUIRED_ADTS, "not adts");
        }

        // syncword 12 bslbf
        stream->read_1bytes();
        // 4bits left.
        // adts_fixed_header(), 1.A.2.2.1 Fixed Header of ADTS
        // ID 1 bslbf
        // layer 2 uimsbf
        // protection_absent 1 bslbf
        int8_t pav = (stream->read_1bytes() & 0x0f);
        int8_t id = (pav >> 3) & 0x01;
        /*int8_t layer = (pav >> 1) & 0x03;*/
        int8_t protection_absent = pav & 0x01;

        /**
         * ID: MPEG identifier, set to '1' if the audio data in the ADTS stream are MPEG-2 AAC (See ISO/IEC 13818-7)
         * and set to '0' if the audio data are MPEG-4. See also ISO/IEC 11172-3, subclause 2.4.2.3.
         */
        if (id != 0x01) {
            // well, some system always use 0, but actually is aac format.
            // for example, houjian vod ts always set the aac id to 0, actually 1.
            // we just ignore it, and alwyas use 1(aac) to demux.
            id = 0x01;
        }

        int16_t sfiv = stream->read_2bytes();
        // profile 2 uimsbf
        // sampling_frequency_index 4 uimsbf
        // private_bit 1 bslbf
        // channel_configuration 3 uimsbf
        // original/copy 1 bslbf
        // home 1 bslbf
        int8_t profile = (sfiv >> 14) & 0x03;
        int8_t sampling_frequency_index = (sfiv >> 10) & 0x0f;
        /*int8_t private_bit = (sfiv >> 9) & 0x01;*/
        int8_t channel_configuration = (sfiv >> 6) & 0x07;
        /*int8_t original = (sfiv >> 5) & 0x01;*/
        /*int8_t home = (sfiv >> 4) & 0x01;*/
        //int8_t Emphasis; @remark, Emphasis is removed, @see https://github.com/ossrs/srs/issues/212#issuecomment-64154736
        // 4bits left.
        // adts_variable_header(), 1.A.2.2.2 Variable Header of ADTS
        // copyright_identification_bit 1 bslbf
        // copyright_identification_start 1 bslbf
        /*int8_t fh_copyright_identification_bit = (fh1 >> 3) & 0x01;*/
        /*int8_t fh_copyright_identification_start = (fh1 >> 2) & 0x01;*/
        // frame_length 13 bslbf: Length of the frame including headers and error_check in bytes.
        // use the left 2bits as the 13 and 12 bit,
        // the frame_length is 13bits, so we move 13-2=11.
        int16_t frame_length = (sfiv << 11) & 0x1800;

        int32_t abfv = stream->read_3bytes();
        // frame_length 13 bslbf: consume the first 13-2=11bits
        // the fh2 is 24bits, so we move right 24-11=13.
        frame_length |= (abfv >> 13) & 0x07ff;
        // adts_buffer_fullness 11 bslbf
        /*int16_t fh_adts_buffer_fullness = (abfv >> 2) & 0x7ff;*/
        // number_of_raw_data_blocks_in_frame 2 uimsbf
        /*int16_t number_of_raw_data_blocks_in_frame = abfv & 0x03;*/
        // adts_error_check(), 1.A.2.2.3 Error detection
        if (!protection_absent) {
            if (!stream->require(2)) {
                return srs_error_new(ERROR_AAC_ADTS_HEADER, "requires 2 only %d bytes", stream->left());
            }
            // crc_check 16 Rpchof
            /*int16_t crc_check = */stream->read_2bytes();
        }

        // TODO: check the sampling_frequency_index
        // TODO: check the channel_configuration

        // raw_data_blocks
        int adts_header_size = stream->pos() - adts_header_start;
        int raw_data_size = frame_length - adts_header_size;
        if (!stream->require(raw_data_size)) {
            return srs_error_new(ERROR_AAC_ADTS_HEADER, "requires %d only %d bytes", raw_data_size, stream->left());
        }

        // the codec info.
        codec.protection_absent = protection_absent;
        codec.aac_object = srs_aac_ts2rtmp((SrsAacProfile)profile);
        codec.sampling_frequency_index = sampling_frequency_index;
        codec.channel_configuration = channel_configuration;
        codec.frame_length = frame_length;

        // The aac sampleing rate defined in srs_aac_srates.
        // TODO: FIXME: maybe need to resample audio.
        codec.sound_format = 10; // AAC
        if (sampling_frequency_index <= 0x0c && sampling_frequency_index > 0x0a) {
            codec.sound_rate = SrsAudioSampleRate5512;
        } else if (sampling_frequency_index <= 0x0a && sampling_frequency_index > 0x07) {
            codec.sound_rate = SrsAudioSampleRate11025;
        } else if (sampling_frequency_index <= 0x07 && sampling_frequency_index > 0x04) {
            codec.sound_rate = SrsAudioSampleRate22050;
        } else if (sampling_frequency_index <= 0x04) {
            codec.sound_rate = SrsAudioSampleRate44100;
        } else {
            codec.sound_rate = SrsAudioSampleRate44100;
            srs_warn("adts invalid sample rate for flv, rate=%#x", sampling_frequency_index);
        }
        codec.sound_type = srs_max(0, srs_min(1, channel_configuration - 1));
        // TODO: FIXME: finger it out the sound size by adts.
        codec.sound_size = 1; // 0(8bits) or 1(16bits).

        // frame data.
        *pframe = stream->data() + stream->pos();
        *pnb_frame = raw_data_size;
        stream->skip(raw_data_size);

        break;
    }

    return err;
}

srs_error_t SrsRawAacStream::mux_sequence_header(SrsRawAacStreamCodec* codec, string& sh)
{
    srs_error_t err = srs_success;

    // only support aac profile 1-4.
    if (codec->aac_object == SrsAacObjectTypeReserved) {
        return srs_error_new(ERROR_AAC_DATA_INVALID, "invalid aac object");
    }

    SrsAacObjectType audioObjectType = codec->aac_object;
    char channelConfiguration = codec->channel_configuration;

    // Here we are generating AAC sequence header, the ASC structure,
    // because we have already parsed the sampling rate from AAC codec,
    // which is more precise than the sound_rate defined by RTMP.
    //
    // For example, AAC sampling_frequency_index is 3(48000HZ) or 4(44100HZ),
    // the sound_rate is always 3(44100HZ), if we covert sound_rate to
    // sampling_frequency_index, we may make mistake.
    uint8_t samplingFrequencyIndex = (uint8_t)codec->sampling_frequency_index;
    if (samplingFrequencyIndex >= SrsAacSampleRateUnset) {
        switch (codec->sound_rate) {
            case SrsAudioSampleRate5512:
                samplingFrequencyIndex = 0x0c; break;
            case SrsAudioSampleRate11025:
                samplingFrequencyIndex = 0x0a; break;
            case SrsAudioSampleRate22050:
                samplingFrequencyIndex = 0x07; break;
            case SrsAudioSampleRate44100:
                samplingFrequencyIndex = 0x04; break;
            default:
                break;
        }
    }
    if (samplingFrequencyIndex >= SrsAacSampleRateUnset) {
        return srs_error_new(ERROR_AAC_DATA_INVALID, "invalid sample index %d", samplingFrequencyIndex);
    }

    char chs[2];
    // @see ISO_IEC_14496-3-AAC-2001.pdf
    // AudioSpecificConfig (), page 33
    // 1.6.2.1 AudioSpecificConfig
    // audioObjectType; 5 bslbf
    chs[0] = (audioObjectType << 3) & 0xf8;
    // 3bits left.

    // samplingFrequencyIndex; 4 bslbf
    chs[0] |= (samplingFrequencyIndex >> 1) & 0x07;
    chs[1] = (samplingFrequencyIndex << 7) & 0x80;
    // 7bits left.

    // channelConfiguration; 4 bslbf
    chs[1] |= (channelConfiguration << 3) & 0x78;
    // 3bits left.

    // GASpecificConfig(), page 451
    // 4.4.1 Decoder configuration (GASpecificConfig)
    // frameLengthFlag; 1 bslbf
    // dependsOnCoreCoder; 1 bslbf
    // extensionFlag; 1 bslbf
    sh = string((char*)chs, sizeof(chs));

    return err;
}

srs_error_t SrsRawAacStream::mux_aac2flv(char* frame, int nb_frame, SrsRawAacStreamCodec* codec, uint32_t dts, char** flv, int* nb_flv)
{
    srs_error_t err = srs_success;

    char sound_format = codec->sound_format;
    char sound_type = codec->sound_type;
    char sound_size = codec->sound_size;
    char sound_rate = codec->sound_rate;
    char aac_packet_type = codec->aac_packet_type;

    // for audio frame, there is 1 or 2 bytes header:
    //      1bytes, SoundFormat|SoundRate|SoundSize|SoundType
    //      1bytes, AACPacketType for SoundFormat == 10, 0 is sequence header.
    int size = nb_frame + 1;
    if (sound_format == SrsAudioCodecIdAAC) {
        size += 1;
    }
    char* data = new char[size];
    char* p = data;

    uint8_t audio_header = sound_type & 0x01;
    audio_header |= (sound_size << 1) & 0x02;
    audio_header |= (sound_rate << 2) & 0x0c;
    audio_header |= (sound_format << 4) & 0xf0;

    *p++ = audio_header;

    if (sound_format == SrsAudioCodecIdAAC) {
        *p++ = aac_packet_type;
    }

    memcpy(p, frame, nb_frame);

    *flv = data;
    *nb_flv = size;

    return err;
}