5. RTP transmission AAC

Goal of this article: send an AAC file over RTP so that opening the SDP file with VLC plays the audio.

1. RTP package

This part was already introduced in the previous article; it is repeated here in case you did not read that article.

1.1 RTP data structure

The RTP packet format was introduced in detail earlier; please refer to the article that explains the RTSP protocol.

Look at the format of an RTP header to recall

           

Each RTP packet contains such an RTP header and RTP payload. For convenience, I will encapsulate this header into a structure and send packets into a function. Let ’s take a look

  • RTP head structure
  /*
   * Fixed 12-byte RTP header (the optional CSRC list is not part of this struct).
   *
   * NOTE: the order in which a compiler packs bit-fields into a byte is
   * implementation-defined. Listing csrcLen first and version last matches the
   * on-wire layout (version in the two most significant bits of byte 0) only
   * when bit-fields are allocated starting from the least significant bit, as
   * GCC does on little-endian targets.
   *
   * seq, timestamp and ssrc are kept in host byte order; rtpSendPacket converts
   * them to network byte order for the duration of the send.
   */
  struct RtpHeader
  {
      /* byte 0 */
      uint8_t csrcLen:4;      /* CC: CSRC count */
      uint8_t extension:1;    /* X: header extension flag */
      uint8_t padding:1;      /* P: padding flag */
      uint8_t version:2;      /* V: RTP version */
  
      /* byte 1 */
      uint8_t payloadType:7;  /* PT: payload type */
      uint8_t marker:1;       /* M: marker bit */
      
      /* bytes 2,3 */
      uint16_t seq;           /* sequence number */
      
      /* bytes 4-7 */
      uint32_t timestamp;
      
      /* bytes 8-11 */
      uint32_t ssrc;          /* synchronization source identifier */
  };

Here `:n` declares a bit-field that is n bits wide; the fields of this structure correspond one-to-one with the fields of the RTP header.

  • RTP sending function

    RTP package

struct RtpPacket
{
    struct RtpHeader rtpHeader;
    uint8_t payload[0];
};

This is the RTP packet structure that I defined. It contains an RTP header and the RTP payload. The trailing `payload` array member does not occupy any space of its own; it represents the address immediately following the RTP header.

RTP sending function

  /*
   * Send one RTP packet over UDP.
   *
   * socket:    local UDP socket descriptor
   * ip:        destination IPv4 address in dotted-decimal notation
   * port:      destination UDP port
   * rtpPacket: packet to send; its header fields are held in host byte order
   * dataSize:  number of payload bytes that follow the RTP header
   *
   * Returns the number of bytes sent, or -1 on error (missing argument,
   * unparsable address, or sendto() failure).
   */
  int rtpSendPacket(int socket, char* ip, int16_t port, struct RtpPacket* rtpPacket, uint32_t dataSize)
  {
      struct sockaddr_in addr = {0};  /* zeroed so no uninitialized bytes reach sendto() */
      int ret;

      if(!ip || !rtpPacket)
          return -1;

      addr.sin_family = AF_INET;
      addr.sin_port = htons((uint16_t)port);

      /*
       * inet_pton reports a malformed address explicitly; inet_addr would
       * return INADDR_NONE, which is indistinguishable from 255.255.255.255.
       */
      if(inet_pton(AF_INET, ip, &addr.sin_addr) != 1)
          return -1;

      /* RTP is big-endian on the wire: convert the multi-byte header fields in place */
      rtpPacket->rtpHeader.seq = htons(rtpPacket->rtpHeader.seq);
      rtpPacket->rtpHeader.timestamp = htonl(rtpPacket->rtpHeader.timestamp);
      rtpPacket->rtpHeader.ssrc = htonl(rtpPacket->rtpHeader.ssrc);

      ret = sendto(socket, (void*)rtpPacket, dataSize+RTP_HEADER_SIZE, 0,
                      (struct sockaddr*)&addr, sizeof(addr));

      /* restore host byte order so the caller can keep incrementing seq/timestamp */
      rtpPacket->rtpHeader.seq = ntohs(rtpPacket->rtpHeader.seq);
      rtpPacket->rtpHeader.timestamp = ntohl(rtpPacket->rtpHeader.timestamp);
      rtpPacket->rtpHeader.ssrc = ntohl(rtpPacket->rtpHeader.ssrc);

      return ret;
  }

 

Look carefully at this function, you should be able to understand

After we have built a packet, we call this function to send it to the specified destination.

This function calls htons/htonl in several places: RTP uses network byte order (big-endian), so the multi-byte header fields must be converted from host byte order to network byte order before sending.

The source code of rtp.h and rtp.c is given below; these two files are used frequently in later articles.

1.2 Source code

rtp.h 

#ifndef _RTP_H_
#define _RTP_H_
#include <stdint.h>

/* RTP protocol version written into the header's V field */
#define RTP_VESION              2

/* RTP payload type numbers used for the H.264 and AAC streams */
#define RTP_PAYLOAD_TYPE_H264   96
#define RTP_PAYLOAD_TYPE_AAC    97

/* Size in bytes of the fixed RTP header (struct RtpHeader) */
#define RTP_HEADER_SIZE         12
/* NOTE(review): presumably an upper bound on the size of one RTP packet; it is not referenced by the code shown here - confirm */
#define RTP_MAX_PKT_SIZE        1400

/*
 *
 *    0                   1                   2                   3
 *    7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0|7 6 5 4 3 2 1 0
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *   |V=2|P|X|  CC   |M|     PT      |       sequence number         |
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *   |                           timestamp                           |
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *   |           synchronization source (SSRC) identifier            |
 *   +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
 *   |            contributing source (CSRC) identifiers             |
 *   :                             ....                              :
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 */
/*
 * In-memory form of the fixed 12-byte RTP header drawn above (the optional
 * CSRC list is not part of this struct).
 *
 * NOTE: the order in which a compiler packs bit-fields into a byte is
 * implementation-defined. Listing csrcLen first and version last matches the
 * on-wire layout (version in the two most significant bits of byte 0) only
 * when bit-fields are allocated starting from the least significant bit, as
 * GCC does on little-endian targets.
 *
 * seq, timestamp and ssrc are kept in host byte order; rtpSendPacket converts
 * them to network byte order for the duration of the send.
 */
struct RtpHeader
{
    /* byte 0 */
    uint8_t csrcLen:4;      /* CC: CSRC count */
    uint8_t extension:1;    /* X: header extension flag */
    uint8_t padding:1;      /* P: padding flag */
    uint8_t version:2;      /* V: RTP version */

    /* byte 1 */
    uint8_t payloadType:7;  /* PT: payload type */
    uint8_t marker:1;       /* M: marker bit */
    
    /* bytes 2,3 */
    uint16_t seq;           /* sequence number */
    
    /* bytes 4-7 */
    uint32_t timestamp;
    
    /* bytes 8-11 */
    uint32_t ssrc;          /* synchronization source identifier */
};

struct RtpPacket
{
    struct RtpHeader rtpHeader;
    uint8_t payload[0];
};

/*
 * Store the given values into every field of rtpPacket's RTP header.
 * Values are stored exactly as passed (host byte order).
 */
void rtpHeaderInit(struct RtpPacket* rtpPacket, uint8_t csrcLen, uint8_t extension,
                    uint8_t padding, uint8_t version, uint8_t payloadType, uint8_t marker,
                   uint16_t seq, uint32_t timestamp, uint32_t ssrc);
/*
 * Send rtpPacket's header plus dataSize payload bytes to ip:port through the
 * given UDP socket. Returns the number of bytes sent, or a negative value on
 * failure.
 */
int rtpSendPacket(int socket, char* ip, int16_t port, struct RtpPacket* rtpPacket, uint32_t dataSize);

#endif //_RTP_H_

 rtp.c

#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include "rtp.h"

void rtpHeaderInit(struct RtpPacket* rtpPacket, uint8_t csrcLen, uint8_t extension,
                    uint8_t padding, uint8_t version, uint8_t payloadType, uint8_t marker,
                   uint16_t seq, uint32_t timestamp, uint32_t ssrc)
{
    rtpPacket->rtpHeader.csrcLen = csrcLen;
    rtpPacket->rtpHeader.extension = extension;
    rtpPacket->rtpHeader.padding = padding;
    rtpPacket->rtpHeader.version = version;
    rtpPacket->rtpHeader.payloadType =  payloadType;
    rtpPacket->rtpHeader.marker = marker;
    rtpPacket->rtpHeader.seq = seq;
    rtpPacket->rtpHeader.timestamp = timestamp;
    rtpPacket->rtpHeader.ssrc = ssrc;
}

/*
 * Send one RTP packet over UDP.
 *
 * socket:    local UDP socket descriptor
 * ip:        destination IPv4 address in dotted-decimal notation
 * port:      destination UDP port
 * rtpPacket: packet to send; its header fields are held in host byte order
 * dataSize:  number of payload bytes that follow the RTP header
 *
 * Returns the number of bytes sent, or -1 on error (missing argument,
 * unparsable address, or sendto() failure).
 */
int rtpSendPacket(int socket, char* ip, int16_t port, struct RtpPacket* rtpPacket, uint32_t dataSize)
{
    struct sockaddr_in addr = {0};  /* zeroed so no uninitialized bytes reach sendto() */
    int ret;

    if(!ip || !rtpPacket)
        return -1;

    addr.sin_family = AF_INET;
    addr.sin_port = htons((uint16_t)port);

    /*
     * inet_pton reports a malformed address explicitly; inet_addr would
     * return INADDR_NONE, which is indistinguishable from 255.255.255.255.
     */
    if(inet_pton(AF_INET, ip, &addr.sin_addr) != 1)
        return -1;

    /* RTP is big-endian on the wire: convert the multi-byte header fields in place */
    rtpPacket->rtpHeader.seq = htons(rtpPacket->rtpHeader.seq);
    rtpPacket->rtpHeader.timestamp = htonl(rtpPacket->rtpHeader.timestamp);
    rtpPacket->rtpHeader.ssrc = htonl(rtpPacket->rtpHeader.ssrc);

    ret = sendto(socket, (void*)rtpPacket, dataSize+RTP_HEADER_SIZE, 0,
                    (struct sockaddr*)&addr, sizeof(addr));

    /* restore host byte order so the caller can keep incrementing seq/timestamp */
    rtpPacket->rtpHeader.seq = ntohs(rtpPacket->rtpHeader.seq);
    rtpPacket->rtpHeader.timestamp = ntohl(rtpPacket->rtpHeader.timestamp);
    rtpPacket->rtpHeader.ssrc = ntohl(rtpPacket->rtpHeader.ssrc);

    return ret;
}

 

2. AAC RTP packaging

2.1 AAC format

An AAC audio file consists of a sequence of ADTS frames; each ADTS frame contains an ADTS header followed by AAC data, as shown below

The ADTS header is usually 7 bytes long and contains information about this frame of data; its content is as follows

 

 

The meaning of each field is as follows

syncword

Always 0xFFF, represents the beginning of an ADTS frame, used for synchronization.

ID

MPEG Version: 0 for MPEG-4,1 for MPEG-2

Layer

always: ‘00’

protection_absent

Note the inverted sense: set to 1 if there is no CRC and 0 if there is a CRC

profile

Indicates which level of AAC to use, such as 01 Low Complexity (LC) – AAC LC

sampling_frequency_index

Subscript of sampling rate

aac_frame_length

The length of an ADTS frame includes the ADTS header and AAC original stream

adts_buffer_fullness

0x7FF means that the code rate is variable

number_of_raw_data_blocks_in_frame

Indicates that there are number_of_raw_data_blocks_in_frame + 1 AAC original frame in the ADTS frame

Mainly remember here that the ADTS header is usually 7 bytes, and the header contains aac_frame_length, indicating the size of the ADTS frame
 

2.2 AAC RTP packaging method

AAC's RTP packaging is not as varied as H.264's; I know of only one method. The main reason is that one frame of AAC data is consistently a few hundred bytes, unlike H.264, where a frame can be as small as a few bytes or as large as several thousand

AAC's RTP packaging method is to strip the ADTS header from each ADTS frame, take out the AAC data, and encapsulate each frame of data into one RTP packet

It should be noted that the AAC data is not copied directly into the RTP payload. AAC is encapsulated into RTP packets. The first four bytes in the RTP payload have a special meaning , and then the AAC data, as shown in the following figure

The first byte of the RTP payload is 0x00, and the second byte is 0x10

The third and fourth bytes hold the size of the AAC data, which is at most 13 bits: the third byte holds the high 8 bits of the size, and the top 5 bits of the fourth byte hold the low 5 bits of the size

2.3 Time stamp calculation of AAC RTP packet

Suppose the sampling rate of audio is 44100, that is, 44100 samples per second

AAC generally encodes 1024 samples into one frame, so there are 44100/1024 = 43 frames in one second

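The timestamp increment for each frame of data sent in an RTP packet is 44100/43 = 1025 (this figure is an artifact of rounding 44100/1024 = 43.07 down to 43; because the RTP clock runs at the sample rate, each 1024-sample frame strictly advances the timestamp by exactly 1024)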

The time interval of each frame of data is 1000/43 = 23ms

2.4 Source code

The source code of the aac file sent by rtp is given below. The program extracts the AAC data of each frame from the aac file, and then sends RTP to the destination.

How to get AAC Data?
This example reads the 7-byte ADTS header first, then obtains the frame size, and then reads out the AAC Data

rtp_aac.c


#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>

#include "rtp.h"

/* Path of the ADTS AAC file that main() opens and streams */
#define AAC_FILE    "test.aac"
/* Destination UDP port that main() passes to rtpSendAACFrame */
#define CLIENT_PORT 9832

/*
 * Decoded fields of a 7-byte ADTS header. Each member holds one header field
 * as a plain unsigned value; the comment gives the field's width in the
 * bitstream.
 */
struct AdtsHeader
{
    unsigned int syncword;  /* 12 bit: sync word '1111 1111 1111', marks the start of an ADTS frame */
    unsigned int id;        /* 1 bit: MPEG identifier, 0 for MPEG-4, 1 for MPEG-2 */
    unsigned int layer;     /* 2 bit: always '00' */
    unsigned int protectionAbsent;  /* 1 bit: 1 means no CRC, 0 means a CRC is present */
    unsigned int profile;           /* 2 bit: which AAC profile is used */
    unsigned int samplingFreqIndex; /* 4 bit: index of the sampling frequency */
    unsigned int privateBit;        /* 1 bit */
    unsigned int channelCfg;        /* 3 bit: channel configuration */
    unsigned int originalCopy;      /* 1 bit */
    unsigned int home;              /* 1 bit */

    /* The fields below are variable, i.e. they may differ in every frame */
    unsigned int copyrightIdentificationBit;   /* 1 bit */
    unsigned int copyrightIdentificationStart; /* 1 bit */
    unsigned int aacFrameLength;               /* 13 bit: length of the ADTS frame, ADTS header plus raw AAC stream */
    unsigned int adtsBufferFullness;           /* 11 bit: 0x7FF means a variable-bit-rate stream */

    /* number_of_raw_data_blocks_in_frame
     * The ADTS frame contains number_of_raw_data_blocks_in_frame + 1 raw AAC
     * frames, so a value of 0 means the frame holds one AAC data block, not
     * none. (One raw AAC frame carries 1024 samples plus related data.)
     */
    unsigned int numberOfRawDataBlockInFrame; /* 2 bit */
};

/*
 * Decode the 7-byte ADTS header at `in` into `res` and print every field.
 *
 * in:  at least 7 readable bytes starting at the ADTS sync word
 * res: receives the decoded fields; it is zeroed first, so on failure all
 *      fields are 0
 *
 * Returns 0 on success, -1 if an argument is NULL or the 12-bit sync word
 * (0xFFF) is not found at the start of `in`.
 */
static int parseAdtsHeader(uint8_t* in, struct AdtsHeader* res)
{
    if(!in || !res)
        return -1;

    memset(res,0,sizeof(*res));

    if((in[0] != 0xFF) || ((in[1] & 0xF0) != 0xF0))
    {
        printf("failed to parse adts header\n");
        return -1;
    }

    /* previously left at 0 even for a valid header */
    res->syncword = ((unsigned int) in[0] << 4) | ((unsigned int) in[1] >> 4);

    res->id = ((unsigned int) in[1] & 0x08) >> 3;
    printf("adts:id  %d\n", res->id);
    res->layer = ((unsigned int) in[1] & 0x06) >> 1;
    printf( "adts:layer  %d\n", res->layer);
    res->protectionAbsent = (unsigned int) in[1] & 0x01;
    printf( "adts:protection_absent  %d\n", res->protectionAbsent);
    res->profile = ((unsigned int) in[2] & 0xc0) >> 6;
    printf( "adts:profile  %d\n", res->profile);
    res->samplingFreqIndex = ((unsigned int) in[2] & 0x3c) >> 2;
    printf( "adts:sf_index  %d\n", res->samplingFreqIndex);
    res->privateBit = ((unsigned int) in[2] & 0x02) >> 1;
    printf( "adts:pritvate_bit  %d\n", res->privateBit);
    /* channel_configuration straddles bytes 2 and 3 */
    res->channelCfg = ((((unsigned int) in[2] & 0x01) << 2) | (((unsigned int) in[3] & 0xc0) >> 6));
    printf( "adts:channel_configuration  %d\n", res->channelCfg);
    res->originalCopy = ((unsigned int) in[3] & 0x20) >> 5;
    printf( "adts:original  %d\n", res->originalCopy);
    res->home = ((unsigned int) in[3] & 0x10) >> 4;
    printf( "adts:home  %d\n", res->home);
    res->copyrightIdentificationBit = ((unsigned int) in[3] & 0x08) >> 3;
    printf( "adts:copyright_identification_bit  %d\n", res->copyrightIdentificationBit);
    /*
     * Parentheses are required: '>>' binds tighter than '&', so the old
     * `in[3] & 0x04 >> 2` tested bit 0 instead of bit 2.
     */
    res->copyrightIdentificationStart = ((unsigned int) in[3] & 0x04) >> 2;
    printf( "adts:copyright_identification_start  %d\n", res->copyrightIdentificationStart);
    /* 13-bit frame length: 2 bits from byte 3, 8 bits from byte 4, 3 bits from byte 5 */
    res->aacFrameLength = (((((unsigned int) in[3]) & 0x03) << 11) |
                            (((unsigned int)in[4] & 0xFF) << 3) |
                                ((unsigned int)in[5] & 0xE0) >> 5) ;
    printf( "adts:aac_frame_length  %d\n", res->aacFrameLength);
    /* 11-bit buffer fullness: 5 bits from byte 5, 6 bits from byte 6 */
    res->adtsBufferFullness = (((unsigned int) in[5] & 0x1f) << 6 |
                                    ((unsigned int) in[6] & 0xfc) >> 2);
    printf( "adts:adts_buffer_fullness  %d\n", res->adtsBufferFullness);
    res->numberOfRawDataBlockInFrame = ((unsigned int) in[6] & 0x03);
    printf( "adts:no_raw_data_blocks_in_frame  %d\n", res->numberOfRawDataBlockInFrame);

    return 0;
}

/*
 * Create an IPv4 UDP socket with SO_REUSEADDR enabled.
 *
 * Returns the socket descriptor, or -1 if socket() fails. Failure to set
 * SO_REUSEADDR is reported on stderr but is not fatal, because the option is
 * best-effort here.
 */
static int createUdpSocket(void)
{
    int fd;
    int on = 1;

    fd = socket(AF_INET, SOCK_DGRAM, 0);
    if(fd < 0)
        return -1;

    if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on)) < 0)
        perror("setsockopt(SO_REUSEADDR)");

    return fd;
}

/*
 * Wrap one raw AAC frame (ADTS header already stripped) in an RTP packet and
 * send it.
 *
 * socket, ip, port: passed through to rtpSendPacket
 * rtpPacket: caller-owned packet buffer with an initialized header; it must
 *            have room for at least frameSize + 4 payload bytes
 * frame:     the AAC data to send
 * frameSize: number of bytes in frame; must fit in 13 bits
 *
 * On success the header's sequence number is advanced by 1 and its timestamp
 * by one frame, and 0 is returned. Returns -1 if frameSize does not fit the
 * 13-bit size field or the send fails; the header is left unchanged then.
 */
static int rtpSendAACFrame(int socket, char* ip, int16_t port,
                            struct RtpPacket* rtpPacket, uint8_t* frame, uint32_t frameSize)
{
    int ret;

    /* The size field written below is only 13 bits wide; a larger value would be silently truncated */
    if(frameSize > 0x1FFF)
    {
        printf("aac frame too large for rtp: %u bytes\n", (unsigned int)frameSize);
        return -1;
    }

    /* 4-byte prefix: 0x00 0x10, then the 13-bit frame size in the top bits of the next two bytes */
    rtpPacket->payload[0] = 0x00;
    rtpPacket->payload[1] = 0x10;
    rtpPacket->payload[2] = (frameSize & 0x1FE0) >> 5; /* high 8 bits */
    rtpPacket->payload[3] = (frameSize & 0x1F) << 3;   /* low 5 bits */

    memcpy(rtpPacket->payload+4, frame, frameSize);

    ret = rtpSendPacket(socket, ip, port, rtpPacket, frameSize+4);
    if(ret < 0)
    {
        printf("failed to send rtp packet\n");
        return -1;
    }

    rtpPacket->rtpHeader.seq++;

    /*
     * The RTP clock runs at the audio sample rate and one AAC frame normally
     * carries 1024 samples, so every frame advances the timestamp by exactly
     * 1024 at any sample rate. The former value 1025 came from rounding
     * 44100 / 1024 down to 43 frames per second and made the timestamps
     * drift ahead of the audio.
     */
    rtpPacket->rtpHeader.timestamp += 1024;

    return 0;
}

int main(int argc, char* argv[])
{
    int fd;
    int ret;
    int socket;
    uint8_t* frame;
    struct AdtsHeader adtsHeader;
    struct RtpPacket* rtpPacket;

    if(argc != 2)
    {
        printf("Usage: %s <dest ip>\n", argv[0]);
        return -1;
    }

    fd = open(AAC_FILE, O_RDONLY);
    if(fd < 0)
    {
        printf("failed to open %s\n", AAC_FILE);
        return -1;
    }    

    socket = createUdpSocket();
    if(socket < 0)
    {
        printf("failed to create udp socket\n");
        return -1;
    }

    frame = (uint8_t*)malloc(5000);
    rtpPacket = malloc(5000);

    rtpHeaderInit(rtpPacket, 0, 0, 0, RTP_VESION, RTP_PAYLOAD_TYPE_AAC, 1, 0, 0, 0x32411);

    while(1)
    {
        printf("--------------------------------\n");

        ret = read(fd, frame, 7);
        if(ret <= 0)
        {
            lseek(fd, 0, SEEK_SET);
            continue;            
        }

        if(parseAdtsHeader(frame, &adtsHeader) < 0)
        {
            printf("parse err\n");
            break;
        }

        ret = read(fd, frame, adtsHeader.aacFrameLength-7);
        if(ret < 0)
        {
            printf("read err\n");
            break;
        }

        rtpSendAACFrame(socket, argv[1], CLIENT_PORT,
                        rtpPacket, frame, adtsHeader.aacFrameLength-7);

        usleep(23000);
    }

    close(fd);
    close(socket);

    free(frame);
    free(rtpPacket);

    return 0;
}

3. AAC sdp media description

The media description information of AAC is given below

m=audio 9832 RTP/AVP 97
a=rtpmap:97 mpeg4-generic/44100/2
a=fmtp:97 SizeLength=13;
c=IN IP4 127.0.0.1

**m=audio 9832 RTP/AVP 97 **

The format is m = <media type> <port number> <transport protocol> <media format>
Media type: audio, which means this is an audio stream

Port number: 9832, indicating that the destination port for UDP sending is 9832

Transmission protocol: RTP / AVP, which means RTP OVER UDP, sending RTP packets through UDP

Media format: indicates the payload type, generally 97 is used to indicate AAC

a=rtpmap:97 mpeg4-generic/44100/2

The format is a = rtpmap: <media format> <encoding format> / <clock frequency> / [channel]

mpeg4-generic means encoding, 44100 means clock frequency, 2 means dual channel

c=IN IP4 127.0.0.1

IN: indicates internet

IP4: indicates IPV4

127.0.0.1: indicates that the destination address sent by UDP is 127.0.0.1

Special note: the destination IP sent by udp described in this sdp file is 127.0.0.1, and the destination port is 9832

4. Test

Save the source code given above as rtp.c, rtp.h, and rtp_aac.c, and save the SDP file as rtp_aac.sdp

Note: by default the program opens test.aac. If you do not have an audio source, you can get one from the example directory of RtspServer

Compile and run

# gcc rtp.c rtp_aac.c
# ./a.out 127.0.0.1

The IP address here must match the destination address described in the SDP file

Open the SDP file with VLC

# vlc rtp_aac.sdp

At this point you should be able to hear the audio. The next article explains how to write an RTSP server that sends AAC

 

Published 115 original articles · Like 29 · Visitors 50,000+

Guess you like

Origin blog.csdn.net/huabiaochen/article/details/104576088
RTP