编码视频 | Z's blog

前言

此文主要介绍使用FFmpeg和x264两个静态库，将采集到的YUV420p数据编码为h264。

关于编码和FFmpeg & x264

从iPhone摄像头获取到的YUV420p数据体积非常大，每一帧图像内部以及相邻帧之间都存在大量冗余数据，所以需要先编码压缩再存储。H264是目前应用非常广泛的视频压缩算法，借助FFmpeg和x264开源库可以很便捷地对视频进行编码压缩，具体的原理可以看看《H264基本原理》。

解析CMSampleBufferRef

从pixelBuffer中取出Y分量和交错存放的UV分量（采集到的数据为NV12排列），按YUV420p的平面排列方式重新组织成Y、U、V三个分量后，调用sd_encode()进行编码操作。在编码前需先调用sd_init_encoder()初始化好编码器，具体见下一节的源码。

/*
 * Repacks the bi-planar (NV12-style: Y plane + interleaved UV plane) image
 * held by a CMSampleBufferRef into one tightly packed planar YUV420P buffer
 * (all Y, then all U, then all V) and passes it to sd_encode().
 *
 * Nothing is encoded when the sample buffer has no image buffer, the pixel
 * buffer cannot be locked, a plane address is missing, or the temporary
 * buffer cannot be allocated.
 */
- (void)encoderToH264:(CMSampleBufferRef)sampleBuffer
{
    if (sampleBuffer == NULL) {
        return;
    }

    CVPixelBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    if (pixelBuffer == NULL) {
        return;
    }

    // Lock and unlock must be balanced: bail out before the unlock call
    // when the lock itself failed.
    if (CVPixelBufferLockBaseAddress(pixelBuffer, 0) != kCVReturnSuccess) {
        return;
    }

    // Plane 0 holds Y, plane 1 holds interleaved U/V samples.
    const UInt8 *pY = (const UInt8 *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0);
    const UInt8 *pUV = (const UInt8 *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1);
    size_t width = CVPixelBufferGetWidth(pixelBuffer);
    size_t height = CVPixelBufferGetHeight(pixelBuffer);
    // Row strides may be larger than the visible width because of padding.
    size_t pYBytes = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
    size_t pUVBytes = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);

    UInt8 *pYUV420P = NULL;
    if (pY != NULL && pUV != NULL && width > 0 && height > 0) {
        // Packed layout: YYYYYYYY UU VV
        pYUV420P = (UInt8 *)malloc(width * height * 3 / 2);
    }

    if (pYUV420P != NULL) {
        UInt8 *pU = pYUV420P + (width * height);
        UInt8 *pV = pU + (width * height / 4);

        // Copy the Y plane row by row, dropping any per-row padding.
        for (size_t row = 0; row < height; row++) {
            memcpy(pYUV420P + row * width, pY + row * pYBytes, width);
        }

        // De-interleave UVUV... into separate U and V planes.
        for (size_t row = 0; row < height / 2; row++) {
            for (size_t col = 0; col < width / 2; col++) {
                *(pU++) = pUV[col << 1];
                *(pV++) = pUV[(col << 1) + 1];
            }
            pUV += pUVBytes;
        }

        // Hand the packed frame to the encoder only when it is initialized.
        if (1 == sd_is_encoder_valid()) {
            sd_encode(pYUV420P);
        }

        free(pYUV420P);
    }

    CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);
}

编码代码

以下代码用于处理编码：先调用sd_init_encoder()完成初始化，再使用sd_encode()进行编码，具体的参数在代码中有标注。

SDCEncoder.h

#ifndef SDCEncoder_h
#define SDCEncoder_h

#include <stdio.h>

/**
 Initialize the encoder.

 @param width encoded frame width
 @param height encoded frame height
 @param bitrate target bit rate
 @param out_file path of the output file
 @return 0 when initialization completes; -1 when a checked setup step fails
 */
int sd_init_encoder(int width, int height, int bitrate, const char * out_file);

/**
 Release the encoder resources.
 */
void sd_free_resource(void);

/**
 Whether the encoder is currently usable.

 @return 1 if usable, 0 if the encoder resources must be initialized again
 */
int sd_is_encoder_valid(void);

/**
 Encode one frame of image data.

 @param data image data
 */
void sd_encode(unsigned char *data);

/**
 When B-frames are enabled the codec context delays frame output; call this
 when capture ends to finish encoding the delayed frames.

 @return 0 is the documented success value
         NOTE(review): confirm the implementation actually returns 0 after a
         normal drain.
 */
int sd_flush_encode(void);

#endif /* SDCEncoder_h */

SDCEncoder.c

#include "SDCEncoder.h"
#ifdef __cplusplus
extern "C" {
#endif
#include "libavformat/avformat.h"
#include "libavformat/avio.h"
#include "libavcodec/avcodec.h"
#include <libavutil/imgutils.h>
#ifdef __cplusplus
};
#endif

/** 1 when resources are released; the uninitialized state counts as released */
static int is_released = 1;
/** Running frame index; also used as the PTS of submitted frames */
static int framecnt;
/** Image width */
static int encoder_h264_frame_width;
/** Image height */
static int encoder_h264_frame_height;
/** Number of samples in the Y plane (width * height) */
static int y_size;
/** Image buffer. NOTE(review): never assigned in the visible code, so it stays NULL */
static unsigned char *picture_buf;
/** Image size in bytes. NOTE(review): never assigned in the visible code, so it stays 0 */
static int picture_size;
/** Output format (muxer) context */
static AVFormatContext *pFormatCtx;
/** Output format */
static AVOutputFormat *fmt;
/** Video stream */
static AVStream *video_st;
/** Encoder context */
static AVCodecContext *pCodecCtx;
/** Encoder */
static struct AVCodec *pCodec;
/** Raw video frame handed to the encoder */
static AVFrame *pFrame;
/** Packet that receives encoded output */
static AVPacket pkt;

/**
 初始化编码器
 
 @param width 编码宽
 @param height 编码高
 @param bitrate 码率
 @param out_file 输出文件路径
 */
int sd_init_encoder(int width, int height, int bitrate, const char * out_file) {
    is_released = 0;
    // 1.默认从第0帧开始(记录当前的帧数)
    framecnt = 0;
    
    // 2.记录传入的宽度&高度
    encoder_h264_frame_width = width;
    encoder_h264_frame_height = height;
    
    // 3.注册FFmpeg所有编解码器(无论编码还是解码都需要该步骤)
    av_register_all();
    
    // 4.初始化AVFormatContext: 用作之后写入视频帧并编码成 h264，贯穿整个工程当中(释放资源时需要销毁)
    if (avformat_alloc_output_context2(&pFormatCtx, NULL, NULL, out_file) < 0) {
        printf("Failed to alloc AVFormatContext! \n");
        return -1;
    }
    
    // 5.设置输出文件的路径
    fmt = pFormatCtx->oformat;
    
    // 6.打开文件的缓冲区输入输出，flags 标识为  AVIO_FLAG_READ_WRITE ，可读写
    if (avio_open( &pFormatCtx->pb, out_file, AVIO_FLAG_READ_WRITE) < 0){
        printf("Failed to open output file! \n");
        return -1;
    }
    
    // 7.创建新的输出流, 用于写入文件
    video_st = avformat_new_stream(pFormatCtx, NULL);
    
    // 8.设置 30 帧每秒 ，也就是 fps 为 30
    video_st->time_base.num = 1;
    video_st->time_base.den = 30;
    
    if (video_st==NULL){
        return -1;
    }
    
    // 9.pCodecCtx 用户存储编码所需的参数格式等等
    // 9.1.从媒体流中获取到编码结构体，他们是一一对应的关系，一个 AVStream 对应一个  AVCodecContext
    // Param that must set
    pCodecCtx = avcodec_alloc_context3(pCodec);

    // 9.2.设置编码器的编码格式(是一个id)，每一个编码器都对应着自己的 id，例如 h264 的编码 id 就是 AV_CODEC_ID_H264
    pCodecCtx->codec_id = fmt->video_codec;
    
    // 9.3.设置编码类型为 视频编码
    pCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
    
    // 9.4.设置像素格式为 yuv 格式
    pCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
    
    // 9.5.设置视频的宽高
    pCodecCtx->width = encoder_h264_frame_width;
    pCodecCtx->height = encoder_h264_frame_height;
    
    // 9.6.设置帧率
    pCodecCtx->time_base.num = 1;
    pCodecCtx->time_base.den = 30;
    
    // 9.7.设置码率（比特率）
    pCodecCtx->bit_rate = bitrate;
    
    // 9.8.视频质量度量标准(常见qmin=10, qmax=51)
    pCodecCtx->qmin = 10;
    pCodecCtx->qmax = 51;
    
    // 9.9.设置图像组层的大小(GOP-->两个I帧之间的间隔)
    pCodecCtx->gop_size = 250;
    
    pCodecCtx->max_b_frames = 0;
    
    // Set Option
    AVDictionary *param = NULL;
    if(pCodecCtx->codec_id == AV_CODEC_ID_H264) {
        //调节编码速度和质量的平衡
        av_dict_set(&param, "preset", "slow", 0);
        //零延迟，用在需要非常低的延迟的情况下
        av_dict_set(&param, "tune", "zerolatency", 0);
    }
    
    pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
    if (!pCodec) {
        printf("Can not find encoder!");
    }
    
    if (avcodec_open2(pCodecCtx, pCodec, &param) < 0) {
        printf("Failed to open encoder!");
    }
    
    avcodec_parameters_from_context(video_st->codecpar, pCodecCtx);
    
    av_dump_format(pFormatCtx, 0, out_file, 1);
    
    // 13.初始化原始数据对象: AVFrame
    pFrame = av_frame_alloc();
    pFrame->width = pCodecCtx->width;
    pFrame->height = pCodecCtx->height;
    pFrame->format = AV_PIX_FMT_YUV420P;
    pFrame->color_range = AVCOL_RANGE_MPEG;
    
    // 14.通过像素格式(这里为 YUV)获取图片的真实大小，例如将 480 * 720 转换成 int 类型
    av_image_fill_arrays(pFrame->data, pFrame->linesize, picture_buf, pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, 1);
    
    
    // 15.h264 封装格式的文件头部，基本上每种编码都有着自己的格式的头部，想看具体实现的同学可以看看 h264 的具体实现
    if (AVSTREAM_INIT_IN_WRITE_HEADER != avformat_write_header(pFormatCtx, NULL) ) {
        printf("avformat_write_header fail \n");
        return -1;
    }
    
    // 16.创建编码后的数据 AVPacket 结构体来存储 AVFrame 编码后生成的数据
    av_new_packet(&pkt, picture_size);
    
    // 17.设置 yuv 数据中 y 图的宽高
    y_size = pCodecCtx->width * pCodecCtx->height;
    
    return 0;
}

/**
 释放资源
 */
void sd_free_resource(void) {
    if (1 == is_released) {
        return;
    }
    // 2.将还未输出的AVPacket输出出来
    av_write_trailer(pFormatCtx);
    
    // 3.关闭资源
    if (pCodecCtx){
        avcodec_close(pCodecCtx);
        avcodec_free_context(&pCodecCtx);
        pCodecCtx = NULL;
        
    }
    
    if (pFrame) {
        av_free(pFrame);
        pFrame = NULL;
    }
    avio_close(pFormatCtx->pb);
    avformat_free_context(pFormatCtx);
    
    framecnt = 0;
    is_released = 1;
}

/**
 Report whether the encoder can currently be used.

 @return 1 when a codec context exists, 0 when the encoder resources must be
         initialized again
 */
int sd_is_encoder_valid(void) {
    return (pCodecCtx != NULL) ? 1 : 0;
}

/**
 Encode one frame and write every packet the encoder produces to the output.

 The call is ignored when data is NULL or the encoder is not initialized.

 @param data tightly packed planar YUV420P image: width*height Y bytes,
             followed by the U plane and then the V plane (a quarter of the
             Y size each)
 */
void sd_encode(unsigned char *data) {
    if (data == NULL || pCodecCtx == NULL || pFrame == NULL ||
        pFormatCtx == NULL || video_st == NULL) {
        return;
    }

    // Point the frame planes into the caller's buffer (no copy is made).
    pFrame->data[0] = data;                            // Y
    pFrame->data[1] = pFrame->data[0] + y_size;        // U
    pFrame->data[2] = pFrame->data[1] + y_size / 4;    // V
    // PTS in codec time base units. framecnt advances once per written
    // packet. NOTE(review): this assumes the encoder emits one packet per
    // submitted frame - confirm for configurations that buffer frames.
    pFrame->pts = framecnt;

    int ret = avcodec_send_frame(pCodecCtx, pFrame);
    if (ret < 0) {
        printf("Error sending frame to encoder\n");
        return;
    }

    for (;;) {
        ret = avcodec_receive_packet(pCodecCtx, &pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            // The encoder needs more input or is fully drained.
            return;
        }
        if (ret < 0) {
            printf("Error during encoding\n");
            return;
        }

        printf("Succeed to encode frame: %5d\tsize:%5db\n", framecnt, pkt.size);
        framecnt++;
        pkt.stream_index = video_st->index;
        // The muxer may have changed the stream time base while writing the
        // header, so convert timestamps from the codec time base.
        av_packet_rescale_ts(&pkt, pCodecCtx->time_base, video_st->time_base);
        ret = av_write_frame(pFormatCtx, &pkt);

        av_packet_unref(&pkt);

        if (ret < 0) {
            printf("Error writing encoded packet\n");
            return;
        }
    }
}

/**
 Drain the encoder. When the codec delays output (for example with B-frames),
 call this at the end of capture so the delayed frames are encoded and
 written.

 @return 0 when the encoder was drained completely; a negative value when the
         encoder is not initialized, draining could not be started, encoding
         failed, or a packet could not be written
 */
int sd_flush_encode(void) {
    if (1 == is_released || pCodecCtx == NULL ||
        pFormatCtx == NULL || video_st == NULL) {
        return -1;
    }

    // A NULL frame switches the encoder into draining mode.
    if (avcodec_send_frame(pCodecCtx, NULL) != 0) {
        return -1;
    }

    for (;;) {
        AVPacket enc_pkt;
        av_init_packet(&enc_pkt);
        enc_pkt.data = NULL;
        enc_pkt.size = 0;

        int ret = avcodec_receive_packet(pCodecCtx, &enc_pkt);
        if (ret == AVERROR_EOF) {
            // End of stream is the normal way draining finishes.
            return 0;
        }
        if (ret == AVERROR(EAGAIN)) {
            return -1;
        }
        if (ret < 0) {
            printf("Error during encoding\n");
            return -1;
        }

        printf("Succeed to flush frame: %5d\tsize:%5d \n", framecnt, enc_pkt.size);
        framecnt++;
        enc_pkt.stream_index = video_st->index;
        // Convert timestamps from the codec time base to the stream's.
        av_packet_rescale_ts(&enc_pkt, pCodecCtx->time_base, video_st->time_base);
        ret = av_write_frame(pFormatCtx, &enc_pkt);
        av_packet_unref(&enc_pkt);

        if (ret < 0) {
            return ret;
        }
    }
}

参考

《H264基本原理》
《h264编码参数》