Requirements
Encoding video on iOS generally requires only one encoder per project, but special requirements sometimes call for two encoders working simultaneously. The encoder class implemented in this example can quickly generate the required encoder by specifying an enumeration value for the encoder type, and two encoders can work side by side.
Implementation principle:
iOS uses the VideoToolbox framework to perform hardware video encoding, and it supports both H.264 and H.265 encoders.
Software encoding: encoding with the CPU.
Hardware encoding: encoding with dedicated hardware such as a GPU, DSP, FPGA, or ASIC chip instead of the CPU.
Prerequisites for reading:
- Basic knowledge of audio and video
- Recommended reading: H264, H265 hardware codec basics and bit stream analysis
- Video capture: iOS video capture in practice (AVCaptureSession)
- C, C++
GitHub address (with code): Video Encoder
Juejin address: Video Encoder
Jianshu address: Video Encoder
Blog address: Video Encoder
Test results
This example tests encoding efficiency by writing the encoded stream to a .mov file. With the same recording duration and essentially the same scene, the result shows that H.265 needs only about half the file size of H.264 for the same picture quality. Note that the recorded files contain a raw Annex-B stream and can only be played with FFmpeg-based tools (e.g. ffplay with the raw demuxer forced: ffplay -f h264, or ffplay -f hevc).
Implementation steps
1. Initialize encoder parameters
The encoder class in this example is not a singleton, because we may need to generate an H.264 encoder and an H.265 encoder as two different encoder objects at the same time. The width, height, and frame rate specified here must be consistent with the camera. The bit rate is the average bit rate during playback. Whether real-time encoding is required is controlled by the isSupportRealTimeEncode flag. Finally, we create either an H.264 or an H.265 encoder by specifying the encoder type.
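Because the class is not a singleton, both encoder types can coexist in one capture pipeline. A minimal usage sketch, assuming the same camera feeds both instances (parameter values are illustrative):

XDXVideoEncoder *h264Encoder = [[XDXVideoEncoder alloc] initWithWidth:1280 height:720 fps:30 bitrate:2048 isSupportRealTimeEncode:NO encoderType:XDXH264Encoder];
XDXVideoEncoder *h265Encoder = [[XDXVideoEncoder alloc] initWithWidth:1280 height:720 fps:30 bitrate:2048 isSupportRealTimeEncode:NO encoderType:XDXH265Encoder];
// Each captured frame can then be handed to both encoders independently.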
- Check whether the encoder is supported
Not every device supports the H.265 encoder; this is determined by the hardware, and there is no direct API to query it. Here we use the AVAssetExportPresetHEVCHighestQuality preset of AVAssetExportSession to determine indirectly whether H.265 encoding is supported.
Note: the H.265 encoding API is available only on iOS 11 and later. All popular iPhones already support the H.264 encoder.
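For reuse outside the initializer, the same indirect check can be wrapped in a small helper. A minimal sketch of the AVAssetExportSession-based check described above (the method name isSupportH265 is illustrative, not from the original code; requires AVFoundation):

+ (BOOL)isSupportH265 {
    if (@available(iOS 11.0, *)) {
        // The HEVC export preset is only listed on hardware that can encode H.265.
        return [[AVAssetExportSession allExportPresets] containsObject:AVAssetExportPresetHEVCHighestQuality];
    }
    return NO;
}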
// You could select h264 / h265 encoder.
self.videoEncoder = [[XDXVideoEncoder alloc] initWithWidth:1280
height:720
fps:30
bitrate:2048
isSupportRealTimeEncode:NO
encoderType:XDXH265Encoder]; // XDXH264Encoder
- (instancetype)initWithWidth:(int)width height:(int)height fps:(int)fps bitrate:(int)bitrate isSupportRealTimeEncode:(BOOL)isSupportRealTimeEncode encoderType:(XDXVideoEncoderType)encoderType {
    if (self = [super init]) {
        mSession   = NULL;
        mVideoFile = NULL;
        _width     = width;
        _height    = height;
        _fps       = fps;
        _bitrate   = bitrate << 10;  // convert kbps to bps
        _errorCount       = 0;
        _isSupportEncoder = NO;
        _encoderType      = encoderType;
        _lock             = [[NSLock alloc] init];
        _isSupportRealTimeEncode    = isSupportRealTimeEncode;
        _needResetKeyParamSetBuffer = YES;

        if (encoderType == XDXH265Encoder) {
            if (@available(iOS 11.0, *)) {
                if ([[AVAssetExportSession allExportPresets] containsObject:AVAssetExportPresetHEVCHighestQuality]) {
                    _isSupportEncoder = YES;
                }
            }
        } else if (encoderType == XDXH264Encoder) {
            _isSupportEncoder = YES;
        }

        log4cplus_info("Video Encoder:", "Init encoder width:%d, height:%d, fps:%d, bitrate:%d, is support realtime encode:%d, encoder type:H%lu", width, height, fps, bitrate, isSupportRealTimeEncode, (unsigned long)encoderType);
    }
    return self;
}
2. Initialize the encoder
Initializing an encoder consists of three steps: first, create a VTCompressionSessionRef object to manage the encoder; then set all the encoder's properties on that session; finally, pre-allocate some resources (that is, memory for the data to be encoded) for the encode buffers before encoding starts.
- (void)configureEncoderWithWidth:(int)width height:(int)height {
    log4cplus_info("Video Encoder:", "configure encoder with width and height for init, width = %d, height = %d", width, height);

    if (width == 0 || height == 0) {
        log4cplus_error("Video Encoder:", "encoder params can't be zero. width:%d, height:%d", width, height);
        return;
    }

    self.width  = width;
    self.height = height;

    mSession = [self configureEncoderWithEncoderType:self.encoderType
                                            callback:EncodeCallBack
                                               width:self.width
                                              height:self.height
                                                 fps:self.fps
                                             bitrate:self.bitrate
                             isSupportRealtimeEncode:self.isSupportRealTimeEncode
                                      iFrameDuration:30
                                                lock:self.lock];
}
- (VTCompressionSessionRef)configureEncoderWithEncoderType:(XDXVideoEncoderType)encoderType callback:(VTCompressionOutputCallback)callback width:(int)width height:(int)height fps:(int)fps bitrate:(int)bitrate isSupportRealtimeEncode:(BOOL)isSupportRealtimeEncode iFrameDuration:(int)iFrameDuration lock:(NSLock *)lock {
    log4cplus_info("Video Encoder:", "configure encoder width:%d, height:%d, fps:%d, bitrate:%d, is support realtime encode:%d, I frame duration:%d", width, height, fps, bitrate, isSupportRealtimeEncode, iFrameDuration);

    [lock lock];

    // Create compression session
    VTCompressionSessionRef session = [self createCompressionSessionWithEncoderType:encoderType
                                                                              width:width
                                                                             height:height
                                                                           callback:callback];

    // Set compression properties
    [self setCompressionSessionPropertyWithSession:session
                                               fps:fps
                                           bitrate:bitrate
                           isSupportRealtimeEncode:isSupportRealtimeEncode
                                    iFrameDuration:iFrameDuration
                                       EncoderType:encoderType];

    // Prepare to encode
    OSStatus status = VTCompressionSessionPrepareToEncodeFrames(session);
    [lock unlock];

    if (status != noErr) {
        log4cplus_error("Video Encoder:", "create encoder failed, status: %d", (int)status);
        return NULL;
    } else {
        log4cplus_info("Video Encoder:", "create encoder success");
        return session;
    }
}
2.1. Create the VTCompressionSessionRef object
VTCompressionSessionCreate creates the video encoder session, i.e. the context object that manages encoding.
- Allocator: the session's memory allocator. Passing NULL uses the default allocator.
- Width, height: the encoder's pixel width and height, consistent with the resolution of the captured video.
- CodecType: the encoder type. HEVC (H.265) is the successor to H.264 with higher compression performance, but it was only released in iOS 11 and still has some bugs.
- EncoderSpecification: forces the use of a particular encoder. NULL is usually fine; Video Toolbox will choose one itself.
- SourceImageBufferAttributes: attributes required of the source video frames, mainly used to create a pixel buffer pool.
- CompressedDataAllocator: memory allocator for the compressed data. Passing NULL uses the default allocator.
- OutputCallback: the callback that receives the compressed data. It may be invoked synchronously or asynchronously: in synchronous mode it runs on the same thread as the VTCompressionSessionEncodeFrame call; in asynchronous mode it runs on a new thread. This parameter may be NULL only if we encode with VTCompressionSessionEncodeFrameWithOutputHandler.
- OutputCallbackRefCon: user-defined data passed through to the callback, mainly used for interaction between the callback function and the main class.
- CompressionSessionOut: the address at which to store the created session. It must not be NULL.
VT_EXPORT OSStatus
VTCompressionSessionCreate(
    CM_NULLABLE CFAllocatorRef               allocator,
    int32_t                                  width,
    int32_t                                  height,
    CMVideoCodecType                         codecType,
    CM_NULLABLE CFDictionaryRef              encoderSpecification,
    CM_NULLABLE CFDictionaryRef              sourceImageBufferAttributes,
    CM_NULLABLE CFAllocatorRef               compressedDataAllocator,
    CM_NULLABLE VTCompressionOutputCallback  outputCallback,
    void * CM_NULLABLE                       outputCallbackRefCon,
    CM_RETURNS_RETAINED_PARAMETER CM_NULLABLE VTCompressionSessionRef * CM_NONNULL compressionSessionOut) API_AVAILABLE(macosx(10.8), ios(8.0), tvos(10.2));
The implementation follows. Note that if the camera capture resolution changes, you must destroy the current encoder session and create a new one (a teardown sketch follows the method below).
- (VTCompressionSessionRef)createCompressionSessionWithEncoderType:(XDXVideoEncoderType)encoderType width:(int)width height:(int)height callback:(VTCompressionOutputCallback)callback {
    CMVideoCodecType codecType;
    if (encoderType == XDXH264Encoder) {
        codecType = kCMVideoCodecType_H264;
    } else if (encoderType == XDXH265Encoder) {
        codecType = kCMVideoCodecType_HEVC;
    } else {
        return NULL;
    }

    VTCompressionSessionRef session;
    OSStatus status = VTCompressionSessionCreate(NULL,
                                                 width,
                                                 height,
                                                 codecType,
                                                 NULL,
                                                 NULL,
                                                 NULL,
                                                 callback,
                                                 (__bridge void *)self,
                                                 &session);
    if (status != noErr) {
        log4cplus_error("Video Encoder:", "%s: Create session failed:%d", __func__, (int)status);
        return NULL;
    } else {
        return session;
    }
}
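As noted above, the session must be destroyed and recreated when the capture resolution changes. The article does not show the teardown in this section; a minimal sketch, assuming the mSession ivar and lock declared earlier (finish pending frames, invalidate, release, then reconfigure):

[self.lock lock];
if (mSession != NULL) {
    // Flush all frames still in flight before tearing the session down.
    VTCompressionSessionCompleteFrames(mSession, kCMTimeInvalid);
    VTCompressionSessionInvalidate(mSession);
    CFRelease(mSession);
    mSession = NULL;
}
[self.lock unlock];
// Then call -configureEncoderWithWidth:height: with the new resolution.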
2.2. Set session properties
- Query whether the session supports a property
After the session is created, calling VTSessionCopySupportedPropertyDictionary copies all the properties the current session supports into the given dictionary; before setting any property, query this dictionary to confirm that the property is supported.
- (BOOL)isSupportPropertyWithSession:(VTCompressionSessionRef)session key:(CFStringRef)key {
    OSStatus status;
    static CFDictionaryRef supportedPropertyDictionary;
    if (!supportedPropertyDictionary) {
        status = VTSessionCopySupportedPropertyDictionary(session, &supportedPropertyDictionary);
        if (status != noErr) {
            return NO;
        }
    }

    BOOL isSupport = CFDictionaryContainsKey(supportedPropertyDictionary, key);
    return isSupport;
}
- Set session properties
Use the VTSessionSetProperty function to specify the key and value to set the property.
- (OSStatus)setSessionPropertyWithSession:(VTCompressionSessionRef)session key:(CFStringRef)key value:(CFTypeRef)value {
    if (value == NULL) {
        return noErr;
    }
    OSStatus status = VTSessionSetProperty(session, key, value);
    if (status != noErr) {
        log4cplus_error("Video Encoder:", "Set session of %s failed, status = %d", CFStringGetCStringPtr(key, kCFStringEncodingUTF8), status);
    }
    return status;
}
- kVTCompressionPropertyKey_MaxFrameDelayCount: the maximum number of frames the encoder is allowed to hold before it must output compressed frames. Defaults to kVTUnlimitedFrameDelayCount, i.e. no limit on held frames. For example, with a maximum delay of M = 3, by the time frame N = 10 is submitted for encoding, frames up to N - M = 7 must already have been delivered to the encoding callback, while the encoder may still hold the remaining M frames unencoded.
- kVTCompressionPropertyKey_ExpectedFrameRate: the expected frame rate, measured in video frames received per second. This property does not control the frame rate; it is only a hint that lets the encoder set up its internal configuration before encoding. The actual frame durations come from the video frames themselves and may vary. The default is 0, meaning unknown.
- kVTCompressionPropertyKey_AverageBitRate: the long-term average bit rate. This is not an absolute cap, and the instantaneous bit rate may be higher. The default is 0, meaning the encoder decides the size of the compressed data. Note that bit rate settings are only effective when timing information is provided with the raw frames, and some codecs do not support limiting to a specified bit rate.
- kVTCompressionPropertyKey_DataRateLimits: zero, one, or two optional hard limits on the data rate. Each hard limit is described by a data size in bytes and a duration in seconds, and requires that the total size of compressed data in any contiguous segment of that duration (in decode time) not exceed the data size. By default, no data rate limits are set. The property is a CFArray of an even number of CFNumbers, alternating bytes and seconds. As with the average bit rate, it only takes effect when timing information is provided with the raw frames, and some codecs do not support limiting to a specified data rate. (A sketch of setting it follows this list.)
- kVTCompressionPropertyKey_RealTime: whether compression runs in real time. False indicates the encoder may work slower than real time to produce better results; set it to true for more timely encoding. The default is NULL, meaning unknown.
- kVTCompressionPropertyKey_AllowFrameReordering: whether the encoder may emit B frames, which require the encoder to reorder frames in time. Defaults to true; set it to false to prevent frame reordering. Note: on iOS, B frames are generally not used for camera capture.
- kVTCompressionPropertyKey_ProfileLevel: the profile and level of the encoded bitstream. The available profiles and levels vary by format and video encoder. Video encoders should use the standard keys where available rather than custom modes.
- kVTCompressionPropertyKey_H264EntropyMode: the entropy coding mode for H.264 compression. If the H.264 encoder supports it, this property controls whether it uses context-based adaptive variable-length coding (CAVLC) or context-based adaptive binary arithmetic coding (CABAC). CABAC generally compresses better at the expense of higher computational cost. The default is encoder-specific and may change based on other encoder settings. Use this property with care: a change may make the configuration incompatible with the requested profile and level, in which case the result is undefined and may include encoding errors or a non-conforming output stream.
- kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration: the longest duration, in seconds, from one keyframe to the next. The default is 0, meaning no limit. This property is especially useful when the frame rate is variable. It can be set together with kVTCompressionPropertyKey_MaxKeyFrameInterval, in which case both limits are enforced: one keyframe every X frames or every Y seconds, whichever comes first.
- kVTCompressionPropertyKey_MaxKeyFrameInterval: the maximum interval between keyframes, in frames. Keyframes, also known as I frames, reset inter-frame dependencies; decoding a keyframe is sufficient to prepare the decoder to correctly decode the differential frames that follow it. The video encoder is allowed to generate keyframes more frequently if that produces more efficient compression. The default is 0, meaning the encoder chooses where to place all keyframes. An interval of 1 means every frame must be a keyframe, 2 means at least every other frame, and so on. It can be set together with kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, in which case both limits are enforced: one keyframe every X frames or every Y seconds, whichever comes first.
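As referenced in the list above, here is a minimal sketch of a hard data-rate limit, which this example describes but does not actually set; the values are illustrative (at most bytesPerSecond bytes in any one-second window):

int bitrateBps = 2048 << 10;                   // e.g. a 2048 kbps target, in bps
int bytesPerSecond = bitrateBps / 8;           // convert bits to bytes
NSArray *limits = @[@(bytesPerSecond), @(1)];  // alternating values: bytes, seconds
if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_DataRateLimits]) {
    [self setSessionPropertyWithSession:session
                                    key:kVTCompressionPropertyKey_DataRateLimits
                                  value:(__bridge CFArrayRef)limits];
}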
// Set compression properties
[self setCompressionSessionPropertyWithSession:session
fps:fps
bitrate:bitrate
isSupportRealtimeEncode:isSupportRealtimeEncode
iFrameDuration:iFrameDuration
EncoderType:encoderType];
- (void)setCompressionSessionPropertyWithSession:(VTCompressionSessionRef)session fps:(int)fps bitrate:(int)bitrate isSupportRealtimeEncode:(BOOL)isSupportRealtimeEncode iFrameDuration:(int)iFrameDuration EncoderType:(XDXVideoEncoderType)encoderType {
    int maxCount = 3;
    if (!isSupportRealtimeEncode) {
        if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_MaxFrameDelayCount]) {
            CFNumberRef ref = CFNumberCreate(NULL, kCFNumberSInt32Type, &maxCount);
            [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_MaxFrameDelayCount value:ref];
            CFRelease(ref);
        }
    }

    if (fps) {
        if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_ExpectedFrameRate]) {
            int value = fps;
            CFNumberRef ref = CFNumberCreate(NULL, kCFNumberSInt32Type, &value);
            [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_ExpectedFrameRate value:ref];
            CFRelease(ref);
        }
    } else {
        log4cplus_error("Video Encoder:", "Current fps is 0");
        return;
    }

    if (bitrate) {
        if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_AverageBitRate]) {
            int value = bitrate << 10;
            CFNumberRef ref = CFNumberCreate(NULL, kCFNumberSInt32Type, &value);
            [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_AverageBitRate value:ref];
            CFRelease(ref);
        }
    } else {
        log4cplus_error("Video Encoder:", "Current bitrate is 0");
        return;
    }

    if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_RealTime]) {
        log4cplus_info("Video Encoder:", "use realTimeEncoder");
        [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_RealTime value:isSupportRealtimeEncode ? kCFBooleanTrue : kCFBooleanFalse];
    }

    // Ban B frames.
    if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_AllowFrameReordering]) {
        [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_AllowFrameReordering value:kCFBooleanFalse];
    }

    if (encoderType == XDXH264Encoder) {
        if (isSupportRealtimeEncode) {
            if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_ProfileLevel]) {
                [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_ProfileLevel value:kVTProfileLevel_H264_Main_AutoLevel];
            }
        } else {
            if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_ProfileLevel]) {
                [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_ProfileLevel value:kVTProfileLevel_H264_Baseline_AutoLevel];
            }
            if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_H264EntropyMode]) {
                [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_H264EntropyMode value:kVTH264EntropyMode_CAVLC];
            }
        }
    } else if (encoderType == XDXH265Encoder) {
        if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_ProfileLevel]) {
            [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_ProfileLevel value:kVTProfileLevel_HEVC_Main_AutoLevel];
        }
    }

    if ([self isSupportPropertyWithSession:session key:kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration]) {
        int value = iFrameDuration;
        CFNumberRef ref = CFNumberCreate(NULL, kCFNumberSInt32Type, &value);
        [self setSessionPropertyWithSession:session key:kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration value:ref];
        CFRelease(ref);
    }

    log4cplus_info("Video Encoder:", "The compression session max frame delay count = %d, expected frame rate = %d, average bitrate = %d, is support realtime encode = %d, I frame duration = %d", maxCount, fps, bitrate, isSupportRealtimeEncode, iFrameDuration);
}
2.3. Pre-allocate encoding resources
You may optionally call this function to give the encoder a chance to perform any necessary resource allocation before frame encoding begins (memory is pre-allocated for the data to be encoded). If you do not call it, any necessary resources are allocated on the first VTCompressionSessionEncodeFrame call. Additional calls to this function have no effect.
// Prepare to encode
OSStatus status = VTCompressionSessionPrepareToEncodeFrames(session);
[lock unlock];

if (status != noErr) {
    log4cplus_error("Video Encoder:", "create encoder failed, status: %d", (int)status);
    return NULL;
} else {
    log4cplus_info("Video Encoder:", "create encoder success");
    return session;
}
At this point the encoder is initialized, and we now need to feed it video frame data. In this example, AVCaptureSession is used to capture the video frames passed to the encoder.
3. Encoding
Note that because the encoding thread and the creation and destruction of the encoder run asynchronously with respect to each other, a lock is required.
- Timestamp synchronization
First, we take the first video frame as the reference point, recording the system time at which it is encoded as the base time. This is mainly used for audio/video synchronization, which this example does not cover. Also, a real timestamp generation scheme is not as simple as the one used here; you can define your own generation rules.
- Timestamp correction
Check whether the timestamp of the current frame is greater than that of the previous frame. Video is played strictly in timestamp order, so timestamps must increase monotonically. However, the encoder's video source may change, for example from camera capture to raw frames decoded from a network stream; the timestamps are then out of sync, and forcing such frames into the encoder would make the picture stutter.
- Encode the video frame
- Session: the previously configured session.
- ImageBuffer: the raw video frame.
- PresentationTimeStamp: the PTS of the video frame.
- Duration: the duration of this frame, to be attached to the sample buffer. Pass kCMTimeInvalid if no duration information is available.
- FrameProperties: additional properties of the video frame, such as whether to force keyframe generation.
- SourceFrameRefcon: a reference to the original frame that is passed through to the callback function.
- InfoFlagsOut: points to a VTEncodeInfoFlags that receives information about the encode operation. The kVTEncodeInfo_Asynchronous bit may be set if the encode is (or was) running asynchronously; the kVTEncodeInfo_FrameDropped bit may be set if the frame was dropped (synchronously). Pass NULL if you do not want to receive this information.
VT_EXPORT OSStatus
VTCompressionSessionEncodeFrame(
    CM_NONNULL VTCompressionSessionRef  session,
    CM_NONNULL CVImageBufferRef         imageBuffer,
    CMTime                              presentationTimeStamp,
    CMTime                              duration, // may be kCMTimeInvalid
    CM_NULLABLE CFDictionaryRef         frameProperties,
    void * CM_NULLABLE                  sourceFrameRefcon,
    VTEncodeInfoFlags * CM_NULLABLE     infoFlagsOut) API_AVAILABLE(macosx(10.8), ios(8.0), tvos(10.2));
- (void)startEncodeWithBuffer:(CMSampleBufferRef)sampleBuffer session:(VTCompressionSessionRef)session isNeedFreeBuffer:(BOOL)isNeedFreeBuffer isDrop:(BOOL)isDrop needForceInsertKeyFrame:(BOOL)needForceInsertKeyFrame lock:(NSLock *)lock {
    [lock lock];

    if (session == NULL) {
        log4cplus_error("Video Encoder:", "%s, session is empty", __func__);
        [self handleEncodeFailedWithIsNeedFreeBuffer:isNeedFreeBuffer sampleBuffer:sampleBuffer];
        return;
    }

    // The first frame must be an I frame; use it to create the reference timestamp.
    static BOOL isFirstFrame = YES;
    if (isFirstFrame && g_capture_base_time == 0) {
        CMTime pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
        g_capture_base_time = CMTimeGetSeconds(pts);  // system absolute time (s)
        // g_capture_base_time = g_tvustartcaptureTime - (ntp_time_offset/1000);
        isFirstFrame = NO;
        log4cplus_error("Video Encoder:", "start capture time = %u", g_capture_base_time);
    }

    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    CMTime presentationTimeStamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);

    // Switching between different source data shows mosaic because the timestamps are not in sync.
    static int64_t lastPts = 0;
    int64_t currentPts = (int64_t)(CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) * 1000);
    if (currentPts - lastPts < 0) {
        log4cplus_error("Video Encoder:", "Switch different source data the timestamp < last timestamp, currentPts = %lld, lastPts = %lld, duration = %lld", currentPts, lastPts, currentPts - lastPts);
        [self handleEncodeFailedWithIsNeedFreeBuffer:isNeedFreeBuffer sampleBuffer:sampleBuffer];
        return;
    }
    lastPts = currentPts;

    OSStatus status = noErr;
    NSDictionary *properties = @{(__bridge NSString *)kVTEncodeFrameOptionKey_ForceKeyFrame: @(needForceInsertKeyFrame)};
    status = VTCompressionSessionEncodeFrame(session,
                                             imageBuffer,
                                             presentationTimeStamp,
                                             kCMTimeInvalid,
                                             (__bridge CFDictionaryRef)properties,
                                             NULL,
                                             NULL);
    if (status != noErr) {
        log4cplus_error("Video Encoder:", "encode frame failed");
        [self handleEncodeFailedWithIsNeedFreeBuffer:isNeedFreeBuffer sampleBuffer:sampleBuffer];
        return;  // the failure handler releases the lock and, if needed, the buffer
    }

    [lock unlock];

    if (isNeedFreeBuffer) {
        if (sampleBuffer != NULL) {
            CFRelease(sampleBuffer);
            log4cplus_debug("Video Encoder:", "release the sample buffer");
        }
    }
}
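The helper -handleEncodeFailedWithIsNeedFreeBuffer:sampleBuffer: used above is not shown in this section. A minimal sketch under the assumption that its call sites return immediately while still holding the lock, so the helper must release the lock and, when requested, the sample buffer (the error counting mirrors the _errorCount ivar initialized earlier; the exact body is an assumption, not the original code):

- (void)handleEncodeFailedWithIsNeedFreeBuffer:(BOOL)isNeedFreeBuffer sampleBuffer:(CMSampleBufferRef)sampleBuffer {
    // Call sites hold self.lock and return right after calling this helper.
    [self.lock unlock];
    self.errorCount += 1;
    if (isNeedFreeBuffer && sampleBuffer != NULL) {
        CFRelease(sampleBuffer);
    }
}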
4. The H264/H265 bitstream: see H264, H265 hardware codec basics and bit stream analysis
If you don't understand the bitstream handling below, first read the link recommended in the title above; it covers codec basics and how the data structures in the iOS VideoToolbox framework are parsed.
5. The callback function
- Error check
If status carries an error code, encoding of this frame failed, and you can handle it specially.
- Timestamp correction
We need to fill in timestamps for the encoded data. You can generate them by your own rules; here we use the simplest offset scheme: the system time recorded before the first video frame is encoded serves as the reference point, and for each subsequent frame the timestamp of the encoded data is the captured timestamp minus that base time.
- Find I frames
After encoding, the raw video data becomes I frames, B frames, and P frames. B frames are generally not enabled on iOS because they require reordering. After obtaining the encoded data, we first determine whether it is an I frame through the kCMSampleAttachmentKey_DependsOnOthers attachment; if so, we extract the NALU header key information from it, namely VPS, SPS, and PPS (the VPS exists only for the H.265 encoder). A stream without these cannot be played on the receiving end or recorded to a file.
- Read the encoder's key information
The VPS, SPS, and PPS data can be read from the I frame. For the H.264 encoder, call CMVideoFormatDescriptionGetH264ParameterSetAtIndex; for the H.265 encoder, call CMVideoFormatDescriptionGetHEVCParameterSetAtIndex. The index values 0, 1, and 2 passed as the second parameter select which parameter set to read.
After retrieving these parameter sets, we need to concatenate them, because they are independent NALUs: the start code 0x00 0x00 0x00 0x01 serves as the delimiter that separates VPS, SPS, and PPS.
So the VPS, SPS, and PPS are spliced, each preceded by a 00 00 00 01 delimiter, into one complete contiguous buffer. This example writes the stream to a file, so the NALU header information must be written first, i.e. we must start with an I frame: an I frame represents a complete image, while a P frame depends on an I frame to produce an image, so reading the file back must begin with I frame data.
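For intuition, the resulting Annex-B layout looks like the following; the header bytes shown are the standard H.264 NAL header values for these NALU types, and the payload bytes are elided:

// Illustrative only; payloads elided.
static const uint8_t kAnnexBHeaderExample[] = {
    0x00, 0x00, 0x00, 0x01, 0x67, /* SPS ... */
    0x00, 0x00, 0x00, 0x01, 0x68, /* PPS ... */
    0x00, 0x00, 0x00, 0x01, 0x65, /* IDR slice ... */
};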
- The relationship between an image and NALUs
After passing through the H.264 encoder, a frame is encoded into one or more slices, and the carrier of these slices is called a NALU.
Note: a slice is a different concept from a frame. A frame describes an image and corresponds to one image, whereas the slice is a new concept introduced by H.264: an image is encoded and then divided into slices for efficient transport. An image has at least one slice, and slices are carried and transmitted by NALUs. This does not mean, however, that every NALU carries a slice; that is a sufficient but not necessary condition, because a NALU may also carry other information that describes the video.
- Split the NALUs in the bitstream
First obtain the frame data through CMBlockBufferGetDataPointer. One frame here is an H.264/H.265 bitstream that may contain multiple NALUs. In a while loop we find each NALU and replace its 4-byte big-endian length prefix with the start code 00 00 00 01, because the raw stream produced by the encoder has no start codes; we must copy them in ourselves.
CFSwapInt32BigToHost: converts the big-endian length field produced by the encoder to host byte order.
static void EncodeCallBack(void *outputCallbackRefCon, void *sourceFrameRefCon, OSStatus status, VTEncodeInfoFlags infoFlags, CMSampleBufferRef sampleBuffer) {
    XDXVideoEncoder *encoder = (__bridge XDXVideoEncoder *)outputCallbackRefCon;

    if (status != noErr) {
        NSError *error = [NSError errorWithDomain:NSOSStatusErrorDomain code:status userInfo:nil];
        NSLog(@"H264: vtCallBack failed with %@", error);
        log4cplus_error("TVUEncoder", "encode frame failed! %s", error.debugDescription.UTF8String);
        return;
    }

    if (!encoder.isSupportEncoder) {
        return;
    }

    CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sampleBuffer);
    CMTime pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer);
    CMTime dts = CMSampleBufferGetDecodeTimeStamp(sampleBuffer);

    // Use our own time base (the time is used to sync audio and video).
    int64_t ptsAfter = (int64_t)((CMTimeGetSeconds(pts) - g_capture_base_time) * 1000);
    int64_t dtsAfter = (int64_t)((CMTimeGetSeconds(dts) - g_capture_base_time) * 1000);
    dtsAfter = ptsAfter;

    /* Sometimes the relative dts is zero; provide a workaround to restore the dts. */
    static int64_t last_dts = 0;
    if (dtsAfter == 0) {
        dtsAfter = last_dts + 33;
    } else if (dtsAfter == last_dts) {
        dtsAfter = dtsAfter + 1;
    }

    BOOL isKeyFrame = NO;
    CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, false);
    if (attachments != NULL) {
        CFDictionaryRef attachment = (CFDictionaryRef)CFArrayGetValueAtIndex(attachments, 0);
        CFBooleanRef dependsOnOthers = (CFBooleanRef)CFDictionaryGetValue(attachment, kCMSampleAttachmentKey_DependsOnOthers);
        isKeyFrame = (dependsOnOthers == kCFBooleanFalse);
    }

    if (isKeyFrame) {
        static uint8_t *keyParameterSetBuffer    = NULL;
        static size_t  keyParameterSetBufferSize = 0;

        // Note: the NALU header will not change if the video resolution does not change.
        if (keyParameterSetBufferSize == 0 || YES == encoder.needResetKeyParamSetBuffer) {
            const uint8_t *vps, *sps, *pps;
            size_t vpsSize, spsSize, ppsSize;
            int NALUnitHeaderLengthOut;
            size_t parmCount;

            if (keyParameterSetBuffer != NULL) {
                free(keyParameterSetBuffer);
            }

            CMFormatDescriptionRef format = CMSampleBufferGetFormatDescription(sampleBuffer);
            if (encoder.encoderType == XDXH264Encoder) {
                CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format, 0, &sps, &spsSize, &parmCount, &NALUnitHeaderLengthOut);
                CMVideoFormatDescriptionGetH264ParameterSetAtIndex(format, 1, &pps, &ppsSize, &parmCount, &NALUnitHeaderLengthOut);

                keyParameterSetBufferSize = spsSize + 4 + ppsSize + 4;
                keyParameterSetBuffer = (uint8_t *)malloc(keyParameterSetBufferSize);
                memcpy(keyParameterSetBuffer, "\x00\x00\x00\x01", 4);
                memcpy(&keyParameterSetBuffer[4], sps, spsSize);
                memcpy(&keyParameterSetBuffer[4 + spsSize], "\x00\x00\x00\x01", 4);
                memcpy(&keyParameterSetBuffer[4 + spsSize + 4], pps, ppsSize);

                log4cplus_info("Video Encoder:", "H264 find IDR frame, spsSize: %zu, ppsSize: %zu", spsSize, ppsSize);
            } else if (encoder.encoderType == XDXH265Encoder) {
                CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(format, 0, &vps, &vpsSize, &parmCount, &NALUnitHeaderLengthOut);
                CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(format, 1, &sps, &spsSize, &parmCount, &NALUnitHeaderLengthOut);
                CMVideoFormatDescriptionGetHEVCParameterSetAtIndex(format, 2, &pps, &ppsSize, &parmCount, &NALUnitHeaderLengthOut);

                keyParameterSetBufferSize = vpsSize + 4 + spsSize + 4 + ppsSize + 4;
                keyParameterSetBuffer = (uint8_t *)malloc(keyParameterSetBufferSize);
                memcpy(keyParameterSetBuffer, "\x00\x00\x00\x01", 4);
                memcpy(&keyParameterSetBuffer[4], vps, vpsSize);
                memcpy(&keyParameterSetBuffer[4 + vpsSize], "\x00\x00\x00\x01", 4);
                memcpy(&keyParameterSetBuffer[4 + vpsSize + 4], sps, spsSize);
                memcpy(&keyParameterSetBuffer[4 + vpsSize + 4 + spsSize], "\x00\x00\x00\x01", 4);
                memcpy(&keyParameterSetBuffer[4 + vpsSize + 4 + spsSize + 4], pps, ppsSize);

                log4cplus_info("Video Encoder:", "H265 find IDR frame, vpsSize : %zu, spsSize : %zu, ppsSize : %zu", vpsSize, spsSize, ppsSize);
            }

            encoder.needResetKeyParamSetBuffer = NO;
        }

        if (encoder.isNeedRecord) {
            if (encoder->mVideoFile == NULL) {
                [encoder initSaveVideoFile];
                log4cplus_info("Video Encoder:", "Start video record.");
            }
            fwrite(keyParameterSetBuffer, 1, keyParameterSetBufferSize, encoder->mVideoFile);
        }

        log4cplus_info("Video Encoder:", "Load a I frame.");
    }

    size_t blockBufferLength;
    uint8_t *bufferDataPointer = NULL;
    CMBlockBufferGetDataPointer(block, 0, NULL, &blockBufferLength, (char **)&bufferDataPointer);

    size_t bufferOffset = 0;
    while (bufferOffset < blockBufferLength - kStartCodeLength) {
        uint32_t NALUnitLength = 0;
        memcpy(&NALUnitLength, bufferDataPointer + bufferOffset, kStartCodeLength);
        NALUnitLength = CFSwapInt32BigToHost(NALUnitLength);
        // Replace the 4-byte length prefix with the Annex-B start code.
        memcpy(bufferDataPointer + bufferOffset, kStartCode, kStartCodeLength);
        bufferOffset += kStartCodeLength + NALUnitLength;
    }

    if (encoder.isNeedRecord && encoder->mVideoFile != NULL) {
        fwrite(bufferDataPointer, 1, blockBufferLength, encoder->mVideoFile);
    } else {
        if (encoder->mVideoFile != NULL) {
            fclose(encoder->mVideoFile);
            encoder->mVideoFile = NULL;
            log4cplus_info("Video Encoder:", "Stop video record.");
        }
    }

    log4cplus_debug("Video Encoder:", "H265 encoded video:%lld, size:%lu, interval:%lld", dtsAfter, blockBufferLength, dtsAfter - last_dts);
    last_dts = dtsAfter;
}
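The -initSaveVideoFile method called in the callback is not shown in this section either. A minimal sketch, assuming the mVideoFile ivar declared earlier; the directory and file names are illustrative, not the original code's:

- (void)initSaveVideoFile {
    NSString *documents = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES).firstObject;
    NSString *fileName  = (self.encoderType == XDXH265Encoder) ? @"record_h265.mov" : @"record_h264.mov";
    NSString *path      = [documents stringByAppendingPathComponent:fileName];
    // Open the record file for binary writing; the callback fwrites the Annex-B stream into it.
    mVideoFile = fopen(path.UTF8String, "wb");
    if (mVideoFile == NULL) {
        log4cplus_error("Video Encoder:", "open record file failed: %s", path.UTF8String);
    }
}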