// HaishinKit.swift/Sources/Codec/H264Encoder.swift
import AVFoundation
import CoreFoundation
import VideoToolbox
#if os(iOS)
import UIKit
#endif
protocol VideoEncoderDelegate: AnyObject {
    func didSetFormatDescription(video formatDescription: CMFormatDescription?)
    func sampleOutput(video sampleBuffer: CMSampleBuffer)
}
// MARK: -
public final class H264Encoder {
    public enum Option: String, KeyPathRepresentable, CaseIterable {
        case muted
        case width
        case height
        case bitrate
        case profileLevel
        #if os(macOS)
        case enabledHardwareEncoder
        #endif
        case maxKeyFrameIntervalDuration
        case scalingMode
        case multiPassCount

        public var keyPath: AnyKeyPath {
            switch self {
            case .muted:
                return \H264Encoder.muted
            case .width:
                return \H264Encoder.width
            case .height:
                return \H264Encoder.height
            case .bitrate:
                return \H264Encoder.bitrate
            #if os(macOS)
            case .enabledHardwareEncoder:
                return \H264Encoder.enabledHardwareEncoder
            #endif
            case .maxKeyFrameIntervalDuration:
                return \H264Encoder.maxKeyFrameIntervalDuration
            case .scalingMode:
                return \H264Encoder.scalingMode
            case .profileLevel:
                return \H264Encoder.profileLevel
            case .multiPassCount:
                return \H264Encoder.multiPassCount
            }
        }
    }
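
    // Illustrative note (not in the original source): each `Option` case maps to a
    // stored property via `keyPath`, which is what lets the `settings` container
    // below route subscript writes onto this encoder, roughly like this (assuming
    // HaishinKit's `Setting` subscript):
    //
    //     encoder.settings[.bitrate] = UInt32(1_000_000)
    //     encoder.settings[.profileLevel] = kVTProfileLevel_H264_High_4_1 as String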
    private struct ImageBuffer {
        let image: CVImageBuffer
        let presentationTimeStamp: CMTime
        let duration: CMTime
    }
    public static let defaultWidth: Int32 = 480
    public static let defaultHeight: Int32 = 272
    public static let defaultBitrate: UInt32 = 160 * 1024
    public static let defaultScalingMode: ScalingMode = .trim

    #if os(iOS)
    static let defaultAttributes: [NSString: AnyObject] = [
        kCVPixelBufferIOSurfacePropertiesKey: [:] as AnyObject,
        kCVPixelBufferOpenGLESCompatibilityKey: kCFBooleanTrue
    ]
    #else
    static let defaultAttributes: [NSString: AnyObject] = [
        kCVPixelBufferIOSurfacePropertiesKey: [:] as AnyObject,
        kCVPixelBufferOpenGLCompatibilityKey: kCFBooleanTrue
    ]
    #endif
    public var settings: Setting<H264Encoder, Option> = [:] {
        didSet {
            settings.observer = self
        }
    }
    public private(set) var isRunning: Atomic<Bool> = .init(false)
    var muted: Bool = false
    var scalingMode: ScalingMode = H264Encoder.defaultScalingMode {
        didSet {
            guard scalingMode != oldValue else {
                return
            }
            invalidateSession = true
        }
    }
    var width: Int32 = H264Encoder.defaultWidth {
        didSet {
            guard width != oldValue else {
                return
            }
            invalidateSession = true
        }
    }
    var height: Int32 = H264Encoder.defaultHeight {
        didSet {
            guard height != oldValue else {
                return
            }
            invalidateSession = true
        }
    }
    #if os(macOS)
    var enabledHardwareEncoder: Bool = true {
        didSet {
            guard enabledHardwareEncoder != oldValue else {
                return
            }
            invalidateSession = true
        }
    }
    #endif
    var bitrate: UInt32 = H264Encoder.defaultBitrate {
        didSet {
            guard bitrate != oldValue else {
                return
            }
            try? session?.setProperty(.averageBitRate, value: Int(bitrate) as CFTypeRef)
        }
    }
    var profileLevel: String = kVTProfileLevel_H264_Baseline_3_1 as String {
        didSet {
            guard profileLevel != oldValue else {
                return
            }
            invalidateSession = true
        }
    }
    var maxKeyFrameIntervalDuration: Double = 2.0 {
        didSet {
            guard maxKeyFrameIntervalDuration != oldValue else {
                return
            }
            invalidateSession = true
        }
    }
    var locked: UInt32 = 0
    var lockQueue = DispatchQueue(label: "com.haishinkit.HaishinKit.H264Encoder.lock")
    var expectedFPS: Float64 = AVMixer.defaultFPS {
        didSet {
            guard expectedFPS != oldValue else {
                return
            }
            try? session?.setProperty(.expectedFrameRate, value: NSNumber(value: expectedFPS))
        }
    }
    var formatDescription: CMFormatDescription? {
        didSet {
            guard !CMFormatDescriptionEqual(formatDescription, otherFormatDescription: oldValue) else {
                return
            }
            delegate?.didSetFormatDescription(video: formatDescription)
        }
    }
    var multiPassCount: Int = 1 {
        didSet {
            guard multiPassCount != oldValue else {
                return
            }
            invalidateSession = true
        }
    }
    weak var delegate: VideoEncoderDelegate?
    private var frameSilo: VTFrameSilo?
    private var multiPassStorage: VTMultiPassStorage? {
        didSet {
            try? oldValue?.close()
        }
    }
    private var multiPassBuffers: [ImageBuffer] = []
    private var multiPassDuration: Double = 0.2
    private var canPerformMultiPass: Bool {
        1 < multiPassCount
    }
    private var attributes: [NSString: AnyObject] {
        var attributes: [NSString: AnyObject] = H264Encoder.defaultAttributes
        attributes[kCVPixelBufferWidthKey] = NSNumber(value: width)
        attributes[kCVPixelBufferHeightKey] = NSNumber(value: height)
        return attributes
    }
    private var invalidateSession = true
    private var lastImageBuffer: CVImageBuffer?
    // @see: https://developer.apple.com/library/mac/releasenotes/General/APIDiffsMacOSX10_8/VideoToolbox.html
    private var properties: [NSString: NSObject] {
        let isBaseline: Bool = profileLevel.contains("Baseline")
        var properties: [NSString: NSObject] = [
            kVTCompressionPropertyKey_RealTime: kCFBooleanTrue,
            kVTCompressionPropertyKey_ProfileLevel: profileLevel as NSObject,
            kVTCompressionPropertyKey_AverageBitRate: Int(bitrate) as NSObject,
            kVTCompressionPropertyKey_ExpectedFrameRate: NSNumber(value: expectedFPS),
            kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration: NSNumber(value: maxKeyFrameIntervalDuration),
            kVTCompressionPropertyKey_AllowFrameReordering: !isBaseline as NSObject,
            kVTCompressionPropertyKey_PixelTransferProperties: [
                "ScalingMode": scalingMode.rawValue
            ] as NSObject
        ]
        #if os(macOS)
        if enabledHardwareEncoder {
            properties[kVTVideoEncoderSpecification_EncoderID] = "com.apple.videotoolbox.videoencoder.h264.gva" as NSObject
            properties["EnableHardwareAcceleratedVideoEncoder"] = kCFBooleanTrue
            properties["RequireHardwareAcceleratedVideoEncoder"] = kCFBooleanTrue
        }
        #endif
        // The Baseline profile supports neither CABAC entropy coding nor frame reordering.
        if !isBaseline {
            properties[kVTCompressionPropertyKey_H264EntropyMode] = kVTH264EntropyMode_CABAC
        }
        return properties
    }
    private var callback: VTCompressionOutputCallback = {(
        outputCallbackRefCon: UnsafeMutableRawPointer?,
        sourceFrameRefCon: UnsafeMutableRawPointer?,
        status: OSStatus,
        infoFlags: VTEncodeInfoFlags,
        sampleBuffer: CMSampleBuffer?) in
        guard
            let refcon: UnsafeMutableRawPointer = outputCallbackRefCon,
            let sampleBuffer: CMSampleBuffer = sampleBuffer, status == noErr else {
            if status == kVTParameterErr {
                // This has been observed on an iPhone 11 with size = 1792x827, for example.
                logger.error("encoding failed with kVTParameterErr. Perhaps the width x height is too big for the encoder setup?")
            }
            return
        }
        // Recover the encoder instance passed as refcon when the session was created.
        let encoder: H264Encoder = Unmanaged<H264Encoder>.fromOpaque(refcon).takeUnretainedValue()
        encoder.formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer)
        if encoder.canPerformMultiPass {
            encoder.performMultiPass(sampleBuffer)
        } else {
            encoder.delegate?.sampleOutput(video: sampleBuffer)
        }
    }
    private var _session: VTCompressionSession?
    private var session: VTCompressionSession? {
        get {
            if _session == nil {
                guard VTCompressionSessionCreate(
                    allocator: kCFAllocatorDefault,
                    width: width,
                    height: height,
                    codecType: kCMVideoCodecType_H264,
                    encoderSpecification: nil,
                    imageBufferAttributes: attributes as CFDictionary?,
                    compressedDataAllocator: nil,
                    outputCallback: callback,
                    refcon: Unmanaged.passUnretained(self).toOpaque(),
                    compressionSessionOut: &_session
                ) == noErr, let session = _session else {
                    logger.warn("VTCompressionSessionCreate failed")
                    return nil
                }
                invalidateSession = false
                do {
                    try session.setProperties(properties)
                    if canPerformMultiPass {
                        VTMultiPassStorageCreate(allocator: kCFAllocatorDefault, fileURL: nil, timeRange: .invalid, options: nil, multiPassStorageOut: &multiPassStorage)
                        try session.setProperty(.multiPassStorage, value: multiPassStorage)
                        try session.setProperty(.realTime, value: kCFBooleanFalse)
                        try session.beginPass()
                        VTFrameSiloCreate(allocator: kCFAllocatorDefault, fileURL: nil, timeRange: .invalid, options: nil, frameSiloOut: &frameSilo)
                    } else {
                        try session.prepareToEncodeFrame()
                    }
                } catch {
                    logger.error(error)
                    return nil
                }
            }
            return _session
        }
        set {
            _session?.invalidate()
            _session = newValue
        }
    }
    init() {
        settings.observer = self
    }

    func encodeImageBuffer(_ imageBuffer: CVImageBuffer, presentationTimeStamp: CMTime, duration: CMTime) throws {
        guard isRunning.value && locked == 0 else {
            return
        }
        if invalidateSession {
            session = nil
        }
        guard let session = session else {
            return
        }
        // While muted, keep re-encoding the last frame so the stream stays alive.
        let currentImageBuffer = muted ? lastImageBuffer ?? imageBuffer : imageBuffer
        if canPerformMultiPass {
            multiPassBuffers.append(ImageBuffer(image: currentImageBuffer, presentationTimeStamp: presentationTimeStamp, duration: duration))
            try multiPassEndOfRoundIfNeeded(session)
        }
        // Note: `presentaionTimeStamp` (sic) matches the label declared by the
        // VTCompressionSession extension used by this project.
        try session.encodeFrame(currentImageBuffer, presentaionTimeStamp: presentationTimeStamp, duration: duration)
        if !muted || lastImageBuffer == nil {
            lastImageBuffer = imageBuffer
        }
    }
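
    // A minimal caller sketch (illustrative only, not part of the original file):
    // given a CMSampleBuffer from e.g. AVCaptureVideoDataOutput, a caller would
    // feed this encoder roughly as follows. `captureOutput` and `encoder` are
    // hypothetical names.
    //
    //     func captureOutput(_ sampleBuffer: CMSampleBuffer) {
    //         guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
    //             return
    //         }
    //         try? encoder.encodeImageBuffer(
    //             imageBuffer,
    //             presentationTimeStamp: CMSampleBufferGetPresentationTimeStamp(sampleBuffer),
    //             duration: CMSampleBufferGetDuration(sampleBuffer)
    //         )
    //     }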
    func performMultiPass(_ sampleBuffer: CMSampleBuffer) {
        do {
            try frameSilo?.addSampleBuffer(sampleBuffer)
        } catch {
            logger.error(error)
        }
    }
    private func multiPassEndOfRoundIfNeeded(_ session: VTCompressionSession) throws {
        let timeRange = makeTimeRange()
        guard multiPassDuration < timeRange.duration.seconds else {
            return
        }
        // Close the in-flight pass before replaying the buffered frames.
        _ = try session.endPass()
        _ = try session.timeRangeForNextPass()
        for i in 0...multiPassCount {
            logger.trace("pass \(i) of \(multiPassCount)")
            try session.beginPass(i == multiPassCount ? .beginFinalPass : .init(rawValue: 0))
            for buffer in multiPassBuffers {
                try? session.encodeFrame(buffer.image, presentaionTimeStamp: buffer.presentationTimeStamp, duration: buffer.duration)
            }
            // endPass reports whether the encoder wants another pass.
            guard (try session.endPass()).boolValue else {
                break
            }
            _ = try session.timeRangeForNextPass()
        }
        multiPassBuffers.removeAll()
        try frameSilo?.forEachSampleBuffer(timeRange) { [weak self] sampleBuffer -> OSStatus in
            self?.delegate?.sampleOutput(video: sampleBuffer)
            return noErr
        }
        try session.beginPass()
    }
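
    // Flow sketch for the multi-pass path (illustrative summary, not in the original):
    //
    //     encodeImageBuffer -> buffer frames for ~multiPassDuration seconds
    //         -> endPass, then re-encode the buffered frames for each extra pass
    //         -> drain the VTFrameSilo via forEachSampleBuffer to the delegate
    //         -> beginPass() to start the next round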
    private func makeTimeRange() -> CMTimeRange {
        guard let first = multiPassBuffers.first, let last = multiPassBuffers.last else {
            return .invalid
        }
        return CMTimeRange(start: first.presentationTimeStamp, end: last.presentationTimeStamp)
    }
    #if os(iOS)
    @objc
    private func applicationWillEnterForeground(_ notification: Notification) {
        invalidateSession = true
    }

    @objc
    private func didAudioSessionInterruption(_ notification: Notification) {
        guard
            let userInfo: [AnyHashable: Any] = notification.userInfo,
            let value: NSNumber = userInfo[AVAudioSessionInterruptionTypeKey] as? NSNumber,
            let type: AVAudioSession.InterruptionType = AVAudioSession.InterruptionType(rawValue: value.uintValue) else {
            return
        }
        switch type {
        case .ended:
            invalidateSession = true
        default:
            break
        }
    }
    #endif
}
extension H264Encoder: Running {
    // MARK: Running
    public func startRunning() {
        lockQueue.async {
            self.isRunning.mutate { $0 = true }
            #if os(iOS)
            NotificationCenter.default.addObserver(
                self,
                selector: #selector(self.didAudioSessionInterruption),
                name: AVAudioSession.interruptionNotification,
                object: nil
            )
            NotificationCenter.default.addObserver(
                self,
                selector: #selector(self.applicationWillEnterForeground),
                name: UIApplication.willEnterForegroundNotification,
                object: nil
            )
            #endif
        }
    }

    public func stopRunning() {
        lockQueue.async {
            self.session = nil
            self.lastImageBuffer = nil
            self.formatDescription = nil
            self.frameSilo = nil
            self.multiPassStorage = nil
            self.multiPassBuffers.removeAll()
            #if os(iOS)
            NotificationCenter.default.removeObserver(self)
            #endif
            OSAtomicAnd32Barrier(0, &self.locked)
            self.isRunning.mutate { $0 = false }
        }
    }
}
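
// A minimal end-to-end sketch (illustrative only, not part of the original file):
// wiring a delegate and running the encoder. `PacketWriter` is a hypothetical
// consumer; within HaishinKit the encoder is normally driven by the mixer/stream.
//
//     final class PacketWriter: VideoEncoderDelegate {
//         func didSetFormatDescription(video formatDescription: CMFormatDescription?) {
//             // New parameter sets (SPS/PPS) are available here; forward them to the muxer.
//         }
//         func sampleOutput(video sampleBuffer: CMSampleBuffer) {
//             // Encoded H.264 sample buffers arrive here.
//         }
//     }
//
//     let encoder = H264Encoder()
//     let writer = PacketWriter()
//     encoder.delegate = writer
//     encoder.settings[.width] = Int32(1280)
//     encoder.settings[.height] = Int32(720)
//     encoder.settings[.bitrate] = UInt32(2_000_000)
//     encoder.startRunning()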