2026-01-16 13:38:03 +08:00
|
|
|
|
//
|
|
|
|
|
|
// AudioCaptureManager.m
|
|
|
|
|
|
// keyBoard
|
|
|
|
|
|
//
|
|
|
|
|
|
// Created by Mac on 2026/1/15.
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
|
#import "AudioCaptureManager.h"
|
|
|
|
|
|
#import <AVFoundation/AVFoundation.h>
|
|
|
|
|
|
|
|
|
|
|
|
// Audio capture parameter constants.
// Target stream: 16 kHz, mono, interleaved Int16 PCM, framed in 20 ms units.
const double kAudioSampleRate = 16000.0;
const int kAudioChannels = 1;
const NSUInteger kAudioFrameDuration = 20; // ms per output frame
const NSUInteger kAudioFrameSamples = 320; // kAudioSampleRate * 0.02 (16000 * 0.02)
const NSUInteger kAudioFrameBytes = 640; // kAudioFrameSamples * sizeof(int16_t) (320 * 2)

// Software gain multiplier applied in place to converted Int16 samples before
// metering and frame output; values <= 1.0f disable the gain stage entirely
// (see -applySoftwareGainIfNeeded:sampleCount:).
static const float kAudioSoftwareGain = 2.5f;
|
2026-01-16 13:38:03 +08:00
|
|
|
|
|
|
|
|
|
|
@interface AudioCaptureManager ()

// Engine whose input node delivers microphone audio through an installed tap.
@property(nonatomic, strong) AVAudioEngine *audioEngine;

// Serial queue that owns all ring-buffer state; every append/frame-extract
// runs here so no other locking is needed.
@property(nonatomic, strong) dispatch_queue_t audioQueue;

// YES between a successful -startCapture: and the next -stopCapture.
@property(nonatomic, assign) BOOL capturing;

// Ring buffer for accumulating samples to form 20ms frames
// (sized to hold several frames; see -init).
@property(nonatomic, strong) NSMutableData *ringBuffer;

// Number of valid bytes currently buffered — doubles as the next write offset.
// Only read/written on audioQueue (except resets in start/stop).
@property(nonatomic, assign) NSUInteger ringBufferWriteIndex;

// Timestamp of the last diagnostics NSLog, used to throttle stats to ~1 Hz.
@property(nonatomic, assign) NSTimeInterval lastStatsLogTime;

@end
|
|
|
|
|
|
|
|
|
|
|
|
@implementation AudioCaptureManager
|
|
|
|
|
|
|
|
|
|
|
|
// Designated initializer: creates the engine, the serial capture queue, and a
// ring buffer large enough for several 20 ms frames.
// Scalar ivars (_capturing, _ringBufferWriteIndex, _lastStatsLogTime) are
// intentionally NOT assigned here — +alloc zero-fills all ivars, so explicit
// re-zeroing is redundant.
- (instancetype)init {
  self = [super init];
  if (self) {
    _audioEngine = [[AVAudioEngine alloc] init];
    _audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
                                        DISPATCH_QUEUE_SERIAL);
    // Buffer for multiple frames so a burst from the tap cannot overflow it.
    _ringBuffer = [[NSMutableData alloc] initWithLength:kAudioFrameBytes * 4];
  }
  return self;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Ensures the tap and engine are torn down if the manager is deallocated
// while still capturing.
// NOTE(review): messaging an overridable method (-stopCapture) from -dealloc
// is normally discouraged; it appears safe here only as long as this class is
// not subclassed — confirm.
- (void)dealloc {
  [self stopCapture];
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Public Methods
|
|
|
|
|
|
|
|
|
|
|
|
// Starts microphone capture by installing a tap on the engine's input node.
// Each tap callback hands the hardware-format buffer (with the converter and
// target format) to -processAudioBuffer:withConverter:targetFormat:, which
// converts to 16 kHz / mono / Int16 and frames the result into 20 ms units.
// Idempotent: returns YES immediately if capture is already running.
// @param error Out-parameter populated on failure (converter creation or
//              engine start). Callers should check the BOOL return value,
//              not whether *error was set.
// @return YES if capture is running (or already was); NO on failure.
- (BOOL)startCapture:(NSError **)error {
  if (self.capturing) {
    return YES;
  }

  AVAudioInputNode *inputNode = self.audioEngine.inputNode;

  // Input (hardware) format as reported by the input node.
  AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];

  // Target format: 16 kHz, mono, interleaved Int16.
  AVAudioFormat *targetFormat =
      [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
                                       sampleRate:kAudioSampleRate
                                         channels:kAudioChannels
                                      interleaved:YES];

  // Create the format converter (hardware format -> target format).
  AVAudioConverter *converter =
      [[AVAudioConverter alloc] initFromFormat:inputFormat
                                      toFormat:targetFormat];
  if (!converter) {
    if (error) {
      *error = [NSError errorWithDomain:@"AudioCaptureManager"
                                   code:-1
                               userInfo:@{
                                 NSLocalizedDescriptionKey :
                                     @"Failed to create audio converter"
                               }];
    }
    return NO;
  }

  // Tap buffer size: roughly 20 ms worth of samples at the INPUT rate.
  AVAudioFrameCount bufferSize =
      (AVAudioFrameCount)(inputFormat.sampleRate * 0.02);

  // Install the tap. weakSelf breaks the cycle engine -> tap block -> self ->
  // engine; converter/targetFormat are captured strongly on purpose so they
  // live exactly as long as the tap does.
  __weak typeof(self) weakSelf = self;
  [inputNode installTapOnBus:0
                  bufferSize:bufferSize
                      format:inputFormat
                       block:^(AVAudioPCMBuffer *_Nonnull buffer,
                               AVAudioTime *_Nonnull when) {
                         [weakSelf processAudioBuffer:buffer
                                        withConverter:converter
                                         targetFormat:targetFormat];
                       }];

  // Start the engine; on failure, roll the tap back before reporting.
  NSError *startError = nil;
  [self.audioEngine prepare];

  if (![self.audioEngine startAndReturnError:&startError]) {
    [inputNode removeTapOnBus:0];
    if (error) {
      *error = startError;
    }
    NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
          startError.localizedDescription);
    return NO;
  }

  self.capturing = YES;
  self.ringBufferWriteIndex = 0;

  NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
        inputFormat.sampleRate);
  return YES;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Stops microphone capture: removes the input tap, halts the engine, and
// resets the framing state. Extra calls while not capturing are no-ops.
- (void)stopCapture {
  if (!self.capturing) return;

  AVAudioEngine *engine = self.audioEngine;
  [engine.inputNode removeTapOnBus:0];
  [engine stop];

  self.capturing = NO;
  self.ringBufferWriteIndex = 0;

  NSLog(@"[AudioCaptureManager] Stopped capturing");
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Audio Processing
|
|
|
|
|
|
|
|
|
|
|
|
// Converts one tap buffer to the 16 kHz / mono / Int16 target format, applies
// the software gain, reports RMS and throttled stats, then hands the PCM
// bytes to the serial audio queue for 20 ms framing.
// Runs on the tap's realtime-ish callback thread; only the ring-buffer append
// is deferred to audioQueue.
- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
             withConverter:(AVAudioConverter *)converter
              targetFormat:(AVAudioFormat *)targetFormat {

  if (!self.capturing) {
    return;
  }

  // Worst-case number of output frames for this input chunk (+1 for rounding).
  AVAudioFrameCount outputFrameCapacity =
      (AVAudioFrameCount)(buffer.frameLength *
                          (kAudioSampleRate / buffer.format.sampleRate)) +
      1;

  // Allocate the conversion destination; bail out if allocation fails
  // (e.g. zero capacity) rather than passing nil to the converter.
  AVAudioPCMBuffer *outputBuffer =
      [[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
                                    frameCapacity:outputFrameCapacity];
  if (!outputBuffer) {
    NSLog(@"[AudioCaptureManager] Failed to allocate conversion buffer");
    return;
  }

  // Supply the input buffer exactly once. The previous implementation always
  // returned HaveData with the same buffer; during sample-rate conversion the
  // converter may invoke the input block more than once per convert call, and
  // re-supplying the same buffer makes it consume those samples again,
  // duplicating audio. After the first supply, report NoDataNow.
  __block BOOL inputConsumed = NO;
  NSError *conversionError = nil;
  AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
      AVAudioPacketCount inNumberOfPackets,
      AVAudioConverterInputStatus *_Nonnull outStatus) {
    if (inputConsumed) {
      *outStatus = AVAudioConverterInputStatus_NoDataNow;
      return nil;
    }
    inputConsumed = YES;
    *outStatus = AVAudioConverterInputStatus_HaveData;
    return buffer;
  };

  AVAudioConverterOutputStatus status =
      [converter convertToBuffer:outputBuffer
                           error:&conversionError
              withInputFromBlock:inputBlock];

  if (status == AVAudioConverterOutputStatus_Error) {
    NSLog(@"[AudioCaptureManager] Conversion error: %@",
          conversionError.localizedDescription);
    return;
  }

  // Get the converted Int16 data.
  if (!outputBuffer.int16ChannelData) {
    NSLog(@"[AudioCaptureManager] Int16 channel data is null");
    return;
  }

  int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0];
  NSUInteger sampleCount = outputBuffer.frameLength;
  NSUInteger byteCount = sampleCount * sizeof(int16_t);

  [self applySoftwareGainIfNeeded:samples sampleCount:sampleCount];

  // Level metering + throttled diagnostics (both read post-gain samples).
  [self calculateAndReportRMS:samples sampleCount:sampleCount];
  [self logAudioStatsIfNeeded:samples sampleCount:sampleCount];

  if (byteCount == 0) {
    return;
  }

  // Copy out before hopping queues: `samples` points into outputBuffer,
  // which must not be touched after this callback returns.
  NSData *pcmData = [NSData dataWithBytes:samples length:byteCount];

  // Frame assembly runs on the serial queue that owns the ring buffer.
  dispatch_async(self.audioQueue, ^{
    [self appendToRingBuffer:(const uint8_t *)pcmData.bytes
                   byteCount:pcmData.length];
  });
}
|
|
|
|
|
|
|
2026-01-22 13:47:34 +08:00
|
|
|
|
// Appends converted PCM bytes to the ring buffer and emits every complete
// 20 ms frame (kAudioFrameBytes) via -outputPCMFrame:.
// Must run on audioQueue — that serial queue is the sole owner of the
// ring-buffer state, which is what makes the unlocked access below safe.
// Invariant: on entry and on exit ringBufferWriteIndex < kAudioFrameBytes,
// because the inner frame-extraction loop always drains below one frame.
- (void)appendToRingBuffer:(const uint8_t *)bytes byteCount:(NSUInteger)byteCount {
  // Append the incoming data to the ring buffer.
  uint8_t *ringBufferBytes = (uint8_t *)self.ringBuffer.mutableBytes;
  NSUInteger ringBufferLength = self.ringBuffer.length;

  NSUInteger bytesToCopy = byteCount;
  NSUInteger sourceOffset = 0;

  while (bytesToCopy > 0) {
    NSUInteger spaceAvailable = ringBufferLength - self.ringBufferWriteIndex;
    NSUInteger copySize = MIN(bytesToCopy, spaceAvailable);

    memcpy(ringBufferBytes + self.ringBufferWriteIndex,
           bytes + sourceOffset, copySize);
    self.ringBufferWriteIndex += copySize;
    sourceOffset += copySize;
    bytesToCopy -= copySize;

    // Check for (and emit) every complete 20 ms frame now buffered.
    while (self.ringBufferWriteIndex >= kAudioFrameBytes) {
      // Copy out one full frame.
      NSData *frame = [NSData dataWithBytes:ringBufferBytes
                                     length:kAudioFrameBytes];

      // Shift the remaining bytes to the front of the buffer.
      NSUInteger remaining = self.ringBufferWriteIndex - kAudioFrameBytes;
      if (remaining > 0) {
        memmove(ringBufferBytes, ringBufferBytes + kAudioFrameBytes, remaining);
      }
      self.ringBufferWriteIndex = remaining;

      // Hand the frame to the delegate delivery path.
      [self outputPCMFrame:frame];
    }

    // Defensive reset if the buffer ever fills completely.
    // NOTE(review): this looks unreachable — the extraction loop above leaves
    // ringBufferWriteIndex < kAudioFrameBytes, which is < ringBufferLength
    // (buffer is 4 frames long). If it ever did fire it would silently drop
    // buffered audio; confirm and consider removing.
    if (self.ringBufferWriteIndex >= ringBufferLength) {
      self.ringBufferWriteIndex = 0;
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Delivers one complete 20 ms PCM frame to the delegate on the main queue.
// Frames arriving after capture has stopped are dropped silently.
- (void)outputPCMFrame:(NSData *)frame {
  if (!self.capturing) return;

  dispatch_async(dispatch_get_main_queue(), ^{
    SEL frameSelector = @selector(audioCaptureManagerDidOutputPCMFrame:);
    if (![self.delegate respondsToSelector:frameSelector]) {
      return;
    }
    [self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
// Computes the root-mean-square level of the sample block, maps it into
// 0.0–1.0 (boosted 2x and clamped), and reports the value to the delegate on
// the main queue via -audioCaptureManagerDidUpdateRMS:.
- (void)calculateAndReportRMS:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
  if (sampleCount == 0)
    return;

  // Sum of squares of samples normalized to -1.0 ~ 1.0.
  double squaredSum = 0.0;
  for (NSUInteger index = 0; index < sampleCount; index++) {
    double normalized = (double)samples[index] / 32768.0;
    squaredSum += normalized * normalized;
  }

  double meanSquare = squaredSum / sampleCount;
  // Scale by 2 and clamp so quiet speech still moves the meter.
  float level = (float)MIN(sqrt(meanSquare) * 2.0, 1.0);

  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate
            respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
      [self.delegate audioCaptureManagerDidUpdateRMS:level];
    }
  });
}
|
|
|
|
|
|
|
2026-01-21 17:59:12 +08:00
|
|
|
|
// Multiplies every Int16 sample by kAudioSoftwareGain in place, clamping to
// the Int16 range so boosted samples cannot wrap around. No-op when the gain
// is <= 1.0 or the block is empty.
- (void)applySoftwareGainIfNeeded:(int16_t *)samples
                      sampleCount:(NSUInteger)sampleCount {
  if (kAudioSoftwareGain <= 1.0f || sampleCount == 0) {
    return;
  }

  for (NSUInteger index = 0; index < sampleCount; index++) {
    float boosted = (float)samples[index] * kAudioSoftwareGain;
    // Clamp to [-32768, 32767] before narrowing back to int16_t.
    float clamped = MAX(-32768.0f, MIN(32767.0f, boosted));
    samples[index] = (int16_t)clamped;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Logs a throttled (~once per second) diagnostics line with the normalized
// peak amplitude and the fraction of non-zero samples — useful to spot a
// silent or permission-blocked microphone input.
- (void)logAudioStatsIfNeeded:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
  NSTimeInterval now = [[NSDate date] timeIntervalSince1970];
  if (now - self.lastStatsLogTime < 1.0) {
    return;
  }
  self.lastStatsLogTime = now;

  if (sampleCount == 0) {
    return;
  }

  NSUInteger nonZeroCount = 0;
  // Track the peak as a plain int. The previous `(int16_t)abs(value)` is
  // broken for INT16_MIN: abs(-32768) is 32768 as an int, and truncating it
  // back to int16_t yields -32768, so a full-scale negative sample never
  // registered as the peak.
  int peak = 0;
  for (NSUInteger i = 0; i < sampleCount; i++) {
    int16_t value = samples[i];
    if (value != 0) {
      nonZeroCount++;
    }
    int absValue = abs((int)value);
    if (absValue > peak) {
      peak = absValue;
    }
  }

  double nonZeroRatio = (double)nonZeroCount / (double)sampleCount;
  double peakNormalized = (double)peak / 32768.0;
  NSLog(@"[AudioCaptureManager] Stats: peak=%.3f nonZero=%.2f%%",
        peakNormalized, nonZeroRatio * 100.0);
}
|
|
|
|
|
|
|
2026-01-16 13:38:03 +08:00
|
|
|
|
@end
|