// Source: keyboard/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m
//
// AudioCaptureManager.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "AudioCaptureManager.h"
#import <AVFoundation/AVFoundation.h>
//
// Capture/output format: 16 kHz, mono, signed 16-bit PCM, delivered
// downstream in fixed 20 ms frames.
const double kAudioSampleRate = 16000.0;   // Hz
const int kAudioChannels = 1;              // mono
const NSUInteger kAudioFrameDuration = 20; // ms per output frame
const NSUInteger kAudioFrameSamples = 320; // kAudioSampleRate * 0.020
const NSUInteger kAudioFrameBytes = 640;   // kAudioFrameSamples * sizeof(int16_t)

// Software gain multiplier applied to converted samples; values <= 1.0
// disable amplification (see -applySoftwareGainIfNeeded:sampleCount:).
static const float kAudioSoftwareGain = 2.5f;
// Private state for AudioCaptureManager.
@interface AudioCaptureManager ()
// Engine whose input node is tapped for microphone audio.
@property(nonatomic, strong) AVAudioEngine *audioEngine;
// Serial queue that owns all ring-buffer writes and frame extraction.
@property(nonatomic, strong) dispatch_queue_t audioQueue;
// YES while the engine is running and the tap is installed.
@property(nonatomic, assign) BOOL capturing;
// Ring buffer for accumulating converted samples to form 20ms frames.
@property(nonatomic, strong) NSMutableData *ringBuffer;
// Next write offset (in bytes) into ringBuffer.
@property(nonatomic, assign) NSUInteger ringBufferWriteIndex;
// Timestamp of the last once-per-second diagnostics log.
@property(nonatomic, assign) NSTimeInterval lastStatsLogTime;
@end
@implementation AudioCaptureManager
// Designated initializer: sets up the engine, the serial processing queue,
// and a ring buffer sized to hold several 20 ms frames.
- (instancetype)init {
    self = [super init];
    if (self) {
        _audioEngine = [[AVAudioEngine alloc] init];
        _audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
                                            DISPATCH_QUEUE_SERIAL);
        // Room for multiple frames so a large converter output never overflows.
        _ringBuffer = [[NSMutableData alloc] initWithLength:kAudioFrameBytes * 4];
        _ringBufferWriteIndex = 0;
        _capturing = NO;
        _lastStatsLogTime = 0;
    }
    return self;
}
// Ensure the tap is removed and the engine stopped when the manager is
// deallocated (stopCapture is a no-op if capture is not running).
- (void)dealloc {
    [self stopCapture];
}
#pragma mark - Public Methods
// Starts microphone capture via AVAudioEngine.
// Installs a tap on the input node in the hardware's native format,
// creates a converter to 16 kHz mono Int16, and starts the engine.
// Returns YES on success (or if already capturing); on failure returns NO
// and fills *error if an error pointer was supplied.
- (BOOL)startCapture:(NSError **)error {
    if (self.capturing) {
        // Idempotent: already running counts as success.
        return YES;
    }
    AVAudioInputNode *inputNode = self.audioEngine.inputNode;
    // Native format of the input bus (sample rate varies by device/route).
    AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];
    // Target wire format: 16kHz, Mono, interleaved Int16.
    AVAudioFormat *targetFormat =
        [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
                                         sampleRate:kAudioSampleRate
                                           channels:kAudioChannels
                                        interleaved:YES];
    // Converter from the hardware format to the target format.
    AVAudioConverter *converter =
        [[AVAudioConverter alloc] initFromFormat:inputFormat
                                        toFormat:targetFormat];
    if (!converter) {
        if (error) {
            *error = [NSError errorWithDomain:@"AudioCaptureManager"
                                         code:-1
                                     userInfo:@{
                                         NSLocalizedDescriptionKey :
                                             @"Failed to create audio converter"
                                     }];
        }
        return NO;
    }
    // Request roughly 20ms of input-rate samples per tap callback.
    AVAudioFrameCount bufferSize =
        (AVAudioFrameCount)(inputFormat.sampleRate * 0.02);
    // Weak self so the installed tap block does not retain the manager.
    __weak typeof(self) weakSelf = self;
    [inputNode installTapOnBus:0
                    bufferSize:bufferSize
                        format:inputFormat
                         block:^(AVAudioPCMBuffer *_Nonnull buffer,
                                 AVAudioTime *_Nonnull when) {
                           [weakSelf processAudioBuffer:buffer
                                          withConverter:converter
                                           targetFormat:targetFormat];
                         }];
    NSError *startError = nil;
    [self.audioEngine prepare];
    if (![self.audioEngine startAndReturnError:&startError]) {
        // Remove the tap so a later retry starts from a clean state.
        [inputNode removeTapOnBus:0];
        if (error) {
            *error = startError;
        }
        NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
              startError.localizedDescription);
        return NO;
    }
    self.capturing = YES;
    self.ringBufferWriteIndex = 0;
    NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
          inputFormat.sampleRate);
    return YES;
}
// Stops microphone capture: removes the input tap, stops the engine, and
// resets framing state. Safe to call when capture is not running.
- (void)stopCapture {
    if (self.capturing) {
        [self.audioEngine.inputNode removeTapOnBus:0];
        [self.audioEngine stop];
        self.capturing = NO;
        self.ringBufferWriteIndex = 0;
        NSLog(@"[AudioCaptureManager] Stopped capturing");
    }
}
#pragma mark - Audio Processing
// Converts one tap buffer to 16 kHz mono Int16, applies software gain,
// reports RMS/stats, and hands the PCM bytes to the serial audio queue
// for 20 ms framing. Runs on the tap's internal audio thread.
- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
             withConverter:(AVAudioConverter *)converter
              targetFormat:(AVAudioFormat *)targetFormat {
    if (!self.capturing) {
        return;
    }
    // Worst-case output frame count after resampling, plus one for rounding.
    AVAudioFrameCount outputFrameCapacity =
        (AVAudioFrameCount)(buffer.frameLength *
                            (kAudioSampleRate / buffer.format.sampleRate)) +
        1;
    AVAudioPCMBuffer *outputBuffer =
        [[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
                                      frameCapacity:outputFrameCapacity];
    if (!outputBuffer) {
        return;
    }
    // Supply the tap buffer to the converter exactly once. The previous
    // implementation returned HaveData unconditionally; AVAudioConverter may
    // invoke the input block multiple times per conversion, which would feed
    // the same buffer repeatedly and duplicate audio.
    __block BOOL didSupplyBuffer = NO;
    NSError *conversionError = nil;
    AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
        AVAudioPacketCount inNumberOfPackets,
        AVAudioConverterInputStatus *_Nonnull outStatus) {
      if (didSupplyBuffer) {
          *outStatus = AVAudioConverterInputStatus_NoDataNow;
          return nil;
      }
      didSupplyBuffer = YES;
      *outStatus = AVAudioConverterInputStatus_HaveData;
      return buffer;
    };
    AVAudioConverterOutputStatus status =
        [converter convertToBuffer:outputBuffer
                             error:&conversionError
                withInputFromBlock:inputBlock];
    if (status == AVAudioConverterOutputStatus_Error) {
        NSLog(@"[AudioCaptureManager] Conversion error: %@",
              conversionError.localizedDescription);
        return;
    }
    if (!outputBuffer.int16ChannelData) {
        NSLog(@"[AudioCaptureManager] Int16 channel data is null");
        return;
    }
    int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0];
    NSUInteger sampleCount = outputBuffer.frameLength;
    NSUInteger byteCount = sampleCount * sizeof(int16_t);
    [self applySoftwareGainIfNeeded:samples sampleCount:sampleCount];
    // Report level to the delegate (RMS computed post-gain).
    [self calculateAndReportRMS:samples sampleCount:sampleCount];
    [self logAudioStatsIfNeeded:samples sampleCount:sampleCount];
    if (byteCount == 0) {
        return;
    }
    // Copy out before hopping queues: the converter output buffer is local,
    // but the NSData snapshot decouples lifetime from this callback.
    NSData *pcmData = [NSData dataWithBytes:samples length:byteCount];
    // Weak self: do not let in-flight audio work extend the manager's lifetime.
    __weak typeof(self) weakSelf = self;
    dispatch_async(self.audioQueue, ^{
        [weakSelf appendToRingBuffer:(const uint8_t *)pcmData.bytes
                           byteCount:pcmData.length];
    });
}
// Accumulates raw PCM bytes into the ring buffer and emits a fixed-size
// 20 ms frame each time one fills. Must be called on audioQueue.
- (void)appendToRingBuffer:(const uint8_t *)bytes byteCount:(NSUInteger)byteCount {
    uint8_t *storage = (uint8_t *)self.ringBuffer.mutableBytes;
    const NSUInteger capacity = self.ringBuffer.length;
    NSUInteger sourceOffset = 0;
    NSUInteger remainingInput = byteCount;
    while (remainingInput > 0) {
        NSUInteger freeSpace = capacity - self.ringBufferWriteIndex;
        NSUInteger chunkSize = MIN(remainingInput, freeSpace);
        memcpy(storage + self.ringBufferWriteIndex, bytes + sourceOffset,
               chunkSize);
        self.ringBufferWriteIndex += chunkSize;
        sourceOffset += chunkSize;
        remainingInput -= chunkSize;
        // Drain every complete 20ms frame from the front of the buffer.
        while (self.ringBufferWriteIndex >= kAudioFrameBytes) {
            NSData *frame = [NSData dataWithBytes:storage
                                           length:kAudioFrameBytes];
            NSUInteger leftover = self.ringBufferWriteIndex - kAudioFrameBytes;
            if (leftover > 0) {
                // Shift the partial tail back to the start of the buffer.
                memmove(storage, storage + kAudioFrameBytes, leftover);
            }
            self.ringBufferWriteIndex = leftover;
            [self outputPCMFrame:frame];
        }
        // Defensive wrap if the buffer ever fills completely; in practice the
        // drain loop above keeps the write index below one frame's worth.
        if (self.ringBufferWriteIndex >= capacity) {
            self.ringBufferWriteIndex = 0;
        }
    }
}
// Delivers one complete 20 ms PCM frame to the delegate on the main queue.
// Frames produced after capture stops are dropped.
- (void)outputPCMFrame:(NSData *)frame {
    if (!self.capturing) {
        return;
    }
    dispatch_async(dispatch_get_main_queue(), ^{
        SEL callback = @selector(audioCaptureManagerDidOutputPCMFrame:);
        if ([self.delegate respondsToSelector:callback]) {
            [self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
        }
    });
}
// Computes the RMS level of the frame (samples normalized to -1.0..1.0)
// and reports it to the delegate on the main queue as a 0.0-1.0 value.
- (void)calculateAndReportRMS:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
    if (sampleCount == 0) {
        return;
    }
    double energy = 0.0;
    for (NSUInteger idx = 0; idx < sampleCount; idx++) {
        double normalized = (double)samples[idx] / 32768.0;
        energy += normalized * normalized;
    }
    double rootMeanSquare = sqrt(energy / sampleCount);
    // Boost 2x for better visual feedback, then clamp into [0.0, 1.0].
    float level = (float)MIN(rootMeanSquare * 2.0, 1.0);
    dispatch_async(dispatch_get_main_queue(), ^{
        if ([self.delegate
                respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
            [self.delegate audioCaptureManagerDidUpdateRMS:level];
        }
    });
}
// Multiplies each sample in place by kAudioSoftwareGain, clamping the
// result to the Int16 range. A gain of 1.0 or less is a no-op.
- (void)applySoftwareGainIfNeeded:(int16_t *)samples
                      sampleCount:(NSUInteger)sampleCount {
    if (kAudioSoftwareGain <= 1.0f || sampleCount == 0) {
        return;
    }
    for (NSUInteger idx = 0; idx < sampleCount; idx++) {
        float amplified = (float)samples[idx] * kAudioSoftwareGain;
        // Clamp to [-32768, 32767]; in-range values truncate toward zero
        // exactly as a plain (int16_t) cast would.
        float clamped = MAX(-32768.0f, MIN(32767.0f, amplified));
        samples[idx] = (int16_t)clamped;
    }
}
// Logs peak level and non-zero sample ratio at most once per second
// (debug aid for diagnosing silent or clipped input).
- (void)logAudioStatsIfNeeded:(int16_t *)samples
                  sampleCount:(NSUInteger)sampleCount {
    NSTimeInterval now = [[NSDate date] timeIntervalSince1970];
    if (now - self.lastStatsLogTime < 1.0) {
        return;
    }
    self.lastStatsLogTime = now;
    if (sampleCount == 0) {
        return;
    }
    NSUInteger nonZeroCount = 0;
    // Track the peak as an int: abs(INT16_MIN) == 32768 does not fit in
    // int16_t, so the previous (int16_t)abs(value) wrapped to -32768 and a
    // full-scale negative sample was never recorded as the peak.
    int peak = 0;
    for (NSUInteger i = 0; i < sampleCount; i++) {
        int value = samples[i];
        if (value != 0) {
            nonZeroCount++;
        }
        int absValue = abs(value);
        if (absValue > peak) {
            peak = absValue;
        }
    }
    double nonZeroRatio = (double)nonZeroCount / (double)sampleCount;
    double peakNormalized = (double)peak / 32768.0;
    NSLog(@"[AudioCaptureManager] Stats: peak=%.3f nonZero=%.2f%%",
          peakNormalized, nonZeroRatio * 100.0);
}
@end