This commit is contained in:
2026-03-08 21:29:10 +08:00
parent 9fb2e2e694
commit c1ace5f53e
34 changed files with 870 additions and 1930 deletions

View File

@@ -175,16 +175,18 @@ self.voiceInputBar.enabled = YES;
---
## 🎯 完整示例(集成 Deepgram)
## 🎯 完整示例(桥接录音事件)
```objc
#import "YourViewController.h"
#import "KBVoiceInputBar.h"
#import "DeepgramStreamingManager.h"
#import "KBVoiceToTextManager.h"
#import "KBVoiceRecordManager.h"
@interface YourViewController () <KBVoiceInputBarDelegate, DeepgramStreamingManagerDelegate>
@interface YourViewController () <KBVoiceToTextManagerDelegate, KBVoiceRecordManagerDelegate>
@property (nonatomic, strong) KBVoiceInputBar *voiceInputBar;
@property (nonatomic, strong) DeepgramStreamingManager *deepgramManager;
@property (nonatomic, strong) KBVoiceToTextManager *voiceToTextManager;
@property (nonatomic, strong) KBVoiceRecordManager *voiceRecordManager;
@end
@implementation YourViewController
@@ -192,7 +194,7 @@ self.voiceInputBar.enabled = YES;
- (void)viewDidLoad {
[super viewDidLoad];
[self setupUI];
[self setupDeepgram];
[self setupVoiceManagers];
}
- (void)setupUI {
@@ -205,50 +207,44 @@ self.voiceInputBar.enabled = YES;
}];
}
- (void)setupDeepgram {
self.deepgramManager = [[DeepgramStreamingManager alloc] init];
self.deepgramManager.delegate = self;
self.deepgramManager.serverURL = @"wss://api.deepgram.com/v1/listen";
self.deepgramManager.apiKey = @"your_api_key";
[self.deepgramManager prepareConnection];
- (void)setupVoiceManagers {
self.voiceToTextManager = [[KBVoiceToTextManager alloc] initWithInputBar:self.voiceInputBar];
self.voiceToTextManager.delegate = self;
self.voiceRecordManager = [[KBVoiceRecordManager alloc] init];
self.voiceRecordManager.delegate = self;
}
#pragma mark - KBVoiceInputBarDelegate
#pragma mark - KBVoiceToTextManagerDelegate
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
inputBar.statusText = @"正在连接...";
[self.deepgramManager start];
- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager {
[self.voiceRecordManager startRecording];
}
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
inputBar.statusText = @"正在识别...";
[self.deepgramManager stopAndFinalize];
- (void)voiceToTextManagerDidEndRecording:(KBVoiceToTextManager *)manager {
[self.voiceRecordManager stopRecording];
}
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
inputBar.statusText = @"已取消";
[self.deepgramManager cancel];
- (void)voiceToTextManagerDidCancelRecording:(KBVoiceToTextManager *)manager {
[self.voiceRecordManager cancelRecording];
}
#pragma mark - DeepgramStreamingManagerDelegate
#pragma mark - KBVoiceRecordManagerDelegate
- (void)deepgramStreamingManagerDidConnect {
self.voiceInputBar.statusText = @"正在聆听...";
- (void)voiceRecordManager:(KBVoiceRecordManager *)manager
didFinishRecordingAtURL:(NSURL *)fileURL
duration:(NSTimeInterval)duration {
NSLog(@"录音完成:%@ %.2fs", fileURL, duration);
// TODO: 上传音频文件并处理转写结果
}
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
[self.voiceInputBar updateVolumeRMS:rms];
- (void)voiceRecordManagerDidRecordTooShort:(KBVoiceRecordManager *)manager {
NSLog(@"录音过短");
}
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
self.voiceInputBar.statusText = text.length > 0 ? text : @"正在识别...";
}
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
self.voiceInputBar.statusText = @"识别完成";
NSLog(@"最终识别结果:%@", text);
// TODO: 处理识别结果
- (void)voiceRecordManager:(KBVoiceRecordManager *)manager
didFailWithError:(NSError *)error {
NSLog(@"录音失败:%@", error.localizedDescription);
}
@end
@@ -316,5 +312,6 @@ _recordButton.tintColor = [UIColor systemBlueColor];
## 🔗 相关组件
- `KBAiRecordButton`:录音按钮(支持长按、波形动画)
- `DeepgramStreamingManager`:语音识别管理器
- `KBVoiceToTextManager`:语音输入事件桥接层
- `KBVoiceRecordManager`:录音文件管理器
- `VoiceChatStreamingManager`:语音聊天管理器

View File

@@ -600,8 +600,6 @@ static void KBChatUpdatedDarwinCallback(CFNotificationCenterRef center,
- (void)setupVoiceToTextManager {
self.voiceToTextManager = [[KBVoiceToTextManager alloc] initWithInputBar:self.voiceInputBar];
self.voiceToTextManager.delegate = self;
self.voiceToTextManager.deepgramEnabled = NO;
[self.voiceToTextManager prepareConnection];
}
/// 5
@@ -1199,16 +1197,6 @@ static void KBChatUpdatedDarwinCallback(CFNotificationCenterRef center,
#pragma mark - KBVoiceToTextManagerDelegate
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
didReceiveFinalText:(NSString *)text {
[self handleTranscribedText:text];
}
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
didFailWithError:(NSError *)error {
NSLog(@"[KBAIHomeVC] 语音识别失败:%@", error.localizedDescription);
}
- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager {
self.isVoiceRecording = YES;
self.isVoiceProcessing = YES;

View File

@@ -1,50 +0,0 @@
//
// DeepgramStreamingManager.h
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Callbacks for connection, audio-level and transcript events.
/// All methods are delivered on the main queue (every delegate call in the
/// implementation is wrapped in dispatch_async onto the main queue).
@protocol DeepgramStreamingManagerDelegate <NSObject>
@optional
/// The WebSocket connection to Deepgram was established.
- (void)deepgramStreamingManagerDidConnect;
/// The connection closed; `error` is nil on a clean disconnect.
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error;
/// Periodic microphone RMS level, for driving volume/waveform UI.
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms;
/// A partial (interim) transcript update.
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text;
/// A finalized transcript segment.
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text;
/// An error occurred (missing API key, permission denied, capture/socket failure).
- (void)deepgramStreamingManagerDidFail:(NSError *)error;
@end

/// Manager for Deepgram live transcription. Coordinates microphone capture,
/// the audio session and the Deepgram WebSocket client.
@interface DeepgramStreamingManager : NSObject
/// Receives connection/transcript events; invoked on the main queue.
@property(nonatomic, weak) id<DeepgramStreamingManagerDelegate> delegate;
/// WebSocket endpoint; defaults to wss://api.deepgram.com/v1/listen.
@property(nonatomic, copy) NSString *serverURL; // wss://api.deepgram.com/v1/listen
/// Deepgram API key; required before -start / -prepareConnection.
@property(nonatomic, copy) NSString *apiKey;
/// Optional language code forwarded as the `language` query parameter.
@property(nonatomic, copy, nullable) NSString *language;
/// Optional model name forwarded as the `model` query parameter.
@property(nonatomic, copy, nullable) NSString *model;
@property(nonatomic, assign) BOOL punctuate;
@property(nonatomic, assign) BOOL smartFormat;
/// Whether partial (interim) results are requested from the server.
@property(nonatomic, assign) BOOL interimResults;
/// PCM encoding sent upstream; defaults to linear16.
@property(nonatomic, copy) NSString *encoding; // linear16
@property(nonatomic, assign) double sampleRate;
@property(nonatomic, assign) int channels;
/// YES while microphone audio is being captured and streamed.
@property(nonatomic, assign, readonly, getter=isStreaming) BOOL streaming;
/// Starts capture and streaming, connecting first if necessary.
- (void)start;
/// Opens the socket ahead of time without starting audio capture.
- (void)prepareConnection;
/// Stops capture and asks the server to finalize; keeps the socket alive.
- (void)stopAndFinalize;
/// Stops capture and closes the connection, discarding pending audio.
- (void)cancel;
/// Tears down socket, capture and audio session.
- (void)disconnect;
@end
NS_ASSUME_NONNULL_END

View File

@@ -1,516 +0,0 @@
//
// DeepgramStreamingManager.m
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import "DeepgramStreamingManager.h"
#import "AudioCaptureManager.h"
#import "AudioSessionManager.h"
#import "DeepgramWebSocketClient.h"
#import <UIKit/UIKit.h>
static NSString *const kDeepgramStreamingManagerErrorDomain =
@"DeepgramStreamingManager";
@interface DeepgramStreamingManager () <AudioSessionManagerDelegate,
AudioCaptureManagerDelegate,
DeepgramWebSocketClientDelegate>
@property(nonatomic, strong) AudioSessionManager *audioSession;
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
@property(nonatomic, strong) DeepgramWebSocketClient *client;
@property(nonatomic, strong) dispatch_queue_t stateQueue;
@property(nonatomic, assign) BOOL streaming;
@property(nonatomic, strong) NSMutableArray<NSData *> *pendingFrames;
@property(nonatomic, assign) NSUInteger pendingFrameLimit;
@property(nonatomic, assign) BOOL connecting;
@property(nonatomic, assign) BOOL pendingStart;
@property(nonatomic, assign) BOOL keepConnection;
@property(nonatomic, strong) dispatch_source_t keepAliveTimer;
@property(nonatomic, assign) NSInteger reconnectAttempts;
@property(nonatomic, assign) NSInteger maxReconnectAttempts;
@property(nonatomic, assign) BOOL reconnectScheduled;
@property(nonatomic, assign) BOOL appInBackground;
@property(nonatomic, assign) BOOL shouldReconnectOnForeground;
@end
@implementation DeepgramStreamingManager
/// Designated initializer: creates the serial state queue, wires the audio
/// session/capture managers, applies Deepgram defaults and registers for app
/// lifecycle notifications.
/// NOTE(review): the lines creating `_client` are commented out and no other
/// code in this file assigns it, so every `self.client` message here is a
/// nil no-op as shown — confirm the client is injected elsewhere.
- (instancetype)init {
  self = [super init];
  if (self) {
    // Serial queue guarding all mutable streaming/connection state.
    _stateQueue = dispatch_queue_create("com.keyboard.aitalk.deepgram.manager",
                                        DISPATCH_QUEUE_SERIAL);
    _audioSession = [AudioSessionManager sharedManager];
    _audioSession.delegate = self;
    _audioCapture = [[AudioCaptureManager alloc] init];
    _audioCapture.delegate = self;
    ///
    // _client = [[DeepgramWebSocketClient alloc] init];
    // _client.delegate = self;
    _serverURL = @"wss://api.deepgram.com/v1/listen";
    _encoding = @"linear16";
    _sampleRate = 16000.0;
    _channels = 1;
    _punctuate = YES;
    _smartFormat = YES;
    _interimResults = YES;
    // Bounded buffer of PCM frames captured before the socket is ready.
    _pendingFrames = [[NSMutableArray alloc] init];
    _pendingFrameLimit = 25;
    _connecting = NO;
    _pendingStart = NO;
    _keepConnection = NO;
    _reconnectAttempts = 0;
    _maxReconnectAttempts = 5;
    _reconnectScheduled = NO;
    _appInBackground = NO;
    _shouldReconnectOnForeground = NO;
    [self setupNotifications];
  }
  return self;
}

/// Non-memory cleanup only: drop notification observers and tear down the
/// socket/capture/session synchronously.
- (void)dealloc {
  [self removeNotifications];
  [self disconnectInternal];
}
/// Starts capture + streaming. Runs entirely on the state queue; defers to
/// foreground if backgrounded, validates the API key, requests microphone
/// permission (retrying -start once granted), configures and activates the
/// audio session, starts capture, then connects or begins streaming.
- (void)start {
  dispatch_async(self.stateQueue, ^{
    if (self.appInBackground) {
      // Socket is closed in background; remember to resume on foreground.
      self.shouldReconnectOnForeground = YES;
      return;
    }
    self.keepConnection = YES;
    self.pendingStart = YES;
    self.reconnectAttempts = 0;
    if (self.apiKey.length == 0) {
      [self reportErrorWithMessage:@"Deepgram API key is required"];
      return;
    }
    if (![self.audioSession hasMicrophonePermission]) {
      __weak typeof(self) weakSelf = self;
      [self.audioSession requestMicrophonePermission:^(BOOL granted) {
        __strong typeof(weakSelf) strongSelf = weakSelf;
        if (!strongSelf) {
          return;
        }
        if (!granted) {
          [strongSelf reportErrorWithMessage:@"Microphone permission denied"];
          return;
        }
        // Permission just granted: re-enter start on the state queue.
        dispatch_async(strongSelf.stateQueue, ^{
          [strongSelf start];
        });
      }];
      return;
    }
    NSError *error = nil;
    if (![self.audioSession configureForConversation:&error]) {
      [self reportError:error];
      return;
    }
    if (![self.audioSession activateSession:&error]) {
      [self reportError:error];
      return;
    }
    if (![self.audioCapture isCapturing]) {
      NSError *captureError = nil;
      if (![self.audioCapture startCapture:&captureError]) {
        [self reportError:captureError];
        return;
      }
    }
    NSLog(@"[DeepgramStreamingManager] Start streaming, server: %@",
          self.serverURL);
    if (self.client.isConnected) {
      // Already connected: flush pending frames and enable audio sending.
      [self beginStreamingIfReady];
      return;
    }
    [self connectIfNeeded];
  });
}
/// Opens the socket ahead of time (no capture, pendingStart stays NO) so the
/// first -start has lower latency. Silently skips when the API key is missing.
- (void)prepareConnection {
  dispatch_async(self.stateQueue, ^{
    if (self.appInBackground) {
      self.shouldReconnectOnForeground = YES;
      return;
    }
    self.keepConnection = YES;
    self.pendingStart = NO;
    self.reconnectAttempts = 0;
    if (self.apiKey.length == 0) {
      NSLog(@"[DeepgramStreamingManager] Prepare skipped: API key missing");
      return;
    }
    if (self.client.isConnected) {
      return;
    }
    [self connectIfNeeded];
  });
}

/// Stops capture, drops buffered frames and sends CloseStream so the server
/// finalizes the transcript; the socket stays open on keep-alives so final
/// results can still arrive.
- (void)stopAndFinalize {
  dispatch_async(self.stateQueue, ^{
    if (self.streaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }
    [self.pendingFrames removeAllObjects];
    self.pendingStart = NO;
    if (self.client.isConnected) {
      [self.client finish];
    }
    [self.client disableAudioSending];
    [self startKeepAliveIfNeeded];
  });
}

/// Aborts the session: stops capture, drops buffered frames and closes the
/// socket without requesting finalization.
- (void)cancel {
  dispatch_async(self.stateQueue, ^{
    if (self.streaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }
    [self.pendingFrames removeAllObjects];
    self.pendingStart = NO;
    self.keepConnection = NO;
    [self.client disableAudioSending];
    [self stopKeepAlive];
    [self.client disconnect];
  });
}

/// Full async teardown on the state queue.
- (void)disconnect {
  dispatch_async(self.stateQueue, ^{
    [self disconnectInternal];
  });
}

/// Synchronous teardown: capture, buffers, flags, keep-alive, socket and
/// audio session. Called on the state queue, except from -dealloc where it
/// runs on the deallocating thread.
- (void)disconnectInternal {
  if (self.streaming) {
    [self.audioCapture stopCapture];
    self.streaming = NO;
  }
  [self.pendingFrames removeAllObjects];
  self.pendingStart = NO;
  self.keepConnection = NO;
  self.shouldReconnectOnForeground = NO;
  [self.client disableAudioSending];
  [self stopKeepAlive];
  [self.client disconnect];
  [self.audioSession deactivateSession];
}
#pragma mark - AudioCaptureManagerDelegate

/// Forwards a captured PCM frame to the socket, or buffers it (bounded ring
/// of `pendingFrameLimit` frames, oldest dropped) while not yet streaming.
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
  if (pcmFrame.length == 0) {
    return;
  }
  dispatch_async(self.stateQueue, ^{
    if (!self.streaming || !self.client.isConnected) {
      [self.pendingFrames addObject:pcmFrame];
      if (self.pendingFrames.count > self.pendingFrameLimit) {
        [self.pendingFrames removeObjectAtIndex:0];
      }
      return;
    }
    [self.client sendAudioPCMFrame:pcmFrame];
  });
}

/// Relays the microphone RMS level to the delegate on the main queue.
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (deepgramStreamingManagerDidUpdateRMS:)]) {
      [self.delegate deepgramStreamingManagerDidUpdateRMS:rms];
    }
  });
}

#pragma mark - AudioSessionManagerDelegate

/// An audio-session interruption (phone call etc.) aborts the session.
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
  if (type == KBAudioSessionInterruptionTypeBegan) {
    [self cancel];
  }
}

/// Surfaces a microphone-permission denial as a reported error.
- (void)audioSessionManagerMicrophonePermissionDenied {
  [self reportErrorWithMessage:@"Microphone permission denied"];
}
#pragma mark - DeepgramWebSocketClientDelegate

/// Socket opened: reset retry bookkeeping, begin streaming if a start is
/// pending, arm keep-alives, then notify the delegate on the main queue.
- (void)deepgramClientDidConnect {
  dispatch_async(self.stateQueue, ^{
    self.connecting = NO;
    self.reconnectAttempts = 0;
    self.reconnectScheduled = NO;
    [self beginStreamingIfReady];
    [self startKeepAliveIfNeeded];
    dispatch_async(dispatch_get_main_queue(), ^{
      if ([self.delegate respondsToSelector:@selector
                         (deepgramStreamingManagerDidConnect)]) {
        [self.delegate deepgramStreamingManagerDidConnect];
      }
    });
  });
}

/// Socket closed: stop capture, deactivate audio, and schedule a reconnect
/// when a session is still wanted.
/// NOTE(review): the delegate notification below is dispatched independently
/// of the state-queue cleanup, so the delegate may observe the disconnect
/// before internal state has been reset — confirm this ordering is intended.
- (void)deepgramClientDidDisconnect:(NSError *_Nullable)error {
  dispatch_async(self.stateQueue, ^{
    if (self.streaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }
    self.connecting = NO;
    [self.audioSession deactivateSession];
    [self stopKeepAlive];
    if (self.pendingStart || self.keepConnection) {
      [self scheduleReconnectWithError:error];
    }
  });
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (deepgramStreamingManagerDidDisconnect:)]) {
      [self.delegate deepgramStreamingManagerDidDisconnect:error];
    }
  });
}

/// Forwards an interim transcript to the delegate on the main queue.
- (void)deepgramClientDidReceiveInterimTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (deepgramStreamingManagerDidReceiveInterimTranscript:)]) {
      [self.delegate deepgramStreamingManagerDidReceiveInterimTranscript:text];
    }
  });
}

/// Forwards a final transcript to the delegate on the main queue.
- (void)deepgramClientDidReceiveFinalTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (deepgramStreamingManagerDidReceiveFinalTranscript:)]) {
      [self.delegate deepgramStreamingManagerDidReceiveFinalTranscript:text];
    }
  });
}

/// Client-level failures are re-reported through the manager's delegate.
- (void)deepgramClientDidFail:(NSError *)error {
  [self reportError:error];
}
#pragma mark - Error Reporting

/// Delivers an error to the delegate on the main queue.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<DeepgramStreamingManagerDelegate> delegate = self.delegate;
    if ([delegate
            respondsToSelector:@selector(deepgramStreamingManagerDidFail:)]) {
      [delegate deepgramStreamingManagerDidFail:error];
    }
  });
}

/// Convenience wrapper: wraps `message` in an NSError in this manager's
/// error domain (code -1) and reports it.
- (void)reportErrorWithMessage:(NSString *)message {
  NSDictionary *userInfo = @{NSLocalizedDescriptionKey : message ?: @""};
  NSError *error = [NSError errorWithDomain:kDeepgramStreamingManagerErrorDomain
                                       code:-1
                                   userInfo:userInfo];
  [self reportError:error];
}
/// Copies the current configuration onto the WebSocket client and connects,
/// unless a connection attempt is already in flight or established.
/// Called on the state queue.
- (void)connectIfNeeded {
  if (self.connecting || self.client.isConnected) {
    return;
  }
  if (self.serverURL.length == 0) {
    [self reportErrorWithMessage:@"Deepgram server URL is required"];
    return;
  }
  self.client.serverURL = self.serverURL;
  self.client.apiKey = self.apiKey;
  self.client.language = self.language;
  self.client.model = self.model;
  self.client.punctuate = self.punctuate;
  self.client.smartFormat = self.smartFormat;
  self.client.interimResults = self.interimResults;
  self.client.encoding = self.encoding;
  self.client.sampleRate = self.sampleRate;
  self.client.channels = self.channels;
  // Audio stays muted until streaming actually begins.
  [self.client disableAudioSending];
  self.connecting = YES;
  [self.client connect];
}

/// If a start is pending, flips into streaming mode, enables audio sending,
/// cancels keep-alives and flushes any frames buffered while connecting.
/// Called on the state queue.
- (void)beginStreamingIfReady {
  if (!self.pendingStart) {
    return;
  }
  self.streaming = YES;
  [self.client enableAudioSending];
  [self stopKeepAlive];
  if (self.pendingFrames.count > 0) {
    NSArray<NSData *> *frames = [self.pendingFrames copy];
    [self.pendingFrames removeAllObjects];
    for (NSData *frame in frames) {
      [self.client sendAudioPCMFrame:frame];
    }
    NSLog(@"[DeepgramStreamingManager] Flushed %lu pending frames",
          (unsigned long)frames.count);
  }
}
/// Schedules a single reconnect attempt after a fixed 1 s delay, up to
/// `maxReconnectAttempts` tries; gives up (clearing intent flags) after that.
/// Called on the state queue.
/// NOTE(review): the delay is constant — consider exponential backoff for
/// repeated network failures.
- (void)scheduleReconnectWithError:(NSError *_Nullable)error {
  if (self.reconnectScheduled || self.connecting || self.client.isConnected) {
    return;
  }
  if (self.appInBackground) {
    // Defer: reconnect when the app returns to foreground instead.
    self.shouldReconnectOnForeground = YES;
    return;
  }
  if (self.reconnectAttempts >= self.maxReconnectAttempts) {
    NSLog(@"[DeepgramStreamingManager] Reconnect failed %ld times, stop retry. %@",
          (long)self.maxReconnectAttempts,
          error.localizedDescription ?: @"");
    self.pendingStart = NO;
    self.keepConnection = NO;
    return;
  }
  self.reconnectAttempts += 1;
  self.reconnectScheduled = YES;
  dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t)(1 * NSEC_PER_SEC)),
                 self.stateQueue, ^{
                   self.reconnectScheduled = NO;
                   if (self.appInBackground) {
                     self.shouldReconnectOnForeground = YES;
                     return;
                   }
                   // Intent may have been cancelled while waiting.
                   if (!self.pendingStart && !self.keepConnection) {
                     return;
                   }
                   [self connectIfNeeded];
                 });
}
/// Observes app background/foreground transitions (socket is not kept alive
/// in the background).
- (void)setupNotifications {
  NSNotificationCenter *center = [NSNotificationCenter defaultCenter];
  [center addObserver:self
             selector:@selector(handleAppDidEnterBackground)
                 name:UIApplicationDidEnterBackgroundNotification
               object:nil];
  [center addObserver:self
             selector:@selector(handleAppWillEnterForeground)
                 name:UIApplicationWillEnterForegroundNotification
               object:nil];
}

/// Balanced in -dealloc.
- (void)removeNotifications {
  [[NSNotificationCenter defaultCenter] removeObserver:self];
}

/// Background: remember whether a session was active, then tear everything
/// down (capture, buffers, socket, audio session).
- (void)handleAppDidEnterBackground {
  dispatch_async(self.stateQueue, ^{
    self.appInBackground = YES;
    // Resume on foreground only if a session/connection was wanted.
    self.shouldReconnectOnForeground =
        self.keepConnection || self.pendingStart;
    self.pendingStart = NO;
    self.keepConnection = NO;
    if (self.streaming) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }
    [self.pendingFrames removeAllObjects];
    [self.client disableAudioSending];
    [self stopKeepAlive];
    [self.client disconnect];
    [self.audioSession deactivateSession];
    NSLog(@"[DeepgramStreamingManager] App entered background, socket closed");
  });
}

/// Foreground: reconnect if a session was interrupted by backgrounding.
/// Note this only reopens the socket; capture is not restarted here.
- (void)handleAppWillEnterForeground {
  dispatch_async(self.stateQueue, ^{
    self.appInBackground = NO;
    if (self.shouldReconnectOnForeground) {
      self.keepConnection = YES;
      self.reconnectAttempts = 0;
      [self connectIfNeeded];
    }
    self.shouldReconnectOnForeground = NO;
  });
}
/// Arms a 15 s repeating GCD timer (1 s leeway) on the state queue that sends
/// Deepgram KeepAlive messages while the socket is idle (connected but not
/// streaming). No-op if already armed or not applicable. Called on the state
/// queue.
- (void)startKeepAliveIfNeeded {
  if (!self.keepConnection || !self.client.isConnected || self.streaming) {
    return;
  }
  if (self.keepAliveTimer) {
    return;
  }
  self.keepAliveTimer =
      dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0,
                             self.stateQueue);
  dispatch_source_set_timer(self.keepAliveTimer,
                            dispatch_time(DISPATCH_TIME_NOW, 15 * NSEC_PER_SEC),
                            15 * NSEC_PER_SEC, 1 * NSEC_PER_SEC);
  // Weak capture: the timer must not keep the manager alive.
  __weak typeof(self) weakSelf = self;
  dispatch_source_set_event_handler(self.keepAliveTimer, ^{
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (!strongSelf) {
      return;
    }
    [strongSelf.client sendKeepAlive];
  });
  dispatch_resume(self.keepAliveTimer);
}

/// Cancels and releases the keep-alive timer, if any.
- (void)stopKeepAlive {
  if (self.keepAliveTimer) {
    dispatch_source_cancel(self.keepAliveTimer);
    self.keepAliveTimer = nil;
  }
}
@end

View File

@@ -1,52 +0,0 @@
//
// DeepgramWebSocketClient.h
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Connection and transcript callbacks; delivered on the main queue.
@protocol DeepgramWebSocketClientDelegate <NSObject>
@optional
/// The WebSocket handshake completed.
- (void)deepgramClientDidConnect;
/// The socket closed; `error` is nil on a clean close.
- (void)deepgramClientDidDisconnect:(NSError *_Nullable)error;
/// A partial (interim) transcript from the server.
- (void)deepgramClientDidReceiveInterimTranscript:(NSString *)text;
/// A finalized transcript segment from the server.
- (void)deepgramClientDidReceiveFinalTranscript:(NSString *)text;
/// A send/receive/parse error occurred.
- (void)deepgramClientDidFail:(NSError *)error;
@end

/// WebSocket client for Deepgram live transcription, built on
/// NSURLSessionWebSocketTask. Configure the properties, then -connect.
@interface DeepgramWebSocketClient : NSObject
/// Event receiver; called on the main queue.
@property(nonatomic, weak) id<DeepgramWebSocketClientDelegate> delegate;
/// WebSocket endpoint; defaults to wss://api.deepgram.com/v1/listen.
@property(nonatomic, copy) NSString *serverURL; // wss://api.deepgram.com/v1/listen
/// API key, sent as an `Authorization: Token …` header.
@property(nonatomic, copy) NSString *apiKey;
/// Optional `language` query parameter.
@property(nonatomic, copy, nullable) NSString *language;
/// Optional `model` query parameter.
@property(nonatomic, copy, nullable) NSString *model;
@property(nonatomic, assign) BOOL punctuate;
@property(nonatomic, assign) BOOL smartFormat;
@property(nonatomic, assign) BOOL interimResults;
/// PCM encoding query parameter; defaults to linear16.
@property(nonatomic, copy) NSString *encoding; // linear16
@property(nonatomic, assign) double sampleRate;
@property(nonatomic, assign) int channels;
/// YES between the handshake completing and the socket closing.
@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected;
/// Opens (or reopens) the WebSocket connection.
- (void)connect;
/// Closes the socket and notifies the delegate.
- (void)disconnect;
/// Sends one raw PCM frame; dropped while audio sending is disabled.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame;
/// Sends CloseStream so the server finalizes the transcript.
- (void)finish;
/// Sends a KeepAlive message to hold an idle connection open.
- (void)sendKeepAlive;
/// Gates whether -sendAudioPCMFrame: actually transmits.
- (void)enableAudioSending;
- (void)disableAudioSending;
@end
NS_ASSUME_NONNULL_END

View File

@@ -1,411 +0,0 @@
//
// DeepgramWebSocketClient.m
// keyBoard
//
// Created by Mac on 2026/1/21.
//
#import "DeepgramWebSocketClient.h"
static NSString *const kDeepgramWebSocketClientErrorDomain =
@"DeepgramWebSocketClient";
@interface DeepgramWebSocketClient () <NSURLSessionWebSocketDelegate>
@property(nonatomic, strong) NSURLSession *urlSession;
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;
@property(nonatomic, strong) dispatch_queue_t networkQueue;
@property(nonatomic, assign) BOOL connected;
@property(nonatomic, assign) BOOL audioSendingEnabled;
@end
@implementation DeepgramWebSocketClient
/// Sets up the serial network queue and Deepgram default parameters.
/// Audio sending starts disabled; it is gated via -enableAudioSending.
- (instancetype)init {
  self = [super init];
  if (self) {
    // Serial queue serializing all socket sends and teardown.
    _networkQueue = dispatch_queue_create("com.keyboard.aitalk.deepgram.ws",
                                          DISPATCH_QUEUE_SERIAL);
    _serverURL = @"wss://api.deepgram.com/v1/listen";
    _encoding = @"linear16";
    _sampleRate = 16000.0;
    _channels = 1;
    _punctuate = YES;
    _smartFormat = YES;
    _interimResults = YES;
    _audioSendingEnabled = NO;
  }
  return self;
}

/// Closes the socket and invalidates the URL session synchronously.
- (void)dealloc {
  [self disconnectInternal];
}
#pragma mark - Public Methods

/// Opens the WebSocket on the network queue. Any existing connection is torn
/// down first; fails fast when the API key or URL is missing/invalid.
/// The API key travels in the Authorization header, not in the URL.
- (void)connect {
  dispatch_async(self.networkQueue, ^{
    [self disconnectInternal];
    if (self.apiKey.length == 0) {
      [self reportErrorWithMessage:@"Deepgram API key is required"];
      return;
    }
    NSURL *url = [self buildURL];
    if (!url) {
      [self reportErrorWithMessage:@"Invalid Deepgram URL"];
      return;
    }
    NSLog(@"[DeepgramWebSocketClient] Connecting: %@", url.absoluteString);
    NSURLSessionConfiguration *config =
        [NSURLSessionConfiguration defaultSessionConfiguration];
    config.timeoutIntervalForRequest = 30;
    config.timeoutIntervalForResource = 300;
    // Session retains its delegate (self) until invalidated in
    // disconnectInternal.
    self.urlSession = [NSURLSession sessionWithConfiguration:config
                                                    delegate:self
                                               delegateQueue:nil];
    NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
    [request setValue:[NSString stringWithFormat:@"Token %@", self.apiKey]
        forHTTPHeaderField:@"Authorization"];
    self.webSocketTask = [self.urlSession webSocketTaskWithRequest:request];
    [self.webSocketTask resume];
    // Start the receive loop immediately so no server message is missed.
    [self receiveMessage];
  });
}

/// Closes the socket on the network queue; notifies the delegate only if a
/// task actually existed.
- (void)disconnect {
  dispatch_async(self.networkQueue, ^{
    BOOL shouldNotify = self.webSocketTask != nil;
    if (shouldNotify) {
      NSLog(@"[DeepgramWebSocketClient] Disconnect requested");
    }
    [self disconnectInternal];
    if (shouldNotify) {
      [self notifyDisconnect:nil];
    }
  });
}
/// Sends one PCM frame over the socket. Cheap pre-checks run on the calling
/// thread; the authoritative checks and the actual send happen on the
/// network queue.
/// Fix: the per-frame "Sent audio frame" success log fired for every audio
/// frame in release builds (a hot path, many times per second); it is now
/// compiled only into DEBUG builds. Error reporting is unchanged.
/// @param pcmFrame Raw linear16 PCM bytes; empty frames are dropped.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
  // Fast-path rejection off-queue; re-checked on the network queue below.
  if (!self.connected || !self.webSocketTask || pcmFrame.length == 0) {
    return;
  }
  dispatch_async(self.networkQueue, ^{
    if (!self.audioSendingEnabled) {
      // Audio muted (e.g. while finalizing): drop silently.
      return;
    }
    if (!self.connected || !self.webSocketTask) {
      return;
    }
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
    [self.webSocketTask
            sendMessage:message
      completionHandler:^(NSError *_Nullable error) {
        if (error) {
          [self reportError:error];
        }
#ifdef DEBUG
        else {
          // Per-frame logging is far too chatty for release builds.
          NSLog(@"[DeepgramWebSocketClient] Sent audio frame: %lu bytes",
                (unsigned long)pcmFrame.length);
        }
#endif
      }];
  });
}
/// Asks the server to finalize the transcript via a CloseStream control
/// message (the socket itself stays open).
- (void)finish {
  NSLog(@"[DeepgramWebSocketClient] Sending CloseStream");
  [self sendJSON:@{@"type" : @"CloseStream"}];
}

/// Sends a KeepAlive control message; no-op while disconnected.
- (void)sendKeepAlive {
  if (!self.connected || !self.webSocketTask) {
    return;
  }
  [self sendJSON:@{@"type" : @"KeepAlive"}];
}

/// Allows -sendAudioPCMFrame: to transmit (flag flipped on the network queue).
- (void)enableAudioSending {
  dispatch_async(self.networkQueue, ^{
    self.audioSendingEnabled = YES;
  });
}

/// Mutes -sendAudioPCMFrame: (flag flipped on the network queue).
- (void)disableAudioSending {
  dispatch_async(self.networkQueue, ^{
    self.audioSendingEnabled = NO;
  });
}
#pragma mark - Private Methods

/// Assembles the Deepgram WebSocket URL: serverURL plus the configured query
/// parameters. Returns nil when serverURL is empty or unparsable. Optional
/// settings that are nil/empty are simply omitted by the upsert helper.
- (NSURL *)buildURL {
  if (self.serverURL.length == 0) {
    return nil;
  }
  NSURLComponents *components =
      [NSURLComponents componentsWithString:self.serverURL];
  if (!components) {
    return nil;
  }
  NSMutableArray<NSURLQueryItem *> *queryItems =
      components.queryItems.mutableCopy ?: [NSMutableArray array];
  [self upsertQueryItemWithName:@"model" value:self.model items:queryItems];
  [self upsertQueryItemWithName:@"language"
                          value:self.language
                          items:queryItems];
  [self upsertQueryItemWithName:@"punctuate"
                          value:(self.punctuate ? @"true" : @"false")
                          items:queryItems];
  [self upsertQueryItemWithName:@"smart_format"
                          value:(self.smartFormat ? @"true" : @"false")
                          items:queryItems];
  [self upsertQueryItemWithName:@"interim_results"
                          value:(self.interimResults ? @"true" : @"false")
                          items:queryItems];
  [self upsertQueryItemWithName:@"encoding"
                          value:self.encoding
                          items:queryItems];
  [self upsertQueryItemWithName:@"sample_rate"
                          value:[NSString stringWithFormat:@"%.0f",
                                                           self.sampleRate]
                          items:queryItems];
  [self upsertQueryItemWithName:@"channels"
                          value:[NSString stringWithFormat:@"%d", self.channels]
                          items:queryItems];
  components.queryItems = queryItems;
  return components.URL;
}

/// Replaces the query item named `name` in `items`, or appends a new one if
/// no item with that name exists. Empty names/values are ignored.
- (void)upsertQueryItemWithName:(NSString *)name
                          value:(NSString *)value
                          items:(NSMutableArray<NSURLQueryItem *> *)items {
  if (name.length == 0 || value.length == 0) {
    return;
  }
  NSURLQueryItem *replacement = [NSURLQueryItem queryItemWithName:name
                                                            value:value];
  NSUInteger existingIndex = [items
      indexOfObjectPassingTest:^BOOL(NSURLQueryItem *item, NSUInteger idx,
                                     BOOL *stop) {
        return [item.name isEqualToString:name];
      }];
  if (existingIndex == NSNotFound) {
    [items addObject:replacement];
  } else {
    items[existingIndex] = replacement;
  }
}
/// Serializes `dict` to a JSON string and sends it as a text frame on the
/// network queue. Serialization errors are reported; the send itself is a
/// nil no-op if the task has gone away by the time the block runs.
- (void)sendJSON:(NSDictionary *)dict {
  if (!self.webSocketTask) {
    return;
  }
  NSError *jsonError = nil;
  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
                                                     options:0
                                                       error:&jsonError];
  if (jsonError) {
    [self reportError:jsonError];
    return;
  }
  NSString *jsonString = [[NSString alloc] initWithData:jsonData
                                               encoding:NSUTF8StringEncoding];
  if (!jsonString) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }
  dispatch_async(self.networkQueue, ^{
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
    [self.webSocketTask sendMessage:message
                  completionHandler:^(NSError *_Nullable error) {
                    if (error) {
                      [self reportError:error];
                    }
                  }];
  });
}
/// Receive loop: handles one message, then re-arms itself. The loop stops on
/// error or when the task is gone; a weak self capture prevents the pending
/// receive from keeping the client alive.
/// NOTE(review): the `error.code != 57` check filters what appears to be
/// POSIX ENOTCONN (socket no longer connected), but the error's domain is
/// not checked, so code 57 from any domain is also swallowed — confirm.
- (void)receiveMessage {
  if (!self.webSocketTask) {
    return;
  }
  __weak typeof(self) weakSelf = self;
  [self.webSocketTask receiveMessageWithCompletionHandler:^(
                          NSURLSessionWebSocketMessage *_Nullable message,
                          NSError *_Nullable error) {
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (!strongSelf) {
      return;
    }
    if (error) {
      // Expected teardown errors (cancelled / code 57) do not surface.
      if (error.code != NSURLErrorCancelled && error.code != 57) {
        [strongSelf notifyDisconnect:error];
        [strongSelf disconnectInternal];
      }
      return;
    }
    if (message.type == NSURLSessionWebSocketMessageTypeString) {
      NSLog(@"[DeepgramWebSocketClient] Received text: %@", message.string);
      [strongSelf handleTextMessage:message.string];
    } else if (message.type == NSURLSessionWebSocketMessageTypeData) {
      NSLog(@"[DeepgramWebSocketClient] Received binary: %lu bytes",
            (unsigned long)message.data.length);
      [strongSelf handleBinaryMessage:message.data];
    }
    // Re-arm for the next message.
    [strongSelf receiveMessage];
  }];
}
/// Parses a Deepgram JSON text message and forwards any transcript to the
/// delegate on the main queue.
/// Fix: every value pulled out of the JSON is now type-checked before use.
/// The original called -length on json[@"error"] without verifying it was an
/// NSString — an unrecognized-selector crash if the server sends a
/// structured error object — and assumed the root and the alternatives
/// entries were dictionaries. Well-formed payloads behave exactly as before.
- (void)handleTextMessage:(NSString *)text {
  if (text.length == 0) {
    return;
  }
  NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
  if (!data) {
    return;
  }
  NSError *jsonError = nil;
  id root = [NSJSONSerialization JSONObjectWithData:data
                                            options:0
                                              error:&jsonError];
  if (jsonError) {
    [self reportError:jsonError];
    return;
  }
  if (![root isKindOfClass:[NSDictionary class]]) {
    return;  // Top-level payload must be a JSON object.
  }
  NSDictionary *json = root;
  id errorValue = json[@"error"];
  if ([errorValue isKindOfClass:[NSString class]] &&
      ((NSString *)errorValue).length > 0) {
    [self reportErrorWithMessage:errorValue];
    return;
  }
  NSDictionary *channel = json[@"channel"];
  if (![channel isKindOfClass:[NSDictionary class]]) {
    return;
  }
  NSArray *alternatives = channel[@"alternatives"];
  if (![alternatives isKindOfClass:[NSArray class]] ||
      alternatives.count == 0) {
    return;
  }
  NSDictionary *firstAlt = alternatives.firstObject;
  if (![firstAlt isKindOfClass:[NSDictionary class]]) {
    return;  // Each alternatives entry must be an object.
  }
  id transcriptValue = firstAlt[@"transcript"];
  NSString *transcript =
      [transcriptValue isKindOfClass:[NSString class]] ? transcriptValue : @"";
  BOOL isFinal =
      [json[@"is_final"] boolValue] || [json[@"speech_final"] boolValue];
  if (transcript.length == 0) {
    return;
  }
  dispatch_async(dispatch_get_main_queue(), ^{
    if (isFinal) {
      if ([self.delegate respondsToSelector:@selector
                         (deepgramClientDidReceiveFinalTranscript:)]) {
        [self.delegate deepgramClientDidReceiveFinalTranscript:transcript];
      }
    } else {
      if ([self.delegate respondsToSelector:@selector
                         (deepgramClientDidReceiveInterimTranscript:)]) {
        [self.delegate deepgramClientDidReceiveInterimTranscript:transcript];
      }
    }
  });
}

/// Binary frames are ignored (original behavior); transcripts arrive as text.
- (void)handleBinaryMessage:(NSData *)data {
}
/// Synchronous teardown: clears flags, cancels the task with a normal close
/// code, and invalidates the URL session (which releases its strong
/// reference to self as delegate). Safe to call repeatedly.
- (void)disconnectInternal {
  self.connected = NO;
  self.audioSendingEnabled = NO;
  if (self.webSocketTask) {
    [self.webSocketTask
        cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                     reason:nil];
    self.webSocketTask = nil;
  }
  if (self.urlSession) {
    [self.urlSession invalidateAndCancel];
    self.urlSession = nil;
  }
}

/// Delivers an error to the delegate on the main queue.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector(deepgramClientDidFail:)]) {
      [self.delegate deepgramClientDidFail:error];
    }
  });
}

/// Wraps `message` in an NSError in this client's domain and reports it.
- (void)reportErrorWithMessage:(NSString *)message {
  NSError *error =
      [NSError errorWithDomain:kDeepgramWebSocketClientErrorDomain
                          code:-1
                      userInfo:@{NSLocalizedDescriptionKey : message ?: @""}];
  [self reportError:error];
}

/// Marks the client disconnected and informs the delegate on the main queue.
- (void)notifyDisconnect:(NSError *_Nullable)error {
  self.connected = NO;
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate
            respondsToSelector:@selector(deepgramClientDidDisconnect:)]) {
      [self.delegate deepgramClientDidDisconnect:error];
    }
  });
}
#pragma mark - NSURLSessionWebSocketDelegate

/// Handshake completed: mark connected and notify on the main queue.
/// Invoked on the session's delegate queue.
- (void)URLSession:(NSURLSession *)session
        webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didOpenWithProtocol:(NSString *)protocol {
  self.connected = YES;
  NSLog(@"[DeepgramWebSocketClient] Connected");
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate
            respondsToSelector:@selector(deepgramClientDidConnect)]) {
      [self.delegate deepgramClientDidConnect];
    }
  });
}

/// Server closed the socket. The webSocketTask nil-check skips the callback
/// when we initiated the teardown ourselves (task already cleared in
/// disconnectInternal, which also already notified).
- (void)URLSession:(NSURLSession *)session
       webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
              reason:(NSData *)reason {
  if (!self.webSocketTask) {
    return;
  }
  NSLog(@"[DeepgramWebSocketClient] Closed with code: %ld", (long)closeCode);
  [self notifyDisconnect:nil];
  [self disconnectInternal];
}
@end

View File

@@ -17,24 +17,15 @@ NS_ASSUME_NONNULL_BEGIN
- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager;
- (void)voiceToTextManagerDidEndRecording:(KBVoiceToTextManager *)manager;
- (void)voiceToTextManagerDidCancelRecording:(KBVoiceToTextManager *)manager;
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
didUpdateInterimText:(NSString *)text;
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
didReceiveFinalText:(NSString *)text;
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
didFailWithError:(NSError *)error;
@end
/// Voice-to-text manager (binds KBVoiceInputBar and uses Deepgram).
/// 语音输入事件管理器,仅负责桥接 KBVoiceInputBar 的录音事件。
@interface KBVoiceToTextManager : NSObject
@property(nonatomic, weak) id<KBVoiceToTextManagerDelegate> delegate;
@property(nonatomic, weak, readonly) KBVoiceInputBar *inputBar;
@property(nonatomic, assign) BOOL deepgramEnabled;
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar;
- (void)prepareConnection;
- (void)disconnect;
@end

View File

@@ -6,118 +6,30 @@
//
#import "KBVoiceToTextManager.h"
#import "DeepgramStreamingManager.h"
#import "KBVoiceInputBar.h"
#import "KBLocalizationManager.h"
@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate,
DeepgramStreamingManagerDelegate>
@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate>
@property(nonatomic, strong) DeepgramStreamingManager *deepgramManager;
@property(nonatomic, weak) KBVoiceInputBar *inputBar;
@property(nonatomic, strong) NSMutableString *fullText;
@end
@implementation KBVoiceToTextManager
- (void)setDeepgramEnabled:(BOOL)deepgramEnabled {
if (_deepgramEnabled == deepgramEnabled) {
return;
}
_deepgramEnabled = deepgramEnabled;
if (!deepgramEnabled) {
[self.deepgramManager cancel];
[self resetTranscript];
} else {
[self.deepgramManager prepareConnection];
}
}
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar {
self = [super init];
if (self) {
_inputBar = inputBar;
_inputBar.delegate = self;
_fullText = [[NSMutableString alloc] init];
_deepgramEnabled = YES;
[self setupDeepgram];
}
return self;
}
- (void)dealloc {
[self.deepgramManager disconnect];
}
#pragma mark - Public Methods
/// Pre-warms the Deepgram socket with the current UI language so the first
/// recording starts without connection latency. No-op while disabled.
- (void)prepareConnection {
  if (self.deepgramEnabled) {
    [self kb_refreshDeepgramLanguage];
    [self.deepgramManager prepareConnection];
  }
}
/// Closes the streaming connection. No-op while disabled (the enabled-flag
/// setter already cancels the session when turning recognition off).
- (void)disconnect {
  if (self.deepgramEnabled) {
    [self.deepgramManager disconnect];
  }
}
#pragma mark - Private Methods
/// Creates and configures the Deepgram streaming client used for all
/// recording sessions.
- (void)setupDeepgram {
  self.deepgramManager = [[DeepgramStreamingManager alloc] init];
  self.deepgramManager.delegate = self;
  self.deepgramManager.serverURL = @"wss://api.deepgram.com/v1/listen";
  // FIXME(security): hard-coded API key committed to source control. Move it
  // to a server-provisioned/secure-storage config and rotate this key.
  self.deepgramManager.apiKey = @"9c792eb63a65d644cbc95785155754cd1e84f8cf";
  [self kb_refreshDeepgramLanguage];
  self.deepgramManager.model = @"nova-3";
  self.deepgramManager.punctuate = YES;
  self.deepgramManager.smartFormat = YES;
  // Interim results drive the live status-text preview below.
  self.deepgramManager.interimResults = YES;
  // Audio stream format: "linear16" encoding at 16 kHz, mono.
  self.deepgramManager.encoding = @"linear16";
  self.deepgramManager.sampleRate = 16000.0;
  self.deepgramManager.channels = 1;
}
/// Clears the accumulated transcript ahead of a fresh recording session.
- (void)resetTranscript {
  [self.fullText setString:@""];
}
/// Pushes the app's current UI language onto the Deepgram client so the
/// next session transcribes in the right language.
- (void)kb_refreshDeepgramLanguage {
  self.deepgramManager.language = [self kb_currentDeepgramLanguageCode];
}
/// Maps the app's current UI language code to a Deepgram language tag.
/// Unrecognized codes fall back to English.
- (NSString *)kb_currentDeepgramLanguageCode {
  NSString *raw = [KBLocalizationManager shared].currentLanguageCode ?: @"en";
  NSString *code = raw.lowercaseString;
  // Ordered prefix table: first match wins. Traditional-Chinese variants are
  // listed before the simplified ones so "zh-hant"/"zh-tw"/"zh-hk" never fall
  // through to zh-CN.
  NSArray<NSArray<NSString *> *> *prefixTable = @[
    @[ @"es", @"es" ],
    @[ @"id", @"id" ],
    @[ @"pt", @"pt" ],
    @[ @"zh-hant", @"zh-TW" ],
    @[ @"zh_tw", @"zh-TW" ],
    @[ @"zh-tw", @"zh-TW" ],
    @[ @"zh-hk", @"zh-TW" ],
    @[ @"zh-hans", @"zh-CN" ],
    @[ @"zh_cn", @"zh-CN" ],
    @[ @"zh-cn", @"zh-CN" ],
  ];
  for (NSArray<NSString *> *entry in prefixTable) {
    if ([code hasPrefix:entry[0]]) {
      return entry[1];
    }
  }
  return @"en";
}
#pragma mark - KBVoiceInputBarDelegate
/// Input-bar delegate: recording started. Clears the old transcript, starts
/// streaming when recognition is enabled, and forwards the event.
/// (Repaired: a bad merge left both branches' statusText assignments and a
/// stray diff hunk header in place of the delegate notification.)
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
  [self resetTranscript];
  if (self.deepgramEnabled) {
    [self kb_refreshDeepgramLanguage];
    inputBar.statusText = KBLocalized(@"Voice Connecting...");
    [self.deepgramManager start];
  } else {
    inputBar.statusText = KBLocalized(@"Voice Recording...");
  }
  if ([self.delegate respondsToSelector:@selector
                     (voiceToTextManagerDidBeginRecording:)]) {
    [self.delegate voiceToTextManagerDidBeginRecording:self];
  }
}
/// Input-bar delegate: recording finished normally. Asks Deepgram to flush
/// and finalize the transcript when enabled, then forwards the event.
/// (Repaired: duplicated statusText line and truncated delegate call left
/// behind by a bad merge.)
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
  if (self.deepgramEnabled) {
    inputBar.statusText = KBLocalized(@"Voice Recognizing...");
    [self.deepgramManager stopAndFinalize];
  } else {
    inputBar.statusText = KBLocalized(@"Voice Recording Ended");
  }
  if ([self.delegate respondsToSelector:@selector
                     (voiceToTextManagerDidEndRecording:)]) {
    [self.delegate voiceToTextManagerDidEndRecording:self];
  }
}
/// Input-bar delegate: recording cancelled by the user. Discards the
/// transcript, aborts the stream when enabled, and forwards the event.
/// (Repaired: a stray diff hunk header replaced the delegate call.)
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
  inputBar.statusText = KBLocalized(@"Voice Cancelled");
  [self resetTranscript];
  if (self.deepgramEnabled) {
    [self.deepgramManager cancel];
  }
  if ([self.delegate respondsToSelector:@selector
                     (voiceToTextManagerDidCancelRecording:)]) {
    [self.delegate voiceToTextManagerDidCancelRecording:self];
  }
}
#pragma mark - DeepgramStreamingManagerDelegate
/// Deepgram socket opened; show the listening hint (only while enabled).
- (void)deepgramStreamingManagerDidConnect {
  if (self.deepgramEnabled) {
    self.inputBar.statusText = KBLocalized(@"Voice Listening...");
  }
}
/// Deepgram socket closed. A clean close is silent; a close carrying an
/// error updates the status line and notifies the delegate.
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error {
  if (!self.deepgramEnabled || error == nil) {
    return;
  }
  self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");
  id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
  if ([delegate respondsToSelector:@selector(voiceToTextManager:
                                                didFailWithError:)]) {
    [delegate voiceToTextManager:self didFailWithError:error];
  }
}
/// Live microphone level from the stream; drives the bar's volume display.
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
  if (self.deepgramEnabled) {
    [self.inputBar updateVolumeRMS:rms];
  }
}
/// Partial hypothesis arrived. Builds a preview by appending the interim text
/// to the already-committed transcript, shows it on the bar, and forwards it.
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
  if (!self.deepgramEnabled) {
    return;
  }
  NSString *interim = text ?: @"";
  NSString *preview;
  if (self.fullText.length == 0) {
    preview = interim;
  } else if (interim.length == 0) {
    preview = [self.fullText copy];
  } else {
    preview = [NSString stringWithFormat:@"%@ %@", self.fullText, interim];
  }
  self.inputBar.statusText =
      preview.length > 0 ? preview : KBLocalized(@"Voice Recognizing...");
  if ([self.delegate respondsToSelector:@selector(voiceToTextManager:
                                              didUpdateInterimText:)]) {
    [self.delegate voiceToTextManager:self didUpdateInterimText:preview];
  }
}
/// Final segment arrived. Appends it (space-separated) to the running
/// transcript, refreshes the status line, and — when there is any text —
/// hands the full transcript so far to the delegate.
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
  if (!self.deepgramEnabled) {
    return;
  }
  if (text.length > 0) {
    NSString *separator = (self.fullText.length > 0) ? @" " : @"";
    [self.fullText appendFormat:@"%@%@", separator, text];
  }
  NSString *transcript = [self.fullText copy];
  self.inputBar.statusText =
      transcript.length > 0 ? transcript
                            : KBLocalized(@"Voice Recognition Completed");
  if (transcript.length > 0 &&
      [self.delegate respondsToSelector:@selector(voiceToTextManager:
                                               didReceiveFinalText:)]) {
    [self.delegate voiceToTextManager:self didReceiveFinalText:transcript];
  }
}
/// Streaming failure; surface it on the status line and to the delegate.
- (void)deepgramStreamingManagerDidFail:(NSError *)error {
  if (!self.deepgramEnabled) {
    return;
  }
  self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");
  id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
  if ([delegate respondsToSelector:@selector(voiceToTextManager:
                                                didFailWithError:)]) {
    [delegate voiceToTextManager:self didFailWithError:error];
  }
}
@end