1
This commit is contained in:
@@ -175,16 +175,18 @@ self.voiceInputBar.enabled = YES;
|
||||
|
||||
---
|
||||
|
||||
## 🎯 完整示例(集成 Deepgram)
|
||||
## 🎯 完整示例(桥接录音事件)
|
||||
|
||||
```objc
|
||||
#import "YourViewController.h"
|
||||
#import "KBVoiceInputBar.h"
|
||||
#import "DeepgramStreamingManager.h"
|
||||
#import "KBVoiceToTextManager.h"
|
||||
#import "KBVoiceRecordManager.h"
|
||||
|
||||
@interface YourViewController () <KBVoiceInputBarDelegate, DeepgramStreamingManagerDelegate>
|
||||
@interface YourViewController () <KBVoiceToTextManagerDelegate, KBVoiceRecordManagerDelegate>
|
||||
@property (nonatomic, strong) KBVoiceInputBar *voiceInputBar;
|
||||
@property (nonatomic, strong) DeepgramStreamingManager *deepgramManager;
|
||||
@property (nonatomic, strong) KBVoiceToTextManager *voiceToTextManager;
|
||||
@property (nonatomic, strong) KBVoiceRecordManager *voiceRecordManager;
|
||||
@end
|
||||
|
||||
@implementation YourViewController
|
||||
@@ -192,7 +194,7 @@ self.voiceInputBar.enabled = YES;
|
||||
- (void)viewDidLoad {
|
||||
[super viewDidLoad];
|
||||
[self setupUI];
|
||||
[self setupDeepgram];
|
||||
[self setupVoiceManagers];
|
||||
}
|
||||
|
||||
- (void)setupUI {
|
||||
@@ -205,50 +207,44 @@ self.voiceInputBar.enabled = YES;
|
||||
}];
|
||||
}
|
||||
|
||||
- (void)setupDeepgram {
|
||||
self.deepgramManager = [[DeepgramStreamingManager alloc] init];
|
||||
self.deepgramManager.delegate = self;
|
||||
self.deepgramManager.serverURL = @"wss://api.deepgram.com/v1/listen";
|
||||
self.deepgramManager.apiKey = @"your_api_key";
|
||||
[self.deepgramManager prepareConnection];
|
||||
- (void)setupVoiceManagers {
|
||||
self.voiceToTextManager = [[KBVoiceToTextManager alloc] initWithInputBar:self.voiceInputBar];
|
||||
self.voiceToTextManager.delegate = self;
|
||||
|
||||
self.voiceRecordManager = [[KBVoiceRecordManager alloc] init];
|
||||
self.voiceRecordManager.delegate = self;
|
||||
}
|
||||
|
||||
#pragma mark - KBVoiceInputBarDelegate
|
||||
#pragma mark - KBVoiceToTextManagerDelegate
|
||||
|
||||
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
|
||||
inputBar.statusText = @"正在连接...";
|
||||
[self.deepgramManager start];
|
||||
- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager {
|
||||
[self.voiceRecordManager startRecording];
|
||||
}
|
||||
|
||||
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
|
||||
inputBar.statusText = @"正在识别...";
|
||||
[self.deepgramManager stopAndFinalize];
|
||||
- (void)voiceToTextManagerDidEndRecording:(KBVoiceToTextManager *)manager {
|
||||
[self.voiceRecordManager stopRecording];
|
||||
}
|
||||
|
||||
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
|
||||
inputBar.statusText = @"已取消";
|
||||
[self.deepgramManager cancel];
|
||||
- (void)voiceToTextManagerDidCancelRecording:(KBVoiceToTextManager *)manager {
|
||||
[self.voiceRecordManager cancelRecording];
|
||||
}
|
||||
|
||||
#pragma mark - DeepgramStreamingManagerDelegate
|
||||
#pragma mark - KBVoiceRecordManagerDelegate
|
||||
|
||||
- (void)deepgramStreamingManagerDidConnect {
|
||||
self.voiceInputBar.statusText = @"正在聆听...";
|
||||
- (void)voiceRecordManager:(KBVoiceRecordManager *)manager
|
||||
didFinishRecordingAtURL:(NSURL *)fileURL
|
||||
duration:(NSTimeInterval)duration {
|
||||
NSLog(@"录音完成:%@ %.2fs", fileURL, duration);
|
||||
// TODO: 上传音频文件并处理转写结果
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
|
||||
[self.voiceInputBar updateVolumeRMS:rms];
|
||||
- (void)voiceRecordManagerDidRecordTooShort:(KBVoiceRecordManager *)manager {
|
||||
NSLog(@"录音过短");
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
|
||||
self.voiceInputBar.statusText = text.length > 0 ? text : @"正在识别...";
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
|
||||
self.voiceInputBar.statusText = @"识别完成";
|
||||
NSLog(@"最终识别结果:%@", text);
|
||||
|
||||
// TODO: 处理识别结果
|
||||
- (void)voiceRecordManager:(KBVoiceRecordManager *)manager
|
||||
didFailWithError:(NSError *)error {
|
||||
NSLog(@"录音失败:%@", error.localizedDescription);
|
||||
}
|
||||
|
||||
@end
|
||||
@@ -316,5 +312,6 @@ _recordButton.tintColor = [UIColor systemBlueColor];
|
||||
## 🔗 相关组件
|
||||
|
||||
- `KBAiRecordButton`:录音按钮(支持长按、波形动画)
|
||||
- `DeepgramStreamingManager`:语音识别管理器
|
||||
- `KBVoiceToTextManager`:语音输入事件桥接层
|
||||
- `KBVoiceRecordManager`:录音文件管理器
|
||||
- `VoiceChatStreamingManager`:语音聊天管理器
|
||||
|
||||
@@ -600,8 +600,6 @@ static void KBChatUpdatedDarwinCallback(CFNotificationCenterRef center,
|
||||
- (void)setupVoiceToTextManager {
|
||||
self.voiceToTextManager = [[KBVoiceToTextManager alloc] initWithInputBar:self.voiceInputBar];
|
||||
self.voiceToTextManager.delegate = self;
|
||||
self.voiceToTextManager.deepgramEnabled = NO;
|
||||
[self.voiceToTextManager prepareConnection];
|
||||
}
|
||||
|
||||
/// 5:录音管理
|
||||
@@ -1199,16 +1197,6 @@ static void KBChatUpdatedDarwinCallback(CFNotificationCenterRef center,
|
||||
|
||||
#pragma mark - KBVoiceToTextManagerDelegate
|
||||
|
||||
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
|
||||
didReceiveFinalText:(NSString *)text {
|
||||
[self handleTranscribedText:text];
|
||||
}
|
||||
|
||||
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
|
||||
didFailWithError:(NSError *)error {
|
||||
NSLog(@"[KBAIHomeVC] 语音识别失败:%@", error.localizedDescription);
|
||||
}
|
||||
|
||||
- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager {
|
||||
self.isVoiceRecording = YES;
|
||||
self.isVoiceProcessing = YES;
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
//
|
||||
// DeepgramStreamingManager.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
@protocol DeepgramStreamingManagerDelegate <NSObject>
|
||||
@optional
|
||||
- (void)deepgramStreamingManagerDidConnect;
|
||||
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error;
|
||||
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms;
|
||||
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text;
|
||||
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text;
|
||||
- (void)deepgramStreamingManagerDidFail:(NSError *)error;
|
||||
@end
|
||||
|
||||
/// Manager for Deepgram live transcription.
|
||||
@interface DeepgramStreamingManager : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<DeepgramStreamingManagerDelegate> delegate;
|
||||
|
||||
@property(nonatomic, copy) NSString *serverURL; // wss://api.deepgram.com/v1/listen
|
||||
@property(nonatomic, copy) NSString *apiKey;
|
||||
|
||||
@property(nonatomic, copy, nullable) NSString *language;
|
||||
@property(nonatomic, copy, nullable) NSString *model;
|
||||
@property(nonatomic, assign) BOOL punctuate;
|
||||
@property(nonatomic, assign) BOOL smartFormat;
|
||||
@property(nonatomic, assign) BOOL interimResults;
|
||||
|
||||
@property(nonatomic, copy) NSString *encoding; // linear16
|
||||
@property(nonatomic, assign) double sampleRate;
|
||||
@property(nonatomic, assign) int channels;
|
||||
|
||||
@property(nonatomic, assign, readonly, getter=isStreaming) BOOL streaming;
|
||||
|
||||
- (void)start;
|
||||
- (void)prepareConnection;
|
||||
- (void)stopAndFinalize;
|
||||
- (void)cancel;
|
||||
- (void)disconnect;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
@@ -1,516 +0,0 @@
|
||||
//
|
||||
// DeepgramStreamingManager.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "DeepgramStreamingManager.h"
|
||||
#import "AudioCaptureManager.h"
|
||||
#import "AudioSessionManager.h"
|
||||
#import "DeepgramWebSocketClient.h"
|
||||
#import <UIKit/UIKit.h>
|
||||
|
||||
static NSString *const kDeepgramStreamingManagerErrorDomain =
|
||||
@"DeepgramStreamingManager";
|
||||
|
||||
@interface DeepgramStreamingManager () <AudioSessionManagerDelegate,
|
||||
AudioCaptureManagerDelegate,
|
||||
DeepgramWebSocketClientDelegate>
|
||||
|
||||
@property(nonatomic, strong) AudioSessionManager *audioSession;
|
||||
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
|
||||
@property(nonatomic, strong) DeepgramWebSocketClient *client;
|
||||
@property(nonatomic, strong) dispatch_queue_t stateQueue;
|
||||
|
||||
@property(nonatomic, assign) BOOL streaming;
|
||||
@property(nonatomic, strong) NSMutableArray<NSData *> *pendingFrames;
|
||||
@property(nonatomic, assign) NSUInteger pendingFrameLimit;
|
||||
@property(nonatomic, assign) BOOL connecting;
|
||||
@property(nonatomic, assign) BOOL pendingStart;
|
||||
@property(nonatomic, assign) BOOL keepConnection;
|
||||
@property(nonatomic, strong) dispatch_source_t keepAliveTimer;
|
||||
@property(nonatomic, assign) NSInteger reconnectAttempts;
|
||||
@property(nonatomic, assign) NSInteger maxReconnectAttempts;
|
||||
@property(nonatomic, assign) BOOL reconnectScheduled;
|
||||
@property(nonatomic, assign) BOOL appInBackground;
|
||||
@property(nonatomic, assign) BOOL shouldReconnectOnForeground;
|
||||
|
||||
@end
|
||||
|
||||
@implementation DeepgramStreamingManager
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_stateQueue = dispatch_queue_create("com.keyboard.aitalk.deepgram.manager",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
|
||||
_audioSession = [AudioSessionManager sharedManager];
|
||||
_audioSession.delegate = self;
|
||||
|
||||
_audioCapture = [[AudioCaptureManager alloc] init];
|
||||
_audioCapture.delegate = self;
|
||||
|
||||
/// 不需要自己处理音频转文本,改为录音结束把文件传递给后端
|
||||
// _client = [[DeepgramWebSocketClient alloc] init];
|
||||
// _client.delegate = self;
|
||||
|
||||
_serverURL = @"wss://api.deepgram.com/v1/listen";
|
||||
_encoding = @"linear16";
|
||||
_sampleRate = 16000.0;
|
||||
_channels = 1;
|
||||
_punctuate = YES;
|
||||
_smartFormat = YES;
|
||||
_interimResults = YES;
|
||||
|
||||
_pendingFrames = [[NSMutableArray alloc] init];
|
||||
_pendingFrameLimit = 25;
|
||||
_connecting = NO;
|
||||
_pendingStart = NO;
|
||||
_keepConnection = NO;
|
||||
_reconnectAttempts = 0;
|
||||
_maxReconnectAttempts = 5;
|
||||
_reconnectScheduled = NO;
|
||||
_appInBackground = NO;
|
||||
_shouldReconnectOnForeground = NO;
|
||||
|
||||
[self setupNotifications];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self removeNotifications];
|
||||
[self disconnectInternal];
|
||||
}
|
||||
|
||||
- (void)start {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
self.keepConnection = YES;
|
||||
self.pendingStart = YES;
|
||||
self.reconnectAttempts = 0;
|
||||
if (self.apiKey.length == 0) {
|
||||
[self reportErrorWithMessage:@"Deepgram API key is required"];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioSession hasMicrophonePermission]) {
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[self.audioSession requestMicrophonePermission:^(BOOL granted) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf) {
|
||||
return;
|
||||
}
|
||||
if (!granted) {
|
||||
[strongSelf reportErrorWithMessage:@"Microphone permission denied"];
|
||||
return;
|
||||
}
|
||||
dispatch_async(strongSelf.stateQueue, ^{
|
||||
[strongSelf start];
|
||||
});
|
||||
}];
|
||||
return;
|
||||
}
|
||||
|
||||
NSError *error = nil;
|
||||
if (![self.audioSession configureForConversation:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioSession activateSession:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioCapture isCapturing]) {
|
||||
NSError *captureError = nil;
|
||||
if (![self.audioCapture startCapture:&captureError]) {
|
||||
[self reportError:captureError];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
NSLog(@"[DeepgramStreamingManager] Start streaming, server: %@",
|
||||
self.serverURL);
|
||||
|
||||
if (self.client.isConnected) {
|
||||
[self beginStreamingIfReady];
|
||||
return;
|
||||
}
|
||||
|
||||
[self connectIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)prepareConnection {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
self.keepConnection = YES;
|
||||
self.pendingStart = NO;
|
||||
self.reconnectAttempts = 0;
|
||||
|
||||
if (self.apiKey.length == 0) {
|
||||
NSLog(@"[DeepgramStreamingManager] Prepare skipped: API key missing");
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.client.isConnected) {
|
||||
return;
|
||||
}
|
||||
|
||||
[self connectIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)stopAndFinalize {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.pendingFrames removeAllObjects];
|
||||
self.pendingStart = NO;
|
||||
if (self.client.isConnected) {
|
||||
[self.client finish];
|
||||
}
|
||||
[self.client disableAudioSending];
|
||||
[self startKeepAliveIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)cancel {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.pendingFrames removeAllObjects];
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
[self.client disableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
[self.client disconnect];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disconnect {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
[self disconnectInternal];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disconnectInternal {
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.pendingFrames removeAllObjects];
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
self.shouldReconnectOnForeground = NO;
|
||||
[self.client disableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
[self.client disconnect];
|
||||
[self.audioSession deactivateSession];
|
||||
}
|
||||
|
||||
#pragma mark - AudioCaptureManagerDelegate
|
||||
|
||||
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
|
||||
if (pcmFrame.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (!self.streaming || !self.client.isConnected) {
|
||||
[self.pendingFrames addObject:pcmFrame];
|
||||
if (self.pendingFrames.count > self.pendingFrameLimit) {
|
||||
[self.pendingFrames removeObjectAtIndex:0];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
[self.client sendAudioPCMFrame:pcmFrame];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidUpdateRMS:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidUpdateRMS:rms];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - AudioSessionManagerDelegate
|
||||
|
||||
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
|
||||
if (type == KBAudioSessionInterruptionTypeBegan) {
|
||||
[self cancel];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)audioSessionManagerMicrophonePermissionDenied {
|
||||
[self reportErrorWithMessage:@"Microphone permission denied"];
|
||||
}
|
||||
|
||||
#pragma mark - DeepgramWebSocketClientDelegate
|
||||
|
||||
- (void)deepgramClientDidConnect {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.connecting = NO;
|
||||
self.reconnectAttempts = 0;
|
||||
self.reconnectScheduled = NO;
|
||||
[self beginStreamingIfReady];
|
||||
[self startKeepAliveIfNeeded];
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidConnect)]) {
|
||||
[self.delegate deepgramStreamingManagerDidConnect];
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidDisconnect:(NSError *_Nullable)error {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
self.connecting = NO;
|
||||
[self.audioSession deactivateSession];
|
||||
[self stopKeepAlive];
|
||||
|
||||
if (self.pendingStart || self.keepConnection) {
|
||||
[self scheduleReconnectWithError:error];
|
||||
}
|
||||
});
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidDisconnect:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidDisconnect:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidReceiveInterimTranscript:(NSString *)text {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidReceiveInterimTranscript:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidReceiveInterimTranscript:text];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidReceiveFinalTranscript:(NSString *)text {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidReceiveFinalTranscript:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidReceiveFinalTranscript:text];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidFail:(NSError *)error {
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
#pragma mark - Error Reporting
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidFail:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidFail:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)reportErrorWithMessage:(NSString *)message {
|
||||
NSError *error = [NSError errorWithDomain:kDeepgramStreamingManagerErrorDomain
|
||||
code:-1
|
||||
userInfo:@{
|
||||
NSLocalizedDescriptionKey : message ?: @""
|
||||
}];
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
- (void)connectIfNeeded {
|
||||
if (self.connecting || self.client.isConnected) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.serverURL.length == 0) {
|
||||
[self reportErrorWithMessage:@"Deepgram server URL is required"];
|
||||
return;
|
||||
}
|
||||
|
||||
self.client.serverURL = self.serverURL;
|
||||
self.client.apiKey = self.apiKey;
|
||||
self.client.language = self.language;
|
||||
self.client.model = self.model;
|
||||
self.client.punctuate = self.punctuate;
|
||||
self.client.smartFormat = self.smartFormat;
|
||||
self.client.interimResults = self.interimResults;
|
||||
self.client.encoding = self.encoding;
|
||||
self.client.sampleRate = self.sampleRate;
|
||||
self.client.channels = self.channels;
|
||||
[self.client disableAudioSending];
|
||||
self.connecting = YES;
|
||||
[self.client connect];
|
||||
}
|
||||
|
||||
- (void)beginStreamingIfReady {
|
||||
if (!self.pendingStart) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.streaming = YES;
|
||||
[self.client enableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
|
||||
if (self.pendingFrames.count > 0) {
|
||||
NSArray<NSData *> *frames = [self.pendingFrames copy];
|
||||
[self.pendingFrames removeAllObjects];
|
||||
for (NSData *frame in frames) {
|
||||
[self.client sendAudioPCMFrame:frame];
|
||||
}
|
||||
NSLog(@"[DeepgramStreamingManager] Flushed %lu pending frames",
|
||||
(unsigned long)frames.count);
|
||||
}
|
||||
}
|
||||
|
||||
- (void)scheduleReconnectWithError:(NSError *_Nullable)error {
|
||||
if (self.reconnectScheduled || self.connecting || self.client.isConnected) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.reconnectAttempts >= self.maxReconnectAttempts) {
|
||||
NSLog(@"[DeepgramStreamingManager] Reconnect failed %ld times, stop retry. %@",
|
||||
(long)self.maxReconnectAttempts,
|
||||
error.localizedDescription ?: @"");
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
return;
|
||||
}
|
||||
|
||||
self.reconnectAttempts += 1;
|
||||
self.reconnectScheduled = YES;
|
||||
|
||||
dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t)(1 * NSEC_PER_SEC)),
|
||||
self.stateQueue, ^{
|
||||
self.reconnectScheduled = NO;
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
if (!self.pendingStart && !self.keepConnection) {
|
||||
return;
|
||||
}
|
||||
[self connectIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)setupNotifications {
|
||||
NSNotificationCenter *center = [NSNotificationCenter defaultCenter];
|
||||
[center addObserver:self
|
||||
selector:@selector(handleAppDidEnterBackground)
|
||||
name:UIApplicationDidEnterBackgroundNotification
|
||||
object:nil];
|
||||
[center addObserver:self
|
||||
selector:@selector(handleAppWillEnterForeground)
|
||||
name:UIApplicationWillEnterForegroundNotification
|
||||
object:nil];
|
||||
}
|
||||
|
||||
- (void)removeNotifications {
|
||||
[[NSNotificationCenter defaultCenter] removeObserver:self];
|
||||
}
|
||||
|
||||
- (void)handleAppDidEnterBackground {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.appInBackground = YES;
|
||||
self.shouldReconnectOnForeground =
|
||||
self.keepConnection || self.pendingStart;
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
|
||||
[self.pendingFrames removeAllObjects];
|
||||
[self.client disableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
[self.client disconnect];
|
||||
[self.audioSession deactivateSession];
|
||||
|
||||
NSLog(@"[DeepgramStreamingManager] App entered background, socket closed");
|
||||
});
|
||||
}
|
||||
|
||||
- (void)handleAppWillEnterForeground {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.appInBackground = NO;
|
||||
if (self.shouldReconnectOnForeground) {
|
||||
self.keepConnection = YES;
|
||||
self.reconnectAttempts = 0;
|
||||
[self connectIfNeeded];
|
||||
}
|
||||
self.shouldReconnectOnForeground = NO;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)startKeepAliveIfNeeded {
|
||||
if (!self.keepConnection || !self.client.isConnected || self.streaming) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.keepAliveTimer) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.keepAliveTimer =
|
||||
dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0,
|
||||
self.stateQueue);
|
||||
dispatch_source_set_timer(self.keepAliveTimer,
|
||||
dispatch_time(DISPATCH_TIME_NOW, 15 * NSEC_PER_SEC),
|
||||
15 * NSEC_PER_SEC, 1 * NSEC_PER_SEC);
|
||||
__weak typeof(self) weakSelf = self;
|
||||
dispatch_source_set_event_handler(self.keepAliveTimer, ^{
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf) {
|
||||
return;
|
||||
}
|
||||
[strongSelf.client sendKeepAlive];
|
||||
});
|
||||
dispatch_resume(self.keepAliveTimer);
|
||||
}
|
||||
|
||||
- (void)stopKeepAlive {
|
||||
if (self.keepAliveTimer) {
|
||||
dispatch_source_cancel(self.keepAliveTimer);
|
||||
self.keepAliveTimer = nil;
|
||||
}
|
||||
}
|
||||
|
||||
@end
|
||||
@@ -1,52 +0,0 @@
|
||||
//
|
||||
// DeepgramWebSocketClient.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
@protocol DeepgramWebSocketClientDelegate <NSObject>
|
||||
@optional
|
||||
- (void)deepgramClientDidConnect;
|
||||
- (void)deepgramClientDidDisconnect:(NSError *_Nullable)error;
|
||||
- (void)deepgramClientDidReceiveInterimTranscript:(NSString *)text;
|
||||
- (void)deepgramClientDidReceiveFinalTranscript:(NSString *)text;
|
||||
- (void)deepgramClientDidFail:(NSError *)error;
|
||||
@end
|
||||
|
||||
/// WebSocket client for Deepgram live transcription.
|
||||
@interface DeepgramWebSocketClient : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<DeepgramWebSocketClientDelegate> delegate;
|
||||
|
||||
@property(nonatomic, copy) NSString *serverURL; // wss://api.deepgram.com/v1/listen
|
||||
@property(nonatomic, copy) NSString *apiKey;
|
||||
|
||||
@property(nonatomic, copy, nullable) NSString *language;
|
||||
@property(nonatomic, copy, nullable) NSString *model;
|
||||
@property(nonatomic, assign) BOOL punctuate;
|
||||
@property(nonatomic, assign) BOOL smartFormat;
|
||||
@property(nonatomic, assign) BOOL interimResults;
|
||||
|
||||
@property(nonatomic, copy) NSString *encoding; // linear16
|
||||
@property(nonatomic, assign) double sampleRate;
|
||||
@property(nonatomic, assign) int channels;
|
||||
|
||||
@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected;
|
||||
|
||||
- (void)connect;
|
||||
- (void)disconnect;
|
||||
- (void)sendAudioPCMFrame:(NSData *)pcmFrame;
|
||||
- (void)finish;
|
||||
- (void)sendKeepAlive;
|
||||
|
||||
- (void)enableAudioSending;
|
||||
- (void)disableAudioSending;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
@@ -1,411 +0,0 @@
|
||||
//
|
||||
// DeepgramWebSocketClient.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "DeepgramWebSocketClient.h"
|
||||
|
||||
static NSString *const kDeepgramWebSocketClientErrorDomain =
|
||||
@"DeepgramWebSocketClient";
|
||||
|
||||
@interface DeepgramWebSocketClient () <NSURLSessionWebSocketDelegate>
|
||||
|
||||
@property(nonatomic, strong) NSURLSession *urlSession;
|
||||
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;
|
||||
@property(nonatomic, strong) dispatch_queue_t networkQueue;
|
||||
@property(nonatomic, assign) BOOL connected;
|
||||
@property(nonatomic, assign) BOOL audioSendingEnabled;
|
||||
|
||||
@end
|
||||
|
||||
@implementation DeepgramWebSocketClient
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.deepgram.ws",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
_serverURL = @"wss://api.deepgram.com/v1/listen";
|
||||
_encoding = @"linear16";
|
||||
_sampleRate = 16000.0;
|
||||
_channels = 1;
|
||||
_punctuate = YES;
|
||||
_smartFormat = YES;
|
||||
_interimResults = YES;
|
||||
_audioSendingEnabled = NO;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self disconnectInternal];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)connect {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
[self disconnectInternal];
|
||||
|
||||
if (self.apiKey.length == 0) {
|
||||
[self reportErrorWithMessage:@"Deepgram API key is required"];
|
||||
return;
|
||||
}
|
||||
|
||||
NSURL *url = [self buildURL];
|
||||
if (!url) {
|
||||
[self reportErrorWithMessage:@"Invalid Deepgram URL"];
|
||||
return;
|
||||
}
|
||||
|
||||
NSLog(@"[DeepgramWebSocketClient] Connecting: %@", url.absoluteString);
|
||||
|
||||
NSURLSessionConfiguration *config =
|
||||
[NSURLSessionConfiguration defaultSessionConfiguration];
|
||||
config.timeoutIntervalForRequest = 30;
|
||||
config.timeoutIntervalForResource = 300;
|
||||
|
||||
self.urlSession = [NSURLSession sessionWithConfiguration:config
|
||||
delegate:self
|
||||
delegateQueue:nil];
|
||||
|
||||
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
|
||||
[request setValue:[NSString stringWithFormat:@"Token %@", self.apiKey]
|
||||
forHTTPHeaderField:@"Authorization"];
|
||||
|
||||
self.webSocketTask = [self.urlSession webSocketTaskWithRequest:request];
|
||||
[self.webSocketTask resume];
|
||||
[self receiveMessage];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disconnect {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
BOOL shouldNotify = self.webSocketTask != nil;
|
||||
if (shouldNotify) {
|
||||
NSLog(@"[DeepgramWebSocketClient] Disconnect requested");
|
||||
}
|
||||
[self disconnectInternal];
|
||||
if (shouldNotify) {
|
||||
[self notifyDisconnect:nil];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
|
||||
if (!self.connected || !self.webSocketTask || pcmFrame.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
if (!self.audioSendingEnabled) {
|
||||
return;
|
||||
}
|
||||
if (!self.connected || !self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSURLSessionWebSocketMessage *message =
|
||||
[[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
|
||||
[self.webSocketTask
|
||||
sendMessage:message
|
||||
completionHandler:^(NSError *_Nullable error) {
|
||||
if (error) {
|
||||
[self reportError:error];
|
||||
} else {
|
||||
NSLog(@"[DeepgramWebSocketClient] Sent audio frame: %lu bytes",
|
||||
(unsigned long)pcmFrame.length);
|
||||
}
|
||||
}];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)finish {
|
||||
NSLog(@"[DeepgramWebSocketClient] Sending CloseStream");
|
||||
[self sendJSON:@{@"type" : @"CloseStream"}];
|
||||
}
|
||||
|
||||
- (void)sendKeepAlive {
|
||||
if (!self.connected || !self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
[self sendJSON:@{@"type" : @"KeepAlive"}];
|
||||
}
|
||||
|
||||
- (void)enableAudioSending {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
self.audioSendingEnabled = YES;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disableAudioSending {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
self.audioSendingEnabled = NO;
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
/// Builds the websocket URL from serverURL plus the configured
/// transcription options as query parameters.
/// @return The composed URL, or nil when serverURL is missing/unparseable.
- (NSURL *)buildURL {
  if (self.serverURL.length == 0) {
    return nil;
  }

  NSURLComponents *components =
      [NSURLComponents componentsWithString:self.serverURL];
  if (!components) {
    return nil;
  }

  // Start from any query items already present in serverURL so explicit
  // caller-supplied parameters are preserved (and overridden, not duplicated).
  NSMutableArray<NSURLQueryItem *> *queryItems =
      components.queryItems.mutableCopy ?: [NSMutableArray array];

  [self upsertQueryItemWithName:@"model" value:self.model items:queryItems];
  [self upsertQueryItemWithName:@"language" value:self.language items:queryItems];

  [self upsertQueryItemWithName:@"punctuate"
                          value:(self.punctuate ? @"true" : @"false")
                          items:queryItems];
  [self upsertQueryItemWithName:@"smart_format"
                          value:(self.smartFormat ? @"true" : @"false")
                          items:queryItems];
  [self upsertQueryItemWithName:@"interim_results"
                          value:(self.interimResults ? @"true" : @"false")
                          items:queryItems];

  [self upsertQueryItemWithName:@"encoding" value:self.encoding items:queryItems];

  NSString *sampleRateValue =
      [NSString stringWithFormat:@"%.0f", self.sampleRate];
  [self upsertQueryItemWithName:@"sample_rate"
                          value:sampleRateValue
                          items:queryItems];

  NSString *channelsValue = [NSString stringWithFormat:@"%d", self.channels];
  [self upsertQueryItemWithName:@"channels" value:channelsValue items:queryItems];

  components.queryItems = queryItems;
  return components.URL;
}
|
||||
|
||||
/// Inserts a query item into `items`, replacing any existing item with the
/// same name. Skipped entirely when either the name or the value is empty.
- (void)upsertQueryItemWithName:(NSString *)name
                          value:(NSString *)value
                          items:(NSMutableArray<NSURLQueryItem *> *)items {
  if (name.length == 0 || value.length == 0) {
    return;
  }

  NSURLQueryItem *replacement = [NSURLQueryItem queryItemWithName:name
                                                            value:value];
  NSUInteger existingIndex = [items
      indexOfObjectPassingTest:^BOOL(NSURLQueryItem *item, NSUInteger idx,
                                     BOOL *stop) {
        return [item.name isEqualToString:name];
      }];

  if (existingIndex == NSNotFound) {
    [items addObject:replacement];
  } else {
    items[existingIndex] = replacement;
  }
}
|
||||
|
||||
/// Serializes `dict` to UTF-8 JSON and sends it as a websocket text frame
/// on the network queue. Serialization and send failures are surfaced via
/// -reportError: / -reportErrorWithMessage:.
/// @param dict A JSON-serializable dictionary (control messages, etc.).
- (void)sendJSON:(NSDictionary *)dict {
  if (!self.webSocketTask) {
    return;
  }

  NSError *jsonError = nil;
  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
                                                     options:0
                                                       error:&jsonError];
  // Fix: check the return value, not the error pointer — Foundation only
  // guarantees *error is meaningful when the call actually fails.
  if (!jsonData) {
    if (jsonError) {
      [self reportError:jsonError];
    } else {
      [self reportErrorWithMessage:@"Failed to encode JSON message"];
    }
    return;
  }

  NSString *jsonString = [[NSString alloc] initWithData:jsonData
                                               encoding:NSUTF8StringEncoding];
  if (!jsonString) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }

  dispatch_async(self.networkQueue, ^{
    // webSocketTask may have been torn down while we waited; messaging nil
    // is a harmless no-op.
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
    [self.webSocketTask sendMessage:message
                  completionHandler:^(NSError *_Nullable error) {
                    if (error) {
                      [self reportError:error];
                    }
                  }];
  });
}
|
||||
|
||||
/// Arms a single asynchronous receive on the websocket and re-arms itself
/// after every delivered message, forming this client's receive loop.
/// Non-cancellation errors tear the connection down and notify the delegate.
- (void)receiveMessage {
  if (!self.webSocketTask) {
    return;
  }

  // Weak/strong dance: the completion handler must not keep the client
  // alive once it has been deallocated.
  __weak typeof(self) weakSelf = self;
  [self.webSocketTask receiveMessageWithCompletionHandler:^(
                          NSURLSessionWebSocketMessage *_Nullable message,
                          NSError *_Nullable error) {
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (!strongSelf) {
      return;
    }

    if (error) {
      // NSURLErrorCancelled means we closed the task ourselves; 57 is
      // POSIX ENOTCONN ("Socket is not connected"), seen when the task
      // errors after a local teardown. Neither should re-notify.
      if (error.code != NSURLErrorCancelled && error.code != 57) {
        [strongSelf notifyDisconnect:error];
        [strongSelf disconnectInternal];
      }
      // Do not re-arm after an error — the loop ends here.
      return;
    }

    if (message.type == NSURLSessionWebSocketMessageTypeString) {
      NSLog(@"[DeepgramWebSocketClient] Received text: %@", message.string);
      [strongSelf handleTextMessage:message.string];
    } else if (message.type == NSURLSessionWebSocketMessageTypeData) {
      NSLog(@"[DeepgramWebSocketClient] Received binary: %lu bytes",
            (unsigned long)message.data.length);
      [strongSelf handleBinaryMessage:message.data];
    }

    // Re-arm: NSURLSessionWebSocketTask delivers one message per call.
    [strongSelf receiveMessage];
  }];
}
|
||||
|
||||
/// Parses a Deepgram JSON result payload and forwards interim/final
/// transcripts to the delegate on the main queue. Expected shape:
/// {"channel": {"alternatives": [{"transcript": ...}]},
///  "is_final": bool, "speech_final": bool} — or {"error": "..."}.
/// Malformed payloads are dropped (or reported) instead of crashing.
- (void)handleTextMessage:(NSString *)text {
  if (text.length == 0) {
    return;
  }

  NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
  if (!data) {
    return;
  }

  NSError *jsonError = nil;
  id root = [NSJSONSerialization JSONObjectWithData:data
                                            options:0
                                              error:&jsonError];
  // Check the return value, not the error pointer.
  if (!root) {
    if (jsonError) {
      [self reportError:jsonError];
    }
    return;
  }
  // Fix: the root of a JSON document may legally be an array; subscripting
  // it with a key would throw. Bail out on anything but a dictionary.
  if (![root isKindOfClass:[NSDictionary class]]) {
    return;
  }
  NSDictionary *json = root;

  // Fix: "error" may be a structured object rather than a string; calling
  // -length / passing it to reportErrorWithMessage: would crash.
  id errorValue = json[@"error"];
  if ([errorValue isKindOfClass:[NSString class]]) {
    NSString *errorMessage = errorValue;
    if (errorMessage.length > 0) {
      [self reportErrorWithMessage:errorMessage];
      return;
    }
  }

  NSDictionary *channel = json[@"channel"];
  if (![channel isKindOfClass:[NSDictionary class]]) {
    return;
  }

  NSArray *alternatives = channel[@"alternatives"];
  if (![alternatives isKindOfClass:[NSArray class]] ||
      alternatives.count == 0) {
    return;
  }

  // Fix: type-check the first alternative and the transcript value too.
  NSDictionary *firstAlt = alternatives.firstObject;
  if (![firstAlt isKindOfClass:[NSDictionary class]]) {
    return;
  }
  NSString *transcript = firstAlt[@"transcript"] ?: @"";
  if (![transcript isKindOfClass:[NSString class]] || transcript.length == 0) {
    return;
  }

  BOOL isFinal =
      [json[@"is_final"] boolValue] || [json[@"speech_final"] boolValue];

  dispatch_async(dispatch_get_main_queue(), ^{
    if (isFinal) {
      if ([self.delegate respondsToSelector:@selector
                         (deepgramClientDidReceiveFinalTranscript:)]) {
        [self.delegate deepgramClientDidReceiveFinalTranscript:transcript];
      }
    } else {
      if ([self.delegate respondsToSelector:@selector
                         (deepgramClientDidReceiveInterimTranscript:)]) {
        [self.delegate deepgramClientDidReceiveInterimTranscript:transcript];
      }
    }
  });
}
|
||||
|
||||
/// Binary frames are intentionally ignored — the transcription results this
/// client consumes arrive as JSON text messages only.
- (void)handleBinaryMessage:(NSData *)data {
}
|
||||
|
||||
/// Synchronously resets connection state and releases the socket task and
/// URL session. Performs no delegate callbacks; callers decide whether to
/// notify. Intended to run on the network queue.
- (void)disconnectInternal {
  self.connected = NO;
  self.audioSendingEnabled = NO;

  NSURLSessionWebSocketTask *task = self.webSocketTask;
  if (task) {
    self.webSocketTask = nil;
    [task cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                       reason:nil];
  }

  NSURLSession *session = self.urlSession;
  if (session) {
    self.urlSession = nil;
    [session invalidateAndCancel];
  }
}
|
||||
|
||||
/// Forwards an error to the delegate on the main queue (optional method).
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    if (![self.delegate respondsToSelector:@selector(deepgramClientDidFail:)]) {
      return;
    }
    [self.delegate deepgramClientDidFail:error];
  });
}
|
||||
|
||||
/// Wraps a plain message in an NSError in the client's error domain
/// (code -1) and funnels it through -reportError:.
- (void)reportErrorWithMessage:(NSString *)message {
  NSDictionary *userInfo = @{NSLocalizedDescriptionKey : message ?: @""};
  NSError *error = [NSError errorWithDomain:kDeepgramWebSocketClientErrorDomain
                                       code:-1
                                   userInfo:userInfo];
  [self reportError:error];
}
|
||||
|
||||
/// Marks the client as disconnected and informs the delegate on the main
/// queue. `error` is nil for clean, locally-initiated closes.
- (void)notifyDisconnect:(NSError *_Nullable)error {
  self.connected = NO;

  dispatch_async(dispatch_get_main_queue(), ^{
    if (![self.delegate
            respondsToSelector:@selector(deepgramClientDidDisconnect:)]) {
      return;
    }
    [self.delegate deepgramClientDidDisconnect:error];
  });
}
|
||||
|
||||
#pragma mark - NSURLSessionWebSocketDelegate
|
||||
|
||||
/// Websocket handshake completed: flip the connected flag and tell the
/// delegate on the main queue.
- (void)URLSession:(NSURLSession *)session
        webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
  didOpenWithProtocol:(NSString *)protocol {
  self.connected = YES;
  NSLog(@"[DeepgramWebSocketClient] Connected");
  dispatch_async(dispatch_get_main_queue(), ^{
    if (![self.delegate
            respondsToSelector:@selector(deepgramClientDidConnect)]) {
      return;
    }
    [self.delegate deepgramClientDidConnect];
  });
}
|
||||
|
||||
/// Delegate callback for a close initiated by the server or transport.
/// Ignored when webSocketTask is already nil — i.e. we tore the connection
/// down locally and have nothing left to report.
- (void)URLSession:(NSURLSession *)session
     webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
  didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
            reason:(NSData *)reason {
  if (!self.webSocketTask) {
    return;
  }
  NSLog(@"[DeepgramWebSocketClient] Closed with code: %ld", (long)closeCode);
  [self notifyDisconnect:nil];
  [self disconnectInternal];
}
|
||||
|
||||
@end
|
||||
@@ -17,24 +17,15 @@ NS_ASSUME_NONNULL_BEGIN
|
||||
- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager;
|
||||
- (void)voiceToTextManagerDidEndRecording:(KBVoiceToTextManager *)manager;
|
||||
- (void)voiceToTextManagerDidCancelRecording:(KBVoiceToTextManager *)manager;
|
||||
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
|
||||
didUpdateInterimText:(NSString *)text;
|
||||
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
|
||||
didReceiveFinalText:(NSString *)text;
|
||||
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
|
||||
didFailWithError:(NSError *)error;
|
||||
@end
|
||||
|
||||
/// Voice-to-text manager: binds a KBVoiceInputBar, bridges its recording
/// events to the delegate, and optionally streams audio to Deepgram for
/// live transcription.
@interface KBVoiceToTextManager : NSObject

/// Receives recording lifecycle events and interim/final transcript text.
@property(nonatomic, weak) id<KBVoiceToTextManagerDelegate> delegate;
/// The input bar this manager is bound to; set at init, at which point the
/// manager becomes the bar's delegate.
@property(nonatomic, weak, readonly) KBVoiceInputBar *inputBar;
/// Toggles Deepgram streaming. Disabling cancels any in-flight session and
/// clears the accumulated transcript; enabling pre-warms the connection.
@property(nonatomic, assign) BOOL deepgramEnabled;

/// Creates the manager bound to `inputBar` (takes over inputBar.delegate).
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar;
/// Pre-warms the Deepgram websocket; no-op when deepgramEnabled is NO.
- (void)prepareConnection;
/// Closes the Deepgram websocket; no-op when deepgramEnabled is NO.
- (void)disconnect;

@end
|
||||
|
||||
|
||||
@@ -6,118 +6,30 @@
|
||||
//
|
||||
|
||||
#import "KBVoiceToTextManager.h"
|
||||
#import "DeepgramStreamingManager.h"
|
||||
#import "KBVoiceInputBar.h"
|
||||
#import "KBLocalizationManager.h"
|
||||
|
||||
@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate,
|
||||
DeepgramStreamingManagerDelegate>
|
||||
@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate>
|
||||
|
||||
@property(nonatomic, strong) DeepgramStreamingManager *deepgramManager;
|
||||
@property(nonatomic, weak) KBVoiceInputBar *inputBar;
|
||||
@property(nonatomic, strong) NSMutableString *fullText;
|
||||
|
||||
@end
|
||||
|
||||
@implementation KBVoiceToTextManager
|
||||
|
||||
/// Enables/disables Deepgram streaming. Disabling cancels any in-flight
/// recognition and clears the accumulated transcript; enabling refreshes the
/// recognition language and pre-warms the websocket connection.
- (void)setDeepgramEnabled:(BOOL)deepgramEnabled {
  if (_deepgramEnabled == deepgramEnabled) {
    return;
  }
  _deepgramEnabled = deepgramEnabled;
  if (!deepgramEnabled) {
    [self.deepgramManager cancel];
    [self resetTranscript];
  } else {
    // Fix: keep the language in sync before connecting, matching the public
    // -prepareConnection path (the refresh was previously skipped here).
    [self kb_refreshDeepgramLanguage];
    [self.deepgramManager prepareConnection];
  }
}
|
||||
|
||||
/// Designated initializer: binds to `inputBar`, prepares the transcript
/// buffer, and configures the Deepgram streaming manager. Deepgram is
/// enabled by default.
/// @param inputBar The voice input bar to bridge; this manager becomes its
///   delegate as a side effect.
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar {
  self = [super init];
  if (self) {
    _inputBar = inputBar;
    // Side effect: take over the bar's delegate so its recording events
    // flow through this manager.
    _inputBar.delegate = self;
    _fullText = [[NSMutableString alloc] init];
    _deepgramEnabled = YES;
    [self setupDeepgram];
  }
  return self;
}
|
||||
|
||||
/// Close the streaming connection when the manager goes away — the only
/// non-memory cleanup this class owns.
- (void)dealloc {
  [self.deepgramManager disconnect];
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
/// Pre-warms the Deepgram websocket using the current app language so the
/// first recording connects faster. No-op when Deepgram is disabled.
- (void)prepareConnection {
  if (!self.deepgramEnabled) {
    return;
  }
  [self kb_refreshDeepgramLanguage];
  [self.deepgramManager prepareConnection];
}
|
||||
|
||||
/// Closes the Deepgram websocket. No-op when Deepgram is disabled (the
/// deepgramEnabled setter already cancels any session on disable).
- (void)disconnect {
  if (!self.deepgramEnabled) {
    return;
  }
  [self.deepgramManager disconnect];
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
/// Creates and configures the Deepgram streaming manager: endpoint,
/// credentials, model, formatting flags, and the audio format
/// (linear16 PCM, 16 kHz, mono).
- (void)setupDeepgram {
  self.deepgramManager = [[DeepgramStreamingManager alloc] init];
  self.deepgramManager.delegate = self;
  self.deepgramManager.serverURL = @"wss://api.deepgram.com/v1/listen";
  // FIXME(security): hard-coded API credential committed to source control.
  // Treat this key as leaked — revoke it and load a replacement from a
  // secure store (keychain / server-issued token), not from the binary.
  self.deepgramManager.apiKey = @"9c792eb63a65d644cbc95785155754cd1e84f8cf";
  [self kb_refreshDeepgramLanguage];
  self.deepgramManager.model = @"nova-3";
  self.deepgramManager.punctuate = YES;
  self.deepgramManager.smartFormat = YES;
  self.deepgramManager.interimResults = YES;
  // NOTE(review): presumably matches the capture pipeline feeding the
  // audio frames — confirm against the recorder's output format.
  self.deepgramManager.encoding = @"linear16";
  self.deepgramManager.sampleRate = 16000.0;
  self.deepgramManager.channels = 1;
}
|
||||
|
||||
/// Clears the accumulated final-transcript buffer.
- (void)resetTranscript {
  [self.fullText setString:@""];
}
|
||||
|
||||
/// Re-reads the app language and pushes it to the Deepgram manager so the
/// next connection transcribes in the user's current language.
- (void)kb_refreshDeepgramLanguage {
  self.deepgramManager.language = [self kb_currentDeepgramLanguageCode];
}
|
||||
|
||||
/// Maps the app's current UI language code (case-insensitive, either "-" or
/// "_" separators) onto a Deepgram language code.
/// @return A Deepgram language code; @"en" for anything unrecognized.
- (NSString *)kb_currentDeepgramLanguageCode {
  NSString *languageCode =
      [KBLocalizationManager shared].currentLanguageCode ?: @"en";
  NSString *lc = languageCode.lowercaseString;
  if ([lc hasPrefix:@"es"]) { return @"es"; }
  if ([lc hasPrefix:@"id"]) { return @"id"; }
  if ([lc hasPrefix:@"pt"]) { return @"pt"; }
  // Traditional-script variants must be checked before the generic "zh"
  // fallback below would swallow them.
  if ([lc hasPrefix:@"zh-hant"] || [lc hasPrefix:@"zh_tw"] ||
      [lc hasPrefix:@"zh-tw"] || [lc hasPrefix:@"zh-hk"]) {
    return @"zh-TW";
  }
  if ([lc hasPrefix:@"zh-hans"] || [lc hasPrefix:@"zh_cn"] ||
      [lc hasPrefix:@"zh-cn"]) {
    return @"zh-CN";
  }
  // Fix: a bare "zh" (no region/script suffix) previously fell through to
  // English; treat it as simplified Chinese, the conventional default.
  if ([lc hasPrefix:@"zh"]) {
    return @"zh-CN";
  }
  return @"en";
}
|
||||
|
||||
#pragma mark - KBVoiceInputBarDelegate
|
||||
|
||||
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
|
||||
[self resetTranscript];
|
||||
if (self.deepgramEnabled) {
|
||||
[self kb_refreshDeepgramLanguage];
|
||||
inputBar.statusText = KBLocalized(@"Voice Connecting...");
|
||||
[self.deepgramManager start];
|
||||
} else {
|
||||
inputBar.statusText = KBLocalized(@"Voice Recording...");
|
||||
}
|
||||
inputBar.statusText = KBLocalized(@"Voice Recording...");
|
||||
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceToTextManagerDidBeginRecording:)]) {
|
||||
@@ -126,12 +38,7 @@
|
||||
}
|
||||
|
||||
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
|
||||
if (self.deepgramEnabled) {
|
||||
inputBar.statusText = KBLocalized(@"Voice Recognizing...");
|
||||
[self.deepgramManager stopAndFinalize];
|
||||
} else {
|
||||
inputBar.statusText = KBLocalized(@"Voice Recording Ended");
|
||||
}
|
||||
inputBar.statusText = KBLocalized(@"Voice Recording Ended");
|
||||
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceToTextManagerDidEndRecording:)]) {
|
||||
@@ -141,10 +48,6 @@
|
||||
|
||||
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
|
||||
inputBar.statusText = KBLocalized(@"Voice Cancelled");
|
||||
[self resetTranscript];
|
||||
if (self.deepgramEnabled) {
|
||||
[self.deepgramManager cancel];
|
||||
}
|
||||
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(voiceToTextManagerDidCancelRecording:)]) {
|
||||
@@ -152,89 +55,4 @@
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - DeepgramStreamingManagerDelegate
|
||||
|
||||
/// Connection established: tell the user we are listening. Ignored when
/// Deepgram streaming has been turned off in the meantime.
- (void)deepgramStreamingManagerDidConnect {
  if (!self.deepgramEnabled) {
    return;
  }
  self.inputBar.statusText = KBLocalized(@"Voice Listening...");
}
|
||||
|
||||
/// Deepgram dropped the connection. Clean disconnects (error == nil) are
/// silent; failures update the status line and notify the delegate.
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error {
  if (!self.deepgramEnabled || !error) {
    return;
  }

  self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");
  if ([self.delegate respondsToSelector:@selector
                     (voiceToTextManager:didFailWithError:)]) {
    [self.delegate voiceToTextManager:self didFailWithError:error];
  }
}
|
||||
|
||||
/// Forwards the reported audio RMS level to the input bar's volume meter.
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
  if (!self.deepgramEnabled) {
    return;
  }
  [self.inputBar updateVolumeRMS:rms];
}
|
||||
|
||||
/// An interim (still-changing) transcript arrived: show the accumulated
/// final text plus the interim tail in the status line, and forward the
/// combined text to the delegate.
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
  if (!self.deepgramEnabled) {
    return;
  }

  NSString *interim = text ?: @"";
  NSString *displayText;
  if (self.fullText.length == 0) {
    displayText = interim;
  } else if (interim.length == 0) {
    displayText = [self.fullText copy];
  } else {
    displayText =
        [NSString stringWithFormat:@"%@ %@", self.fullText, interim];
  }

  self.inputBar.statusText =
      displayText.length > 0 ? displayText : KBLocalized(@"Voice Recognizing...");

  if ([self.delegate respondsToSelector:@selector
                     (voiceToTextManager:didUpdateInterimText:)]) {
    [self.delegate voiceToTextManager:self didUpdateInterimText:displayText];
  }
}
|
||||
|
||||
/// A finalized transcript segment arrived: append it to the running
/// transcript (space-separated), refresh the status line, and hand the full
/// text to the delegate when non-empty.
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
  if (!self.deepgramEnabled) {
    return;
  }

  if (text.length > 0) {
    NSString *separator = self.fullText.length > 0 ? @" " : @"";
    [self.fullText appendFormat:@"%@%@", separator, text];
  }

  NSString *finalText = [self.fullText copy];
  self.inputBar.statusText =
      finalText.length > 0 ? finalText
                           : KBLocalized(@"Voice Recognition Completed");

  BOOL delegateWantsText =
      [self.delegate respondsToSelector:@selector
                     (voiceToTextManager:didReceiveFinalText:)];
  if (finalText.length > 0 && delegateWantsText) {
    [self.delegate voiceToTextManager:self didReceiveFinalText:finalText];
  }
}
|
||||
|
||||
/// Deepgram reported a hard failure: surface it in the status line and pass
/// the error through to the delegate.
- (void)deepgramStreamingManagerDidFail:(NSError *)error {
  if (!self.deepgramEnabled) {
    return;
  }
  self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");
  if (![self.delegate respondsToSelector:@selector
                      (voiceToTextManager:didFailWithError:)]) {
    return;
  }
  [self.delegate voiceToTextManager:self didFailWithError:error];
}
|
||||
|
||||
@end
|
||||
|
||||
Reference in New Issue
Block a user