diff --git a/keyBoard.xcodeproj/project.pbxproj b/keyBoard.xcodeproj/project.pbxproj index 176044d..7682a58 100644 --- a/keyBoard.xcodeproj/project.pbxproj +++ b/keyBoard.xcodeproj/project.pbxproj @@ -208,6 +208,7 @@ 04E038E32F20E500002CA5A0 /* deepgramAPI.md in Resources */ = {isa = PBXBuildFile; fileRef = 04E038E22F20E500002CA5A0 /* deepgramAPI.md */; }; 04E038E82F20E877002CA5A0 /* DeepgramWebSocketClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */; }; 04E038E92F20E877002CA5A0 /* DeepgramStreamingManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */; }; + 04E0B1022F300001002CA5A0 /* KBVoiceToTextManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E0B1012F300001002CA5A0 /* KBVoiceToTextManager.m */; }; 04E038EF2F21F0EC002CA5A0 /* AiVM.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038EE2F21F0EC002CA5A0 /* AiVM.m */; }; 04E0394B2F236E75002CA5A0 /* KBChatUserMessageCell.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E0394A2F236E75002CA5A0 /* KBChatUserMessageCell.m */; }; 04E0394C2F236E75002CA5A0 /* KBChatTimeCell.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E039482F236E75002CA5A0 /* KBChatTimeCell.m */; }; @@ -650,6 +651,8 @@ 04E038E22F20E500002CA5A0 /* deepgramAPI.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = deepgramAPI.md; sourceTree = ""; }; 04E038E42F20E877002CA5A0 /* DeepgramStreamingManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DeepgramStreamingManager.h; sourceTree = ""; }; 04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DeepgramStreamingManager.m; sourceTree = ""; }; + 04E0B1002F300001002CA5A0 /* KBVoiceToTextManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBVoiceToTextManager.h; sourceTree = ""; }; + 
04E0B1012F300001002CA5A0 /* KBVoiceToTextManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBVoiceToTextManager.m; sourceTree = ""; }; 04E038E62F20E877002CA5A0 /* DeepgramWebSocketClient.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DeepgramWebSocketClient.h; sourceTree = ""; }; 04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DeepgramWebSocketClient.m; sourceTree = ""; }; 04E038ED2F21F0EC002CA5A0 /* AiVM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AiVM.h; sourceTree = ""; }; @@ -1074,6 +1077,8 @@ 04E038DC2F20C420002CA5A0 /* VoiceChatWebSocketClient.m */, 04E038E42F20E877002CA5A0 /* DeepgramStreamingManager.h */, 04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */, + 04E0B1002F300001002CA5A0 /* KBVoiceToTextManager.h */, + 04E0B1012F300001002CA5A0 /* KBVoiceToTextManager.m */, 04E038E62F20E877002CA5A0 /* DeepgramWebSocketClient.h */, 04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */, 04E038ED2F21F0EC002CA5A0 /* AiVM.h */, @@ -2325,6 +2330,7 @@ 0450AAE22EF03D5100B6AF06 /* KBPerson.swift in Sources */, 04E038E82F20E877002CA5A0 /* DeepgramWebSocketClient.m in Sources */, 04E038E92F20E877002CA5A0 /* DeepgramStreamingManager.m in Sources */, + 04E0B1022F300001002CA5A0 /* KBVoiceToTextManager.m in Sources */, 048908E32EBF821700FABA60 /* KBSkinDetailVC.m in Sources */, 0477BDF32EBB7B850055D639 /* KBDirectionIndicatorView.m in Sources */, 048FFD142F274342005D62AE /* KBPersonaChatCell.m in Sources */, diff --git a/keyBoard/Class/AiTalk/VC/KBAIHomeVC.m b/keyBoard/Class/AiTalk/VC/KBAIHomeVC.m index 98f0aed..e9d56e3 100644 --- a/keyBoard/Class/AiTalk/VC/KBAIHomeVC.m +++ b/keyBoard/Class/AiTalk/VC/KBAIHomeVC.m @@ -9,10 +9,11 @@ #import "KBPersonaChatCell.h" #import "KBPersonaModel.h" #import "KBVoiceInputBar.h" +#import "KBVoiceToTextManager.h" #import "AiVM.h" #import -@interface KBAIHomeVC 
() +@interface KBAIHomeVC () /// 人设列表容器 @property (nonatomic, strong) UICollectionView *collectionView; @@ -20,6 +21,9 @@ /// 底部语音输入栏 @property (nonatomic, strong) KBVoiceInputBar *voiceInputBar; +/// 语音转写管理器 +@property (nonatomic, strong) KBVoiceToTextManager *voiceToTextManager; + /// 人设数据 @property (nonatomic, strong) NSMutableArray *personas; @@ -61,6 +65,7 @@ self.aiVM = [[AiVM alloc] init]; [self setupUI]; + [self setupVoiceToTextManager]; [self loadPersonas]; } @@ -235,6 +240,14 @@ } } +#pragma mark - 4:语音转写 + +- (void)setupVoiceToTextManager { + self.voiceToTextManager = [[KBVoiceToTextManager alloc] initWithInputBar:self.voiceInputBar]; + self.voiceToTextManager.delegate = self; + [self.voiceToTextManager prepareConnection]; +} + #pragma mark - Lazy Load - (UICollectionView *)collectionView { @@ -263,41 +276,26 @@ - (KBVoiceInputBar *)voiceInputBar { if (!_voiceInputBar) { _voiceInputBar = [[KBVoiceInputBar alloc] init]; - _voiceInputBar.delegate = self; _voiceInputBar.statusText = @"按住按钮开始对话"; } return _voiceInputBar; } -#pragma mark - KBVoiceInputBarDelegate +#pragma mark - KBVoiceToTextManagerDelegate -- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar { - NSLog(@"[KBAIHomeVC] 开始录音"); - inputBar.statusText = @"正在聆听..."; +- (void)voiceToTextManager:(KBVoiceToTextManager *)manager + didReceiveFinalText:(NSString *)text { + if (text.length == 0) { + return; + } + NSLog(@"[KBAIHomeVC] 语音识别结果:%@", text); - // TODO: 开始录音逻辑 - // 1. 检查登录状态 - // 2. 连接语音识别服务 - // 3. 开始录音 + // TODO: 使用识别文本(例如发起聊天请求) } -- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar { - NSLog(@"[KBAIHomeVC] 结束录音"); - inputBar.statusText = @"正在识别..."; - - // TODO: 结束录音逻辑 - // 1. 停止录音 - // 2. 发送音频数据 - // 3. 等待识别结果 -} - -- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar { - NSLog(@"[KBAIHomeVC] 取消录音"); - inputBar.statusText = @"已取消"; - - // TODO: 取消录音逻辑 - // 1. 停止录音 - // 2. 
清理资源
+- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
+          didFailWithError:(NSError *)error {
+    NSLog(@"[KBAIHomeVC] 语音识别失败:%@", error.localizedDescription);
 }
 
 @end
diff --git a/keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.h b/keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.h
new file mode 100644
index 0000000..50c4a55
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.h
@@ -0,0 +1,60 @@
+//
+//  KBVoiceToTextManager.h
+//  keyBoard
+//
+//  Created by Mac on 2026/1/26.
+//
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+@class KBVoiceInputBar;
+@class KBVoiceToTextManager;
+
+/// Receives recording lifecycle events and transcription results from a
+/// KBVoiceToTextManager. Every method is optional.
+@protocol KBVoiceToTextManagerDelegate <NSObject>
+@optional
+/// The bound input bar reported that recording began.
+- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager;
+/// The bound input bar reported that recording ended.
+- (void)voiceToTextManagerDidEndRecording:(KBVoiceToTextManager *)manager;
+/// The bound input bar reported that recording was cancelled.
+- (void)voiceToTextManagerDidCancelRecording:(KBVoiceToTextManager *)manager;
+/// An interim transcript arrived. `text` is the finalized text accumulated so
+/// far followed by the interim segment; it may be empty.
+- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
+      didUpdateInterimText:(NSString *)text;
+/// A final transcript segment arrived. `text` is every final segment received
+/// since recording began, joined with single spaces; it is never empty.
+- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
+       didReceiveFinalText:(NSString *)text;
+/// Recognition failed or the connection closed with an error.
+- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
+          didFailWithError:(NSError *)error;
+@end
+
+/// Voice-to-text manager (binds KBVoiceInputBar and uses Deepgram).
+///
+/// The Deepgram API key is read from the main bundle's Info.plist under the
+/// key `KBDeepgramAPIKey`; it must be provided by build configuration and is
+/// never stored in source.
+@interface KBVoiceToTextManager : NSObject
+
+/// Receiver of recording and transcription callbacks; not retained.
+@property (nonatomic, weak, nullable) id<KBVoiceToTextManagerDelegate> delegate;
+/// The input bar passed to the initializer; not retained.
+@property (nonatomic, weak, readonly, nullable) KBVoiceInputBar *inputBar;
+
+/// Binds to `inputBar` by becoming its delegate (replacing any existing one)
+/// and configures the Deepgram streaming client.
+- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar;
+/// Asks the Deepgram client to prepare its connection ahead of recording.
+- (void)prepareConnection;
+/// Disconnects the Deepgram client.
+- (void)disconnect;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.m b/keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.m
new file mode 100644
index 0000000..e25a22c
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.m
@@ -0,0 +1,207 @@
+//
+//  KBVoiceToTextManager.m
+//  keyBoard
+//
+//  Created by Mac on 2026/1/26.
+//
+
+#import "KBVoiceToTextManager.h"
+#import "DeepgramStreamingManager.h"
+#import "KBVoiceInputBar.h"
+
+/// Info.plist key whose value is the Deepgram API key. The value is meant to
+/// come from build configuration so the credential is never committed to
+/// source control.
+static NSString *const kKBDeepgramAPIKeyInfoKey = @"KBDeepgramAPIKey";
+
+@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate,
+                                    DeepgramStreamingManagerDelegate>
+
+/// Streaming client that performs the transcription.
+@property (nonatomic, strong) DeepgramStreamingManager *deepgramManager;
+/// Input bar this manager is bound to; not retained.
+@property (nonatomic, weak, nullable) KBVoiceInputBar *inputBar;
+/// Final transcript segments received since recording began, space-joined.
+@property (nonatomic, strong) NSMutableString *fullText;
+
+@end
+
+@implementation KBVoiceToTextManager
+
+- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar {
+    self = [super init];
+    if (self) {
+        _inputBar = inputBar;
+        _inputBar.delegate = self;
+        _fullText = [[NSMutableString alloc] init];
+        [self setupDeepgram];
+    }
+    return self;
+}
+
+- (void)dealloc {
+    // Message the ivar directly: accessors should not run during dealloc.
+    [_deepgramManager disconnect];
+}
+
+#pragma mark - Public Methods
+
+- (void)prepareConnection {
+    [self.deepgramManager prepareConnection];
+}
+
+- (void)disconnect {
+    [self.deepgramManager disconnect];
+}
+
+#pragma mark - Private Methods
+
+/// Returns the Deepgram API key from the main bundle's Info.plist, or nil when
+/// the entry is missing, is not a string, or is blank.
+- (nullable NSString *)configuredAPIKey {
+    id value = [[NSBundle mainBundle] objectForInfoDictionaryKey:kKBDeepgramAPIKeyInfoKey];
+    if (![value isKindOfClass:[NSString class]]) {
+        return nil;
+    }
+    NSCharacterSet *blank = [NSCharacterSet whitespaceAndNewlineCharacterSet];
+    NSString *key = [(NSString *)value stringByTrimmingCharactersInSet:blank];
+    return key.length > 0 ? key : nil;
+}
+
+/// Creates the Deepgram streaming client and applies its fixed configuration.
+- (void)setupDeepgram {
+    NSString *apiKey = [self configuredAPIKey];
+    if (apiKey == nil) {
+        // The credential must not be embedded in source; without it the
+        // service is expected to reject the connection.
+        NSLog(@"[KBVoiceToTextManager] Missing Info.plist entry %@",
+              kKBDeepgramAPIKeyInfoKey);
+    }
+
+    DeepgramStreamingManager *manager = [[DeepgramStreamingManager alloc] init];
+    manager.delegate = self;
+    manager.serverURL = @"wss://api.deepgram.com/v1/listen";
+    manager.apiKey = apiKey ?: @"";
+    manager.language = @"en";
+    manager.model = @"nova-3";
+    manager.punctuate = YES;
+    manager.smartFormat = YES;
+    manager.interimResults = YES;
+    manager.encoding = @"linear16";
+    manager.sampleRate = 16000.0;
+    manager.channels = 1;
+    self.deepgramManager = manager;
+}
+
+/// Clears the accumulated final transcript.
+- (void)resetTranscript {
+    [self.fullText setString:@""];
+}
+
+/// Shows the failure status on the input bar and forwards `error` to the
+/// delegate when it implements the failure callback.
+- (void)reportFailure:(NSError *)error {
+    self.inputBar.statusText = @"识别失败";
+    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
+    if ([delegate respondsToSelector:@selector(voiceToTextManager:didFailWithError:)]) {
+        [delegate voiceToTextManager:self didFailWithError:error];
+    }
+}
+
+#pragma mark - KBVoiceInputBarDelegate
+
+- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
+    [self resetTranscript];
+    inputBar.statusText = @"正在连接...";
+    [self.deepgramManager start];
+
+    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
+    if ([delegate respondsToSelector:@selector(voiceToTextManagerDidBeginRecording:)]) {
+        [delegate voiceToTextManagerDidBeginRecording:self];
+    }
+}
+
+- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
+    inputBar.statusText = @"正在识别...";
+    [self.deepgramManager stopAndFinalize];
+
+    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
+    if ([delegate respondsToSelector:@selector(voiceToTextManagerDidEndRecording:)]) {
+        [delegate voiceToTextManagerDidEndRecording:self];
+    }
+}
+
+- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
+    inputBar.statusText = @"已取消";
+    [self resetTranscript];
+    [self.deepgramManager cancel];
+
+    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
+    if ([delegate respondsToSelector:@selector(voiceToTextManagerDidCancelRecording:)]) {
+        [delegate voiceToTextManagerDidCancelRecording:self];
+    }
+}
+
+#pragma mark - DeepgramStreamingManagerDelegate
+
+// NOTE(review): these callbacks update the input bar directly — confirm that
+// DeepgramStreamingManager delivers them on the main thread.
+
+- (void)deepgramStreamingManagerDidConnect {
+    self.inputBar.statusText = @"正在聆听...";
+}
+
+- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error {
+    // A disconnect without an error is not reported as a failure.
+    if (error == nil) {
+        return;
+    }
+    [self reportFailure:error];
+}
+
+- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
+    [self.inputBar updateVolumeRMS:rms];
+}
+
+- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
+    // Show the finalized text so far followed by the still-changing segment.
+    NSString *displayText = text ?: @"";
+    if (self.fullText.length > 0 && displayText.length > 0) {
+        displayText = [NSString stringWithFormat:@"%@ %@", self.fullText, displayText];
+    } else if (self.fullText.length > 0) {
+        displayText = [self.fullText copy];
+    }
+
+    self.inputBar.statusText = displayText.length > 0 ? displayText : @"正在识别...";
+
+    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
+    if ([delegate respondsToSelector:@selector(voiceToTextManager:didUpdateInterimText:)]) {
+        [delegate voiceToTextManager:self didUpdateInterimText:displayText];
+    }
+}
+
+- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
+    // Append the new segment, space-separated from what was already received.
+    if (text.length > 0) {
+        if (self.fullText.length > 0) {
+            [self.fullText appendString:@" "];
+        }
+        [self.fullText appendString:text];
+    }
+
+    NSString *finalText = [self.fullText copy];
+    self.inputBar.statusText = finalText.length > 0 ? finalText : @"识别完成";
+
+    // The delegate is only told about non-empty results.
+    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
+    if (finalText.length > 0 &&
+        [delegate respondsToSelector:@selector(voiceToTextManager:didReceiveFinalText:)]) {
+        [delegate voiceToTextManager:self didReceiveFinalText:finalText];
+    }
+}
+
+- (void)deepgramStreamingManagerDidFail:(NSError *)error {
+    [self reportFailure:error];
+}
+
+@end