diff --git a/keyBoard.xcodeproj/project.pbxproj b/keyBoard.xcodeproj/project.pbxproj
index e0f39f2..a909ccb 100644
--- a/keyBoard.xcodeproj/project.pbxproj
+++ b/keyBoard.xcodeproj/project.pbxproj
@@ -53,6 +53,22 @@
 		0459D1B72EBA287900F2D189 /* KBSkinManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 0459D1B62EBA287900F2D189 /* KBSkinManager.m */; };
 		0459D1B82EBA287900F2D189 /* KBSkinManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 0459D1B62EBA287900F2D189 /* KBSkinManager.m */; };
 		0460866B2F18D75500757C95 /* ai_test.m4a in Resources */ = {isa = PBXBuildFile; fileRef = 0460866A2F18D75500757C95 /* ai_test.m4a */; };
+		046086732F191B6900757C95 /* KBAiMainVC.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086722F191B6900757C95 /* KBAiMainVC.m */; };
+		046086752F191CC700757C95 /* AI技术分析.txt in Resources */ = {isa = PBXBuildFile; fileRef = 046086742F191CC700757C95 /* AI技术分析.txt */; };
+		0460869A2F19238500757C95 /* KBAiWaveformView.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086992F19238500757C95 /* KBAiWaveformView.m */; };
+		0460869B2F19238500757C95 /* KBAiChatView.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086952F19238500757C95 /* KBAiChatView.m */; };
+		0460869C2F19238500757C95 /* KBAiRecordButton.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086972F19238500757C95 /* KBAiRecordButton.m */; };
+		046086B12F19239B00757C95 /* SubtitleSync.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086AC2F19239B00757C95 /* SubtitleSync.m */; };
+		046086B22F19239B00757C95 /* TTSServiceClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086B02F19239B00757C95 /* TTSServiceClient.m */; };
+		046086B32F19239B00757C95 /* AudioSessionManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086A22F19239B00757C95 /* AudioSessionManager.m */; };
+		046086B42F19239B00757C95 /* LLMStreamClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086A82F19239B00757C95 /* LLMStreamClient.m */; };
+		046086B52F19239B00757C95 /* Segmenter.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086AA2F19239B00757C95 /* Segmenter.m */; };
+		046086B62F19239B00757C95 /* TTSPlaybackPipeline.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086AE2F19239B00757C95 /* TTSPlaybackPipeline.m */; };
+		046086B72F19239B00757C95 /* ConversationOrchestrator.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086A62F19239B00757C95 /* ConversationOrchestrator.m */; };
+		046086B82F19239B00757C95 /* ASRStreamClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 0460869E2F19239B00757C95 /* ASRStreamClient.m */; };
+		046086B92F19239B00757C95 /* AudioCaptureManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086A02F19239B00757C95 /* AudioCaptureManager.m */; };
+		046086BA2F19239B00757C95 /* AudioStreamPlayer.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086A42F19239B00757C95 /* AudioStreamPlayer.m */; };
+		046086BD2F1A039F00757C95 /* KBAICommentView.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086BC2F1A039F00757C95 /* KBAICommentView.m */; };
 		046131142ECF454500A6FADF /* KBKeyPreviewView.m in Sources */ = {isa = PBXBuildFile; fileRef = 046131132ECF454500A6FADF /* KBKeyPreviewView.m */; };
 		0477BDF02EBB76E30055D639 /* HomeSheetVC.m in Sources */ = {isa = PBXBuildFile; fileRef = 0477BDEF2EBB76E30055D639 /* HomeSheetVC.m */; };
 		0477BDF32EBB7B850055D639 /* KBDirectionIndicatorView.m in Sources */ = {isa = PBXBuildFile; fileRef = 0477BDF22EBB7B850055D639 /* KBDirectionIndicatorView.m */; };
@@ -327,6 +343,37 @@
 		0459D1B52EBA287900F2D189 /* KBSkinManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBSkinManager.h; sourceTree = "<group>"; };
 		0459D1B62EBA287900F2D189 /* KBSkinManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBSkinManager.m; sourceTree = "<group>"; };
 		0460866A2F18D75500757C95 /* ai_test.m4a */ = {isa = PBXFileReference; lastKnownFileType = file; path = ai_test.m4a; sourceTree = "<group>"; };
+		046086712F191B6900757C95 /* KBAiMainVC.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBAiMainVC.h; sourceTree = "<group>"; };
+		046086722F191B6900757C95 /* KBAiMainVC.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBAiMainVC.m; sourceTree = "<group>"; };
+		046086742F191CC700757C95 /* AI技术分析.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "AI技术分析.txt"; sourceTree = "<group>"; };
+		046086942F19238500757C95 /* KBAiChatView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBAiChatView.h; sourceTree = "<group>"; };
+		046086952F19238500757C95 /* KBAiChatView.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBAiChatView.m; sourceTree = "<group>"; };
+		046086962F19238500757C95 /* KBAiRecordButton.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBAiRecordButton.h; sourceTree = "<group>"; };
+		046086972F19238500757C95 /* KBAiRecordButton.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBAiRecordButton.m; sourceTree = "<group>"; };
+		046086982F19238500757C95 /* KBAiWaveformView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBAiWaveformView.h; sourceTree = "<group>"; };
+		046086992F19238500757C95 /* KBAiWaveformView.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBAiWaveformView.m; sourceTree = "<group>"; };
+		0460869D2F19239B00757C95 /* ASRStreamClient.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ASRStreamClient.h; sourceTree = "<group>"; };
+		0460869E2F19239B00757C95 /* ASRStreamClient.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = ASRStreamClient.m; sourceTree = "<group>"; };
+		0460869F2F19239B00757C95 /* AudioCaptureManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AudioCaptureManager.h; sourceTree = "<group>"; };
+		046086A02F19239B00757C95 /* AudioCaptureManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AudioCaptureManager.m; sourceTree = "<group>"; };
+		046086A12F19239B00757C95 /* AudioSessionManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AudioSessionManager.h; sourceTree = "<group>"; };
+		046086A22F19239B00757C95 /* AudioSessionManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AudioSessionManager.m; sourceTree = "<group>"; };
+		046086A32F19239B00757C95 /* AudioStreamPlayer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AudioStreamPlayer.h; sourceTree = "<group>"; };
+		046086A42F19239B00757C95 /* AudioStreamPlayer.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AudioStreamPlayer.m; sourceTree = "<group>"; };
+		046086A52F19239B00757C95 /* ConversationOrchestrator.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ConversationOrchestrator.h; sourceTree = "<group>"; };
+		046086A62F19239B00757C95 /* ConversationOrchestrator.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = ConversationOrchestrator.m; sourceTree = "<group>"; };
+		046086A72F19239B00757C95 /* LLMStreamClient.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LLMStreamClient.h; sourceTree = "<group>"; };
+		046086A82F19239B00757C95 /* LLMStreamClient.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = LLMStreamClient.m; sourceTree = "<group>"; };
+		046086A92F19239B00757C95 /* Segmenter.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Segmenter.h; sourceTree = "<group>"; };
+		046086AA2F19239B00757C95 /* Segmenter.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = Segmenter.m; sourceTree = "<group>"; };
+		046086AB2F19239B00757C95 /* SubtitleSync.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SubtitleSync.h; sourceTree = "<group>"; };
+		046086AC2F19239B00757C95 /* SubtitleSync.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = SubtitleSync.m; sourceTree = "<group>"; };
+		046086AD2F19239B00757C95 /* TTSPlaybackPipeline.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = TTSPlaybackPipeline.h; sourceTree = "<group>"; };
+		046086AE2F19239B00757C95 /* TTSPlaybackPipeline.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = TTSPlaybackPipeline.m; sourceTree = "<group>"; };
+		046086AF2F19239B00757C95 /* TTSServiceClient.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = TTSServiceClient.h; sourceTree = "<group>"; };
+		046086B02F19239B00757C95 /* TTSServiceClient.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = TTSServiceClient.m; sourceTree = "<group>"; };
+		046086BB2F1A039F00757C95 /* KBAICommentView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBAICommentView.h; sourceTree = "<group>"; };
+		046086BC2F1A039F00757C95 /* KBAICommentView.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBAICommentView.m; sourceTree = "<group>"; };
 		046131122ECF454500A6FADF /* KBKeyPreviewView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBKeyPreviewView.h; sourceTree = "<group>"; };
 		046131132ECF454500A6FADF /* KBKeyPreviewView.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBKeyPreviewView.m; sourceTree = "<group>"; };
 		0477BDEE2EBB76E30055D639 /* HomeSheetVC.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = HomeSheetVC.h; sourceTree = "<group>"; };
@@ -849,6 +896,76 @@
 			path = StoreKit2Manager;
 			sourceTree = "<group>";
 		};
+		0460866C2F191A5100757C95 /* M */ = {
+			isa = PBXGroup;
+			children = (
+			);
+			path = M;
+			sourceTree = "<group>";
+		};
+		0460866D2F191A5100757C95 /* V */ = {
+			isa = PBXGroup;
+			children = (
+				046086942F19238500757C95 /* KBAiChatView.h */,
+				046086952F19238500757C95 /* KBAiChatView.m */,
+				046086962F19238500757C95 /* KBAiRecordButton.h */,
+				046086972F19238500757C95 /* KBAiRecordButton.m */,
+				046086982F19238500757C95 /* KBAiWaveformView.h */,
+				046086992F19238500757C95 /* KBAiWaveformView.m */,
+				046086BB2F1A039F00757C95 /* KBAICommentView.h */,
+				046086BC2F1A039F00757C95 /* KBAICommentView.m */,
+			);
+			path = V;
+			sourceTree = "<group>";
+		};
+		0460866E2F191A5100757C95 /* VC */ = {
+			isa = PBXGroup;
+			children = (
+				046086712F191B6900757C95 /* KBAiMainVC.h */,
+				046086722F191B6900757C95 /* KBAiMainVC.m */,
+			);
+			path = VC;
+			sourceTree = "<group>";
+		};
+		0460866F2F191A5100757C95 /* VM */ = {
+			isa = PBXGroup;
+			children = (
+				0460869D2F19239B00757C95 /* ASRStreamClient.h */,
+				0460869E2F19239B00757C95 /* ASRStreamClient.m */,
+				0460869F2F19239B00757C95 /* AudioCaptureManager.h */,
+				046086A02F19239B00757C95 /* AudioCaptureManager.m */,
+				046086A12F19239B00757C95 /* AudioSessionManager.h */,
+				046086A22F19239B00757C95 /* AudioSessionManager.m */,
+				046086A32F19239B00757C95 /* AudioStreamPlayer.h */,
+				046086A42F19239B00757C95 /* AudioStreamPlayer.m */,
+				046086A52F19239B00757C95 /* ConversationOrchestrator.h */,
+				046086A62F19239B00757C95 /* ConversationOrchestrator.m */,
+				046086A72F19239B00757C95 /* LLMStreamClient.h */,
+				046086A82F19239B00757C95 /* LLMStreamClient.m */,
+				046086A92F19239B00757C95 /* Segmenter.h */,
+				046086AA2F19239B00757C95 /* Segmenter.m */,
+				046086AB2F19239B00757C95 /* SubtitleSync.h */,
+				046086AC2F19239B00757C95 /* SubtitleSync.m */,
+				046086AD2F19239B00757C95 /* TTSPlaybackPipeline.h */,
+				046086AE2F19239B00757C95 /* TTSPlaybackPipeline.m */,
+				046086AF2F19239B00757C95 /* TTSServiceClient.h */,
+				046086B02F19239B00757C95 /* TTSServiceClient.m */,
+			);
+			path = VM;
+			sourceTree = "<group>";
+		};
+		046086702F191A5100757C95 /* AiTalk */ = {
+			isa = PBXGroup;
+			children = (
+				046086742F191CC700757C95 /* AI技术分析.txt */,
+				0460866C2F191A5100757C95 /* M */,
+				0460866D2F191A5100757C95 /* V */,
+				0460866E2F191A5100757C95 /* VC */,
+				0460866F2F191A5100757C95 /* VM */,
+			);
+			path = AiTalk;
+			sourceTree = "<group>";
+		};
 		0477BD942EBAFF4E0055D639 /* Utils */ = {
 			isa = PBXGroup;
 			children = (
@@ -1450,6 +1567,7 @@
 		04FC95BF2EB1E3B1007BD342 /* Class */ = {
 			isa = PBXGroup;
 			children = (
+				046086702F191A5100757C95 /* AiTalk */,
 				04122F612EC5F3DF00EF7AB3 /* Pay */,
 				7276DDA22EC1B22500804C36 /* WebView */,
 				048908D32EBF618E00FABA60 /* Vender */,
@@ -1851,6 +1969,7 @@
 				04C6EABA2EAF86530089C901 /* Assets.xcassets in Resources */,
 				04A9FE212EB893F10020DB6D /* Localizable.strings in Resources */,
 				047920072ED86ABC004E8522 /* kb_guide_keyboard.gif in Resources */,
+				046086752F191CC700757C95 /* AI技术分析.txt in Resources */,
 				047920112ED98E7D004E8522 /* permiss_video_2.mp4 in Resources */,
 				04C6EABC2EAF86530089C901 /* LaunchScreen.storyboard in Resources */,
 				04286A132ECDEBF900CE730C /* KBSkinIconMap.strings in Resources */,
@@ -2005,6 +2124,7 @@
 				04122FAD2EC73C0100EF7AB3 /* KBVipSubscribeCell.m in Sources */,
 				049FB31D2EC21BCD00FAB05D /* KBMyKeyboardCell.m in Sources */,
 				048909F62EC0AAAA00FABA60 /* KBCategoryTitleCell.m in Sources */,
+				046086732F191B6900757C95 /* KBAiMainVC.m in Sources */,
 				048909F72EC0AAAA00FABA60 /* KBCategoryTitleView.m in Sources */,
 				04791F952ED48028004E8522 /* KBFeedBackVC.m in Sources */,
 				04890A042EC0BBBB00FABA60 /* KBCategoryTitleImageCell.m in Sources */,
@@ -2035,6 +2155,7 @@
 				0450AC1B2EF11E4400B6AF06 /* KBStoreKitBridge.swift in Sources */,
 				043FBCD22EAF97630036AFE1 /* KBPermissionViewController.m in Sources */,
 				049FB20E2EC1CD2800FAB05D /* KBAlert.m in Sources */,
+				046086BD2F1A039F00757C95 /* KBAICommentView.m in Sources */,
 				04A9FE162EB873C80020DB6D /* UIViewController+Extension.m in Sources */,
 				04C6EABE2EAF86530089C901 /* AppDelegate.m in Sources */,
 				04791FFF2ED830FA004E8522 /* KBKeyboardMaskView.m in Sources */,
@@ -2094,6 +2215,16 @@
 				04FC970E2EB334F8007BD342 /* UIImageView+KBWebImage.m in Sources */,
 				049FB2232EC311F900FAB05D /* KBPersonInfoVC.m in Sources */,
 				0498BD6B2EE025FC006CC1D5 /* KBForgetPwdVC.m in Sources */,
+				046086B12F19239B00757C95 /* SubtitleSync.m in Sources */,
+				046086B22F19239B00757C95 /* TTSServiceClient.m in Sources */,
+				046086B32F19239B00757C95 /* AudioSessionManager.m in Sources */,
+				046086B42F19239B00757C95 /* LLMStreamClient.m in Sources */,
+				046086B52F19239B00757C95 /* Segmenter.m in Sources */,
+				046086B62F19239B00757C95 /* TTSPlaybackPipeline.m in Sources */,
+				046086B72F19239B00757C95 /* ConversationOrchestrator.m in Sources */,
+				046086B82F19239B00757C95 /* ASRStreamClient.m in Sources */,
+				046086B92F19239B00757C95 /* AudioCaptureManager.m in Sources */,
+				046086BA2F19239B00757C95 /* AudioStreamPlayer.m in Sources */,
 				048908FE2EC0CC2400FABA60 /* UIScrollView+KBEmptyView.m in Sources */,
 				0498BD7E2EE04F9C006CC1D5 /* KBTag.m in Sources */,
 				04791F922ED48010004E8522 /* KBNoticeVC.m in Sources */,
@@ -2120,6 +2251,9 @@
 				047C65502EBCBA9E0035E841 /* KBShopVC.m in Sources */,
 				0477BE042EBC83130055D639 /* HomeMainVC.m in Sources */,
 				0477BDFD2EBC6A170055D639 /* HomeHotVC.m in Sources */,
+				0460869A2F19238500757C95 /* KBAiWaveformView.m in Sources */,
+				0460869B2F19238500757C95 /* KBAiChatView.m in Sources */,
+				0460869C2F19238500757C95 /* KBAiRecordButton.m in Sources */,
 				04122FAA2EC73C0100EF7AB3 /* KBVipPayHeaderView.m in Sources */,
 				0498BD622EDFFC12006CC1D5 /* KBMyVM.m in Sources */,
 				049FB2432EC4BBB700FAB05D /* KBLoginPopView.m in Sources */,
diff --git a/keyBoard/Class/AiTalk/AI技术分析.txt b/keyBoard/Class/AiTalk/AI技术分析.txt
new file mode 100644
index 0000000..28e4757
--- /dev/null
+++ b/keyBoard/Class/AiTalk/AI技术分析.txt
@@ -0,0 +1,521 @@
+Service     Purpose                          Example format
+ASR server  Speech recognition (WebSocket)   wss://api.example.com/asr
+LLM server  AI conversation (HTTP SSE)       https://api.example.com/chat
+TTS server  Speech synthesis                 https://api.example.com/tts
+
+iOS (Objective-C, iOS 15+) Technical Implementation Document
+Low-latency streaming voice companion chat (hold-to-talk, similar to the Maoxiang (猫箱) home screen)
+
+0. Scope and Goals
+
+Implement voice companion conversation on the home screen:
+
+- Hold to talk: start recording and stream the audio to ASR in real time
+- Release to finish: ASR finalizes immediately, returns the final text, and displays it
+- AI reply: render the text (typewriter effect) while playing the server-side TTS audio
+- Latency first: do not wait for the complete answer/complete audio; use "per-sentence triggering + streaming/quasi-streaming playback"
+- Barge-in: if the user presses again while the AI is speaking -> immediately stop playback/cancel requests and start a new recording round
+- Minimum iOS version: iOS 15
+
+1. Overall Architecture (client modules)
+KBAiMainVC
+ └─ ConversationOrchestrator (core state machine / module wiring / cancellation & barge-in)
+   ├─ AudioSessionManager (AVAudioSession configuration & interruption handling)
+   ├─ AudioCaptureManager (AVAudioEngine input tap -> 20ms PCM frames)
+   ├─ ASRStreamClient (NSURLSessionWebSocketTask streaming recognition)
+   ├─ LLMStreamClient (SSE/WS token stream)
+   ├─ Segmenter (sentence segmentation: trigger TTS as soon as a sentence is ready)
+   ├─ TTSServiceClient (requests TTS; adapts to multiple response shapes)
+   ├─ TTSPlaybackPipeline (pluggable: URL player / AAC decoding / direct PCM feed)
+   ├─ AudioStreamPlayer (AVAudioEngine + AVAudioPlayerNode playing PCM)
+   └─ SubtitleSync (maps text progress to playback progress)
+
+2. Audio Session (AVAudioSession) and Permissions
+2.1 Microphone permission
+
+- Request it only right before the user's first hold-to-talk
+- If the user denies it: prompt them to enable it in Settings
+
+2.2 AudioSession configuration (conversation mode)
+
+Objective-C (recommended parameters):
+
+- category: AVAudioSessionCategoryPlayAndRecord
+- mode: AVAudioSessionModeVoiceChat
+- options:
+  - AVAudioSessionCategoryOptionDefaultToSpeaker
+  - AVAudioSessionCategoryOptionAllowBluetooth
+  - (optional) AVAudioSessionCategoryOptionMixWithOthers: if you prefer not to interrupt the host app's audio (product decision)
+
+2.3 Interruption and route-change handling (required)
+
+Observe:
+
+- AVAudioSessionInterruptionNotification
+- AVAudioSessionRouteChangeNotification
+
+Handling principles:
+
+- Incoming call / interruption begins: stop capture + stop playback + cancel network sessions
+- Interruption ends: return to Idle and wait for the user to press again
+
+3. Audio Capture (streaming upload while the button is held)
+3.1 Fixed audio parameters (locked down for end-to-end stability)
+
+- Sample rate: 16000 Hz
+- Channels: 1
+- Format: PCM Int16 (pcm_s16le)
+- Frame duration: 20ms
+  - 16kHz * 0.02s = 320 samples
+  - Bytes per frame = 320 * 2 = 640 bytes
+
+3.2 AudioCaptureManager (AVAudioEngine input tap)
+
+Use:
+
+- AVAudioEngine
+- inputNode installTapOnBus:bufferSize:format:block:
+
+Key points:
+
+- Do no heavy work on the tap callback thread: only copy + dispatch to the audioQueue
+- Convert each AVAudioPCMBuffer into Int16 PCM NSData
+- Guarantee a steady output of 20ms frames; if a tap callback buffer is not exactly 20ms, do frame stitching/slicing with a ring buffer (a sketch follows 3.3)
+
+3.3 Interface definition (OC)
+@protocol AudioCaptureManagerDelegate <NSObject>
+- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame; // 20ms / 640B
+- (void)audioCaptureManagerDidUpdateRMS:(float)rms; // optional: UI waveform
+@end
+
+@interface AudioCaptureManager : NSObject
+@property (nonatomic, weak) id<AudioCaptureManagerDelegate> delegate;
+- (BOOL)startCapture:(NSError **)error;
+- (void)stopCapture;
+@end
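+
+A minimal sketch of the 3.2 frame slicing (an editor's illustration, not shipped code). It assumes the tap already delivers 16 kHz mono Float32 buffers and uses a hypothetical pendingData (NSMutableData) property standing in for the ring buffer; a real implementation may also need an AVAudioConverter from the hardware input format.
+
+static const NSUInteger kFrameBytes = 640; // 320 samples * 2 bytes = one 20ms frame
+
+- (void)consumePCMBuffer:(AVAudioPCMBuffer *)buffer {
+    // Float32 [-1, 1] -> Int16, appended to the pending byte buffer.
+    const float *src = buffer.floatChannelData[0];
+    NSUInteger n = buffer.frameLength;
+    NSMutableData *chunk = [NSMutableData dataWithLength:n * sizeof(int16_t)];
+    int16_t *dst = chunk.mutableBytes;
+    for (NSUInteger i = 0; i < n; i++) {
+        float s = fmaxf(-1.0f, fminf(1.0f, src[i]));
+        dst[i] = (int16_t)(s * 32767.0f);
+    }
+    [self.pendingData appendData:chunk];
+
+    // Emit fixed 640-byte (20ms) frames; keep the remainder for the next tap callback.
+    while (self.pendingData.length >= kFrameBytes) {
+        NSData *frame = [self.pendingData subdataWithRange:NSMakeRange(0, kFrameBytes)];
+        [self.pendingData replaceBytesInRange:NSMakeRange(0, kFrameBytes) withBytes:NULL length:0];
+        [self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
+    }
+}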
+4. ASR Streaming Recognition (iOS 15: NSURLSessionWebSocketTask)
+4.1 Suggested protocol (JSON control frames + binary audio frames)
+
+Start (text frame)
+
+{
+  "type":"start",
+  "sessionId":"uuid",
+  "format":"pcm_s16le",
+  "sampleRate":16000,
+  "channels":1
+}
+
+Audio (binary frame)
+
+- Send the 640B PCM frames directly
+- Rate: 50 fps (50 frames per second)
+
+Finalize (text frame)
+
+{ "type":"finalize", "sessionId":"uuid" }
+
+4.2 Downstream events
+{ "type":"partial", "text":"今天" }
+{ "type":"final", "text":"今天天气怎么样" }
+{ "type":"error", "code":123, "message":"..." }
+
+4.3 ASRStreamClient interface (OC)
+@protocol ASRStreamClientDelegate <NSObject>
+- (void)asrClientDidReceivePartialText:(NSString *)text;
+- (void)asrClientDidReceiveFinalText:(NSString *)text;
+- (void)asrClientDidFail:(NSError *)error;
+@end
+
+@interface ASRStreamClient : NSObject
+@property (nonatomic, weak) id<ASRStreamClientDelegate> delegate;
+- (void)startWithSessionId:(NSString *)sessionId;
+- (void)sendAudioPCMFrame:(NSData *)pcmFrame; // 20ms frame
+- (void)finalize;
+- (void)cancel;
+@end
+
+5. LLM Streaming Generation (token stream)
+5.1 Goals
+
+- Low latency: never wait for the whole answer
+- Receive tokens over SSE or WS
+- Tokens feed the Segmenter; once a sentence is ready, trigger TTS
+
+5.2 LLMStreamClient interface (OC)
+@protocol LLMStreamClientDelegate <NSObject>
+- (void)llmClientDidReceiveToken:(NSString *)token;
+- (void)llmClientDidComplete;
+- (void)llmClientDidFail:(NSError *)error;
+@end
+
+@interface LLMStreamClient : NSObject
+@property (nonatomic, weak) id<LLMStreamClientDelegate> delegate;
+- (void)sendUserText:(NSString *)text conversationId:(NSString *)cid;
+- (void)cancel;
+@end
+
+6. Segmenter (sentence splitting: play the first sentence first)
+6.1 Split rules (recommended)
+
+Cut a segment when either condition holds (sketched below):
+
+- One of 。!?\n is encountered
+- Or the accumulated character count is >= 30 (configurable)
+
+6.2 Segmenter interface (OC)
+@interface Segmenter : NSObject
+- (void)appendToken:(NSString *)token;
+- (NSArray<NSString *> *)popReadySegments; // returns the segments that can go to TTS right away
+- (void)reset;
+@end
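+
+A minimal sketch of the 6.1 rules (an editor's illustration; the punctuation set and the 30-character threshold are the configurable values named above):
+
+@implementation Segmenter {
+    NSMutableString *_pending;
+    NSMutableArray<NSString *> *_ready;
+}
+
+- (void)appendToken:(NSString *)token {
+    if (!_pending) { _pending = [NSMutableString string]; }
+    if (!_ready) { _ready = [NSMutableArray array]; }
+    [_pending appendString:token];
+
+    // Rule 1: cut at the last sentence-ending mark seen so far.
+    NSCharacterSet *stops = [NSCharacterSet characterSetWithCharactersInString:@"。!?\n"];
+    NSRange r = [_pending rangeOfCharacterFromSet:stops options:NSBackwardsSearch];
+    if (r.location != NSNotFound) {
+        NSUInteger cut = r.location + r.length;
+        [_ready addObject:[_pending substringToIndex:cut]];
+        [_pending deleteCharactersInRange:NSMakeRange(0, cut)];
+    } else if (_pending.length >= 30) {
+        // Rule 2: flush once 30 characters accumulate (configurable).
+        [_ready addObject:[_pending copy]];
+        [_pending setString:@""];
+    }
+}
+
+- (NSArray<NSString *> *)popReadySegments {
+    NSArray<NSString *> *out = [_ready copy];
+    [_ready removeAllObjects];
+    return out ?: @[];
+}
+
+- (void)reset {
+    [_pending setString:@""];
+    [_ready removeAllObjects];
+}
+@end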
+7. TTS: output format undecided -> the client builds a "pluggable playback pipeline"
+
+Because the server team has not yet fixed the output format, the client must be able to handle any of these four TTS output modes:
+
+Mode A: return an m4a/MP3 URL (easiest to ship)
+
+- The server returns a URL (or a base64 file)
+- The client plays it with AVPlayer / AVAudioPlayer
+- Subtitle sync uses "audio duration mapping" (the duration is available)
+
+Pros: simple for the server
+Cons: first-frame latency is usually higher (must wait for the full clip to be generated, or at least for the first chunk)
+
+Mode B: return AAC chunks (streaming)
+
+- The server pushes AAC frames over WS
+- The client must decode AAC to PCM, then feed AudioStreamPlayer
+
+Mode C: return Opus chunks (streaming)
+
+- Requires an Opus decoding library (higher cost on server/client)
+- Decode, then feed the PCM player
+
+Mode D: return PCM chunks (best for low latency)
+
+- The server pushes PCM16 chunks directly (e.g., one every 100ms)
+- The client turns them straight into AVAudioPCMBuffer and schedules them
+- Lowest latency, most robust implementation
+
+8. TTSServiceClient (unified network-layer interface)
+8.1 Unified callback events (abstraction)
+typedef NS_ENUM(NSInteger, TTSPayloadType) {
+    TTSPayloadTypeURL,       // A
+    TTSPayloadTypePCMChunk,  // D
+    TTSPayloadTypeAACChunk,  // B
+    TTSPayloadTypeOpusChunk  // C
+};
+
+@protocol TTSServiceClientDelegate <NSObject>
+- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId;
+- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
+                          payloadType:(TTSPayloadType)type
+                            segmentId:(NSString *)segmentId;
+- (void)ttsClientDidFinishSegment:(NSString *)segmentId;
+- (void)ttsClientDidFail:(NSError *)error;
+@end
+
+@interface TTSServiceClient : NSObject
+@property (nonatomic, weak) id<TTSServiceClientDelegate> delegate;
+- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId;
+- (void)cancel;
+@end
+
+This way, whichever output the server team finally picks, you only implement the matching branch; the client architecture never has to be torn up.
+
+9. TTSPlaybackPipeline (playback pipeline: routes by payloadType)
+9.1 Design goals
+
+- Support both URL playback and streaming chunk playback
+- Expose a unified "start playback / stop / progress" interface for subtitle sync and barge-in
+
+9.2 Pipeline structure (suggested)
+
+- TTSPlaybackPipeline only does routing and queue management
+- URL -> TTSURLPlayer (AVPlayer)
+- PCM -> AudioStreamPlayer (AVAudioEngine)
+- AAC/Opus -> Decoder -> PCM -> AudioStreamPlayer
+
+9.3 Pipeline interface (OC)
+@protocol TTSPlaybackPipelineDelegate <NSObject>
+- (void)pipelineDidStartSegment:(NSString *)segmentId duration:(NSTimeInterval)duration;
+- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time segmentId:(NSString *)segmentId;
+- (void)pipelineDidFinishSegment:(NSString *)segmentId;
+@end
+
+@interface TTSPlaybackPipeline : NSObject
+@property (nonatomic, weak) id<TTSPlaybackPipelineDelegate> delegate;
+
+- (BOOL)start:(NSError **)error; // spins up the audio engine, etc.
+- (void)stop;                    // stop immediately (barge-in)
+
+- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId;
+- (void)enqueueChunk:(NSData *)chunk payloadType:(TTSPayloadType)type segmentId:(NSString *)segmentId;
+
+// Optional: for subtitle sync
+- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId;
+- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
+@end
+
+10. AudioStreamPlayer (streaming PCM playback, the low-latency core)
+10.1 Use AVAudioEngine + AVAudioPlayerNode
+
+- Convert each PCM chunk into an AVAudioPCMBuffer
+- Play via scheduleBuffer
+- Track "current playback time / total duration" per segment (estimated, or accumulated from chunk durations)
+
+10.2 Interface (OC)
+@interface AudioStreamPlayer : NSObject
+- (BOOL)start:(NSError **)error;
+- (void)stop;
+- (void)enqueuePCMChunk:(NSData *)pcmData
+             sampleRate:(double)sampleRate
+               channels:(int)channels
+              segmentId:(NSString *)segmentId;
+
+- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId;
+- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
+@end
+
+Recommended PCM chunk granularity: 50ms-200ms (too small means scheduling too frequently; too large adds latency).
+
+11. Subtitle Sync (latency first)
+11.1 Strategy
+
+For each segment's text, map the number of visible characters to playback progress (sketched below):
+
+visibleCount = round(text.length * (t / T))
+
+- t: the segment's current playback position (provided by the pipeline)
+- T: the segment's total duration (read directly in URL mode; accumulated/estimated in chunk mode)
+
+11.2 SubtitleSync interface (OC)
+@interface SubtitleSync : NSObject
+- (NSString *)visibleTextForFullText:(NSString *)fullText
+                         currentTime:(NSTimeInterval)t
+                            duration:(NSTimeInterval)T;
+@end
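+
+A minimal sketch of the 11.1 mapping (an editor's illustration): visibleCount = round(text.length * t / T), clamped and snapped to composed character boundaries so emoji are never split mid-sequence.
+
+@implementation SubtitleSync
+
+- (NSString *)visibleTextForFullText:(NSString *)fullText
+                         currentTime:(NSTimeInterval)t
+                            duration:(NSTimeInterval)T {
+    if (T <= 0 || fullText.length == 0) {
+        return @"";
+    }
+    double progress = MIN(MAX(t / T, 0.0), 1.0);
+    NSUInteger visible = (NSUInteger)llround(fullText.length * progress);
+    if (visible == 0) {
+        return @"";
+    }
+    // Snap to a composed character sequence boundary.
+    NSRange safe = [fullText rangeOfComposedCharacterSequencesForRange:NSMakeRange(0, visible)];
+    return [fullText substringWithRange:safe];
+}
+
+@end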
+12. ConversationOrchestrator (state machine + barge-in + queues)
+12.1 States
+typedef NS_ENUM(NSInteger, ConversationState) {
+    ConversationStateIdle,
+    ConversationStateListening,
+    ConversationStateRecognizing,
+    ConversationStateThinking,
+    ConversationStateSpeaking
+};
+
+12.2 Key flows
+Event: user presses (userDidPressRecord)
+
+- If currently Speaking/Thinking:
+  - [ttsService cancel]
+  - [llmClient cancel]
+  - [asrClient cancel] (if still recognizing)
+  - [pipeline stop] (stop playback immediately)
+  - Clear the segment queue and subtitle queue
+- Configure/activate the AudioSession
+- Create a new sessionId
+- [asrClient startWithSessionId:]
+- [audioCapture startCapture:]
+- state = Listening
+
+Event: user releases (userDidReleaseRecord)
+
+- [audioCapture stopCapture]
+- [asrClient finalize]
+- state = Recognizing
+
+Callback: ASR final text
+
+- UI shows the user's final text
+- state = Thinking
+- Start the LLM stream: [llmClient sendUserText:conversationId:]
+
+Callback: LLM token
+
+- segmenter appendToken
+- segments = [segmenter popReadySegments]
+- For each segment:
+  - Generate a segmentId
+  - Record segmentTextMap[segmentId] = segmentText
+  - [ttsService requestTTSForText:segmentId:]
+- When the first playable audio arrives and starts playing:
+  - state = Speaking
+
+Callback: TTS audio arrives
+
+- URL: [pipeline enqueueURL:segmentId:]
+- chunk: [pipeline enqueueChunk:payloadType:segmentId:]
+
+Callback: pipeline playback-time updates (at 30-60 fps, or via a timer)
+
+- Look up fullText for the current segmentId
+- visible = [subtitleSync visibleTextForFullText:currentTime:duration:]
+- UI updates the AI's visible text
+
+12.3 Barge-in
+
+When the user presses again:
+
+- Stop playback immediately
+- Cancel all outstanding network requests
+- Discard all unplayed segments
+- Start a new recording round
+
+12.4 Orchestrator interface (OC)
+@interface ConversationOrchestrator : NSObject
+@property (nonatomic, assign, readonly) ConversationState state;
+
+- (void)userDidPressRecord;
+- (void)userDidReleaseRecord;
+
+@property (nonatomic, copy) void (^onUserFinalText)(NSString *text);
+@property (nonatomic, copy) void (^onAssistantVisibleText)(NSString *text);
+@property (nonatomic, copy) void (^onError)(NSError *error);
+@end
+
+13. Thread/Queue Model (mandatory, to avoid races)
+
+Suggested: three queues plus a serial orchestrator queue:
+
+- dispatch_queue_t audioQueue; (capture-frame processing, ring buffer)
+- dispatch_queue_t networkQueue; (WS send/receive and parsing)
+- dispatch_queue_t orchestratorQueue; (serial state machine; the only place that mutates state/queues)
+- UI updates always go back to the main thread
+
+Rules:
+
+- Every network/audio callback -> dispatch_async(orchestratorQueue, ^{ ... })
+- The orchestrator then decides whether to emit UI callbacks (on the main thread)
+
+14. Key Parameters (latency and stability)
+
+- Audio frame: 20ms
+- PCM: 16k / mono / int16
+- ASR upload: WS binary
+- LLM: token stream
+- TTS: prefer chunks; even in URL mode, start downloading and playing as early as possible
+- Chunk playback buffer: 100-200ms (jitter protection)
+
+15. Rollout Plan (iterating while the server side is undecided)
+Phase 1: get end-to-end working first (simulate with "URL mode")
+
+- TTSServiceClient initially assumes the server returns an m4a URL (or a local mock URL)
+- The pipeline implements URL playback (AVPlayer)
+- Get barge-in + subtitle sync working first
+
+Phase 2: swap in the real output once the server side decides
+
+- If the server sends PCM chunks: go straight to AudioStreamPlayer (most recommended)
+- If AAC chunks: add an AAC decoding module (AudioConverter or third-party)
+- If Opus chunks: integrate an Opus decoding library, then feed PCM
+
+Key point: the Orchestrator/Segmenter/ASR/subtitle sync never change; only the TTSPlaybackPipeline branch gets replaced.
+
+16. Compliance / UX Notes
+
+- Recording must be triggered by a user action (hold)
+- Show a clear "recording" indicator and waveform
+- Never record covertly or automatically
+- Allow barge-in at any time during playback
+
+End of document
+Additional requirements for the code-writing AI (recommended to attach along with this document):
+
+- Language: Objective-C (.h/.m)
+- iOS 15+; WebSocket via NSURLSessionWebSocketTask
+- Audio capture via AVAudioEngine + a ring buffer slicing 20ms frames
+- The playback pipeline must support: URL playback (AVPlayer) + PCM chunk playback (AVAudioEngine)
+- The remaining AAC/Opus branches may stay as TODO/stubs, but the interfaces must be reserved
diff --git a/keyBoard/Class/AiTalk/V/KBAICommentView.h b/keyBoard/Class/AiTalk/V/KBAICommentView.h
new file mode 100644
index 0000000..9fe0534
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAICommentView.h
@@ -0,0 +1,16 @@
+//
+//  KBAICommentView.h
+//  keyBoard
+//
+//  Created by Mac on 2026/1/16.
+//
+
+#import <UIKit/UIKit.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface KBAICommentView : UIView
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/keyBoard/Class/AiTalk/V/KBAICommentView.m b/keyBoard/Class/AiTalk/V/KBAICommentView.m
new file mode 100644
index 0000000..2dbb95a
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAICommentView.m
@@ -0,0 +1,20 @@
+//
+//  KBAICommentView.m
+//  keyBoard
+//
+//  Created by Mac on 2026/1/16.
+//
+
+#import "KBAICommentView.h"
+
+@implementation KBAICommentView
+
+/*
+// Only override drawRect: if you perform custom drawing.
+// An empty implementation adversely affects performance during animation.
+- (void)drawRect:(CGRect)rect {
+    // Drawing code
+}
+*/
+
+@end
diff --git a/keyBoard/Class/AiTalk/V/KBAiChatView.h b/keyBoard/Class/AiTalk/V/KBAiChatView.h
new file mode 100644
index 0000000..4a44ac4
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAiChatView.h
@@ -0,0 +1,54 @@
+//
+//  KBAiChatView.h
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import <UIKit/UIKit.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Message type
+typedef NS_ENUM(NSInteger, KBAiChatMessageType) {
+    KBAiChatMessageTypeUser,      // user message
+    KBAiChatMessageTypeAssistant  // AI reply
+};
+
+/// Chat message model
+@interface KBAiChatMessage : NSObject
+@property(nonatomic, assign) KBAiChatMessageType type;
+@property(nonatomic, copy) NSString *text;
+@property(nonatomic, assign) BOOL isComplete; // finished or not (used by the typewriter effect)
++ (instancetype)userMessageWithText:(NSString *)text;
++ (instancetype)assistantMessageWithText:(NSString *)text;
+@end
+
+/// Chat view
+/// Shows the bubble list of user messages and AI replies
+@interface KBAiChatView : UIView
+
+/// Add a user message
+/// @param text message text
+- (void)addUserMessage:(NSString *)text;
+
+/// Add an AI message
+/// @param text message text
+- (void)addAssistantMessage:(NSString *)text;
+
+/// Update the last AI message (for the typewriter effect)
+/// @param text currently visible text
+- (void)updateLastAssistantMessage:(NSString *)text;
+
+/// Mark the last AI message as complete
+- (void)markLastAssistantMessageComplete;
+
+/// Remove all messages
+- (void)clearMessages;
+
+/// Scroll to the bottom
+- (void)scrollToBottom;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/keyBoard/Class/AiTalk/V/KBAiChatView.m b/keyBoard/Class/AiTalk/V/KBAiChatView.m
new file mode 100644
index 0000000..1caab6f
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAiChatView.m
@@ -0,0 +1,296 @@
+//
+//  KBAiChatView.m
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import "KBAiChatView.h"
+
+#pragma mark - KBAiChatMessage
+
+@implementation KBAiChatMessage
+
++ (instancetype)userMessageWithText:(NSString *)text {
+    KBAiChatMessage *message = [[KBAiChatMessage alloc] init];
+    message.type = KBAiChatMessageTypeUser;
+    message.text = text;
+    message.isComplete = YES;
+    return message;
+}
+
++ (instancetype)assistantMessageWithText:(NSString *)text {
+    KBAiChatMessage *message = [[KBAiChatMessage alloc] init];
+    message.type = KBAiChatMessageTypeAssistant;
+    message.text = text;
+    message.isComplete = NO;
+    return message;
+}
+
+@end
+
+#pragma mark - KBAiChatBubbleCell
+
+@interface KBAiChatBubbleCell : UITableViewCell
+@property(nonatomic, strong) UIView *bubbleView;
+@property(nonatomic, strong) UILabel *messageLabel;
+@property(nonatomic, assign) KBAiChatMessageType messageType;
+@property(nonatomic, copy) NSArray<NSLayoutConstraint *> *bubbleConstraints; // per-type constraints, swapped on each configure
+@end
+
+@implementation KBAiChatBubbleCell
+
+- (instancetype)initWithStyle:(UITableViewCellStyle)style
+              reuseIdentifier:(NSString *)reuseIdentifier {
+    self = [super initWithStyle:style reuseIdentifier:reuseIdentifier];
+    if (self) {
+        self.backgroundColor = [UIColor clearColor];
+        self.selectionStyle = UITableViewCellSelectionStyleNone;
+
+        // Bubble view
+        self.bubbleView = [[UIView alloc] init];
+        self.bubbleView.layer.cornerRadius = 16;
+        self.bubbleView.layer.masksToBounds = YES;
+        self.bubbleView.translatesAutoresizingMaskIntoConstraints = NO;
+        [self.contentView addSubview:self.bubbleView];
+
+        // Message label
+        self.messageLabel = [[UILabel alloc] init];
+        self.messageLabel.numberOfLines = 0;
+        self.messageLabel.font = [UIFont systemFontOfSize:16];
+        self.messageLabel.translatesAutoresizingMaskIntoConstraints = NO;
+        [self.bubbleView addSubview:self.messageLabel];
+
+        // Message label constraints (fixed; only the bubble's edge constraints change per message type)
+        [NSLayoutConstraint activateConstraints:@[
+            [self.messageLabel.topAnchor constraintEqualToAnchor:self.bubbleView.topAnchor constant:10],
+            [self.messageLabel.bottomAnchor constraintEqualToAnchor:self.bubbleView.bottomAnchor constant:-10],
+            [self.messageLabel.leadingAnchor constraintEqualToAnchor:self.bubbleView.leadingAnchor constant:12],
+            [self.messageLabel.trailingAnchor constraintEqualToAnchor:self.bubbleView.trailingAnchor constant:-12],
+        ]];
+    }
+    return self;
+}
+
+- (void)configureWithMessage:(KBAiChatMessage *)message {
+    self.messageLabel.text = message.text;
+    self.messageType = message.type;
+
+    // Swap the per-type bubble constraints instead of stacking duplicates on cell reuse.
+    [NSLayoutConstraint deactivateConstraints:self.bubbleConstraints ?: @[]];
+
+    NSMutableArray<NSLayoutConstraint *> *constraints = [NSMutableArray arrayWithArray:@[
+        [self.bubbleView.topAnchor constraintEqualToAnchor:self.contentView.topAnchor constant:4],
+        [self.bubbleView.bottomAnchor constraintEqualToAnchor:self.contentView.bottomAnchor constant:-4],
+        [self.bubbleView.widthAnchor constraintLessThanOrEqualToAnchor:self.contentView.widthAnchor multiplier:0.75],
+    ]];
+
+    if (message.type == KBAiChatMessageTypeUser) {
+        // User message: right-aligned, theme-color background
+        self.bubbleView.backgroundColor = [UIColor systemBlueColor];
+        self.messageLabel.textColor = [UIColor whiteColor];
+        [constraints addObject:[self.bubbleView.trailingAnchor constraintEqualToAnchor:self.contentView.trailingAnchor constant:-16]];
+    } else {
+        // AI message: left-aligned, light-gray background
+        self.bubbleView.backgroundColor = [UIColor systemGray5Color];
+        self.messageLabel.textColor = [UIColor labelColor];
+        [constraints addObject:[self.bubbleView.leadingAnchor constraintEqualToAnchor:self.contentView.leadingAnchor constant:16]];
+    }
+
+    self.bubbleConstraints = constraints;
+    [NSLayoutConstraint activateConstraints:constraints];
+}
+
+@end
+
+#pragma mark - KBAiChatView
+
+@interface KBAiChatView () <UITableViewDataSource, UITableViewDelegate>
+@property(nonatomic, strong) UITableView *tableView;
+@property(nonatomic, strong) NSMutableArray<KBAiChatMessage *> *messages;
+@end
+
+@implementation KBAiChatView
+
+- (instancetype)initWithFrame:(CGRect)frame {
+    self = [super initWithFrame:frame];
+    if (self) {
+        [self setup];
+    }
+    return self;
+}
+
+- (instancetype)initWithCoder:(NSCoder *)coder {
+    self = [super initWithCoder:coder];
+    if (self) {
+        [self setup];
+    }
+    return self;
+}
+
+- (void)setup {
+    self.messages = [[NSMutableArray alloc] init];
+
+    self.tableView = [[UITableView alloc] initWithFrame:self.bounds style:UITableViewStylePlain];
+    self.tableView.autoresizingMask = UIViewAutoresizingFlexibleWidth | UIViewAutoresizingFlexibleHeight;
+    self.tableView.dataSource = self;
+    self.tableView.delegate = self;
+    self.tableView.separatorStyle = UITableViewCellSeparatorStyleNone;
+    self.tableView.backgroundColor = [UIColor clearColor];
+    self.tableView.estimatedRowHeight = 60;
+    self.tableView.rowHeight = UITableViewAutomaticDimension;
+    [self.tableView registerClass:[KBAiChatBubbleCell class] forCellReuseIdentifier:@"ChatCell"];
+    [self addSubview:self.tableView];
+}
+
+#pragma mark - Public Methods
+
+- (void)addUserMessage:(NSString *)text {
+    KBAiChatMessage *message = [KBAiChatMessage userMessageWithText:text];
+    [self.messages addObject:message];
+
+    [self.tableView reloadData];
+    [self scrollToBottom];
+}
+
+- (void)addAssistantMessage:(NSString *)text {
+    KBAiChatMessage *message = [KBAiChatMessage assistantMessageWithText:text];
+    [self.messages addObject:message];
+
+    [self.tableView reloadData];
+    [self scrollToBottom];
+}
+
+- (void)updateLastAssistantMessage:(NSString *)text {
+    // Find the last (still incomplete) AI message
+    for (NSInteger i = self.messages.count - 1; i >= 0; i--) {
+        KBAiChatMessage *message = self.messages[i];
+        if (message.type == KBAiChatMessageTypeAssistant && !message.isComplete) {
+            message.text = text;
+
+            NSIndexPath *indexPath = [NSIndexPath indexPathForRow:i inSection:0];
+            [self.tableView reloadRowsAtIndexPaths:@[ indexPath ]
+                                  withRowAnimation:UITableViewRowAnimationNone];
+            return;
+        }
+    }
+
+    // None found: append a new message instead
+    [self addAssistantMessage:text];
+}
+
+- (void)markLastAssistantMessageComplete {
+    for (NSInteger i = self.messages.count - 1; i >= 0; i--) {
+        KBAiChatMessage *message = self.messages[i];
+        if (message.type == KBAiChatMessageTypeAssistant) {
+            message.isComplete = YES;
+            return;
+        }
+    }
+}
+
+- (void)clearMessages {
+    [self.messages removeAllObjects];
+    [self.tableView reloadData];
+}
+
+- (void)scrollToBottom {
+    if (self.messages.count == 0)
+        return;
+
+    NSIndexPath *lastIndexPath = [NSIndexPath indexPathForRow:self.messages.count - 1 inSection:0];
+    [self.tableView scrollToRowAtIndexPath:lastIndexPath
+                          atScrollPosition:UITableViewScrollPositionBottom
+                                  animated:YES];
+}
+
+#pragma mark - UITableViewDataSource
+
+- (NSInteger)tableView:(UITableView *)tableView numberOfRowsInSection:(NSInteger)section {
+    return self.messages.count;
+}
+
+- (UITableViewCell *)tableView:(UITableView *)tableView cellForRowAtIndexPath:(NSIndexPath *)indexPath {
+    KBAiChatBubbleCell *cell = [tableView dequeueReusableCellWithIdentifier:@"ChatCell"
+                                                               forIndexPath:indexPath];
+
+    KBAiChatMessage *message = self.messages[indexPath.row];
+    [cell configureWithMessage:message];
+
+    return cell;
+}
+
+#pragma mark - UITableViewDelegate
+
+- (CGFloat)tableView:(UITableView *)tableView estimatedHeightForRowAtIndexPath:(NSIndexPath *)indexPath {
+    return 60;
+}
+
+@end
diff --git a/keyBoard/Class/AiTalk/V/KBAiRecordButton.h b/keyBoard/Class/AiTalk/V/KBAiRecordButton.h
new file mode 100644
index 0000000..35938d0
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAiRecordButton.h
@@ -0,0 +1,56 @@
+//
+//  KBAiRecordButton.h
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import <UIKit/UIKit.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Record button state
+typedef NS_ENUM(NSInteger, KBAiRecordButtonState) {
+    KBAiRecordButtonStateNormal,     // normal
+    KBAiRecordButtonStateRecording,  // recording
+    KBAiRecordButtonStateDisabled    // disabled
+};
+
+@class KBAiRecordButton;
+
+/// Record button delegate
+@protocol KBAiRecordButtonDelegate <NSObject>
+@optional
+/// Press began
+- (void)recordButtonDidBeginPress:(KBAiRecordButton *)button;
+/// Press ended
+- (void)recordButtonDidEndPress:(KBAiRecordButton *)button;
+/// Press cancelled (e.g., finger slid off the button)
+- (void)recordButtonDidCancelPress:(KBAiRecordButton *)button;
+@end
+
+/// Hold-to-talk button
+/// Supports a long-press gesture and a waveform animation
+@interface KBAiRecordButton : UIView
+
+@property(nonatomic, weak) id<KBAiRecordButtonDelegate> delegate;
+
+/// Current state
+@property(nonatomic, assign) KBAiRecordButtonState state;
+
+/// Button title (normal state)
+@property(nonatomic, copy) NSString *normalTitle;
+
+/// Button title (recording state)
+@property(nonatomic, copy) NSString *recordingTitle;
+
+/// Accent color
+@property(nonatomic, strong) UIColor *tintColor;
+
+/// Update the volume (drives the waveform animation)
+/// @param rms volume RMS value (0.0 - 1.0)
+- (void)updateVolumeRMS:(float)rms;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/keyBoard/Class/AiTalk/V/KBAiRecordButton.m b/keyBoard/Class/AiTalk/V/KBAiRecordButton.m
new file mode 100644
index 0000000..3d24827
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAiRecordButton.m
@@ -0,0 +1,248 @@
+//
+//  KBAiRecordButton.m
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import "KBAiRecordButton.h"
+#import "KBAiWaveformView.h"
+
+@interface KBAiRecordButton ()
+
+@property(nonatomic, strong) UIView *backgroundView;
+@property(nonatomic, strong) UILabel *titleLabel;
+@property(nonatomic, strong) KBAiWaveformView *waveformView;
+@property(nonatomic, strong) UIImageView *micIconView;
+@property(nonatomic, assign) BOOL isPressing;
+
+@end
+
+@implementation KBAiRecordButton
+
+- (instancetype)initWithFrame:(CGRect)frame {
+    self = [super initWithFrame:frame];
+    if (self) {
+        [self setup];
+    }
+    return self;
+}
+
+- (instancetype)initWithCoder:(NSCoder *)coder {
+    self = [super initWithCoder:coder];
+    if (self) {
+        [self setup];
+    }
+    return self;
+}
+
+- (void)setup {
+    _state = KBAiRecordButtonStateNormal;
+    _normalTitle = @"按住说话";
+    _recordingTitle = @"松开结束";
+    _tintColor = [UIColor systemBlueColor];
+
+    // Background view
+    self.backgroundView = [[UIView alloc] init];
+    self.backgroundView.backgroundColor = [UIColor systemGray6Color];
+    self.backgroundView.layer.cornerRadius = 25;
+    self.backgroundView.layer.masksToBounds = YES;
+    self.backgroundView.translatesAutoresizingMaskIntoConstraints = NO;
+    [self addSubview:self.backgroundView];
+
+    // Microphone icon
+    self.micIconView = [[UIImageView alloc] init];
+    self.micIconView.image = [UIImage systemImageNamed:@"mic.fill"];
+    self.micIconView.tintColor = self.tintColor;
+    self.micIconView.contentMode = UIViewContentModeScaleAspectFit;
+    self.micIconView.translatesAutoresizingMaskIntoConstraints = NO;
+    [self.backgroundView addSubview:self.micIconView];
+
+    // Title label
+    self.titleLabel = [[UILabel alloc] init];
+    self.titleLabel.text = self.normalTitle;
+    self.titleLabel.font = [UIFont systemFontOfSize:16 weight:UIFontWeightMedium];
+    self.titleLabel.textColor = [UIColor labelColor];
+    self.titleLabel.translatesAutoresizingMaskIntoConstraints = NO;
+    [self.backgroundView addSubview:self.titleLabel];
+
+    // Waveform view (visible while recording)
+    self.waveformView = [[KBAiWaveformView alloc] init];
+    self.waveformView.waveColor = self.tintColor;
+    self.waveformView.alpha = 0;
+    self.waveformView.translatesAutoresizingMaskIntoConstraints = NO;
+    [self.backgroundView addSubview:self.waveformView];
+
+    // Layout constraints
+    [NSLayoutConstraint activateConstraints:@[
+        [self.backgroundView.topAnchor constraintEqualToAnchor:self.topAnchor],
+        [self.backgroundView.bottomAnchor constraintEqualToAnchor:self.bottomAnchor],
+        [self.backgroundView.leadingAnchor constraintEqualToAnchor:self.leadingAnchor],
+        [self.backgroundView.trailingAnchor constraintEqualToAnchor:self.trailingAnchor],
+
+        [self.micIconView.leadingAnchor constraintEqualToAnchor:self.backgroundView.leadingAnchor constant:20],
+        [self.micIconView.centerYAnchor constraintEqualToAnchor:self.backgroundView.centerYAnchor],
+        [self.micIconView.widthAnchor constraintEqualToConstant:24],
+        [self.micIconView.heightAnchor constraintEqualToConstant:24],
+
+        [self.titleLabel.leadingAnchor constraintEqualToAnchor:self.micIconView.trailingAnchor constant:12],
+        [self.titleLabel.centerYAnchor constraintEqualToAnchor:self.backgroundView.centerYAnchor],
+
+        [self.waveformView.trailingAnchor constraintEqualToAnchor:self.backgroundView.trailingAnchor constant:-20],
+        [self.waveformView.centerYAnchor constraintEqualToAnchor:self.backgroundView.centerYAnchor],
+        [self.waveformView.widthAnchor constraintEqualToConstant:60],
+        [self.waveformView.heightAnchor constraintEqualToConstant:30],
+    ]];
+
+    // Long-press gesture
+    UILongPressGestureRecognizer *longPress = [[UILongPressGestureRecognizer alloc]
+        initWithTarget:self action:@selector(handleLongPress:)];
+    longPress.minimumPressDuration = 0.05;
+    [self addGestureRecognizer:longPress];
+}
+
+#pragma mark - Setters
+
+- (void)setState:(KBAiRecordButtonState)state {
+    if (_state == state)
+        return;
+    _state = state;
+
+    [self updateAppearance];
+}
+
+- (void)setTintColor:(UIColor *)tintColor {
+    _tintColor = tintColor;
+    self.micIconView.tintColor = tintColor;
+    self.waveformView.waveColor = tintColor;
+}
+
+#pragma mark - Public Methods
+
+- (void)updateVolumeRMS:(float)rms {
+    [self.waveformView updateWithRMS:rms];
+}
+
+#pragma mark - Private Methods
+
+- (void)updateAppearance {
+    switch (self.state) {
+    case KBAiRecordButtonStateNormal:
+        self.titleLabel.text = self.normalTitle;
+        self.backgroundView.backgroundColor = [UIColor systemGray6Color];
+        self.micIconView.alpha = 1;
+        self.waveformView.alpha = 0;
+        [self.waveformView stopAnimation];
+        break;
+
+    case KBAiRecordButtonStateRecording:
+        self.titleLabel.text = self.recordingTitle;
+        self.backgroundView.backgroundColor = [self.tintColor colorWithAlphaComponent:0.15];
+        self.micIconView.alpha = 1;
+        self.waveformView.alpha = 1;
+        [self.waveformView startIdleAnimation];
+        break;
+
+    case KBAiRecordButtonStateDisabled:
+        self.titleLabel.text = self.normalTitle;
+        self.backgroundView.backgroundColor = [UIColor systemGray5Color];
+        self.alpha = 0.5;
+        break;
+    }
+}
+
+- (void)handleLongPress:(UILongPressGestureRecognizer *)gesture {
+    if (self.state == KBAiRecordButtonStateDisabled) {
+        return;
+    }
+
+    CGPoint location = [gesture locationInView:self];
+    BOOL isInside = CGRectContainsPoint(self.bounds, location);
+
+    switch (gesture.state) {
+    case UIGestureRecognizerStateBegan:
+        self.isPressing = YES;
+        [self animateScale:0.95];
+        self.state = KBAiRecordButtonStateRecording;
+
+        if ([self.delegate respondsToSelector:@selector(recordButtonDidBeginPress:)]) {
+            [self.delegate recordButtonDidBeginPress:self];
+        }
+        break;
+
+    case UIGestureRecognizerStateChanged:
+        if (!isInside && self.isPressing) {
+            // Finger slid off the button
+            [self animateScale:1.0];
+        } else if (isInside && self.isPressing) {
+            // Finger slid back onto the button
+            [self animateScale:0.95];
+        }
+        break;
+
+    case UIGestureRecognizerStateEnded:
+        if (self.isPressing) {
+            self.isPressing = NO;
+            [self animateScale:1.0];
+            self.state = KBAiRecordButtonStateNormal;
+            [self.waveformView reset];
+
+            if (isInside) {
+                if ([self.delegate respondsToSelector:@selector(recordButtonDidEndPress:)]) {
+                    [self.delegate recordButtonDidEndPress:self];
+                }
+            } else {
+                if ([self.delegate respondsToSelector:@selector(recordButtonDidCancelPress:)]) {
+                    [self.delegate recordButtonDidCancelPress:self];
+                }
+            }
+        }
+        break;
+
+    case UIGestureRecognizerStateCancelled:
+    case UIGestureRecognizerStateFailed:
+        if (self.isPressing) {
+            self.isPressing = NO;
+            [self animateScale:1.0];
+            self.state = KBAiRecordButtonStateNormal;
+            [self.waveformView reset];
+
+            if ([self.delegate respondsToSelector:@selector(recordButtonDidCancelPress:)]) {
+                [self.delegate recordButtonDidCancelPress:self];
+            }
+        }
+        break;
+
+    default:
+        break;
+    }
+}
+
+- (void)animateScale:(CGFloat)scale {
+    [UIView animateWithDuration:0.15
+                     animations:^{
+                       self.backgroundView.transform = CGAffineTransformMakeScale(scale, scale);
+                     }];
+}
+
+@end
diff --git a/keyBoard/Class/AiTalk/V/KBAiWaveformView.h b/keyBoard/Class/AiTalk/V/KBAiWaveformView.h
new file mode 100644
index 0000000..3bc1a46
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAiWaveformView.h
@@ -0,0 +1,43 @@
+//
+//  KBAiWaveformView.h
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import <UIKit/UIKit.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Waveform animation view
+/// Animates a live waveform from the volume RMS value
+@interface KBAiWaveformView : UIView
+
+/// Waveform color
+@property(nonatomic, strong) UIColor *waveColor;
+
+/// Number of bars
+@property(nonatomic, assign) NSInteger barCount;
+
+/// Bar width
+@property(nonatomic, assign) CGFloat barWidth;
+
+/// Bar spacing
+@property(nonatomic, assign) CGFloat barSpacing;
+
+/// Update the volume value
+/// @param rms volume RMS value (0.0 - 1.0)
+- (void)updateWithRMS:(float)rms;
+
+/// Start the animation (idle wobble)
+- (void)startIdleAnimation;
+
+/// Stop the animation
+- (void)stopAnimation;
+
+/// Reset the waveform
+- (void)reset;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/keyBoard/Class/AiTalk/V/KBAiWaveformView.m b/keyBoard/Class/AiTalk/V/KBAiWaveformView.m
new file mode 100644
index 0000000..ba8bbc9
--- /dev/null
+++ b/keyBoard/Class/AiTalk/V/KBAiWaveformView.m
@@ -0,0 +1,163 @@
+//
+//  KBAiWaveformView.m
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import "KBAiWaveformView.h"
+
+@interface KBAiWaveformView ()
+@property(nonatomic, strong) NSMutableArray<CAShapeLayer *> *barLayers;
+@property(nonatomic, strong) NSMutableArray<NSNumber *> *barHeights;
+@property(nonatomic, strong) CADisplayLink *displayLink;
+@property(nonatomic, assign) float currentRMS;
+@property(nonatomic, assign) float targetRMS;
+@property(nonatomic, assign) BOOL isAnimating;
+@end
+
+@implementation KBAiWaveformView
+
+- (instancetype)initWithFrame:(CGRect)frame {
+    self = [super initWithFrame:frame];
+    if (self) {
+        [self setup];
+    }
+    return self;
+}
+
+- (instancetype)initWithCoder:(NSCoder *)coder {
+    self = [super initWithCoder:coder];
+    if (self) {
+        [self setup];
+    }
+    return self;
+}
+
+- (void)setup {
+    _waveColor = [UIColor systemBlueColor];
+    _barCount = 5;
+    _barWidth = 4;
+    _barSpacing = 3;
+    _barLayers = [[NSMutableArray alloc] init];
+    _barHeights = [[NSMutableArray alloc] init];
+    _currentRMS = 0;
+    _targetRMS = 0;
+
+    self.backgroundColor = [UIColor clearColor];
+}
+
+- (void)layoutSubviews {
+    [super layoutSubviews];
+    [self setupBars];
+}
+
+- (void)setupBars {
+    // Remove the old layers
+    for (CAShapeLayer *layer in self.barLayers) {
+        [layer removeFromSuperlayer];
+    }
+    [self.barLayers removeAllObjects];
+    [self.barHeights removeAllObjects];
+
+    // Compute the total width
+    CGFloat totalWidth = self.barCount * self.barWidth + (self.barCount - 1) * self.barSpacing;
+    CGFloat startX = (self.bounds.size.width - totalWidth) / 2;
+    CGFloat maxHeight = self.bounds.size.height;
+    CGFloat minHeight = maxHeight * 0.2;
+
+    for (NSInteger i = 0; i < self.barCount; i++) {
+        CAShapeLayer *barLayer = [CAShapeLayer layer];
+        barLayer.fillColor = self.waveColor.CGColor;
+        barLayer.cornerRadius = self.barWidth / 2;
+
+        CGFloat x = startX + i * (self.barWidth + self.barSpacing);
+        CGFloat height = minHeight;
+        CGFloat y = (maxHeight - height) / 2;
+
+        barLayer.frame = CGRectMake(x, y, self.barWidth, height);
+        barLayer.backgroundColor = self.waveColor.CGColor;
+
+        [self.layer addSublayer:barLayer];
+        [self.barLayers addObject:barLayer];
+        [self.barHeights addObject:@(height)];
+    }
+}
+
+#pragma mark - Public Methods
+
+- (void)updateWithRMS:(float)rms {
+    self.targetRMS = MIN(MAX(rms, 0), 1);
+}
+
+- (void)startIdleAnimation {
+    if (self.isAnimating)
+        return;
+
+    self.isAnimating = YES;
+    self.displayLink = [CADisplayLink displayLinkWithTarget:self
+                                                   selector:@selector(updateAnimation)];
+    [self.displayLink addToRunLoop:[NSRunLoop mainRunLoop] forMode:NSRunLoopCommonModes];
+}
+
+- (void)stopAnimation {
+    self.isAnimating = NO;
+    [self.displayLink invalidate];
+    self.displayLink = nil;
+}
+
+- (void)reset {
+    self.currentRMS = 0;
+    self.targetRMS = 0;
+    [self updateBarsWithRMS:0];
+}
+
+#pragma mark - Animation
+
+- (void)updateAnimation {
+    // Smoothly approach the target RMS
+    CGFloat smoothing = 0.3;
+    self.currentRMS = self.currentRMS + (self.targetRMS - self.currentRMS) * smoothing;
+
+    [self updateBarsWithRMS:self.currentRMS];
+}
+
+- (void)updateBarsWithRMS:(float)rms {
+    CGFloat maxHeight = self.bounds.size.height;
+    CGFloat minHeight = maxHeight * 0.2;
+    CGFloat range = maxHeight - minHeight;
+
+    // Give each bar a slightly different height and phase
+    NSTimeInterval time = CACurrentMediaTime();
+
+    for (NSInteger i = 0; i < self.barLayers.count; i++) {
+        CAShapeLayer *layer = self.barLayers[i];
+
+        // Time-based wobble
+        CGFloat phase = (CGFloat)i / self.barLayers.count * M_PI * 2;
+        CGFloat wave = sin(time * 3 + phase) * 0.3 + 0.7; // 0.4 - 1.0
+
+        // Compute the bar height
+        CGFloat heightFactor = rms * wave;
+        CGFloat height = minHeight + range * heightFactor;
+        height = MAX(minHeight, MIN(maxHeight, height));
+
+        // Update the position
+        CGFloat y = (maxHeight - height) / 2;
+
+        [CATransaction begin];
+        [CATransaction setDisableActions:YES];
+        layer.frame = CGRectMake(layer.frame.origin.x, y, self.barWidth, height);
+        [CATransaction commit];
+    }
+}
+
+- (void)dealloc {
+    [self stopAnimation];
+}
+
+@end
diff --git a/keyBoard/Class/AiTalk/VC/KBAiMainVC.h b/keyBoard/Class/AiTalk/VC/KBAiMainVC.h
new file mode 100644
index 0000000..f9e8065
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VC/KBAiMainVC.h
@@ -0,0 +1,17 @@
+//
+//  KBAiMainVC.h
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import <UIKit/UIKit.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Main screen for AI voice companion chat
+@interface KBAiMainVC : UIViewController
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/keyBoard/Class/AiTalk/VC/KBAiMainVC.m b/keyBoard/Class/AiTalk/VC/KBAiMainVC.m
new file mode 100644
index 0000000..4633713
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VC/KBAiMainVC.m
@@ -0,0 +1,291 @@
+//
+//  KBAiMainVC.m
+//  keyBoard
+//
+//  Created by Mac on 2026/1/15.
+//
+
+#import "KBAiMainVC.h"
+#import "ConversationOrchestrator.h"
+#import "KBAiChatView.h"
+#import "KBAiRecordButton.h"
+#import "KBAICommentView.h"
+#import "LSTPopView.h"
+
+@interface KBAiMainVC () <KBAiRecordButtonDelegate>
+@property (nonatomic, weak) LSTPopView *popView;
+
+// UI
+@property(nonatomic, strong) KBAiChatView *chatView;
+@property(nonatomic, strong) KBAiRecordButton *recordButton;
+@property(nonatomic, strong) UILabel *statusLabel;
+
+// Core modules
+@property(nonatomic, strong) ConversationOrchestrator *orchestrator;
+
+@end
+
+@implementation KBAiMainVC
+
+#pragma mark - Lifecycle
+
+- (void)viewDidLoad {
+    [super viewDidLoad];
+
+    [self setupUI];
+    [self setupOrchestrator];
+}
+
+- (void)viewWillDisappear:(BOOL)animated {
+    [super viewWillDisappear:animated];
+
+    // Stop the conversation when the page disappears
+    [self.orchestrator stop];
+}
+
+#pragma mark - UI Setup
+
+- (void)setupUI {
+    self.view.backgroundColor = [UIColor systemBackgroundColor];
+    self.title = @"AI 助手";
+
+    // Safe area
+    UILayoutGuide *safeArea = self.view.safeAreaLayoutGuide;
+
+    // Status label
+    self.statusLabel = [[UILabel alloc] init];
+    self.statusLabel.text = @"按住按钮开始对话";
+    self.statusLabel.font = [UIFont systemFontOfSize:14];
+    self.statusLabel.textColor = [UIColor secondaryLabelColor];
+    self.statusLabel.textAlignment = NSTextAlignmentCenter;
+    self.statusLabel.translatesAutoresizingMaskIntoConstraints = NO;
+    [self.view addSubview:self.statusLabel];
+
+    // Chat view
+    self.chatView = [[KBAiChatView alloc] init];
+    self.chatView.backgroundColor = [UIColor systemBackgroundColor];
+    self.chatView.translatesAutoresizingMaskIntoConstraints = NO;
+    [self.view addSubview:self.chatView];
+
+    // Record button
+    self.recordButton = [[KBAiRecordButton alloc] init];
+    self.recordButton.delegate = self;
+    self.recordButton.translatesAutoresizingMaskIntoConstraints = NO;
+    [self.view addSubview:self.recordButton];
+
+    // Layout constraints
+    [NSLayoutConstraint activateConstraints:@[
+        // Status label
+        [self.statusLabel.topAnchor constraintEqualToAnchor:safeArea.topAnchor constant:8],
+        [self.statusLabel.leadingAnchor constraintEqualToAnchor:safeArea.leadingAnchor constant:16],
+        [self.statusLabel.trailingAnchor constraintEqualToAnchor:safeArea.trailingAnchor constant:-16],
+
+        // Chat view
+        [self.chatView.topAnchor constraintEqualToAnchor:self.statusLabel.bottomAnchor constant:8],
+        [self.chatView.leadingAnchor constraintEqualToAnchor:safeArea.leadingAnchor],
+        [self.chatView.trailingAnchor constraintEqualToAnchor:safeArea.trailingAnchor],
+        [self.chatView.bottomAnchor constraintEqualToAnchor:self.recordButton.topAnchor constant:-16],
+
+        // Record button
+        [self.recordButton.leadingAnchor constraintEqualToAnchor:safeArea.leadingAnchor constant:20],
+        [self.recordButton.trailingAnchor constraintEqualToAnchor:safeArea.trailingAnchor constant:-20],
+        [self.recordButton.bottomAnchor constraintEqualToAnchor:safeArea.bottomAnchor constant:-16],
+        [self.recordButton.heightAnchor constraintEqualToConstant:50],
+    ]];
+}
+
+#pragma mark - Orchestrator Setup
+
+- (void)setupOrchestrator {
+    self.orchestrator = [[ConversationOrchestrator alloc] init];
+
+    // Configure server URLs (TODO: replace with the real addresses)
+    // self.orchestrator.asrServerURL = @"wss://your-asr-server.com/ws/asr";
+    // self.orchestrator.llmServerURL = @"https://your-llm-server.com/api/chat/stream";
+    // self.orchestrator.ttsServerURL = @"https://your-tts-server.com/api/tts";
+
+    __weak typeof(self) weakSelf = self;
+
+    // State-change callback
+    self.orchestrator.onStateChange = ^(ConversationState state) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        [strongSelf updateStatusForState:state];
+    };
+
+    // Live partial-recognition text callback
+    self.orchestrator.onPartialText = ^(NSString *text) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        strongSelf.statusLabel.text = text.length > 0 ? text : @"正在识别...";
+    };
+
+    // User final-text callback
+    self.orchestrator.onUserFinalText = ^(NSString *text) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        if (text.length > 0) {
+            [strongSelf.chatView addUserMessage:text];
+        }
+    };
+
+    // AI visible-text callback (typewriter effect)
+    self.orchestrator.onAssistantVisibleText = ^(NSString *text) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        [strongSelf.chatView updateLastAssistantMessage:text];
+    };
+
+    // AI full-reply callback
+    self.orchestrator.onAssistantFullText = ^(NSString *text) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        [strongSelf.chatView updateLastAssistantMessage:text];
+        [strongSelf.chatView markLastAssistantMessageComplete];
+    };
+
+    // Volume-update callback
+    self.orchestrator.onVolumeUpdate = ^(float rms) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        [strongSelf.recordButton updateVolumeRMS:rms];
+    };
+
+    // AI starts speaking
+    self.orchestrator.onSpeakingStart = ^{
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        // Add an empty AI message as a placeholder
+        [strongSelf.chatView addAssistantMessage:@""];
+    };
+
+    // AI finishes speaking
+    self.orchestrator.onSpeakingEnd = ^{
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        [strongSelf.chatView markLastAssistantMessageComplete];
+    };
+
+    // Error callback
+    self.orchestrator.onError = ^(NSError *error) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+            return;
+
+        [strongSelf showError:error];
+    };
+}
+
+#pragma mark - Events
+- (void)showComment {
+    CGFloat customViewHeight = KB_SCREEN_HEIGHT * (0.8);
+    KBAICommentView *customView = [[KBAICommentView alloc] initWithFrame:CGRectMake(0, 0, KB_SCREEN_WIDTH, customViewHeight)];
+    LSTPopView *popView = [LSTPopView initWithCustomView:customView
+                                              parentView:self.view
+                                                popStyle:LSTPopStyleSmoothFromBottom
+                                            dismissStyle:LSTDismissStyleSmoothToBottom];
+    self.popView = popView;
+    popView.priority = 1000;
+    popView.hemStyle = LSTHemStyleBottom;
+    popView.dragStyle = LSTDragStyleY_Positive;
+    popView.dragDistance = customViewHeight * 0.5;
+    popView.sweepStyle = LSTSweepStyleY_Positive;
+    popView.swipeVelocity = 1600;
+    popView.sweepDismissStyle = LSTSweepDismissStyleSmooth;
+
+    [popView pop];
+}
+
+#pragma mark - UI Updates
+
+- (void)updateStatusForState:(ConversationState)state {
+    switch (state) {
+    case ConversationStateIdle:
+        self.statusLabel.text = @"按住按钮开始对话";
+        self.recordButton.state = KBAiRecordButtonStateNormal;
+        break;
+
+    case ConversationStateListening:
+        self.statusLabel.text = @"正在聆听...";
+        self.recordButton.state = KBAiRecordButtonStateRecording;
+        break;
+
+    case ConversationStateRecognizing:
+        self.statusLabel.text = @"正在识别...";
+        self.recordButton.state = KBAiRecordButtonStateNormal;
+        break;
+
+    case ConversationStateThinking:
+        self.statusLabel.text = @"AI 正在思考...";
+        self.recordButton.state = KBAiRecordButtonStateNormal;
+        break;
+
+    case ConversationStateSpeaking:
+        self.statusLabel.text = @"AI 正在回复...";
+        self.recordButton.state = KBAiRecordButtonStateNormal;
+        break;
+    }
+}
+
+- (void)showError:(NSError *)error {
+    UIAlertController *alert = [UIAlertController alertControllerWithTitle:@"错误"
message:error.localizedDescription + preferredStyle:UIAlertControllerStyleAlert]; + [alert addAction:[UIAlertAction actionWithTitle:@"确定" + style:UIAlertActionStyleDefault + handler:nil]]; + [self presentViewController:alert animated:YES completion:nil]; +} + +#pragma mark - KBAiRecordButtonDelegate + +- (void)recordButtonDidBeginPress:(KBAiRecordButton *)button { + [self.orchestrator userDidPressRecord]; +} + +- (void)recordButtonDidEndPress:(KBAiRecordButton *)button { + [self.orchestrator userDidReleaseRecord]; +} + +- (void)recordButtonDidCancelPress:(KBAiRecordButton *)button { + // A cancelled press is treated like a release; ASR will return an empty or + // partial result. + [self.orchestrator userDidReleaseRecord]; +} + +@end diff --git a/keyBoard/Class/AiTalk/VM/ASRStreamClient.h b/keyBoard/Class/AiTalk/VM/ASRStreamClient.h new file mode 100644 index 0000000..b424baf --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/ASRStreamClient.h @@ -0,0 +1,51 @@ +// +// ASRStreamClient.h +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import <Foundation/Foundation.h> + +NS_ASSUME_NONNULL_BEGIN + +/// Delegate for the streaming ASR client +@protocol ASRStreamClientDelegate <NSObject> +@required +/// An interim (partial) recognition result arrived +- (void)asrClientDidReceivePartialText:(NSString *)text; +/// The final recognition result arrived +- (void)asrClientDidReceiveFinalText:(NSString *)text; +/// Recognition failed +- (void)asrClientDidFail:(NSError *)error; +@end + +/// Streaming ASR client +/// Streams speech to the recognizer over NSURLSessionWebSocketTask +@interface ASRStreamClient : NSObject + +@property(nonatomic, weak) id<ASRStreamClientDelegate> delegate; + +/// WebSocket URL of the ASR server +@property(nonatomic, copy) NSString *serverURL; + +/// Whether the socket is connected +@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected; + +/// Starts a new recognition session +/// @param sessionId Session ID +- (void)startWithSessionId:(NSString *)sessionId; + +/// Sends one PCM audio frame (20 ms / 640 bytes) +/// @param pcmFrame PCM data +- (void)sendAudioPCMFrame:(NSData *)pcmFrame; + +/// Ends the current session and requests the final result +- (void)finalize; + +/// Cancels the session +- (void)cancel; + +@end + +NS_ASSUME_NONNULL_END diff --git a/keyBoard/Class/AiTalk/VM/ASRStreamClient.m b/keyBoard/Class/AiTalk/VM/ASRStreamClient.m new file mode 100644 index 0000000..218fb8a --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/ASRStreamClient.m @@ -0,0 +1,271 @@ +// +// ASRStreamClient.m +// keyBoard +// +// Created by Mac on 2026/1/15.
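+//
+// Wire protocol, as implemented by this client. The server contract must
+// match; sample rate and channel count come from AudioCaptureManager's
+// constants, and the payloads below are illustrative rather than a spec:
+//
+//   client -> server, text frame (opens a session):
+//     {"type":"start","sessionId":"...","format":"pcm_s16le",
+//      "sampleRate":16000,"channels":1}
+//   client -> server, binary frames: raw PCM Int16, 640 bytes per 20 ms
+//   client -> server, text frame (requests the final result):
+//     {"type":"finalize","sessionId":"..."}
+//
+//   server -> client: {"type":"partial","text":"..."}  (repeated)
+//                     {"type":"final","text":"..."}    (ends the session)
+//                     {"type":"error","code":1,"message":"..."}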
+// + +#import "ASRStreamClient.h" +#import "AudioCaptureManager.h" + +@interface ASRStreamClient () + +@property(nonatomic, strong) NSURLSession *urlSession; +@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask; +@property(nonatomic, copy) NSString *currentSessionId; +@property(nonatomic, strong) dispatch_queue_t networkQueue; +@property(nonatomic, assign) BOOL connected; + +@end + +@implementation ASRStreamClient + +- (instancetype)init { + self = [super init]; + if (self) { + _networkQueue = dispatch_queue_create("com.keyboard.aitalk.asr.network", + DISPATCH_QUEUE_SERIAL); + // TODO: 替换为实际的 ASR 服务器地址 + _serverURL = @"wss://your-asr-server.com/ws/asr"; + } + return self; +} + +- (void)dealloc { + [self cancel]; +} + +#pragma mark - Public Methods + +- (void)startWithSessionId:(NSString *)sessionId { + dispatch_async(self.networkQueue, ^{ + [self cancelInternal]; + + self.currentSessionId = sessionId; + + // 创建 WebSocket 连接 + NSURL *url = [NSURL URLWithString:self.serverURL]; + NSURLSessionConfiguration *config = + [NSURLSessionConfiguration defaultSessionConfiguration]; + config.timeoutIntervalForRequest = 30; + config.timeoutIntervalForResource = 300; + + self.urlSession = [NSURLSession sessionWithConfiguration:config + delegate:self + delegateQueue:nil]; + + self.webSocketTask = [self.urlSession webSocketTaskWithURL:url]; + [self.webSocketTask resume]; + + // 发送 start 消息 + NSDictionary *startMessage = @{ + @"type" : @"start", + @"sessionId" : sessionId, + @"format" : @"pcm_s16le", + @"sampleRate" : @(kAudioSampleRate), + @"channels" : @(kAudioChannels) + }; + + NSError *jsonError = nil; + NSData *jsonData = [NSJSONSerialization dataWithJSONObject:startMessage + options:0 + error:&jsonError]; + if (jsonError) { + [self reportError:jsonError]; + return; + } + + NSString *jsonString = [[NSString alloc] initWithData:jsonData + encoding:NSUTF8StringEncoding]; + NSURLSessionWebSocketMessage *message = + [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString]; + + [self.webSocketTask + sendMessage:message + completionHandler:^(NSError *_Nullable error) { + if (error) { + [self reportError:error]; + } else { + self.connected = YES; + [self receiveMessage]; + NSLog(@"[ASRStreamClient] Started session: %@", sessionId); + } + }]; + }); +} + +- (void)sendAudioPCMFrame:(NSData *)pcmFrame { + if (!self.connected || !self.webSocketTask) { + return; + } + + dispatch_async(self.networkQueue, ^{ + NSURLSessionWebSocketMessage *message = + [[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame]; + [self.webSocketTask sendMessage:message + completionHandler:^(NSError *_Nullable error) { + if (error) { + NSLog(@"[ASRStreamClient] Failed to send audio frame: %@", + error.localizedDescription); + } + }]; + }); +} + +- (void)finalize { + if (!self.connected || !self.webSocketTask) { + return; + } + + dispatch_async(self.networkQueue, ^{ + NSDictionary *finalizeMessage = + @{@"type" : @"finalize", @"sessionId" : self.currentSessionId ?: @""}; + + NSError *jsonError = nil; + NSData *jsonData = [NSJSONSerialization dataWithJSONObject:finalizeMessage + options:0 + error:&jsonError]; + if (jsonError) { + [self reportError:jsonError]; + return; + } + + NSString *jsonString = [[NSString alloc] initWithData:jsonData + encoding:NSUTF8StringEncoding]; + NSURLSessionWebSocketMessage *message = + [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString]; + + [self.webSocketTask sendMessage:message + completionHandler:^(NSError *_Nullable error) { + if (error) { + [self 
reportError:error]; + } else { + NSLog(@"[ASRStreamClient] Sent finalize for session: %@", + self.currentSessionId); + } + }]; + }); +} + +- (void)cancel { + dispatch_async(self.networkQueue, ^{ + [self cancelInternal]; + }); +} + +#pragma mark - Private Methods + +- (void)cancelInternal { + self.connected = NO; + + if (self.webSocketTask) { + [self.webSocketTask cancel]; + self.webSocketTask = nil; + } + + if (self.urlSession) { + [self.urlSession invalidateAndCancel]; + self.urlSession = nil; + } + + self.currentSessionId = nil; +} + +- (void)receiveMessage { + if (!self.webSocketTask) { + return; + } + + __weak typeof(self) weakSelf = self; + [self.webSocketTask receiveMessageWithCompletionHandler:^( + NSURLSessionWebSocketMessage *_Nullable message, + NSError *_Nullable error) { + __strong typeof(weakSelf) strongSelf = weakSelf; + if (!strongSelf) + return; + + if (error) { + // 检查是否是正常关闭 + if (error.code != 57 && error.code != NSURLErrorCancelled) { + [strongSelf reportError:error]; + } + return; + } + + if (message.type == NSURLSessionWebSocketMessageTypeString) { + [strongSelf handleTextMessage:message.string]; + } + + // 继续接收下一条消息 + [strongSelf receiveMessage]; + }]; +} + +- (void)handleTextMessage:(NSString *)text { + NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding]; + NSError *jsonError = nil; + NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data + options:0 + error:&jsonError]; + + if (jsonError) { + NSLog(@"[ASRStreamClient] Failed to parse message: %@", text); + return; + } + + NSString *type = json[@"type"]; + + if ([type isEqualToString:@"partial"]) { + NSString *partialText = json[@"text"] ?: @""; + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate + respondsToSelector:@selector(asrClientDidReceivePartialText:)]) { + [self.delegate asrClientDidReceivePartialText:partialText]; + } + }); + } else if ([type isEqualToString:@"final"]) { + NSString *finalText = json[@"text"] ?: @""; + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate + respondsToSelector:@selector(asrClientDidReceiveFinalText:)]) { + [self.delegate asrClientDidReceiveFinalText:finalText]; + } + }); + // 收到最终结果后关闭连接 + [self cancelInternal]; + } else if ([type isEqualToString:@"error"]) { + NSInteger code = [json[@"code"] integerValue]; + NSString *message = json[@"message"] ?: @"Unknown error"; + NSError *error = + [NSError errorWithDomain:@"ASRStreamClient" + code:code + userInfo:@{NSLocalizedDescriptionKey : message}]; + [self reportError:error]; + } +} + +- (void)reportError:(NSError *)error { + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate respondsToSelector:@selector(asrClientDidFail:)]) { + [self.delegate asrClientDidFail:error]; + } + }); +} + +#pragma mark - NSURLSessionWebSocketDelegate + +- (void)URLSession:(NSURLSession *)session + webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask + didOpenWithProtocol:(NSString *)protocol { + NSLog(@"[ASRStreamClient] WebSocket connected with protocol: %@", protocol); +} + +- (void)URLSession:(NSURLSession *)session + webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask + didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode + reason:(NSData *)reason { + NSLog(@"[ASRStreamClient] WebSocket closed with code: %ld", (long)closeCode); + self.connected = NO; +} + +@end diff --git a/keyBoard/Class/AiTalk/VM/AudioCaptureManager.h b/keyBoard/Class/AiTalk/VM/AudioCaptureManager.h new file mode 100644 index 0000000..b6cc978 --- /dev/null +++ 
b/keyBoard/Class/AiTalk/VM/AudioCaptureManager.h @@ -0,0 +1,55 @@ +// +// AudioCaptureManager.h +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import + +NS_ASSUME_NONNULL_BEGIN + +/// 音频采集参数(固定值,便于端到端稳定) +/// Sample Rate: 16000 Hz +/// Channels: 1 (Mono) +/// Format: PCM Int16 (pcm_s16le) +/// Frame Duration: 20ms (320 samples, 640 bytes) +extern const double kAudioSampleRate; // 16000.0 +extern const int kAudioChannels; // 1 +extern const NSUInteger kAudioFrameDuration; // 20 (ms) +extern const NSUInteger kAudioFrameSamples; // 320 (16000 * 0.02) +extern const NSUInteger kAudioFrameBytes; // 640 (320 * 2) + +/// 音频采集管理器代理 +@protocol AudioCaptureManagerDelegate +@required +/// 输出 PCM 帧(20ms / 640 bytes) +/// @param pcmFrame 640 字节的 PCM Int16 数据 +- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame; + +@optional +/// 更新 RMS 值(用于波形显示) +/// @param rms 当前音量的 RMS 值 (0.0 - 1.0) +- (void)audioCaptureManagerDidUpdateRMS:(float)rms; +@end + +/// 音频采集管理器 +/// 使用 AVAudioEngine 采集麦克风音频,输出 20ms PCM 帧 +@interface AudioCaptureManager : NSObject + +@property(nonatomic, weak) id delegate; + +/// 是否正在采集 +@property(nonatomic, assign, readonly, getter=isCapturing) BOOL capturing; + +/// 开始采集 +/// @param error 错误信息 +/// @return 是否启动成功 +- (BOOL)startCapture:(NSError **)error; + +/// 停止采集 +- (void)stopCapture; + +@end + +NS_ASSUME_NONNULL_END diff --git a/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m b/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m new file mode 100644 index 0000000..e0a37ed --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/AudioCaptureManager.m @@ -0,0 +1,269 @@ +// +// AudioCaptureManager.m +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import "AudioCaptureManager.h" +#import + +// 音频采集参数常量 +const double kAudioSampleRate = 16000.0; +const int kAudioChannels = 1; +const NSUInteger kAudioFrameDuration = 20; // ms +const NSUInteger kAudioFrameSamples = 320; // 16000 * 0.02 +const NSUInteger kAudioFrameBytes = 640; // 320 * 2 (Int16) + +@interface AudioCaptureManager () + +@property(nonatomic, strong) AVAudioEngine *audioEngine; +@property(nonatomic, strong) dispatch_queue_t audioQueue; +@property(nonatomic, assign) BOOL capturing; + +// Ring buffer for accumulating samples to form 20ms frames +@property(nonatomic, strong) NSMutableData *ringBuffer; +@property(nonatomic, assign) NSUInteger ringBufferWriteIndex; + +@end + +@implementation AudioCaptureManager + +- (instancetype)init { + self = [super init]; + if (self) { + _audioEngine = [[AVAudioEngine alloc] init]; + _audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture", + DISPATCH_QUEUE_SERIAL); + _ringBuffer = [[NSMutableData alloc] + initWithLength:kAudioFrameBytes * 4]; // Buffer for multiple frames + _ringBufferWriteIndex = 0; + _capturing = NO; + } + return self; +} + +- (void)dealloc { + [self stopCapture]; +} + +#pragma mark - Public Methods + +- (BOOL)startCapture:(NSError **)error { + if (self.capturing) { + return YES; + } + + AVAudioInputNode *inputNode = self.audioEngine.inputNode; + + // 获取输入格式 + AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0]; + + // 目标格式:16kHz, Mono, Int16 + AVAudioFormat *targetFormat = + [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16 + sampleRate:kAudioSampleRate + channels:kAudioChannels + interleaved:YES]; + + // 创建格式转换器 + AVAudioConverter *converter = + [[AVAudioConverter alloc] initFromFormat:inputFormat + toFormat:targetFormat]; + if (!converter) { + if (error) { + *error = [NSError errorWithDomain:@"AudioCaptureManager" + code:-1 
+ userInfo:@{ + NSLocalizedDescriptionKey : + @"Failed to create audio converter" + }]; + } + return NO; + } + + // Compute a suitable tap buffer size (~20 ms of input samples) + AVAudioFrameCount bufferSize = + (AVAudioFrameCount)(inputFormat.sampleRate * 0.02); + + // Install the tap + __weak typeof(self) weakSelf = self; + [inputNode installTapOnBus:0 + bufferSize:bufferSize + format:inputFormat + block:^(AVAudioPCMBuffer *_Nonnull buffer, + AVAudioTime *_Nonnull when) { + [weakSelf processAudioBuffer:buffer + withConverter:converter + targetFormat:targetFormat]; + }]; + + // Start the engine + NSError *startError = nil; + [self.audioEngine prepare]; + + if (![self.audioEngine startAndReturnError:&startError]) { + [inputNode removeTapOnBus:0]; + if (error) { + *error = startError; + } + NSLog(@"[AudioCaptureManager] Failed to start engine: %@", + startError.localizedDescription); + return NO; + } + + self.capturing = YES; + self.ringBufferWriteIndex = 0; + + NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz", + inputFormat.sampleRate); + return YES; +} + +- (void)stopCapture { + if (!self.capturing) { + return; + } + + [self.audioEngine.inputNode removeTapOnBus:0]; + [self.audioEngine stop]; + + self.capturing = NO; + self.ringBufferWriteIndex = 0; + + NSLog(@"[AudioCaptureManager] Stopped capturing"); +} + +#pragma mark - Audio Processing + +- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer + withConverter:(AVAudioConverter *)converter + targetFormat:(AVAudioFormat *)targetFormat { + + if (!self.capturing) { + return; + } + + // Compute the output frame count + AVAudioFrameCount outputFrameCapacity = + (AVAudioFrameCount)(buffer.frameLength * + (kAudioSampleRate / buffer.format.sampleRate)) + + 1; + + // Create the output buffer + AVAudioPCMBuffer *outputBuffer = + [[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat + frameCapacity:outputFrameCapacity]; + + // Format conversion. The input block may be invoked more than once, so the + // captured buffer must be handed over exactly once; afterwards report + // NoDataNow, otherwise the converter would duplicate the input. + __block BOOL bufferProvided = NO; + NSError *conversionError = nil; + AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable( + AVAudioPacketCount inNumberOfPackets, + AVAudioConverterInputStatus *_Nonnull outStatus) { + if (bufferProvided) { + *outStatus = AVAudioConverterInputStatus_NoDataNow; + return nil; + } + bufferProvided = YES; + *outStatus = AVAudioConverterInputStatus_HaveData; + return buffer; + }; + + AVAudioConverterOutputStatus status = + [converter convertToBuffer:outputBuffer + error:&conversionError + withInputFromBlock:inputBlock]; + + if (status == AVAudioConverterOutputStatus_Error) { + NSLog(@"[AudioCaptureManager] Conversion error: %@", + conversionError.localizedDescription); + return; + } + + // Grab the Int16 samples + int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0]; + NSUInteger sampleCount = outputBuffer.frameLength; + NSUInteger byteCount = sampleCount * sizeof(int16_t); + + // Compute RMS + [self calculateAndReportRMS:samples sampleCount:sampleCount]; + + // Copy the converted samples before hopping queues: `samples` points into + // outputBuffer, which does not outlive this method, so the async block + // must not capture the raw pointer. + NSData *pcmData = [NSData dataWithBytes:samples length:byteCount]; + dispatch_async(self.audioQueue, ^{ + [self appendToRingBuffer:(int16_t *)pcmData.bytes + byteCount:pcmData.length]; + }); +} + +- (void)appendToRingBuffer:(int16_t *)samples byteCount:(NSUInteger)byteCount { + // Append the new data to the ring buffer + uint8_t *ringBufferBytes = (uint8_t *)self.ringBuffer.mutableBytes; + NSUInteger ringBufferLength = self.ringBuffer.length; + + NSUInteger bytesToCopy = byteCount; + NSUInteger sourceOffset = 0; + + while (bytesToCopy > 0) { + NSUInteger spaceAvailable = ringBufferLength - self.ringBufferWriteIndex; + NSUInteger copySize = MIN(bytesToCopy, spaceAvailable); + + memcpy(ringBufferBytes + self.ringBufferWriteIndex, + (uint8_t *)samples + sourceOffset, copySize); + self.ringBufferWriteIndex += copySize; + sourceOffset += copySize; + bytesToCopy -= copySize; + + // Check whether a complete 20 ms frame is available + while (self.ringBufferWriteIndex >=
kAudioFrameBytes) { + // 提取一个完整帧 + NSData *frame = [NSData dataWithBytes:ringBufferBytes + length:kAudioFrameBytes]; + + // 移动剩余数据到开头 + NSUInteger remaining = self.ringBufferWriteIndex - kAudioFrameBytes; + if (remaining > 0) { + memmove(ringBufferBytes, ringBufferBytes + kAudioFrameBytes, remaining); + } + self.ringBufferWriteIndex = remaining; + + // 回调输出帧 + [self outputPCMFrame:frame]; + } + + // 如果 ring buffer 已满,从头开始 + if (self.ringBufferWriteIndex >= ringBufferLength) { + self.ringBufferWriteIndex = 0; + } + } +} + +- (void)outputPCMFrame:(NSData *)frame { + if (!self.capturing) { + return; + } + + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate respondsToSelector:@selector + (audioCaptureManagerDidOutputPCMFrame:)]) { + [self.delegate audioCaptureManagerDidOutputPCMFrame:frame]; + } + }); +} + +- (void)calculateAndReportRMS:(int16_t *)samples + sampleCount:(NSUInteger)sampleCount { + if (sampleCount == 0) + return; + + // 计算 RMS + double sum = 0.0; + for (NSUInteger i = 0; i < sampleCount; i++) { + double sample = (double)samples[i] / 32768.0; // Normalize to -1.0 ~ 1.0 + sum += sample * sample; + } + double rms = sqrt(sum / sampleCount); + float rmsFloat = (float)MIN(rms * 2.0, 1.0); // Scale and clamp to 0.0 ~ 1.0 + + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate + respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) { + [self.delegate audioCaptureManagerDidUpdateRMS:rmsFloat]; + } + }); +} + +@end diff --git a/keyBoard/Class/AiTalk/VM/AudioSessionManager.h b/keyBoard/Class/AiTalk/VM/AudioSessionManager.h new file mode 100644 index 0000000..e0eb59b --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/AudioSessionManager.h @@ -0,0 +1,66 @@ +// +// AudioSessionManager.h +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import +#import + +NS_ASSUME_NONNULL_BEGIN + +/// 音频会话中断类型 +typedef NS_ENUM(NSInteger, KBAudioSessionInterruptionType) { + KBAudioSessionInterruptionTypeBegan, // 中断开始(来电等) + KBAudioSessionInterruptionTypeEnded // 中断结束 +}; + +/// 音频会话管理器代理 +@protocol AudioSessionManagerDelegate +@optional +/// 音频会话被中断 +- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type; +/// 音频路由发生变化 +- (void)audioSessionManagerRouteDidChange; +/// 麦克风权限状态变化 +- (void)audioSessionManagerMicrophonePermissionDenied; +@end + +/// 音频会话管理器 +/// 负责 AVAudioSession 配置、权限请求、中断处理 +@interface AudioSessionManager : NSObject + +@property(nonatomic, weak) id delegate; + +/// 单例 ++ (instancetype)sharedManager; + +/// 请求麦克风权限 +/// @param completion 完成回调,granted 表示是否获得权限 +- (void)requestMicrophonePermission:(void (^)(BOOL granted))completion; + +/// 检查麦克风权限状态 +- (BOOL)hasMicrophonePermission; + +/// 配置音频会话为对话模式(录音+播放) +/// @param error 错误信息 +/// @return 是否配置成功 +- (BOOL)configureForConversation:(NSError **)error; + +/// 配置音频会话为仅播放模式 +/// @param error 错误信息 +/// @return 是否配置成功 +- (BOOL)configureForPlayback:(NSError **)error; + +/// 激活音频会话 +/// @param error 错误信息 +/// @return 是否激活成功 +- (BOOL)activateSession:(NSError **)error; + +/// 停用音频会话 +- (void)deactivateSession; + +@end + +NS_ASSUME_NONNULL_END diff --git a/keyBoard/Class/AiTalk/VM/AudioSessionManager.m b/keyBoard/Class/AiTalk/VM/AudioSessionManager.m new file mode 100644 index 0000000..6cdbe8b --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/AudioSessionManager.m @@ -0,0 +1,234 @@ +// +// AudioSessionManager.m +// keyBoard +// +// Created by Mac on 2026/1/15. 
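+//
+// Typical call sequence, mirroring ConversationOrchestrator's startRecording
+// (a sketch of this class's API, not the only valid order):
+//
+//   AudioSessionManager *session = [AudioSessionManager sharedManager];
+//   [session requestMicrophonePermission:^(BOOL granted) {
+//     if (!granted) return;  // the delegate is notified of the denial too
+//     NSError *error = nil;
+//     if (![session configureForConversation:&error]) return;
+//     if (![session activateSession:&error]) return;
+//     // ... start AudioCaptureManager; call deactivateSession when done
+//   }];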
+// + +#import "AudioSessionManager.h" + +@interface AudioSessionManager () +@property(nonatomic, assign) BOOL isSessionActive; +@end + +@implementation AudioSessionManager + +#pragma mark - Singleton + ++ (instancetype)sharedManager { + static AudioSessionManager *instance = nil; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + instance = [[AudioSessionManager alloc] init]; + }); + return instance; +} + +- (instancetype)init { + self = [super init]; + if (self) { + _isSessionActive = NO; + [self setupNotifications]; + } + return self; +} + +- (void)dealloc { + [[NSNotificationCenter defaultCenter] removeObserver:self]; +} + +#pragma mark - Notifications + +- (void)setupNotifications { + // 监听音频会话中断通知 + [[NSNotificationCenter defaultCenter] + addObserver:self + selector:@selector(handleInterruption:) + name:AVAudioSessionInterruptionNotification + object:nil]; + + // 监听音频路由变化通知 + [[NSNotificationCenter defaultCenter] + addObserver:self + selector:@selector(handleRouteChange:) + name:AVAudioSessionRouteChangeNotification + object:nil]; +} + +- (void)handleInterruption:(NSNotification *)notification { + NSDictionary *info = notification.userInfo; + AVAudioSessionInterruptionType type = + [info[AVAudioSessionInterruptionTypeKey] unsignedIntegerValue]; + + if (type == AVAudioSessionInterruptionTypeBegan) { + // 中断开始:来电、闹钟等 + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate + respondsToSelector:@selector(audioSessionManagerDidInterrupt:)]) { + [self.delegate audioSessionManagerDidInterrupt: + KBAudioSessionInterruptionTypeBegan]; + } + }); + } else if (type == AVAudioSessionInterruptionTypeEnded) { + // 中断结束 + AVAudioSessionInterruptionOptions options = + [info[AVAudioSessionInterruptionOptionKey] unsignedIntegerValue]; + if (options & AVAudioSessionInterruptionOptionShouldResume) { + // 可以恢复播放 + [self activateSession:nil]; + } + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate + respondsToSelector:@selector(audioSessionManagerDidInterrupt:)]) { + [self.delegate audioSessionManagerDidInterrupt: + KBAudioSessionInterruptionTypeEnded]; + } + }); + } +} + +- (void)handleRouteChange:(NSNotification *)notification { + NSDictionary *info = notification.userInfo; + AVAudioSessionRouteChangeReason reason = + [info[AVAudioSessionRouteChangeReasonKey] unsignedIntegerValue]; + + switch (reason) { + case AVAudioSessionRouteChangeReasonOldDeviceUnavailable: + case AVAudioSessionRouteChangeReasonNewDeviceAvailable: { + // 旧设备不可用(如耳机拔出)或新设备可用(如耳机插入) + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate respondsToSelector:@selector + (audioSessionManagerRouteDidChange)]) { + [self.delegate audioSessionManagerRouteDidChange]; + } + }); + break; + } + default: + break; + } +} + +#pragma mark - Microphone Permission + +- (void)requestMicrophonePermission:(void (^)(BOOL))completion { + AVAudioSession *session = [AVAudioSession sharedInstance]; + + [session requestRecordPermission:^(BOOL granted) { + dispatch_async(dispatch_get_main_queue(), ^{ + if (!granted) { + if ([self.delegate respondsToSelector:@selector + (audioSessionManagerMicrophonePermissionDenied)]) { + [self.delegate audioSessionManagerMicrophonePermissionDenied]; + } + } + if (completion) { + completion(granted); + } + }); + }]; +} + +- (BOOL)hasMicrophonePermission { + AVAudioSession *session = [AVAudioSession sharedInstance]; + return session.recordPermission == AVAudioSessionRecordPermissionGranted; +} + +#pragma mark - Session Configuration + +- 
(BOOL)configureForConversation:(NSError **)error { + AVAudioSession *session = [AVAudioSession sharedInstance]; + + // 配置为录音+播放模式 + // Category: PlayAndRecord - 同时支持录音和播放 + // Mode: VoiceChat - 优化语音通话场景 + // Options: + // - DefaultToSpeaker: 默认使用扬声器 + // - AllowBluetooth: 允许蓝牙设备 + NSError *categoryError = nil; + BOOL success = + [session setCategory:AVAudioSessionCategoryPlayAndRecord + mode:AVAudioSessionModeVoiceChat + options:(AVAudioSessionCategoryOptionDefaultToSpeaker | + AVAudioSessionCategoryOptionAllowBluetooth) + error:&categoryError]; + + if (!success) { + if (error) { + *error = categoryError; + } + NSLog(@"[AudioSessionManager] Failed to configure session: %@", + categoryError.localizedDescription); + return NO; + } + + return YES; +} + +- (BOOL)configureForPlayback:(NSError **)error { + AVAudioSession *session = [AVAudioSession sharedInstance]; + + // 仅播放模式 + NSError *categoryError = nil; + BOOL success = + [session setCategory:AVAudioSessionCategoryPlayback + mode:AVAudioSessionModeDefault + options:AVAudioSessionCategoryOptionDefaultToSpeaker + error:&categoryError]; + + if (!success) { + if (error) { + *error = categoryError; + } + NSLog(@"[AudioSessionManager] Failed to configure playback: %@", + categoryError.localizedDescription); + return NO; + } + + return YES; +} + +- (BOOL)activateSession:(NSError **)error { + if (self.isSessionActive) { + return YES; + } + + AVAudioSession *session = [AVAudioSession sharedInstance]; + NSError *activationError = nil; + BOOL success = [session setActive:YES error:&activationError]; + + if (!success) { + if (error) { + *error = activationError; + } + NSLog(@"[AudioSessionManager] Failed to activate session: %@", + activationError.localizedDescription); + return NO; + } + + self.isSessionActive = YES; + return YES; +} + +- (void)deactivateSession { + if (!self.isSessionActive) { + return; + } + + AVAudioSession *session = [AVAudioSession sharedInstance]; + NSError *error = nil; + + // 使用 NotifyOthersOnDeactivation 通知其他应用可以恢复播放 + [session setActive:NO + withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation + error:&error]; + + if (error) { + NSLog(@"[AudioSessionManager] Failed to deactivate session: %@", + error.localizedDescription); + } + + self.isSessionActive = NO; +} + +@end diff --git a/keyBoard/Class/AiTalk/VM/AudioStreamPlayer.h b/keyBoard/Class/AiTalk/VM/AudioStreamPlayer.h new file mode 100644 index 0000000..678e7a6 --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/AudioStreamPlayer.h @@ -0,0 +1,63 @@ +// +// AudioStreamPlayer.h +// keyBoard +// +// Created by Mac on 2026/1/15. 
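+//
+// Typical usage (a sketch; chunks are PCM Int16 and should match the
+// player's fixed 16 kHz / mono graph, see the .m):
+//
+//   AudioStreamPlayer *player = [[AudioStreamPlayer alloc] init];
+//   player.delegate = self;
+//   NSError *error = nil;
+//   if ([player start:&error]) {
+//     [player enqueuePCMChunk:chunk sampleRate:16000 channels:1
+//                   segmentId:@"seg_0"];  // repeated per streamed chunk
+//     NSTimeInterval t = [player playbackTimeForSegment:@"seg_0"];
+//   }
+//   [player stop];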
+// + +#import + +NS_ASSUME_NONNULL_BEGIN + +/// 流式音频播放器代理 +@protocol AudioStreamPlayerDelegate +@optional +/// 开始播放片段 +- (void)audioStreamPlayerDidStartSegment:(NSString *)segmentId; +/// 播放时间更新 +- (void)audioStreamPlayerDidUpdateTime:(NSTimeInterval)time + segmentId:(NSString *)segmentId; +/// 片段播放完成 +- (void)audioStreamPlayerDidFinishSegment:(NSString *)segmentId; +@end + +/// PCM 流式播放器 +/// 使用 AVAudioEngine + AVAudioPlayerNode 实现低延迟播放 +@interface AudioStreamPlayer : NSObject + +@property(nonatomic, weak) id delegate; + +/// 是否正在播放 +@property(nonatomic, assign, readonly, getter=isPlaying) BOOL playing; + +/// 启动播放器 +/// @param error 错误信息 +/// @return 是否启动成功 +- (BOOL)start:(NSError **)error; + +/// 停止播放器 +- (void)stop; + +/// 入队 PCM 数据块 +/// @param pcmData PCM Int16 数据 +/// @param sampleRate 采样率 +/// @param channels 通道数 +/// @param segmentId 片段 ID +- (void)enqueuePCMChunk:(NSData *)pcmData + sampleRate:(double)sampleRate + channels:(int)channels + segmentId:(NSString *)segmentId; + +/// 获取片段的当前播放时间 +/// @param segmentId 片段 ID +/// @return 当前时间(秒) +- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId; + +/// 获取片段的总时长 +/// @param segmentId 片段 ID +/// @return 总时长(秒) +- (NSTimeInterval)durationForSegment:(NSString *)segmentId; + +@end + +NS_ASSUME_NONNULL_END diff --git a/keyBoard/Class/AiTalk/VM/AudioStreamPlayer.m b/keyBoard/Class/AiTalk/VM/AudioStreamPlayer.m new file mode 100644 index 0000000..88f4e30 --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/AudioStreamPlayer.m @@ -0,0 +1,246 @@ +// +// AudioStreamPlayer.m +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import "AudioStreamPlayer.h" +#import + +@interface AudioStreamPlayer () + +@property(nonatomic, strong) AVAudioEngine *audioEngine; +@property(nonatomic, strong) AVAudioPlayerNode *playerNode; +@property(nonatomic, strong) AVAudioFormat *playbackFormat; + +// 片段跟踪 +@property(nonatomic, copy) NSString *currentSegmentId; +@property(nonatomic, strong) + NSMutableDictionary *segmentDurations; +@property(nonatomic, strong) + NSMutableDictionary *segmentStartTimes; +@property(nonatomic, assign) NSUInteger scheduledSamples; +@property(nonatomic, assign) NSUInteger playedSamples; + +// 状态 +@property(nonatomic, assign) BOOL playing; +@property(nonatomic, strong) dispatch_queue_t playerQueue; +@property(nonatomic, strong) NSTimer *progressTimer; + +@end + +@implementation AudioStreamPlayer + +- (instancetype)init { + self = [super init]; + if (self) { + _audioEngine = [[AVAudioEngine alloc] init]; + _playerNode = [[AVAudioPlayerNode alloc] init]; + _segmentDurations = [[NSMutableDictionary alloc] init]; + _segmentStartTimes = [[NSMutableDictionary alloc] init]; + _playerQueue = dispatch_queue_create("com.keyboard.aitalk.streamplayer", + DISPATCH_QUEUE_SERIAL); + + // 默认播放格式:16kHz, Mono, Float32 + _playbackFormat = + [[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32 + sampleRate:16000 + channels:1 + interleaved:NO]; + } + return self; +} + +- (void)dealloc { + [self stop]; +} + +#pragma mark - Public Methods + +- (BOOL)start:(NSError **)error { + if (self.playing) { + return YES; + } + + // 连接节点 + [self.audioEngine attachNode:self.playerNode]; + [self.audioEngine connect:self.playerNode + to:self.audioEngine.mainMixerNode + format:self.playbackFormat]; + + // 启动引擎 + NSError *startError = nil; + [self.audioEngine prepare]; + + if (![self.audioEngine startAndReturnError:&startError]) { + if (error) { + *error = startError; + } + NSLog(@"[AudioStreamPlayer] Failed to start engine: %@", + 
startError.localizedDescription); + return NO; + } + + [self.playerNode play]; + self.playing = YES; + + // Start the progress timer + [self startProgressTimer]; + + NSLog(@"[AudioStreamPlayer] Started"); + return YES; +} + +- (void)stop { + dispatch_async(self.playerQueue, ^{ + [self stopProgressTimer]; + + [self.playerNode stop]; + [self.audioEngine stop]; + + self.playing = NO; + self.currentSegmentId = nil; + self.scheduledSamples = 0; + self.playedSamples = 0; + + [self.segmentDurations removeAllObjects]; + [self.segmentStartTimes removeAllObjects]; + + NSLog(@"[AudioStreamPlayer] Stopped"); + }); +} + +- (void)enqueuePCMChunk:(NSData *)pcmData + sampleRate:(double)sampleRate + channels:(int)channels + segmentId:(NSString *)segmentId { + + if (!pcmData || pcmData.length == 0) + return; + + dispatch_async(self.playerQueue, ^{ + // The player node was connected with self.playbackFormat (16 kHz mono); + // scheduling a buffer in any other format would throw, so drop + // mismatched chunks instead. + if (sampleRate != self.playbackFormat.sampleRate || + channels != (int)self.playbackFormat.channelCount) { + NSLog(@"[AudioStreamPlayer] Dropping chunk with mismatched format " + @"(%.0f Hz, %d ch)", sampleRate, channels); + return; + } + + // Is this a new segment? + BOOL isNewSegment = ![segmentId isEqualToString:self.currentSegmentId]; + if (isNewSegment) { + self.currentSegmentId = segmentId; + self.scheduledSamples = 0; + // Reset the played counter too, so playbackTimeForSegment: starts from + // zero for this segment instead of carrying over the previous one. + self.playedSamples = 0; + self.segmentStartTimes[segmentId] = @(CACurrentMediaTime()); + + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate respondsToSelector:@selector + (audioStreamPlayerDidStartSegment:)]) { + [self.delegate audioStreamPlayerDidStartSegment:segmentId]; + } + }); + } + + // Convert Int16 -> Float32 + NSUInteger sampleCount = pcmData.length / sizeof(int16_t); + const int16_t *int16Samples = (const int16_t *)pcmData.bytes; + + // Build a buffer in the playback format (mono, so samples == frames) + AVAudioPCMBuffer *buffer = [[AVAudioPCMBuffer alloc] + initWithPCMFormat:self.playbackFormat + frameCapacity:(AVAudioFrameCount)sampleCount]; + buffer.frameLength = (AVAudioFrameCount)sampleCount; + + float *floatChannel = buffer.floatChannelData[0]; + for (NSUInteger i = 0; i < sampleCount; i++) { + floatChannel[i] = (float)int16Samples[i] / 32768.0f; + } + + // Schedule for playback + __weak typeof(self) weakSelf = self; + [self.playerNode scheduleBuffer:buffer + completionHandler:^{ + __strong typeof(weakSelf) strongSelf = weakSelf; + if (!strongSelf) + return; + + dispatch_async(strongSelf.playerQueue, ^{ + strongSelf.playedSamples += sampleCount; + }); + }]; + + self.scheduledSamples += sampleCount; + + // Accumulate the segment duration + NSTimeInterval chunkDuration = (double)sampleCount / sampleRate; + NSNumber *currentDuration = self.segmentDurations[segmentId]; + self.segmentDurations[segmentId] = + @(currentDuration.doubleValue + chunkDuration); + }); +} + +- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId { + if (![segmentId isEqualToString:self.currentSegmentId]) { + return 0; + } + + // Estimate from the number of samples actually played + return (double)self.playedSamples / self.playbackFormat.sampleRate; +} + +- (NSTimeInterval)durationForSegment:(NSString *)segmentId { + NSNumber *duration = self.segmentDurations[segmentId]; + return duration ?
duration.doubleValue : 0; +} + +#pragma mark - Progress Timer + +- (void)startProgressTimer { + dispatch_async(dispatch_get_main_queue(), ^{ + self.progressTimer = + [NSTimer scheduledTimerWithTimeInterval:1.0 / 30.0 + target:self + selector:@selector(updateProgress) + userInfo:nil + repeats:YES]; + }); +} + +- (void)stopProgressTimer { + dispatch_async(dispatch_get_main_queue(), ^{ + [self.progressTimer invalidate]; + self.progressTimer = nil; + }); +} + +- (void)updateProgress { + if (!self.playing || !self.currentSegmentId) { + return; + } + + NSTimeInterval currentTime = + [self playbackTimeForSegment:self.currentSegmentId]; + NSString *segmentId = self.currentSegmentId; + + if ([self.delegate respondsToSelector:@selector + (audioStreamPlayerDidUpdateTime:segmentId:)]) { + [self.delegate audioStreamPlayerDidUpdateTime:currentTime + segmentId:segmentId]; + } + + // 检查是否播放完成 + NSTimeInterval duration = [self durationForSegment:segmentId]; + if (duration > 0 && currentTime >= duration - 0.1) { + // 播放完成 + dispatch_async(self.playerQueue, ^{ + if ([self.delegate respondsToSelector:@selector + (audioStreamPlayerDidFinishSegment:)]) { + dispatch_async(dispatch_get_main_queue(), ^{ + [self.delegate audioStreamPlayerDidFinishSegment:segmentId]; + }); + } + }); + } +} + +@end diff --git a/keyBoard/Class/AiTalk/VM/ConversationOrchestrator.h b/keyBoard/Class/AiTalk/VM/ConversationOrchestrator.h new file mode 100644 index 0000000..f33202e --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/ConversationOrchestrator.h @@ -0,0 +1,88 @@ +// +// ConversationOrchestrator.h +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import + +NS_ASSUME_NONNULL_BEGIN + +/// 对话状态 +typedef NS_ENUM(NSInteger, ConversationState) { + ConversationStateIdle = 0, // 空闲 + ConversationStateListening, // 正在录音 + ConversationStateRecognizing, // 正在识别(等待 ASR 结果) + ConversationStateThinking, // 正在思考(等待 LLM 回复) + ConversationStateSpeaking // 正在播报 TTS +}; + +/// 对话编排器 +/// 核心状态机,串联所有模块,处理打断逻辑 +@interface ConversationOrchestrator : NSObject + +/// 当前状态 +@property(nonatomic, assign, readonly) ConversationState state; + +/// 当前对话 ID +@property(nonatomic, copy, readonly, nullable) NSString *conversationId; + +#pragma mark - Callbacks + +/// 用户最终识别文本回调 +@property(nonatomic, copy, nullable) void (^onUserFinalText)(NSString *text); + +/// AI 可见文本回调(打字机效果) +@property(nonatomic, copy, nullable) void (^onAssistantVisibleText) + (NSString *text); + +/// AI 完整回复文本回调 +@property(nonatomic, copy, nullable) void (^onAssistantFullText)(NSString *text) + ; + +/// 实时识别文本回调(部分结果) +@property(nonatomic, copy, nullable) void (^onPartialText)(NSString *text); + +/// 音量更新回调(用于波形 UI) +@property(nonatomic, copy, nullable) void (^onVolumeUpdate)(float rms); + +/// 状态变化回调 +@property(nonatomic, copy, nullable) void (^onStateChange) + (ConversationState state); + +/// 错误回调 +@property(nonatomic, copy, nullable) void (^onError)(NSError *error); + +/// AI 开始说话回调 +@property(nonatomic, copy, nullable) void (^onSpeakingStart)(void); + +/// AI 说话结束回调 +@property(nonatomic, copy, nullable) void (^onSpeakingEnd)(void); + +#pragma mark - Configuration + +/// ASR 服务器 URL +@property(nonatomic, copy) NSString *asrServerURL; + +/// LLM 服务器 URL +@property(nonatomic, copy) NSString *llmServerURL; + +/// TTS 服务器 URL +@property(nonatomic, copy) NSString *ttsServerURL; + +#pragma mark - User Actions + +/// 用户按下录音按钮 +/// 如果当前正在播放,会自动打断 +- (void)userDidPressRecord; + +/// 用户松开录音按钮 +- (void)userDidReleaseRecord; + +/// 手动停止(退出页面等) +- (void)stop; + +@end + +NS_ASSUME_NONNULL_END diff 
--git a/keyBoard/Class/AiTalk/VM/ConversationOrchestrator.m b/keyBoard/Class/AiTalk/VM/ConversationOrchestrator.m new file mode 100644 index 0000000..c376ee5 --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/ConversationOrchestrator.m @@ -0,0 +1,527 @@ +// +// ConversationOrchestrator.m +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import "ConversationOrchestrator.h" +#import "ASRStreamClient.h" +#import "AudioCaptureManager.h" +#import "AudioSessionManager.h" +#import "LLMStreamClient.h" +#import "Segmenter.h" +#import "SubtitleSync.h" +#import "TTSPlaybackPipeline.h" +#import "TTSServiceClient.h" + +@interface ConversationOrchestrator () < + AudioSessionManagerDelegate, AudioCaptureManagerDelegate, + ASRStreamClientDelegate, LLMStreamClientDelegate, TTSServiceClientDelegate, + TTSPlaybackPipelineDelegate> + +// 模块 +@property(nonatomic, strong) AudioSessionManager *audioSession; +@property(nonatomic, strong) AudioCaptureManager *audioCapture; +@property(nonatomic, strong) ASRStreamClient *asrClient; +@property(nonatomic, strong) LLMStreamClient *llmClient; +@property(nonatomic, strong) Segmenter *segmenter; +@property(nonatomic, strong) TTSServiceClient *ttsClient; +@property(nonatomic, strong) TTSPlaybackPipeline *playbackPipeline; +@property(nonatomic, strong) SubtitleSync *subtitleSync; + +// 状态 +@property(nonatomic, assign) ConversationState state; +@property(nonatomic, copy) NSString *conversationId; +@property(nonatomic, copy) NSString *currentSessionId; + +// 文本跟踪 +@property(nonatomic, strong) NSMutableString *fullAssistantText; +@property(nonatomic, strong) + NSMutableDictionary *segmentTextMap; +@property(nonatomic, assign) NSInteger segmentCounter; + +// 队列 +@property(nonatomic, strong) dispatch_queue_t orchestratorQueue; + +@end + +@implementation ConversationOrchestrator + +#pragma mark - Initialization + +- (instancetype)init { + self = [super init]; + if (self) { + _orchestratorQueue = dispatch_queue_create( + "com.keyboard.aitalk.orchestrator", DISPATCH_QUEUE_SERIAL); + _state = ConversationStateIdle; + _conversationId = [[NSUUID UUID] UUIDString]; + + _fullAssistantText = [[NSMutableString alloc] init]; + _segmentTextMap = [[NSMutableDictionary alloc] init]; + _segmentCounter = 0; + + [self setupModules]; + } + return self; +} + +- (void)setupModules { + // Audio Session + self.audioSession = [AudioSessionManager sharedManager]; + self.audioSession.delegate = self; + + // Audio Capture + self.audioCapture = [[AudioCaptureManager alloc] init]; + self.audioCapture.delegate = self; + + // ASR Client + self.asrClient = [[ASRStreamClient alloc] init]; + self.asrClient.delegate = self; + + // LLM Client + self.llmClient = [[LLMStreamClient alloc] init]; + self.llmClient.delegate = self; + + // Segmenter + self.segmenter = [[Segmenter alloc] init]; + + // TTS Client + self.ttsClient = [[TTSServiceClient alloc] init]; + self.ttsClient.delegate = self; + + // Playback Pipeline + self.playbackPipeline = [[TTSPlaybackPipeline alloc] init]; + self.playbackPipeline.delegate = self; + + // Subtitle Sync + self.subtitleSync = [[SubtitleSync alloc] init]; +} + +#pragma mark - Configuration Setters + +- (void)setAsrServerURL:(NSString *)asrServerURL { + _asrServerURL = [asrServerURL copy]; + self.asrClient.serverURL = asrServerURL; +} + +- (void)setLlmServerURL:(NSString *)llmServerURL { + _llmServerURL = [llmServerURL copy]; + self.llmClient.serverURL = llmServerURL; +} + +- (void)setTtsServerURL:(NSString *)ttsServerURL { + _ttsServerURL = [ttsServerURL copy]; + 
self.ttsClient.serverURL = ttsServerURL; +} + +#pragma mark - User Actions + +- (void)userDidPressRecord { + dispatch_async(self.orchestratorQueue, ^{ + NSLog(@"[Orchestrator] userDidPressRecord, current state: %ld", + (long)self.state); + + // 如果正在播放或思考,执行打断 + if (self.state == ConversationStateSpeaking || + self.state == ConversationStateThinking) { + [self performBargein]; + } + + // 检查麦克风权限 + if (![self.audioSession hasMicrophonePermission]) { + [self.audioSession requestMicrophonePermission:^(BOOL granted) { + if (granted) { + dispatch_async(self.orchestratorQueue, ^{ + [self startRecording]; + }); + } + }]; + return; + } + + [self startRecording]; + }); +} + +- (void)userDidReleaseRecord { + dispatch_async(self.orchestratorQueue, ^{ + NSLog(@"[Orchestrator] userDidReleaseRecord, current state: %ld", + (long)self.state); + + if (self.state != ConversationStateListening) { + return; + } + + // 停止采集 + [self.audioCapture stopCapture]; + + // 请求 ASR 最终结果 + [self.asrClient finalize]; + + // 更新状态 + [self updateState:ConversationStateRecognizing]; + }); +} + +- (void)stop { + dispatch_async(self.orchestratorQueue, ^{ + [self cancelAll]; + [self updateState:ConversationStateIdle]; + }); +} + +#pragma mark - Private: Recording + +- (void)startRecording { + // 配置音频会话 + NSError *error = nil; + if (![self.audioSession configureForConversation:&error]) { + [self reportError:error]; + return; + } + + if (![self.audioSession activateSession:&error]) { + [self reportError:error]; + return; + } + + // 生成新的会话 ID + self.currentSessionId = [[NSUUID UUID] UUIDString]; + + // 启动 ASR + [self.asrClient startWithSessionId:self.currentSessionId]; + + // 启动音频采集 + if (![self.audioCapture startCapture:&error]) { + [self reportError:error]; + [self.asrClient cancel]; + return; + } + + // 更新状态 + [self updateState:ConversationStateListening]; +} + +#pragma mark - Private: Barge-in (打断) + +- (void)performBargein { + NSLog(@"[Orchestrator] Performing barge-in"); + + // 取消所有正在进行的请求 + [self.ttsClient cancel]; + [self.llmClient cancel]; + [self.asrClient cancel]; + + // 停止播放 + [self.playbackPipeline stop]; + + // 清空状态 + [self.segmenter reset]; + [self.segmentTextMap removeAllObjects]; + [self.fullAssistantText setString:@""]; + self.segmentCounter = 0; +} + +- (void)cancelAll { + [self.audioCapture stopCapture]; + [self.asrClient cancel]; + [self.llmClient cancel]; + [self.ttsClient cancel]; + [self.playbackPipeline stop]; + [self.segmenter reset]; + [self.audioSession deactivateSession]; +} + +#pragma mark - Private: State Management + +- (void)updateState:(ConversationState)newState { + if (self.state == newState) + return; + + ConversationState oldState = self.state; + self.state = newState; + + NSLog(@"[Orchestrator] State: %ld -> %ld", (long)oldState, (long)newState); + + dispatch_async(dispatch_get_main_queue(), ^{ + if (self.onStateChange) { + self.onStateChange(newState); + } + + // 特殊状态回调 + if (newState == ConversationStateSpeaking && + oldState != ConversationStateSpeaking) { + if (self.onSpeakingStart) { + self.onSpeakingStart(); + } + } + + if (oldState == ConversationStateSpeaking && + newState != ConversationStateSpeaking) { + if (self.onSpeakingEnd) { + self.onSpeakingEnd(); + } + } + }); +} + +- (void)reportError:(NSError *)error { + NSLog(@"[Orchestrator] Error: %@", error.localizedDescription); + + dispatch_async(dispatch_get_main_queue(), ^{ + if (self.onError) { + self.onError(error); + } + }); +} + +#pragma mark - AudioCaptureManagerDelegate + +- (void)audioCaptureManagerDidOutputPCMFrame:(NSData 
*)pcmFrame { + // 发送到 ASR + [self.asrClient sendAudioPCMFrame:pcmFrame]; +} + +- (void)audioCaptureManagerDidUpdateRMS:(float)rms { + dispatch_async(dispatch_get_main_queue(), ^{ + if (self.onVolumeUpdate) { + self.onVolumeUpdate(rms); + } + }); +} + +#pragma mark - AudioSessionManagerDelegate + +- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type { + dispatch_async(self.orchestratorQueue, ^{ + if (type == KBAudioSessionInterruptionTypeBegan) { + // 中断开始:停止采集和播放 + [self cancelAll]; + [self updateState:ConversationStateIdle]; + } + }); +} + +- (void)audioSessionManagerMicrophonePermissionDenied { + NSError *error = + [NSError errorWithDomain:@"ConversationOrchestrator" + code:-1 + userInfo:@{ + NSLocalizedDescriptionKey : @"请在设置中开启麦克风权限" + }]; + [self reportError:error]; +} + +#pragma mark - ASRStreamClientDelegate + +- (void)asrClientDidReceivePartialText:(NSString *)text { + dispatch_async(dispatch_get_main_queue(), ^{ + if (self.onPartialText) { + self.onPartialText(text); + } + }); +} + +- (void)asrClientDidReceiveFinalText:(NSString *)text { + dispatch_async(self.orchestratorQueue, ^{ + NSLog(@"[Orchestrator] ASR final text: %@", text); + + // 回调用户文本 + dispatch_async(dispatch_get_main_queue(), ^{ + if (self.onUserFinalText) { + self.onUserFinalText(text); + } + }); + + // 如果文本为空,回到空闲 + if (text.length == 0) { + [self updateState:ConversationStateIdle]; + return; + } + + // 更新状态并开始 LLM 请求 + [self updateState:ConversationStateThinking]; + + // 重置文本跟踪 + [self.fullAssistantText setString:@""]; + [self.segmentTextMap removeAllObjects]; + self.segmentCounter = 0; + [self.segmenter reset]; + + // 启动播放管线 + NSError *error = nil; + if (![self.playbackPipeline start:&error]) { + NSLog(@"[Orchestrator] Failed to start playback pipeline: %@", + error.localizedDescription); + } + + // 发送 LLM 请求 + [self.llmClient sendUserText:text conversationId:self.conversationId]; + }); +} + +- (void)asrClientDidFail:(NSError *)error { + dispatch_async(self.orchestratorQueue, ^{ + [self reportError:error]; + [self updateState:ConversationStateIdle]; + }); +} + +#pragma mark - LLMStreamClientDelegate + +- (void)llmClientDidReceiveToken:(NSString *)token { + dispatch_async(self.orchestratorQueue, ^{ + // 追加到完整文本 + [self.fullAssistantText appendString:token]; + + // 追加到分段器 + [self.segmenter appendToken:token]; + + // 检查是否有可触发 TTS 的片段 + NSArray *segments = [self.segmenter popReadySegments]; + for (NSString *segmentText in segments) { + [self requestTTSForSegment:segmentText]; + } + }); +} + +- (void)llmClientDidComplete { + dispatch_async(self.orchestratorQueue, ^{ + NSLog(@"[Orchestrator] LLM complete"); + + // 处理剩余片段 + NSString *remaining = [self.segmenter flushRemainingSegment]; + if (remaining && remaining.length > 0) { + [self requestTTSForSegment:remaining]; + } + + // 回调完整文本 + NSString *fullText = [self.fullAssistantText copy]; + dispatch_async(dispatch_get_main_queue(), ^{ + if (self.onAssistantFullText) { + self.onAssistantFullText(fullText); + } + }); + }); +} + +- (void)llmClientDidFail:(NSError *)error { + dispatch_async(self.orchestratorQueue, ^{ + [self reportError:error]; + [self updateState:ConversationStateIdle]; + }); +} + +#pragma mark - Private: TTS Request + +- (void)requestTTSForSegment:(NSString *)segmentText { + NSString *segmentId = + [NSString stringWithFormat:@"seg_%ld", (long)self.segmentCounter++]; + + // 记录片段文本 + self.segmentTextMap[segmentId] = segmentText; + + NSLog(@"[Orchestrator] Requesting TTS for segment %@: %@", segmentId, + segmentText); + + // 请求 TTS + 
[self.ttsClient requestTTSForText:segmentText segmentId:segmentId]; +} + +#pragma mark - TTSServiceClientDelegate + +- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId { + dispatch_async(self.orchestratorQueue, ^{ + [self.playbackPipeline enqueueURL:url segmentId:segmentId]; + + // 如果还在 Thinking,切换到 Speaking + if (self.state == ConversationStateThinking) { + [self updateState:ConversationStateSpeaking]; + } + }); +} + +- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk + payloadType:(TTSPayloadType)type + segmentId:(NSString *)segmentId { + dispatch_async(self.orchestratorQueue, ^{ + [self.playbackPipeline enqueueChunk:chunk + payloadType:type + segmentId:segmentId]; + + // 如果还在 Thinking,切换到 Speaking + if (self.state == ConversationStateThinking) { + [self updateState:ConversationStateSpeaking]; + } + }); +} + +- (void)ttsClientDidFinishSegment:(NSString *)segmentId { + dispatch_async(self.orchestratorQueue, ^{ + [self.playbackPipeline markSegmentComplete:segmentId]; + }); +} + +- (void)ttsClientDidFail:(NSError *)error { + dispatch_async(self.orchestratorQueue, ^{ + [self reportError:error]; + }); +} + +#pragma mark - TTSPlaybackPipelineDelegate + +- (void)pipelineDidStartSegment:(NSString *)segmentId + duration:(NSTimeInterval)duration { + NSLog(@"[Orchestrator] Started playing segment: %@", segmentId); +} + +- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time + segmentId:(NSString *)segmentId { + dispatch_async(self.orchestratorQueue, ^{ + // 获取片段文本 + NSString *segmentText = self.segmentTextMap[segmentId]; + if (!segmentText) + return; + + // 计算可见文本 + NSTimeInterval duration = + [self.playbackPipeline durationForSegment:segmentId]; + NSString *visibleText = + [self.subtitleSync visibleTextForFullText:segmentText + currentTime:time + duration:duration]; + + // TODO: 这里应该累加之前片段的文本,实现完整的打字机效果 + // 简化实现:只显示当前片段 + dispatch_async(dispatch_get_main_queue(), ^{ + if (self.onAssistantVisibleText) { + self.onAssistantVisibleText(visibleText); + } + }); + }); +} + +- (void)pipelineDidFinishSegment:(NSString *)segmentId { + NSLog(@"[Orchestrator] Finished playing segment: %@", segmentId); +} + +- (void)pipelineDidFinishAllSegments { + dispatch_async(self.orchestratorQueue, ^{ + NSLog(@"[Orchestrator] All segments finished"); + + // 回到空闲状态 + [self updateState:ConversationStateIdle]; + [self.audioSession deactivateSession]; + }); +} + +- (void)pipelineDidFail:(NSError *)error { + dispatch_async(self.orchestratorQueue, ^{ + [self reportError:error]; + [self updateState:ConversationStateIdle]; + }); +} + +@end diff --git a/keyBoard/Class/AiTalk/VM/LLMStreamClient.h b/keyBoard/Class/AiTalk/VM/LLMStreamClient.h new file mode 100644 index 0000000..6ff2570 --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/LLMStreamClient.h @@ -0,0 +1,48 @@ +// +// LLMStreamClient.h +// keyBoard +// +// Created by Mac on 2026/1/15. 
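+//
+// Expected stream format (SSE over a POST request). Both payload shapes
+// below are handled by the parser in the .m, whichever the backend emits;
+// the values are illustrative:
+//
+//   data: {"token":"你好"}
+//   data: {"choices":[{"delta":{"content":","}}]}
+//   data: [DONE]
+//
+// Events are separated by a blank line ("\n\n"); each token is forwarded to
+// the delegate in arrival order.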
+// + +#import + +NS_ASSUME_NONNULL_BEGIN + +/// LLM 流式生成客户端代理 +@protocol LLMStreamClientDelegate +@required +/// 收到新的 token +- (void)llmClientDidReceiveToken:(NSString *)token; +/// 生成完成 +- (void)llmClientDidComplete; +/// 生成失败 +- (void)llmClientDidFail:(NSError *)error; +@end + +/// LLM 流式生成客户端 +/// 支持 SSE(Server-Sent Events)或 WebSocket 接收 token 流 +@interface LLMStreamClient : NSObject + +@property(nonatomic, weak) id delegate; + +/// LLM 服务器 URL +@property(nonatomic, copy) NSString *serverURL; + +/// API Key(如需要) +@property(nonatomic, copy, nullable) NSString *apiKey; + +/// 是否正在生成 +@property(nonatomic, assign, readonly, getter=isGenerating) BOOL generating; + +/// 发送用户文本请求 LLM 回复 +/// @param text 用户输入的文本 +/// @param conversationId 对话 ID +- (void)sendUserText:(NSString *)text conversationId:(NSString *)conversationId; + +/// 取消当前请求 +- (void)cancel; + +@end + +NS_ASSUME_NONNULL_END diff --git a/keyBoard/Class/AiTalk/VM/LLMStreamClient.m b/keyBoard/Class/AiTalk/VM/LLMStreamClient.m new file mode 100644 index 0000000..f8dd316 --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/LLMStreamClient.m @@ -0,0 +1,244 @@ +// +// LLMStreamClient.m +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import "LLMStreamClient.h" + +@interface LLMStreamClient () + +@property(nonatomic, strong) NSURLSession *urlSession; +@property(nonatomic, strong) NSURLSessionDataTask *dataTask; +@property(nonatomic, strong) dispatch_queue_t networkQueue; +@property(nonatomic, assign) BOOL generating; +@property(nonatomic, strong) NSMutableString *buffer; // SSE 数据缓冲 + +@end + +@implementation LLMStreamClient + +- (instancetype)init { + self = [super init]; + if (self) { + _networkQueue = dispatch_queue_create("com.keyboard.aitalk.llm.network", + DISPATCH_QUEUE_SERIAL); + _buffer = [[NSMutableString alloc] init]; + // TODO: 替换为实际的 LLM 服务器地址 + _serverURL = @"https://your-llm-server.com/api/chat/stream"; + } + return self; +} + +- (void)dealloc { + [self cancel]; +} + +#pragma mark - Public Methods + +- (void)sendUserText:(NSString *)text + conversationId:(NSString *)conversationId { + dispatch_async(self.networkQueue, ^{ + [self cancelInternal]; + + self.generating = YES; + [self.buffer setString:@""]; + + // 创建请求 + NSURL *url = [NSURL URLWithString:self.serverURL]; + NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url]; + request.HTTPMethod = @"POST"; + [request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"]; + [request setValue:@"text/event-stream" forHTTPHeaderField:@"Accept"]; + + if (self.apiKey) { + [request setValue:[NSString stringWithFormat:@"Bearer %@", self.apiKey] + forHTTPHeaderField:@"Authorization"]; + } + + // 请求体 + NSDictionary *body = @{ + @"message" : text, + @"conversationId" : conversationId, + @"stream" : @YES + }; + + NSError *jsonError = nil; + NSData *jsonData = [NSJSONSerialization dataWithJSONObject:body + options:0 + error:&jsonError]; + if (jsonError) { + [self reportError:jsonError]; + return; + } + request.HTTPBody = jsonData; + + // 创建会话 + NSURLSessionConfiguration *config = + [NSURLSessionConfiguration defaultSessionConfiguration]; + config.timeoutIntervalForRequest = 60; + config.timeoutIntervalForResource = 300; + + self.urlSession = [NSURLSession sessionWithConfiguration:config + delegate:self + delegateQueue:nil]; + + self.dataTask = [self.urlSession dataTaskWithRequest:request]; + [self.dataTask resume]; + + NSLog(@"[LLMStreamClient] Started request for conversation: %@", + conversationId); + }); +} + +- (void)cancel { + 
dispatch_async(self.networkQueue, ^{ + [self cancelInternal]; + }); +} + +#pragma mark - Private Methods + +- (void)cancelInternal { + self.generating = NO; + + if (self.dataTask) { + [self.dataTask cancel]; + self.dataTask = nil; + } + + if (self.urlSession) { + [self.urlSession invalidateAndCancel]; + self.urlSession = nil; + } + + [self.buffer setString:@""]; +} + +- (void)reportError:(NSError *)error { + self.generating = NO; + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate respondsToSelector:@selector(llmClientDidFail:)]) { + [self.delegate llmClientDidFail:error]; + } + }); +} + +- (void)reportComplete { + self.generating = NO; + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate respondsToSelector:@selector(llmClientDidComplete)]) { + [self.delegate llmClientDidComplete]; + } + }); +} + +- (void)reportToken:(NSString *)token { + dispatch_async(dispatch_get_main_queue(), ^{ + if ([self.delegate + respondsToSelector:@selector(llmClientDidReceiveToken:)]) { + [self.delegate llmClientDidReceiveToken:token]; + } + }); +} + +#pragma mark - SSE Parsing + +- (void)parseSSEData:(NSData *)data { + NSString *string = [[NSString alloc] initWithData:data + encoding:NSUTF8StringEncoding]; + if (!string) + return; + + [self.buffer appendString:string]; + + // SSE 格式:每个事件以 \n\n 分隔 + NSArray *events = [self.buffer componentsSeparatedByString:@"\n\n"]; + + // 保留最后一个可能不完整的事件 + if (events.count > 1) { + [self.buffer setString:events.lastObject]; + + for (NSUInteger i = 0; i < events.count - 1; i++) { + [self handleSSEEvent:events[i]]; + } + } +} + +- (void)handleSSEEvent:(NSString *)event { + if (event.length == 0) + return; + + // 解析 SSE 事件 + // 格式: data: {...} + NSArray *lines = [event componentsSeparatedByString:@"\n"]; + + for (NSString *line in lines) { + if ([line hasPrefix:@"data: "]) { + NSString *dataString = [line substringFromIndex:6]; + + // 检查是否是结束标志 + if ([dataString isEqualToString:@"[DONE]"]) { + [self reportComplete]; + return; + } + + // 解析 JSON + NSData *jsonData = [dataString dataUsingEncoding:NSUTF8StringEncoding]; + NSError *jsonError = nil; + NSDictionary *json = [NSJSONSerialization JSONObjectWithData:jsonData + options:0 + error:&jsonError]; + + if (jsonError) { + NSLog(@"[LLMStreamClient] Failed to parse SSE data: %@", dataString); + continue; + } + + // 提取 token(根据实际 API 格式调整) + // 常见格式: {"token": "..."} 或 {"choices": [{"delta": {"content": + // "..."}}]} + NSString *token = json[@"token"]; + if (!token) { + // OpenAI 格式 + NSArray *choices = json[@"choices"]; + if (choices.count > 0) { + NSDictionary *delta = choices[0][@"delta"]; + token = delta[@"content"]; + } + } + + if (token && token.length > 0) { + [self reportToken:token]; + } + } + } +} + +#pragma mark - NSURLSessionDataDelegate + +- (void)URLSession:(NSURLSession *)session + dataTask:(NSURLSessionDataTask *)dataTask + didReceiveData:(NSData *)data { + [self parseSSEData:data]; +} + +- (void)URLSession:(NSURLSession *)session + task:(NSURLSessionTask *)task + didCompleteWithError:(NSError *)error { + if (error) { + if (error.code != NSURLErrorCancelled) { + [self reportError:error]; + } + } else { + // 处理缓冲区中剩余的数据 + if (self.buffer.length > 0) { + [self handleSSEEvent:self.buffer]; + [self.buffer setString:@""]; + } + [self reportComplete]; + } +} + +@end diff --git a/keyBoard/Class/AiTalk/VM/Segmenter.h b/keyBoard/Class/AiTalk/VM/Segmenter.h new file mode 100644 index 0000000..de67758 --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/Segmenter.h @@ -0,0 +1,37 @@ +// +// Segmenter.h +// 
keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import <Foundation/Foundation.h> + +NS_ASSUME_NONNULL_BEGIN + +/// Sentence segmenter +/// Splits the LLM token stream into sentence fragments that can be sent to +/// TTS. For example, feeding appendToken: the tokens "今天天", "气不错", +/// "。我们" makes popReadySegments return @[ @"今天天气不错。" ] while "我们" +/// stays buffered. +@interface Segmenter : NSObject + +/// Accumulated-character threshold (a split is forced beyond this) +/// Default: 30 +@property(nonatomic, assign) NSUInteger maxCharacterThreshold; + +/// Appends a token +/// @param token A token emitted by the LLM +- (void)appendToken:(NSString *)token; + +/// Returns and removes the segments that are ready +/// @return Fragments that can be sent to TTS immediately +- (NSArray<NSString *> *)popReadySegments; + +/// Returns the remaining incomplete fragment (for the final flush) +/// @return The remainder, or nil if nothing is buffered +- (nullable NSString *)flushRemainingSegment; + +/// Resets all state +- (void)reset; + +@end + +NS_ASSUME_NONNULL_END diff --git a/keyBoard/Class/AiTalk/VM/Segmenter.m b/keyBoard/Class/AiTalk/VM/Segmenter.m new file mode 100644 index 0000000..8e32b79 --- /dev/null +++ b/keyBoard/Class/AiTalk/VM/Segmenter.m @@ -0,0 +1,148 @@ +// +// Segmenter.m +// keyBoard +// +// Created by Mac on 2026/1/15. +// + +#import "Segmenter.h" + +@interface Segmenter () + +@property(nonatomic, strong) NSMutableString *buffer; +@property(nonatomic, strong) NSMutableArray<NSString *> *readySegments; + +@end + +@implementation Segmenter + +- (instancetype)init { + self = [super init]; + if (self) { + _buffer = [[NSMutableString alloc] init]; + _readySegments = [[NSMutableArray alloc] init]; + _maxCharacterThreshold = 30; + } + return self; +} + +#pragma mark - Public Methods + +- (void)appendToken:(NSString *)token { + if (!token || token.length == 0) { + return; + } + + [self.buffer appendString:token]; + + // Check whether a split is due + [self checkAndSplit]; +} + +- (NSArray<NSString *> *)popReadySegments { + NSArray *segments = [self.readySegments copy]; + [self.readySegments removeAllObjects]; + return segments; +} + +- (nullable NSString *)flushRemainingSegment { + NSString *remaining = [self.buffer copy]; + [self.buffer setString:@""]; + + // Trim leading/trailing whitespace + remaining = [remaining + stringByTrimmingCharactersInSet:[NSCharacterSet + whitespaceAndNewlineCharacterSet]]; + + return remaining.length > 0 ?
+
+- (void)reset {
+  [self.buffer setString:@""];
+  [self.readySegments removeAllObjects];
+}
+
+#pragma mark - Private Methods
+
+- (void)checkAndSplit {
+  // Sentence-ending punctuation
+  NSCharacterSet *sentenceEnders =
+      [NSCharacterSet characterSetWithCharactersInString:@"。!?\n"];
+
+  while (YES) {
+    NSString *currentBuffer = self.buffer;
+
+    // Find the first sentence-ending punctuation mark
+    NSRange range = [currentBuffer rangeOfCharacterFromSet:sentenceEnders];
+
+    if (range.location != NSNotFound) {
+      // Found an end mark: split there
+      NSUInteger endIndex = range.location + 1;
+      NSString *segment = [currentBuffer substringToIndex:endIndex];
+      segment = [segment stringByTrimmingCharactersInSet:
+                             [NSCharacterSet whitespaceAndNewlineCharacterSet]];
+
+      if (segment.length > 0) {
+        [self.readySegments addObject:segment];
+      }
+
+      // Drop the part that has been split off
+      [self.buffer deleteCharactersInRange:NSMakeRange(0, endIndex)];
+    } else if (currentBuffer.length >= self.maxCharacterThreshold) {
+      // No punctuation found, but the buffer is over the threshold:
+      // force a split, preferably at a space or comma
+      NSRange breakRange = [self findBestBreakPoint:currentBuffer];
+
+      if (breakRange.location != NSNotFound) {
+        NSString *segment =
+            [currentBuffer substringToIndex:breakRange.location + 1];
+        segment = [segment stringByTrimmingCharactersInSet:
+                               [NSCharacterSet whitespaceAndNewlineCharacterSet]];
+
+        if (segment.length > 0) {
+          [self.readySegments addObject:segment];
+        }
+
+        [self.buffer
+            deleteCharactersInRange:NSMakeRange(0, breakRange.location + 1)];
+      } else {
+        // No suitable break point: cut hard at the threshold
+        NSString *segment =
+            [currentBuffer substringToIndex:self.maxCharacterThreshold];
+        segment = [segment stringByTrimmingCharactersInSet:
+                               [NSCharacterSet whitespaceAndNewlineCharacterSet]];
+
+        if (segment.length > 0) {
+          [self.readySegments addObject:segment];
+        }
+
+        [self.buffer
+            deleteCharactersInRange:NSMakeRange(0, self.maxCharacterThreshold)];
+      }
+    } else {
+      // Not enough content to split yet
+      break;
+    }
+  }
+}
+
+- (NSRange)findBestBreakPoint:(NSString *)text {
+  // Prefer breaking at commas, semicolons, colons, or spaces
+  NSCharacterSet *breakChars =
+      [NSCharacterSet characterSetWithCharactersInString:@",,、;;:: "];
+
+  // Scan backwards so each forced segment carries as much content as possible
+  for (NSInteger i = text.length - 1;
+       i >= (NSInteger)(self.maxCharacterThreshold / 2); i--) {
+    unichar c = [text characterAtIndex:i];
+    if ([breakChars characterIsMember:c]) {
+      return NSMakeRange(i, 1);
+    }
+  }
+
+  return NSMakeRange(NSNotFound, 0);
+}
+
+@end
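Example — how the token stream is likely fed through the segmenter into TTS (a sketch; the real glue presumably lives in the orchestrator layer, and the `segmenter`/`ttsClient` properties are assumptions):

    - (void)llmClientDidReceiveToken:(NSString *)token {
      [self.segmenter appendToken:token];
      for (NSString *sentence in [self.segmenter popReadySegments]) {
        [self.ttsClient requestTTSForText:sentence
                                segmentId:[[NSUUID UUID] UUIDString]];
      }
    }

    - (void)llmClientDidComplete {
      NSString *tail = [self.segmenter flushRemainingSegment];
      if (tail) {
        [self.ttsClient requestTTSForText:tail
                                segmentId:[[NSUUID UUID] UUIDString]];
      }
    }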
diff --git a/keyBoard/Class/AiTalk/VM/SubtitleSync.h b/keyBoard/Class/AiTalk/VM/SubtitleSync.h
new file mode 100644
index 0000000..636570d
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/SubtitleSync.h
@@ -0,0 +1,36 @@
+//
+// SubtitleSync.h
+// keyBoard
+//
+// Created by Mac on 2026/1/15.
+//
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Subtitle synchronizer.
+/// Maps playback progress onto the text to display, producing a
+/// typewriter effect.
+@interface SubtitleSync : NSObject
+
+/// The text that should currently be visible.
+/// @param fullText The complete text.
+/// @param currentTime Current playback time in seconds.
+/// @param duration Total duration in seconds.
+/// @return The visible prefix of the text (typewriter effect).
+- (NSString *)visibleTextForFullText:(NSString *)fullText
+                         currentTime:(NSTimeInterval)currentTime
+                            duration:(NSTimeInterval)duration;
+
+/// The number of characters that should currently be visible.
+/// @param fullText The complete text.
+/// @param currentTime Current playback time in seconds.
+/// @param duration Total duration in seconds.
+/// @return The number of visible characters.
+- (NSUInteger)visibleCountForFullText:(NSString *)fullText
+                          currentTime:(NSTimeInterval)currentTime
+                             duration:(NSTimeInterval)duration;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/keyBoard/Class/AiTalk/VM/SubtitleSync.m b/keyBoard/Class/AiTalk/VM/SubtitleSync.m
new file mode 100644
index 0000000..cdcdacb
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/SubtitleSync.m
@@ -0,0 +1,66 @@
+//
+// SubtitleSync.m
+// keyBoard
+//
+// Created by Mac on 2026/1/15.
+//
+
+#import "SubtitleSync.h"
+
+@implementation SubtitleSync
+
+- (NSString *)visibleTextForFullText:(NSString *)fullText
+                         currentTime:(NSTimeInterval)currentTime
+                            duration:(NSTimeInterval)duration {
+
+  if (!fullText || fullText.length == 0) {
+    return @"";
+  }
+
+  NSUInteger visibleCount = [self visibleCountForFullText:fullText
+                                              currentTime:currentTime
+                                                 duration:duration];
+
+  if (visibleCount >= fullText.length) {
+    return fullText;
+  }
+
+  // Note: the index counts UTF-16 units, so the cut can land inside a
+  // composed character (e.g. an emoji surrogate pair);
+  // -rangeOfComposedCharacterSequenceAtIndex: could round to a boundary.
+  return [fullText substringToIndex:visibleCount];
+}
+
+- (NSUInteger)visibleCountForFullText:(NSString *)fullText
+                          currentTime:(NSTimeInterval)currentTime
+                             duration:(NSTimeInterval)duration {
+
+  if (!fullText || fullText.length == 0) {
+    return 0;
+  }
+
+  // Edge cases
+  if (duration <= 0) {
+    // Without duration information, just show everything
+    return fullText.length;
+  }
+
+  if (currentTime <= 0) {
+    return 0;
+  }
+
+  if (currentTime >= duration) {
+    return fullText.length;
+  }
+
+  // Progress ratio
+  double progress = currentTime / duration;
+
+  // Number of visible characters.
+  // Run slightly ahead of the audio so the text never lags the voice.
+  double adjustedProgress = MIN(progress * 1.05, 1.0);
+
+  NSUInteger visibleCount =
+      (NSUInteger)round(fullText.length * adjustedProgress);
+
+  return MIN(visibleCount, fullText.length);
+}
+
+@end
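Example — the mapping in numbers: for a 20-character string at currentTime 5.0 of duration 10.0, progress is 0.5, the adjusted progress is 0.525, and round(20 * 0.525) = 11, so the first 11 characters are shown:

    SubtitleSync *sync = [[SubtitleSync alloc] init];
    NSString *text = @"ABCDEFGHIJKLMNOPQRST"; // 20 characters
    NSUInteger n = [sync visibleCountForFullText:text
                                     currentTime:5.0
                                        duration:10.0];  // => 11
    NSString *shown = [sync visibleTextForFullText:text
                                       currentTime:5.0
                                          duration:10.0]; // => @"ABCDEFGHIJK"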
diff --git a/keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.h b/keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.h
new file mode 100644
index 0000000..5fbfade
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.h
@@ -0,0 +1,79 @@
+//
+// TTSPlaybackPipeline.h
+// keyBoard
+//
+// Created by Mac on 2026/1/15.
+//
+
+#import "TTSServiceClient.h"
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Playback pipeline delegate
+@protocol TTSPlaybackPipelineDelegate <NSObject>
+@optional
+/// A segment started playing
+- (void)pipelineDidStartSegment:(NSString *)segmentId
+                       duration:(NSTimeInterval)duration;
+/// Playback time updated
+- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
+                            segmentId:(NSString *)segmentId;
+/// A segment finished playing
+- (void)pipelineDidFinishSegment:(NSString *)segmentId;
+/// All segments finished playing
+- (void)pipelineDidFinishAllSegments;
+/// Playback failed
+- (void)pipelineDidFail:(NSError *)error;
+@end
+
+/// TTS playback pipeline.
+/// Routes each segment to the matching player based on its payloadType.
+@interface TTSPlaybackPipeline : NSObject
+
+@property(nonatomic, weak) id<TTSPlaybackPipelineDelegate> delegate;
+
+/// Whether playback is in progress
+@property(nonatomic, assign, readonly, getter=isPlaying) BOOL playing;
+
+/// ID of the segment currently being played
+@property(nonatomic, copy, readonly, nullable) NSString *currentSegmentId;
+
+/// Start the pipeline.
+/// @param error Error out-parameter.
+/// @return Whether startup succeeded.
+- (BOOL)start:(NSError **)error;
+
+/// Stop the pipeline (immediately; used for barge-in interruption).
+- (void)stop;
+
+/// Enqueue a URL for playback.
+/// @param url Audio URL.
+/// @param segmentId Segment ID.
+- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId;
+
+/// Enqueue an audio data chunk.
+/// @param chunk Audio data.
+/// @param type Payload type.
+/// @param segmentId Segment ID.
+- (void)enqueueChunk:(NSData *)chunk
+         payloadType:(TTSPayloadType)type
+           segmentId:(NSString *)segmentId;
+
+/// Mark a segment's data as complete (streaming mode).
+/// @param segmentId Segment ID.
+- (void)markSegmentComplete:(NSString *)segmentId;
+
+/// Current playback time of a segment.
+/// @param segmentId Segment ID.
+/// @return Current time in seconds; 0 if the segment is not playing.
+- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId;
+
+/// Total duration of a segment.
+/// @param segmentId Segment ID.
+/// @return Total duration in seconds.
+- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
+
+@end
+
+NS_ASSUME_NONNULL_END
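Example — typical call sequence for the URL mode, as a sketch (only the pipeline API shown here is real; URLs are placeholders):

    TTSPlaybackPipeline *pipeline = [[TTSPlaybackPipeline alloc] init];
    pipeline.delegate = self;

    NSError *error = nil;
    if (![pipeline start:&error]) {
      NSLog(@"pipeline failed to start: %@", error);
    }

    // Segments play back-to-back in enqueue order;
    // pipelineDidStartSegment:duration: fires once the asset duration is
    // known, pipelineDidFinishAllSegments fires when the queue drains.
    [pipeline enqueueURL:[NSURL URLWithString:@"https://example.com/seg-1.mp3"]
               segmentId:@"seg-1"];
    [pipeline enqueueURL:[NSURL URLWithString:@"https://example.com/seg-2.mp3"]
               segmentId:@"seg-2"];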
diff --git a/keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.m b/keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.m
new file mode 100644
index 0000000..a387bef
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.m
@@ -0,0 +1,343 @@
+//
+// TTSPlaybackPipeline.m
+// keyBoard
+//
+// Created by Mac on 2026/1/15.
+//
+
+#import "TTSPlaybackPipeline.h"
+#import "AudioStreamPlayer.h"
+#import <AVFoundation/AVFoundation.h>
+
+@interface TTSPlaybackPipeline () <AudioStreamPlayerDelegate>
+
+// Players
+@property(nonatomic, strong) AVPlayer *urlPlayer;
+@property(nonatomic, strong) AudioStreamPlayer *streamPlayer;
+
+// Segment queue
+@property(nonatomic, strong) NSMutableArray<NSDictionary *> *segmentQueue;
+@property(nonatomic, strong)
+    NSMutableDictionary<NSString *, NSNumber *> *segmentDurations;
+
+// State
+@property(nonatomic, assign) BOOL playing;
+@property(nonatomic, copy) NSString *currentSegmentId;
+@property(nonatomic, strong) id playerTimeObserver;
+
+// Serial queue for all playback bookkeeping
+@property(nonatomic, strong) dispatch_queue_t playbackQueue;
+
+@end
+
+@implementation TTSPlaybackPipeline
+
+- (instancetype)init {
+  self = [super init];
+  if (self) {
+    _segmentQueue = [[NSMutableArray alloc] init];
+    _segmentDurations = [[NSMutableDictionary alloc] init];
+    _playbackQueue = dispatch_queue_create("com.keyboard.aitalk.playback",
+                                           DISPATCH_QUEUE_SERIAL);
+  }
+  return self;
+}
+
+- (void)dealloc {
+  // -stop dispatches asynchronously and captures self, which is not safe
+  // while the object is deallocating; tear down inline instead.
+  if (_playerTimeObserver) {
+    [_urlPlayer removeTimeObserver:_playerTimeObserver];
+  }
+  [_urlPlayer pause];
+  [_streamPlayer stop];
+}
+
+#pragma mark - Public Methods
+
+- (BOOL)start:(NSError **)error {
+  // Lazily create the stream player
+  if (!self.streamPlayer) {
+    self.streamPlayer = [[AudioStreamPlayer alloc] init];
+    self.streamPlayer.delegate = self;
+  }
+
+  return [self.streamPlayer start:error];
+}
+
+- (void)stop {
+  dispatch_async(self.playbackQueue, ^{
+    // Stop URL playback
+    if (self.urlPlayer) {
+      [self.urlPlayer pause];
+      if (self.playerTimeObserver) {
+        [self.urlPlayer removeTimeObserver:self.playerTimeObserver];
+        self.playerTimeObserver = nil;
+      }
+      self.urlPlayer = nil;
+    }
+
+    // Stop streaming playback
+    [self.streamPlayer stop];
+
+    // Clear the queues
+    [self.segmentQueue removeAllObjects];
+    [self.segmentDurations removeAllObjects];
+
+    self.playing = NO;
+    self.currentSegmentId = nil;
+  });
+}
+
+- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId {
+  if (!url || !segmentId)
+    return;
+
+  dispatch_async(self.playbackQueue, ^{
+    NSDictionary *segment = @{
+      @"type" : @(TTSPayloadTypeURL),
+      @"url" : url,
+      @"segmentId" : segmentId
+    };
+    [self.segmentQueue addObject:segment];
+
+    // Start playing if nothing is playing yet
+    if (!self.playing) {
+      [self playNextSegment];
+    }
+  });
+}
+
+- (void)enqueueChunk:(NSData *)chunk
+         payloadType:(TTSPayloadType)type
+           segmentId:(NSString *)segmentId {
+  if (!chunk || !segmentId)
+    return;
+
+  dispatch_async(self.playbackQueue, ^{
+    switch (type) {
+    case TTSPayloadTypePCMChunk:
+      // Feed PCM straight into the stream player
+      // (assumes 16 kHz mono, matching the capture side)
+      [self.streamPlayer enqueuePCMChunk:chunk
+                              sampleRate:16000
+                                channels:1
+                               segmentId:segmentId];
+
+      if (!self.playing) {
+        self.playing = YES;
+        self.currentSegmentId = segmentId;
+
+        dispatch_async(dispatch_get_main_queue(), ^{
+          if ([self.delegate respondsToSelector:@selector
+                             (pipelineDidStartSegment:duration:)]) {
+            [self.delegate pipelineDidStartSegment:segmentId duration:0];
+          }
+        });
+      }
+      break;
+
+    case TTSPayloadTypeAACChunk:
+      // TODO: decode AAC -> PCM -> streamPlayer
+      NSLog(@"[TTSPlaybackPipeline] AAC chunk decoding not implemented yet");
+      break;
+
+    case TTSPayloadTypeOpusChunk:
+      // TODO: decode Opus -> PCM -> streamPlayer
+      NSLog(@"[TTSPlaybackPipeline] Opus chunk decoding not implemented yet");
+      break;
+
+    default:
+      break;
+    }
+  });
+}
+
+- (void)markSegmentComplete:(NSString *)segmentId {
+  // The stream player detects the end of playback on its own
+}
+
+- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId {
+  if (![segmentId isEqualToString:self.currentSegmentId]) {
+    return 0;
+  }
+
+  if (self.urlPlayer) {
+    return CMTimeGetSeconds(self.urlPlayer.currentTime);
+  }
+
+  return [self.streamPlayer playbackTimeForSegment:segmentId];
+}
+
+- (NSTimeInterval)durationForSegment:(NSString *)segmentId {
+  NSNumber *cachedDuration = self.segmentDurations[segmentId];
+  if (cachedDuration) {
+    return cachedDuration.doubleValue;
+  }
+
+  if (self.urlPlayer && [segmentId isEqualToString:self.currentSegmentId]) {
+    CMTime itemDuration = self.urlPlayer.currentItem.duration;
+    if (CMTIME_IS_VALID(itemDuration)) {
+      return CMTimeGetSeconds(itemDuration);
+    }
+  }
+
+  return [self.streamPlayer durationForSegment:segmentId];
+}
+
+#pragma mark - Private Methods
+
+- (void)playNextSegment {
+  if (self.segmentQueue.count == 0) {
+    self.playing = NO;
+    self.currentSegmentId = nil;
+
+    dispatch_async(dispatch_get_main_queue(), ^{
+      if ([self.delegate
+              respondsToSelector:@selector(pipelineDidFinishAllSegments)]) {
+        [self.delegate pipelineDidFinishAllSegments];
+      }
+    });
+    return;
+  }
+
+  NSDictionary *segment = self.segmentQueue.firstObject;
+  [self.segmentQueue removeObjectAtIndex:0];
+
+  TTSPayloadType type = [segment[@"type"] integerValue];
+  NSString *segmentId = segment[@"segmentId"];
+
+  self.playing = YES;
+  self.currentSegmentId = segmentId;
+
+  if (type == TTSPayloadTypeURL) {
+    NSURL *url = segment[@"url"];
+    [self playURL:url segmentId:segmentId];
+  }
+}
+
+- (void)playURL:(NSURL *)url segmentId:(NSString *)segmentId {
+  AVPlayerItem *item = [AVPlayerItem playerItemWithURL:url];
+
+  if (!self.urlPlayer) {
+    self.urlPlayer = [AVPlayer playerWithPlayerItem:item];
+  } else {
+    [self.urlPlayer replaceCurrentItemWithPlayerItem:item];
+  }
+
+  // Observe playback completion
+  [[NSNotificationCenter defaultCenter]
+      addObserver:self
+         selector:@selector(playerItemDidFinish:)
+             name:AVPlayerItemDidPlayToEndTimeNotification
+           object:item];
+
+  // Add a periodic time observer (30 ticks per second)
+  __weak typeof(self) weakSelf = self;
+  self.playerTimeObserver = [self.urlPlayer
+      addPeriodicTimeObserverForInterval:CMTimeMake(1, 30)
+                                   queue:dispatch_get_main_queue()
+                              usingBlock:^(CMTime time) {
+        __strong typeof(weakSelf) strongSelf = weakSelf;
+        if (!strongSelf)
+          return;
+
+        NSTimeInterval currentTime = CMTimeGetSeconds(time);
+        if ([strongSelf.delegate respondsToSelector:@selector
+                                 (pipelineDidUpdatePlaybackTime:segmentId:)]) {
+          [strongSelf.delegate pipelineDidUpdatePlaybackTime:currentTime
+                                                   segmentId:segmentId];
+        }
+      }];
+
+  // Wait for the asset to load, then read the duration and start playback
+  [item.asset
+      loadValuesAsynchronouslyForKeys:@[ @"duration" ]
+                    completionHandler:^{
+        dispatch_async(dispatch_get_main_queue(), ^{
+          NSTimeInterval duration = CMTimeGetSeconds(item.duration);
+          if (!isnan(duration)) {
+            self.segmentDurations[segmentId] = @(duration);
+          }
+
+          if ([self.delegate respondsToSelector:@selector
+                             (pipelineDidStartSegment:duration:)]) {
+            [self.delegate pipelineDidStartSegment:segmentId
+                                          duration:duration];
+          }
+
+          [self.urlPlayer play];
+        });
+      }];
+}
+
+- (void)playerItemDidFinish:(NSNotification *)notification {
+  [[NSNotificationCenter defaultCenter]
+      removeObserver:self
+                name:AVPlayerItemDidPlayToEndTimeNotification
+              object:notification.object];
+
+  if (self.playerTimeObserver) {
+    [self.urlPlayer removeTimeObserver:self.playerTimeObserver];
+    self.playerTimeObserver = nil;
+  }
+
+  NSString *finishedSegmentId = self.currentSegmentId;
+
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if ([self.delegate
+            respondsToSelector:@selector(pipelineDidFinishSegment:)]) {
+      [self.delegate pipelineDidFinishSegment:finishedSegmentId];
+    }
+  });
+
+  dispatch_async(self.playbackQueue, ^{
+    [self playNextSegment];
+  });
+}
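+
+// Delegate event order for one URL segment, as implemented above:
+//   1. pipelineDidStartSegment:duration:        (once the asset duration loads)
+//   2. pipelineDidUpdatePlaybackTime:segmentId: (~30 times per second)
+//   3. pipelineDidFinishSegment:
+//   4. pipelineDidFinishAllSegments             (only if the queue is empty)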
+
+#pragma mark - AudioStreamPlayerDelegate
+
+- (void)audioStreamPlayerDidStartSegment:(NSString *)segmentId {
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if ([self.delegate
+            respondsToSelector:@selector(pipelineDidStartSegment:duration:)]) {
+      [self.delegate pipelineDidStartSegment:segmentId duration:0];
+    }
+  });
+}
+
+- (void)audioStreamPlayerDidUpdateTime:(NSTimeInterval)time
+                             segmentId:(NSString *)segmentId {
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if ([self.delegate respondsToSelector:@selector
+                       (pipelineDidUpdatePlaybackTime:segmentId:)]) {
+      [self.delegate pipelineDidUpdatePlaybackTime:time segmentId:segmentId];
+    }
+  });
+}
+
+- (void)audioStreamPlayerDidFinishSegment:(NSString *)segmentId {
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if ([self.delegate
+            respondsToSelector:@selector(pipelineDidFinishSegment:)]) {
+      [self.delegate pipelineDidFinishSegment:segmentId];
+    }
+  });
+
+  dispatch_async(self.playbackQueue, ^{
+    // Check whether more segments remain
+    if (self.segmentQueue.count == 0) {
+      self.playing = NO;
+      self.currentSegmentId = nil;
+
+      dispatch_async(dispatch_get_main_queue(), ^{
+        if ([self.delegate
+                respondsToSelector:@selector(pipelineDidFinishAllSegments)]) {
+          [self.delegate pipelineDidFinishAllSegments];
+        }
+      });
+    }
+  });
+}
+
+@end
diff --git a/keyBoard/Class/AiTalk/VM/TTSServiceClient.h b/keyBoard/Class/AiTalk/VM/TTSServiceClient.h
new file mode 100644
index 0000000..79bb3ec
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/TTSServiceClient.h
@@ -0,0 +1,60 @@
+//
+// TTSServiceClient.h
+// keyBoard
+//
+// Created by Mac on 2026/1/15.
+//
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// TTS response payload type
+typedef NS_ENUM(NSInteger, TTSPayloadType) {
+  TTSPayloadTypeURL = 0,  // Mode A: returns an m4a/MP3 URL
+  TTSPayloadTypePCMChunk, // Mode D: returns PCM chunks
+  TTSPayloadTypeAACChunk, // Mode B: returns AAC chunks
+  TTSPayloadTypeOpusChunk // Mode C: returns Opus chunks
+};
+
+/// TTS service client delegate
+@protocol TTSServiceClientDelegate <NSObject>
+@optional
+/// Received an audio URL (mode A)
+- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId;
+/// Received an audio data chunk (modes B/C/D)
+- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
+                          payloadType:(TTSPayloadType)type
+                            segmentId:(NSString *)segmentId;
+/// Segment finished
+- (void)ttsClientDidFinishSegment:(NSString *)segmentId;
+/// Request failed
+- (void)ttsClientDidFail:(NSError *)error;
+@end
+
+/// TTS service client.
+/// A unified network-layer interface that supports several TTS response
+/// shapes.
+@interface TTSServiceClient : NSObject
+
+@property(nonatomic, weak) id<TTSServiceClientDelegate> delegate;
+
+/// TTS server URL
+@property(nonatomic, copy) NSString *serverURL;
+
+/// The payload type to expect (determined by the server configuration)
+@property(nonatomic, assign) TTSPayloadType expectedPayloadType;
+
+/// Whether a request is in flight
+@property(nonatomic, assign, readonly, getter=isRequesting) BOOL requesting;
+
+/// Request TTS synthesis.
+/// @param text The text to synthesize.
+/// @param segmentId Segment ID (used for identification and ordering).
+- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId;
+
+/// Cancel all requests.
+- (void)cancel;
+
+@end
+
+NS_ASSUME_NONNULL_END
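Example — client-side wiring, sketched (the `ttsClient`/`pipeline` properties are assumptions; the delegate simply routes results into the playback pipeline):

    self.ttsClient = [[TTSServiceClient alloc] init];
    self.ttsClient.expectedPayloadType = TTSPayloadTypeURL; // mode A
    self.ttsClient.delegate = self;
    [self.ttsClient requestTTSForText:@"今天天气不错。" segmentId:@"seg-1"];

    // TTSServiceClientDelegate:
    - (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId {
      [self.pipeline enqueueURL:url segmentId:segmentId];
    }

    - (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
                              payloadType:(TTSPayloadType)type
                                segmentId:(NSString *)segmentId {
      [self.pipeline enqueueChunk:chunk payloadType:type segmentId:segmentId];
    }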
diff --git a/keyBoard/Class/AiTalk/VM/TTSServiceClient.m b/keyBoard/Class/AiTalk/VM/TTSServiceClient.m
new file mode 100644
index 0000000..e6b9401
--- /dev/null
+++ b/keyBoard/Class/AiTalk/VM/TTSServiceClient.m
@@ -0,0 +1,298 @@
+//
+// TTSServiceClient.m
+// keyBoard
+//
+// Created by Mac on 2026/1/15.
+//
+
+#import "TTSServiceClient.h"
+
+@interface TTSServiceClient () <NSURLSessionWebSocketDelegate>
+
+@property(nonatomic, strong) NSURLSession *urlSession;
+@property(nonatomic, strong)
+    NSMutableDictionary<NSString *, NSURLSessionTask *> *activeTasks;
+@property(nonatomic, strong) dispatch_queue_t networkQueue;
+@property(nonatomic, assign) BOOL requesting;
+
+@end
+
+@implementation TTSServiceClient
+
+- (instancetype)init {
+  self = [super init];
+  if (self) {
+    _networkQueue = dispatch_queue_create("com.keyboard.aitalk.tts.network",
+                                          DISPATCH_QUEUE_SERIAL);
+    _activeTasks = [[NSMutableDictionary alloc] init];
+    _expectedPayloadType = TTSPayloadTypeURL; // URL mode by default
+    // TODO: replace with the real TTS server address
+    _serverURL = @"https://your-tts-server.com/api/tts";
+
+    [self setupSession];
+  }
+  return self;
+}
+
+- (void)setupSession {
+  NSURLSessionConfiguration *config =
+      [NSURLSessionConfiguration defaultSessionConfiguration];
+  config.timeoutIntervalForRequest = 30;
+  config.timeoutIntervalForResource = 120;
+
+  self.urlSession = [NSURLSession sessionWithConfiguration:config
+                                                  delegate:self
+                                             delegateQueue:nil];
+}
+
+- (void)dealloc {
+  [self cancel];
+}
+
+#pragma mark - Public Methods
+
+- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId {
+  if (!text || text.length == 0 || !segmentId) {
+    return;
+  }
+
+  dispatch_async(self.networkQueue, ^{
+    self.requesting = YES;
+
+    switch (self.expectedPayloadType) {
+    case TTSPayloadTypeURL:
+      [self requestURLMode:text segmentId:segmentId];
+      break;
+    case TTSPayloadTypePCMChunk:
+    case TTSPayloadTypeAACChunk:
+    case TTSPayloadTypeOpusChunk:
+      [self requestStreamMode:text segmentId:segmentId];
+      break;
+    }
+  });
+}
+
+- (void)cancel {
+  dispatch_async(self.networkQueue, ^{
+    for (NSURLSessionTask *task in self.activeTasks.allValues) {
+      [task cancel];
+    }
+    [self.activeTasks removeAllObjects];
+    self.requesting = NO;
+  });
+}
+
+#pragma mark - URL Mode (Mode A)
+
+- (void)requestURLMode:(NSString *)text segmentId:(NSString *)segmentId {
+  NSURL *url = [NSURL URLWithString:self.serverURL];
+  NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
+  request.HTTPMethod = @"POST";
+  [request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"];
+
+  NSDictionary *body = @{
+    @"text" : text,
+    @"segmentId" : segmentId,
+    @"format" : @"mp3" // or m4a
+  };
+
+  NSError *jsonError = nil;
+  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:body
+                                                     options:0
+                                                       error:&jsonError];
+  if (jsonError) {
+    [self reportError:jsonError];
+    return;
+  }
+  request.HTTPBody = jsonData;
+
+  __weak typeof(self) weakSelf = self;
+  NSURLSessionDataTask *task = [self.urlSession
+      dataTaskWithRequest:request
+        completionHandler:^(NSData *_Nullable data,
+                            NSURLResponse *_Nullable response,
+                            NSError *_Nullable error) {
+          __strong typeof(weakSelf) strongSelf = weakSelf;
+          if (!strongSelf)
+            return;
+
+          dispatch_async(strongSelf.networkQueue, ^{
+            [strongSelf.activeTasks removeObjectForKey:segmentId];
+            strongSelf.requesting = (strongSelf.activeTasks.count > 0);
+
+            if (error) {
+              if (error.code != NSURLErrorCancelled) {
+                [strongSelf reportError:error];
+              }
+              return;
+            }
+
+            // Defensive: no error but also no body
+            if (!data) {
+              return;
+            }
+
+            // Parse the response
+            NSError *parseError = nil;
+            NSDictionary *json =
+                [NSJSONSerialization JSONObjectWithData:data
+                                                options:0
+                                                  error:&parseError];
+            if (parseError) {
+              [strongSelf reportError:parseError];
+              return;
+            }
+
+            NSString *audioURLString = json[@"audioUrl"];
+            if (audioURLString) {
+              NSURL *audioURL = [NSURL URLWithString:audioURLString];
+              dispatch_async(dispatch_get_main_queue(), ^{
+                if ([strongSelf.delegate respondsToSelector:@selector
+                                         (ttsClientDidReceiveURL:segmentId:)]) {
+                  [strongSelf.delegate ttsClientDidReceiveURL:audioURL
+                                                    segmentId:segmentId];
+                }
+                if ([strongSelf.delegate respondsToSelector:@selector
+                                         (ttsClientDidFinishSegment:)]) {
+                  [strongSelf.delegate ttsClientDidFinishSegment:segmentId];
+                }
+              });
+            } else {
+              // Surface a missing audioUrl instead of failing silently
+              NSError *missingURLError = [NSError
+                  errorWithDomain:@"TTSServiceClient"
+                             code:-1
+                         userInfo:@{
+                           NSLocalizedDescriptionKey :
+                               @"TTS response did not contain audioUrl"
+                         }];
+              [strongSelf reportError:missingURLError];
+            }
+          });
+        }];
+
+  self.activeTasks[segmentId] = task;
+  [task resume];
+
+  NSLog(@"[TTSServiceClient] URL mode request for segment: %@", segmentId);
+}
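+
+// Wire format assumed by the URL mode above (field names to be confirmed
+// against the real service):
+//
+//   POST <serverURL>   {"text":"...","segmentId":"seg-1","format":"mp3"}
+//   200 response       {"audioUrl":"https://..."}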
+
+#pragma mark - Stream Mode (Mode B/C/D)
+
+- (void)requestStreamMode:(NSString *)text segmentId:(NSString *)segmentId {
+  // Streaming delivery goes over a WebSocket connection
+  NSString *wsURL =
+      [self.serverURL stringByReplacingOccurrencesOfString:@"https://"
+                                                withString:@"wss://"];
+  wsURL = [wsURL stringByReplacingOccurrencesOfString:@"http://"
+                                           withString:@"ws://"];
+  wsURL = [wsURL stringByAppendingString:@"/stream"];
+
+  NSURL *url = [NSURL URLWithString:wsURL];
+  NSURLSessionWebSocketTask *wsTask =
+      [self.urlSession webSocketTaskWithURL:url];
+
+  self.activeTasks[segmentId] = wsTask;
+  [wsTask resume];
+
+  // Send the request
+  NSDictionary *requestDict = @{
+    @"text" : text,
+    @"segmentId" : segmentId,
+    @"format" : [self formatStringForPayloadType:self.expectedPayloadType]
+  };
+
+  NSError *jsonError = nil;
+  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:requestDict
+                                                     options:0
+                                                       error:&jsonError];
+  if (jsonError) {
+    [self reportError:jsonError];
+    return;
+  }
+
+  NSString *jsonString = [[NSString alloc] initWithData:jsonData
+                                               encoding:NSUTF8StringEncoding];
+  NSURLSessionWebSocketMessage *message =
+      [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
+
+  __weak typeof(self) weakSelf = self;
+  [wsTask sendMessage:message
+      completionHandler:^(NSError *_Nullable error) {
+        if (error) {
+          [weakSelf reportError:error];
+        } else {
+          [weakSelf receiveStreamMessage:wsTask segmentId:segmentId];
+        }
+      }];
+
+  NSLog(@"[TTSServiceClient] Stream mode request for segment: %@", segmentId);
+}
+
+- (void)receiveStreamMessage:(NSURLSessionWebSocketTask *)wsTask
+                   segmentId:(NSString *)segmentId {
+  __weak typeof(self) weakSelf = self;
+  [wsTask receiveMessageWithCompletionHandler:^(
+              NSURLSessionWebSocketMessage *_Nullable message,
+              NSError *_Nullable error) {
+    __strong typeof(weakSelf) strongSelf = weakSelf;
+    if (!strongSelf)
+      return;
+
+    if (error) {
+      // 57 == ENOTCONN (socket already closed); not worth surfacing
+      if (error.code != NSURLErrorCancelled && error.code != 57) {
+        [strongSelf reportError:error];
+      }
+      return;
+    }
+
+    if (message.type == NSURLSessionWebSocketMessageTypeData) {
+      // Audio data chunk
+      dispatch_async(dispatch_get_main_queue(), ^{
+        if ([strongSelf.delegate
+                respondsToSelector:@selector
+                (ttsClientDidReceiveAudioChunk:payloadType:segmentId:)]) {
+          [strongSelf.delegate
+              ttsClientDidReceiveAudioChunk:message.data
+                                payloadType:strongSelf.expectedPayloadType
+                                  segmentId:segmentId];
+        }
+      });
+
+      // Keep receiving
+      [strongSelf receiveStreamMessage:wsTask segmentId:segmentId];
+    } else if (message.type == NSURLSessionWebSocketMessageTypeString) {
+      // Control message
+      NSData *data = [message.string dataUsingEncoding:NSUTF8StringEncoding];
+      NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data
+                                                           options:0
+                                                             error:nil];
+
+      if ([json[@"type"] isEqualToString:@"done"]) {
+        dispatch_async(strongSelf.networkQueue, ^{
+          [strongSelf.activeTasks removeObjectForKey:segmentId];
+          strongSelf.requesting = (strongSelf.activeTasks.count > 0);
+        });
+        dispatch_async(dispatch_get_main_queue(), ^{
+          if ([strongSelf.delegate
+                  respondsToSelector:@selector(ttsClientDidFinishSegment:)]) {
+            [strongSelf.delegate ttsClientDidFinishSegment:segmentId];
+          }
+        });
+      } else {
+        // Keep receiving
+        [strongSelf receiveStreamMessage:wsTask segmentId:segmentId];
+      }
+    }
+  }];
+}
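+
+// Wire format assumed by the stream mode above: the client sends one JSON
+// text frame, then the server pushes binary audio frames and finishes with
+// a JSON control frame (all field names are assumptions):
+//
+//   client -> {"text":"...","segmentId":"seg-1","format":"pcm"}
+//   server -> <binary audio chunk> ... <binary audio chunk>
+//   server -> {"type":"done"}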
+
+- (NSString *)formatStringForPayloadType:(TTSPayloadType)type {
+  switch (type) {
+  case TTSPayloadTypePCMChunk:
+    return @"pcm";
+  case TTSPayloadTypeAACChunk:
+    return @"aac";
+  case TTSPayloadTypeOpusChunk:
+    return @"opus";
+  default:
+    return @"mp3";
+  }
+}
+
+#pragma mark - Error Reporting
+
+- (void)reportError:(NSError *)error {
+  self.requesting = NO;
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if ([self.delegate respondsToSelector:@selector(ttsClientDidFail:)]) {
+      [self.delegate ttsClientDidFail:error];
+    }
+  });
+}
+
+@end
diff --git a/keyBoard/Class/Base/VC/BaseTabBarController.m b/keyBoard/Class/Base/VC/BaseTabBarController.m
index 16ad77b..34bd280 100644
--- a/keyBoard/Class/Base/VC/BaseTabBarController.m
+++ b/keyBoard/Class/Base/VC/BaseTabBarController.m
@@ -9,6 +9,8 @@
 #import "HomeMainVC.h"
 #import "MyVC.h"
 #import "KBShopVC.h"
+#import "KBAiMainVC.h"
+
 //#import "KBCommunityVC.h"
 #import "BaseNavigationController.h"
@@ -43,13 +45,13 @@
                                      image:@"tab_shop"
                                selectedImg:@"tab_shop_selected"];
-//    // 社区
-//    KBCommunityVC *community = [[KBCommunityVC alloc] init];
-////    community.title = KBLocalized(@"Circle");
-//    BaseNavigationController *navCommunity = [[BaseNavigationController alloc] initWithRootViewController:community];
-//    navCommunity.tabBarItem = [self tabItemWithTitle:KBLocalized(@"Circle")
-//                                               image:@"tab_shequ"
-//                                         selectedImg:@"tab_shequ_selected"];
+    // AI
+    KBAiMainVC *aiMainVC = [[KBAiMainVC alloc] init];
+    BaseNavigationController *navAi = [[BaseNavigationController alloc] initWithRootViewController:aiMainVC];
+    navAi.tabBarItem = [self tabItemWithTitle:KBLocalized(@"Circle")
+                                        image:@"tab_shequ"
+                                  selectedImg:@"tab_shequ_selected"];

     // 我的
     MyVC *my = [[MyVC alloc] init];
@@ -58,7 +60,7 @@
                                    image:@"tab_my"
                              selectedImg:@"tab_my_selected"];

-    self.viewControllers = @[navHome, navShop, navMy];
+    // Add the navigation controller, not the bare view controller:
+    // aiMainVC already has navAi as its parent, and the tabBarItem is
+    // configured on navAi.
+    self.viewControllers = @[navHome, navShop, navAi, navMy];

     // 测试储存Token
     // [[KBAuthManager shared] saveAccessToken:@"TEST" refreshToken:nil expiryDate:[NSDate dateWithTimeIntervalSinceNow:3600] userIdentifier:nil];
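Example — taken together, the intended flow for one AI reply is LLM (SSE) -> Segmenter -> TTSServiceClient -> TTSPlaybackPipeline -> SubtitleSync. A sketch of the final link, driving the typewriter subtitle from playback progress (the `pipeline`/`subtitleSync`/`segmentTexts` properties are assumptions):

    - (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
                                segmentId:(NSString *)segmentId {
      NSTimeInterval duration = [self.pipeline durationForSegment:segmentId];
      NSString *visible =
          [self.subtitleSync visibleTextForFullText:self.segmentTexts[segmentId]
                                        currentTime:time
                                           duration:duration];
      // Update the chat bubble with `visible` to animate the typewriter effect.
    }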