From 6ad9783bcbf453dc1c1800487a10ee4edca739eb Mon Sep 17 00:00:00 2001 From: CodeST <694468528@qq.com> Date: Thu, 22 Jan 2026 22:03:56 +0800 Subject: [PATCH] 1 --- Shared/KBAPI.h | 2 + Shared/KBConfig.h | 4 +- keyBoard/Class/AiTalk/VC/KBAiMainVC.m | 133 +++++++++- keyBoard/Class/AiTalk/VM/AiVM.h | 44 ++++ keyBoard/Class/AiTalk/VM/AiVM.m | 243 ++++++++++++++++++ .../AiTalk/VM/DeepgramStreamingManager.m | 3 + 6 files changed, 415 insertions(+), 14 deletions(-) diff --git a/Shared/KBAPI.h b/Shared/KBAPI.h index 4cdea9c..b5449c6 100644 --- a/Shared/KBAPI.h +++ b/Shared/KBAPI.h @@ -70,6 +70,8 @@ /// AI #define API_AI_TALK @"/chat/talk" #define API_AI_VOICE_TALK @"/chat/voice" // 语音对话(替换为后端真实路径) +#define API_AI_CHAT_SYNC @"/chat/sync" // 同步对话 +#define API_AI_CHAT_MESSAGE @"/chat/message" // 文本润色 diff --git a/Shared/KBConfig.h b/Shared/KBConfig.h index ed10b90..8a2cce0 100644 --- a/Shared/KBConfig.h +++ b/Shared/KBConfig.h @@ -41,8 +41,8 @@ // 基础baseUrl #ifndef KB_BASE_URL //#define KB_BASE_URL @"https://m1.apifoxmock.com/m1/5438099-5113192-default/" -//#define KB_BASE_URL @"http://192.168.2.21:7529/api" -#define KB_BASE_URL @"https://devcallback.loveamorkey.com/api" +#define KB_BASE_URL @"http://192.168.2.21:7529/api" +//#define KB_BASE_URL @"https://devcallback.loveamorkey.com/api" #endif #import "KBFont.h" diff --git a/keyBoard/Class/AiTalk/VC/KBAiMainVC.m b/keyBoard/Class/AiTalk/VC/KBAiMainVC.m index 481a632..a21c96f 100644 --- a/keyBoard/Class/AiTalk/VC/KBAiMainVC.m +++ b/keyBoard/Class/AiTalk/VC/KBAiMainVC.m @@ -7,17 +7,22 @@ #import "KBAiMainVC.h" #import "ConversationOrchestrator.h" +#import "AiVM.h" +#import "AudioSessionManager.h" #import "DeepgramStreamingManager.h" #import "KBAICommentView.h" #import "KBAiChatView.h" #import "KBAiRecordButton.h" +#import "KBHUD.h" #import "LSTPopView.h" #import "VoiceChatStreamingManager.h" #import "KBUserSessionManager.h" +#import @interface KBAiMainVC () + DeepgramStreamingManagerDelegate, + AVAudioPlayerDelegate> @property(nonatomic, weak) LSTPopView *popView; // UI @@ -36,6 +41,9 @@ @property(nonatomic, strong) ConversationOrchestrator *orchestrator; @property(nonatomic, strong) VoiceChatStreamingManager *streamingManager; @property(nonatomic, strong) DeepgramStreamingManager *deepgramManager; +@property(nonatomic, strong) AiVM *aiVM; +@property(nonatomic, strong) AVAudioPlayer *aiAudioPlayer; +@property(nonatomic, strong) NSMutableData *voiceChatAudioBuffer; // 文本跟踪 @property(nonatomic, strong) NSMutableString *assistantVisibleText; @@ -60,7 +68,8 @@ [self setupUI]; [self setupOrchestrator]; [self setupStreamingManager]; - [self setupDeepgramManager]; + // 切换到 websocket-api 方案,Deepgram 暂不初始化 + // [self setupDeepgramManager]; } - (void)viewWillAppear:(BOOL)animated { @@ -145,15 +154,15 @@ self.transcriptLabel.font = [UIFont systemFontOfSize:16]; self.transcriptLabel.textColor = [UIColor labelColor]; self.transcriptLabel.numberOfLines = 0; - self.transcriptLabel.textAlignment = NSTextAlignmentLeft; + self.transcriptLabel.textAlignment = NSTextAlignmentRight; self.transcriptLabel.translatesAutoresizingMaskIntoConstraints = NO; [self.view addSubview:self.transcriptLabel]; // 聊天视图 - // self.chatView = [[KBAiChatView alloc] init]; - // self.chatView.backgroundColor = [UIColor systemBackgroundColor]; - // self.chatView.translatesAutoresizingMaskIntoConstraints = NO; - // [self.view addSubview:self.chatView]; + self.chatView = [[KBAiChatView alloc] init]; + self.chatView.backgroundColor = [UIColor clearColor]; + self.chatView.translatesAutoresizingMaskIntoConstraints = NO; + [self.view addSubview:self.chatView]; // 录音按钮 self.recordButton = [[KBAiRecordButton alloc] init]; @@ -198,6 +207,12 @@ make.top.equalTo(self.statusLabel.mas_bottom).offset(8); make.left.equalTo(self.view).offset(16); make.right.equalTo(self.view).offset(-16); + }]; + + [self.chatView mas_makeConstraints:^(MASConstraintMaker *make) { + make.top.equalTo(self.transcriptLabel.mas_bottom).offset(8); + make.left.equalTo(self.view).offset(16); + make.right.equalTo(self.view).offset(-16); make.bottom.lessThanOrEqualTo(self.recordButton.mas_top).offset(-16); }]; @@ -325,6 +340,7 @@ self.streamingManager.delegate = self; self.streamingManager.serverURL = @"ws://192.168.2.21:7529/api/ws/chat"; self.assistantVisibleText = [[NSMutableString alloc] init]; + self.voiceChatAudioBuffer = [[NSMutableData alloc] init]; self.lastRMSLogTime = 0; } @@ -346,6 +362,7 @@ [self.deepgramManager prepareConnection]; self.deepgramFullText = [[NSMutableString alloc] init]; + self.aiVM = [[AiVM alloc] init]; } #pragma mark - 事件 @@ -446,19 +463,20 @@ self.statusLabel.text = @"正在连接..."; self.recordButton.state = KBAiRecordButtonStateRecording; - [self.deepgramFullText setString:@""]; self.transcriptLabel.text = @""; - [self.deepgramManager start]; + [self.voiceChatAudioBuffer setLength:0]; + [self.streamingManager startWithToken:token language:@"en" voiceId:nil]; } - (void)recordButtonDidEndPress:(KBAiRecordButton *)button { NSLog(@"[KBAiMainVC] Record button end press"); - [self.deepgramManager stopAndFinalize]; + [self.streamingManager stopAndFinalize]; } - (void)recordButtonDidCancelPress:(KBAiRecordButton *)button { NSLog(@"[KBAiMainVC] Record button cancel press"); - [self.deepgramManager cancel]; + [self.voiceChatAudioBuffer setLength:0]; + [self.streamingManager cancel]; } #pragma mark - VoiceChatStreamingManagerDelegate @@ -503,11 +521,15 @@ } - (void)voiceChatStreamingManagerDidReceiveInterimTranscript:(NSString *)text { - self.statusLabel.text = text.length > 0 ? text : @"正在识别..."; + self.statusLabel.text = @"正在识别..."; + if (text.length > 0) { + self.transcriptLabel.text = text; + } } - (void)voiceChatStreamingManagerDidReceiveFinalTranscript:(NSString *)text { if (text.length > 0) { + self.transcriptLabel.text = @""; [self.chatView addUserMessage:text]; } } @@ -516,6 +538,7 @@ self.statusLabel.text = @"AI 正在思考..."; [self.assistantVisibleText setString:@""]; [self.chatView addAssistantMessage:@""]; + [self.voiceChatAudioBuffer setLength:0]; } - (void)voiceChatStreamingManagerDidReceiveLLMToken:(NSString *)token { @@ -528,15 +551,29 @@ } - (void)voiceChatStreamingManagerDidReceiveAudioChunk:(NSData *)audioData { + if (audioData.length == 0) { + return; + } + [self.voiceChatAudioBuffer appendData:audioData]; } - (void)voiceChatStreamingManagerDidCompleteWithTranscript:(NSString *)transcript aiResponse:(NSString *)aiResponse { NSString *finalText = aiResponse.length > 0 ? aiResponse : self.assistantVisibleText; + if (aiResponse.length > 0) { + [self.assistantVisibleText setString:aiResponse]; + } if (finalText.length > 0) { [self.chatView updateLastAssistantMessage:finalText]; [self.chatView markLastAssistantMessageComplete]; + } else if (transcript.length > 0) { + [self.chatView addAssistantMessage:transcript]; + [self.chatView markLastAssistantMessageComplete]; + } + if (self.voiceChatAudioBuffer.length > 0) { + [self playAiAudioData:self.voiceChatAudioBuffer]; + [self.voiceChatAudioBuffer setLength:0]; } self.recordButton.state = KBAiRecordButtonStateNormal; self.statusLabel.text = @"完成"; @@ -591,6 +628,40 @@ self.transcriptLabel.text = self.deepgramFullText; self.statusLabel.text = @"识别完成"; self.recordButton.state = KBAiRecordButtonStateNormal; + +// NSString *finalText = [self.deepgramFullText copy]; +// if (finalText.length > 0) { +// __weak typeof(self) weakSelf = self; +// [KBHUD show]; +// [self.aiVM syncChatWithTranscript:finalText +// completion:^(KBAiSyncResponse *_Nullable response, +// NSError *_Nullable error) { +// __strong typeof(weakSelf) strongSelf = weakSelf; +// if (!strongSelf) { +// return; +// } +// dispatch_async(dispatch_get_main_queue(), ^{ +// [KBHUD dismiss]; +// if (error) { +// [KBHUD showError:error.localizedDescription ?: @"请求失败"]; +// return; +// } +// +// NSString *aiResponse = response.data.aiResponse ?: @""; +// if (aiResponse.length > 0) { +// NSLog(@"[KBAiMainVC] /chat/sync aiResponse: %@", aiResponse); +// } +// +// NSData *audioData = response.data.audioData; +// if (audioData.length > 0) { +// NSLog(@"[KBAiMainVC] /chat/sync audio ready, start play"); +// [strongSelf playAiAudioData:audioData]; +// } else { +// NSLog(@"[KBAiMainVC] /chat/sync audioData empty"); +// } +// }); +// }]; +// } } - (void)deepgramStreamingManagerDidFail:(NSError *)error { @@ -598,4 +669,42 @@ [self showError:error]; } +#pragma mark - Audio Playback + +- (void)playAiAudioData:(NSData *)audioData { + if (audioData.length == 0) { + return; + } + + NSError *sessionError = nil; + AudioSessionManager *audioSession = [AudioSessionManager sharedManager]; + if (![audioSession configureForPlayback:&sessionError]) { + NSLog(@"[KBAiMainVC] Configure playback failed: %@", + sessionError.localizedDescription ?: @""); + } + if (![audioSession activateSession:&sessionError]) { + NSLog(@"[KBAiMainVC] Activate playback session failed: %@", + sessionError.localizedDescription ?: @""); + } + + NSError *error = nil; + self.aiAudioPlayer = [[AVAudioPlayer alloc] initWithData:audioData + error:&error]; + if (error || !self.aiAudioPlayer) { + NSLog(@"[KBAiMainVC] Audio player init failed: %@", + error.localizedDescription ?: @""); + return; + } + self.aiAudioPlayer.delegate = self; + [self.aiAudioPlayer prepareToPlay]; + [self.aiAudioPlayer play]; +} + +#pragma mark - AVAudioPlayerDelegate + +- (void)audioPlayerDidFinishPlaying:(AVAudioPlayer *)player + successfully:(BOOL)flag { + [[AudioSessionManager sharedManager] deactivateSession]; +} + @end diff --git a/keyBoard/Class/AiTalk/VM/AiVM.h b/keyBoard/Class/AiTalk/VM/AiVM.h index 2bd6fe9..84ed2fc 100644 --- a/keyBoard/Class/AiTalk/VM/AiVM.h +++ b/keyBoard/Class/AiTalk/VM/AiVM.h @@ -9,8 +9,52 @@ NS_ASSUME_NONNULL_BEGIN +@interface KBAiSyncData : NSObject +@property(nonatomic, copy, nullable) NSString *aiResponse; +@property(nonatomic, copy, nullable) NSString *audioBase64; +@property(nonatomic, strong, nullable) NSData *audioData; +@end + +@interface KBAiSyncResponse : NSObject +@property(nonatomic, assign) NSInteger code; +@property(nonatomic, strong, nullable) KBAiSyncData *data; +@end + +typedef void (^AiVMSyncCompletion)(KBAiSyncResponse *_Nullable response, + NSError *_Nullable error); + +@interface KBAiMessageData : NSObject +@property(nonatomic, copy, nullable) NSString *content; +@property(nonatomic, copy, nullable) NSString *text; +@property(nonatomic, copy, nullable) NSString *message; +@end + +@interface KBAiMessageResponse : NSObject +@property(nonatomic, assign) NSInteger code; +@property(nonatomic, strong, nullable) KBAiMessageData *data; +@end + +typedef void (^AiVMMessageCompletion)(KBAiMessageResponse *_Nullable response, + NSError *_Nullable error); + +typedef void (^AiVMElevenLabsCompletion)(NSData *_Nullable audioData, + NSError *_Nullable error); + @interface AiVM : NSObject +- (void)syncChatWithTranscript:(NSString *)transcript + completion:(AiVMSyncCompletion)completion; + +- (void)requestChatMessageWithContent:(NSString *)content + completion:(AiVMMessageCompletion)completion; + +- (void)requestElevenLabsSpeechWithText:(NSString *)text + voiceId:(NSString *)voiceId + apiKey:(NSString *)apiKey + outputFormat:(nullable NSString *)outputFormat + modelId:(nullable NSString *)modelId + completion:(AiVMElevenLabsCompletion)completion; + @end NS_ASSUME_NONNULL_END diff --git a/keyBoard/Class/AiTalk/VM/AiVM.m b/keyBoard/Class/AiTalk/VM/AiVM.m index 322cd55..58dff39 100644 --- a/keyBoard/Class/AiTalk/VM/AiVM.m +++ b/keyBoard/Class/AiTalk/VM/AiVM.m @@ -6,7 +6,250 @@ // #import "AiVM.h" +#import "KBAPI.h" +#import "KBNetworkManager.h" +#import + +@implementation KBAiSyncData + +- (void)setAudioBase64:(NSString *)audioBase64 { + if (![audioBase64 isKindOfClass:[NSString class]]) { + _audioBase64 = nil; + self.audioData = nil; + return; + } + _audioBase64 = [audioBase64 copy]; + if (_audioBase64.length == 0) { + self.audioData = nil; + return; + } + + NSString *cleanBase64 = _audioBase64; + NSRange commaRange = [cleanBase64 rangeOfString:@","]; + if ([cleanBase64 hasPrefix:@"data:"] && commaRange.location != NSNotFound) { + cleanBase64 = [cleanBase64 substringFromIndex:commaRange.location + 1]; + } + + self.audioData = [[NSData alloc] + initWithBase64EncodedString:cleanBase64 + options:NSDataBase64DecodingIgnoreUnknownCharacters]; +} + +@end + +@implementation KBAiSyncResponse +@end + +@implementation KBAiMessageData +@end + +@implementation KBAiMessageResponse +@end @implementation AiVM +- (void)syncChatWithTranscript:(NSString *)transcript + completion:(AiVMSyncCompletion)completion { + if (transcript.length == 0) { + NSError *error = [NSError + errorWithDomain:@"AiVM" + code:-1 + userInfo:@{NSLocalizedDescriptionKey : @"transcript is empty"}]; + if (completion) { + completion(nil, error); + } + return; + } + + NSDictionary *params = @{ @"transcript" : transcript ?: @"" }; + CFAbsoluteTime startTime = CFAbsoluteTimeGetCurrent(); + NSLog(@"[AiVM] /chat/sync request: %@", params); + [[KBNetworkManager shared] + POST:API_AI_CHAT_SYNC + jsonBody:params + headers:nil +autoShowBusinessError:NO + completion:^(NSDictionary *_Nullable json, + NSURLResponse *_Nullable response, + NSError *_Nullable error) { + CFAbsoluteTime elapsed = + (CFAbsoluteTimeGetCurrent() - startTime) * 1000.0; + if (error) { + NSLog(@"[AiVM] /chat/sync failed: %@", + error.localizedDescription ?: @""); + NSLog(@"[AiVM] /chat/sync duration: %.0f ms", elapsed); + if (completion) { + completion(nil, error); + } + return; + } + NSLog(@"[AiVM] /chat/sync response received"); + NSLog(@"[AiVM] /chat/sync duration: %.0f ms", elapsed); + KBAiSyncResponse *model = + [KBAiSyncResponse mj_objectWithKeyValues:json]; + + if (completion) { + completion(model, nil); + } + }]; +} + +- (void)requestChatMessageWithContent:(NSString *)content + completion:(AiVMMessageCompletion)completion { + if (content.length == 0) { + NSError *error = [NSError + errorWithDomain:@"AiVM" + code:-1 + userInfo:@{NSLocalizedDescriptionKey : @"content is empty"}]; + if (completion) { + completion(nil, error); + } + return; + } + + NSString *encodedContent = + [content stringByAddingPercentEncodingWithAllowedCharacters: + [NSCharacterSet URLQueryAllowedCharacterSet]]; + NSString *path = [NSString + stringWithFormat:@"%@?content=%@", API_AI_CHAT_MESSAGE, + encodedContent ?: @""]; + NSDictionary *params = @{ @"content" : content ?: @"" }; + [[KBNetworkManager shared] + POST:path + jsonBody:params + headers:nil +autoShowBusinessError:NO + completion:^(NSDictionary *_Nullable json, + NSURLResponse *_Nullable response, + NSError *_Nullable error) { + if (error) { + if (completion) { + completion(nil, error); + } + return; + } + + KBAiMessageResponse *model = + [KBAiMessageResponse mj_objectWithKeyValues:json]; + if (completion) { + completion(model, nil); + } + }]; +} + +- (void)requestElevenLabsSpeechWithText:(NSString *)text + voiceId:(NSString *)voiceId + apiKey:(NSString *)apiKey + outputFormat:(NSString *)outputFormat + modelId:(NSString *)modelId + completion:(AiVMElevenLabsCompletion)completion { + if (text.length == 0 || voiceId.length == 0 || apiKey.length == 0) { + NSError *error = [NSError + errorWithDomain:@"AiVM" + code:-1 + userInfo:@{NSLocalizedDescriptionKey : @"invalid parameters"}]; + if (completion) { + completion(nil, error); + } + return; + } + + NSString *format = outputFormat.length > 0 ? outputFormat : @"mp3_44100_128"; + NSString *model = modelId.length > 0 ? modelId : @"eleven_multilingual_v2"; + NSString *escapedVoiceId = + [voiceId stringByAddingPercentEncodingWithAllowedCharacters: + [NSCharacterSet URLPathAllowedCharacterSet]]; + NSString *escapedFormat = + [format stringByAddingPercentEncodingWithAllowedCharacters: + [NSCharacterSet URLQueryAllowedCharacterSet]]; + NSString *urlString = + [NSString stringWithFormat:@"https://api.elevenlabs.io/v1/text-to-speech/%@/stream?output_format=%@", + escapedVoiceId ?: @"", + escapedFormat ?: @""]; + NSURL *url = [NSURL URLWithString:urlString]; + if (!url) { + NSError *error = [NSError + errorWithDomain:@"AiVM" + code:-1 + userInfo:@{NSLocalizedDescriptionKey : @"invalid URL"}]; + if (completion) { + completion(nil, error); + } + return; + } + + NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url]; + request.HTTPMethod = @"POST"; + [request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"]; + [request setValue:@"audio/mpeg" forHTTPHeaderField:@"Accept"]; + [request setValue:apiKey forHTTPHeaderField:@"xi-api-key"]; + + NSDictionary *body = @{ + @"text" : text ?: @"", + @"model_id" : model ?: @"" + }; + NSError *jsonError = nil; + NSData *jsonData = [NSJSONSerialization dataWithJSONObject:body + options:0 + error:&jsonError]; + if (jsonError) { + if (completion) { + completion(nil, jsonError); + } + return; + } + request.HTTPBody = jsonData; + + NSURLSessionConfiguration *config = + [NSURLSessionConfiguration defaultSessionConfiguration]; + NSURLSession *session = [NSURLSession sessionWithConfiguration:config]; + NSURLSessionDataTask *task = + [session dataTaskWithRequest:request + completionHandler:^(NSData *_Nullable data, + NSURLResponse *_Nullable response, + NSError *_Nullable error) { + if (error) { + if (completion) { + completion(nil, error); + } + return; + } + + if (![response isKindOfClass:[NSHTTPURLResponse class]]) { + NSError *respError = [NSError + errorWithDomain:@"AiVM" + code:-1 + userInfo:@{ + NSLocalizedDescriptionKey : + @"invalid response" + }]; + if (completion) { + completion(nil, respError); + } + return; + } + + NSInteger status = + ((NSHTTPURLResponse *)response).statusCode; + if (status < 200 || status >= 300 || data.length == 0) { + NSError *respError = [NSError + errorWithDomain:@"AiVM" + code:status + userInfo:@{ + NSLocalizedDescriptionKey : + @"request failed" + }]; + if (completion) { + completion(nil, respError); + } + return; + } + + if (completion) { + completion(data, nil); + } + }]; + [task resume]; +} + @end diff --git a/keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.m b/keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.m index a961f2b..4658813 100644 --- a/keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.m +++ b/keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.m @@ -178,6 +178,9 @@ static NSString *const kDeepgramStreamingManagerErrorDomain = } [self.pendingFrames removeAllObjects]; self.pendingStart = NO; + if (self.client.isConnected) { + [self.client finish]; + } [self.client disableAudioSending]; [self startKeepAliveIfNeeded]; });