377 lines
12 KiB
Mathematica
377 lines
12 KiB
Mathematica
|
|
//
|
||
|
|
// VoiceChatStreamingManager.m
|
||
|
|
// keyBoard
|
||
|
|
//
|
||
|
|
// Created by Mac on 2026/1/21.
|
||
|
|
//
|
||
|
|
|
||
|
|
#import "VoiceChatStreamingManager.h"
|
||
|
|
#import "AudioCaptureManager.h"
|
||
|
|
#import "AudioSessionManager.h"
|
||
|
|
#import "VoiceChatWebSocketClient.h"
|
||
|
|
|
||
|
|
// Error domain used for every NSError that this file constructs itself.
static NSString *const kVoiceChatStreamingManagerErrorDomain = @"VoiceChatStreamingManager";
|
||
|
|
|
||
|
|
// Class extension: private protocol conformances and internal state.
@interface VoiceChatStreamingManager () <AudioSessionManagerDelegate,
                                         AudioCaptureManagerDelegate,
                                         VoiceChatWebSocketClientDelegate>

// Collaborators; all three are created or fetched in -init, which also makes
// this object their delegate.
@property (nonatomic, strong) AudioSessionManager *audioSession;
@property (nonatomic, strong) AudioCaptureManager *audioCapture;
@property (nonatomic, strong) VoiceChatWebSocketClient *webSocketClient;

// Serial queue on which start/stop/cancel/disconnect work is performed.
@property (nonatomic, strong) dispatch_queue_t stateQueue;

// YES from the moment capture starts for a session until it is stopped.
@property (nonatomic, assign) BOOL streaming;
// Identifier handed over by -voiceChatClientDidStartSession:; nil otherwise.
@property (nonatomic, copy) NSString *sessionId;

// Arguments of the most recent -startWithToken:language:voiceId: call
// (nil inputs are stored as empty strings).
@property (nonatomic, copy) NSString *pendingToken;
@property (nonatomic, copy) NSString *pendingLanguage;
@property (nonatomic, copy) NSString *pendingVoiceId;

@end
|
||
|
|
|
||
|
|
@implementation VoiceChatStreamingManager
|
||
|
|
|
||
|
|
/// Creates the manager, its serial state queue and its collaborators, and
/// registers the manager as delegate of the shared audio session, the audio
/// capture manager and the WebSocket client.
///
/// NOTE(review): the default endpoint below is a plain `ws://` URL on a
/// private LAN address; it looks like a development default -- confirm that
/// callers always override it via -setServerURL: before shipping.
- (instancetype)init {
  // Default endpoint used until the caller supplies another server URL.
  static NSString *const kDefaultServerURL =
      @"ws://192.168.2.21:7529/api/ws/chat?token=";

  self = [super init];
  if (!self) {
    return nil;
  }

  _stateQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.manager",
                                      DISPATCH_QUEUE_SERIAL);

  // The audio session is a shared instance; taking over its delegate here
  // replaces whatever delegate it had before.
  AudioSessionManager *session = [AudioSessionManager sharedManager];
  session.delegate = self;
  _audioSession = session;

  AudioCaptureManager *capture = [[AudioCaptureManager alloc] init];
  capture.delegate = self;
  _audioCapture = capture;

  VoiceChatWebSocketClient *client = [[VoiceChatWebSocketClient alloc] init];
  client.delegate = self;
  _webSocketClient = client;

  _serverURL = kDefaultServerURL;
  client.serverURL = _serverURL;

  return self;
}
|
||
|
|
|
||
|
|
/// Tears down capture, the WebSocket connection and the audio session.
///
/// This deliberately does NOT call -disconnect: that method enqueues a block
/// that captures `self`, and under ARC retaining an object that is already
/// deallocating does not keep it alive, so the block would later run against
/// freed memory. Instead the collaborators are captured as locals and the
/// same teardown steps are enqueued without touching `self`.
- (void)dealloc {
  AudioCaptureManager *capture = _audioCapture;
  VoiceChatWebSocketClient *client = _webSocketClient;
  AudioSessionManager *session = _audioSession;
  // No block holding `self` can still be pending on the state queue (it would
  // keep the object alive), so reading the ivar directly is safe here.
  BOOL wasStreaming = _streaming;

  // Stop callbacks into this object. The shared session's delegate is only
  // cleared if it still points here, so another owner is not disturbed.
  capture.delegate = nil;
  client.delegate = nil;
  if (session.delegate == (id)self) {
    session.delegate = nil;
  }

  dispatch_async(_stateQueue, ^{
    if (wasStreaming) {
      [capture stopCapture];
    }
    [client disableAudioSending];
    [client disconnect];
    [session deactivateSession];
  });
}
|
||
|
|
|
||
|
|
/// Stores a copy of the server URL and pushes the same value to the
/// WebSocket client so both stay in sync.
///
/// @param serverURL The endpoint to use for subsequent connections.
- (void)setServerURL:(NSString *)serverURL {
  NSString *urlCopy = [serverURL copy];
  _serverURL = urlCopy;
  [self.webSocketClient setServerURL:urlCopy];
}
|
||
|
|
|
||
|
|
#pragma mark - Public Methods
|
||
|
|
|
||
|
|
/// Records the start parameters and kicks off the start sequence on the
/// state queue. Returns immediately; failures are reported asynchronously
/// through the delegate.
///
/// @param token    Token passed to the WebSocket client when connecting.
///                 nil is stored as an empty string, which -startInternal
///                 rejects.
/// @param language Optional language; nil is stored as an empty string.
/// @param voiceId  Optional voice identifier; nil is stored as an empty string.
- (void)startWithToken:(NSString *)token
              language:(nullable NSString *)language
               voiceId:(nullable NSString *)voiceId {
  dispatch_block_t beginStart = ^{
    self.pendingToken = (token != nil) ? token : @"";
    self.pendingLanguage = (language != nil) ? language : @"";
    self.pendingVoiceId = (voiceId != nil) ? voiceId : @"";

    // Audio sending is switched back on only after a session has started.
    [self.webSocketClient disableAudioSending];
    [self startInternal];
  };
  dispatch_async(self.stateQueue, beginStart);
}
|
||
|
|
|
||
|
|
/// Stops capturing (if streaming), disables audio sending and tells the
/// WebSocket client that the audio has ended. Work is done asynchronously on
/// the state queue; the connection itself is left open.
- (void)stopAndFinalize {
  dispatch_async(self.stateQueue, ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      [self setStreaming:NO];
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client endAudio];
  });
}
|
||
|
|
|
||
|
|
/// Stops capturing (if streaming), disables audio sending, cancels on the
/// WebSocket client and forgets the current session id. Work is done
/// asynchronously on the state queue.
- (void)cancel {
  dispatch_async(self.stateQueue, ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      [self setStreaming:NO];
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client cancel];

    [self setSessionId:nil];
  });
}
|
||
|
|
|
||
|
|
/// Full teardown: stops capturing (if streaming), disables audio sending,
/// disconnects the WebSocket client, deactivates the audio session and
/// forgets the current session id. Work is done asynchronously on the state
/// queue.
- (void)disconnect {
  dispatch_async(self.stateQueue, ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      [self setStreaming:NO];
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client disconnect];

    [self.audioSession deactivateSession];
    [self setSessionId:nil];
  });
}
|
||
|
|
|
||
|
|
#pragma mark - Private Methods
|
||
|
|
|
||
|
|
/// Performs the start sequence: validates the pending token and server URL,
/// makes sure microphone permission is granted, configures and activates the
/// audio session, then asks the WebSocket client to connect.
///
/// Invoked on the state queue (from -startWithToken:language:voiceId: and,
/// after a permission prompt, re-dispatched onto the same queue). Every
/// failure is reported through -reportError: / -reportErrorWithMessage: and
/// aborts the sequence.
///
/// Both cheap input checks run before the audio session is touched, so an
/// empty server URL can no longer leave the session configured and activated
/// with nothing to deactivate it.
- (void)startInternal {
  if (self.pendingToken.length == 0) {
    NSLog(@"[VoiceChatStreamingManager] Start failed: token is empty");
    [self reportErrorWithMessage:@"Token is required"];
    return;
  }

  if (self.serverURL.length == 0) {
    NSLog(@"[VoiceChatStreamingManager] Start failed: server URL is empty");
    [self reportErrorWithMessage:@"Server URL is required"];
    return;
  }

  if (![self.audioSession hasMicrophonePermission]) {
    // Ask for permission and, if granted, restart this sequence on the state
    // queue. self is held weakly so a pending prompt does not keep the
    // manager alive.
    __weak typeof(self) weakSelf = self;
    [self.audioSession requestMicrophonePermission:^(BOOL granted) {
      __strong typeof(weakSelf) strongSelf = weakSelf;
      if (!strongSelf) {
        return;
      }
      if (!granted) {
        [strongSelf reportErrorWithMessage:@"Microphone permission denied"];
        return;
      }
      dispatch_async(strongSelf.stateQueue, ^{
        [strongSelf startInternal];
      });
    }];
    return;
  }

  NSError *error = nil;
  if (![self.audioSession configureForConversation:&error]) {
    [self reportError:error];
    return;
  }

  if (![self.audioSession activateSession:&error]) {
    [self reportError:error];
    return;
  }

  NSLog(@"[VoiceChatStreamingManager] Start streaming, server: %@",
        self.serverURL);
  self.webSocketClient.serverURL = self.serverURL;
  [self.webSocketClient connectWithToken:self.pendingToken];
}
|
||
|
|
|
||
|
|
/// Delivers an error to the delegate on the main queue, provided the
/// delegate implements -voiceChatStreamingManagerDidFail:.
///
/// @param error The error to forward unchanged.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidFail:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidFail:error];
  });
}
|
||
|
|
|
||
|
|
/// Wraps a message in an NSError (this file's error domain, code -1) and
/// reports it via -reportError:.
///
/// @param message Text for NSLocalizedDescriptionKey; nil becomes an empty
///                string.
- (void)reportErrorWithMessage:(NSString *)message {
  NSString *description = (message != nil) ? message : @"";
  NSDictionary *info = @{NSLocalizedDescriptionKey : description};
  NSError *wrapped =
      [NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
                          code:-1
                      userInfo:info];
  [self reportError:wrapped];
}
|
||
|
|
|
||
|
|
#pragma mark - AudioCaptureManagerDelegate
|
||
|
|
|
||
|
|
/// Hands a PCM frame to the WebSocket client while streaming is on; frames
/// that arrive when streaming is off are dropped.
///
/// @param pcmFrame The frame data, passed through unchanged.
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
  if (self.streaming) {
    [self.webSocketClient sendAudioPCMFrame:pcmFrame];
  }
}
|
||
|
|
|
||
|
|
/// Relays an RMS value to the delegate on the main queue, if the delegate
/// implements -voiceChatStreamingManagerDidUpdateRMS:.
///
/// @param rms The value to forward unchanged.
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidUpdateRMS:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidUpdateRMS:rms];
  });
}
|
||
|
|
|
||
|
|
#pragma mark - AudioSessionManagerDelegate
|
||
|
|
|
||
|
|
/// Cancels the current exchange when an interruption begins; every other
/// interruption type is ignored.
///
/// @param type The interruption type reported by the audio session manager.
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
  if (type != KBAudioSessionInterruptionTypeBegan) {
    return;
  }
  [self cancel];
}
|
||
|
|
|
||
|
|
/// Reports a "Microphone permission denied" error to the delegate.
- (void)audioSessionManagerMicrophonePermissionDenied {
  NSString *const reason = @"Microphone permission denied";
  [self reportErrorWithMessage:reason];
}
|
||
|
|
|
||
|
|
#pragma mark - VoiceChatWebSocketClientDelegate
|
||
|
|
|
||
|
|
/// On connect: asks the WebSocket client (on the state queue) to start a
/// session with the pending language and voice id, and notifies the delegate
/// on the main queue if it implements -voiceChatStreamingManagerDidConnect.
- (void)voiceChatClientDidConnect {
  dispatch_block_t beginSession = ^{
    NSString *language = self.pendingLanguage;
    NSString *voiceId = self.pendingVoiceId;
    [self.webSocketClient startSessionWithLanguage:language voiceId:voiceId];
  };
  dispatch_async(self.stateQueue, beginSession);

  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidConnect);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidConnect];
  });
}
|
||
|
|
|
||
|
|
/// On disconnect: stops capture if streaming, deactivates the audio session
/// and clears the session id (all on the state queue), then notifies the
/// delegate on the main queue if it implements
/// -voiceChatStreamingManagerDidDisconnect:.
///
/// @param error Optional error, forwarded to the delegate unchanged.
- (void)voiceChatClientDidDisconnect:(NSError *_Nullable)error {
  dispatch_block_t resetState = ^{
    BOOL wasStreaming = self.streaming;
    if (wasStreaming) {
      [self.audioCapture stopCapture];
      [self setStreaming:NO];
    }
    [self.audioSession deactivateSession];
    [self setSessionId:nil];
  };
  dispatch_async(self.stateQueue, resetState);

  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidDisconnect:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidDisconnect:error];
  });
}
|
||
|
|
|
||
|
|
/// On session start (state queue): stores the session id and starts audio
/// capture. If capture fails, the error is reported and the WebSocket client
/// is cancelled. Otherwise streaming is flagged on, audio sending is enabled,
/// and the delegate is told on the main queue if it implements
/// -voiceChatStreamingManagerDidStartSession:.
///
/// @param sessionId The session identifier, stored and forwarded unchanged.
- (void)voiceChatClientDidStartSession:(NSString *)sessionId {
  dispatch_async(self.stateQueue, ^{
    [self setSessionId:sessionId];

    NSError *captureError = nil;
    BOOL captureStarted = [self.audioCapture startCapture:&captureError];
    if (!captureStarted) {
      [self reportError:captureError];
      [self.webSocketClient cancel];
      return;
    }

    // Mark streaming before enabling sending so captured frames are accepted.
    [self setStreaming:YES];
    [self.webSocketClient enableAudioSending];

    dispatch_async(dispatch_get_main_queue(), ^{
      SEL callback = @selector(voiceChatStreamingManagerDidStartSession:);
      if (![self.delegate respondsToSelector:callback]) {
        return;
      }
      [self.delegate voiceChatStreamingManagerDidStartSession:sessionId];
    });
  });
}
|
||
|
|
|
||
|
|
/// Relays the turn index to the delegate on the main queue, if the delegate
/// implements -voiceChatStreamingManagerDidStartTurn:.
///
/// @param turnIndex The index to forward unchanged.
- (void)voiceChatClientDidStartTurn:(NSInteger)turnIndex {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidStartTurn:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidStartTurn:turnIndex];
  });
}
|
||
|
|
|
||
|
|
/// Relays an eager end-of-turn transcript and its confidence to the delegate
/// on the main queue, if the delegate implements the matching optional
/// method.
///
/// @param text       The transcript, forwarded unchanged.
/// @param confidence The confidence value, forwarded unchanged.
- (void)voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:(NSString *)text
                                                   confidence:(double)confidence {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector
        (voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:
                                                              confidence:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate
        voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:text
                                                             confidence:confidence];
  });
}
|
||
|
|
|
||
|
|
/// Tells the delegate on the main queue that the turn resumed, if the
/// delegate implements -voiceChatStreamingManagerDidResumeTurn.
- (void)voiceChatClientDidResumeTurn {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidResumeTurn);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidResumeTurn];
  });
}
|
||
|
|
|
||
|
|
/// Relays an interim transcript to the delegate on the main queue, if the
/// delegate implements -voiceChatStreamingManagerDidReceiveInterimTranscript:.
///
/// @param text The transcript, forwarded unchanged.
- (void)voiceChatClientDidReceiveInterimTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback =
        @selector(voiceChatStreamingManagerDidReceiveInterimTranscript:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidReceiveInterimTranscript:text];
  });
}
|
||
|
|
|
||
|
|
/// Relays a final transcript to the delegate on the main queue, if the
/// delegate implements -voiceChatStreamingManagerDidReceiveFinalTranscript:.
///
/// @param text The transcript, forwarded unchanged.
- (void)voiceChatClientDidReceiveFinalTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback =
        @selector(voiceChatStreamingManagerDidReceiveFinalTranscript:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidReceiveFinalTranscript:text];
  });
}
|
||
|
|
|
||
|
|
/// Tells the delegate on the main queue that the LLM response started, if
/// the delegate implements -voiceChatStreamingManagerDidReceiveLLMStart.
- (void)voiceChatClientDidReceiveLLMStart {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidReceiveLLMStart);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidReceiveLLMStart];
  });
}
|
||
|
|
|
||
|
|
/// Relays an LLM token to the delegate on the main queue, if the delegate
/// implements -voiceChatStreamingManagerDidReceiveLLMToken:.
///
/// @param token The token text, forwarded unchanged.
- (void)voiceChatClientDidReceiveLLMToken:(NSString *)token {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidReceiveLLMToken:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidReceiveLLMToken:token];
  });
}
|
||
|
|
|
||
|
|
/// Relays an audio chunk to the delegate on the main queue, if the delegate
/// implements -voiceChatStreamingManagerDidReceiveAudioChunk:.
///
/// @param audioData The chunk, forwarded unchanged.
- (void)voiceChatClientDidReceiveAudioChunk:(NSData *)audioData {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector(voiceChatStreamingManagerDidReceiveAudioChunk:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidReceiveAudioChunk:audioData];
  });
}
|
||
|
|
|
||
|
|
/// Relays the completed transcript and AI response to the delegate on the
/// main queue, if the delegate implements the matching optional method.
///
/// @param transcript The transcript, forwarded unchanged.
/// @param aiResponse The AI response, forwarded unchanged.
- (void)voiceChatClientDidCompleteWithTranscript:(NSString *)transcript
                                      aiResponse:(NSString *)aiResponse {
  dispatch_async(dispatch_get_main_queue(), ^{
    SEL callback = @selector
        (voiceChatStreamingManagerDidCompleteWithTranscript:aiResponse:);
    if (![self.delegate respondsToSelector:callback]) {
      return;
    }
    [self.delegate voiceChatStreamingManagerDidCompleteWithTranscript:transcript
                                                           aiResponse:aiResponse];
  });
}
|
||
|
|
|
||
|
|
/// Converts an error code/message pair into an NSError (this file's error
/// domain, code -2) and reports it via -reportError:.
///
/// @param code    Stored under the @"code" userInfo key; nil becomes an empty
///                string.
/// @param message Used as the localized description; a nil or empty message
///                is replaced by "Server error".
- (void)voiceChatClientDidReceiveErrorCode:(NSString *)code
                                   message:(NSString *)message {
  NSString *description = @"Server error";
  if (message.length > 0) {
    description = message;
  }
  NSString *serverCode = (code != nil) ? code : @"";

  NSDictionary *details = @{
    NSLocalizedDescriptionKey : description,
    @"code" : serverCode
  };
  NSError *serverError =
      [NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
                          code:-2
                      userInfo:details];
  [self reportError:serverError];
}
|
||
|
|
|
||
|
|
/// Passes a WebSocket client failure straight on to -reportError:, which
/// forwards it to the delegate.
///
/// @param error The failure, forwarded unchanged.
- (void)voiceChatClientDidFail:(NSError *)error {
  [self reportError:error];
}
|
||
|
|
|
||
|
|
@end
|