//
//  ConversationOrchestrator.m
//  keyBoard
//
//  Created by Mac on 2026/1/15.
//
|
|
|
|
|
|
|
|
|
|
|
|
#import "ConversationOrchestrator.h"
|
|
|
|
|
|
#import "ASRStreamClient.h"
|
|
|
|
|
|
#import "AudioCaptureManager.h"
|
|
|
|
|
|
#import "AudioSessionManager.h"
|
|
|
|
|
|
#import "LLMStreamClient.h"
|
|
|
|
|
|
#import "Segmenter.h"
|
|
|
|
|
|
#import "SubtitleSync.h"
|
|
|
|
|
|
#import "TTSPlaybackPipeline.h"
|
|
|
|
|
|
#import "TTSServiceClient.h"
|
|
|
|
|
|
|
|
|
|
|
|
/// Private class extension: declares the delegate protocols this class
/// adopts and the modules and bookkeeping state used only in this file.
@interface ConversationOrchestrator () <AudioSessionManagerDelegate,
                                        AudioCaptureManagerDelegate,
                                        ASRStreamClientDelegate,
                                        LLMStreamClientDelegate,
                                        TTSServiceClientDelegate,
                                        TTSPlaybackPipelineDelegate>

// ---- Modules ----

/// Audio session manager (the shared instance is assigned in -setupModules).
@property (nonatomic, strong) AudioSessionManager *audioSession;
/// Microphone capture; delivers PCM frames and RMS levels via its delegate.
@property (nonatomic, strong) AudioCaptureManager *audioCapture;
/// Streaming speech-recognition client.
@property (nonatomic, strong) ASRStreamClient *asrClient;
/// Streaming LLM client that emits reply tokens.
@property (nonatomic, strong) LLMStreamClient *llmClient;
/// Splits the streamed reply into segments that are sent to TTS.
@property (nonatomic, strong) Segmenter *segmenter;
/// Text-to-speech service client.
@property (nonatomic, strong) TTSServiceClient *ttsClient;
/// Plays the synthesized segments.
@property (nonatomic, strong) TTSPlaybackPipeline *playbackPipeline;
/// Computes how much of a segment's text is visible at a playback time.
@property (nonatomic, strong) SubtitleSync *subtitleSync;

// ---- State ----

/// Current state of the conversation state machine.
@property (nonatomic, assign) ConversationState state;
/// Identifier generated once in -init and sent with every LLM request.
@property (nonatomic, copy) NSString *conversationId;
/// Identifier regenerated each time a recording starts.
@property (nonatomic, copy) NSString *currentSessionId;

// ---- Text tracking ----

/// Accumulates every token of the assistant reply currently being streamed.
@property (nonatomic, strong) NSMutableString *fullAssistantText;
/// Maps segment ids ("seg_N") to the text submitted to TTS for that segment.
@property (nonatomic, strong)
    NSMutableDictionary<NSString *, NSString *> *segmentTextMap;
/// Next segment index; used to build segment ids.
@property (nonatomic, assign) NSInteger segmentCounter;

// ---- Queue ----

/// Serial queue on which state changes and module calls are dispatched.
@property (nonatomic, strong) dispatch_queue_t orchestratorQueue;

@end
|
|
|
|
|
|
|
|
|
|
|
|
@implementation ConversationOrchestrator
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Initialization
|
|
|
|
|
|
|
|
|
|
|
|
/// Creates an idle orchestrator with a fresh conversation id, empty
/// text-tracking buffers, its own serial queue, and all modules wired up.
- (instancetype)init {
  if (!(self = [super init])) {
    return nil;
  }

  // Conversation bookkeeping.
  _state = ConversationStateIdle;
  _segmentCounter = 0;
  _conversationId = [NSUUID UUID].UUIDString;
  _fullAssistantText = [NSMutableString string];
  _segmentTextMap = [NSMutableDictionary dictionary];

  // Serial queue used to serialize orchestration work.
  _orchestratorQueue = dispatch_queue_create(
      "com.keyboard.aitalk.orchestrator", DISPATCH_QUEUE_SERIAL);

  [self setupModules];
  return self;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Instantiates every pipeline module and registers this object as the
/// delegate of each module that has one. Called once from -init.
- (void)setupModules {
  // Audio session (shared manager).
  AudioSessionManager *session = [AudioSessionManager sharedManager];
  self.audioSession = session;
  session.delegate = self;

  // Microphone capture.
  AudioCaptureManager *capture = [[AudioCaptureManager alloc] init];
  self.audioCapture = capture;
  capture.delegate = self;

  // Speech recognition client.
  ASRStreamClient *asr = [[ASRStreamClient alloc] init];
  self.asrClient = asr;
  asr.delegate = self;

  // LLM client.
  LLMStreamClient *llm = [[LLMStreamClient alloc] init];
  self.llmClient = llm;
  llm.delegate = self;

  // Reply segmenter (no delegate).
  self.segmenter = [[Segmenter alloc] init];

  // Text-to-speech client.
  TTSServiceClient *tts = [[TTSServiceClient alloc] init];
  self.ttsClient = tts;
  tts.delegate = self;

  // ElevenLabs configuration (requests go through the backend proxy).
  tts.voiceId = @"JBFqnCBsd6RMkjVDRZzb";        // default voice: George
  tts.languageCode = @"zh";                     // Chinese
  tts.expectedPayloadType = TTSPayloadTypeURL;  // URL mode (the simple one)

  // Playback pipeline.
  TTSPlaybackPipeline *pipeline = [[TTSPlaybackPipeline alloc] init];
  self.playbackPipeline = pipeline;
  pipeline.delegate = self;

  // Subtitle synchronisation (no delegate).
  self.subtitleSync = [[SubtitleSync alloc] init];
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Configuration Setters
|
|
|
|
|
|
|
|
|
|
|
|
/// Forwards the ASR endpoint to the ASR client and keeps a private copy.
/// @param asrServerURL Server URL string for speech recognition.
- (void)setAsrServerURL:(NSString *)asrServerURL {
  self.asrClient.serverURL = asrServerURL;
  _asrServerURL = [asrServerURL copy];
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Forwards the LLM endpoint to the LLM client and keeps a private copy.
/// @param llmServerURL Server URL string for the LLM service.
- (void)setLlmServerURL:(NSString *)llmServerURL {
  self.llmClient.serverURL = llmServerURL;
  _llmServerURL = [llmServerURL copy];
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Forwards the TTS endpoint to the TTS client and keeps a private copy.
/// @param ttsServerURL Server URL string for the TTS service.
- (void)setTtsServerURL:(NSString *)ttsServerURL {
  self.ttsClient.serverURL = ttsServerURL;
  _ttsServerURL = [ttsServerURL copy];
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - User Actions
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles the user pressing the record button.
///
/// Runs on the orchestrator queue: interrupts the assistant if it is
/// thinking or speaking, then starts recording. If microphone permission
/// has not been granted yet it is requested first, and recording starts
/// only when the request is granted.
- (void)userDidPressRecord {
  dispatch_async(self.orchestratorQueue, ^{
    ConversationState current = self.state;
    NSLog(@"[Orchestrator] userDidPressRecord, current state: %ld",
          (long)current);

    // Pressing record while the assistant is busy is a barge-in.
    BOOL assistantBusy = (current == ConversationStateSpeaking ||
                          current == ConversationStateThinking);
    if (assistantBusy) {
      [self performBargein];
    }

    if ([self.audioSession hasMicrophonePermission]) {
      [self startRecording];
      return;
    }

    // No permission yet: ask, then hop back onto the orchestrator queue.
    [self.audioSession requestMicrophonePermission:^(BOOL granted) {
      if (!granted) {
        return;
      }
      dispatch_async(self.orchestratorQueue, ^{
        [self startRecording];
      });
    }];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles the user releasing the record button.
///
/// Only acts while listening: stops capture, asks the ASR client for its
/// final result, and moves to the recognizing state.
- (void)userDidReleaseRecord {
  dispatch_async(self.orchestratorQueue, ^{
    NSLog(@"[Orchestrator] userDidReleaseRecord, current state: %ld",
          (long)self.state);

    if (self.state == ConversationStateListening) {
      [self.audioCapture stopCapture];                  // stop the microphone
      [self.asrClient finalize];                        // request final text
      [self updateState:ConversationStateRecognizing];  // await the result
    }
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Cancels all ongoing work and returns to the idle state. The work is
/// performed asynchronously on the orchestrator queue.
- (void)stop {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    [self cancelAll];
    [self updateState:ConversationStateIdle];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Private: Recording
|
|
|
|
|
|
|
|
|
|
|
|
/// Starts a new recording turn: configures and activates the audio session,
/// opens an ASR session under a new session id, starts microphone capture,
/// and enters the listening state.
///
/// On any failure the error is reported through -reportError: and the
/// method returns without entering the listening state. If capture itself
/// fails, the ASR session is cancelled and the audio session that was just
/// activated is deactivated again so it is not left active.
- (void)startRecording {
  NSError *error = nil;

  // Configure, then activate, the audio session.
  if (![self.audioSession configureForConversation:&error]) {
    [self abortRecordingStartWithError:error];
    return;
  }
  if (![self.audioSession activateSession:&error]) {
    [self abortRecordingStartWithError:error];
    return;
  }

  // Every recording gets its own session id.
  self.currentSessionId = [[NSUUID UUID] UUIDString];

  // Open the ASR stream before audio starts flowing.
  [self.asrClient startWithSessionId:self.currentSessionId];

  // Start the microphone; undo the steps above if that fails.
  if (![self.audioCapture startCapture:&error]) {
    [self.asrClient cancel];
    [self.audioSession deactivateSession];
    [self abortRecordingStartWithError:error];
    return;
  }

  [self updateState:ConversationStateListening];
}

/// Reports a failed recording start. When the attempt followed a barge-in
/// (state still Speaking/Thinking although playback and requests were
/// already cancelled) the state machine is returned to idle so it does not
/// keep claiming the assistant is active.
- (void)abortRecordingStartWithError:(NSError *)error {
  [self reportError:error];

  if (self.state == ConversationStateSpeaking ||
      self.state == ConversationStateThinking) {
    [self updateState:ConversationStateIdle];
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Private: Barge-in (打断)
|
|
|
|
|
|
|
|
|
|
|
|
/// Interrupts the assistant: cancels the TTS, LLM and ASR requests, stops
/// playback, and clears the per-reply bookkeeping. Does not change state.
- (void)performBargein {
  NSLog(@"[Orchestrator] Performing barge-in");

  // Abort every request that may still be in flight.
  [self.ttsClient cancel];
  [self.llmClient cancel];
  [self.asrClient cancel];

  // Silence the assistant.
  [self.playbackPipeline stop];

  // Forget everything about the interrupted reply.
  self.segmentCounter = 0;
  [self.fullAssistantText setString:@""];
  [self.segmentTextMap removeAllObjects];
  [self.segmenter reset];
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Tears down the whole pipeline: stops capture, cancels the ASR/LLM/TTS
/// requests, stops playback, resets the segmenter, and deactivates the
/// audio session. Does not change state; callers do that themselves.
- (void)cancelAll {
  // Input side.
  [self.audioCapture stopCapture];

  // Network requests, in pipeline order.
  [self.asrClient cancel];
  [self.llmClient cancel];
  [self.ttsClient cancel];

  // Output side.
  [self.playbackPipeline stop];
  [self.segmenter reset];

  // Release the audio session last.
  [self.audioSession deactivateSession];
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Private: State Management
|
|
|
|
|
|
|
|
|
|
|
|
/// Moves the state machine to `newState` and notifies observers on the main
/// queue. A transition to the current state is ignored.
///
/// `onStateChange` fires for every transition; `onSpeakingStart` and
/// `onSpeakingEnd` fire when the speaking state is entered or left.
/// @param newState The state to enter.
- (void)updateState:(ConversationState)newState {
  ConversationState oldState = self.state;
  if (oldState == newState) {
    return;
  }

  self.state = newState;
  NSLog(@"[Orchestrator] State: %ld -> %ld", (long)oldState, (long)newState);

  // Work out the speaking edges up front; the callbacks run on main.
  BOOL enteredSpeaking = (newState == ConversationStateSpeaking &&
                          oldState != ConversationStateSpeaking);
  BOOL leftSpeaking = (oldState == ConversationStateSpeaking &&
                       newState != ConversationStateSpeaking);

  dispatch_async(dispatch_get_main_queue(), ^{
    if (self.onStateChange) {
      self.onStateChange(newState);
    }

    if (enteredSpeaking) {
      if (self.onSpeakingStart) {
        self.onSpeakingStart();
      }
    }

    if (leftSpeaking) {
      if (self.onSpeakingEnd) {
        self.onSpeakingEnd();
      }
    }
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Logs the error and delivers it to `onError` on the main queue.
/// @param error The error to report.
- (void)reportError:(NSError *)error {
  NSLog(@"[Orchestrator] Error: %@", error.localizedDescription);

  dispatch_async(dispatch_get_main_queue(), ^{
    if (!self.onError) {
      return;
    }
    self.onError(error);
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - AudioCaptureManagerDelegate
|
|
|
|
|
|
|
|
|
|
|
|
/// Forwards each captured PCM frame to the ASR client.
/// @param pcmFrame The audio frame produced by the capture manager.
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
  ASRStreamClient *recognizer = self.asrClient;
  [recognizer sendAudioPCMFrame:pcmFrame];
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Relays the microphone level to `onVolumeUpdate` on the main queue.
/// @param rms The RMS value reported by the capture manager.
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  dispatch_async(dispatch_get_main_queue(), ^{
    if (!self.onVolumeUpdate) {
      return;
    }
    self.onVolumeUpdate(rms);
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - AudioSessionManagerDelegate
|
|
|
|
|
|
|
|
|
|
|
|
/// Reacts to an audio-session interruption. When an interruption begins,
/// everything is cancelled and the state returns to idle; other
/// interruption types are ignored.
/// @param type The interruption type reported by the session manager.
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
  dispatch_async(self.orchestratorQueue, ^{
    if (type != KBAudioSessionInterruptionTypeBegan) {
      return;
    }

    // Interruption began: stop capturing and playing.
    [self cancelAll];
    [self updateState:ConversationStateIdle];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Reports a permission error (code -1, domain "ConversationOrchestrator")
/// when the session manager signals that microphone access was denied.
- (void)audioSessionManagerMicrophonePermissionDenied {
  // The localized message asks the user to enable the microphone in Settings.
  NSDictionary *details = @{
    NSLocalizedDescriptionKey : @"请在设置中开启麦克风权限"
  };
  NSError *permissionError =
      [NSError errorWithDomain:@"ConversationOrchestrator"
                          code:-1
                      userInfo:details];

  [self reportError:permissionError];
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - ASRStreamClientDelegate
|
|
|
|
|
|
|
|
|
|
|
|
/// Relays an interim recognition result to `onPartialText` on the main queue.
/// @param text The partial transcript.
- (void)asrClientDidReceivePartialText:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    if (!self.onPartialText) {
      return;
    }
    self.onPartialText(text);
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles the final recognition result.
///
/// The transcript is always passed to `onUserFinalText`. An empty transcript
/// returns the state to idle. Otherwise the state becomes thinking, the
/// per-reply bookkeeping is cleared, the playback pipeline is started
/// (a start failure is only logged), and the text is sent to the LLM.
/// @param text The final transcript of the user's utterance.
- (void)asrClientDidReceiveFinalText:(NSString *)text {
  dispatch_async(self.orchestratorQueue, ^{
    NSLog(@"[Orchestrator] ASR final text: %@", text);

    // Hand the user's text to the UI.
    dispatch_async(dispatch_get_main_queue(), ^{
      if (self.onUserFinalText) {
        self.onUserFinalText(text);
      }
    });

    // Nothing was recognized: go back to idle.
    BOOL hasSpeech = (text.length > 0);
    if (!hasSpeech) {
      [self updateState:ConversationStateIdle];
      return;
    }

    [self updateState:ConversationStateThinking];

    // Start the new reply from a clean slate.
    [self.segmenter reset];
    self.segmentCounter = 0;
    [self.segmentTextMap removeAllObjects];
    [self.fullAssistantText setString:@""];

    // Bring up the playback pipeline.
    NSError *pipelineError = nil;
    BOOL pipelineStarted = [self.playbackPipeline start:&pipelineError];
    if (!pipelineStarted) {
      NSLog(@"[Orchestrator] Failed to start playback pipeline: %@",
            pipelineError.localizedDescription);
    }

    // Send the LLM request.
    [self.llmClient sendUserText:text conversationId:self.conversationId];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles an ASR failure: stops microphone capture, reports the error, and
/// returns to the idle state.
///
/// Capture must be stopped here because once the state leaves Listening,
/// -userDidReleaseRecord returns early and would never stop it, so the
/// microphone would keep feeding frames to the failed ASR client.
/// @param error The failure reported by the ASR client.
- (void)asrClientDidFail:(NSError *)error {
  dispatch_async(self.orchestratorQueue, ^{
    [self.audioCapture stopCapture];
    [self reportError:error];
    [self updateState:ConversationStateIdle];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - LLMStreamClientDelegate
|
|
|
|
|
|
|
|
|
|
|
|
/// Consumes one streamed LLM token: appends it to the full reply text,
/// feeds it to the segmenter, and requests TTS for each segment that the
/// segmenter reports as ready.
///
/// A nil token is ignored, because -[NSMutableString appendString:] raises
/// an exception when passed nil.
/// @param token The token received from the LLM stream.
- (void)llmClientDidReceiveToken:(NSString *)token {
  if (token == nil) {
    return;
  }

  dispatch_async(self.orchestratorQueue, ^{
    // Accumulate the full reply text.
    [self.fullAssistantText appendString:token];

    // Feed the segmenter.
    [self.segmenter appendToken:token];

    // Send every segment that is ready to TTS.
    NSArray<NSString *> *readySegments = [self.segmenter popReadySegments];
    for (NSString *segmentText in readySegments) {
      [self requestTTSForSegment:segmentText];
    }
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles the end of the LLM stream: flushes the segmenter's remaining
/// text to TTS (if non-empty) and delivers a snapshot of the full reply to
/// `onAssistantFullText` on the main queue.
- (void)llmClientDidComplete {
  dispatch_async(self.orchestratorQueue, ^{
    NSLog(@"[Orchestrator] LLM complete");

    // Whatever the segmenter still holds becomes the last segment.
    NSString *tail = [self.segmenter flushRemainingSegment];
    if (tail.length > 0) {
      [self requestTTSForSegment:tail];
    }

    // Snapshot the reply here so the main-queue block sees a stable value.
    NSString *replySnapshot = [self.fullAssistantText copy];
    dispatch_async(dispatch_get_main_queue(), ^{
      if (!self.onAssistantFullText) {
        return;
      }
      self.onAssistantFullText(replySnapshot);
    });
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles an LLM failure: reports the error and returns to the idle state.
/// @param error The failure reported by the LLM client.
- (void)llmClientDidFail:(NSError *)error {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    [self reportError:error];
    [self updateState:ConversationStateIdle];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - Private: TTS Request
|
|
|
|
|
|
|
|
|
|
|
|
/// Assigns the next segment id ("seg_N") to the text, records the text in
/// `segmentTextMap`, and asks the TTS client to synthesize it.
/// @param segmentText The text of the segment to synthesize.
- (void)requestTTSForSegment:(NSString *)segmentText {
  // Take the current counter value, then advance it for the next segment.
  NSInteger index = self.segmentCounter;
  self.segmentCounter = index + 1;
  NSString *segmentId = [NSString stringWithFormat:@"seg_%ld", (long)index];

  // Keep the text so it can be looked up by segment id during playback.
  self.segmentTextMap[segmentId] = segmentText;

  NSLog(@"[Orchestrator] Requesting TTS for segment %@: %@", segmentId,
        segmentText);

  [self.ttsClient requestTTSForText:segmentText segmentId:segmentId];
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - TTSServiceClientDelegate
|
|
|
|
|
|
|
|
|
|
|
|
/// Queues a synthesized segment (delivered as a URL) for playback and, if
/// the state is still thinking, switches to speaking.
/// @param url       Location of the synthesized audio.
/// @param segmentId Identifier of the segment the audio belongs to.
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId {
  dispatch_async(self.orchestratorQueue, ^{
    [self.playbackPipeline enqueueURL:url segmentId:segmentId];

    // First audio for this reply: thinking -> speaking.
    BOOL stillThinking = (self.state == ConversationStateThinking);
    if (stillThinking) {
      [self updateState:ConversationStateSpeaking];
    }
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Queues a chunk of synthesized audio for playback and, if the state is
/// still thinking, switches to speaking.
/// @param chunk     The audio data.
/// @param type      Payload type of the chunk.
/// @param segmentId Identifier of the segment the chunk belongs to.
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
                          payloadType:(TTSPayloadType)type
                            segmentId:(NSString *)segmentId {
  dispatch_async(self.orchestratorQueue, ^{
    [self.playbackPipeline enqueueChunk:chunk
                            payloadType:type
                              segmentId:segmentId];

    // First audio for this reply: thinking -> speaking.
    BOOL stillThinking = (self.state == ConversationStateThinking);
    if (stillThinking) {
      [self updateState:ConversationStateSpeaking];
    }
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Tells the playback pipeline that all audio for a segment has arrived.
/// @param segmentId Identifier of the completed segment.
- (void)ttsClientDidFinishSegment:(NSString *)segmentId {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    [self.playbackPipeline markSegmentComplete:segmentId];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Reports a TTS failure. The conversation state is left unchanged.
/// NOTE(review): if no segment ever reaches playback, nothing visible here
/// moves the state out of Thinking — confirm the pipeline handles that.
/// @param error The failure reported by the TTS client.
- (void)ttsClientDidFail:(NSError *)error {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    [self reportError:error];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
#pragma mark - TTSPlaybackPipelineDelegate
|
|
|
|
|
|
|
|
|
|
|
|
/// Logs that playback of a segment has started. The duration is not used.
/// @param segmentId Identifier of the segment that started playing.
/// @param duration  Duration reported by the pipeline (unused).
- (void)pipelineDidStartSegment:(NSString *)segmentId
                       duration:(NSTimeInterval)duration {
  (void)duration;  // unused; only the start is logged
  NSLog(@"[Orchestrator] Started playing segment: %@", segmentId);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Updates the visible subtitle as a segment plays.
///
/// Looks up the segment's text, asks SubtitleSync which part is visible at
/// the given playback time, and delivers that to `onAssistantVisibleText`
/// on the main queue. Unknown segment ids are ignored.
/// @param time      Current playback time within the segment.
/// @param segmentId Identifier of the segment being played.
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
                            segmentId:(NSString *)segmentId {
  dispatch_async(self.orchestratorQueue, ^{
    // Text that was submitted to TTS for this segment.
    NSString *fullSegmentText = self.segmentTextMap[segmentId];
    if (fullSegmentText == nil) {
      return;
    }

    // Work out how much of that text should be visible right now.
    NSTimeInterval segmentDuration =
        [self.playbackPipeline durationForSegment:segmentId];
    NSString *revealedText =
        [self.subtitleSync visibleTextForFullText:fullSegmentText
                                      currentTime:time
                                         duration:segmentDuration];

    // TODO: prepend the text of earlier segments for a full typewriter
    // effect. Simplified for now: only the current segment is shown.
    dispatch_async(dispatch_get_main_queue(), ^{
      if (!self.onAssistantVisibleText) {
        return;
      }
      self.onAssistantVisibleText(revealedText);
    });
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Logs that playback of a segment has finished. No other action is taken.
/// @param segmentId Identifier of the segment that finished playing.
- (void)pipelineDidFinishSegment:(NSString *)segmentId {
  NSString *finishedId = segmentId;
  NSLog(@"[Orchestrator] Finished playing segment: %@", finishedId);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles the pipeline reporting that every queued segment has finished:
/// returns to the idle state and deactivates the audio session.
///
/// The notification is ignored while listening or recognizing. In those
/// states it can only be a late callback from playback that a barge-in
/// already stopped, and acting on it would deactivate the audio session in
/// the middle of the user's new recording.
- (void)pipelineDidFinishAllSegments {
  dispatch_async(self.orchestratorQueue, ^{
    NSLog(@"[Orchestrator] All segments finished");

    BOOL userTurnInProgress =
        (self.state == ConversationStateListening ||
         self.state == ConversationStateRecognizing);
    if (userTurnInProgress) {
      return;
    }

    // Back to idle.
    [self updateState:ConversationStateIdle];
    [self.audioSession deactivateSession];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles a playback failure: reports the error and returns to idle.
/// @param error The failure reported by the playback pipeline.
- (void)pipelineDidFail:(NSError *)error {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    [self reportError:error];
    [self updateState:ConversationStateIdle];
  });
}
|
|
|
|
|
|
|
|
|
|
|
|
@end
|