Add voice WebSocket support (ASR/LLM/TTS streaming); not tested yet

2026-01-16 13:38:03 +08:00
parent 169a1929d7
commit b021fd308f
33 changed files with 5098 additions and 8 deletions


@@ -0,0 +1,51 @@
//
// ASRStreamClient.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Delegate for the streaming ASR client
@protocol ASRStreamClientDelegate <NSObject>
@required
/// Received a real-time (partial) recognition result
- (void)asrClientDidReceivePartialText:(NSString *)text;
/// Received the final recognition result
- (void)asrClientDidReceiveFinalText:(NSString *)text;
/// Recognition failed
- (void)asrClientDidFail:(NSError *)error;
@end
/// Streaming ASR client
/// Uses NSURLSessionWebSocketTask for streaming speech recognition
@interface ASRStreamClient : NSObject
@property(nonatomic, weak) id<ASRStreamClientDelegate> delegate;
/// WebSocket URL of the ASR server
@property(nonatomic, copy) NSString *serverURL;
/// Whether the client is connected
@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected;
/// Start a new recognition session
/// @param sessionId Session ID
- (void)startWithSessionId:(NSString *)sessionId;
/// Send a PCM audio frame (20 ms / 640 bytes)
/// @param pcmFrame PCM data
- (void)sendAudioPCMFrame:(NSData *)pcmFrame;
/// End the current session and request the final result
- (void)finalize;
/// Cancel the session
- (void)cancel;
@end
NS_ASSUME_NONNULL_END
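A minimal usage sketch for ASRStreamClient based on the header above; the demo class and URL are hypothetical and not part of this commit:

// Hypothetical caller, for illustration only.
#import "ASRStreamClient.h"

@interface ASRDemo : NSObject <ASRStreamClientDelegate>
@property(nonatomic, strong) ASRStreamClient *asr;
@end

@implementation ASRDemo
- (void)startDictation {
  self.asr = [[ASRStreamClient alloc] init];
  self.asr.delegate = self;
  self.asr.serverURL = @"wss://example.com/ws/asr"; // placeholder endpoint
  [self.asr startWithSessionId:[[NSUUID UUID] UUIDString]];
  // Feed 20 ms / 640-byte PCM frames from the capture callback:
  //   [self.asr sendAudioPCMFrame:pcmFrame];
  // When the user releases the record button:
  //   [self.asr finalize];
}
- (void)asrClientDidReceivePartialText:(NSString *)text { NSLog(@"partial: %@", text); }
- (void)asrClientDidReceiveFinalText:(NSString *)text { NSLog(@"final: %@", text); }
- (void)asrClientDidFail:(NSError *)error { NSLog(@"ASR error: %@", error); }
@end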


@@ -0,0 +1,271 @@
//
// ASRStreamClient.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "ASRStreamClient.h"
#import "AudioCaptureManager.h"
@interface ASRStreamClient () <NSURLSessionWebSocketDelegate>
@property(nonatomic, strong) NSURLSession *urlSession;
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;
@property(nonatomic, copy) NSString *currentSessionId;
@property(nonatomic, strong) dispatch_queue_t networkQueue;
@property(nonatomic, assign) BOOL connected;
@end
@implementation ASRStreamClient
- (instancetype)init {
self = [super init];
if (self) {
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.asr.network",
DISPATCH_QUEUE_SERIAL);
// TODO: replace with the actual ASR server address
_serverURL = @"wss://your-asr-server.com/ws/asr";
}
return self;
}
- (void)dealloc {
[self cancel];
}
#pragma mark - Public Methods
- (void)startWithSessionId:(NSString *)sessionId {
dispatch_async(self.networkQueue, ^{
[self cancelInternal];
self.currentSessionId = sessionId;
// Create the WebSocket connection
NSURL *url = [NSURL URLWithString:self.serverURL];
NSURLSessionConfiguration *config =
[NSURLSessionConfiguration defaultSessionConfiguration];
config.timeoutIntervalForRequest = 30;
config.timeoutIntervalForResource = 300;
self.urlSession = [NSURLSession sessionWithConfiguration:config
delegate:self
delegateQueue:nil];
self.webSocketTask = [self.urlSession webSocketTaskWithURL:url];
[self.webSocketTask resume];
// Send the "start" handshake message
NSDictionary *startMessage = @{
@"type" : @"start",
@"sessionId" : sessionId,
@"format" : @"pcm_s16le",
@"sampleRate" : @(kAudioSampleRate),
@"channels" : @(kAudioChannels)
};
NSError *jsonError = nil;
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:startMessage
options:0
error:&jsonError];
if (jsonError) {
[self reportError:jsonError];
return;
}
NSString *jsonString = [[NSString alloc] initWithData:jsonData
encoding:NSUTF8StringEncoding];
NSURLSessionWebSocketMessage *message =
[[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
[self.webSocketTask
sendMessage:message
completionHandler:^(NSError *_Nullable error) {
if (error) {
[self reportError:error];
} else {
self.connected = YES;
[self receiveMessage];
NSLog(@"[ASRStreamClient] Started session: %@", sessionId);
}
}];
});
}
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
if (!self.connected || !self.webSocketTask) {
return;
}
dispatch_async(self.networkQueue, ^{
NSURLSessionWebSocketMessage *message =
[[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
[self.webSocketTask sendMessage:message
completionHandler:^(NSError *_Nullable error) {
if (error) {
NSLog(@"[ASRStreamClient] Failed to send audio frame: %@",
error.localizedDescription);
}
}];
});
}
- (void)finalize {
if (!self.connected || !self.webSocketTask) {
return;
}
dispatch_async(self.networkQueue, ^{
NSDictionary *finalizeMessage =
@{@"type" : @"finalize", @"sessionId" : self.currentSessionId ?: @""};
NSError *jsonError = nil;
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:finalizeMessage
options:0
error:&jsonError];
if (jsonError) {
[self reportError:jsonError];
return;
}
NSString *jsonString = [[NSString alloc] initWithData:jsonData
encoding:NSUTF8StringEncoding];
NSURLSessionWebSocketMessage *message =
[[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
[self.webSocketTask sendMessage:message
completionHandler:^(NSError *_Nullable error) {
if (error) {
[self reportError:error];
} else {
NSLog(@"[ASRStreamClient] Sent finalize for session: %@",
self.currentSessionId);
}
}];
});
}
- (void)cancel {
dispatch_async(self.networkQueue, ^{
[self cancelInternal];
});
}
#pragma mark - Private Methods
- (void)cancelInternal {
self.connected = NO;
if (self.webSocketTask) {
[self.webSocketTask cancel];
self.webSocketTask = nil;
}
if (self.urlSession) {
[self.urlSession invalidateAndCancel];
self.urlSession = nil;
}
self.currentSessionId = nil;
}
- (void)receiveMessage {
if (!self.webSocketTask) {
return;
}
__weak typeof(self) weakSelf = self;
[self.webSocketTask receiveMessageWithCompletionHandler:^(
NSURLSessionWebSocketMessage *_Nullable message,
NSError *_Nullable error) {
__strong typeof(weakSelf) strongSelf = weakSelf;
if (!strongSelf)
return;
if (error) {
// Ignore errors caused by the socket being closed or cancelled
if (error.code != 57 && error.code != NSURLErrorCancelled) {
[strongSelf reportError:error];
}
return;
}
if (message.type == NSURLSessionWebSocketMessageTypeString) {
[strongSelf handleTextMessage:message.string];
}
// Keep listening for the next message
[strongSelf receiveMessage];
}];
}
- (void)handleTextMessage:(NSString *)text {
NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
NSError *jsonError = nil;
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data
options:0
error:&jsonError];
if (jsonError) {
NSLog(@"[ASRStreamClient] Failed to parse message: %@", text);
return;
}
NSString *type = json[@"type"];
if ([type isEqualToString:@"partial"]) {
NSString *partialText = json[@"text"] ?: @"";
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(asrClientDidReceivePartialText:)]) {
[self.delegate asrClientDidReceivePartialText:partialText];
}
});
} else if ([type isEqualToString:@"final"]) {
NSString *finalText = json[@"text"] ?: @"";
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(asrClientDidReceiveFinalText:)]) {
[self.delegate asrClientDidReceiveFinalText:finalText];
}
});
// Final result received; tear down the session
[self cancelInternal];
} else if ([type isEqualToString:@"error"]) {
NSInteger code = [json[@"code"] integerValue];
NSString *message = json[@"message"] ?: @"Unknown error";
NSError *error =
[NSError errorWithDomain:@"ASRStreamClient"
code:code
userInfo:@{NSLocalizedDescriptionKey : message}];
[self reportError:error];
}
}
- (void)reportError:(NSError *)error {
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector(asrClientDidFail:)]) {
[self.delegate asrClientDidFail:error];
}
});
}
#pragma mark - NSURLSessionWebSocketDelegate
- (void)URLSession:(NSURLSession *)session
webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
didOpenWithProtocol:(NSString *)protocol {
NSLog(@"[ASRStreamClient] WebSocket connected with protocol: %@", protocol);
}
- (void)URLSession:(NSURLSession *)session
webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
reason:(NSData *)reason {
NSLog(@"[ASRStreamClient] WebSocket closed with code: %ld", (long)closeCode);
self.connected = NO;
}
@end


@@ -0,0 +1,55 @@
//
// AudioCaptureManager.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Audio capture parameters (fixed values for end-to-end stability)
/// Sample Rate: 16000 Hz
/// Channels: 1 (Mono)
/// Format: PCM Int16 (pcm_s16le)
/// Frame Duration: 20 ms (320 samples, 640 bytes)
extern const double kAudioSampleRate; // 16000.0
extern const int kAudioChannels; // 1
extern const NSUInteger kAudioFrameDuration; // 20 (ms)
extern const NSUInteger kAudioFrameSamples; // 320 (16000 * 0.02)
extern const NSUInteger kAudioFrameBytes; // 640 (320 * 2)
/// Delegate for the audio capture manager
@protocol AudioCaptureManagerDelegate <NSObject>
@required
/// Emits a PCM frame (20 ms / 640 bytes)
/// @param pcmFrame 640 bytes of PCM Int16 data
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame;
@optional
/// RMS level update (for the waveform UI)
/// @param rms RMS of the current volume (0.0 - 1.0)
- (void)audioCaptureManagerDidUpdateRMS:(float)rms;
@end
/// Audio capture manager
/// Captures microphone audio with AVAudioEngine and emits 20 ms PCM frames
@interface AudioCaptureManager : NSObject
@property(nonatomic, weak) id<AudioCaptureManagerDelegate> delegate;
/// Whether capture is running
@property(nonatomic, assign, readonly, getter=isCapturing) BOOL capturing;
/// Start capturing
/// @param error Error information
/// @return Whether capture started successfully
- (BOOL)startCapture:(NSError **)error;
/// Stop capturing
- (void)stopCapture;
@end
NS_ASSUME_NONNULL_END
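A minimal usage sketch for AudioCaptureManager; the demo class is hypothetical and not part of this commit:

// Hypothetical caller, for illustration only.
#import "AudioCaptureManager.h"

@interface CaptureDemo : NSObject <AudioCaptureManagerDelegate>
@property(nonatomic, strong) AudioCaptureManager *capture;
@end

@implementation CaptureDemo
- (void)begin {
  self.capture = [[AudioCaptureManager alloc] init];
  self.capture.delegate = self;
  NSError *error = nil;
  if (![self.capture startCapture:&error]) {
    NSLog(@"capture failed: %@", error);
  }
}
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
  // Each frame is kAudioFrameBytes (640) bytes: 20 ms of 16 kHz mono Int16 audio.
}
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  // Drive the waveform UI with the 0.0 - 1.0 level.
}
@end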


@@ -0,0 +1,269 @@
//
// AudioCaptureManager.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "AudioCaptureManager.h"
#import <AVFoundation/AVFoundation.h>
// Fixed capture parameters (declared in AudioCaptureManager.h)
const double kAudioSampleRate = 16000.0;
const int kAudioChannels = 1;
const NSUInteger kAudioFrameDuration = 20; // ms
const NSUInteger kAudioFrameSamples = 320; // 16000 * 0.02
const NSUInteger kAudioFrameBytes = 640; // 320 * 2 (Int16)
@interface AudioCaptureManager ()
@property(nonatomic, strong) AVAudioEngine *audioEngine;
@property(nonatomic, strong) dispatch_queue_t audioQueue;
@property(nonatomic, assign) BOOL capturing;
// Ring buffer for accumulating samples to form 20ms frames
@property(nonatomic, strong) NSMutableData *ringBuffer;
@property(nonatomic, assign) NSUInteger ringBufferWriteIndex;
@end
@implementation AudioCaptureManager
- (instancetype)init {
self = [super init];
if (self) {
_audioEngine = [[AVAudioEngine alloc] init];
_audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
DISPATCH_QUEUE_SERIAL);
_ringBuffer = [[NSMutableData alloc]
initWithLength:kAudioFrameBytes * 4]; // Buffer for multiple frames
_ringBufferWriteIndex = 0;
_capturing = NO;
}
return self;
}
- (void)dealloc {
[self stopCapture];
}
#pragma mark - Public Methods
- (BOOL)startCapture:(NSError **)error {
if (self.capturing) {
return YES;
}
AVAudioInputNode *inputNode = self.audioEngine.inputNode;
// Hardware input format
AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];
// 16kHz, Mono, Int16
AVAudioFormat *targetFormat =
[[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
sampleRate:kAudioSampleRate
channels:kAudioChannels
interleaved:YES];
// Converter from the hardware format to the target format
AVAudioConverter *converter =
[[AVAudioConverter alloc] initFromFormat:inputFormat
toFormat:targetFormat];
if (!converter) {
if (error) {
*error = [NSError errorWithDomain:@"AudioCaptureManager"
code:-1
userInfo:@{
NSLocalizedDescriptionKey :
@"Failed to create audio converter"
}];
}
return NO;
}
// Tap buffer size: roughly 20 ms of input at the hardware sample rate
AVAudioFrameCount bufferSize =
(AVAudioFrameCount)(inputFormat.sampleRate * 0.02);
// Install a tap to receive microphone buffers
__weak typeof(self) weakSelf = self;
[inputNode installTapOnBus:0
bufferSize:bufferSize
format:inputFormat
block:^(AVAudioPCMBuffer *_Nonnull buffer,
AVAudioTime *_Nonnull when) {
[weakSelf processAudioBuffer:buffer
withConverter:converter
targetFormat:targetFormat];
}];
// Start the engine
NSError *startError = nil;
[self.audioEngine prepare];
if (![self.audioEngine startAndReturnError:&startError]) {
[inputNode removeTapOnBus:0];
if (error) {
*error = startError;
}
NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
startError.localizedDescription);
return NO;
}
self.capturing = YES;
self.ringBufferWriteIndex = 0;
NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
inputFormat.sampleRate);
return YES;
}
- (void)stopCapture {
if (!self.capturing) {
return;
}
[self.audioEngine.inputNode removeTapOnBus:0];
[self.audioEngine stop];
self.capturing = NO;
self.ringBufferWriteIndex = 0;
NSLog(@"[AudioCaptureManager] Stopped capturing");
}
#pragma mark - Audio Processing
- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
withConverter:(AVAudioConverter *)converter
targetFormat:(AVAudioFormat *)targetFormat {
if (!self.capturing) {
return;
}
// Estimate the output frame capacity after resampling
AVAudioFrameCount outputFrameCapacity =
(AVAudioFrameCount)(buffer.frameLength *
(kAudioSampleRate / buffer.format.sampleRate)) +
1;
// Allocate the output buffer
AVAudioPCMBuffer *outputBuffer =
[[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
frameCapacity:outputFrameCapacity];
// Convert to 16 kHz mono Int16; provide the input buffer exactly once
NSError *conversionError = nil;
__block BOOL providedInput = NO;
AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
AVAudioPacketCount inNumberOfPackets,
AVAudioConverterInputStatus *_Nonnull outStatus) {
if (providedInput) {
*outStatus = AVAudioConverterInputStatus_NoDataNow;
return nil;
}
providedInput = YES;
*outStatus = AVAudioConverterInputStatus_HaveData;
return buffer;
};
AVAudioConverterOutputStatus status =
[converter convertToBuffer:outputBuffer
error:&conversionError
withInputFromBlock:inputBlock];
if (status == AVAudioConverterOutputStatus_Error) {
NSLog(@"[AudioCaptureManager] Conversion error: %@",
conversionError.localizedDescription);
return;
}
// Interleaved Int16 samples from the converted buffer
int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0];
NSUInteger sampleCount = outputBuffer.frameLength;
NSUInteger byteCount = sampleCount * sizeof(int16_t);
// Compute and report RMS for the waveform UI
[self calculateAndReportRMS:samples sampleCount:sampleCount];
// Copy the samples so the converted buffer can be released before the async block runs
NSData *pcmData = [NSData dataWithBytes:samples length:byteCount];
dispatch_async(self.audioQueue, ^{
[self appendToRingBuffer:(int16_t *)pcmData.bytes byteCount:pcmData.length];
});
}
- (void)appendToRingBuffer:(int16_t *)samples byteCount:(NSUInteger)byteCount {
// Copy incoming samples into the ring buffer
uint8_t *ringBufferBytes = (uint8_t *)self.ringBuffer.mutableBytes;
NSUInteger ringBufferLength = self.ringBuffer.length;
NSUInteger bytesToCopy = byteCount;
NSUInteger sourceOffset = 0;
while (bytesToCopy > 0) {
NSUInteger spaceAvailable = ringBufferLength - self.ringBufferWriteIndex;
NSUInteger copySize = MIN(bytesToCopy, spaceAvailable);
memcpy(ringBufferBytes + self.ringBufferWriteIndex,
(uint8_t *)samples + sourceOffset, copySize);
self.ringBufferWriteIndex += copySize;
sourceOffset += copySize;
bytesToCopy -= copySize;
// Emit complete 20 ms frames as they become available
while (self.ringBufferWriteIndex >= kAudioFrameBytes) {
// Copy one full frame out of the buffer
NSData *frame = [NSData dataWithBytes:ringBufferBytes
length:kAudioFrameBytes];
// Shift any remaining bytes to the front
NSUInteger remaining = self.ringBufferWriteIndex - kAudioFrameBytes;
if (remaining > 0) {
memmove(ringBufferBytes, ringBufferBytes + kAudioFrameBytes, remaining);
}
self.ringBufferWriteIndex = remaining;
// Deliver the frame
[self outputPCMFrame:frame];
}
// Defensive: the frame loop above should keep the index below the buffer length
if (self.ringBufferWriteIndex >= ringBufferLength) {
self.ringBufferWriteIndex = 0;
}
}
}
- (void)outputPCMFrame:(NSData *)frame {
if (!self.capturing) {
return;
}
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector
(audioCaptureManagerDidOutputPCMFrame:)]) {
[self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
}
});
}
- (void)calculateAndReportRMS:(int16_t *)samples
sampleCount:(NSUInteger)sampleCount {
if (sampleCount == 0)
return;
// Root-mean-square over the frame, normalized to 0.0 - 1.0
double sum = 0.0;
for (NSUInteger i = 0; i < sampleCount; i++) {
double sample = (double)samples[i] / 32768.0; // Normalize to -1.0 ~ 1.0
sum += sample * sample;
}
double rms = sqrt(sum / sampleCount);
float rmsFloat = (float)MIN(rms * 2.0, 1.0); // Scale and clamp to 0.0 ~ 1.0
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
[self.delegate audioCaptureManagerDidUpdateRMS:rmsFloat];
}
});
}
@end


@@ -0,0 +1,66 @@
//
// AudioSessionManager.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <AVFoundation/AVFoundation.h>
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Audio session interruption type
typedef NS_ENUM(NSInteger, KBAudioSessionInterruptionType) {
KBAudioSessionInterruptionTypeBegan, // Interruption began (incoming call, etc.)
KBAudioSessionInterruptionTypeEnded // Interruption ended
};
/// Delegate for the audio session manager
@protocol AudioSessionManagerDelegate <NSObject>
@optional
/// The audio session was interrupted
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type;
/// The audio route changed
- (void)audioSessionManagerRouteDidChange;
/// Microphone permission was denied
- (void)audioSessionManagerMicrophonePermissionDenied;
@end
/// Audio session manager
/// Handles AVAudioSession configuration, permission requests, and interruptions
@interface AudioSessionManager : NSObject
@property(nonatomic, weak) id<AudioSessionManagerDelegate> delegate;
/// Shared instance
+ (instancetype)sharedManager;
/// Request microphone permission
/// @param completion Completion callback; granted indicates whether permission was given
- (void)requestMicrophonePermission:(void (^)(BOOL granted))completion;
/// Check the microphone permission status
- (BOOL)hasMicrophonePermission;
/// Configure the audio session for conversation (record + playback)
/// @param error Error information
/// @return Whether configuration succeeded
- (BOOL)configureForConversation:(NSError **)error;
/// Configure the audio session for playback only
/// @param error Error information
/// @return Whether configuration succeeded
- (BOOL)configureForPlayback:(NSError **)error;
/// Activate the audio session
/// @param error Error information
/// @return Whether activation succeeded
- (BOOL)activateSession:(NSError **)error;
/// Deactivate the audio session
- (void)deactivateSession;
@end
NS_ASSUME_NONNULL_END
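A minimal setup sketch for AudioSessionManager; the helper function is hypothetical and not part of this commit:

// Hypothetical helper, for illustration only.
#import "AudioSessionManager.h"

static void PrepareConversationAudio(void) {
  AudioSessionManager *session = [AudioSessionManager sharedManager];
  [session requestMicrophonePermission:^(BOOL granted) {
    if (!granted) { return; }
    NSError *error = nil;
    if (![session configureForConversation:&error] ||
        ![session activateSession:&error]) {
      NSLog(@"audio session setup failed: %@", error);
      return;
    }
    // Safe to start capture / playback from here.
  }];
}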


@@ -0,0 +1,234 @@
//
// AudioSessionManager.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "AudioSessionManager.h"
@interface AudioSessionManager ()
@property(nonatomic, assign) BOOL isSessionActive;
@end
@implementation AudioSessionManager
#pragma mark - Singleton
+ (instancetype)sharedManager {
static AudioSessionManager *instance = nil;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
instance = [[AudioSessionManager alloc] init];
});
return instance;
}
- (instancetype)init {
self = [super init];
if (self) {
_isSessionActive = NO;
[self setupNotifications];
}
return self;
}
- (void)dealloc {
[[NSNotificationCenter defaultCenter] removeObserver:self];
}
#pragma mark - Notifications
- (void)setupNotifications {
// Interruption notifications (incoming calls, alarms, etc.)
[[NSNotificationCenter defaultCenter]
addObserver:self
selector:@selector(handleInterruption:)
name:AVAudioSessionInterruptionNotification
object:nil];
// Route change notifications (headphones, Bluetooth, etc.)
[[NSNotificationCenter defaultCenter]
addObserver:self
selector:@selector(handleRouteChange:)
name:AVAudioSessionRouteChangeNotification
object:nil];
}
- (void)handleInterruption:(NSNotification *)notification {
NSDictionary *info = notification.userInfo;
AVAudioSessionInterruptionType type =
[info[AVAudioSessionInterruptionTypeKey] unsignedIntegerValue];
if (type == AVAudioSessionInterruptionTypeBegan) {
// Interruption began
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(audioSessionManagerDidInterrupt:)]) {
[self.delegate audioSessionManagerDidInterrupt:
KBAudioSessionInterruptionTypeBegan];
}
});
} else if (type == AVAudioSessionInterruptionTypeEnded) {
// Interruption ended
AVAudioSessionInterruptionOptions options =
[info[AVAudioSessionInterruptionOptionKey] unsignedIntegerValue];
if (options & AVAudioSessionInterruptionOptionShouldResume) {
// The system indicates playback may resume; reactivate the session
[self activateSession:nil];
}
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(audioSessionManagerDidInterrupt:)]) {
[self.delegate audioSessionManagerDidInterrupt:
KBAudioSessionInterruptionTypeEnded];
}
});
}
}
- (void)handleRouteChange:(NSNotification *)notification {
NSDictionary *info = notification.userInfo;
AVAudioSessionRouteChangeReason reason =
[info[AVAudioSessionRouteChangeReasonKey] unsignedIntegerValue];
switch (reason) {
case AVAudioSessionRouteChangeReasonOldDeviceUnavailable:
case AVAudioSessionRouteChangeReasonNewDeviceAvailable: {
// A device was added or removed; notify the delegate
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector
(audioSessionManagerRouteDidChange)]) {
[self.delegate audioSessionManagerRouteDidChange];
}
});
break;
}
default:
break;
}
}
#pragma mark - Microphone Permission
- (void)requestMicrophonePermission:(void (^)(BOOL))completion {
AVAudioSession *session = [AVAudioSession sharedInstance];
[session requestRecordPermission:^(BOOL granted) {
dispatch_async(dispatch_get_main_queue(), ^{
if (!granted) {
if ([self.delegate respondsToSelector:@selector
(audioSessionManagerMicrophonePermissionDenied)]) {
[self.delegate audioSessionManagerMicrophonePermissionDenied];
}
}
if (completion) {
completion(granted);
}
});
}];
}
- (BOOL)hasMicrophonePermission {
AVAudioSession *session = [AVAudioSession sharedInstance];
return session.recordPermission == AVAudioSessionRecordPermissionGranted;
}
#pragma mark - Session Configuration
- (BOOL)configureForConversation:(NSError **)error {
AVAudioSession *session = [AVAudioSession sharedInstance];
// Recording + playback
// Category: PlayAndRecord - capture and play back simultaneously
// Mode: VoiceChat - voice processing (echo cancellation) for two-way audio
// Options:
//   - DefaultToSpeaker: route output to the built-in speaker by default
//   - AllowBluetooth: allow Bluetooth hands-free devices
NSError *categoryError = nil;
BOOL success =
[session setCategory:AVAudioSessionCategoryPlayAndRecord
mode:AVAudioSessionModeVoiceChat
options:(AVAudioSessionCategoryOptionDefaultToSpeaker |
AVAudioSessionCategoryOptionAllowBluetooth)
error:&categoryError];
if (!success) {
if (error) {
*error = categoryError;
}
NSLog(@"[AudioSessionManager] Failed to configure session: %@",
categoryError.localizedDescription);
return NO;
}
return YES;
}
- (BOOL)configureForPlayback:(NSError **)error {
AVAudioSession *session = [AVAudioSession sharedInstance];
// Playback only. DefaultToSpeaker is only valid with PlayAndRecord, so pass no options here.
NSError *categoryError = nil;
BOOL success =
[session setCategory:AVAudioSessionCategoryPlayback
mode:AVAudioSessionModeDefault
options:0
error:&categoryError];
if (!success) {
if (error) {
*error = categoryError;
}
NSLog(@"[AudioSessionManager] Failed to configure playback: %@",
categoryError.localizedDescription);
return NO;
}
return YES;
}
- (BOOL)activateSession:(NSError **)error {
if (self.isSessionActive) {
return YES;
}
AVAudioSession *session = [AVAudioSession sharedInstance];
NSError *activationError = nil;
BOOL success = [session setActive:YES error:&activationError];
if (!success) {
if (error) {
*error = activationError;
}
NSLog(@"[AudioSessionManager] Failed to activate session: %@",
activationError.localizedDescription);
return NO;
}
self.isSessionActive = YES;
return YES;
}
- (void)deactivateSession {
if (!self.isSessionActive) {
return;
}
AVAudioSession *session = [AVAudioSession sharedInstance];
NSError *error = nil;
// Use NotifyOthersOnDeactivation so other apps can resume their audio
[session setActive:NO
withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
error:&error];
if (error) {
NSLog(@"[AudioSessionManager] Failed to deactivate session: %@",
error.localizedDescription);
}
self.isSessionActive = NO;
}
@end


@@ -0,0 +1,63 @@
//
// AudioStreamPlayer.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Delegate for the streaming audio player
@protocol AudioStreamPlayerDelegate <NSObject>
@optional
/// Started playing a segment
- (void)audioStreamPlayerDidStartSegment:(NSString *)segmentId;
/// Playback time updated
- (void)audioStreamPlayerDidUpdateTime:(NSTimeInterval)time
segmentId:(NSString *)segmentId;
/// Finished playing a segment
- (void)audioStreamPlayerDidFinishSegment:(NSString *)segmentId;
@end
/// Streaming PCM player
/// Uses AVAudioEngine + AVAudioPlayerNode for low-latency playback
@interface AudioStreamPlayer : NSObject
@property(nonatomic, weak) id<AudioStreamPlayerDelegate> delegate;
/// Whether playback is running
@property(nonatomic, assign, readonly, getter=isPlaying) BOOL playing;
/// Start the player
/// @param error Error information
/// @return Whether the player started successfully
- (BOOL)start:(NSError **)error;
/// Stop the player
- (void)stop;
/// Enqueue a PCM chunk
/// @param pcmData PCM Int16 data
/// @param sampleRate Sample rate
/// @param channels Channel count
/// @param segmentId Segment ID
- (void)enqueuePCMChunk:(NSData *)pcmData
sampleRate:(double)sampleRate
channels:(int)channels
segmentId:(NSString *)segmentId;
/// Current playback time of a segment
/// @param segmentId Segment ID
/// @return Current time in seconds
- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId;
/// Total duration of a segment
/// @param segmentId Segment ID
/// @return Total duration in seconds
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
@end
NS_ASSUME_NONNULL_END
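A minimal usage sketch for AudioStreamPlayer; the helper function is hypothetical and not part of this commit:

// Hypothetical helper, for illustration only.
#import "AudioStreamPlayer.h"

static void PlayPCMChunk(NSData *pcm16kMono) {
  AudioStreamPlayer *player = [[AudioStreamPlayer alloc] init];
  NSError *error = nil;
  if (![player start:&error]) {
    NSLog(@"player start failed: %@", error);
    return;
  }
  // Chunks sharing a segmentId belong to one segment; a new ID starts a new segment.
  [player enqueuePCMChunk:pcm16kMono
               sampleRate:16000
                 channels:1
                segmentId:@"seg_0"];
}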


@@ -0,0 +1,246 @@
//
// AudioStreamPlayer.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "AudioStreamPlayer.h"
#import <AVFoundation/AVFoundation.h>
@interface AudioStreamPlayer ()
@property(nonatomic, strong) AVAudioEngine *audioEngine;
@property(nonatomic, strong) AVAudioPlayerNode *playerNode;
@property(nonatomic, strong) AVAudioFormat *playbackFormat;
// Segment tracking
@property(nonatomic, copy) NSString *currentSegmentId;
@property(nonatomic, strong)
NSMutableDictionary<NSString *, NSNumber *> *segmentDurations;
@property(nonatomic, strong)
NSMutableDictionary<NSString *, NSNumber *> *segmentStartTimes;
@property(nonatomic, assign) NSUInteger scheduledSamples;
@property(nonatomic, assign) NSUInteger playedSamples;
// Playback state
@property(nonatomic, assign) BOOL playing;
@property(nonatomic, strong) dispatch_queue_t playerQueue;
@property(nonatomic, strong) NSTimer *progressTimer;
@end
@implementation AudioStreamPlayer
- (instancetype)init {
self = [super init];
if (self) {
_audioEngine = [[AVAudioEngine alloc] init];
_playerNode = [[AVAudioPlayerNode alloc] init];
_segmentDurations = [[NSMutableDictionary alloc] init];
_segmentStartTimes = [[NSMutableDictionary alloc] init];
_playerQueue = dispatch_queue_create("com.keyboard.aitalk.streamplayer",
DISPATCH_QUEUE_SERIAL);
// 16kHz, Mono, Float32
_playbackFormat =
[[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32
sampleRate:16000
channels:1
interleaved:NO];
}
return self;
}
- (void)dealloc {
[self stop];
}
#pragma mark - Public Methods
- (BOOL)start:(NSError **)error {
if (self.playing) {
return YES;
}
// Attach the player node and connect it to the mixer
[self.audioEngine attachNode:self.playerNode];
[self.audioEngine connect:self.playerNode
to:self.audioEngine.mainMixerNode
format:self.playbackFormat];
// Start the engine
NSError *startError = nil;
[self.audioEngine prepare];
if (![self.audioEngine startAndReturnError:&startError]) {
if (error) {
*error = startError;
}
NSLog(@"[AudioStreamPlayer] Failed to start engine: %@",
startError.localizedDescription);
return NO;
}
[self.playerNode play];
self.playing = YES;
// Drive playback-time callbacks
[self startProgressTimer];
NSLog(@"[AudioStreamPlayer] Started");
return YES;
}
- (void)stop {
dispatch_async(self.playerQueue, ^{
[self stopProgressTimer];
[self.playerNode stop];
[self.audioEngine stop];
self.playing = NO;
self.currentSegmentId = nil;
self.scheduledSamples = 0;
self.playedSamples = 0;
[self.segmentDurations removeAllObjects];
[self.segmentStartTimes removeAllObjects];
NSLog(@"[AudioStreamPlayer] Stopped");
});
}
- (void)enqueuePCMChunk:(NSData *)pcmData
sampleRate:(double)sampleRate
channels:(int)channels
segmentId:(NSString *)segmentId {
if (!pcmData || pcmData.length == 0)
return;
dispatch_async(self.playerQueue, ^{
// A new segment starts when the ID changes
BOOL isNewSegment = ![segmentId isEqualToString:self.currentSegmentId];
if (isNewSegment) {
self.currentSegmentId = segmentId;
self.scheduledSamples = 0;
self.segmentStartTimes[segmentId] = @(CACurrentMediaTime());
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector
(audioStreamPlayerDidStartSegment:)]) {
[self.delegate audioStreamPlayerDidStartSegment:segmentId];
}
});
}
// Int16 -> Float32
NSUInteger sampleCount = pcmData.length / sizeof(int16_t);
const int16_t *int16Samples = (const int16_t *)pcmData.bytes;
// Build a Float32 buffer matching the chunk's format
AVAudioFormat *format =
[[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32
sampleRate:sampleRate
channels:channels
interleaved:NO];
AVAudioPCMBuffer *buffer = [[AVAudioPCMBuffer alloc]
initWithPCMFormat:format
frameCapacity:(AVAudioFrameCount)sampleCount];
buffer.frameLength = (AVAudioFrameCount)sampleCount;
float *floatChannel = buffer.floatChannelData[0];
for (NSUInteger i = 0; i < sampleCount; i++) {
floatChannel[i] = (float)int16Samples[i] / 32768.0f;
}
// Schedule the buffer and count played samples on completion
__weak typeof(self) weakSelf = self;
[self.playerNode scheduleBuffer:buffer
completionHandler:^{
__strong typeof(weakSelf) strongSelf = weakSelf;
if (!strongSelf)
return;
dispatch_async(strongSelf.playerQueue, ^{
strongSelf.playedSamples += sampleCount;
});
}];
self.scheduledSamples += sampleCount;
// Accumulate the segment duration
NSTimeInterval chunkDuration = (double)sampleCount / sampleRate;
NSNumber *currentDuration = self.segmentDurations[segmentId];
self.segmentDurations[segmentId] =
@(currentDuration.doubleValue + chunkDuration);
});
}
- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId {
if (![segmentId isEqualToString:self.currentSegmentId]) {
return 0;
}
// Derive elapsed time from the number of played samples
return (double)self.playedSamples / self.playbackFormat.sampleRate;
}
- (NSTimeInterval)durationForSegment:(NSString *)segmentId {
NSNumber *duration = self.segmentDurations[segmentId];
return duration ? duration.doubleValue : 0;
}
#pragma mark - Progress Timer
- (void)startProgressTimer {
dispatch_async(dispatch_get_main_queue(), ^{
self.progressTimer =
[NSTimer scheduledTimerWithTimeInterval:1.0 / 30.0
target:self
selector:@selector(updateProgress)
userInfo:nil
repeats:YES];
});
}
- (void)stopProgressTimer {
dispatch_async(dispatch_get_main_queue(), ^{
[self.progressTimer invalidate];
self.progressTimer = nil;
});
}
- (void)updateProgress {
if (!self.playing || !self.currentSegmentId) {
return;
}
NSTimeInterval currentTime =
[self playbackTimeForSegment:self.currentSegmentId];
NSString *segmentId = self.currentSegmentId;
if ([self.delegate respondsToSelector:@selector
(audioStreamPlayerDidUpdateTime:segmentId:)]) {
[self.delegate audioStreamPlayerDidUpdateTime:currentTime
segmentId:segmentId];
}
// Report completion when playback reaches the end of the known duration
NSTimeInterval duration = [self durationForSegment:segmentId];
if (duration > 0 && currentTime >= duration - 0.1) {
dispatch_async(self.playerQueue, ^{
if ([self.delegate respondsToSelector:@selector
(audioStreamPlayerDidFinishSegment:)]) {
dispatch_async(dispatch_get_main_queue(), ^{
[self.delegate audioStreamPlayerDidFinishSegment:segmentId];
});
}
});
}
}
@end


@@ -0,0 +1,88 @@
//
// ConversationOrchestrator.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Conversation state
typedef NS_ENUM(NSInteger, ConversationState) {
ConversationStateIdle = 0, // Idle
ConversationStateListening, // Recording user speech
ConversationStateRecognizing, // Recognizing (waiting for the ASR result)
ConversationStateThinking, // Thinking (waiting for the LLM reply)
ConversationStateSpeaking // Speaking (playing TTS audio)
};
/// Conversation orchestrator
/// Core state machine that wires all modules together and handles barge-in
@interface ConversationOrchestrator : NSObject
/// Current state
@property(nonatomic, assign, readonly) ConversationState state;
/// Current conversation ID
@property(nonatomic, copy, readonly, nullable) NSString *conversationId;
#pragma mark - Callbacks
/// Final recognized user text
@property(nonatomic, copy, nullable) void (^onUserFinalText)(NSString *text);
/// Visible assistant text (typewriter effect)
@property(nonatomic, copy, nullable) void (^onAssistantVisibleText)(NSString *text);
/// Full assistant reply text
@property(nonatomic, copy, nullable) void (^onAssistantFullText)(NSString *text);
/// Real-time partial recognition text
@property(nonatomic, copy, nullable) void (^onPartialText)(NSString *text);
/// Volume updates (for the waveform UI)
@property(nonatomic, copy, nullable) void (^onVolumeUpdate)(float rms);
/// State changes
@property(nonatomic, copy, nullable) void (^onStateChange)(ConversationState state);
/// Errors
@property(nonatomic, copy, nullable) void (^onError)(NSError *error);
/// The assistant started speaking
@property(nonatomic, copy, nullable) void (^onSpeakingStart)(void);
/// The assistant finished speaking
@property(nonatomic, copy, nullable) void (^onSpeakingEnd)(void);
#pragma mark - Configuration
/// ASR server URL
@property(nonatomic, copy) NSString *asrServerURL;
/// LLM server URL
@property(nonatomic, copy) NSString *llmServerURL;
/// TTS server URL
@property(nonatomic, copy) NSString *ttsServerURL;
#pragma mark - User Actions
/// The user pressed the record button
/// If playback is in progress, it is interrupted automatically
- (void)userDidPressRecord;
/// The user released the record button
- (void)userDidReleaseRecord;
/// Stop manually (leaving the page, etc.)
- (void)stop;
@end
NS_ASSUME_NONNULL_END
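A minimal wiring sketch for ConversationOrchestrator; the demo class and URLs are hypothetical (the real endpoints are still TODO in this commit):

// Hypothetical caller, for illustration only.
#import "ConversationOrchestrator.h"

@interface TalkController : NSObject
@property(nonatomic, strong) ConversationOrchestrator *orchestrator;
@end

@implementation TalkController
- (void)setup {
  self.orchestrator = [[ConversationOrchestrator alloc] init];
  self.orchestrator.asrServerURL = @"wss://example.com/ws/asr";             // placeholder
  self.orchestrator.llmServerURL = @"https://example.com/api/chat/stream";  // placeholder
  self.orchestrator.ttsServerURL = @"https://example.com/api/tts";          // placeholder
  self.orchestrator.onPartialText = ^(NSString *text) { NSLog(@"partial: %@", text); };
  self.orchestrator.onAssistantVisibleText = ^(NSString *text) { NSLog(@"assistant: %@", text); };
  self.orchestrator.onError = ^(NSError *error) { NSLog(@"error: %@", error); };
}
// Wire these to the record button's touch-down / touch-up events.
- (void)recordButtonDown { [self.orchestrator userDidPressRecord]; }
- (void)recordButtonUp { [self.orchestrator userDidReleaseRecord]; }
@end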


@@ -0,0 +1,527 @@
//
// ConversationOrchestrator.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "ConversationOrchestrator.h"
#import "ASRStreamClient.h"
#import "AudioCaptureManager.h"
#import "AudioSessionManager.h"
#import "LLMStreamClient.h"
#import "Segmenter.h"
#import "SubtitleSync.h"
#import "TTSPlaybackPipeline.h"
#import "TTSServiceClient.h"
@interface ConversationOrchestrator () <
AudioSessionManagerDelegate, AudioCaptureManagerDelegate,
ASRStreamClientDelegate, LLMStreamClientDelegate, TTSServiceClientDelegate,
TTSPlaybackPipelineDelegate>
// Modules
@property(nonatomic, strong) AudioSessionManager *audioSession;
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
@property(nonatomic, strong) ASRStreamClient *asrClient;
@property(nonatomic, strong) LLMStreamClient *llmClient;
@property(nonatomic, strong) Segmenter *segmenter;
@property(nonatomic, strong) TTSServiceClient *ttsClient;
@property(nonatomic, strong) TTSPlaybackPipeline *playbackPipeline;
@property(nonatomic, strong) SubtitleSync *subtitleSync;
// State
@property(nonatomic, assign) ConversationState state;
@property(nonatomic, copy) NSString *conversationId;
@property(nonatomic, copy) NSString *currentSessionId;
// Assistant text accumulation
@property(nonatomic, strong) NSMutableString *fullAssistantText;
@property(nonatomic, strong)
NSMutableDictionary<NSString *, NSString *> *segmentTextMap;
@property(nonatomic, assign) NSInteger segmentCounter;
// Serial work queue
@property(nonatomic, strong) dispatch_queue_t orchestratorQueue;
@end
@implementation ConversationOrchestrator
#pragma mark - Initialization
- (instancetype)init {
self = [super init];
if (self) {
_orchestratorQueue = dispatch_queue_create(
"com.keyboard.aitalk.orchestrator", DISPATCH_QUEUE_SERIAL);
_state = ConversationStateIdle;
_conversationId = [[NSUUID UUID] UUIDString];
_fullAssistantText = [[NSMutableString alloc] init];
_segmentTextMap = [[NSMutableDictionary alloc] init];
_segmentCounter = 0;
[self setupModules];
}
return self;
}
- (void)setupModules {
// Audio Session
self.audioSession = [AudioSessionManager sharedManager];
self.audioSession.delegate = self;
// Audio Capture
self.audioCapture = [[AudioCaptureManager alloc] init];
self.audioCapture.delegate = self;
// ASR Client
self.asrClient = [[ASRStreamClient alloc] init];
self.asrClient.delegate = self;
// LLM Client
self.llmClient = [[LLMStreamClient alloc] init];
self.llmClient.delegate = self;
// Segmenter
self.segmenter = [[Segmenter alloc] init];
// TTS Client
self.ttsClient = [[TTSServiceClient alloc] init];
self.ttsClient.delegate = self;
// Playback Pipeline
self.playbackPipeline = [[TTSPlaybackPipeline alloc] init];
self.playbackPipeline.delegate = self;
// Subtitle Sync
self.subtitleSync = [[SubtitleSync alloc] init];
}
#pragma mark - Configuration Setters
- (void)setAsrServerURL:(NSString *)asrServerURL {
_asrServerURL = [asrServerURL copy];
self.asrClient.serverURL = asrServerURL;
}
- (void)setLlmServerURL:(NSString *)llmServerURL {
_llmServerURL = [llmServerURL copy];
self.llmClient.serverURL = llmServerURL;
}
- (void)setTtsServerURL:(NSString *)ttsServerURL {
_ttsServerURL = [ttsServerURL copy];
self.ttsClient.serverURL = ttsServerURL;
}
#pragma mark - User Actions
- (void)userDidPressRecord {
dispatch_async(self.orchestratorQueue, ^{
NSLog(@"[Orchestrator] userDidPressRecord, current state: %ld",
(long)self.state);
// Barge in if the assistant is currently speaking or thinking
if (self.state == ConversationStateSpeaking ||
self.state == ConversationStateThinking) {
[self performBargein];
}
// Make sure we have microphone permission
if (![self.audioSession hasMicrophonePermission]) {
[self.audioSession requestMicrophonePermission:^(BOOL granted) {
if (granted) {
dispatch_async(self.orchestratorQueue, ^{
[self startRecording];
});
}
}];
return;
}
[self startRecording];
});
}
- (void)userDidReleaseRecord {
dispatch_async(self.orchestratorQueue, ^{
NSLog(@"[Orchestrator] userDidReleaseRecord, current state: %ld",
(long)self.state);
if (self.state != ConversationStateListening) {
return;
}
// Stop capturing audio
[self.audioCapture stopCapture];
// Ask ASR for the final result
[self.asrClient finalize];
// Wait for recognition to complete
[self updateState:ConversationStateRecognizing];
});
}
- (void)stop {
dispatch_async(self.orchestratorQueue, ^{
[self cancelAll];
[self updateState:ConversationStateIdle];
});
}
#pragma mark - Private: Recording
- (void)startRecording {
// Configure and activate the audio session
NSError *error = nil;
if (![self.audioSession configureForConversation:&error]) {
[self reportError:error];
return;
}
if (![self.audioSession activateSession:&error]) {
[self reportError:error];
return;
}
// New session ID
self.currentSessionId = [[NSUUID UUID] UUIDString];
// Start the ASR session
[self.asrClient startWithSessionId:self.currentSessionId];
// Start audio capture
if (![self.audioCapture startCapture:&error]) {
[self reportError:error];
[self.asrClient cancel];
return;
}
// Now listening
[self updateState:ConversationStateListening];
}
#pragma mark - Private: Barge-in (interrupt the current reply)
- (void)performBargein {
NSLog(@"[Orchestrator] Performing barge-in");
// Cancel upstream clients
[self.ttsClient cancel];
[self.llmClient cancel];
[self.asrClient cancel];
// Stop playback immediately
[self.playbackPipeline stop];
// Reset accumulated text and segments
[self.segmenter reset];
[self.segmentTextMap removeAllObjects];
[self.fullAssistantText setString:@""];
self.segmentCounter = 0;
}
- (void)cancelAll {
[self.audioCapture stopCapture];
[self.asrClient cancel];
[self.llmClient cancel];
[self.ttsClient cancel];
[self.playbackPipeline stop];
[self.segmenter reset];
[self.audioSession deactivateSession];
}
#pragma mark - Private: State Management
- (void)updateState:(ConversationState)newState {
if (self.state == newState)
return;
ConversationState oldState = self.state;
self.state = newState;
NSLog(@"[Orchestrator] State: %ld -> %ld", (long)oldState, (long)newState);
dispatch_async(dispatch_get_main_queue(), ^{
if (self.onStateChange) {
self.onStateChange(newState);
}
// Speaking start / end notifications
if (newState == ConversationStateSpeaking &&
oldState != ConversationStateSpeaking) {
if (self.onSpeakingStart) {
self.onSpeakingStart();
}
}
if (oldState == ConversationStateSpeaking &&
newState != ConversationStateSpeaking) {
if (self.onSpeakingEnd) {
self.onSpeakingEnd();
}
}
});
}
- (void)reportError:(NSError *)error {
NSLog(@"[Orchestrator] Error: %@", error.localizedDescription);
dispatch_async(dispatch_get_main_queue(), ^{
if (self.onError) {
self.onError(error);
}
});
}
#pragma mark - AudioCaptureManagerDelegate
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
// Forward PCM frames to ASR
[self.asrClient sendAudioPCMFrame:pcmFrame];
}
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
dispatch_async(dispatch_get_main_queue(), ^{
if (self.onVolumeUpdate) {
self.onVolumeUpdate(rms);
}
});
}
#pragma mark - AudioSessionManagerDelegate
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
dispatch_async(self.orchestratorQueue, ^{
if (type == KBAudioSessionInterruptionTypeBegan) {
// Interruption began: cancel everything
[self cancelAll];
[self updateState:ConversationStateIdle];
}
});
}
- (void)audioSessionManagerMicrophonePermissionDenied {
NSError *error =
[NSError errorWithDomain:@"ConversationOrchestrator"
code:-1
userInfo:@{
NSLocalizedDescriptionKey : @"请在设置中开启麦克风权限"
}];
[self reportError:error];
}
#pragma mark - ASRStreamClientDelegate
- (void)asrClientDidReceivePartialText:(NSString *)text {
dispatch_async(dispatch_get_main_queue(), ^{
if (self.onPartialText) {
self.onPartialText(text);
}
});
}
- (void)asrClientDidReceiveFinalText:(NSString *)text {
dispatch_async(self.orchestratorQueue, ^{
NSLog(@"[Orchestrator] ASR final text: %@", text);
// Report the user's final text
dispatch_async(dispatch_get_main_queue(), ^{
if (self.onUserFinalText) {
self.onUserFinalText(text);
}
});
// Empty result: back to idle
if (text.length == 0) {
[self updateState:ConversationStateIdle];
return;
}
// Wait for the LLM reply
[self updateState:ConversationStateThinking];
// Reset assistant text state
[self.fullAssistantText setString:@""];
[self.segmentTextMap removeAllObjects];
self.segmentCounter = 0;
[self.segmenter reset];
// Start the playback pipeline ahead of time
NSError *error = nil;
if (![self.playbackPipeline start:&error]) {
NSLog(@"[Orchestrator] Failed to start playback pipeline: %@",
error.localizedDescription);
}
// Send the user text to the LLM
[self.llmClient sendUserText:text conversationId:self.conversationId];
});
}
- (void)asrClientDidFail:(NSError *)error {
dispatch_async(self.orchestratorQueue, ^{
[self reportError:error];
[self updateState:ConversationStateIdle];
});
}
#pragma mark - LLMStreamClientDelegate
- (void)llmClientDidReceiveToken:(NSString *)token {
dispatch_async(self.orchestratorQueue, ^{
// Accumulate the full reply
[self.fullAssistantText appendString:token];
// Feed the segmenter
[self.segmenter appendToken:token];
// Request TTS for any segments that are ready
NSArray<NSString *> *segments = [self.segmenter popReadySegments];
for (NSString *segmentText in segments) {
[self requestTTSForSegment:segmentText];
}
});
}
- (void)llmClientDidComplete {
dispatch_async(self.orchestratorQueue, ^{
NSLog(@"[Orchestrator] LLM complete");
// Flush any remaining text as a final segment
NSString *remaining = [self.segmenter flushRemainingSegment];
if (remaining && remaining.length > 0) {
[self requestTTSForSegment:remaining];
}
// Report the full reply
NSString *fullText = [self.fullAssistantText copy];
dispatch_async(dispatch_get_main_queue(), ^{
if (self.onAssistantFullText) {
self.onAssistantFullText(fullText);
}
});
});
}
- (void)llmClientDidFail:(NSError *)error {
dispatch_async(self.orchestratorQueue, ^{
[self reportError:error];
[self updateState:ConversationStateIdle];
});
}
#pragma mark - Private: TTS Request
- (void)requestTTSForSegment:(NSString *)segmentText {
NSString *segmentId =
[NSString stringWithFormat:@"seg_%ld", (long)self.segmentCounter++];
// Remember the segment text for subtitle sync
self.segmentTextMap[segmentId] = segmentText;
NSLog(@"[Orchestrator] Requesting TTS for segment %@: %@", segmentId,
segmentText);
// Kick off the TTS request
[self.ttsClient requestTTSForText:segmentText segmentId:segmentId];
}
#pragma mark - TTSServiceClientDelegate
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId {
dispatch_async(self.orchestratorQueue, ^{
[self.playbackPipeline enqueueURL:url segmentId:segmentId];
// First audio arrived: Thinking -> Speaking
if (self.state == ConversationStateThinking) {
[self updateState:ConversationStateSpeaking];
}
});
}
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
payloadType:(TTSPayloadType)type
segmentId:(NSString *)segmentId {
dispatch_async(self.orchestratorQueue, ^{
[self.playbackPipeline enqueueChunk:chunk
payloadType:type
segmentId:segmentId];
// First audio arrived: Thinking -> Speaking
if (self.state == ConversationStateThinking) {
[self updateState:ConversationStateSpeaking];
}
});
}
- (void)ttsClientDidFinishSegment:(NSString *)segmentId {
dispatch_async(self.orchestratorQueue, ^{
[self.playbackPipeline markSegmentComplete:segmentId];
});
}
- (void)ttsClientDidFail:(NSError *)error {
dispatch_async(self.orchestratorQueue, ^{
[self reportError:error];
});
}
#pragma mark - TTSPlaybackPipelineDelegate
- (void)pipelineDidStartSegment:(NSString *)segmentId
duration:(NSTimeInterval)duration {
NSLog(@"[Orchestrator] Started playing segment: %@", segmentId);
}
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
segmentId:(NSString *)segmentId {
dispatch_async(self.orchestratorQueue, ^{
// Look up the text of this segment
NSString *segmentText = self.segmentTextMap[segmentId];
if (!segmentText)
return;
// Map playback time to the visible portion of the text
NSTimeInterval duration =
[self.playbackPipeline durationForSegment:segmentId];
NSString *visibleText =
[self.subtitleSync visibleTextForFullText:segmentText
currentTime:time
duration:duration];
// TODO:
// Report the visible text
dispatch_async(dispatch_get_main_queue(), ^{
if (self.onAssistantVisibleText) {
self.onAssistantVisibleText(visibleText);
}
});
});
}
- (void)pipelineDidFinishSegment:(NSString *)segmentId {
NSLog(@"[Orchestrator] Finished playing segment: %@", segmentId);
}
- (void)pipelineDidFinishAllSegments {
dispatch_async(self.orchestratorQueue, ^{
NSLog(@"[Orchestrator] All segments finished");
// Everything played: back to idle and release the audio session
[self updateState:ConversationStateIdle];
[self.audioSession deactivateSession];
});
}
- (void)pipelineDidFail:(NSError *)error {
dispatch_async(self.orchestratorQueue, ^{
[self reportError:error];
[self updateState:ConversationStateIdle];
});
}
@end


@@ -0,0 +1,48 @@
//
// LLMStreamClient.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Delegate for the streaming LLM client
@protocol LLMStreamClientDelegate <NSObject>
@required
/// Received a new token
- (void)llmClientDidReceiveToken:(NSString *)token;
/// Generation completed
- (void)llmClientDidComplete;
/// Generation failed
- (void)llmClientDidFail:(NSError *)error;
@end
/// Streaming LLM client
/// Receives the token stream via SSE (Server-Sent Events) or WebSocket
@interface LLMStreamClient : NSObject
@property(nonatomic, weak) id<LLMStreamClientDelegate> delegate;
/// LLM server URL
@property(nonatomic, copy) NSString *serverURL;
/// API key (if required)
@property(nonatomic, copy, nullable) NSString *apiKey;
/// Whether a reply is being generated
@property(nonatomic, assign, readonly, getter=isGenerating) BOOL generating;
/// Send user text and request an LLM reply
/// @param text Text entered by the user
/// @param conversationId Conversation ID
- (void)sendUserText:(NSString *)text conversationId:(NSString *)conversationId;
/// Cancel the current request
- (void)cancel;
@end
NS_ASSUME_NONNULL_END
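A minimal usage sketch for LLMStreamClient; the demo class and URL are hypothetical and not part of this commit:

// Hypothetical caller, for illustration only.
#import "LLMStreamClient.h"

@interface LLMDemo : NSObject <LLMStreamClientDelegate>
@property(nonatomic, strong) LLMStreamClient *llm;
@end

@implementation LLMDemo
- (void)ask {
  self.llm = [[LLMStreamClient alloc] init];
  self.llm.delegate = self;
  self.llm.serverURL = @"https://example.com/api/chat/stream"; // placeholder endpoint
  [self.llm sendUserText:@"Hello" conversationId:[[NSUUID UUID] UUIDString]];
}
- (void)llmClientDidReceiveToken:(NSString *)token { NSLog(@"token: %@", token); }
- (void)llmClientDidComplete { NSLog(@"LLM complete"); }
- (void)llmClientDidFail:(NSError *)error { NSLog(@"LLM error: %@", error); }
@end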


@@ -0,0 +1,244 @@
//
// LLMStreamClient.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "LLMStreamClient.h"
@interface LLMStreamClient () <NSURLSessionDataDelegate>
@property(nonatomic, strong) NSURLSession *urlSession;
@property(nonatomic, strong) NSURLSessionDataTask *dataTask;
@property(nonatomic, strong) dispatch_queue_t networkQueue;
@property(nonatomic, assign) BOOL generating;
@property(nonatomic, strong) NSMutableString *buffer; // Buffer for partially received SSE data
@end
@implementation LLMStreamClient
- (instancetype)init {
self = [super init];
if (self) {
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.llm.network",
DISPATCH_QUEUE_SERIAL);
_buffer = [[NSMutableString alloc] init];
// TODO: replace with the actual LLM server address
_serverURL = @"https://your-llm-server.com/api/chat/stream";
}
return self;
}
- (void)dealloc {
[self cancel];
}
#pragma mark - Public Methods
- (void)sendUserText:(NSString *)text
conversationId:(NSString *)conversationId {
dispatch_async(self.networkQueue, ^{
[self cancelInternal];
self.generating = YES;
[self.buffer setString:@""];
// Build the streaming request
NSURL *url = [NSURL URLWithString:self.serverURL];
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
request.HTTPMethod = @"POST";
[request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"];
[request setValue:@"text/event-stream" forHTTPHeaderField:@"Accept"];
if (self.apiKey) {
[request setValue:[NSString stringWithFormat:@"Bearer %@", self.apiKey]
forHTTPHeaderField:@"Authorization"];
}
// Request body
NSDictionary *body = @{
@"message" : text,
@"conversationId" : conversationId,
@"stream" : @YES
};
NSError *jsonError = nil;
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:body
options:0
error:&jsonError];
if (jsonError) {
[self reportError:jsonError];
return;
}
request.HTTPBody = jsonData;
// Create the session and start the streaming data task
NSURLSessionConfiguration *config =
[NSURLSessionConfiguration defaultSessionConfiguration];
config.timeoutIntervalForRequest = 60;
config.timeoutIntervalForResource = 300;
self.urlSession = [NSURLSession sessionWithConfiguration:config
delegate:self
delegateQueue:nil];
self.dataTask = [self.urlSession dataTaskWithRequest:request];
[self.dataTask resume];
NSLog(@"[LLMStreamClient] Started request for conversation: %@",
conversationId);
});
}
- (void)cancel {
dispatch_async(self.networkQueue, ^{
[self cancelInternal];
});
}
#pragma mark - Private Methods
- (void)cancelInternal {
self.generating = NO;
if (self.dataTask) {
[self.dataTask cancel];
self.dataTask = nil;
}
if (self.urlSession) {
[self.urlSession invalidateAndCancel];
self.urlSession = nil;
}
[self.buffer setString:@""];
}
- (void)reportError:(NSError *)error {
self.generating = NO;
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector(llmClientDidFail:)]) {
[self.delegate llmClientDidFail:error];
}
});
}
- (void)reportComplete {
self.generating = NO;
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector(llmClientDidComplete)]) {
[self.delegate llmClientDidComplete];
}
});
}
- (void)reportToken:(NSString *)token {
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(llmClientDidReceiveToken:)]) {
[self.delegate llmClientDidReceiveToken:token];
}
});
}
#pragma mark - SSE Parsing
- (void)parseSSEData:(NSData *)data {
NSString *string = [[NSString alloc] initWithData:data
encoding:NSUTF8StringEncoding];
if (!string)
return;
[self.buffer appendString:string];
// SSE events are separated by a blank line ("\n\n")
NSArray *events = [self.buffer componentsSeparatedByString:@"\n\n"];
// Keep the last (possibly incomplete) event in the buffer
if (events.count > 1) {
[self.buffer setString:events.lastObject];
for (NSUInteger i = 0; i < events.count - 1; i++) {
[self handleSSEEvent:events[i]];
}
}
}
- (void)handleSSEEvent:(NSString *)event {
if (event.length == 0)
return;
// An SSE event consists of lines of the form:
//   data: {...}
NSArray *lines = [event componentsSeparatedByString:@"\n"];
for (NSString *line in lines) {
if ([line hasPrefix:@"data: "]) {
NSString *dataString = [line substringFromIndex:6];
// End-of-stream marker
if ([dataString isEqualToString:@"[DONE]"]) {
[self reportComplete];
return;
}
// Parse the JSON payload
NSData *jsonData = [dataString dataUsingEncoding:NSUTF8StringEncoding];
NSError *jsonError = nil;
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:jsonData
options:0
error:&jsonError];
if (jsonError) {
NSLog(@"[LLMStreamClient] Failed to parse SSE data: %@", dataString);
continue;
}
// The token field depends on the API:
// either {"token": "..."} or {"choices": [{"delta": {"content":
// "..."}}]}
NSString *token = json[@"token"];
if (!token) {
// OpenAI-compatible format
NSArray *choices = json[@"choices"];
if (choices.count > 0) {
NSDictionary *delta = choices[0][@"delta"];
token = delta[@"content"];
}
}
if (token && token.length > 0) {
[self reportToken:token];
}
}
}
}
#pragma mark - NSURLSessionDataDelegate
- (void)URLSession:(NSURLSession *)session
dataTask:(NSURLSessionDataTask *)dataTask
didReceiveData:(NSData *)data {
[self parseSSEData:data];
}
- (void)URLSession:(NSURLSession *)session
task:(NSURLSessionTask *)task
didCompleteWithError:(NSError *)error {
if (error) {
if (error.code != NSURLErrorCancelled) {
[self reportError:error];
}
} else {
// Flush any remaining buffered event
if (self.buffer.length > 0) {
[self handleSSEEvent:self.buffer];
[self.buffer setString:@""];
}
[self reportComplete];
}
}
@end


@@ -0,0 +1,37 @@
//
// Segmenter.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Sentence segmenter
/// Splits the LLM token stream into sentence segments that can trigger TTS
@interface Segmenter : NSObject
/// Character-count threshold (a split is forced once exceeded)
/// Default: 30
@property(nonatomic, assign) NSUInteger maxCharacterThreshold;
/// Append a token
/// @param token Token emitted by the LLM
- (void)appendToken:(NSString *)token;
/// Return and remove the segments that are ready
/// @return Segments that can be sent to TTS immediately
- (NSArray<NSString *> *)popReadySegments;
/// Return the remaining unfinished segment (for the final flush)
/// @return Remaining segment; may be nil
- (nullable NSString *)flushRemainingSegment;
/// Reset the state
- (void)reset;
@end
NS_ASSUME_NONNULL_END
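A minimal usage sketch for Segmenter; the helper function is hypothetical and not part of this commit:

// Hypothetical helper, for illustration only.
#import "Segmenter.h"

static void SegmentTokens(NSArray<NSString *> *tokens) {
  Segmenter *segmenter = [[Segmenter alloc] init];
  segmenter.maxCharacterThreshold = 30;
  for (NSString *token in tokens) {
    [segmenter appendToken:token];
    // Sentences ending in 。!? or a newline (or over-long runs) become ready segments.
    for (NSString *segment in [segmenter popReadySegments]) {
      NSLog(@"TTS segment: %@", segment);
    }
  }
  // Flush whatever is left once the token stream completes.
  NSString *tail = [segmenter flushRemainingSegment];
  if (tail.length > 0) {
    NSLog(@"TTS segment (tail): %@", tail);
  }
}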


@@ -0,0 +1,148 @@
//
// Segmenter.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "Segmenter.h"
@interface Segmenter ()
@property(nonatomic, strong) NSMutableString *buffer;
@property(nonatomic, strong) NSMutableArray<NSString *> *readySegments;
@end
@implementation Segmenter
- (instancetype)init {
self = [super init];
if (self) {
_buffer = [[NSMutableString alloc] init];
_readySegments = [[NSMutableArray alloc] init];
_maxCharacterThreshold = 30;
}
return self;
}
#pragma mark - Public Methods
- (void)appendToken:(NSString *)token {
if (!token || token.length == 0) {
return;
}
[self.buffer appendString:token];
// Try to split after each appended token
[self checkAndSplit];
}
- (NSArray<NSString *> *)popReadySegments {
NSArray *segments = [self.readySegments copy];
[self.readySegments removeAllObjects];
return segments;
}
- (NSString *)flushRemainingSegment {
NSString *remaining = [self.buffer copy];
[self.buffer setString:@""];
// Trim whitespace and newlines
remaining = [remaining
stringByTrimmingCharactersInSet:[NSCharacterSet
whitespaceAndNewlineCharacterSet]];
return remaining.length > 0 ? remaining : nil;
}
- (void)reset {
[self.buffer setString:@""];
[self.readySegments removeAllObjects];
}
#pragma mark - Private Methods
- (void)checkAndSplit {
// Sentence-ending punctuation (CJK full stop, !, ?, newline)
NSCharacterSet *sentenceEnders =
[NSCharacterSet characterSetWithCharactersInString:@"。!?\n"];
while (YES) {
NSString *currentBuffer = self.buffer;
// Look for a sentence terminator
NSRange range = [currentBuffer rangeOfCharacterFromSet:sentenceEnders];
if (range.location != NSNotFound) {
// Emit the sentence including the terminator
NSUInteger endIndex = range.location + 1;
NSString *segment = [currentBuffer substringToIndex:endIndex];
segment = [segment stringByTrimmingCharactersInSet:
[NSCharacterSet whitespaceAndNewlineCharacterSet]];
if (segment.length > 0) {
[self.readySegments addObject:segment];
}
// Remove the emitted part from the buffer
[self.buffer deleteCharactersInRange:NSMakeRange(0, endIndex)];
} else if (currentBuffer.length >= self.maxCharacterThreshold) {
// Threshold exceeded without a terminator; split at the best break point
NSRange breakRange = [self findBestBreakPoint:currentBuffer];
if (breakRange.location != NSNotFound) {
NSString *segment =
[currentBuffer substringToIndex:breakRange.location + 1];
segment =
[segment stringByTrimmingCharactersInSet:
[NSCharacterSet whitespaceAndNewlineCharacterSet]];
if (segment.length > 0) {
[self.readySegments addObject:segment];
}
[self.buffer
deleteCharactersInRange:NSMakeRange(0, breakRange.location + 1)];
} else {
// No suitable break point; hard-split at the threshold
NSString *segment =
[currentBuffer substringToIndex:self.maxCharacterThreshold];
segment =
[segment stringByTrimmingCharactersInSet:
[NSCharacterSet whitespaceAndNewlineCharacterSet]];
if (segment.length > 0) {
[self.readySegments addObject:segment];
}
[self.buffer
deleteCharactersInRange:NSMakeRange(0, self.maxCharacterThreshold)];
}
} else {
// Not enough text yet; wait for more tokens
break;
}
}
}
- (NSRange)findBestBreakPoint:(NSString *)text {
// Secondary break characters (commas, pauses, semicolons, colon, space)
NSCharacterSet *breakChars =
[NSCharacterSet characterSetWithCharactersInString:@",、;;: "];
// Search backwards, but not past half of the threshold
for (NSInteger i = text.length - 1; i >= self.maxCharacterThreshold / 2;
i--) {
unichar c = [text characterAtIndex:i];
if ([breakChars characterIsMember:c]) {
return NSMakeRange(i, 1);
}
}
return NSMakeRange(NSNotFound, 0);
}
@end


@@ -0,0 +1,36 @@
//
// SubtitleSync.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Subtitle synchronizer
/// Maps playback progress to visible text for a typewriter effect
@interface SubtitleSync : NSObject
/// Text that should currently be visible
/// @param fullText Full text
/// @param currentTime Current playback time in seconds
/// @param duration Total duration in seconds
/// @return The visible portion of the text (typewriter effect)
- (NSString *)visibleTextForFullText:(NSString *)fullText
currentTime:(NSTimeInterval)currentTime
duration:(NSTimeInterval)duration;
/// Number of characters that should currently be visible
/// @param fullText Full text
/// @param currentTime Current playback time in seconds
/// @param duration Total duration in seconds
/// @return The number of visible characters
- (NSUInteger)visibleCountForFullText:(NSString *)fullText
currentTime:(NSTimeInterval)currentTime
duration:(NSTimeInterval)duration;
@end
NS_ASSUME_NONNULL_END
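A small worked example of the mapping, using the 1.05 lead factor from the implementation below: a 30-character segment played over 4 s, queried at t = 2 s, gives progress 0.5, adjusted to 0.525, so roughly 16 characters are visible. A hypothetical caller (not part of this commit):

// Hypothetical helper, for illustration only.
#import "SubtitleSync.h"

static void ShowSubtitle(NSString *segmentText) {
  SubtitleSync *sync = [[SubtitleSync alloc] init];
  NSString *visible = [sync visibleTextForFullText:segmentText
                                       currentTime:2.0
                                          duration:4.0];
  NSLog(@"visible: %@", visible);
}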


@@ -0,0 +1,66 @@
//
// SubtitleSync.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "SubtitleSync.h"
@implementation SubtitleSync
- (NSString *)visibleTextForFullText:(NSString *)fullText
currentTime:(NSTimeInterval)currentTime
duration:(NSTimeInterval)duration {
if (!fullText || fullText.length == 0) {
return @"";
}
NSUInteger visibleCount = [self visibleCountForFullText:fullText
currentTime:currentTime
duration:duration];
if (visibleCount >= fullText.length) {
return fullText;
}
return [fullText substringToIndex:visibleCount];
}
- (NSUInteger)visibleCountForFullText:(NSString *)fullText
currentTime:(NSTimeInterval)currentTime
duration:(NSTimeInterval)duration {
if (!fullText || fullText.length == 0) {
return 0;
}
// Unknown duration: show everything
if (duration <= 0) {
return fullText.length;
}
if (currentTime <= 0) {
return 0;
}
if (currentTime >= duration) {
return fullText.length;
}
// Map playback progress linearly to the character count
double progress = currentTime / duration;
// Lead the audio slightly so the text never lags behind playback
double adjustedProgress = MIN(progress * 1.05, 1.0);
NSUInteger visibleCount =
(NSUInteger)round(fullText.length * adjustedProgress);
return MIN(visibleCount, fullText.length);
}
@end


@@ -0,0 +1,79 @@
//
// TTSPlaybackPipeline.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "TTSServiceClient.h"
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Delegate for the playback pipeline
@protocol TTSPlaybackPipelineDelegate <NSObject>
@optional
/// Started playing a segment
- (void)pipelineDidStartSegment:(NSString *)segmentId
duration:(NSTimeInterval)duration;
/// Playback time updated
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
segmentId:(NSString *)segmentId;
/// Finished playing a segment
- (void)pipelineDidFinishSegment:(NSString *)segmentId;
/// Finished playing all segments
- (void)pipelineDidFinishAllSegments;
/// Playback failed
- (void)pipelineDidFail:(NSError *)error;
@end
/// TTS playback pipeline
/// Routes audio to the appropriate player based on the payloadType
@interface TTSPlaybackPipeline : NSObject
@property(nonatomic, weak) id<TTSPlaybackPipelineDelegate> delegate;
/// Whether playback is running
@property(nonatomic, assign, readonly, getter=isPlaying) BOOL playing;
/// ID of the segment currently being played
@property(nonatomic, copy, readonly, nullable) NSString *currentSegmentId;
/// Start the pipeline
/// @param error Error information
/// @return Whether the pipeline started successfully
- (BOOL)start:(NSError **)error;
/// Stop the pipeline (immediately; used for barge-in)
- (void)stop;
/// Enqueue a URL to play
/// @param url Audio URL
/// @param segmentId Segment ID
- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId;
/// Enqueue an audio chunk
/// @param chunk Audio data
/// @param type Payload type
/// @param segmentId Segment ID
- (void)enqueueChunk:(NSData *)chunk
payloadType:(TTSPayloadType)type
segmentId:(NSString *)segmentId;
/// Mark a segment's data as complete (streaming mode)
/// @param segmentId Segment ID
- (void)markSegmentComplete:(NSString *)segmentId;
/// Current playback time of a segment
/// @param segmentId Segment ID
/// @return Current time in seconds; 0 if the segment is not playing
- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId;
/// Total duration of a segment
/// @param segmentId Segment ID
/// @return Total duration in seconds
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
@end
NS_ASSUME_NONNULL_END
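A minimal usage sketch for TTSPlaybackPipeline; the helper function is hypothetical and not part of this commit (TTSPayloadTypePCMChunk comes from TTSServiceClient.h, which is not shown in this excerpt):

// Hypothetical helper, for illustration only.
#import "TTSPlaybackPipeline.h"

static void PlaySegmentChunks(TTSPlaybackPipeline *pipeline,
                              NSArray<NSData *> *pcmChunks) {
  NSError *error = nil;
  if (![pipeline start:&error]) {
    NSLog(@"pipeline start failed: %@", error);
    return;
  }
  for (NSData *chunk in pcmChunks) {
    [pipeline enqueueChunk:chunk
               payloadType:TTSPayloadTypePCMChunk
                 segmentId:@"seg_0"];
  }
  [pipeline markSegmentComplete:@"seg_0"];
}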


@@ -0,0 +1,343 @@
//
// TTSPlaybackPipeline.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "TTSPlaybackPipeline.h"
#import "AudioStreamPlayer.h"
#import <AVFoundation/AVFoundation.h>
@interface TTSPlaybackPipeline () <AudioStreamPlayerDelegate>
// Players
@property(nonatomic, strong) AVPlayer *urlPlayer;
@property(nonatomic, strong) AudioStreamPlayer *streamPlayer;
// Segment queue and cached durations
@property(nonatomic, strong) NSMutableArray<NSDictionary *> *segmentQueue;
@property(nonatomic, strong)
NSMutableDictionary<NSString *, NSNumber *> *segmentDurations;
// Playback state
@property(nonatomic, assign) BOOL playing;
@property(nonatomic, copy) NSString *currentSegmentId;
@property(nonatomic, strong) id playerTimeObserver;
// Serial queue for playback operations
@property(nonatomic, strong) dispatch_queue_t playbackQueue;
@end
@implementation TTSPlaybackPipeline
- (instancetype)init {
self = [super init];
if (self) {
_segmentQueue = [[NSMutableArray alloc] init];
_segmentDurations = [[NSMutableDictionary alloc] init];
_playbackQueue = dispatch_queue_create("com.keyboard.aitalk.playback",
DISPATCH_QUEUE_SERIAL);
}
return self;
}
- (void)dealloc {
[self stop];
}
#pragma mark - Public Methods
- (BOOL)start:(NSError **)error {
  // Lazily create the stream player on first start
if (!self.streamPlayer) {
self.streamPlayer = [[AudioStreamPlayer alloc] init];
self.streamPlayer.delegate = self;
}
return [self.streamPlayer start:error];
}
- (void)stop {
dispatch_async(self.playbackQueue, ^{
    // Tear down the URL player
if (self.urlPlayer) {
[self.urlPlayer pause];
if (self.playerTimeObserver) {
[self.urlPlayer removeTimeObserver:self.playerTimeObserver];
self.playerTimeObserver = nil;
}
self.urlPlayer = nil;
}
    // Stop the stream player
[self.streamPlayer stop];
    // Clear queued segments and cached durations
[self.segmentQueue removeAllObjects];
[self.segmentDurations removeAllObjects];
self.playing = NO;
self.currentSegmentId = nil;
});
}
- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId {
if (!url || !segmentId)
return;
dispatch_async(self.playbackQueue, ^{
NSDictionary *segment = @{
@"type" : @(TTSPayloadTypeURL),
@"url" : url,
@"segmentId" : segmentId
};
[self.segmentQueue addObject:segment];
    // Start playback immediately if the pipeline is idle
if (!self.playing) {
[self playNextSegment];
}
});
}
- (void)enqueueChunk:(NSData *)chunk
payloadType:(TTSPayloadType)type
segmentId:(NSString *)segmentId {
if (!chunk || !segmentId)
return;
dispatch_async(self.playbackQueue, ^{
switch (type) {
case TTSPayloadTypePCMChunk:
      // Feed PCM straight into the stream player
[self.streamPlayer enqueuePCMChunk:chunk
sampleRate:16000
channels:1
segmentId:segmentId];
if (!self.playing) {
self.playing = YES;
self.currentSegmentId = segmentId;
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector
(pipelineDidStartSegment:duration:)]) {
[self.delegate pipelineDidStartSegment:segmentId duration:0];
}
});
}
break;
case TTSPayloadTypeAACChunk:
// TODO: AAC -> PCM -> streamPlayer
NSLog(@"[TTSPlaybackPipeline] AAC chunk decoding not implemented yet");
break;
case TTSPayloadTypeOpusChunk:
// TODO: Opus -> PCM -> streamPlayer
NSLog(@"[TTSPlaybackPipeline] Opus chunk decoding not implemented yet");
break;
default:
break;
}
});
}
- (void)markSegmentComplete:(NSString *)segmentId {
  // Currently a no-op: the stream player simply drains whatever was enqueued for the segment
}
- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId {
if (![segmentId isEqualToString:self.currentSegmentId]) {
return 0;
}
if (self.urlPlayer) {
return CMTimeGetSeconds(self.urlPlayer.currentTime);
}
return [self.streamPlayer playbackTimeForSegment:segmentId];
}
- (NSTimeInterval)durationForSegment:(NSString *)segmentId {
NSNumber *duration = self.segmentDurations[segmentId];
if (duration) {
return duration.doubleValue;
}
  if (self.urlPlayer && [segmentId isEqualToString:self.currentSegmentId]) {
    // Fall back to the URL player's current item for the active segment
    CMTime itemDuration = self.urlPlayer.currentItem.duration;
    if (CMTIME_IS_VALID(itemDuration)) {
      return CMTimeGetSeconds(itemDuration);
    }
  }
return [self.streamPlayer durationForSegment:segmentId];
}
#pragma mark - Private Methods
- (void)playNextSegment {
if (self.segmentQueue.count == 0) {
self.playing = NO;
self.currentSegmentId = nil;
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(pipelineDidFinishAllSegments)]) {
[self.delegate pipelineDidFinishAllSegments];
}
});
return;
}
NSDictionary *segment = self.segmentQueue.firstObject;
[self.segmentQueue removeObjectAtIndex:0];
TTSPayloadType type = [segment[@"type"] integerValue];
NSString *segmentId = segment[@"segmentId"];
self.playing = YES;
self.currentSegmentId = segmentId;
if (type == TTSPayloadTypeURL) {
NSURL *url = segment[@"url"];
[self playURL:url segmentId:segmentId];
}
}
- (void)playURL:(NSURL *)url segmentId:(NSString *)segmentId {
AVPlayerItem *item = [AVPlayerItem playerItemWithURL:url];
if (!self.urlPlayer) {
self.urlPlayer = [AVPlayer playerWithPlayerItem:item];
} else {
[self.urlPlayer replaceCurrentItemWithPlayerItem:item];
}
  // Observe item completion to advance the queue
[[NSNotificationCenter defaultCenter]
addObserver:self
selector:@selector(playerItemDidFinish:)
name:AVPlayerItemDidPlayToEndTimeNotification
object:item];
  // Periodic time observer (roughly 30 updates per second) for subtitle sync
__weak typeof(self) weakSelf = self;
self.playerTimeObserver = [self.urlPlayer
addPeriodicTimeObserverForInterval:CMTimeMake(1, 30)
queue:dispatch_get_main_queue()
usingBlock:^(CMTime time) {
__strong typeof(weakSelf) strongSelf = weakSelf;
if (!strongSelf)
return;
NSTimeInterval currentTime =
CMTimeGetSeconds(time);
if ([strongSelf.delegate
respondsToSelector:@selector
(pipelineDidUpdatePlaybackTime:
segmentId:)]) {
[strongSelf.delegate
pipelineDidUpdatePlaybackTime:currentTime
segmentId:segmentId];
}
}];
  // Load the duration asynchronously, then report start and begin playback
[item.asset
loadValuesAsynchronouslyForKeys:@[ @"duration" ]
completionHandler:^{
dispatch_async(dispatch_get_main_queue(), ^{
NSTimeInterval duration =
CMTimeGetSeconds(item.duration);
if (!isnan(duration)) {
self.segmentDurations[segmentId] = @(duration);
}
if ([self.delegate respondsToSelector:@selector
(pipelineDidStartSegment:
duration:)]) {
[self.delegate pipelineDidStartSegment:segmentId
duration:duration];
}
[self.urlPlayer play];
});
}];
}
- (void)playerItemDidFinish:(NSNotification *)notification {
[[NSNotificationCenter defaultCenter]
removeObserver:self
name:AVPlayerItemDidPlayToEndTimeNotification
object:notification.object];
if (self.playerTimeObserver) {
[self.urlPlayer removeTimeObserver:self.playerTimeObserver];
self.playerTimeObserver = nil;
}
NSString *finishedSegmentId = self.currentSegmentId;
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(pipelineDidFinishSegment:)]) {
[self.delegate pipelineDidFinishSegment:finishedSegmentId];
}
});
dispatch_async(self.playbackQueue, ^{
[self playNextSegment];
});
}
#pragma mark - AudioStreamPlayerDelegate
- (void)audioStreamPlayerDidStartSegment:(NSString *)segmentId {
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(pipelineDidStartSegment:duration:)]) {
[self.delegate pipelineDidStartSegment:segmentId duration:0];
}
});
}
- (void)audioStreamPlayerDidUpdateTime:(NSTimeInterval)time
segmentId:(NSString *)segmentId {
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector
(pipelineDidUpdatePlaybackTime:segmentId:)]) {
[self.delegate pipelineDidUpdatePlaybackTime:time segmentId:segmentId];
}
});
}
- (void)audioStreamPlayerDidFinishSegment:(NSString *)segmentId {
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(pipelineDidFinishSegment:)]) {
[self.delegate pipelineDidFinishSegment:segmentId];
}
});
dispatch_async(self.playbackQueue, ^{
    // Nothing else queued: the pipeline is done
if (self.segmentQueue.count == 0) {
self.playing = NO;
self.currentSegmentId = nil;
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate
respondsToSelector:@selector(pipelineDidFinishAllSegments)]) {
[self.delegate pipelineDidFinishAllSegments];
}
});
}
});
}
@end

View File

@@ -0,0 +1,60 @@
//
// TTSServiceClient.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// TTS payload type
typedef NS_ENUM(NSInteger, TTSPayloadType) {
  TTSPayloadTypeURL = 0,  // Mode A: server returns an m4a/MP3 URL
  TTSPayloadTypePCMChunk, // Mode D: server streams PCM chunks
  TTSPayloadTypeAACChunk, // Mode B: server streams AAC chunks
  TTSPayloadTypeOpusChunk // Mode C: server streams Opus chunks
};
/// TTS service client delegate
@protocol TTSServiceClientDelegate <NSObject>
@optional
/// Received an audio URL (Mode A)
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId;
/// Received an audio data chunk (Mode B/C/D)
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
payloadType:(TTSPayloadType)type
segmentId:(NSString *)segmentId;
/// Segment finished
- (void)ttsClientDidFinishSegment:(NSString *)segmentId;
/// Request failed
- (void)ttsClientDidFail:(NSError *)error;
@end
/// TTS service client
/// Unified network-layer interface supporting multiple TTS response formats
@interface TTSServiceClient : NSObject
@property(nonatomic, weak) id<TTSServiceClientDelegate> delegate;
/// TTS server URL
@property(nonatomic, copy) NSString *serverURL;
/// Expected payload type (determined by the server configuration)
@property(nonatomic, assign) TTSPayloadType expectedPayloadType;
/// Whether a request is in flight
@property(nonatomic, assign, readonly, getter=isRequesting) BOOL requesting;
/// Request TTS synthesis
/// @param text The text to synthesize
/// @param segmentId The segment ID (used for identification and ordering)
- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId;
/// Cancel all outstanding requests
- (void)cancel;
@end
NS_ASSUME_NONNULL_END
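
A sketch (illustrative only) of how the client's delegate callbacks could be forwarded into TTSPlaybackPipeline. self.pipeline is an assumed property of the hosting object, not an API from this commit.

// Illustrative sketch: bridge TTSServiceClientDelegate into the playback pipeline.
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId {
  [self.pipeline enqueueURL:url segmentId:segmentId];
}

- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
                          payloadType:(TTSPayloadType)type
                            segmentId:(NSString *)segmentId {
  [self.pipeline enqueueChunk:chunk payloadType:type segmentId:segmentId];
}

- (void)ttsClientDidFinishSegment:(NSString *)segmentId {
  [self.pipeline markSegmentComplete:segmentId];
}

- (void)ttsClientDidFail:(NSError *)error {
  NSLog(@"[Example] TTS request failed: %@", error);
}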

View File

@@ -0,0 +1,298 @@
//
// TTSServiceClient.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "TTSServiceClient.h"
@interface TTSServiceClient () <NSURLSessionDataDelegate,
NSURLSessionWebSocketDelegate>
@property(nonatomic, strong) NSURLSession *urlSession;
@property(nonatomic, strong)
NSMutableDictionary<NSString *, NSURLSessionTask *> *activeTasks;
@property(nonatomic, strong) dispatch_queue_t networkQueue;
@property(nonatomic, assign) BOOL requesting;
@end
@implementation TTSServiceClient
- (instancetype)init {
self = [super init];
if (self) {
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.tts.network",
DISPATCH_QUEUE_SERIAL);
_activeTasks = [[NSMutableDictionary alloc] init];
    _expectedPayloadType = TTSPayloadTypeURL; // Default to URL mode (Mode A)
    // TODO: replace with the real TTS server URL
_serverURL = @"https://your-tts-server.com/api/tts";
[self setupSession];
}
return self;
}
- (void)setupSession {
NSURLSessionConfiguration *config =
[NSURLSessionConfiguration defaultSessionConfiguration];
config.timeoutIntervalForRequest = 30;
config.timeoutIntervalForResource = 120;
self.urlSession = [NSURLSession sessionWithConfiguration:config
delegate:self
delegateQueue:nil];
}
- (void)dealloc {
[self cancel];
}
#pragma mark - Public Methods
- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId {
if (!text || text.length == 0 || !segmentId) {
return;
}
dispatch_async(self.networkQueue, ^{
self.requesting = YES;
switch (self.expectedPayloadType) {
case TTSPayloadTypeURL:
[self requestURLMode:text segmentId:segmentId];
break;
case TTSPayloadTypePCMChunk:
case TTSPayloadTypeAACChunk:
case TTSPayloadTypeOpusChunk:
[self requestStreamMode:text segmentId:segmentId];
break;
}
});
}
- (void)cancel {
dispatch_async(self.networkQueue, ^{
for (NSURLSessionTask *task in self.activeTasks.allValues) {
[task cancel];
}
[self.activeTasks removeAllObjects];
self.requesting = NO;
});
}
#pragma mark - URL Mode (Mode A)
- (void)requestURLMode:(NSString *)text segmentId:(NSString *)segmentId {
NSURL *url = [NSURL URLWithString:self.serverURL];
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
request.HTTPMethod = @"POST";
[request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"];
NSDictionary *body = @{
@"text" : text,
@"segmentId" : segmentId,
@"format" : @"mp3" // m4a
};
NSError *jsonError = nil;
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:body
options:0
error:&jsonError];
if (jsonError) {
[self reportError:jsonError];
return;
}
request.HTTPBody = jsonData;
__weak typeof(self) weakSelf = self;
NSURLSessionDataTask *task = [self.urlSession
dataTaskWithRequest:request
completionHandler:^(NSData *_Nullable data,
NSURLResponse *_Nullable response,
NSError *_Nullable error) {
__strong typeof(weakSelf) strongSelf = weakSelf;
if (!strongSelf)
return;
dispatch_async(strongSelf.networkQueue, ^{
[strongSelf.activeTasks removeObjectForKey:segmentId];
if (error) {
if (error.code != NSURLErrorCancelled) {
[strongSelf reportError:error];
}
return;
}
            // Parse the JSON response
NSError *parseError = nil;
NSDictionary *json =
[NSJSONSerialization JSONObjectWithData:data
options:0
error:&parseError];
if (parseError) {
[strongSelf reportError:parseError];
return;
}
            NSString *audioURLString = json[@"audioUrl"];
            if (audioURLString.length > 0) {
              NSURL *audioURL = [NSURL URLWithString:audioURLString];
              dispatch_async(dispatch_get_main_queue(), ^{
                if ([strongSelf.delegate respondsToSelector:@selector
                                         (ttsClientDidReceiveURL:segmentId:)]) {
                  [strongSelf.delegate ttsClientDidReceiveURL:audioURL
                                                    segmentId:segmentId];
                }
                if ([strongSelf.delegate respondsToSelector:@selector
                                         (ttsClientDidFinishSegment:)]) {
                  [strongSelf.delegate ttsClientDidFinishSegment:segmentId];
                }
              });
            } else {
              // Response did not contain an audio URL; surface it as an error
              NSError *missingURLError = [NSError
                  errorWithDomain:NSURLErrorDomain
                             code:NSURLErrorBadServerResponse
                         userInfo:@{
                           NSLocalizedDescriptionKey :
                               @"TTS response missing audioUrl"
                         }];
              [strongSelf reportError:missingURLError];
            }
});
}];
self.activeTasks[segmentId] = task;
[task resume];
NSLog(@"[TTSServiceClient] URL mode request for segment: %@", segmentId);
}
#pragma mark - Stream Mode (Mode B/C/D)
- (void)requestStreamMode:(NSString *)text segmentId:(NSString *)segmentId {
  // Stream the audio over a WebSocket connection
NSString *wsURL =
[self.serverURL stringByReplacingOccurrencesOfString:@"https://"
withString:@"wss://"];
wsURL = [wsURL stringByReplacingOccurrencesOfString:@"http://"
withString:@"ws://"];
wsURL = [wsURL stringByAppendingString:@"/stream"];
NSURL *url = [NSURL URLWithString:wsURL];
NSURLSessionWebSocketTask *wsTask =
[self.urlSession webSocketTaskWithURL:url];
self.activeTasks[segmentId] = wsTask;
[wsTask resume];
  // Send the synthesis request as a JSON text frame
NSDictionary *requestDict = @{
@"text" : text,
@"segmentId" : segmentId,
@"format" : [self formatStringForPayloadType:self.expectedPayloadType]
};
NSError *jsonError = nil;
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:requestDict
options:0
error:&jsonError];
if (jsonError) {
[self reportError:jsonError];
return;
}
NSString *jsonString = [[NSString alloc] initWithData:jsonData
encoding:NSUTF8StringEncoding];
NSURLSessionWebSocketMessage *message =
[[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
__weak typeof(self) weakSelf = self;
[wsTask sendMessage:message
completionHandler:^(NSError *_Nullable error) {
if (error) {
[weakSelf reportError:error];
} else {
[weakSelf receiveStreamMessage:wsTask segmentId:segmentId];
}
}];
NSLog(@"[TTSServiceClient] Stream mode request for segment: %@", segmentId);
}
- (void)receiveStreamMessage:(NSURLSessionWebSocketTask *)wsTask
segmentId:(NSString *)segmentId {
__weak typeof(self) weakSelf = self;
[wsTask receiveMessageWithCompletionHandler:^(
NSURLSessionWebSocketMessage *_Nullable message,
NSError *_Nullable error) {
__strong typeof(weakSelf) strongSelf = weakSelf;
if (!strongSelf)
return;
if (error) {
      // errno 57 (ENOTCONN) is reported when the socket has already been closed
      if (error.code != NSURLErrorCancelled && error.code != 57) {
[strongSelf reportError:error];
}
return;
}
if (message.type == NSURLSessionWebSocketMessageTypeData) {
      // Binary frame: audio data chunk
dispatch_async(dispatch_get_main_queue(), ^{
if ([strongSelf.delegate respondsToSelector:@selector
(ttsClientDidReceiveAudioChunk:
payloadType:segmentId:)]) {
[strongSelf.delegate
ttsClientDidReceiveAudioChunk:message.data
payloadType:strongSelf.expectedPayloadType
segmentId:segmentId];
}
});
      // Keep receiving until the server signals completion
[strongSelf receiveStreamMessage:wsTask segmentId:segmentId];
} else if (message.type == NSURLSessionWebSocketMessageTypeString) {
      // Text frame: control message (e.g. completion notice)
NSData *data = [message.string dataUsingEncoding:NSUTF8StringEncoding];
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data
options:0
error:nil];
if ([json[@"type"] isEqualToString:@"done"]) {
dispatch_async(strongSelf.networkQueue, ^{
[strongSelf.activeTasks removeObjectForKey:segmentId];
});
dispatch_async(dispatch_get_main_queue(), ^{
if ([strongSelf.delegate
respondsToSelector:@selector(ttsClientDidFinishSegment:)]) {
[strongSelf.delegate ttsClientDidFinishSegment:segmentId];
}
});
} else {
        // Not a completion message; keep receiving
[strongSelf receiveStreamMessage:wsTask segmentId:segmentId];
}
}
}];
}
- (NSString *)formatStringForPayloadType:(TTSPayloadType)type {
switch (type) {
case TTSPayloadTypePCMChunk:
return @"pcm";
case TTSPayloadTypeAACChunk:
return @"aac";
case TTSPayloadTypeOpusChunk:
return @"opus";
default:
return @"mp3";
}
}
#pragma mark - Error Reporting
- (void)reportError:(NSError *)error {
self.requesting = NO;
dispatch_async(dispatch_get_main_queue(), ^{
if ([self.delegate respondsToSelector:@selector(ttsClientDidFail:)]) {
[self.delegate ttsClientDidFail:error];
}
});
}
@end
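
A configuration sketch (illustrative only): switching the client into streaming PCM (Mode D) before issuing a request. The server URL is the placeholder already used in the commit; the text and segment ID are arbitrary example values.

// Illustrative sketch: request streaming PCM instead of the default URL mode.
TTSServiceClient *ttsClient = [[TTSServiceClient alloc] init];
ttsClient.delegate = self; // host adopts TTSServiceClientDelegate
ttsClient.serverURL = @"https://your-tts-server.com/api/tts"; // placeholder
ttsClient.expectedPayloadType = TTSPayloadTypePCMChunk;       // Mode D
[ttsClient requestTTSForText:@"Hello, how is the weather today?"
                   segmentId:@"seg-001"];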