1
This commit is contained in:
@@ -208,6 +208,7 @@
|
||||
04E038E32F20E500002CA5A0 /* deepgramAPI.md in Resources */ = {isa = PBXBuildFile; fileRef = 04E038E22F20E500002CA5A0 /* deepgramAPI.md */; };
|
||||
04E038E82F20E877002CA5A0 /* DeepgramWebSocketClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */; };
|
||||
04E038E92F20E877002CA5A0 /* DeepgramStreamingManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */; };
|
||||
04E0B1022F300001002CA5A0 /* KBVoiceToTextManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E0B1012F300001002CA5A0 /* KBVoiceToTextManager.m */; };
|
||||
04E038EF2F21F0EC002CA5A0 /* AiVM.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038EE2F21F0EC002CA5A0 /* AiVM.m */; };
|
||||
04E0394B2F236E75002CA5A0 /* KBChatUserMessageCell.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E0394A2F236E75002CA5A0 /* KBChatUserMessageCell.m */; };
|
||||
04E0394C2F236E75002CA5A0 /* KBChatTimeCell.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E039482F236E75002CA5A0 /* KBChatTimeCell.m */; };
|
||||
@@ -650,6 +651,8 @@
|
||||
04E038E22F20E500002CA5A0 /* deepgramAPI.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = deepgramAPI.md; sourceTree = "<group>"; };
|
||||
04E038E42F20E877002CA5A0 /* DeepgramStreamingManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DeepgramStreamingManager.h; sourceTree = "<group>"; };
|
||||
04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DeepgramStreamingManager.m; sourceTree = "<group>"; };
|
||||
04E0B1002F300001002CA5A0 /* KBVoiceToTextManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBVoiceToTextManager.h; sourceTree = "<group>"; };
|
||||
04E0B1012F300001002CA5A0 /* KBVoiceToTextManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBVoiceToTextManager.m; sourceTree = "<group>"; };
|
||||
04E038E62F20E877002CA5A0 /* DeepgramWebSocketClient.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DeepgramWebSocketClient.h; sourceTree = "<group>"; };
|
||||
04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DeepgramWebSocketClient.m; sourceTree = "<group>"; };
|
||||
04E038ED2F21F0EC002CA5A0 /* AiVM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AiVM.h; sourceTree = "<group>"; };
|
||||
@@ -1074,6 +1077,8 @@
|
||||
04E038DC2F20C420002CA5A0 /* VoiceChatWebSocketClient.m */,
|
||||
04E038E42F20E877002CA5A0 /* DeepgramStreamingManager.h */,
|
||||
04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */,
|
||||
04E0B1002F300001002CA5A0 /* KBVoiceToTextManager.h */,
|
||||
04E0B1012F300001002CA5A0 /* KBVoiceToTextManager.m */,
|
||||
04E038E62F20E877002CA5A0 /* DeepgramWebSocketClient.h */,
|
||||
04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */,
|
||||
04E038ED2F21F0EC002CA5A0 /* AiVM.h */,
|
||||
@@ -2325,6 +2330,7 @@
|
||||
0450AAE22EF03D5100B6AF06 /* KBPerson.swift in Sources */,
|
||||
04E038E82F20E877002CA5A0 /* DeepgramWebSocketClient.m in Sources */,
|
||||
04E038E92F20E877002CA5A0 /* DeepgramStreamingManager.m in Sources */,
|
||||
04E0B1022F300001002CA5A0 /* KBVoiceToTextManager.m in Sources */,
|
||||
048908E32EBF821700FABA60 /* KBSkinDetailVC.m in Sources */,
|
||||
0477BDF32EBB7B850055D639 /* KBDirectionIndicatorView.m in Sources */,
|
||||
048FFD142F274342005D62AE /* KBPersonaChatCell.m in Sources */,
|
||||
|
||||
@@ -9,10 +9,11 @@
|
||||
#import "KBPersonaChatCell.h"
|
||||
#import "KBPersonaModel.h"
|
||||
#import "KBVoiceInputBar.h"
|
||||
#import "KBVoiceToTextManager.h"
|
||||
#import "AiVM.h"
|
||||
#import <Masonry/Masonry.h>
|
||||
|
||||
@interface KBAIHomeVC () <UICollectionViewDelegate, UICollectionViewDataSource, KBVoiceInputBarDelegate>
|
||||
@interface KBAIHomeVC () <UICollectionViewDelegate, UICollectionViewDataSource, KBVoiceToTextManagerDelegate>
|
||||
|
||||
/// 人设列表容器
|
||||
@property (nonatomic, strong) UICollectionView *collectionView;
|
||||
@@ -20,6 +21,9 @@
|
||||
/// 底部语音输入栏
|
||||
@property (nonatomic, strong) KBVoiceInputBar *voiceInputBar;
|
||||
|
||||
/// 语音转写管理器
|
||||
@property (nonatomic, strong) KBVoiceToTextManager *voiceToTextManager;
|
||||
|
||||
/// 人设数据
|
||||
@property (nonatomic, strong) NSMutableArray<KBPersonaModel *> *personas;
|
||||
|
||||
@@ -61,6 +65,7 @@
|
||||
self.aiVM = [[AiVM alloc] init];
|
||||
|
||||
[self setupUI];
|
||||
[self setupVoiceToTextManager];
|
||||
[self loadPersonas];
|
||||
}
|
||||
|
||||
@@ -235,6 +240,14 @@
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - 4:语音转写
|
||||
|
||||
/// Creates the voice-to-text manager around the bottom voice input bar,
/// registers this controller as its delegate, stores it in
/// `voiceToTextManager`, and asks it to prepare its connection.
- (void)setupVoiceToTextManager {
    KBVoiceToTextManager *manager =
        [[KBVoiceToTextManager alloc] initWithInputBar:self.voiceInputBar];
    manager.delegate = self;
    self.voiceToTextManager = manager;
    [manager prepareConnection];
}
|
||||
|
||||
#pragma mark - Lazy Load
|
||||
|
||||
- (UICollectionView *)collectionView {
|
||||
@@ -263,41 +276,26 @@
|
||||
- (KBVoiceInputBar *)voiceInputBar {
|
||||
if (!_voiceInputBar) {
|
||||
_voiceInputBar = [[KBVoiceInputBar alloc] init];
|
||||
_voiceInputBar.delegate = self;
|
||||
_voiceInputBar.statusText = @"按住按钮开始对话";
|
||||
}
|
||||
return _voiceInputBar;
|
||||
}
|
||||
|
||||
#pragma mark - KBVoiceInputBarDelegate
|
||||
#pragma mark - KBVoiceToTextManagerDelegate
|
||||
|
||||
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
|
||||
NSLog(@"[KBAIHomeVC] 开始录音");
|
||||
inputBar.statusText = @"正在聆听...";
|
||||
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
|
||||
didReceiveFinalText:(NSString *)text {
|
||||
if (text.length == 0) {
|
||||
return;
|
||||
}
|
||||
NSLog(@"[KBAIHomeVC] 语音识别结果:%@", text);
|
||||
|
||||
// TODO: 开始录音逻辑
|
||||
// 1. 检查登录状态
|
||||
// 2. 连接语音识别服务
|
||||
// 3. 开始录音
|
||||
// TODO: 使用识别文本(例如发起聊天请求)
|
||||
}
|
||||
|
||||
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
|
||||
NSLog(@"[KBAIHomeVC] 结束录音");
|
||||
inputBar.statusText = @"正在识别...";
|
||||
|
||||
// TODO: 结束录音逻辑
|
||||
// 1. 停止录音
|
||||
// 2. 发送音频数据
|
||||
// 3. 等待识别结果
|
||||
}
|
||||
|
||||
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
|
||||
NSLog(@"[KBAIHomeVC] 取消录音");
|
||||
inputBar.statusText = @"已取消";
|
||||
|
||||
// TODO: 取消录音逻辑
|
||||
// 1. 停止录音
|
||||
// 2. 清理资源
|
||||
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager
|
||||
didFailWithError:(NSError *)error {
|
||||
NSLog(@"[KBAIHomeVC] 语音识别失败:%@", error.localizedDescription);
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
40
keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.h
Normal file
40
keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.h
Normal file
@@ -0,0 +1,40 @@
|
||||
//
|
||||
// KBVoiceToTextManager.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/26.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
@class KBVoiceInputBar;
|
||||
@class KBVoiceToTextManager;
|
||||
|
||||
/// Callbacks emitted by KBVoiceToTextManager. Every method is optional, so
/// adopters implement only the events they care about.
@protocol KBVoiceToTextManagerDelegate <NSObject>

@optional

/// Recording lifecycle notifications forwarded from the bound input bar.
- (void)voiceToTextManagerDidBeginRecording:(KBVoiceToTextManager *)manager;
- (void)voiceToTextManagerDidEndRecording:(KBVoiceToTextManager *)manager;
- (void)voiceToTextManagerDidCancelRecording:(KBVoiceToTextManager *)manager;

/// Reports an interim (not yet final) transcript update.
/// @param manager The manager sending the message.
/// @param text    The text to display for the in-progress recognition.
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager didUpdateInterimText:(NSString *)text;

/// Reports finalized transcript text.
/// @param manager The manager sending the message.
/// @param text    The finalized text.
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager didReceiveFinalText:(NSString *)text;

/// Reports that recognition failed.
/// @param manager The manager sending the message.
/// @param error   The error describing the failure.
- (void)voiceToTextManager:(KBVoiceToTextManager *)manager didFailWithError:(NSError *)error;

@end
|
||||
|
||||
/// Voice-to-text manager (binds KBVoiceInputBar and uses Deepgram).
///
/// The manager is created around an input bar and reports recording and
/// transcription events to its delegate.
@interface KBVoiceToTextManager : NSObject

/// Receiver of recording and transcription events. Held weakly, so it is
/// explicitly nullable: it becomes nil when the delegate is deallocated.
@property (nonatomic, weak, nullable) id<KBVoiceToTextManagerDelegate> delegate;

/// The input bar this manager was created with. Held weakly, so it is
/// explicitly nullable: it becomes nil when the bar is deallocated.
@property (nonatomic, weak, readonly, nullable) KBVoiceInputBar *inputBar;

/// Creates a manager bound to the given input bar.
/// @param inputBar The voice input bar to bind to.
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar;

/// Prepares the underlying speech-recognition connection ahead of recording.
- (void)prepareConnection;

/// Disconnects the underlying speech-recognition connection.
- (void)disconnect;

@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
170
keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.m
Normal file
170
keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.m
Normal file
@@ -0,0 +1,170 @@
|
||||
//
|
||||
// KBVoiceToTextManager.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/26.
|
||||
//
|
||||
|
||||
#import "KBVoiceToTextManager.h"
|
||||
#import "DeepgramStreamingManager.h"
|
||||
#import "KBVoiceInputBar.h"
|
||||
|
||||
// Private state and protocol conformances for KBVoiceToTextManager.
@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate, DeepgramStreamingManagerDelegate>

/// Input bar this manager is bound to (held weakly).
@property (nonatomic, weak) KBVoiceInputBar *inputBar;

/// Streaming client used for speech recognition.
@property (nonatomic, strong) DeepgramStreamingManager *deepgramManager;

/// Finalized transcript accumulated for the current recording.
@property (nonatomic, strong) NSMutableString *fullText;

@end
|
||||
|
||||
// Status strings shown on the bound input bar.
static NSString *const kKBVoiceStatusConnecting = @"正在连接...";
static NSString *const kKBVoiceStatusListening = @"正在聆听...";
static NSString *const kKBVoiceStatusRecognizing = @"正在识别...";
static NSString *const kKBVoiceStatusCancelled = @"已取消";
static NSString *const kKBVoiceStatusFailed = @"识别失败";
static NSString *const kKBVoiceStatusFinished = @"识别完成";

// Info.plist key that, when present and non-empty, supplies the Deepgram API key.
static NSString *const kKBDeepgramAPIKeyInfoKey = @"KBDeepgramAPIKey";

/// Bridges a KBVoiceInputBar to a DeepgramStreamingManager: input-bar gestures
/// start/stop/cancel streaming, and streaming callbacks update the bar's status
/// text and are forwarded to the (optional) delegate methods.
@implementation KBVoiceToTextManager

/// Binds the manager to `inputBar`, installs itself as the bar's delegate and
/// configures the Deepgram streaming client.
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar {
    self = [super init];
    if (self) {
        _inputBar = inputBar;
        inputBar.delegate = self;
        _fullText = [[NSMutableString alloc] init];
        [self setupDeepgram];
    }
    return self;
}

- (void)dealloc {
    // Use the ivar directly: property accessors should not be invoked while
    // the object is being torn down.
    [_deepgramManager disconnect];
}

#pragma mark - Public Methods

/// Asks the streaming client to prepare its connection.
- (void)prepareConnection {
    [self.deepgramManager prepareConnection];
}

/// Asks the streaming client to disconnect.
- (void)disconnect {
    [self.deepgramManager disconnect];
}

#pragma mark - Private Methods

/// Creates and configures the Deepgram streaming client.
- (void)setupDeepgram {
    DeepgramStreamingManager *manager = [[DeepgramStreamingManager alloc] init];
    manager.delegate = self;
    manager.serverURL = @"wss://api.deepgram.com/v1/listen";
    manager.apiKey = [self resolvedDeepgramAPIKey];
    manager.language = @"en";
    manager.model = @"nova-3";
    manager.punctuate = YES;
    manager.smartFormat = YES;
    manager.interimResults = YES;
    manager.encoding = @"linear16";
    manager.sampleRate = 16000.0;
    manager.channels = 1;
    self.deepgramManager = manager;
}

/// Returns the Deepgram API key: the `KBDeepgramAPIKey` Info.plist entry when
/// it is a non-empty string, otherwise the legacy embedded key.
- (NSString *)resolvedDeepgramAPIKey {
    id configured =
        [[NSBundle mainBundle] objectForInfoDictionaryKey:kKBDeepgramAPIKeyInfoKey];
    if ([configured isKindOfClass:[NSString class]] && [(NSString *)configured length] > 0) {
        return (NSString *)configured;
    }
    // FIXME(security): this credential is committed to source control and
    // ships inside the binary. Rotate it, provide the key via configuration
    // (or a backend-issued short-lived token), and delete this fallback.
    return @"9c792eb63a65d644cbc95785155754cd1e84f8cf";
}

/// Clears the transcript accumulated for the current recording.
- (void)resetTranscript {
    [self.fullText setString:@""];
}

/// Shows the failure status on the input bar and forwards `error` to the
/// delegate when it implements the failure callback.
- (void)reportFailure:(NSError *)error {
    self.inputBar.statusText = kKBVoiceStatusFailed;

    // Read the weak delegate once so the check and the send use the same object.
    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
    if ([delegate respondsToSelector:@selector(voiceToTextManager:didFailWithError:)]) {
        [delegate voiceToTextManager:self didFailWithError:error];
    }
}

#pragma mark - KBVoiceInputBarDelegate

/// Recording began: start from an empty transcript and start streaming.
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
    [self resetTranscript];
    inputBar.statusText = kKBVoiceStatusConnecting;
    [self.deepgramManager start];

    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
    if ([delegate respondsToSelector:@selector(voiceToTextManagerDidBeginRecording:)]) {
        [delegate voiceToTextManagerDidBeginRecording:self];
    }
}

/// Recording ended: stop streaming and ask the client to finalize.
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
    inputBar.statusText = kKBVoiceStatusRecognizing;
    [self.deepgramManager stopAndFinalize];

    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
    if ([delegate respondsToSelector:@selector(voiceToTextManagerDidEndRecording:)]) {
        [delegate voiceToTextManagerDidEndRecording:self];
    }
}

/// Recording was cancelled: discard the transcript and cancel streaming.
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
    inputBar.statusText = kKBVoiceStatusCancelled;
    [self resetTranscript];
    [self.deepgramManager cancel];

    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
    if ([delegate respondsToSelector:@selector(voiceToTextManagerDidCancelRecording:)]) {
        [delegate voiceToTextManagerDidCancelRecording:self];
    }
}

#pragma mark - DeepgramStreamingManagerDelegate

// NOTE(review): the callbacks below update UI state (statusText). Confirm that
// DeepgramStreamingManager delivers its delegate callbacks on the main thread.

- (void)deepgramStreamingManagerDidConnect {
    self.inputBar.statusText = kKBVoiceStatusListening;
}

/// A disconnect without an error is ignored; one with an error is reported as
/// a failure.
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error {
    if (!error) {
        return;
    }
    [self reportFailure:error];
}

- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
    [self.inputBar updateVolumeRMS:rms];
}

/// Shows the finalized text so far followed by the interim text (separated by
/// a space), and forwards that combined text to the delegate.
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
    NSString *interim = text ?: @"";
    NSString *displayText = interim;
    if (self.fullText.length > 0) {
        displayText = interim.length > 0
            ? [NSString stringWithFormat:@"%@ %@", self.fullText, interim]
            : [self.fullText copy];
    }

    self.inputBar.statusText =
        displayText.length > 0 ? displayText : kKBVoiceStatusRecognizing;

    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
    if ([delegate respondsToSelector:@selector(voiceToTextManager:didUpdateInterimText:)]) {
        [delegate voiceToTextManager:self didUpdateInterimText:displayText];
    }
}

/// Appends a non-empty final segment to the accumulated transcript (space
/// separated), shows the accumulated text, and forwards it to the delegate
/// whenever the accumulated text is non-empty.
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
    if (text.length > 0) {
        if (self.fullText.length > 0) {
            [self.fullText appendString:@" "];
        }
        [self.fullText appendString:text];
    }

    NSString *finalText = [self.fullText copy];
    self.inputBar.statusText =
        finalText.length > 0 ? finalText : kKBVoiceStatusFinished;

    if (finalText.length == 0) {
        return;
    }

    id<KBVoiceToTextManagerDelegate> delegate = self.delegate;
    if ([delegate respondsToSelector:@selector(voiceToTextManager:didReceiveFinalText:)]) {
        [delegate voiceToTextManager:self didReceiveFinalText:finalText];
    }
}

- (void)deepgramStreamingManagerDidFail:(NSError *)error {
    [self reportFailure:error];
}

@end
|
||||
Reference in New Issue
Block a user