Files
keyboard/keyBoard/Class/AiTalk/VM/KBVoiceToTextManager.m

241 lines
6.7 KiB
Mathematica
Raw Normal View History

2026-01-26 18:43:07 +08:00
//
// KBVoiceToTextManager.m
// keyBoard
//
// Created by Mac on 2026/1/26.
//
#import "KBVoiceToTextManager.h"
#import "DeepgramStreamingManager.h"
#import "KBVoiceInputBar.h"
2026-03-07 13:29:29 +08:00
#import "KBLocalizationManager.h"
2026-01-26 18:43:07 +08:00
// Private class extension: adopts the input-bar and Deepgram streaming delegate
// protocols and declares the manager's internal state.
@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate, DeepgramStreamingManagerDelegate>

/// Buffer that accumulates finalized transcript segments.
@property (nonatomic, strong) NSMutableString *fullText;

/// Input bar this manager acts as delegate for; held weakly.
@property (nonatomic, weak) KBVoiceInputBar *inputBar;

/// Streaming client that this manager configures and drives.
@property (nonatomic, strong) DeepgramStreamingManager *deepgramManager;

@end
@implementation KBVoiceToTextManager
2026-01-27 13:57:32 +08:00
/// Turns Deepgram streaming recognition on or off.
///
/// Assigning the value that is already set is a no-op. Disabling cancels the
/// streaming manager and clears the accumulated transcript. Enabling refreshes
/// the recognition language and then asks the streaming manager to prepare its
/// connection.
///
/// @param deepgramEnabled YES to enable Deepgram recognition, NO to disable it.
- (void)setDeepgramEnabled:(BOOL)deepgramEnabled {
    if (_deepgramEnabled == deepgramEnabled) {
        return;
    }
    _deepgramEnabled = deepgramEnabled;

    if (!deepgramEnabled) {
        [self.deepgramManager cancel];
        [self resetTranscript];
        return;
    }

    // Keep this path consistent with -prepareConnection: the app language may
    // have changed while Deepgram was disabled, so re-read it before the
    // connection is prepared.
    [self kb_refreshDeepgramLanguage];
    [self.deepgramManager prepareConnection];
}
2026-01-26 18:43:07 +08:00
/// Creates a manager bound to the given voice input bar.
///
/// The manager installs itself as the bar's delegate, starts with an empty
/// transcript buffer and Deepgram enabled, and then configures the streaming
/// manager via -setupDeepgram.
///
/// @param inputBar The input bar to observe; stored in a weak property.
/// @return The initialized manager, or nil if the superclass initializer fails.
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar {
    self = [super init];
    if (!self) {
        return nil;
    }

    _inputBar = inputBar;
    _inputBar.delegate = self;
    _fullText = [NSMutableString string];
    // Set through the ivar so the custom setter's side effects do not run here.
    _deepgramEnabled = YES;
    [self setupDeepgram];

    return self;
}
/// Disconnects the streaming manager when this object is deallocated.
- (void)dealloc {
    // Direct ivar access: accessors are avoided during teardown.
    [_deepgramManager disconnect];
}
#pragma mark - Public Methods
/// Refreshes the recognition language and asks the streaming manager to
/// prepare its connection. Does nothing while Deepgram is disabled.
- (void)prepareConnection {
    if (self.deepgramEnabled) {
        [self kb_refreshDeepgramLanguage];
        [self.deepgramManager prepareConnection];
    }
}
/// Disconnects the streaming manager. Does nothing while Deepgram is disabled.
- (void)disconnect {
    if (self.deepgramEnabled) {
        [self.deepgramManager disconnect];
    }
}
#pragma mark - Private Methods
/// Creates the Deepgram streaming manager, makes this object its delegate and
/// applies the fixed streaming configuration: endpoint, credentials, language,
/// model, formatting flags and audio format.
- (void)setupDeepgram {
    DeepgramStreamingManager *manager = [[DeepgramStreamingManager alloc] init];
    self.deepgramManager = manager;
    manager.delegate = self;

    // Endpoint and credentials.
    manager.serverURL = @"wss://api.deepgram.com/v1/listen";
    // FIXME(security): this API key is hard-coded in the app binary and in
    // source control. It should be rotated and supplied at runtime instead
    // (for example, a short-lived token issued by the app's backend).
    manager.apiKey = @"9c792eb63a65d644cbc95785155754cd1e84f8cf";

    // Language follows the app's current localization.
    manager.language = [self kb_currentDeepgramLanguageCode];

    // Recognition options.
    manager.model = @"nova-3";
    manager.punctuate = YES;
    manager.smartFormat = YES;
    manager.interimResults = YES;

    // Audio format sent to the service.
    manager.encoding = @"linear16";
    manager.sampleRate = 16000.0;
    manager.channels = 1;
}
/// Empties the accumulated transcript buffer.
- (void)resetTranscript {
    NSMutableString *buffer = self.fullText;
    [buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}
2026-03-07 13:29:29 +08:00
/// Pushes the language code derived from the app's current localization into
/// the streaming manager.
- (void)kb_refreshDeepgramLanguage {
    NSString *deepgramLanguage = [self kb_currentDeepgramLanguageCode];
    self.deepgramManager.language = deepgramLanguage;
}
/// Maps the app's current language code to the language code handed to the
/// streaming manager.
///
/// Matching is case-insensitive and treats "_" and "-" as the same separator,
/// so "zh_HK", "zh-HK", "zh_Hant" and "zh-Hant" all resolve identically.
///
/// @return @"es", @"id" or @"pt" for codes with those prefixes; @"zh-TW" for
///         zh-hant / zh-tw / zh-hk codes; @"zh-CN" for zh-hans / zh-cn codes;
///         @"en" for everything else, including a nil current language code.
- (NSString *)kb_currentDeepgramLanguageCode {
    NSString *languageCode = [KBLocalizationManager shared].currentLanguageCode ?: @"en";

    // Normalize once so each variant needs only one prefix check. Previously
    // only some underscore spellings (zh_tw, zh_cn) were recognized, while
    // zh_hk, zh_hant and zh_hans silently fell back to English.
    NSString *lc = [languageCode.lowercaseString stringByReplacingOccurrencesOfString:@"_"
                                                                           withString:@"-"];

    if ([lc hasPrefix:@"es"]) { return @"es"; }
    if ([lc hasPrefix:@"id"]) { return @"id"; }
    if ([lc hasPrefix:@"pt"]) { return @"pt"; }

    if ([lc hasPrefix:@"zh-hant"] || [lc hasPrefix:@"zh-tw"] || [lc hasPrefix:@"zh-hk"]) {
        return @"zh-TW";
    }
    if ([lc hasPrefix:@"zh-hans"] || [lc hasPrefix:@"zh-cn"]) {
        return @"zh-CN";
    }

    return @"en";
}
2026-01-26 18:43:07 +08:00
#pragma mark - KBVoiceInputBarDelegate
/// Input bar callback: recording has started.
///
/// Clears the transcript, then either starts Deepgram streaming (after
/// refreshing the language) or just shows a recording status when Deepgram is
/// disabled. Finally forwards the event to this manager's delegate if it
/// implements the corresponding method.
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
    [self resetTranscript];

    if (!self.deepgramEnabled) {
        inputBar.statusText = KBLocalized(@"Voice Recording...");
    } else {
        [self kb_refreshDeepgramLanguage];
        inputBar.statusText = KBLocalized(@"Voice Connecting...");
        [self.deepgramManager start];
    }

    SEL callback = @selector(voiceToTextManagerDidBeginRecording:);
    if ([self.delegate respondsToSelector:callback]) {
        [self.delegate voiceToTextManagerDidBeginRecording:self];
    }
}
/// Input bar callback: recording has ended.
///
/// With Deepgram enabled, shows a recognizing status and asks the streaming
/// manager to stop and finalize; otherwise only shows a recording-ended status.
/// The event is then forwarded to this manager's delegate if it implements the
/// corresponding method.
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
    if (!self.deepgramEnabled) {
        inputBar.statusText = KBLocalized(@"Voice Recording Ended");
    } else {
        inputBar.statusText = KBLocalized(@"Voice Recognizing...");
        [self.deepgramManager stopAndFinalize];
    }

    SEL callback = @selector(voiceToTextManagerDidEndRecording:);
    if ([self.delegate respondsToSelector:callback]) {
        [self.delegate voiceToTextManagerDidEndRecording:self];
    }
}
/// Input bar callback: recording was cancelled.
///
/// Shows a cancelled status, clears the transcript, cancels the streaming
/// manager when Deepgram is enabled, and forwards the event to this manager's
/// delegate if it implements the corresponding method.
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
    inputBar.statusText = KBLocalized(@"Voice Cancelled");
    [self resetTranscript];

    BOOL streamingActive = self.deepgramEnabled;
    if (streamingActive) {
        [self.deepgramManager cancel];
    }

    SEL callback = @selector(voiceToTextManagerDidCancelRecording:);
    if ([self.delegate respondsToSelector:callback]) {
        [self.delegate voiceToTextManagerDidCancelRecording:self];
    }
}
#pragma mark - DeepgramStreamingManagerDelegate
/// Streaming callback: the connection is up. Shows a listening status on the
/// input bar; ignored while Deepgram is disabled.
- (void)deepgramStreamingManagerDidConnect {
    if (self.deepgramEnabled) {
        self.inputBar.statusText = KBLocalized(@"Voice Listening...");
    }
}
/// Streaming callback: the connection closed.
///
/// A disconnect without an error is ignored, as is any disconnect while
/// Deepgram is disabled. Otherwise a failure status is shown and the error is
/// forwarded to this manager's delegate if it implements the failure method.
///
/// @param error The error reported with the disconnect, or nil.
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error {
    BOOL shouldReport = self.deepgramEnabled && error != nil;
    if (!shouldReport) {
        return;
    }

    self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");

    SEL callback = @selector(voiceToTextManager:didFailWithError:);
    if ([self.delegate respondsToSelector:callback]) {
        [self.delegate voiceToTextManager:self didFailWithError:error];
    }
}
/// Streaming callback: forwards the reported RMS value to the input bar.
/// Ignored while Deepgram is disabled.
///
/// @param rms The RMS value reported by the streaming manager.
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
    if (self.deepgramEnabled) {
        [self.inputBar updateVolumeRMS:rms];
    }
}
/// Streaming callback: an interim (not yet final) transcript arrived.
///
/// The text shown is the accumulated final transcript followed by the interim
/// text, separated by one space when both are non-empty. The input bar shows
/// that text, or a recognizing placeholder when it is empty. The combined text
/// (possibly empty) is forwarded to this manager's delegate if it implements
/// the interim-text method. Ignored while Deepgram is disabled.
///
/// @param text The interim transcript; nil is treated as an empty string.
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
    if (!self.deepgramEnabled) {
        return;
    }

    NSString *interim = text ?: @"";
    NSString *displayText = interim;
    if (self.fullText.length > 0) {
        // Prefix with what has already been finalized; without interim text
        // the snapshot of the finalized transcript stands alone.
        displayText = (interim.length > 0)
            ? [NSString stringWithFormat:@"%@ %@", self.fullText, interim]
            : [self.fullText copy];
    }

    if (displayText.length > 0) {
        self.inputBar.statusText = displayText;
    } else {
        self.inputBar.statusText = KBLocalized(@"Voice Recognizing...");
    }

    SEL callback = @selector(voiceToTextManager:didUpdateInterimText:);
    if ([self.delegate respondsToSelector:callback]) {
        [self.delegate voiceToTextManager:self didUpdateInterimText:displayText];
    }
}
/// Streaming callback: a final transcript segment arrived.
///
/// Non-empty segments are appended to the accumulated transcript, separated
/// from earlier content by one space. The input bar shows the accumulated
/// transcript, or a completed placeholder when it is empty. This manager's
/// delegate receives the accumulated transcript only when it is non-empty and
/// the delegate implements the final-text method. Ignored while Deepgram is
/// disabled.
///
/// @param text The final transcript segment; nil or empty adds nothing.
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
    if (!self.deepgramEnabled) {
        return;
    }

    if (text.length > 0) {
        NSString *separator = (self.fullText.length > 0) ? @" " : @"";
        [self.fullText appendFormat:@"%@%@", separator, text];
    }

    NSString *finalText = [self.fullText copy];
    BOOL hasTranscript = finalText.length > 0;

    if (hasTranscript) {
        self.inputBar.statusText = finalText;
    } else {
        self.inputBar.statusText = KBLocalized(@"Voice Recognition Completed");
    }

    if (!hasTranscript) {
        return;
    }
    SEL callback = @selector(voiceToTextManager:didReceiveFinalText:);
    if ([self.delegate respondsToSelector:callback]) {
        [self.delegate voiceToTextManager:self didReceiveFinalText:finalText];
    }
}
/// Streaming callback: the streaming manager reported a failure.
///
/// Shows a failure status on the input bar and forwards the error to this
/// manager's delegate if it implements the failure method. Ignored while
/// Deepgram is disabled.
///
/// @param error The error reported by the streaming manager.
- (void)deepgramStreamingManagerDidFail:(NSError *)error {
    if (self.deepgramEnabled) {
        self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");

        SEL callback = @selector(voiceToTextManager:didFailWithError:);
        if ([self.delegate respondsToSelector:callback]) {
            [self.delegate voiceToTextManager:self didFailWithError:error];
        }
    }
}
@end