//
//  KBVoiceToTextManager.m
//  keyBoard
//
//  Created by Mac on 2026/1/26.
//
|
|
|
|
#import "KBVoiceToTextManager.h"
|
|
#import "DeepgramStreamingManager.h"
|
|
#import "KBVoiceInputBar.h"
|
|
#import "KBLocalizationManager.h"
|
|
|
|
/// Private protocol conformances and state for KBVoiceToTextManager.
@interface KBVoiceToTextManager () <KBVoiceInputBarDelegate, DeepgramStreamingManagerDelegate>

/// Streaming recognition client; created and configured in -setupDeepgram.
@property(nonatomic, strong) DeepgramStreamingManager *deepgramManager;

/// Input bar whose status text and volume meter this manager updates. Held weakly.
@property(nonatomic, weak) KBVoiceInputBar *inputBar;

/// Accumulated final transcript segments; cleared by -resetTranscript.
@property(nonatomic, strong) NSMutableString *fullText;

@end
|
|
|
|
@implementation KBVoiceToTextManager
|
|
|
|
/// Turns Deepgram streaming recognition on or off.
///
/// Assigning the current value again does nothing. Turning recognition off
/// cancels the streaming manager and clears the accumulated transcript.
/// Turning it on refreshes the recognition language and then asks the
/// streaming manager to prepare its connection.
///
/// @param deepgramEnabled YES to enable streaming recognition, NO to disable it.
- (void)setDeepgramEnabled:(BOOL)deepgramEnabled {
  if (_deepgramEnabled == deepgramEnabled) {
    return;
  }
  _deepgramEnabled = deepgramEnabled;

  if (deepgramEnabled) {
    // Route through -prepareConnection so the language is refreshed before the
    // connection is prepared, exactly as it is for external callers. The flag
    // is already YES at this point, so the guard inside it passes.
    [self prepareConnection];
    return;
  }

  [self.deepgramManager cancel];
  [self resetTranscript];
}
|
|
|
|
/// Creates a manager bound to the given input bar.
///
/// The manager installs itself as the bar's delegate, starts with an empty
/// transcript and Deepgram enabled, and configures the streaming manager.
///
/// @param inputBar The voice input bar to observe and update.
/// @return The initialized manager, or nil if the superclass initializer fails.
- (instancetype)initWithInputBar:(KBVoiceInputBar *)inputBar {
  self = [super init];
  if (!self) {
    return nil;
  }

  _inputBar = inputBar;
  inputBar.delegate = self;
  _fullText = [NSMutableString string];
  _deepgramEnabled = YES;
  [self setupDeepgram];

  return self;
}
|
|
|
|
/// Disconnects the streaming manager when this object is destroyed.
- (void)dealloc {
  // Use the ivar rather than the property accessor: accessors should not be
  // messaged on an object that is in the middle of being torn down.
  [_deepgramManager disconnect];
}
|
|
|
|
#pragma mark - Public Methods
|
|
|
|
/// Refreshes the recognition language and asks the streaming manager to
/// prepare its connection. Does nothing while Deepgram is disabled.
- (void)prepareConnection {
  if (self.deepgramEnabled) {
    [self kb_refreshDeepgramLanguage];
    [self.deepgramManager prepareConnection];
  }
}
|
|
|
|
/// Asks the streaming manager to disconnect. Does nothing while Deepgram is
/// disabled.
- (void)disconnect {
  if (self.deepgramEnabled) {
    [self.deepgramManager disconnect];
  }
}
|
|
|
|
#pragma mark - Private Methods
|
|
|
|
/// Creates the streaming manager and applies the fixed recognition settings:
/// endpoint, credentials, language, model, formatting flags and audio format.
- (void)setupDeepgram {
  DeepgramStreamingManager *manager = [[DeepgramStreamingManager alloc] init];
  self.deepgramManager = manager;

  // Endpoint and credentials.
  manager.delegate = self;
  manager.serverURL = @"wss://api.deepgram.com/v1/listen";
  // FIXME(security): this API key is embedded in source and ships inside the
  // binary. Rotate it and load it from secure configuration or a backend
  // token exchange instead of hard-coding it here.
  manager.apiKey = @"9c792eb63a65d644cbc95785155754cd1e84f8cf";

  // Language is derived from the app's current language setting.
  [self kb_refreshDeepgramLanguage];

  // Recognition options.
  manager.model = @"nova-3";
  manager.punctuate = YES;
  manager.smartFormat = YES;
  manager.interimResults = YES;

  // Audio format sent to the service.
  manager.encoding = @"linear16";
  manager.sampleRate = 16000.0;
  manager.channels = 1;
}
|
|
|
|
/// Empties the accumulated transcript buffer in place.
- (void)resetTranscript {
  NSMutableString *buffer = self.fullText;
  [buffer deleteCharactersInRange:NSMakeRange(0, buffer.length)];
}
|
|
|
|
/// Pushes the language code computed by -kb_currentDeepgramLanguageCode to the
/// streaming manager.
- (void)kb_refreshDeepgramLanguage {
  NSString *code = [self kb_currentDeepgramLanguageCode];
  self.deepgramManager.language = code;
}
|
|
|
|
/// Maps the app's current language code to the language code passed to the
/// streaming manager.
///
/// Matching is case-insensitive and treats "_" and "-" as the same separator,
/// so "zh_Hant", "zh-Hant" and "ZH-HANT" all resolve identically. Codes that
/// match no known prefix (including a missing code) fall back to "en".
///
/// @return One of "es", "id", "pt", "zh-TW", "zh-CN" or "en".
- (NSString *)kb_currentDeepgramLanguageCode {
  NSString *appCode = [KBLocalizationManager shared].currentLanguageCode ?: @"en";

  // Normalize case and separator once so each variant needs only one prefix.
  NSString *normalized =
      [appCode.lowercaseString stringByReplacingOccurrencesOfString:@"_" withString:@"-"];

  // Each entry is @[ prefix, result ]; entries are checked in order.
  NSArray<NSArray<NSString *> *> *prefixTable = @[
    @[ @"es", @"es" ],
    @[ @"id", @"id" ],
    @[ @"pt", @"pt" ],
    @[ @"zh-hant", @"zh-TW" ],
    @[ @"zh-tw", @"zh-TW" ],
    @[ @"zh-hk", @"zh-TW" ],
    @[ @"zh-hans", @"zh-CN" ],
    @[ @"zh-cn", @"zh-CN" ],
  ];

  for (NSArray<NSString *> *entry in prefixTable) {
    if ([normalized hasPrefix:entry[0]]) {
      return entry[1];
    }
  }
  return @"en";
}
|
|
|
|
#pragma mark - KBVoiceInputBarDelegate
|
|
|
|
/// KBVoiceInputBarDelegate: recording began.
///
/// Clears the transcript, updates the bar's status text, starts the streaming
/// manager when Deepgram is enabled, and forwards the event to the delegate.
- (void)voiceInputBarDidBeginRecording:(KBVoiceInputBar *)inputBar {
  [self resetTranscript];

  if (!self.deepgramEnabled) {
    inputBar.statusText = KBLocalized(@"Voice Recording...");
  } else {
    [self kb_refreshDeepgramLanguage];
    inputBar.statusText = KBLocalized(@"Voice Connecting...");
    [self.deepgramManager start];
  }

  SEL callback = @selector(voiceToTextManagerDidBeginRecording:);
  if ([self.delegate respondsToSelector:callback]) {
    [self.delegate voiceToTextManagerDidBeginRecording:self];
  }
}
|
|
|
|
/// KBVoiceInputBarDelegate: recording ended.
///
/// Updates the bar's status text, asks the streaming manager to stop and
/// finalize when Deepgram is enabled, and forwards the event to the delegate.
- (void)voiceInputBarDidEndRecording:(KBVoiceInputBar *)inputBar {
  if (!self.deepgramEnabled) {
    inputBar.statusText = KBLocalized(@"Voice Recording Ended");
  } else {
    inputBar.statusText = KBLocalized(@"Voice Recognizing...");
    [self.deepgramManager stopAndFinalize];
  }

  SEL callback = @selector(voiceToTextManagerDidEndRecording:);
  if ([self.delegate respondsToSelector:callback]) {
    [self.delegate voiceToTextManagerDidEndRecording:self];
  }
}
|
|
|
|
/// KBVoiceInputBarDelegate: recording was cancelled.
///
/// Shows the cancelled status, discards the transcript, cancels the streaming
/// manager when Deepgram is enabled, and forwards the event to the delegate.
- (void)voiceInputBarDidCancelRecording:(KBVoiceInputBar *)inputBar {
  inputBar.statusText = KBLocalized(@"Voice Cancelled");
  [self resetTranscript];

  BOOL streaming = self.deepgramEnabled;
  if (streaming) {
    [self.deepgramManager cancel];
  }

  SEL callback = @selector(voiceToTextManagerDidCancelRecording:);
  if ([self.delegate respondsToSelector:callback]) {
    [self.delegate voiceToTextManagerDidCancelRecording:self];
  }
}
|
|
|
|
#pragma mark - DeepgramStreamingManagerDelegate
|
|
|
|
/// DeepgramStreamingManagerDelegate: the connection is up. Shows the
/// "listening" status on the input bar; ignored while Deepgram is disabled.
- (void)deepgramStreamingManagerDidConnect {
  if (self.deepgramEnabled) {
    self.inputBar.statusText = KBLocalized(@"Voice Listening...");
  }
}
|
|
|
|
/// DeepgramStreamingManagerDelegate: the connection closed.
///
/// A disconnect without an error is ignored, as is any disconnect while
/// Deepgram is disabled. Otherwise the failure status is shown and the error
/// is forwarded to the delegate.
///
/// @param error The error reported with the disconnect, or nil.
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error {
  if (!self.deepgramEnabled || !error) {
    return;
  }

  self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");

  SEL callback = @selector(voiceToTextManager:didFailWithError:);
  if ([self.delegate respondsToSelector:callback]) {
    [self.delegate voiceToTextManager:self didFailWithError:error];
  }
}
|
|
|
|
/// DeepgramStreamingManagerDelegate: forwards the reported RMS value to the
/// input bar; ignored while Deepgram is disabled.
///
/// @param rms The RMS value reported by the streaming manager.
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
  if (self.deepgramEnabled) {
    [self.inputBar updateVolumeRMS:rms];
  }
}
|
|
|
|
/// DeepgramStreamingManagerDelegate: an interim (not yet final) transcript
/// arrived.
///
/// The text shown is the accumulated final transcript followed by the interim
/// text, separated by a single space when both are non-empty. The interim text
/// is never appended to the accumulated transcript. Ignored while Deepgram is
/// disabled.
///
/// @param text The interim transcript; nil is treated as an empty string.
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
  if (!self.deepgramEnabled) {
    return;
  }

  NSString *interim = text ?: @"";
  NSString *committed = [self.fullText copy];

  NSString *displayText = interim;
  if (committed.length > 0) {
    displayText = interim.length > 0
                      ? [NSString stringWithFormat:@"%@ %@", committed, interim]
                      : committed;
  }

  // Fall back to the generic "recognizing" status when there is nothing to show.
  if (displayText.length > 0) {
    self.inputBar.statusText = displayText;
  } else {
    self.inputBar.statusText = KBLocalized(@"Voice Recognizing...");
  }

  SEL callback = @selector(voiceToTextManager:didUpdateInterimText:);
  if ([self.delegate respondsToSelector:callback]) {
    [self.delegate voiceToTextManager:self didUpdateInterimText:displayText];
  }
}
|
|
|
|
/// DeepgramStreamingManagerDelegate: a final transcript segment arrived.
///
/// Non-empty segments are appended to the accumulated transcript, separated
/// from earlier text by a single space. The accumulated transcript is shown on
/// the input bar and, when non-empty, forwarded to the delegate. Ignored while
/// Deepgram is disabled.
///
/// @param text The final transcript segment; empty or nil segments add nothing.
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
  if (!self.deepgramEnabled) {
    return;
  }

  if (text.length > 0) {
    NSString *separator = self.fullText.length > 0 ? @" " : @"";
    [self.fullText appendString:separator];
    [self.fullText appendString:text];
  }

  NSString *finalText = [self.fullText copy];
  if (finalText.length == 0) {
    // Nothing was recognized: show the completion status and skip the delegate.
    self.inputBar.statusText = KBLocalized(@"Voice Recognition Completed");
    return;
  }

  self.inputBar.statusText = finalText;

  SEL callback = @selector(voiceToTextManager:didReceiveFinalText:);
  if ([self.delegate respondsToSelector:callback]) {
    [self.delegate voiceToTextManager:self didReceiveFinalText:finalText];
  }
}
|
|
|
|
/// DeepgramStreamingManagerDelegate: the streaming manager reported a failure.
/// Shows the failure status and forwards the error to the delegate; ignored
/// while Deepgram is disabled.
///
/// @param error The error reported by the streaming manager.
- (void)deepgramStreamingManagerDidFail:(NSError *)error {
  if (self.deepgramEnabled) {
    self.inputBar.statusText = KBLocalized(@"Voice Recognition Failed");

    SEL callback = @selector(voiceToTextManager:didFailWithError:);
    if ([self.delegate respondsToSelector:callback]) {
      [self.delegate voiceToTextManager:self didFailWithError:error];
    }
  }
}
|
|
|
|
@end
|