This commit is contained in:
2026-03-07 18:49:34 +08:00
parent 6327f31f11
commit 9fb2e2e694
4 changed files with 85 additions and 2509 deletions

View File

@@ -78,7 +78,6 @@
045ED5282F53F4B000131114 /* KBInputProfileManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 045ED5262F53F4AF00131114 /* KBInputProfileManager.m */; };
045ED52B2F540FBE00131114 /* normal_hei_them.zip in Resources */ = {isa = PBXBuildFile; fileRef = 045ED5292F540FBE00131114 /* normal_hei_them.zip */; };
045ED52C2F540FBE00131114 /* normal_them.zip in Resources */ = {isa = PBXBuildFile; fileRef = 045ED52A2F540FBE00131114 /* normal_them.zip */; };
046086752F191CC700757C95 /* AI技术分析.txt in Resources */ = {isa = PBXBuildFile; fileRef = 046086742F191CC700757C95 /* AI技术分析.txt */; };
0460869A2F19238500757C95 /* KBAiWaveformView.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086992F19238500757C95 /* KBAiWaveformView.m */; };
0460869C2F19238500757C95 /* KBAiRecordButton.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086972F19238500757C95 /* KBAiRecordButton.m */; };
046086B32F19239B00757C95 /* AudioSessionManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 046086A22F19239B00757C95 /* AudioSessionManager.m */; };
@@ -215,13 +214,11 @@
04A9FE0F2EB481100020DB6D /* KBHUD.m in Sources */ = {isa = PBXBuildFile; fileRef = 04FC97082EB31B14007BD342 /* KBHUD.m */; };
04A9FE132EB4D0D20020DB6D /* KBFullAccessManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04A9FE112EB4D0D20020DB6D /* KBFullAccessManager.m */; };
04A9FE162EB873C80020DB6D /* UIViewController+Extension.m in Sources */ = {isa = PBXBuildFile; fileRef = 04A9FE152EB873C80020DB6D /* UIViewController+Extension.m */; };
04A9FE1A2EB892460020DB6D /* KBLocalizationManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04A9FE192EB892460020DB6D /* KBLocalizationManager.m */; };
04A9FE1B2EB892460020DB6D /* KBLocalizationManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04A9FE192EB892460020DB6D /* KBLocalizationManager.m */; };
04A9FE202EB893F10020DB6D /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 04A9FE1E2EB893F10020DB6D /* Localizable.strings */; };
04A9FE212EB893F10020DB6D /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 04A9FE1E2EB893F10020DB6D /* Localizable.strings */; };
E0A100102F60000100ABCDEF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = E0A100002F60000100ABCDEF /* InfoPlist.strings */; };
E0A100112F60000100ABCDEF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = E0A100002F60000100ABCDEF /* InfoPlist.strings */; };
04B5A1A22EEFA12300AAAAAA /* KBPayProductModel.m in Sources */ = {isa = PBXBuildFile; fileRef = 04B5A1A12EEFA12300AAAAAA /* KBPayProductModel.m */; };
04A9FE1A2EB892460020DB6D /* KBLocalizationManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04A9FE192EB892460020DB6D /* KBLocalizationManager.m */; };
04A9FE1B2EB892460020DB6D /* KBLocalizationManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04A9FE192EB892460020DB6D /* KBLocalizationManager.m */; };
04A9FE202EB893F10020DB6D /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 04A9FE1E2EB893F10020DB6D /* Localizable.strings */; };
04A9FE212EB893F10020DB6D /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 04A9FE1E2EB893F10020DB6D /* Localizable.strings */; };
04B5A1A22EEFA12300AAAAAA /* KBPayProductModel.m in Sources */ = {isa = PBXBuildFile; fileRef = 04B5A1A12EEFA12300AAAAAA /* KBPayProductModel.m */; };
04BBF89D2F3ACD8800B1FBB2 /* KBKeyboardStressTestVC.m in Sources */ = {isa = PBXBuildFile; fileRef = 04BBF89A2F3ACD8800B1FBB2 /* KBKeyboardStressTestVC.m */; };
04BBF89E2F3ACD8800B1FBB2 /* KBTestVC.m in Sources */ = {isa = PBXBuildFile; fileRef = 04BBF89C2F3ACD8800B1FBB2 /* KBTestVC.m */; };
04BBF9002F3C97CB00B1FBB2 /* DeepgramWebSocketClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 04BBF8FF2F3C97CB00B1FBB2 /* DeepgramWebSocketClient.m */; };
@@ -237,8 +234,6 @@
04D1F6B22EDFF10A00B12345 /* KBSkinInstallBridge.m in Sources */ = {isa = PBXBuildFile; fileRef = 04D1F6B12EDFF10A00B12345 /* KBSkinInstallBridge.m */; };
04D1F6B32EDFF10A00B12345 /* KBSkinInstallBridge.m in Sources */ = {isa = PBXBuildFile; fileRef = 04D1F6B12EDFF10A00B12345 /* KBSkinInstallBridge.m */; };
04E0383E2F1A7C30002CA5A0 /* KBCustomTabBar.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E0383D2F1A7C30002CA5A0 /* KBCustomTabBar.m */; };
04E038D82F20BFFB002CA5A0 /* websocket-api.md in Resources */ = {isa = PBXBuildFile; fileRef = 04E038D72F20BFFB002CA5A0 /* websocket-api.md */; };
04E038E32F20E500002CA5A0 /* deepgramAPI.md in Resources */ = {isa = PBXBuildFile; fileRef = 04E038E22F20E500002CA5A0 /* deepgramAPI.md */; };
04E038E92F20E877002CA5A0 /* DeepgramStreamingManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */; };
04E038EF2F21F0EC002CA5A0 /* AiVM.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038EE2F21F0EC002CA5A0 /* AiVM.m */; };
04E0394B2F236E75002CA5A0 /* KBChatUserMessageCell.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E0394A2F236E75002CA5A0 /* KBChatUserMessageCell.m */; };
@@ -339,6 +334,8 @@
B7F1A1E22F90000100000001 /* indonesian_words.json in Resources */ = {isa = PBXBuildFile; fileRef = B7F1A1E42F90000100000001 /* indonesian_words.json */; };
B7F1A1E52F90000100000001 /* english_words.json in Resources */ = {isa = PBXBuildFile; fileRef = B7F1A1E62F90000100000001 /* english_words.json */; };
B7F1A1F32FA0000100000001 /* kb_diacritics_map.json in Resources */ = {isa = PBXBuildFile; fileRef = B7F1A1F22FA0000100000001 /* kb_diacritics_map.json */; };
E0A100102F60000100ABCDEF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = E0A100002F60000100ABCDEF /* InfoPlist.strings */; };
E0A100112F60000100ABCDEF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = E0A100002F60000100ABCDEF /* InfoPlist.strings */; };
EB72B60040437E3C0A4890FC /* KBShopThemeDetailModel.m in Sources */ = {isa = PBXBuildFile; fileRef = B9F60894E529C3EDAF6BAC3D /* KBShopThemeDetailModel.m */; };
ECC9EE02174D86E8D792472F /* Pods_keyBoard.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 967065BB5230E43F293B3AF9 /* Pods_keyBoard.framework */; };
/* End PBXBuildFile section */
@@ -459,7 +456,6 @@
045ED5262F53F4AF00131114 /* KBInputProfileManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBInputProfileManager.m; sourceTree = "<group>"; };
045ED5292F540FBE00131114 /* normal_hei_them.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = normal_hei_them.zip; sourceTree = "<group>"; };
045ED52A2F540FBE00131114 /* normal_them.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = normal_them.zip; sourceTree = "<group>"; };
046086742F191CC700757C95 /* AI技术分析.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "AI技术分析.txt"; sourceTree = "<group>"; };
046086962F19238500757C95 /* KBAiRecordButton.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBAiRecordButton.h; sourceTree = "<group>"; };
046086972F19238500757C95 /* KBAiRecordButton.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBAiRecordButton.m; sourceTree = "<group>"; };
046086982F19238500757C95 /* KBAiWaveformView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBAiWaveformView.h; sourceTree = "<group>"; };
@@ -551,14 +547,10 @@
04837AE52F5848680012BDE2 /* id */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = id; path = id.lproj/LaunchScreen.strings; sourceTree = "<group>"; };
04837AE62F5848680012BDE2 /* id */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = id; path = id.lproj/Main.strings; sourceTree = "<group>"; };
04837AE72F5848680012BDE2 /* id */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = id; path = id.lproj/Localizable.strings; sourceTree = "<group>"; };
04837AE82F5848820012BDE2 /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/LaunchScreen.strings"; sourceTree = "<group>"; };
04837AE92F5848820012BDE2 /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/Main.strings"; sourceTree = "<group>"; };
04837AEA2F5848820012BDE2 /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/Localizable.strings"; sourceTree = "<group>"; };
E0A100022F60000100ABCDEF /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/InfoPlist.strings"; sourceTree = "<group>"; };
E0A100032F60000100ABCDEF /* es */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = es; path = es.lproj/InfoPlist.strings; sourceTree = "<group>"; };
E0A100042F60000100ABCDEF /* id */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = id; path = id.lproj/InfoPlist.strings; sourceTree = "<group>"; };
E0A100052F60000100ABCDEF /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/InfoPlist.strings"; sourceTree = "<group>"; };
048908BA2EBE1FCB00FABA60 /* BaseViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = BaseViewController.h; sourceTree = "<group>"; };
04837AE82F5848820012BDE2 /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/LaunchScreen.strings"; sourceTree = "<group>"; };
04837AE92F5848820012BDE2 /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/Main.strings"; sourceTree = "<group>"; };
04837AEA2F5848820012BDE2 /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/Localizable.strings"; sourceTree = "<group>"; };
048908BA2EBE1FCB00FABA60 /* BaseViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = BaseViewController.h; sourceTree = "<group>"; };
048908BB2EBE1FCB00FABA60 /* BaseViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = BaseViewController.m; sourceTree = "<group>"; };
048908C12EBE32B800FABA60 /* KBSearchVC.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBSearchVC.h; sourceTree = "<group>"; };
048908C22EBE32B800FABA60 /* KBSearchVC.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBSearchVC.m; sourceTree = "<group>"; };
@@ -729,11 +721,10 @@
04A9FE112EB4D0D20020DB6D /* KBFullAccessManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBFullAccessManager.m; sourceTree = "<group>"; };
04A9FE142EB873C80020DB6D /* UIViewController+Extension.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "UIViewController+Extension.h"; sourceTree = "<group>"; };
04A9FE152EB873C80020DB6D /* UIViewController+Extension.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = "UIViewController+Extension.m"; sourceTree = "<group>"; };
04A9FE182EB892460020DB6D /* KBLocalizationManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBLocalizationManager.h; sourceTree = "<group>"; };
04A9FE192EB892460020DB6D /* KBLocalizationManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBLocalizationManager.m; sourceTree = "<group>"; };
04A9FE1C2EB893F10020DB6D /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = "<group>"; };
E0A100012F60000100ABCDEF /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
04B5A1A02EEFA12300AAAAAA /* KBPayProductModel.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBPayProductModel.h; sourceTree = "<group>"; };
04A9FE182EB892460020DB6D /* KBLocalizationManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBLocalizationManager.h; sourceTree = "<group>"; };
04A9FE192EB892460020DB6D /* KBLocalizationManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBLocalizationManager.m; sourceTree = "<group>"; };
04A9FE1C2EB893F10020DB6D /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = "<group>"; };
04B5A1A02EEFA12300AAAAAA /* KBPayProductModel.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBPayProductModel.h; sourceTree = "<group>"; };
04B5A1A12EEFA12300AAAAAA /* KBPayProductModel.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBPayProductModel.m; sourceTree = "<group>"; };
04BBF8992F3ACD8800B1FBB2 /* KBKeyboardStressTestVC.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBKeyboardStressTestVC.h; sourceTree = "<group>"; };
04BBF89A2F3ACD8800B1FBB2 /* KBKeyboardStressTestVC.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBKeyboardStressTestVC.m; sourceTree = "<group>"; };
@@ -763,8 +754,6 @@
04D1F6B12EDFF10A00B12345 /* KBSkinInstallBridge.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBSkinInstallBridge.m; sourceTree = "<group>"; };
04E0383C2F1A7C30002CA5A0 /* KBCustomTabBar.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KBCustomTabBar.h; sourceTree = "<group>"; };
04E0383D2F1A7C30002CA5A0 /* KBCustomTabBar.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBCustomTabBar.m; sourceTree = "<group>"; };
04E038D72F20BFFB002CA5A0 /* websocket-api.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = "websocket-api.md"; sourceTree = "<group>"; };
04E038E22F20E500002CA5A0 /* deepgramAPI.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = deepgramAPI.md; sourceTree = "<group>"; };
04E038E42F20E877002CA5A0 /* DeepgramStreamingManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DeepgramStreamingManager.h; sourceTree = "<group>"; };
04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DeepgramStreamingManager.m; sourceTree = "<group>"; };
04E038ED2F21F0EC002CA5A0 /* AiVM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AiVM.h; sourceTree = "<group>"; };
@@ -954,6 +943,11 @@
B7F1A1F22FA0000100000001 /* kb_diacritics_map.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = kb_diacritics_map.json; sourceTree = "<group>"; };
B8CA018AB878499327504AAD /* Pods-CustomKeyboard.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-CustomKeyboard.debug.xcconfig"; path = "Target Support Files/Pods-CustomKeyboard/Pods-CustomKeyboard.debug.xcconfig"; sourceTree = "<group>"; };
B9F60894E529C3EDAF6BAC3D /* KBShopThemeDetailModel.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBShopThemeDetailModel.m; sourceTree = "<group>"; };
E0A100012F60000100ABCDEF /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
E0A100022F60000100ABCDEF /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/InfoPlist.strings"; sourceTree = "<group>"; };
E0A100032F60000100ABCDEF /* es */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = es; path = es.lproj/InfoPlist.strings; sourceTree = "<group>"; };
E0A100042F60000100ABCDEF /* id */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = id; path = id.lproj/InfoPlist.strings; sourceTree = "<group>"; };
E0A100052F60000100ABCDEF /* pt-PT */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "pt-PT"; path = "pt-PT.lproj/InfoPlist.strings"; sourceTree = "<group>"; };
E2A844CD2D8584596DBE6316 /* KBShopThemeTagModel.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = KBShopThemeTagModel.m; sourceTree = "<group>"; };
F67DDBD716E4E616D8CC2C9C /* Pods-keyBoard.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-keyBoard.debug.xcconfig"; path = "Target Support Files/Pods-keyBoard/Pods-keyBoard.debug.xcconfig"; sourceTree = "<group>"; };
/* End PBXFileReference section */
@@ -1325,9 +1319,6 @@
046086702F191A5100757C95 /* AiTalk */ = {
isa = PBXGroup;
children = (
046086742F191CC700757C95 /* AI技术分析.txt */,
04E038D72F20BFFB002CA5A0 /* websocket-api.md */,
04E038E22F20E500002CA5A0 /* deepgramAPI.md */,
0460866C2F191A5100757C95 /* M */,
0460866D2F191A5100757C95 /* V */,
0460866E2F191A5100757C95 /* VC */,
@@ -1697,15 +1688,15 @@
path = Manager;
sourceTree = "<group>";
};
04A9FE1F2EB893F10020DB6D /* Localization */ = {
isa = PBXGroup;
children = (
04A9FE1E2EB893F10020DB6D /* Localizable.strings */,
E0A100002F60000100ABCDEF /* InfoPlist.strings */,
);
path = Localization;
sourceTree = "<group>";
};
04A9FE1F2EB893F10020DB6D /* Localization */ = {
isa = PBXGroup;
children = (
04A9FE1E2EB893F10020DB6D /* Localizable.strings */,
E0A100002F60000100ABCDEF /* InfoPlist.strings */,
);
path = Localization;
sourceTree = "<group>";
};
04C6EAB92EAF86530089C901 /* keyBoard */ = {
isa = PBXGroup;
children = (
@@ -2399,10 +2390,10 @@
04C6EAC42EAF87020089C901 /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
04A9FE202EB893F10020DB6D /* Localizable.strings in Resources */,
E0A100102F60000100ABCDEF /* InfoPlist.strings in Resources */,
041007D22ECE012000D203BB /* KBSkinIconMap.strings in Resources */,
files = (
04A9FE202EB893F10020DB6D /* Localizable.strings in Resources */,
E0A100102F60000100ABCDEF /* InfoPlist.strings in Resources */,
041007D22ECE012000D203BB /* KBSkinIconMap.strings in Resources */,
04E2277F2F516ED3001A8F14 /* PrivacyInfo.xcprivacy in Resources */,
A1B2C3ED2F20000000000001 /* kb_words.txt in Resources */,
A1B2C3F12F20000000000002 /* kb_keyboard_layout_config.json in Resources */,
@@ -2431,14 +2422,12 @@
045ED52B2F540FBE00131114 /* normal_hei_them.zip in Resources */,
045ED52C2F540FBE00131114 /* normal_them.zip in Resources */,
043213C62F56F5280065C888 /* 台湾省初始皮肤注音.zip in Resources */,
04E038D82F20BFFB002CA5A0 /* websocket-api.md in Resources */,
0479200B2ED87CEE004E8522 /* permiss_video.mp4 in Resources */,
04E2277D2F516EBD001A8F14 /* PrivacyInfo.xcprivacy in Resources */,
04C6EABA2EAF86530089C901 /* Assets.xcassets in Resources */,
04A9FE212EB893F10020DB6D /* Localizable.strings in Resources */,
E0A100112F60000100ABCDEF /* InfoPlist.strings in Resources */,
047920072ED86ABC004E8522 /* kb_guide_keyboard.gif in Resources */,
046086752F191CC700757C95 /* AI技术分析.txt in Resources */,
04E2277D2F516EBD001A8F14 /* PrivacyInfo.xcprivacy in Resources */,
04C6EABA2EAF86530089C901 /* Assets.xcassets in Resources */,
04A9FE212EB893F10020DB6D /* Localizable.strings in Resources */,
E0A100112F60000100ABCDEF /* InfoPlist.strings in Resources */,
047920072ED86ABC004E8522 /* kb_guide_keyboard.gif in Resources */,
047920112ED98E7D004E8522 /* permiss_video_2.mp4 in Resources */,
04C6EABC2EAF86530089C901 /* LaunchScreen.storyboard in Resources */,
043213BD2F56A3920065C888 /* 西班牙初始皮肤.zip in Resources */,
@@ -2448,7 +2437,6 @@
04286A132ECDEBF900CE730C /* KBSkinIconMap.strings in Resources */,
04C6EABD2EAF86530089C901 /* Main.storyboard in Resources */,
046086CB2F1A092500757C95 /* comments_mock.json in Resources */,
04E038E32F20E500002CA5A0 /* deepgramAPI.md in Resources */,
043213A92F5566EF0065C888 /* kb_input_profiles.json in Resources */,
043213C02F56C9330065C888 /* 印度尼西亚初始皮肤.zip in Resources */,
);
@@ -2837,31 +2825,19 @@
/* End PBXTargetDependency section */
/* Begin PBXVariantGroup section */
04A9FE1E2EB893F10020DB6D /* Localizable.strings */ = {
isa = PBXVariantGroup;
children = (
04A9FE1C2EB893F10020DB6D /* en */,
04837AE12F5848050012BDE2 /* zh-Hant */,
04837AE42F58485A0012BDE2 /* es */,
04837AE72F5848680012BDE2 /* id */,
04837AEA2F5848820012BDE2 /* pt-PT */,
);
name = Localizable.strings;
sourceTree = "<group>";
};
E0A100002F60000100ABCDEF /* InfoPlist.strings */ = {
isa = PBXVariantGroup;
children = (
E0A100012F60000100ABCDEF /* en */,
E0A100022F60000100ABCDEF /* zh-Hant */,
E0A100032F60000100ABCDEF /* es */,
E0A100042F60000100ABCDEF /* id */,
E0A100052F60000100ABCDEF /* pt-PT */,
);
name = InfoPlist.strings;
sourceTree = "<group>";
};
04C6EAB12EAF86530089C901 /* LaunchScreen.storyboard */ = {
04A9FE1E2EB893F10020DB6D /* Localizable.strings */ = {
isa = PBXVariantGroup;
children = (
04A9FE1C2EB893F10020DB6D /* en */,
04837AE12F5848050012BDE2 /* zh-Hant */,
04837AE42F58485A0012BDE2 /* es */,
04837AE72F5848680012BDE2 /* id */,
04837AEA2F5848820012BDE2 /* pt-PT */,
);
name = Localizable.strings;
sourceTree = "<group>";
};
04C6EAB12EAF86530089C901 /* LaunchScreen.storyboard */ = {
isa = PBXVariantGroup;
children = (
04C6EAB02EAF86530089C901 /* Base */,
@@ -2885,6 +2861,18 @@
name = Main.storyboard;
sourceTree = "<group>";
};
E0A100002F60000100ABCDEF /* InfoPlist.strings */ = {
isa = PBXVariantGroup;
children = (
E0A100012F60000100ABCDEF /* en */,
E0A100022F60000100ABCDEF /* zh-Hant */,
E0A100032F60000100ABCDEF /* es */,
E0A100042F60000100ABCDEF /* id */,
E0A100052F60000100ABCDEF /* pt-PT */,
);
name = InfoPlist.strings;
sourceTree = "<group>";
};
/* End PBXVariantGroup section */
/* Begin XCBuildConfiguration section */
@@ -2903,20 +2891,20 @@
"$(inherited)",
"KB_KEYCHAIN_ACCESS_GROUP=@\\\"$(AppIdentifierPrefix)com.loveKey.nyx.shared\\\"",
);
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = CustomKeyboard/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Key of Love";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = CustomKeyboard/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Key of Love";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
IPHONEOS_DEPLOYMENT_TARGET = 15;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
"@executable_path/../../Frameworks",
);
MARKETING_VERSION = 1.0.0;
MARKETING_VERSION = 1.0.0;
PRODUCT_BUNDLE_IDENTIFIER = com.loveKey.nyx.CustomKeyboard;
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;
@@ -2940,20 +2928,20 @@
"$(inherited)",
"KB_KEYCHAIN_ACCESS_GROUP=@\\\"$(AppIdentifierPrefix)com.loveKey.nyx.shared\\\"",
);
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = CustomKeyboard/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Key of Love";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = CustomKeyboard/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Key of Love";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
IPHONEOS_DEPLOYMENT_TARGET = 15;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
"@executable_path/../../Frameworks",
);
MARKETING_VERSION = 1.0.0;
MARKETING_VERSION = 1.0.0;
PRODUCT_BUNDLE_IDENTIFIER = com.loveKey.nyx.CustomKeyboard;
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;
@@ -2983,9 +2971,9 @@
INFOPLIST_FILE = keyBoard/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Key of Love";
INFOPLIST_KEY_CFBundleURLTypes = "{\n CFBundleURLName = \"com.loveKey.nyx.keyboard\";\n CFBundleURLSchemes = (\n kbkeyboardAppExtension\n );\n}";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
INFOPLIST_KEY_UIMainStoryboardFile = Main;
@@ -3034,9 +3022,9 @@
INFOPLIST_FILE = keyBoard/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = "Key of Love";
INFOPLIST_KEY_CFBundleURLTypes = "{\n CFBundleURLName = \"com.loveKey.nyx.keyboard\";\n CFBundleURLSchemes = (\n kbkeyboardAppExtension\n );\n}";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "Microphone access is required for voice input.";
INFOPLIST_KEY_NSPhotoLibraryAddUsageDescription = "Photo library write access is required to save images.";
INFOPLIST_KEY_NSPhotoLibraryUsageDescription = "Photo library access is required to change your avatar.";
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchStoryboardName = LaunchScreen;
INFOPLIST_KEY_UIMainStoryboardFile = Main;
@@ -3212,7 +3200,6 @@
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 727EC74B2EAF848B00B36487 /* Project object */;
}

View File

@@ -1,521 +0,0 @@
服务 用途 示例格式
ASR 服务器 语音识别WebSocket wss://api.example.com/asr
LLM 服务器 AI 对话HTTP SSE https://api.example.com/chat
TTS 服务器 语音合成 https://api.example.com/tts
iOS(Objective-C,iOS 15+)端技术实现文档
低延迟流式语音陪伴聊天(按住说话,类似猫箱首页)
0. 范围与目标
实现首页语音陪伴对话:
按住说话:开始录音并实时流式发送到 ASR
松开结束:ASR 立即 finalize,返回最终文本并显示
AI 回复:边显示文字(打字机效果)边播放服务端 TTS 音频
延迟低优先:不等待完整回答/完整音频,采用“分句触发 + 流式/准流式播放”
打断(Barge-in):AI 正在播报时用户再次按住 → 立即停止播报/取消请求,进入新一轮录音
iOS 最低版本iOS 15
1. 总体架构(客户端模块)
KBAiMainVC
└─ ConversationOrchestrator (核心状态机 / 串联模块 / 取消与打断)
├─ AudioSessionManager (AVAudioSession 配置与中断处理)
├─ AudioCaptureManager (AVAudioEngine input tap -> 20ms PCM frames)
├─ ASRStreamClient (NSURLSessionWebSocketTask 流式识别)
├─ LLMStreamClient (SSE/WS token stream)
├─ Segmenter (句子切分:够一句就触发 TTS)
├─ TTSServiceClient (请求 TTS适配多种返回形态)
├─ TTSPlaybackPipeline (可插拔URL播放器 / AAC解码 / PCM直喂)
├─ AudioStreamPlayer (AVAudioEngine + AVAudioPlayerNode 播 PCM)
└─ SubtitleSync (按播放进度映射文字进度)
2. 音频会话AVAudioSession与权限
2.1 麦克风权限
仅在用户第一次按住说话前请求
若用户拒绝:提示到设置开启
2.2 AudioSession 配置(对话模式)
Objective-C建议参数
categoryAVAudioSessionCategoryPlayAndRecord
modeAVAudioSessionModeVoiceChat
options
AVAudioSessionCategoryOptionDefaultToSpeaker
AVAudioSessionCategoryOptionAllowBluetooth
可选:AVAudioSessionCategoryOptionMixWithOthers(若你希望不打断宿主音频,视产品需求而定)
2.3 中断与路由变化处理(必须)
监听:
AVAudioSessionInterruptionNotification
AVAudioSessionRouteChangeNotification
处理原则:
来电/中断开始:停止采集 + 停止播放 + cancel 网络会话
中断结束:回到 Idle等待用户重新按住
3. 音频采集(按住期间流式上传)
3.1 固定音频参数(锁死,便于端到端稳定)
Sample Rate16000 Hz
Channels1
FormatPCM Int16pcm_s16le
Frame Duration20ms
16kHz * 0.02s = 320 samples
每帧 bytes = 320 * 2 = 640 bytes
3.2 AudioCaptureManagerAVAudioEngine 输入 tap
使用:
AVAudioEngine
inputNode installTapOnBus:bufferSize:format:block:
关键点:
tap 回调线程不可做重活:只做拷贝 + dispatch 到 audioQueue
将 AVAudioPCMBuffer 转成 Int16 PCM NSData
确保稳定输出“20ms帧”如果 tap 回调 buffer 不刚好是 20ms需要做 帧拼接/切片ring buffer
3.3 接口定义OC
@protocol AudioCaptureManagerDelegate <NSObject>
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame; // 20ms/640B
- (void)audioCaptureManagerDidUpdateRMS:(float)rms; // 可选UI波形
@end
@interface AudioCaptureManager : NSObject
@property (nonatomic, weak) id<AudioCaptureManagerDelegate> delegate;
- (BOOL)startCapture:(NSError **)error;
- (void)stopCapture;
@end
4. ASR 流式识别iOS15NSURLSessionWebSocketTask
4.1 建议协议(控制帧 JSON + 音频帧二进制)
Start文本帧
{
"type":"start",
"sessionId":"uuid",
"format":"pcm_s16le",
"sampleRate":16000,
"channels":1
}
Audio二进制帧
直接发送 640B/帧 PCM
频率:50fps(每秒 50 帧)
Finalize文本帧
{ "type":"finalize", "sessionId":"uuid" }
4.2 下行事件
{ "type":"partial", "text":"今天" }
{ "type":"final", "text":"今天天气怎么样" }
{ "type":"error", "code":123, "message":"..." }
4.3 ASRStreamClient 接口OC
@protocol ASRStreamClientDelegate <NSObject>
- (void)asrClientDidReceivePartialText:(NSString *)text;
- (void)asrClientDidReceiveFinalText:(NSString *)text;
- (void)asrClientDidFail:(NSError *)error;
@end
@interface ASRStreamClient : NSObject
@property (nonatomic, weak) id<ASRStreamClientDelegate> delegate;
- (void)startWithSessionId:(NSString *)sessionId;
- (void)sendAudioPCMFrame:(NSData *)pcmFrame; // 20ms frame
- (void)finalize;
- (void)cancel;
@end
5. LLM 流式生成token stream
5.1 目标
低延迟:不要等整段回答
使用 SSE 或 WS 收 token
token 进入 Segmenter够一句就触发 TTS
5.2 LLMStreamClient 接口OC
@protocol LLMStreamClientDelegate <NSObject>
- (void)llmClientDidReceiveToken:(NSString *)token;
- (void)llmClientDidComplete;
- (void)llmClientDidFail:(NSError *)error;
@end
@interface LLMStreamClient : NSObject
@property (nonatomic, weak) id<LLMStreamClientDelegate> delegate;
- (void)sendUserText:(NSString *)text conversationId:(NSString *)cid;
- (void)cancel;
@end
6. Segmenter句子切分先播第一句
6.1 切分规则(推荐)
任一满足则切分成 segment
遇到 。!?\n 之一
或累积字符数 ≥ 30(可配置)
6.2 Segmenter 接口OC
@interface Segmenter : NSObject
- (void)appendToken:(NSString *)token;
- (NSArray<NSString *> *)popReadySegments; // 返回立即可TTS的片段数组
- (void)reset;
@end
7. TTS返回形态未定 → 客户端做“可插拔播放管线”
由于服务端同事未定输出格式,客户端必须支持以下 四种 TTS 输出模式 的任意一种:
模式 A返回 m4a/MP3 URL最容易落地
服务端返回 URL或 base64 文件)
客户端用 AVPlayer / AVAudioPlayer 播放
字幕同步用“音频时长映射”(可拿到 duration)
优点:服务端简单
缺点:首帧延迟通常更高(要等整段生成、至少等首包)
模式 B返回 AAC chunk流式
服务端 WS 推 AAC 帧
客户端需要 AAC 解码成 PCM再喂 AudioStreamPlayer
模式 C返回 Opus chunk流式
需 Opus 解码库(服务端/客户端成本更高)
解码后喂 PCM 播放
模式 D返回 PCM chunk最适合低延迟
服务端直接推 PCM16 chunk(比如 100ms 一块)
客户端直接转 AVAudioPCMBuffer schedule
延迟最低、实现最稳
8. TTSServiceClient统一网络层接口
8.1 统一回调事件(抽象)
typedef NS_ENUM(NSInteger, TTSPayloadType) {
TTSPayloadTypeURL, // A
TTSPayloadTypePCMChunk, // D
TTSPayloadTypeAACChunk, // B
TTSPayloadTypeOpusChunk // C
};
@protocol TTSServiceClientDelegate <NSObject>
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId;
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
payloadType:(TTSPayloadType)type
segmentId:(NSString *)segmentId;
- (void)ttsClientDidFinishSegment:(NSString *)segmentId;
- (void)ttsClientDidFail:(NSError *)error;
@end
@interface TTSServiceClient : NSObject
@property (nonatomic, weak) id<TTSServiceClientDelegate> delegate;
- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId;
- (void)cancel;
@end
这样服务端最后选哪种输出,你只需实现对应分支即可,不需要推翻客户端架构。
9. TTSPlaybackPipeline(播放管线,根据 payloadType 路由)
9.1 设计目标
支持 URL 播放与流式 chunk 播放
提供统一的“开始播放/停止/进度”接口供字幕同步与打断使用
9.2 Pipeline 结构(建议)
TTSPlaybackPipeline 只做路由与队列管理
URL → TTSURLPlayerAVPlayer
PCM → AudioStreamPlayerAVAudioEngine
AAC/Opus → Decoder → PCM → AudioStreamPlayer
9.3 Pipeline 接口OC
@protocol TTSPlaybackPipelineDelegate <NSObject>
- (void)pipelineDidStartSegment:(NSString *)segmentId duration:(NSTimeInterval)duration;
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time segmentId:(NSString *)segmentId;
- (void)pipelineDidFinishSegment:(NSString *)segmentId;
@end
@interface TTSPlaybackPipeline : NSObject
@property (nonatomic, weak) id<TTSPlaybackPipelineDelegate> delegate;
- (BOOL)start:(NSError **)error; // 启动音频引擎等
- (void)stop; // 立即停止(打断)
- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId;
- (void)enqueueChunk:(NSData *)chunk payloadType:(TTSPayloadType)type segmentId:(NSString *)segmentId;
// 可选:用于字幕同步
- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId;
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
@end
10. AudioStreamPlayer(PCM 流式播放,低延迟核心)
10.1 使用 AVAudioEngine + AVAudioPlayerNode
将 PCM chunk 转 AVAudioPCMBuffer
scheduleBuffer 播放
维护“当前 segment 的播放时间/总时长”(可估算或累加 chunk 时长)
10.2 接口OC
@interface AudioStreamPlayer : NSObject
- (BOOL)start:(NSError **)error;
- (void)stop;
- (void)enqueuePCMChunk:(NSData *)pcmData
sampleRate:(double)sampleRate
channels:(int)channels
segmentId:(NSString *)segmentId;
- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId;
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
@end
PCM chunk 的粒度建议:50ms~200ms(太小 schedule 太频繁,太大延迟高)。
11. 字幕同步(延迟优先)
11.1 策略
对每个 segment 的文本 text按播放进度映射显示字符数
visibleCount = round(text.length * (t / T))
tsegment 当前播放进度pipeline 提供)
Tsegment 总时长URL 模式直接取chunk 模式可累加估算)
11.2 SubtitleSync 接口OC
@interface SubtitleSync : NSObject
- (NSString *)visibleTextForFullText:(NSString *)fullText
currentTime:(NSTimeInterval)t
duration:(NSTimeInterval)T;
@end
12. ConversationOrchestrator(状态机 + 打断 + 队列)
12.1 状态
typedef NS_ENUM(NSInteger, ConversationState) {
ConversationStateIdle,
ConversationStateListening,
ConversationStateRecognizing,
ConversationStateThinking,
ConversationStateSpeaking
};
12.2 关键流程
事件用户按住userDidPressRecord
如果正在 Speaking/Thinking
[ttsService cancel]
[llmClient cancel]
[asrClient cancel](如仍在识别)
[pipeline stop](立即停播)
清空 segment 队列、字幕队列
配置/激活 AudioSession
新建 sessionId
[asrClient startWithSessionId:]
[audioCapture startCapture:]
state = Listening
事件用户松开userDidReleaseRecord
[audioCapture stopCapture]
[asrClient finalize]
state = Recognizing
回调ASR final text
UI 显示用户最终文本
state = Thinking
开始 LLM stream[llmClient sendUserText:conversationId:]
回调LLM token
segmenter appendToken
segments = [segmenter popReadySegments]
对每个 segment
生成 segmentId
记录 segmentTextMap[segmentId] = segmentText
[ttsService requestTTSForText:segmentId:]
当收到第一个可播放音频并开始播:
state = Speaking
回调TTS 音频到达
URL[pipeline enqueueURL:segmentId:]
chunk[pipeline enqueueChunk:payloadType:segmentId:]
回调pipeline 播放时间更新(每 30-60fps 或定时器)
根据当前 segmentId 取到 fullText
visible = [subtitleSync visibleTextForFullText:currentTime:duration:]
UI 更新 AI 可见文本
12.3 打断Barge-in
当用户再次按住:
立即 stop 播放
取消所有未完成网络请求
丢弃所有未播放 segments
开始新一轮录音
12.4 Orchestrator 接口OC
@interface ConversationOrchestrator : NSObject
@property (nonatomic, assign, readonly) ConversationState state;
- (void)userDidPressRecord;
- (void)userDidReleaseRecord;
@property (nonatomic, copy) void (^onUserFinalText)(NSString *text);
@property (nonatomic, copy) void (^onAssistantVisibleText)(NSString *text);
@property (nonatomic, copy) void (^onError)(NSError *error);
@end
13. 线程/队列模型(强制要求,避免竞态)
建议三条队列 + 一条 orchestrator 串行队列:
dispatch_queue_t audioQueue;采集帧处理、ring buffer
dispatch_queue_t networkQueue;WS 收发解析)
dispatch_queue_t orchestratorQueue;(状态机串行,唯一修改 state/队列的地方)
UI 更新统一回主线程
规则:
任何网络/音频回调 → dispatch_async(orchestratorQueue, ^{ ... })
Orchestrator 内部再决定是否发 UI 回调(主线程)
14. 关键参数(延迟与稳定性)
音频帧20ms
PCM16k/mono/int16
ASR 上传WS 二进制
LLMtoken stream
TTS优先 chunk若 URL 模式也要尽快开始下载与播放
chunk 播放缓冲100~200ms防抖动
15. 开发落地建议(服务端未定情况下的迭代路径)
Phase 1先跑通端到端用“URL 模式”模拟)
TTSServiceClient 先假定服务端返回 m4a URL或本地 mock URL
Pipeline 实现 URL 播放AVPlayer
打断 + 字幕同步先跑通
Phase 2服务端定了输出后再替换
若服务端给 PCM chunk直接走 AudioStreamPlayer最推荐
若给 AAC chunk补 AAC 解码模块AudioConverter 或第三方)
若给 Opus chunk集成 Opus 解码库,再喂 PCM
关键Orchestrator/Segmenter/ASR/字幕同步都不需要改,只替换 TTSPlaybackPipeline 分支。
16. 合规/体验注意
录音必须由用户动作触发(按住)
明确的“正在录音”提示与波形
避免自动偷录
播放时允许随时打断
文档结束
给“写代码的 AI”的额外要求建议你一并附上
语言Objective-C.h/.m
iOS 15+WebSocket 用 NSURLSessionWebSocketTask
音频采集用 AVAudioEngine + ring buffer 切 20ms 帧
播放管线必须支持URL 播放AVPlayer+ PCM chunk 播放AVAudioEngine
其余 AAC/Opus 分支可留 TODO / stub但接口要预留

File diff suppressed because it is too large Load Diff

View File

@@ -1,771 +0,0 @@
# 实时语音对话 WebSocket API 文档
> Version: 2.0.0 (Flux)
> Last Updated: 2026-01-21
> Author: Backend Team
---
## 概述
本文档描述实时语音对话 WebSocket API用于 iOS 客户端与后端进行实时语音交互。
**v2.0 更新**: 升级为 Deepgram Flux 模型,支持智能轮次检测和 EagerEndOfTurn 提前响应。
### 核心特性
- **智能轮次检测**: Flux 模型语义理解,自动判断用户说完(非简单静默检测)
- **EagerEndOfTurn**: 提前启动 LLM 响应,进一步降低延迟
- **实时语音识别**: 边说边识别,实时显示转写文本
- **流式响应**: AI 响应边生成边返回,无需等待完整响应
- **流式音频**: TTS 音频边合成边播放,极低延迟
- **Barge-in 支持**: 用户可以打断 AI 说话
### 性能指标
| 指标 | 目标值 | 说明 |
|------|--------|------|
| 端点检测延迟 | ~260ms | Flux 智能检测 |
| TTFA (首音频延迟) | < 300ms | EagerEndOfTurn 优化 |
| 端到端延迟 | < 1.5秒 | 完整对话周期 |
| 实时转写延迟 | < 100ms | 中间结果 |
---
## 连接信息
### WebSocket 端点
```
生产环境: wss://api.yourdomain.com/api/ws/chat?token={sa_token}
开发环境: ws://localhost:7529/api/ws/chat?token={sa_token}
```
### 认证方式
通过 URL Query 参数传递 Sa-Token
```
ws://host:port/api/ws/chat?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9...
```
| 参数 | 类型 | 必填 | 描述 |
|------|------|------|------|
| token | String | ✅ | Sa-Token 登录令牌,通过 Apple Sign-In 获取 |
### 认证失败
如果 token 无效或过期WebSocket 连接将被拒绝HTTP 403
---
## 消息格式
### 通用规则
1. **文本消息**: JSON 格式,用于控制指令和状态通知
2. **二进制消息**: 原始字节,用于音频数据传输
3. **编码**: UTF-8
---
## 客户端 → 服务端消息
### 1. 开始会话 (session_start)
**发送时机**: 建立 WebSocket 连接后,准备开始录音前
```json
{
"type": "session_start",
"config": {
"language": "en",
"voice_id": "a5zfmqTslZJBP0jutmVY"
}
}
```
| 字段 | 类型 | 必填 | 描述 |
|------|------|------|------|
| type | String | ✅ | 固定值 `session_start` |
| config | Object | ❌ | 会话配置(可选) |
| config.language | String | ❌ | 语音识别语言,默认 `en` |
| config.voice_id | String | ❌ | TTS 声音 ID默认使用服务端配置 |
**响应**: 服务端返回 `session_started` 消息
---
### 2. 音频数据 (Binary)
**发送时机**: 用户正在录音时,持续发送音频数据
**格式**: Binary WebSocket Frame直接发送原始音频字节
**音频规格要求**:
| 参数 | 值 | 说明 |
|------|------|------|
| 编码格式 | PCM (Linear16) | 未压缩的脉冲编码调制 |
| 采样率 | 16000 Hz | 16kHz |
| 位深度 | 16-bit | 有符号整数 |
| 声道数 | 1 (Mono) | 单声道 |
| 字节序 | Little-Endian | 小端序 |
**iOS 代码示例**:
```swift
// AVAudioEngine
let format = AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: 16000,
channels: 1,
interleaved: true
)!
// 安装 tap 采集音频(注意:inputNode 原生格式通常为 Float32,实际可能需经 AVAudioConverter 转为 16kHz Int16 — 待确认)
audioEngine.inputNode.installTap(
onBus: 0,
bufferSize: 1024,
format: format
) { buffer, time in
let audioData = buffer.int16ChannelData![0]
let byteCount = Int(buffer.frameLength) * 2 // 16-bit = 2 bytes
let data = Data(bytes: audioData, count: byteCount)
webSocket.write(data: data)
}
```
**发送频率**: 建议每 20-100ms 发送一次,每次 640-3200 字节(16kHz × 16-bit × 单声道 ≈ 32 字节/ms)
---
### 3. 结束录音 (audio_end)
**发送时机**: 用户停止录音(松开录音按钮)
```json
{
"type": "audio_end"
}
```
| 字段 | 类型 | 必填 | 描述 |
|------|------|------|------|
| type | String | ✅ | 固定值 `audio_end` |
**说明**: 发送此消息后,服务端将完成语音识别并开始生成 AI 响应
---
### 4. 取消会话 (cancel)
**发送时机**: 用户主动取消对话(如点击取消按钮)
```json
{
"type": "cancel"
}
```
| 字段 | 类型 | 必填 | 描述 |
|------|------|------|------|
| type | String | ✅ | 固定值 `cancel` |
**说明**: 服务端将停止所有处理,不再返回任何消息
---
## 服务端 → 客户端消息
### 1. 会话已启动 (session_started)
**接收时机**: 发送 `session_start` 后
```json
{
"type": "session_started",
"session_id": "abc123-def456-ghi789"
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `session_started` |
| session_id | String | 服务端分配的会话 ID |
**客户端处理**: 收到此消息后,可以开始发送音频数据
---
### 2. 轮次开始 (turn_start) 🆕
**接收时机**: 用户开始说话时Flux 检测到语音活动)
```json
{
"type": "turn_start",
"turn_index": 0
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `turn_start` |
| turn_index | Integer | 当前轮次索引(从 0 开始) |
**客户端处理**:
- 可显示"正在听..."状态
- 准备接收转写结果
---
### 3. 中间转写结果 (transcript_interim)
**接收时机**: 用户说话过程中,实时返回
```json
{
"type": "transcript_interim",
"text": "Hello how are",
"is_final": false
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `transcript_interim` |
| text | String | 当前识别到的文本(可能会变化) |
| is_final | Boolean | 固定为 `false` |
**客户端处理**:
- 实时更新 UI 显示转写文本
- 此文本可能会被后续消息覆盖
- 可用于显示"正在识别..."效果
---
### 4. 最终转写结果 (transcript_final)
**接收时机**: 一句话识别完成时
```json
{
"type": "transcript_final",
"text": "Hello, how are you?"
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `transcript_final` |
| text | String | 最终确定的转写文本 |
**客户端处理**:
- 用此文本替换之前的中间结果
- 此文本不会再变化
---
### 5. 提前端点检测 (eager_eot) 🆕
**接收时机**: Flux 检测到用户可能说完时(置信度达到阈值)
```json
{
"type": "eager_eot",
"transcript": "Hello, how are you",
"confidence": 0.65
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `eager_eot` |
| transcript | String | 当前转写文本 |
| confidence | Double | 端点置信度 (0.0-1.0) |
**客户端处理**:
- 这是一个**预测性事件**,表示用户可能说完了
- 服务端已开始提前准备 LLM 响应
- 可显示"准备响应..."状态
- **注意**: 用户可能继续说话,此时会收到 `turn_resumed`
---
### 6. 轮次恢复 (turn_resumed) 🆕
**接收时机**: 收到 `eager_eot` 后,用户继续说话
```json
{
"type": "turn_resumed"
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `turn_resumed` |
**客户端处理**:
- 用户继续说话,之前的 `eager_eot` 是误判
- 服务端已取消正在准备的草稿响应
- 恢复"正在听..."状态
- 继续接收 `transcript_interim` 更新
---
### 7. LLM 开始生成 (llm_start)
**接收时机**: 语音识别完成AI 开始生成响应
```json
{
"type": "llm_start"
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `llm_start` |
**客户端处理**:
- 可显示"AI 正在思考..."状态
- 准备接收 AI 响应文本和音频
---
### 8. LLM Token (llm_token)
**接收时机**: AI 生成过程中,逐 token 返回
```json
{
"type": "llm_token",
"token": "Hi"
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `llm_token` |
| token | String | AI 输出的单个 token词或字符片段 |
**客户端处理**:
- 可选择实现打字机效果
- 逐个 token 追加显示 AI 响应文本
- 如不需要打字效果,可忽略此消息
---
### 9. 音频数据 (Binary)
**接收时机**: TTS 合成过程中,流式返回音频
**格式**: Binary WebSocket FrameMP3 音频块
**音频规格**:
| 参数 | 值 |
|------|------|
| 格式 | MP3 |
| 采样率 | 44100 Hz |
| 比特率 | 64 kbps |
| 声道 | 单声道 |
**客户端处理**:
```swift
// 使用 AVAudioEngine 或 AudioQueue 进行流式播放
webSocket.onEvent = { event in
switch event {
case .binary(let data):
// 方案 1: 缓冲后使用 AVAudioPlayerNode 播放
audioBuffer.append(data)
playBufferedAudio()
// 方案 2: 使用 AVAudioEngine + AVAudioCompressedBuffer
// 方案 3: 缓冲完整数据后使用 AVAudioPlayer
}
}
```
**重要提示**:
- 音频是分块返回的,需要正确拼接或流式播放
- 每个二进制消息是 MP3 数据的一部分
- 收到 `complete` 消息后,音频传输完成
---
### 10. 处理完成 (complete)
**接收时机**: AI 响应生成完成,所有音频已发送
```json
{
"type": "complete",
"transcript": "Hello, how are you?",
"ai_response": "Hi! I'm doing great, thanks for asking!"
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `complete` |
| transcript | String | 完整的用户语音转写文本 |
| ai_response | String | 完整的 AI 响应文本 |
**客户端处理**:
- 更新 UI 显示完整对话
- 可开始下一轮对话
- 建议保存对话历史
---
### 11. 错误 (error)
**接收时机**: 处理过程中发生错误
```json
{
"type": "error",
"code": "DEEPGRAM_ERROR",
"message": "Speech recognition failed"
}
```
| 字段 | 类型 | 描述 |
|------|------|------|
| type | String | 固定值 `error` |
| code | String | 错误代码 |
| message | String | 错误描述 |
**错误代码列表**:
| 错误代码 | 描述 | 建议处理 |
|----------|------|----------|
| PARSE_ERROR | 消息解析失败 | 检查消息格式 |
| DEEPGRAM_ERROR | 语音识别服务错误 | 重试或提示用户 |
| DEEPGRAM_INIT_ERROR | 语音识别初始化失败 | 重新开始会话 |
| LLM_ERROR | AI 生成错误 | 重试或提示用户 |
| PIPELINE_ERROR | 处理流程错误 | 重新开始会话 |
| EMPTY_TRANSCRIPT | 未检测到语音 | 提示用户重新说话 |
**客户端处理**:
- 显示友好的错误提示
- 根据错误类型决定是否重试
---
## 完整交互流程
### 时序图
```
iOS Client Server
| |
|------ WebSocket Connect --------->|
| ?token=xxx |
| |
|<-------- Connected ---------------|
| |
|------ session_start ------------->|
| |
|<----- session_started ------------|
| {session_id: "abc"} |
| |
|======= 用户开始说话 ===============|
| |
|------ Binary (audio) ------------>|
|------ Binary (audio) ------------>|
|<----- transcript_interim ---------|
| {text: "Hello"} |
|------ Binary (audio) ------------>|
|<----- transcript_interim ---------|
| {text: "Hello how"} |
|------ Binary (audio) ------------>|
|<----- transcript_final -----------|
| {text: "Hello, how are you?"}|
| |
|======= 用户停止说话 ===============|
| |
|------ audio_end ----------------->|
| |
|<----- llm_start ------------------|
| |
|<----- llm_token ------------------|
| {token: "Hi"} |
|<----- llm_token ------------------|
| {token: "!"} |
|<----- Binary (mp3) ---------------|
|<----- Binary (mp3) ---------------|
|<----- llm_token ------------------|
| {token: " I'm"} |
|<----- Binary (mp3) ---------------|
| ... |
|<----- complete -------------------|
| {transcript, ai_response} |
| |
|======= 可以开始下一轮 =============|
| |
```
---
## iOS 代码示例
### 完整 Swift 实现
```swift
import Foundation
import Starscream // WebSocket
class VoiceChatManager: WebSocketDelegate {
private var socket: WebSocket?
private var audioBuffer = Data()
// MARK: -
var onSessionStarted: ((String) -> Void)?
var onTranscriptInterim: ((String) -> Void)?
var onTranscriptFinal: ((String) -> Void)?
var onLLMStart: (() -> Void)?
var onLLMToken: ((String) -> Void)?
var onAudioChunk: ((Data) -> Void)?
var onComplete: ((String, String) -> Void)?
var onError: ((String, String) -> Void)?
// MARK: -
func connect(token: String) {
let urlString = "wss://api.yourdomain.com/api/ws/chat?token=\(token)"
guard let url = URL(string: urlString) else { return }
var request = URLRequest(url: url)
request.timeoutInterval = 30
socket = WebSocket(request: request)
socket?.delegate = self
socket?.connect()
}
func disconnect() {
socket?.disconnect()
socket = nil
}
// MARK: -
func startSession(language: String = "en", voiceId: String? = nil) {
var config: [String: Any] = ["language": language]
if let voiceId = voiceId {
config["voice_id"] = voiceId
}
let message: [String: Any] = [
"type": "session_start",
"config": config
]
sendJSON(message)
}
func sendAudio(_ data: Data) {
socket?.write(data: data)
}
func endAudio() {
sendJSON(["type": "audio_end"])
}
func cancel() {
sendJSON(["type": "cancel"])
}
private func sendJSON(_ dict: [String: Any]) {
guard let data = try? JSONSerialization.data(withJSONObject: dict),
let string = String(data: data, encoding: .utf8) else { return }
socket?.write(string: string)
}
// MARK: - WebSocketDelegate
func didReceive(event: WebSocketEvent, client: WebSocketClient) {
switch event {
case .connected(_):
print("WebSocket connected")
case .disconnected(let reason, let code):
print("WebSocket disconnected: \(reason) (\(code))")
case .text(let text):
handleTextMessage(text)
case .binary(let data):
// MP3
onAudioChunk?(data)
case .error(let error):
print("WebSocket error: \(error?.localizedDescription ?? "unknown")")
default:
break
}
}
private func handleTextMessage(_ text: String) {
guard let data = text.data(using: .utf8),
let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
let type = json["type"] as? String else { return }
switch type {
case "session_started":
if let sessionId = json["session_id"] as? String {
onSessionStarted?(sessionId)
}
case "transcript_interim":
if let text = json["text"] as? String {
onTranscriptInterim?(text)
}
case "transcript_final":
if let text = json["text"] as? String {
onTranscriptFinal?(text)
}
case "llm_start":
onLLMStart?()
case "llm_token":
if let token = json["token"] as? String {
onLLMToken?(token)
}
case "complete":
if let transcript = json["transcript"] as? String,
let aiResponse = json["ai_response"] as? String {
onComplete?(transcript, aiResponse)
}
case "error":
if let code = json["code"] as? String,
let message = json["message"] as? String {
onError?(code, message)
}
default:
print("Unknown message type: \(type)")
}
}
}
```
### 使用示例
```swift
class VoiceChatViewController: UIViewController {
let chatManager = VoiceChatManager()
let audioRecorder = AudioRecorder() //
let audioPlayer = StreamingAudioPlayer() //
override func viewDidLoad() {
super.viewDidLoad()
setupCallbacks()
}
func setupCallbacks() {
chatManager.onSessionStarted = { [weak self] sessionId in
print("Session started: \(sessionId)")
//
self?.audioRecorder.start { audioData in
self?.chatManager.sendAudio(audioData)
}
}
chatManager.onTranscriptInterim = { [weak self] text in
self?.transcriptLabel.text = text + "..."
}
chatManager.onTranscriptFinal = { [weak self] text in
self?.transcriptLabel.text = text
}
chatManager.onLLMStart = { [weak self] in
self?.statusLabel.text = "AI is thinking..."
}
chatManager.onLLMToken = { [weak self] token in
self?.aiResponseLabel.text = (self?.aiResponseLabel.text ?? "") + token
}
chatManager.onAudioChunk = { [weak self] data in
self?.audioPlayer.appendData(data)
}
chatManager.onComplete = { [weak self] transcript, aiResponse in
self?.statusLabel.text = "Complete"
self?.addToHistory(user: transcript, ai: aiResponse)
}
chatManager.onError = { [weak self] code, message in
self?.showError(message)
}
}
@IBAction func startTapped(_ sender: UIButton) {
//
chatManager.connect(token: AuthManager.shared.saToken)
chatManager.onSessionStarted = { [weak self] _ in
self?.chatManager.startSession()
}
}
@IBAction func stopTapped(_ sender: UIButton) {
audioRecorder.stop()
chatManager.endAudio()
}
@IBAction func cancelTapped(_ sender: UIButton) {
audioRecorder.stop()
audioPlayer.stop()
chatManager.cancel()
}
}
```
---
## 注意事项
### 1. 音频录制
- 必须使用 PCM 16-bit, 16kHz, Mono 格式
- 建议每 20-100ms 发送一次音频数据
- 录音权限需要在 Info.plist 中声明
### 2. 音频播放
- 返回的是 MP3 格式音频块
- 需要实现流式播放或缓冲播放
- 建议使用 AVAudioEngine 实现低延迟播放
### 3. 网络处理
- 实现自动重连机制
- 处理网络切换场景
- 设置合理的超时时间
### 4. 用户体验
- 显示实时转写文本
- 显示 AI 响应状态
- 提供取消按钮
- 处理录音权限被拒绝的情况
### 5. 调试建议
- 使用 `wss://` 确保生产环境安全
- 本地开发可使用 `ws://`
- 检查 Sa-Token 是否过期
---
## 版本历史
| 版本 | 日期 | 变更 |
|------|------|------|
| 1.0.0 | 2026-01-21 | 初始版本 |
| 2.0.0 | 2026-01-21 | 升级为 Deepgram Flux 模型,新增 turn_start / eager_eot / turn_resumed 事件 |