Files
keyboard/CustomKeyboard/Manager/KBSuggestionEngine.m
2026-03-02 14:39:47 +08:00

535 lines
22 KiB
Objective-C

//
// KBSuggestionEngine.m
// CustomKeyboard
//
#import "KBSuggestionEngine.h"
#import "KBConfig.h"
/// Private state of the suggestion engine. Everything except `selectionCounts`
/// is populated once in -init and only read afterwards.
@interface KBSuggestionEngine ()
/// Latin word list: the built-in default words followed by the sanitized contents
/// of any word-list files that could be read.
@property (nonatomic, copy) NSArray<NSString *> *words;
/// Number of times each (lowercased) word was passed to -recordSelection:.
/// Must stay `strong`: `copy` would hand back an immutable dictionary.
@property (nonatomic, strong) NSMutableDictionary<NSString *, NSNumber *> *selectionCounts;
/// The built-in default words; used as a tie-breaker when ranking Latin suggestions.
@property (nonatomic, copy) NSSet<NSString *> *priorityWords;
/// Words returned when a Traditional Chinese / Bopomofo lookup finds no candidates.
@property (nonatomic, copy) NSArray<NSString *> *traditionalChineseWords;
/// Words returned when a Simplified Chinese lookup finds no candidates.
@property (nonatomic, copy) NSArray<NSString *> *simplifiedChineseWords;
/// Pinyin key -> candidate strings, loaded from pinyin_to_traditional.json.
@property (nonatomic, copy) NSDictionary<NSString *, NSArray<NSString *> *> *pinyinToTraditionalMap;
/// Bopomofo key -> candidate strings, loaded from bopomofo_to_chinese.json.
@property (nonatomic, copy) NSDictionary<NSString *, NSArray<NSString *> *> *bopomofoToChineseMap;
@end
@implementation KBSuggestionEngine
/// Returns the shared suggestion engine, creating it on first access.
/// Creation goes through dispatch_once, so -init runs at most once.
+ (instancetype)shared {
    static dispatch_once_t creationToken;
    static KBSuggestionEngine *sharedEngine = nil;
    dispatch_once(&creationToken, ^{
        sharedEngine = [[KBSuggestionEngine alloc] init];
    });
    return sharedEngine;
}
/// Creates an engine in Latin mode with no recorded selections, then eagerly
/// loads every word list and lookup table the suggestion methods read.
- (instancetype)init {
    self = [super init];
    if (!self) {
        return nil;
    }

    _engineType = KBSuggestionEngineTypeLatin;
    _selectionCounts = [NSMutableDictionary dictionary];

    // The built-in words double as the "priority" set used to break ranking ties.
    NSArray<NSString *> *builtInWords = [self.class kb_defaultWords];
    _priorityWords = [NSSet setWithArray:builtInWords];

    _words = [self kb_loadWords];
    _traditionalChineseWords = [self kb_loadTraditionalChineseWords];
    _simplifiedChineseWords = [self kb_loadSimplifiedChineseWords];
    _pinyinToTraditionalMap = [self kb_loadPinyinToTraditionalMap];
    _bopomofoToChineseMap = [self kb_loadBopomofoToChineseMap];
    return self;
}
/// Returns at most `limit` suggestions for `prefix` using the current engine type.
///
/// @param prefix Text typed so far. An empty (or nil) prefix yields no suggestions.
/// @param limit  Maximum number of suggestions. Zero yields no suggestions.
/// @return The suggestions produced by the engine-specific lookup, or an empty array.
- (NSArray<NSString *> *)suggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    BOOL nothingToSuggest = (prefix.length == 0) || (limit == 0);
    if (nothingToSuggest) {
        return @[];
    }

    // Pick the suggestion logic that matches the active engine type.
    KBSuggestionEngineType activeType = self.engineType;
    if (activeType == KBSuggestionEngineTypePinyinTraditional) {
        return [self kb_traditionalPinyinSuggestionsForPrefix:prefix limit:limit];
    }
    if (activeType == KBSuggestionEngineTypePinyinSimplified) {
        return [self kb_simplifiedPinyinSuggestionsForPrefix:prefix limit:limit];
    }
    if (activeType == KBSuggestionEngineTypeBopomofo) {
        return [self kb_bopomofoSuggestionsForPrefix:prefix limit:limit];
    }
    // Latin, and any value not handled above, use the Latin word list.
    return [self kb_latinSuggestionsForPrefix:prefix limit:limit];
}
/// Increments the selection counter for `word`. Counters are keyed by the
/// lowercased word; empty (or nil) input is ignored.
- (void)recordSelection:(NSString *)word {
    if (word.length == 0) {
        return;
    }
    NSString *countKey = [word lowercaseString];
    // A missing entry reads as nil, whose integerValue is 0, so the first selection stores 1.
    NSNumber *previousCount = self.selectionCounts[countKey];
    NSInteger updatedCount = previousCount.integerValue + 1;
    self.selectionCounts[countKey] = @(updatedCount);
}
#pragma mark - Defaults
/// Builds the Latin word list: the built-in default words first, followed by every
/// sanitized line of each readable word-list file, with duplicates removed and
/// first-seen order preserved.
///
/// Files that are missing, unreadable or empty are skipped silently.
- (NSArray<NSString *> *)kb_loadWords {
    NSMutableOrderedSet<NSString *> *uniqueWords = [NSMutableOrderedSet orderedSet];
    [uniqueWords addObjectsFromArray:[self.class kb_defaultWords]];

    NSCharacterSet *lineBreaks = [NSCharacterSet newlineCharacterSet];
    for (NSString *filePath in [self kb_wordListPaths]) {
        if (filePath.length == 0) {
            continue;
        }
        NSString *fileText = [NSString stringWithContentsOfFile:filePath
                                                       encoding:NSUTF8StringEncoding
                                                          error:nil];
        if (fileText.length == 0) {
            continue;
        }
        for (NSString *rawLine in [fileText componentsSeparatedByCharactersInSet:lineBreaks]) {
            // Lines that are not a plain a-z word sanitize to @"" and are dropped.
            NSString *candidate = [self kb_sanitizedWordFromLine:rawLine];
            if (candidate.length > 0) {
                [uniqueWords addObject:candidate];
            }
        }
    }
    return uniqueWords.array ?: @[];
}
/// Returns the candidate word-list file paths in the order they should be read:
/// `kb_words.txt` in the App Group container (when the container resolves), then
/// the `kb_words.txt` resource of the main bundle (when present).
- (NSArray<NSString *> *)kb_wordListPaths {
    NSMutableArray<NSString *> *candidatePaths = [NSMutableArray array];

    // 1) App Group container, listed first (allows a server-downloaded large list).
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSURL *groupContainer = [fileManager containerURLForSecurityApplicationGroupIdentifier:AppGroup];
    NSString *groupDirectory = groupContainer.path;
    if (groupDirectory.length > 0) {
        [candidatePaths addObject:[groupDirectory stringByAppendingPathComponent:@"kb_words.txt"]];
    }

    // 2) List shipped inside the bundle.
    NSString *bundledList = [[NSBundle mainBundle] pathForResource:@"kb_words" ofType:@"txt"];
    if (bundledList.length > 0) {
        [candidatePaths addObject:bundledList];
    }
    return candidatePaths;
}
/// Normalizes one line of a word-list file into a dictionary word.
///
/// The line is trimmed of surrounding whitespace/newlines and lowercased. The result
/// is returned only if it consists solely of the letters a-z; anything else
/// (including an empty line) yields @"".
- (NSString *)kb_sanitizedWordFromLine:(NSString *)line {
    static NSCharacterSet *nonLetters = nil;
    static dispatch_once_t setupToken;
    dispatch_once(&setupToken, ^{
        NSCharacterSet *asciiLowercase =
            [NSCharacterSet characterSetWithCharactersInString:@"abcdefghijklmnopqrstuvwxyz"];
        nonLetters = [asciiLowercase invertedSet];
    });

    NSCharacterSet *padding = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *candidate = [line stringByTrimmingCharactersInSet:padding].lowercaseString;
    if (candidate.length == 0) {
        return @"";
    }
    // Any character outside a-z disqualifies the whole line.
    NSRange offending = [candidate rangeOfCharacterFromSet:nonLetters options:NSLiteralSearch];
    return (offending.location == NSNotFound) ? candidate : @"";
}
/// Built-in English words that are always available, even when no word-list file
/// can be read. The order is significant: it is the order in which the Latin
/// word list is scanned.
+ (NSArray<NSString *> *)kb_defaultWords {
    static NSArray<NSString *> *builtInWords = nil;
    static dispatch_once_t buildToken;
    dispatch_once(&buildToken, ^{
        builtInWords = @[
            @"a", @"an", @"and", @"are", @"as", @"at",
            @"app", @"ap", @"apple", @"apply", @"april", @"application",
            @"about", @"above", @"after", @"again", @"against", @"all",
            @"am", @"among", @"amount", @"any", @"around",
            @"be", @"because", @"been", @"before", @"being", @"below",
            @"best", @"between", @"both", @"but", @"by",
            @"can", @"could", @"come", @"common", @"case",
            @"do", @"does", @"down", @"day",
            @"each", @"early", @"end", @"even", @"every",
            @"for", @"from", @"first", @"found", @"free",
            @"get", @"good", @"great", @"go",
            @"have", @"has", @"had", @"help", @"how",
            @"in", @"is", @"it", @"if", @"into",
            @"just", @"keep", @"kind", @"know",
            @"like", @"look", @"long", @"last",
            @"make", @"more", @"most", @"my",
            @"new", @"no", @"not", @"now",
            @"of", @"on", @"one", @"or", @"other", @"our", @"out",
            @"people", @"place", @"please",
            @"quick", @"quite",
            @"right", @"read", @"real",
            @"see", @"say", @"some", @"such", @"so",
            @"the", @"to", @"this", @"that", @"them", @"then", @"there", @"they", @"these", @"time",
            @"use", @"up", @"under",
            @"very",
            @"we", @"with", @"what", @"when", @"where", @"who", @"why", @"will", @"would",
            @"you", @"your"
        ];
    });
    return builtInWords;
}
#pragma mark - Engine Type Management
/// Selects the engine type from its string identifier.
///
/// Recognized identifiers are "latin", "pinyin_traditional", "pinyin_simplified"
/// and "bopomofo". Anything else (including nil) selects the Latin engine.
/// The string that was passed in is logged as-is, even when the Latin fallback applied.
- (void)setEngineTypeFromString:(NSString *)engineTypeString {
    NSDictionary<NSString *, NSNumber *> *typesByIdentifier = @{
        @"latin": @(KBSuggestionEngineTypeLatin),
        @"pinyin_traditional": @(KBSuggestionEngineTypePinyinTraditional),
        @"pinyin_simplified": @(KBSuggestionEngineTypePinyinSimplified),
        @"bopomofo": @(KBSuggestionEngineTypeBopomofo),
    };

    NSNumber *resolvedType = typesByIdentifier[engineTypeString];
    if (resolvedType != nil) {
        self.engineType = (KBSuggestionEngineType)resolvedType.integerValue;
    } else {
        self.engineType = KBSuggestionEngineTypeLatin;
    }
    NSLog(@"[KBSuggestionEngine] Engine type set to: %@", engineTypeString);
}
#pragma mark - Latin Suggestions
/// Returns up to `limit` words from the Latin word list that start with `prefix`
/// (compared in lowercase), best candidates first.
///
/// Ranking: higher selection count first, then built-in priority words, then
/// ascending string order.
///
/// @param prefix Typed prefix; -suggestionsForPrefix:limit: only calls this with a non-empty value.
/// @param limit  Maximum number of suggestions; the same caller guarantees it is non-zero.
/// @return At most `limit` suggestions, or an empty array when nothing matches.
- (NSArray<NSString *> *)kb_latinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    NSString *lower = prefix.lowercaseString;
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    for (NSString *word in self.words) {
        if ([word hasPrefix:lower]) {
            [matches addObject:word];
            // Bounded scan: only the first 3 * limit matches (in list order) are ranked.
            if (matches.count >= limit * 3) {
                break;
            }
        }
    }
    if (matches.count == 0) { return @[]; }

    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` before `b`, so the more frequently
            // selected word has to be the one that sorts "ascending".
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        BOOL pa = [self.priorityWords containsObject:a];
        BOOL pb = [self.priorityWords containsObject:b];
        if (pa != pb) {
            return pa ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return [matches copy];
}
#pragma mark - Traditional Chinese Pinyin Suggestions
/// Returns up to `limit` Traditional Chinese candidates for a pinyin prefix.
///
/// Candidates come from the entry whose key equals the lowercased prefix, plus the
/// entries of other keys that start with it (collection stops once 2 * limit unique
/// candidates have been gathered). Each candidate appears once.
///
/// Ranking: higher selection count first, then candidates of the exact key, then
/// ascending string order. When the map yields nothing, the fallback word list is used.
///
/// @param prefix Typed pinyin; -suggestionsForPrefix:limit: only passes a non-empty value.
/// @param limit  Maximum number of suggestions; the same caller guarantees it is non-zero.
/// @return At most `limit` suggestions.
- (NSArray<NSString *> *)kb_traditionalPinyinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    NSString *lower = prefix.lowercaseString;

    NSArray<NSString *> *directMatches = self.pinyinToTraditionalMap[lower] ?: @[];
    NSSet<NSString *> *exactCandidates = [NSSet setWithArray:directMatches];

    // Ordered set: keeps first-seen order and drops candidates listed under several keys.
    NSMutableOrderedSet<NSString *> *unique = [NSMutableOrderedSet orderedSet];
    [unique addObjectsFromArray:directMatches];
    for (NSString *key in self.pinyinToTraditionalMap) {
        if (![key hasPrefix:lower] || [key isEqualToString:lower]) {
            continue;
        }
        [unique addObjectsFromArray:self.pinyinToTraditionalMap[key]];
        if (unique.count >= limit * 2) {
            break;
        }
    }
    if (unique.count == 0) {
        return [self kb_fallbackTraditionalSuggestions:lower limit:limit];
    }

    NSMutableArray<NSString *> *matches = [unique.array mutableCopy];
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` first, so the higher count must win it.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        // On ties, a candidate for exactly what was typed beats a longer-key candidate.
        BOOL ea = [exactCandidates containsObject:a];
        BOOL eb = [exactCandidates containsObject:b];
        if (ea != eb) {
            return ea ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return [matches copy];
}
/// Fallback used when no map entry matches: returns the first `limit` non-empty
/// words of the Traditional Chinese word list, in list order.
///
/// @param prefix Currently unused; the fallback list is not filtered by what was typed.
/// @param limit  Maximum number of words to return. Zero returns an empty array.
/// @return At most `limit` words.
- (NSArray<NSString *> *)kb_fallbackTraditionalSuggestions:(NSString *)prefix limit:(NSUInteger)limit {
    if (limit == 0) {
        return @[];
    }
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    for (NSString *word in self.traditionalChineseWords) {
        // An empty entry would surface as a blank suggestion.
        if (word.length == 0) {
            continue;
        }
        [matches addObject:word];
        if (matches.count >= limit) {
            break;
        }
    }
    return [matches copy];
}
#pragma mark - Simplified Chinese Pinyin Suggestions
/// Returns up to `limit` Simplified Chinese candidates for a pinyin prefix.
///
/// Candidates are looked up in the pinyin -> Traditional map (exact key first, then
/// other keys starting with the lowercased prefix, until 2 * limit unique candidates
/// are gathered) and each one is converted with -kb_toSimplified:. Each converted
/// candidate appears once, even if several source candidates convert to it.
///
/// Ranking: higher selection count first, then candidates of the exact key, then
/// ascending string order. When nothing is found, the Simplified fallback list is used.
///
/// @param prefix Typed pinyin; -suggestionsForPrefix:limit: only passes a non-empty value.
/// @param limit  Maximum number of suggestions; the same caller guarantees it is non-zero.
/// @return At most `limit` suggestions.
- (NSArray<NSString *> *)kb_simplifiedPinyinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    NSString *lower = prefix.lowercaseString;

    // Converted candidates of the key that equals the typed text.
    NSMutableOrderedSet<NSString *> *exactCandidates = [NSMutableOrderedSet orderedSet];
    for (NSString *tradChar in self.pinyinToTraditionalMap[lower]) {
        NSString *simplified = [self kb_toSimplified:tradChar];
        if (simplified.length > 0) {
            [exactCandidates addObject:simplified];
        }
    }

    // Ordered set: keeps first-seen order and drops repeated conversions.
    NSMutableOrderedSet<NSString *> *unique = [exactCandidates mutableCopy];
    for (NSString *key in self.pinyinToTraditionalMap) {
        if (![key hasPrefix:lower] || [key isEqualToString:lower]) {
            continue;
        }
        for (NSString *tradChar in self.pinyinToTraditionalMap[key]) {
            NSString *simplified = [self kb_toSimplified:tradChar];
            if (simplified.length > 0) {
                [unique addObject:simplified];
            }
        }
        if (unique.count >= limit * 2) {
            break;
        }
    }
    if (unique.count == 0) {
        return [self kb_fallbackSimplifiedSuggestions:lower limit:limit];
    }

    NSMutableArray<NSString *> *matches = [unique.array mutableCopy];
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` first, so the higher count must win it.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        // On ties, a candidate for exactly what was typed beats a longer-key candidate.
        BOOL ea = [exactCandidates containsObject:a];
        BOOL eb = [exactCandidates containsObject:b];
        if (ea != eb) {
            return ea ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return [matches copy];
}
/// Fallback used when no map entry matches: returns the first `limit` non-empty
/// words of the Simplified Chinese word list, in list order.
///
/// @param prefix Currently unused; the fallback list is not filtered by what was typed.
/// @param limit  Maximum number of words to return. Zero returns an empty array.
/// @return At most `limit` words.
- (NSArray<NSString *> *)kb_fallbackSimplifiedSuggestions:(NSString *)prefix limit:(NSUInteger)limit {
    if (limit == 0) {
        return @[];
    }
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    for (NSString *word in self.simplifiedChineseWords) {
        // An empty entry would surface as a blank suggestion.
        if (word.length == 0) {
            continue;
        }
        [matches addObject:word];
        if (matches.count >= limit) {
            break;
        }
    }
    return [matches copy];
}
// Converts a Traditional Chinese string to Simplified Chinese using a built-in
// replacement table.
//
// Lookup order:
//   1. If the whole input is a key of the table, its mapped value is returned.
//   2. Otherwise every table key found inside the input is replaced by its value.
// Returns the converted string, or the input itself when the converted string is empty.
- (NSString *)kb_toSimplified:(NSString *)traditional {
// The table is built once and shared by all calls.
static NSDictionary<NSString *, NSString *> *tradToSimpMap = nil;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
// NOTE(review): most keys and values of this literal read as empty strings in the
// reviewed copy — confirm the characters survived encoding before relying on the table.
tradToSimpMap = @{
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"广", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"怀", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @""
};
});
// Fast path: the whole input is itself a table key.
if (tradToSimpMap[traditional]) {
return tradToSimpMap[traditional];
}
// Slow path: replace every occurrence of every table key inside a mutable copy.
NSMutableString *result = [traditional mutableCopy];
[tradToSimpMap enumerateKeysAndObjectsUsingBlock:^(NSString *trad, NSString *simp, BOOL *stop) {
[result replaceOccurrencesOfString:trad withString:simp options:0 range:NSMakeRange(0, result.length)];
}];
// An empty (or nil) result falls back to returning the input unchanged.
return result.length > 0 ? [result copy] : traditional;
}
#pragma mark - Bopomofo (Zhuyin) Suggestions
/// Returns up to `limit` Chinese candidates for a Bopomofo (Zhuyin) prefix.
///
/// Candidates come from the entry whose key equals `prefix`, plus the entries of
/// other keys that start with it (collection stops once 2 * limit unique candidates
/// have been gathered). The prefix is matched as typed, without case folding.
/// Each candidate appears once.
///
/// Ranking: higher selection count first, then candidates of the exact key, then
/// ascending string order. When the map yields nothing, the Traditional fallback
/// word list is used.
///
/// @param prefix Typed Bopomofo; -suggestionsForPrefix:limit: only passes a non-empty value.
/// @param limit  Maximum number of suggestions; the same caller guarantees it is non-zero.
/// @return At most `limit` suggestions.
- (NSArray<NSString *> *)kb_bopomofoSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    NSArray<NSString *> *directMatches = self.bopomofoToChineseMap[prefix] ?: @[];
    NSSet<NSString *> *exactCandidates = [NSSet setWithArray:directMatches];

    // Ordered set: keeps first-seen order and drops candidates listed under several keys.
    NSMutableOrderedSet<NSString *> *unique = [NSMutableOrderedSet orderedSet];
    [unique addObjectsFromArray:directMatches];
    for (NSString *key in self.bopomofoToChineseMap) {
        if (![key hasPrefix:prefix] || [key isEqualToString:prefix]) {
            continue;
        }
        [unique addObjectsFromArray:self.bopomofoToChineseMap[key]];
        if (unique.count >= limit * 2) {
            break;
        }
    }
    if (unique.count == 0) {
        return [self kb_fallbackTraditionalSuggestions:prefix limit:limit];
    }

    NSMutableArray<NSString *> *matches = [unique.array mutableCopy];
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` first, so the higher count must win it.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        // On ties, a candidate for exactly what was typed beats a longer-key candidate.
        BOOL ea = [exactCandidates containsObject:a];
        BOOL eb = [exactCandidates containsObject:b];
        if (ea != eb) {
            return ea ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return [matches copy];
}
#pragma mark - Chinese Word Loading
/// Returns the common Traditional Chinese words used as fallback suggestions.
///
/// These are hard-coded sample words for now; the intent is to load them from a
/// file or database eventually.
- (NSArray<NSString *> *)kb_loadTraditionalChineseWords {
    NSArray<NSString *> *sampleWords = @[
        @"你好", @"謝謝", @"對不起", @"再見", @"早安",
        @"晚安", @"請問", @"不好意思", @"沒關係", @"加油",
        @"台灣", @"台北", @"高雄", @"台中", @"台南",
        @"朋友", @"家人", @"工作", @"學習", @"生活",
        @"時間", @"地點", @"方法", @"問題", @"答案",
        @"喜歡", @"", @"想念", @"開心", @"快樂",
        @"美麗", @"漂亮", @"帥氣", @"可愛", @"溫柔"
    ];
    return sampleWords;
}
/// Returns the hard-coded Simplified Chinese words used as fallback suggestions.
- (NSArray<NSString *> *)kb_loadSimplifiedChineseWords {
    NSArray<NSString *> *sampleWords = @[
        @"你好", @"谢谢", @"对不起", @"再见", @"早安",
        @"晚安", @"请问", @"不好意思", @"没关系", @"加油",
        @"中国", @"北京", @"上海", @"广州", @"深圳",
        @"朋友", @"家人", @"工作", @"学习", @"生活",
        @"时间", @"地点", @"方法", @"问题", @"答案",
        @"喜欢", @"", @"想念", @"开心", @"快乐",
        @"美丽", @"漂亮", @"帅气", @"可爱", @"温柔"
    ];
    return sampleWords;
}
#pragma mark - Pinyin & Bopomofo Map Loading
/// Loads the pinyin -> candidates map from `pinyin_to_traditional.json` in the main bundle.
///
/// The file must contain a top-level object with a "mappings" object whose values are
/// arrays. Only non-empty string elements are kept, and keys left with no candidates
/// are dropped. Any failure (missing file, unreadable data, invalid JSON, unexpected
/// shape) is logged and yields an empty dictionary.
///
/// @return An immutable key -> candidates dictionary; never nil.
- (NSDictionary<NSString *, NSArray<NSString *> *> *)kb_loadPinyinToTraditionalMap {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"pinyin_to_traditional" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] pinyin_to_traditional.json not found, using empty map");
        return @{};
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read pinyin_to_traditional.json");
        return @{};
    }

    NSError *error = nil;
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    // Judge success by the returned object, not by `error`: a nil result fails the
    // class check, and `error` is only meaningful once the call has failed.
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse pinyin_to_traditional.json: %@", error);
        return @{};
    }
    NSDictionary *mappings = ((NSDictionary *)json)[@"mappings"];
    if (![mappings isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid mappings in pinyin_to_traditional.json");
        return @{};
    }

    NSMutableDictionary<NSString *, NSArray<NSString *> *> *result = [NSMutableDictionary dictionary];
    [mappings enumerateKeysAndObjectsUsingBlock:^(NSString *key, id obj, BOOL *stop) {
        if (![obj isKindOfClass:NSArray.class]) {
            return;
        }
        NSMutableArray<NSString *> *chars = [NSMutableArray array];
        for (id item in (NSArray *)obj) {
            // Skip non-strings and empty strings; the latter would be blank suggestions.
            if ([item isKindOfClass:NSString.class] && [(NSString *)item length] > 0) {
                [chars addObject:item];
            }
        }
        if (chars.count > 0) {
            result[key] = [chars copy];
        }
    }];
    NSLog(@"[KBSuggestionEngine] Loaded %lu pinyin mappings", (unsigned long)result.count);
    return [result copy];
}
/// Loads the Bopomofo -> candidates map from `bopomofo_to_chinese.json` in the main bundle.
///
/// The file must contain a top-level object with a "mappings" object whose values are
/// arrays. Only non-empty string elements are kept, and keys left with no candidates
/// are dropped. Any failure (missing file, unreadable data, invalid JSON, unexpected
/// shape) is logged and yields an empty dictionary.
///
/// @return An immutable key -> candidates dictionary; never nil.
- (NSDictionary<NSString *, NSArray<NSString *> *> *)kb_loadBopomofoToChineseMap {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"bopomofo_to_chinese" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] bopomofo_to_chinese.json not found, using empty map");
        return @{};
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read bopomofo_to_chinese.json");
        return @{};
    }

    NSError *error = nil;
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    // Judge success by the returned object, not by `error`: a nil result fails the
    // class check, and `error` is only meaningful once the call has failed.
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse bopomofo_to_chinese.json: %@", error);
        return @{};
    }
    NSDictionary *mappings = ((NSDictionary *)json)[@"mappings"];
    if (![mappings isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid mappings in bopomofo_to_chinese.json");
        return @{};
    }

    NSMutableDictionary<NSString *, NSArray<NSString *> *> *result = [NSMutableDictionary dictionary];
    [mappings enumerateKeysAndObjectsUsingBlock:^(NSString *key, id obj, BOOL *stop) {
        if (![obj isKindOfClass:NSArray.class]) {
            return;
        }
        NSMutableArray<NSString *> *chars = [NSMutableArray array];
        for (id item in (NSArray *)obj) {
            // Skip non-strings and empty strings; the latter would be blank suggestions.
            if ([item isKindOfClass:NSString.class] && [(NSString *)item length] > 0) {
                [chars addObject:item];
            }
        }
        if (chars.count > 0) {
            result[key] = [chars copy];
        }
    }];
    NSLog(@"[KBSuggestionEngine] Loaded %lu bopomofo mappings", (unsigned long)result.count);
    return [result copy];
}
@end