Files
keyboard/CustomKeyboard/Manager/KBSuggestionEngine.m

535 lines
22 KiB
Mathematica
Raw Normal View History

2025-12-22 12:54:28 +08:00
//
// KBSuggestionEngine.m
// CustomKeyboard
//
#import "KBSuggestionEngine.h"
#import "KBConfig.h"
// Private state of the suggestion engine. Immutable collection types are declared
// `copy` so a mutable instance handed to a setter cannot change behind the engine's back.
@interface KBSuggestionEngine ()

/// Latin word list: the built-in defaults followed by sanitized words read from the word-list files.
@property (nonatomic, copy) NSArray<NSString *> *words;

/// Number of times each candidate was selected, keyed by the lowercased candidate string.
@property (nonatomic, strong) NSMutableDictionary<NSString *, NSNumber *> *selectionCounts;

/// The built-in default words; used as a tie-breaker when ranking Latin suggestions.
@property (nonatomic, copy) NSSet<NSString *> *priorityWords;

/// Built-in Traditional Chinese phrases returned when no pinyin/bopomofo mapping matches.
@property (nonatomic, copy) NSArray<NSString *> *traditionalChineseWords;

/// Built-in Simplified Chinese phrases returned when no pinyin mapping matches.
@property (nonatomic, copy) NSArray<NSString *> *simplifiedChineseWords;

/// Pinyin key -> Traditional Chinese candidates, loaded from pinyin_to_traditional.json.
@property (nonatomic, copy) NSDictionary<NSString *, NSArray<NSString *> *> *pinyinToTraditionalMap;

/// Bopomofo key -> Chinese candidates, loaded from bopomofo_to_chinese.json.
@property (nonatomic, copy) NSDictionary<NSString *, NSArray<NSString *> *> *bopomofoToChineseMap;

@end
@implementation KBSuggestionEngine
/// Returns the process-wide engine instance, creating it on first use.
+ (instancetype)shared {
    static dispatch_once_t once;
    static KBSuggestionEngine *sharedEngine = nil;
    // dispatch_once guarantees the initializer block runs a single time.
    dispatch_once(&once, ^{
        sharedEngine = [[KBSuggestionEngine alloc] init];
    });
    return sharedEngine;
}
/// Creates an engine in Latin mode and eagerly loads every dictionary it can serve:
/// the Latin word list, the built-in Chinese phrase lists, and the pinyin/bopomofo maps.
- (instancetype)init {
    self = [super init];
    if (self) {
        _engineType = KBSuggestionEngineTypeLatin;
        _selectionCounts = [NSMutableDictionary dictionary];

        // The built-in defaults double as the "priority" set used when ranking Latin words.
        NSArray<NSString *> *defaults = [self.class kb_defaultWords];
        _priorityWords = [NSSet setWithArray:defaults];
        _words = [self kb_loadWords];

        _traditionalChineseWords = [self kb_loadTraditionalChineseWords];
        _simplifiedChineseWords = [self kb_loadSimplifiedChineseWords];

        _pinyinToTraditionalMap = [self kb_loadPinyinToTraditionalMap];
        _bopomofoToChineseMap = [self kb_loadBopomofoToChineseMap];
    }
    return self;
}
/// Returns up to `limit` suggestions for the text typed so far.
/// @param prefix The typed prefix. An empty or nil prefix yields an empty array.
/// @param limit  Maximum number of suggestions. Zero yields an empty array.
/// @return Suggestions produced by the backend selected through `engineType`.
- (NSArray<NSString *> *)suggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (prefix.length == 0 || limit == 0) { return @[]; }

    // Dispatch on the active engine type; unknown values fall back to Latin.
    switch (self.engineType) {
        case KBSuggestionEngineTypePinyinTraditional:
            return [self kb_traditionalPinyinSuggestionsForPrefix:prefix limit:limit];
        case KBSuggestionEngineTypePinyinSimplified:
            return [self kb_simplifiedPinyinSuggestionsForPrefix:prefix limit:limit];
        case KBSuggestionEngineTypeBopomofo:
            return [self kb_bopomofoSuggestionsForPrefix:prefix limit:limit];
        case KBSuggestionEngineTypeLatin:
        default:
            return [self kb_latinSuggestionsForPrefix:prefix limit:limit];
    }
}
/// Increments the selection counter of `word`. Counters are keyed by the lowercased
/// word; nil or empty input is ignored.
- (void)recordSelection:(NSString *)word {
    if (word.length > 0) {
        NSString *normalized = [word lowercaseString];
        NSNumber *previous = self.selectionCounts[normalized];
        // A missing entry reads as nil, whose integerValue is 0.
        NSInteger updated = previous.integerValue + 1;
        self.selectionCounts[normalized] = @(updated);
    }
}
#pragma mark - Defaults
/// Builds the Latin word list: the built-in defaults first, then every sanitized line of
/// each readable word-list file, in file order. Duplicates are dropped, first occurrence wins.
- (NSArray<NSString *> *)kb_loadWords {
    NSMutableOrderedSet<NSString *> *uniqueWords =
        [NSMutableOrderedSet orderedSetWithArray:[self.class kb_defaultWords]];
    NSCharacterSet *lineBreaks = [NSCharacterSet newlineCharacterSet];

    for (NSString *filePath in [self kb_wordListPaths]) {
        if (filePath.length == 0) { continue; }

        // Unreadable or missing files are skipped silently; loading is best-effort.
        NSString *text = [NSString stringWithContentsOfFile:filePath
                                                   encoding:NSUTF8StringEncoding
                                                      error:nil];
        if (text.length > 0) {
            for (NSString *rawLine in [text componentsSeparatedByCharactersInSet:lineBreaks]) {
                NSString *candidate = [self kb_sanitizedWordFromLine:rawLine];
                if (candidate.length > 0) {
                    [uniqueWords addObject:candidate];
                }
            }
        }
    }

    return uniqueWords.array ?: @[];
}
/// Returns the candidate word-list file paths in load order: the App Group container's
/// kb_words.txt (listed whenever the container resolves, whether or not the file exists),
/// followed by the kb_words.txt bundled with the main bundle when present.
- (NSArray<NSString *> *)kb_wordListPaths {
    NSMutableArray<NSString *> *candidates = [NSMutableArray array];

    // Shared App Group container first (intended for a larger, downloaded list).
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSURL *groupURL = [fileManager containerURLForSecurityApplicationGroupIdentifier:AppGroup];
    NSString *groupDirectory = groupURL.path;
    if (groupDirectory.length > 0) {
        [candidates addObject:[groupDirectory stringByAppendingPathComponent:@"kb_words.txt"]];
    }

    // Then the copy shipped inside the bundle.
    NSString *bundledList = [[NSBundle mainBundle] pathForResource:@"kb_words" ofType:@"txt"];
    if (bundledList.length > 0) {
        [candidates addObject:bundledList];
    }

    return candidates;
}
/// Normalizes one word-list line: trims surrounding whitespace/newlines and lowercases it.
/// @return The normalized word, or @"" when the line is empty or contains anything
///         other than the letters a-z after normalization.
- (NSString *)kb_sanitizedWordFromLine:(NSString *)line {
    static NSCharacterSet *disallowed = nil;
    static dispatch_once_t once;
    dispatch_once(&once, ^{
        NSCharacterSet *allowed =
            [NSCharacterSet characterSetWithCharactersInString:@"abcdefghijklmnopqrstuvwxyz"];
        disallowed = [allowed invertedSet];
    });

    NSCharacterSet *blank = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *candidate = [line stringByTrimmingCharactersInSet:blank].lowercaseString;
    if (candidate.length == 0) {
        return @"";
    }

    // Any single character outside a-z disqualifies the whole line.
    NSRange offending = [candidate rangeOfCharacterFromSet:disallowed options:NSLiteralSearch];
    return (offending.location == NSNotFound) ? candidate : @"";
}
/// Built-in lowercase English words, grouped roughly by first letter.
/// They seed the Latin word list and also form the priority set used to break
/// ranking ties between Latin suggestions.
+ (NSArray<NSString *> *)kb_defaultWords {
return @[
@"a", @"an", @"and", @"are", @"as", @"at",
@"app", @"ap", @"apple", @"apply", @"april", @"application",
@"about", @"above", @"after", @"again", @"against", @"all",
@"am", @"among", @"amount", @"any", @"around",
@"be", @"because", @"been", @"before", @"being", @"below",
@"best", @"between", @"both", @"but", @"by",
@"can", @"could", @"come", @"common", @"case",
@"do", @"does", @"down", @"day",
@"each", @"early", @"end", @"even", @"every",
@"for", @"from", @"first", @"found", @"free",
@"get", @"good", @"great", @"go",
@"have", @"has", @"had", @"help", @"how",
@"in", @"is", @"it", @"if", @"into",
@"just", @"keep", @"kind", @"know",
@"like", @"look", @"long", @"last",
@"make", @"more", @"most", @"my",
@"new", @"no", @"not", @"now",
@"of", @"on", @"one", @"or", @"other", @"our", @"out",
@"people", @"place", @"please",
@"quick", @"quite",
@"right", @"read", @"real",
@"see", @"say", @"some", @"such", @"so",
@"the", @"to", @"this", @"that", @"them", @"then", @"there", @"they", @"these", @"time",
@"use", @"up", @"under",
@"very",
@"we", @"with", @"what", @"when", @"where", @"who", @"why", @"will", @"would",
@"you", @"your"
];
}
2026-03-02 09:19:06 +08:00
#pragma mark - Engine Type Management
/// Selects the engine type from its string identifier ("latin", "pinyin_traditional",
/// "pinyin_simplified", "bopomofo"). Any other value, including nil, selects Latin.
/// The string that was passed in is logged as-is, even when it was not recognized.
- (void)setEngineTypeFromString:(NSString *)engineTypeString {
    static NSDictionary<NSString *, NSNumber *> *typesByName = nil;
    static dispatch_once_t once;
    dispatch_once(&once, ^{
        typesByName = @{
            @"latin": @(KBSuggestionEngineTypeLatin),
            @"pinyin_traditional": @(KBSuggestionEngineTypePinyinTraditional),
            @"pinyin_simplified": @(KBSuggestionEngineTypePinyinSimplified),
            @"bopomofo": @(KBSuggestionEngineTypeBopomofo),
        };
    });

    NSNumber *resolved = (engineTypeString != nil) ? typesByName[engineTypeString] : nil;
    if (resolved == nil) {
        // Unknown identifier: fall back to the Latin engine.
        resolved = @(KBSuggestionEngineTypeLatin);
    }
    self.engineType = resolved.integerValue;

    NSLog(@"[KBSuggestionEngine] Engine type set to: %@", engineTypeString);
}
#pragma mark - Latin Suggestions
/// Returns up to `limit` Latin words that start with the lowercased `prefix`.
///
/// The word list is scanned in order and the scan stops once `limit * 3` matches are
/// collected. The collected matches are then ranked by:
///   1. selection count, most frequently selected first;
///   2. membership in the built-in priority set;
///   3. `-compare:` on the words themselves.
///
/// @return The ranked matches, or an empty array when nothing matches.
- (NSArray<NSString *> *)kb_latinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    NSString *lower = prefix.lowercaseString;
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    for (NSString *word in self.words) {
        if ([word hasPrefix:lower]) {
            [matches addObject:word];
            if (matches.count >= limit * 3) {
                break;
            }
        }
    }
    if (matches.count == 0) { return @[]; }

    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` first, so it must be returned when `a`
            // has the HIGHER count; otherwise frequently picked words sink to the end.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        BOOL pa = [self.priorityWords containsObject:a];
        BOOL pb = [self.priorityWords containsObject:b];
        if (pa != pb) {
            return pa ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
#pragma mark - Traditional Chinese Pinyin Suggestions
/// Returns up to `limit` Traditional Chinese candidates for a (possibly partial) pinyin input.
///
/// Candidates of the exact key are collected first, then candidates of every other key
/// that starts with the input, until `limit * 2` distinct candidates are gathered.
/// Each candidate appears once even when it is listed under several matching keys.
/// Results are ranked by selection count (most selected first), then by `-compare:`.
/// When no key matches, the built-in Traditional phrase list is used instead.
- (NSArray<NSString *> *)kb_traditionalPinyinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    NSString *lower = prefix.lowercaseString;
    // Ordered set keeps insertion order while dropping repeated candidates.
    NSMutableOrderedSet<NSString *> *unique = [NSMutableOrderedSet orderedSet];

    NSArray<NSString *> *directMatches = self.pinyinToTraditionalMap[lower];
    if (directMatches.count > 0) {
        [unique addObjectsFromArray:directMatches];
    }
    for (NSString *key in self.pinyinToTraditionalMap) {
        if ([key hasPrefix:lower] && ![key isEqualToString:lower]) {
            NSArray<NSString *> *candidates = self.pinyinToTraditionalMap[key];
            [unique addObjectsFromArray:candidates];
            if (unique.count >= limit * 2) {
                break;
            }
        }
    }
    if (unique.count == 0) {
        return [self kb_fallbackTraditionalSuggestions:lower limit:limit];
    }

    NSMutableArray<NSString *> *matches = [unique.array mutableCopy];
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // Ascending == `a` first, so the higher count must win.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
/// Fallback used when no mapping matches: returns the first `limit` entries of the
/// built-in Traditional Chinese phrase list.
/// @param prefix Currently unused; the phrases are not filtered by the typed input.
/// @param limit  Maximum number of phrases to return; 0 yields an empty array.
- (NSArray<NSString *> *)kb_fallbackTraditionalSuggestions:(NSString *)prefix limit:(NSUInteger)limit {
    NSArray<NSString *> *phrases = self.traditionalChineseWords;
    NSUInteger count = MIN(limit, phrases.count);
    // Messaging a nil list returns nil, hence the empty-array default.
    return [phrases subarrayWithRange:NSMakeRange(0, count)] ?: @[];
}
#pragma mark - Simplified Chinese Pinyin Suggestions
/// Converts each Traditional candidate to Simplified and adds the non-empty results to `set`.
- (void)kb_addSimplifiedFormsOfCandidates:(NSArray<NSString *> *)candidates
                                    toSet:(NSMutableOrderedSet<NSString *> *)set {
    for (NSString *traditional in candidates) {
        NSString *simplified = [self kb_toSimplified:traditional];
        if (simplified.length > 0) {
            [set addObject:simplified];
        }
    }
}

/// Returns up to `limit` Simplified Chinese candidates for a (possibly partial) pinyin input.
///
/// Candidates come from the pinyin -> Traditional map and are converted to Simplified.
/// The exact key is consulted first, then every other key that starts with the input,
/// until `limit * 2` distinct candidates are gathered. Candidates that become identical
/// after conversion are offered once. Results are ranked by selection count (most
/// selected first), then by `-compare:`. When nothing matches, the built-in Simplified
/// phrase list is used instead.
- (NSArray<NSString *> *)kb_simplifiedPinyinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    NSString *lower = prefix.lowercaseString;
    NSMutableOrderedSet<NSString *> *unique = [NSMutableOrderedSet orderedSet];

    [self kb_addSimplifiedFormsOfCandidates:self.pinyinToTraditionalMap[lower] toSet:unique];
    for (NSString *key in self.pinyinToTraditionalMap) {
        if ([key hasPrefix:lower] && ![key isEqualToString:lower]) {
            [self kb_addSimplifiedFormsOfCandidates:self.pinyinToTraditionalMap[key] toSet:unique];
            if (unique.count >= limit * 2) {
                break;
            }
        }
    }
    if (unique.count == 0) {
        return [self kb_fallbackSimplifiedSuggestions:lower limit:limit];
    }

    NSMutableArray<NSString *> *matches = [unique.array mutableCopy];
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // Ascending == `a` first, so the higher count must win.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
/// Fallback used when no mapping matches: returns the first `limit` entries of the
/// built-in Simplified Chinese phrase list.
/// @param prefix Currently unused; the phrases are not filtered by the typed input.
/// @param limit  Maximum number of phrases to return; 0 yields an empty array.
- (NSArray<NSString *> *)kb_fallbackSimplifiedSuggestions:(NSString *)prefix limit:(NSUInteger)limit {
    NSArray<NSString *> *phrases = self.simplifiedChineseWords;
    NSUInteger count = MIN(limit, phrases.count);
    // Messaging a nil list returns nil, hence the empty-array default.
    return [phrases subarrayWithRange:NSMakeRange(0, count)] ?: @[];
}
2026-03-02 14:39:47 +08:00
/// Converts Traditional Chinese text to Simplified using a small built-in character table.
///
/// The text is walked once, one composed character at a time; characters present in the
/// table are replaced and everything else is copied through unchanged. The table only
/// lists characters whose Simplified form differs, so unlisted characters are returned
/// as they are.
///
/// @param traditional Text to convert. nil or empty input is returned unchanged.
/// @return The converted text.
- (NSString *)kb_toSimplified:(NSString *)traditional {
    static NSDictionary<NSString *, NSString *> *tradToSimpMap = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        // One entry per character: no repeated keys and no identity mappings.
        tradToSimpMap = @{
            @"臺": @"台", @"灣": @"湾", @"語": @"语", @"體": @"体", @"國": @"国",
            @"學": @"学", @"時": @"时", @"問": @"问", @"見": @"见", @"經": @"经",
            @"動": @"动", @"長": @"长", @"開": @"开", @"關": @"关", @"無": @"无",
            @"說": @"说", @"書": @"书", @"電": @"电", @"機": @"机", @"氣": @"气",
            @"這": @"这", @"們": @"们", @"個": @"个", @"對": @"对", @"來": @"来",
            @"還": @"还", @"過": @"过", @"會": @"会", @"進": @"进", @"頭": @"头",
            @"點": @"点", @"題": @"题", @"變": @"变", @"條": @"条", @"東": @"东",
            @"車": @"车", @"錢": @"钱", @"門": @"门", @"聽": @"听", @"聲": @"声",
            @"醫": @"医", @"讓": @"让", @"識": @"识", @"務": @"务", @"農": @"农",
            @"業": @"业", @"產": @"产", @"黨": @"党", @"歷": @"历", @"後": @"后",
            @"強": @"强", @"當": @"当", @"應": @"应", @"從": @"从", @"優": @"优",
            @"兒": @"儿", @"兩": @"两", @"幾": @"几", @"廣": @"广", @"場": @"场",
            @"決": @"决", @"許": @"许", @"設": @"设", @"請": @"请", @"論": @"论",
            @"認": @"认", @"斷": @"断", @"離": @"离", @"須": @"须", @"導": @"导",
            @"爭": @"争", @"輕": @"轻", @"難": @"难", @"極": @"极", @"據": @"据",
            @"實": @"实", @"際": @"际", @"標": @"标", @"準": @"准", @"確": @"确",
            @"證": @"证", @"驗": @"验", @"權": @"权", @"規": @"规", @"則": @"则",
            @"劃": @"划", @"計": @"计", @"術": @"术", @"藝": @"艺", @"選": @"选",
            @"舉": @"举", @"團": @"团", @"結": @"结", @"組": @"组", @"織": @"织",
            @"義": @"义", @"親": @"亲", @"愛": @"爱", @"懷": @"怀", @"屬": @"属",
            @"幫": @"帮", @"誼": @"谊", @"謝": @"谢", @"沒": @"没", @"係": @"系",
            @"習": @"习", @"歡": @"欢", @"樂": @"乐", @"麗": @"丽", @"帥": @"帅",
            @"溫": @"温"
        };
    });

    if (traditional.length == 0) {
        return traditional;
    }

    // Single pass over the input instead of one full-string replace per table entry.
    NSMutableString *result = [NSMutableString stringWithCapacity:traditional.length];
    NSUInteger index = 0;
    while (index < traditional.length) {
        NSRange range = [traditional rangeOfComposedCharacterSequenceAtIndex:index];
        NSString *character = [traditional substringWithRange:range];
        [result appendString:(tradToSimpMap[character] ?: character)];
        index = NSMaxRange(range);
    }
    return [result copy];
}
2026-03-02 09:19:06 +08:00
#pragma mark - Bopomofo (Zhuyin) Suggestions
/// Returns up to `limit` Chinese candidates for a (possibly partial) bopomofo input.
///
/// Candidates of the exact key are collected first, then candidates of every other key
/// that starts with the input, until `limit * 2` distinct candidates are gathered.
/// Each candidate appears once even when it is listed under several matching keys.
/// Results are ranked by selection count (most selected first), then by `-compare:`.
/// When no key matches, the built-in Traditional phrase list is used instead.
- (NSArray<NSString *> *)kb_bopomofoSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    // Ordered set keeps insertion order while dropping repeated candidates.
    NSMutableOrderedSet<NSString *> *unique = [NSMutableOrderedSet orderedSet];

    NSArray<NSString *> *directMatches = self.bopomofoToChineseMap[prefix];
    if (directMatches.count > 0) {
        [unique addObjectsFromArray:directMatches];
    }
    for (NSString *key in self.bopomofoToChineseMap) {
        if ([key hasPrefix:prefix] && ![key isEqualToString:prefix]) {
            NSArray<NSString *> *candidates = self.bopomofoToChineseMap[key];
            [unique addObjectsFromArray:candidates];
            if (unique.count >= limit * 2) {
                break;
            }
        }
    }
    if (unique.count == 0) {
        return [self kb_fallbackTraditionalSuggestions:prefix limit:limit];
    }

    NSMutableArray<NSString *> *matches = [unique.array mutableCopy];
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // Ascending == `a` first, so the higher count must win.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];

    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
#pragma mark - Chinese Word Loading
/// Returns the built-in Traditional Chinese phrase list.
/// The list is hard-coded here; nothing is read from disk.
- (NSArray<NSString *> *)kb_loadTraditionalChineseWords {
// Common greetings, place names and everyday words.
// The fallback suggestion method returns these in this order.
return @[
@"你好", @"謝謝", @"對不起", @"再見", @"早安",
@"晚安", @"請問", @"不好意思", @"沒關係", @"加油",
@"台灣", @"台北", @"高雄", @"台中", @"台南",
@"朋友", @"家人", @"工作", @"學習", @"生活",
@"時間", @"地點", @"方法", @"問題", @"答案",
@"喜歡", @"愛", @"想念", @"開心", @"快樂",
@"美麗", @"漂亮", @"帥氣", @"可愛", @"溫柔"
];
}
/// Returns the built-in Simplified Chinese phrase list.
/// The list is hard-coded here; nothing is read from disk. The fallback suggestion
/// method returns these in this order.
- (NSArray<NSString *> *)kb_loadSimplifiedChineseWords {
return @[
@"你好", @"谢谢", @"对不起", @"再见", @"早安",
@"晚安", @"请问", @"不好意思", @"没关系", @"加油",
@"中国", @"北京", @"上海", @"广州", @"深圳",
@"朋友", @"家人", @"工作", @"学习", @"生活",
@"时间", @"地点", @"方法", @"问题", @"答案",
@"喜欢", @"爱", @"想念", @"开心", @"快乐",
@"美丽", @"漂亮", @"帅气", @"可爱", @"温柔"
];
}
2026-03-02 14:39:47 +08:00
#pragma mark - Pinyin & Bopomofo Map Loading
/// Loads `<resourceName>.json` from the main bundle and returns its "mappings" object
/// as key -> array-of-strings.
///
/// Expected document shape: `{ "mappings": { "<key>": ["<candidate>", ...], ... } }`.
/// Values that are not arrays, array items that are not strings, and keys left with no
/// string items are skipped. Every failure (resource missing, unreadable, unparsable,
/// or "mappings" not an object) is logged and yields an empty dictionary.
///
/// @param resourceName Bundle resource name without the ".json" extension.
/// @param label        Word used in the success log line (e.g. "pinyin").
- (NSDictionary<NSString *, NSArray<NSString *> *> *)kb_loadMappingsFromJSONResource:(NSString *)resourceName
                                                                               label:(NSString *)label {
    NSString *path = [[NSBundle mainBundle] pathForResource:resourceName ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] %@.json not found, using empty map", resourceName);
        return @{};
    }

    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read %@.json", resourceName);
        return @{};
    }

    NSError *error = nil;
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    // Decide on the returned object, not on the error pointer: a nil result fails the
    // class check too, and `error` is only meaningful when parsing actually failed.
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse %@.json: %@", resourceName, error);
        return @{};
    }

    NSDictionary *mappings = ((NSDictionary *)json)[@"mappings"];
    if (![mappings isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid mappings in %@.json", resourceName);
        return @{};
    }

    NSMutableDictionary<NSString *, NSArray<NSString *> *> *result = [NSMutableDictionary dictionary];
    [mappings enumerateKeysAndObjectsUsingBlock:^(NSString *key, id obj, BOOL *stop) {
        if (![obj isKindOfClass:NSArray.class]) {
            return;
        }
        NSMutableArray<NSString *> *chars = [NSMutableArray array];
        for (id item in (NSArray *)obj) {
            if ([item isKindOfClass:NSString.class]) {
                [chars addObject:item];
            }
        }
        if (chars.count > 0) {
            result[key] = [chars copy];
        }
    }];

    NSLog(@"[KBSuggestionEngine] Loaded %lu %@ mappings", (unsigned long)result.count, label);
    return [result copy];
}

/// Loads the pinyin -> Traditional Chinese candidate map from pinyin_to_traditional.json.
/// Returns an empty dictionary when the resource is missing or malformed.
- (NSDictionary<NSString *, NSArray<NSString *> *> *)kb_loadPinyinToTraditionalMap {
    return [self kb_loadMappingsFromJSONResource:@"pinyin_to_traditional" label:@"pinyin"];
}

/// Loads the bopomofo -> Chinese candidate map from bopomofo_to_chinese.json.
/// Returns an empty dictionary when the resource is missing or malformed.
- (NSDictionary<NSString *, NSArray<NSString *> *> *)kb_loadBopomofoToChineseMap {
    return [self kb_loadMappingsFromJSONResource:@"bopomofo_to_chinese" label:@"bopomofo"];
}
2025-12-22 12:54:28 +08:00
@end