Files
keyboard/CustomKeyboard/Manager/KBSuggestionEngine.m
2026-03-06 18:54:43 +08:00

975 lines
38 KiB
Objective-C

//
// KBSuggestionEngine.m
// CustomKeyboard
//
#import "KBSuggestionEngine.h"
#import "KBConfig.h"
/// Private state of the suggestion engine: lazily loaded dictionaries and the
/// selection statistics used when ranking candidates.
@interface KBSuggestionEngine ()
/// Generic Latin word list (built-in defaults plus any kb_words.txt content); loaded on first use.
@property (nonatomic, copy) NSArray<NSString *> *words;
/// How many times each word was recorded via -recordSelection:, keyed by the lowercased word.
@property (nonatomic, strong) NSMutableDictionary<NSString *, NSNumber *> *selectionCounts;
/// Built-in default words; used as a tie-breaker in Latin ranking.
/// Declared `copy` because NSSet has a mutable subclass.
@property (nonatomic, copy) NSSet<NSString *> *priorityWords;
/// Sample Traditional Chinese words used when no phonetic mapping matches.
@property (nonatomic, copy) NSArray<NSString *> *traditionalChineseWords;
/// Sample Simplified Chinese words used when no phonetic mapping matches.
@property (nonatomic, copy) NSArray<NSString *> *simplifiedChineseWords;
/// Pinyin syllable -> Traditional Chinese candidates, loaded from pinyin_to_traditional.json.
/// Declared `copy` because NSDictionary has a mutable subclass.
@property (nonatomic, copy) NSDictionary<NSString *, NSArray<NSString *> *> *pinyinToTraditionalMap;
/// Bopomofo sequence -> Chinese candidates, loaded from bopomofo_to_chinese.json.
/// Declared `copy` because NSDictionary has a mutable subclass.
@property (nonatomic, copy) NSDictionary<NSString *, NSArray<NSString *> *> *bopomofoToChineseMap;
/// Spanish word list, loaded from spanish_words.json on first use.
@property (nonatomic, copy) NSArray<NSString *> *spanishWords;
/// English word list, loaded from english_words.json on first use.
@property (nonatomic, copy) NSArray<NSString *> *englishWords;
/// Portuguese word list, loaded from portuguese_words.json on first use.
@property (nonatomic, copy) NSArray<NSString *> *portugueseWords;
/// Indonesian word list, loaded from indonesian_words.json on first use.
@property (nonatomic, copy) NSArray<NSString *> *indonesianWords;
@end
@implementation KBSuggestionEngine
/// Returns the shared suggestion engine, creating it exactly once on first access.
+ (instancetype)shared {
    static dispatch_once_t creationGuard;
    static KBSuggestionEngine *sharedEngine = nil;
    dispatch_once(&creationGuard, ^{
        sharedEngine = [[KBSuggestionEngine alloc] init];
    });
    return sharedEngine;
}
/// Creates an engine that starts in Latin mode with no recorded selections.
/// The built-in default words double as the priority set used for ranking ties.
- (instancetype)init {
    self = [super init];
    if (self == nil) {
        return nil;
    }
    _engineType = KBSuggestionEngineTypeLatin;
    _selectionCounts = [NSMutableDictionary dictionary];
    _priorityWords = [NSSet setWithArray:[self.class kb_defaultWords]];
    return self;
}
/// Returns up to `limit` suggestions for `prefix` from the currently selected engine.
///
/// Candidates are fetched from the per-language helper and then passed through
/// the sensitive-word filter, which also removes duplicates.
///
/// @param prefix The text typed so far. An empty or nil prefix yields no suggestions.
/// @param limit  Maximum number of suggestions to return. Zero yields no suggestions.
/// @return The filtered suggestions, possibly empty.
- (NSArray<NSString *> *)suggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (limit == 0 || prefix.length == 0) {
        return @[];
    }

    // Over-fetch (4x, capped at 80) so that filtering still leaves enough
    // candidates. Requests of 80 or more are passed through unchanged.
    static const NSUInteger kCandidateCeiling = 80;
    NSUInteger candidateBudget = limit;
    if (candidateBudget < kCandidateCeiling) {
        candidateBudget = MIN(kCandidateCeiling, candidateBudget * 4);
    }

    NSArray<NSString *> *candidates = nil;
    switch (self.engineType) {
        case KBSuggestionEngineTypeEnglish:
            candidates = [self kb_englishSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
        case KBSuggestionEngineTypeSpanish:
            candidates = [self kb_spanishSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
        case KBSuggestionEngineTypePortuguese:
            candidates = [self kb_portugueseSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
        case KBSuggestionEngineTypeIndonesian:
            candidates = [self kb_indonesianSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
        case KBSuggestionEngineTypePinyinTraditional:
            candidates = [self kb_traditionalPinyinSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
        case KBSuggestionEngineTypePinyinSimplified:
            candidates = [self kb_simplifiedPinyinSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
        case KBSuggestionEngineTypeBopomofo:
            candidates = [self kb_bopomofoSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
        case KBSuggestionEngineTypeLatin:
        default:
            candidates = [self kb_latinSuggestionsForPrefix:prefix limit:candidateBudget];
            break;
    }

    return [self kb_filterSensitiveSuggestions:candidates limit:limit];
}
/// Records that the user picked `word`, incrementing its selection count.
/// Counts are keyed by the lowercased word; empty or nil input is ignored.
- (void)recordSelection:(NSString *)word {
    if (word.length > 0) {
        NSString *countKey = [word lowercaseString];
        NSNumber *previous = self.selectionCounts[countKey];
        // A missing entry reads as 0 because messaging nil returns zero.
        self.selectionCounts[countKey] = @(previous.integerValue + 1);
    }
}
#pragma mark - Defaults
/// Builds the generic Latin word list.
///
/// Starts with the built-in defaults, then appends every valid word from each
/// file returned by -kb_wordListPaths (one word per line). Duplicates are
/// dropped and first-seen order is kept. Unreadable or empty files are skipped.
///
/// @return The merged word list; never nil.
- (NSArray<NSString *> *)kb_loadWords {
    NSMutableOrderedSet<NSString *> *uniqueWords =
        [NSMutableOrderedSet orderedSetWithArray:[self.class kb_defaultWords]];
    NSCharacterSet *lineBreaks = [NSCharacterSet newlineCharacterSet];

    for (NSString *listPath in [self kb_wordListPaths]) {
        if (listPath.length == 0) {
            continue;
        }
        // Best effort: a read failure yields nil, which is treated as "no content".
        NSString *text = [NSString stringWithContentsOfFile:listPath
                                                   encoding:NSUTF8StringEncoding
                                                      error:NULL];
        if (text.length == 0) {
            continue;
        }
        for (NSString *rawLine in [text componentsSeparatedByCharactersInSet:lineBreaks]) {
            NSString *candidate = [self kb_sanitizedWordFromLine:rawLine];
            if (candidate.length > 0) {
                [uniqueWords addObject:candidate];
            }
        }
    }

    return uniqueWords.array ?: @[];
}
/// Returns the candidate locations of kb_words.txt, in lookup order.
///
/// 1. The App Group container (per the original note, an override slot for a
///    larger server-downloaded list). The path is listed whenever the container
///    resolves, whether or not the file exists.
/// 2. The copy shipped in the main bundle, if present.
- (NSArray<NSString *> *)kb_wordListPaths {
    NSMutableArray<NSString *> *candidates = [NSMutableArray array];

    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSURL *groupContainer = [fileManager containerURLForSecurityApplicationGroupIdentifier:AppGroup];
    NSString *containerPath = groupContainer.path;
    if (containerPath.length > 0) {
        [candidates addObject:[containerPath stringByAppendingPathComponent:@"kb_words.txt"]];
    }

    NSString *bundledList = [[NSBundle mainBundle] pathForResource:@"kb_words" ofType:@"txt"];
    if (bundledList.length > 0) {
        [candidates addObject:bundledList];
    }

    return candidates;
}
/// Normalizes one line of a word-list file into a dictionary word.
///
/// The line is trimmed of surrounding whitespace and lowercased. It is accepted
/// only if every remaining character is an ASCII letter `a`–`z`.
///
/// @param line A raw line from a word-list file.
/// @return The cleaned word, or `@""` when the line is empty or contains any other character.
- (NSString *)kb_sanitizedWordFromLine:(NSString *)line {
    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *candidate = [line stringByTrimmingCharactersInSet:blanks].lowercaseString;
    NSUInteger length = candidate.length;
    if (length == 0) {
        return @"";
    }
    for (NSUInteger idx = 0; idx < length; idx++) {
        unichar c = [candidate characterAtIndex:idx];
        BOOL isAsciiLowercaseLetter = (c >= 'a' && c <= 'z');
        if (!isAsciiLowercaseLetter) {
            return @"";
        }
    }
    return candidate;
}
/// Built-in English starter vocabulary.
///
/// Used as the seed of the Latin word list, as the priority set for ranking,
/// and as the fallback when a language JSON file cannot be loaded.
/// The array is immutable and built once, like the other static tables in this
/// file, instead of being reallocated on every call.
+ (NSArray<NSString *> *)kb_defaultWords {
    static NSArray<NSString *> *defaultWords = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        defaultWords = @[
            @"a", @"an", @"and", @"are", @"as", @"at",
            @"app", @"ap", @"apple", @"apply", @"april", @"application",
            @"about", @"above", @"after", @"again", @"against", @"all",
            @"am", @"among", @"amount", @"any", @"around",
            @"be", @"because", @"been", @"before", @"being", @"below",
            @"best", @"between", @"both", @"but", @"by",
            @"can", @"could", @"come", @"common", @"case",
            @"do", @"does", @"down", @"day",
            @"each", @"early", @"end", @"even", @"every",
            @"for", @"from", @"first", @"found", @"free",
            @"get", @"good", @"great", @"go",
            @"have", @"has", @"had", @"help", @"how",
            @"in", @"is", @"it", @"if", @"into",
            @"just", @"keep", @"kind", @"know",
            @"like", @"look", @"long", @"last",
            @"make", @"more", @"most", @"my",
            @"new", @"no", @"not", @"now",
            @"of", @"on", @"one", @"or", @"other", @"our", @"out",
            @"people", @"place", @"please",
            @"quick", @"quite",
            @"right", @"read", @"real",
            @"see", @"say", @"some", @"such", @"so",
            @"the", @"to", @"this", @"that", @"them", @"then", @"there", @"they", @"these", @"time",
            @"use", @"up", @"under",
            @"very",
            @"we", @"with", @"what", @"when", @"where", @"who", @"why", @"will", @"would",
            @"you", @"your"
        ];
    });
    return defaultWords;
}
#pragma mark - Engine Type Management
/// Selects the engine from its string identifier and drops caches the new engine does not use.
///
/// Recognized identifiers: `latin`, `spanish`, `english`, `portuguese`,
/// `indonesian`, `pinyin_traditional`, `pinyin_simplified`, `bopomofo`.
/// Any other value (including nil) selects the Latin engine. The log line
/// prints the string as passed in, not the resolved type.
///
/// @param engineTypeString The engine identifier.
- (void)setEngineTypeFromString:(NSString *)engineTypeString {
    static NSDictionary<NSString *, NSNumber *> *typesByName = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        typesByName = @{
            @"latin": @(KBSuggestionEngineTypeLatin),
            @"spanish": @(KBSuggestionEngineTypeSpanish),
            @"english": @(KBSuggestionEngineTypeEnglish),
            @"portuguese": @(KBSuggestionEngineTypePortuguese),
            @"indonesian": @(KBSuggestionEngineTypeIndonesian),
            @"pinyin_traditional": @(KBSuggestionEngineTypePinyinTraditional),
            @"pinyin_simplified": @(KBSuggestionEngineTypePinyinSimplified),
            @"bopomofo": @(KBSuggestionEngineTypeBopomofo),
        };
    });

    NSNumber *boxedType = (engineTypeString != nil) ? typesByName[engineTypeString] : nil;
    if (boxedType != nil) {
        self.engineType = (KBSuggestionEngineType)boxedType.integerValue;
    } else {
        self.engineType = KBSuggestionEngineTypeLatin;
    }

    [self kb_trimCachesForEngineType:self.engineType];
    NSLog(@"[KBSuggestionEngine] Engine type set to: %@", engineTypeString);
}
#pragma mark - English Suggestions
/// Suggests English words for `prefix`, loading the English list on first use.
/// Falls back to the generic Latin list when the English list has no match.
- (NSArray<NSString *> *)kb_englishSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (self.englishWords == nil) {
        self.englishWords = [self kb_loadEnglishWords];
    }
    NSArray<NSString *> *hits = [self kb_suggestionsFromWordList:self.englishWords
                                                          prefix:prefix
                                                           limit:limit];
    if (hits.count > 0) {
        return hits;
    }
    return [self kb_latinSuggestionsForPrefix:prefix limit:limit];
}
/// Loads the English word list from `english_words.json` in the main bundle.
///
/// The file must be a JSON object whose `words` key holds an array; entries
/// that are not strings are skipped. On any failure (missing file, unreadable
/// data, malformed JSON, wrong shape, no usable entries) the built-in default
/// words are returned instead.
///
/// @return The parsed words, or `+kb_defaultWords` on failure.
- (NSArray<NSString *> *)kb_loadEnglishWords {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"english_words" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] english_words.json not found, using default words");
        return [self.class kb_defaultWords];
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read english_words.json");
        return [self.class kb_defaultWords];
    }
    NSError *error = nil;
    // Judge success by the returned object, not by `error`: the error pointer
    // is only meaningful when the call reports failure.
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse english_words.json: %@", error);
        return [self.class kb_defaultWords];
    }
    id wordsArray = ((NSDictionary *)json)[@"words"];
    if (![wordsArray isKindOfClass:NSArray.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid words array in english_words.json");
        return [self.class kb_defaultWords];
    }
    NSMutableArray<NSString *> *result = [NSMutableArray array];
    for (id item in (NSArray *)wordsArray) {
        if ([item isKindOfClass:NSString.class]) {
            [result addObject:item];
        }
    }
    NSLog(@"[KBSuggestionEngine] Loaded %lu English words", (unsigned long)result.count);
    return result.count > 0 ? [result copy] : [self.class kb_defaultWords];
}
#pragma mark - Latin Suggestions
/// Suggests words from the generic Latin list that start with `prefix`.
///
/// The list is loaded on first use. Up to `limit * 3` matches are collected in
/// list order, then ranked by:
///   1. selection count, most-selected first;
///   2. membership in the built-in priority words;
///   3. plain string comparison.
///
/// Fix: the count comparison used to return NSOrderedAscending when the
/// *second* word had the higher count, which sorted frequently selected words
/// last. The word with the higher count now sorts first.
///
/// @param prefix Typed text; compared in lowercase.
/// @param limit  Maximum number of suggestions returned.
/// @return Up to `limit` ranked matches, or an empty array.
- (NSArray<NSString *> *)kb_latinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (!self.words) {
        self.words = [self kb_loadWords];
    }
    NSString *lower = prefix.lowercaseString;
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    for (NSString *word in self.words) {
        if ([word hasPrefix:lower]) {
            [matches addObject:word];
            // Over-collect so ranking has more than `limit` candidates to pick from.
            if (matches.count >= limit * 3) {
                break;
            }
        }
    }
    if (matches.count == 0) { return @[]; }
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` before `b`: higher count wins.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        BOOL pa = [self.priorityWords containsObject:a];
        BOOL pb = [self.priorityWords containsObject:b];
        if (pa != pb) {
            return pa ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];
    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
#pragma mark - Traditional Chinese Pinyin Suggestions
/// Suggests Traditional Chinese candidates for a pinyin prefix.
///
/// Candidates for the exact key are added first, then candidates of every
/// longer key starting with the prefix until at least `limit * 2` have been
/// gathered. They are ranked by selection count (most-selected first), then by
/// string comparison. With no candidates, the sample-word fallback is returned.
///
/// Fix: the count comparison was inverted and sorted frequently selected
/// candidates last; the higher count now sorts first.
///
/// @param prefix Typed pinyin; compared in lowercase.
/// @param limit  Maximum number of suggestions returned.
- (NSArray<NSString *> *)kb_traditionalPinyinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (!self.pinyinToTraditionalMap) {
        self.pinyinToTraditionalMap = [self kb_loadPinyinToTraditionalMap];
    }
    NSString *lower = prefix.lowercaseString;
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    NSArray<NSString *> *directMatches = self.pinyinToTraditionalMap[lower];
    if (directMatches.count > 0) {
        [matches addObjectsFromArray:directMatches];
    }
    for (NSString *key in self.pinyinToTraditionalMap) {
        if ([key hasPrefix:lower] && ![key isEqualToString:lower]) {
            NSArray<NSString *> *candidates = self.pinyinToTraditionalMap[key];
            [matches addObjectsFromArray:candidates];
            if (matches.count >= limit * 2) {
                break;
            }
        }
    }
    if (matches.count == 0) {
        return [self kb_fallbackTraditionalSuggestions:lower limit:limit];
    }
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` before `b`: higher count wins.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];
    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
/// Returns the first `limit` sample Traditional Chinese words, loading them on first use.
///
/// `prefix` is accepted for signature symmetry but is not used for matching.
/// A `limit` of 0 now yields an empty array; the previous loop appended one
/// word before checking the limit.
///
/// @param prefix Unused.
/// @param limit  Maximum number of words returned.
- (NSArray<NSString *> *)kb_fallbackTraditionalSuggestions:(NSString *)prefix limit:(NSUInteger)limit {
    if (!self.traditionalChineseWords) {
        self.traditionalChineseWords = [self kb_loadTraditionalChineseWords];
    }
    NSArray<NSString *> *sampleWords = self.traditionalChineseWords;
    NSUInteger count = MIN(limit, sampleWords.count);
    if (count == 0) {
        return @[];
    }
    return [sampleWords subarrayWithRange:NSMakeRange(0, count)];
}
#pragma mark - Simplified Chinese Pinyin Suggestions
/// Suggests Simplified Chinese candidates for a pinyin prefix.
///
/// Uses the pinyin -> Traditional map and converts every candidate with
/// -kb_toSimplified:. Exact-key candidates are added first, then candidates of
/// longer keys starting with the prefix until at least `limit * 2` have been
/// gathered. They are ranked by selection count (most-selected first), then by
/// string comparison. With no candidates, the sample-word fallback is returned.
///
/// Fix: the count comparison was inverted and sorted frequently selected
/// candidates last; the higher count now sorts first.
///
/// @param prefix Typed pinyin; compared in lowercase.
/// @param limit  Maximum number of suggestions returned.
- (NSArray<NSString *> *)kb_simplifiedPinyinSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (!self.pinyinToTraditionalMap) {
        self.pinyinToTraditionalMap = [self kb_loadPinyinToTraditionalMap];
    }
    NSString *lower = prefix.lowercaseString;
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    NSArray<NSString *> *directMatches = self.pinyinToTraditionalMap[lower];
    if (directMatches.count > 0) {
        for (NSString *tradChar in directMatches) {
            NSString *simplified = [self kb_toSimplified:tradChar];
            if (simplified.length > 0) {
                [matches addObject:simplified];
            }
        }
    }
    for (NSString *key in self.pinyinToTraditionalMap) {
        if ([key hasPrefix:lower] && ![key isEqualToString:lower]) {
            NSArray<NSString *> *candidates = self.pinyinToTraditionalMap[key];
            for (NSString *tradChar in candidates) {
                NSString *simplified = [self kb_toSimplified:tradChar];
                if (simplified.length > 0) {
                    [matches addObject:simplified];
                }
            }
            if (matches.count >= limit * 2) {
                break;
            }
        }
    }
    if (matches.count == 0) {
        return [self kb_fallbackSimplifiedSuggestions:lower limit:limit];
    }
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` before `b`: higher count wins.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];
    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
/// Returns the first `limit` sample Simplified Chinese words, loading them on first use.
///
/// `prefix` is accepted for signature symmetry but is not used for matching.
/// A `limit` of 0 now yields an empty array; the previous loop appended one
/// word before checking the limit.
///
/// @param prefix Unused.
/// @param limit  Maximum number of words returned.
- (NSArray<NSString *> *)kb_fallbackSimplifiedSuggestions:(NSString *)prefix limit:(NSUInteger)limit {
    if (!self.simplifiedChineseWords) {
        self.simplifiedChineseWords = [self kb_loadSimplifiedChineseWords];
    }
    NSArray<NSString *> *sampleWords = self.simplifiedChineseWords;
    NSUInteger count = MIN(limit, sampleWords.count);
    if (count == 0) {
        return @[];
    }
    return [sampleWords subarrayWithRange:NSMakeRange(0, count)];
}
/// Converts a Traditional Chinese string to Simplified Chinese using a built-in lookup table.
///
/// If the whole input is a key of the table, the mapped value is returned
/// directly. Otherwise a mutable copy of the input is made and every table key
/// found in it is replaced by its mapped value.
///
/// NOTE(review): several table literals appear empty in the reviewed copy of
/// this file — confirm the file's encoding is intact before relying on or
/// editing the table.
///
/// @param traditional The string to convert.
/// @return The converted string, or `traditional` itself when the converted result is empty.
- (NSString *)kb_toSimplified:(NSString *)traditional {
static NSDictionary<NSString *, NSString *> *tradToSimpMap = nil;
static dispatch_once_t onceToken;
// The table is built once and shared by all calls.
dispatch_once(&onceToken, ^{
tradToSimpMap = @{
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"广", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"怀", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @"",
@"": @"", @"": @"", @"": @"", @"": @"", @"": @""
};
});
// Fast path: the whole input is itself a table key.
if (tradToSimpMap[traditional]) {
return tradToSimpMap[traditional];
}
// Slow path: replace each table key wherever it occurs in the input.
NSMutableString *result = [traditional mutableCopy];
[tradToSimpMap enumerateKeysAndObjectsUsingBlock:^(NSString *trad, NSString *simp, BOOL *stop) {
[result replaceOccurrencesOfString:trad withString:simp options:0 range:NSMakeRange(0, result.length)];
}];
return result.length > 0 ? [result copy] : traditional;
}
#pragma mark - Bopomofo (Zhuyin) Suggestions
/// Suggests Chinese candidates for a Bopomofo (Zhuyin) prefix.
///
/// Candidates for the exact key are added first, then candidates of every
/// longer key starting with the prefix until at least `limit * 2` have been
/// gathered. They are ranked by selection count (most-selected first), then by
/// string comparison. With no candidates, the Traditional sample-word fallback
/// is returned.
///
/// Fix: the count comparison was inverted and sorted frequently selected
/// candidates last; the higher count now sorts first.
///
/// @param prefix Typed Bopomofo sequence; used as-is.
/// @param limit  Maximum number of suggestions returned.
- (NSArray<NSString *> *)kb_bopomofoSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (!self.bopomofoToChineseMap) {
        self.bopomofoToChineseMap = [self kb_loadBopomofoToChineseMap];
    }
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    NSArray<NSString *> *directMatches = self.bopomofoToChineseMap[prefix];
    if (directMatches.count > 0) {
        [matches addObjectsFromArray:directMatches];
    }
    for (NSString *key in self.bopomofoToChineseMap) {
        if ([key hasPrefix:prefix] && ![key isEqualToString:prefix]) {
            NSArray<NSString *> *candidates = self.bopomofoToChineseMap[key];
            [matches addObjectsFromArray:candidates];
            if (matches.count >= limit * 2) {
                break;
            }
        }
    }
    if (matches.count == 0) {
        return [self kb_fallbackTraditionalSuggestions:prefix limit:limit];
    }
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = self.selectionCounts[a].integerValue;
        NSInteger cb = self.selectionCounts[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` before `b`: higher count wins.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];
    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
#pragma mark - Chinese Word Loading
/// Returns the hard-coded sample list of common Traditional Chinese words
/// used by the Traditional fallback suggestions.
///
/// NOTE(review): one literal reads as an empty string in the reviewed copy —
/// confirm the file's encoding is intact.
- (NSArray<NSString *> *)kb_loadTraditionalChineseWords {
// Load common Traditional Chinese words.
// For now this returns a few sample words; the real list should be loaded from a file or database.
return @[
@"你好", @"謝謝", @"對不起", @"再見", @"早安",
@"晚安", @"請問", @"不好意思", @"沒關係", @"加油",
@"台灣", @"台北", @"高雄", @"台中", @"台南",
@"朋友", @"家人", @"工作", @"學習", @"生活",
@"時間", @"地點", @"方法", @"問題", @"答案",
@"喜歡", @"", @"想念", @"開心", @"快樂",
@"美麗", @"漂亮", @"帥氣", @"可愛", @"溫柔"
];
}
/// Returns the hard-coded sample list of common Simplified Chinese words
/// used by the Simplified fallback suggestions.
///
/// NOTE(review): one literal reads as an empty string in the reviewed copy —
/// confirm the file's encoding is intact.
- (NSArray<NSString *> *)kb_loadSimplifiedChineseWords {
return @[
@"你好", @"谢谢", @"对不起", @"再见", @"早安",
@"晚安", @"请问", @"不好意思", @"没关系", @"加油",
@"中国", @"北京", @"上海", @"广州", @"深圳",
@"朋友", @"家人", @"工作", @"学习", @"生活",
@"时间", @"地点", @"方法", @"问题", @"答案",
@"喜欢", @"", @"想念", @"开心", @"快乐",
@"美丽", @"漂亮", @"帅气", @"可爱", @"温柔"
];
}
#pragma mark - Pinyin & Bopomofo Map Loading
/// Loads the pinyin -> Traditional Chinese candidate map from
/// `pinyin_to_traditional.json` in the main bundle.
///
/// The file must be a JSON object whose `mappings` key holds an object of
/// `key: [candidate, ...]` pairs. Values that are not arrays, array items that
/// are not strings, and keys left with no string candidates are skipped.
/// On any failure an empty dictionary is returned.
///
/// @return The parsed map; never nil.
- (NSDictionary<NSString *, NSArray<NSString *> *> *)kb_loadPinyinToTraditionalMap {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"pinyin_to_traditional" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] pinyin_to_traditional.json not found, using empty map");
        return @{};
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read pinyin_to_traditional.json");
        return @{};
    }
    NSError *error = nil;
    // Judge success by the returned object, not by `error`: the error pointer
    // is only meaningful when the call reports failure.
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse pinyin_to_traditional.json: %@", error);
        return @{};
    }
    id mappings = ((NSDictionary *)json)[@"mappings"];
    if (![mappings isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid mappings in pinyin_to_traditional.json");
        return @{};
    }
    NSMutableDictionary<NSString *, NSArray<NSString *> *> *result = [NSMutableDictionary dictionary];
    [(NSDictionary *)mappings enumerateKeysAndObjectsUsingBlock:^(NSString *key, id obj, BOOL *stop) {
        if ([obj isKindOfClass:NSArray.class]) {
            NSMutableArray<NSString *> *chars = [NSMutableArray array];
            for (id item in (NSArray *)obj) {
                if ([item isKindOfClass:NSString.class]) {
                    [chars addObject:item];
                }
            }
            if (chars.count > 0) {
                result[key] = [chars copy];
            }
        }
    }];
    NSLog(@"[KBSuggestionEngine] Loaded %lu pinyin mappings", (unsigned long)result.count);
    return [result copy];
}
/// Loads the Bopomofo -> Chinese candidate map from `bopomofo_to_chinese.json`
/// in the main bundle.
///
/// The file must be a JSON object whose `mappings` key holds an object of
/// `key: [candidate, ...]` pairs. Values that are not arrays, array items that
/// are not strings, and keys left with no string candidates are skipped.
/// On any failure an empty dictionary is returned.
///
/// @return The parsed map; never nil.
- (NSDictionary<NSString *, NSArray<NSString *> *> *)kb_loadBopomofoToChineseMap {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"bopomofo_to_chinese" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] bopomofo_to_chinese.json not found, using empty map");
        return @{};
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read bopomofo_to_chinese.json");
        return @{};
    }
    NSError *error = nil;
    // Judge success by the returned object, not by `error`: the error pointer
    // is only meaningful when the call reports failure.
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse bopomofo_to_chinese.json: %@", error);
        return @{};
    }
    id mappings = ((NSDictionary *)json)[@"mappings"];
    if (![mappings isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid mappings in bopomofo_to_chinese.json");
        return @{};
    }
    NSMutableDictionary<NSString *, NSArray<NSString *> *> *result = [NSMutableDictionary dictionary];
    [(NSDictionary *)mappings enumerateKeysAndObjectsUsingBlock:^(NSString *key, id obj, BOOL *stop) {
        if ([obj isKindOfClass:NSArray.class]) {
            NSMutableArray<NSString *> *chars = [NSMutableArray array];
            for (id item in (NSArray *)obj) {
                if ([item isKindOfClass:NSString.class]) {
                    [chars addObject:item];
                }
            }
            if (chars.count > 0) {
                result[key] = [chars copy];
            }
        }
    }];
    NSLog(@"[KBSuggestionEngine] Loaded %lu bopomofo mappings", (unsigned long)result.count);
    return [result copy];
}
#pragma mark - Spanish Suggestions
/// Suggests Spanish words for `prefix`, loading the Spanish list on first use.
/// Falls back to the generic Latin list when the Spanish list has no match.
- (NSArray<NSString *> *)kb_spanishSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (self.spanishWords == nil) {
        self.spanishWords = [self kb_loadSpanishWords];
    }
    NSArray<NSString *> *hits = [self kb_suggestionsFromWordList:self.spanishWords
                                                          prefix:prefix
                                                           limit:limit];
    if (hits.count > 0) {
        return hits;
    }
    return [self kb_latinSuggestionsForPrefix:prefix limit:limit];
}
/// Loads the Spanish word list from `spanish_words.json` in the main bundle.
///
/// The file must be a JSON object whose `words` key holds an array; entries
/// that are not strings are skipped. On any failure (missing file, unreadable
/// data, malformed JSON, wrong shape, no usable entries) the built-in default
/// words are returned instead.
///
/// @return The parsed words, or `+kb_defaultWords` on failure.
- (NSArray<NSString *> *)kb_loadSpanishWords {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"spanish_words" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] spanish_words.json not found, using default words");
        return [self.class kb_defaultWords];
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read spanish_words.json");
        return [self.class kb_defaultWords];
    }
    NSError *error = nil;
    // Judge success by the returned object, not by `error`: the error pointer
    // is only meaningful when the call reports failure.
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse spanish_words.json: %@", error);
        return [self.class kb_defaultWords];
    }
    id wordsArray = ((NSDictionary *)json)[@"words"];
    if (![wordsArray isKindOfClass:NSArray.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid words array in spanish_words.json");
        return [self.class kb_defaultWords];
    }
    NSMutableArray<NSString *> *result = [NSMutableArray array];
    for (id item in (NSArray *)wordsArray) {
        if ([item isKindOfClass:NSString.class]) {
            [result addObject:item];
        }
    }
    NSLog(@"[KBSuggestionEngine] Loaded %lu Spanish words", (unsigned long)result.count);
    return result.count > 0 ? [result copy] : [self.class kb_defaultWords];
}
#pragma mark - Portuguese Suggestions
/// Suggests Portuguese words for `prefix`, loading the Portuguese list on first use.
/// Falls back to the generic Latin list when the Portuguese list has no match.
- (NSArray<NSString *> *)kb_portugueseSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (self.portugueseWords == nil) {
        self.portugueseWords = [self kb_loadPortugueseWords];
    }
    NSArray<NSString *> *hits = [self kb_suggestionsFromWordList:self.portugueseWords
                                                          prefix:prefix
                                                           limit:limit];
    if (hits.count > 0) {
        return hits;
    }
    return [self kb_latinSuggestionsForPrefix:prefix limit:limit];
}
/// Loads the Portuguese word list from `portuguese_words.json` in the main bundle.
///
/// The file must be a JSON object whose `words` key holds an array; entries
/// that are not strings are skipped. On any failure (missing file, unreadable
/// data, malformed JSON, wrong shape, no usable entries) the built-in default
/// words are returned instead.
///
/// @return The parsed words, or `+kb_defaultWords` on failure.
- (NSArray<NSString *> *)kb_loadPortugueseWords {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"portuguese_words" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] portuguese_words.json not found, using default words");
        return [self.class kb_defaultWords];
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read portuguese_words.json");
        return [self.class kb_defaultWords];
    }
    NSError *error = nil;
    // Judge success by the returned object, not by `error`: the error pointer
    // is only meaningful when the call reports failure.
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse portuguese_words.json: %@", error);
        return [self.class kb_defaultWords];
    }
    id wordsArray = ((NSDictionary *)json)[@"words"];
    if (![wordsArray isKindOfClass:NSArray.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid words array in portuguese_words.json");
        return [self.class kb_defaultWords];
    }
    NSMutableArray<NSString *> *result = [NSMutableArray array];
    for (id item in (NSArray *)wordsArray) {
        if ([item isKindOfClass:NSString.class]) {
            [result addObject:item];
        }
    }
    NSLog(@"[KBSuggestionEngine] Loaded %lu Portuguese words", (unsigned long)result.count);
    return result.count > 0 ? [result copy] : [self.class kb_defaultWords];
}
#pragma mark - Indonesian Suggestions
/// Suggests Indonesian words for `prefix`, loading the Indonesian list on first use.
/// Falls back to the generic Latin list when the Indonesian list has no match.
- (NSArray<NSString *> *)kb_indonesianSuggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (self.indonesianWords == nil) {
        self.indonesianWords = [self kb_loadIndonesianWords];
    }
    NSArray<NSString *> *hits = [self kb_suggestionsFromWordList:self.indonesianWords
                                                          prefix:prefix
                                                           limit:limit];
    if (hits.count > 0) {
        return hits;
    }
    return [self kb_latinSuggestionsForPrefix:prefix limit:limit];
}
/// Loads the Indonesian word list from `indonesian_words.json` in the main bundle.
///
/// The file must be a JSON object whose `words` key holds an array; entries
/// that are not strings are skipped. On any failure (missing file, unreadable
/// data, malformed JSON, wrong shape, no usable entries) the built-in default
/// words are returned instead.
///
/// @return The parsed words, or `+kb_defaultWords` on failure.
- (NSArray<NSString *> *)kb_loadIndonesianWords {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"indonesian_words" ofType:@"json"];
    if (!path) {
        NSLog(@"[KBSuggestionEngine] indonesian_words.json not found, using default words");
        return [self.class kb_defaultWords];
    }
    NSData *data = [NSData dataWithContentsOfFile:path];
    if (!data) {
        NSLog(@"[KBSuggestionEngine] Failed to read indonesian_words.json");
        return [self.class kb_defaultWords];
    }
    NSError *error = nil;
    // Judge success by the returned object, not by `error`: the error pointer
    // is only meaningful when the call reports failure.
    id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
    if (![json isKindOfClass:NSDictionary.class]) {
        NSLog(@"[KBSuggestionEngine] Failed to parse indonesian_words.json: %@", error);
        return [self.class kb_defaultWords];
    }
    id wordsArray = ((NSDictionary *)json)[@"words"];
    if (![wordsArray isKindOfClass:NSArray.class]) {
        NSLog(@"[KBSuggestionEngine] Invalid words array in indonesian_words.json");
        return [self.class kb_defaultWords];
    }
    NSMutableArray<NSString *> *result = [NSMutableArray array];
    for (id item in (NSArray *)wordsArray) {
        if ([item isKindOfClass:NSString.class]) {
            [result addObject:item];
        }
    }
    NSLog(@"[KBSuggestionEngine] Loaded %lu Indonesian words", (unsigned long)result.count);
    return result.count > 0 ? [result copy] : [self.class kb_defaultWords];
}
#pragma mark - Word List Helpers
/// Suggests words from `words` that start with the lowercased `prefix`.
///
/// Up to `limit * 2` matches are collected in list order, then ranked by
/// selection count (most-selected first) and, on ties, by string comparison.
///
/// Fixes:
///  - The count comparison was inverted and sorted frequently selected words
///    last; the higher count now sorts first.
///  - Counts are looked up under the lowercased word, matching the key that
///    -recordSelection: stores, so a word containing capitals still finds its
///    count.
///
/// @param words  The dictionary to search.
/// @param prefix Typed text; compared in lowercase (matching is case-sensitive
///               against that lowercase form).
/// @param limit  Maximum number of suggestions returned.
/// @return Up to `limit` ranked matches, or an empty array.
- (NSArray<NSString *> *)kb_suggestionsFromWordList:(NSArray<NSString *> *)words
                                             prefix:(NSString *)prefix
                                              limit:(NSUInteger)limit {
    NSString *lower = prefix.lowercaseString;
    NSMutableArray<NSString *> *matches = [NSMutableArray array];
    // Selection count per match, resolved once so the comparator stays cheap.
    NSMutableDictionary<NSString *, NSNumber *> *countByWord = [NSMutableDictionary dictionary];
    for (NSString *word in words) {
        if ([word hasPrefix:lower]) {
            [matches addObject:word];
            countByWord[word] = self.selectionCounts[word.lowercaseString] ?: @0;
            if (matches.count >= limit * 2) {
                break;
            }
        }
    }
    if (matches.count == 0) { return @[]; }
    [matches sortUsingComparator:^NSComparisonResult(NSString *a, NSString *b) {
        NSInteger ca = countByWord[a].integerValue;
        NSInteger cb = countByWord[b].integerValue;
        if (ca != cb) {
            // NSOrderedAscending places `a` before `b`: higher count wins.
            return (ca > cb) ? NSOrderedAscending : NSOrderedDescending;
        }
        return [a compare:b];
    }];
    if (matches.count > limit) {
        return [matches subarrayWithRange:NSMakeRange(0, limit)];
    }
    return matches.copy;
}
/// Releases every cached dictionary that the given engine type does not keep.
///
/// Retained per engine:
///  - English / Spanish / Portuguese / Indonesian: only that language's word list.
///  - Pinyin (Traditional or Simplified): the pinyin map and both Chinese sample lists.
///  - Bopomofo: the Bopomofo map and the Traditional sample list.
///  - Latin or any unknown value: only the generic Latin word list.
///
/// Dropped caches are reloaded lazily by the suggestion methods.
///
/// @param engineType The engine that is about to be used.
- (void)kb_trimCachesForEngineType:(KBSuggestionEngineType)engineType {
    BOOL keepLatin = NO;
    BOOL keepEnglish = NO;
    BOOL keepSpanish = NO;
    BOOL keepPortuguese = NO;
    BOOL keepIndonesian = NO;
    BOOL keepTraditionalWords = NO;
    BOOL keepSimplifiedWords = NO;
    BOOL keepPinyinMap = NO;
    BOOL keepBopomofoMap = NO;

    switch (engineType) {
        case KBSuggestionEngineTypeEnglish:
            keepEnglish = YES;
            break;
        case KBSuggestionEngineTypeSpanish:
            keepSpanish = YES;
            break;
        case KBSuggestionEngineTypePortuguese:
            keepPortuguese = YES;
            break;
        case KBSuggestionEngineTypeIndonesian:
            keepIndonesian = YES;
            break;
        case KBSuggestionEngineTypePinyinTraditional:
        case KBSuggestionEngineTypePinyinSimplified:
            keepTraditionalWords = YES;
            keepSimplifiedWords = YES;
            keepPinyinMap = YES;
            break;
        case KBSuggestionEngineTypeBopomofo:
            keepTraditionalWords = YES;
            keepBopomofoMap = YES;
            break;
        case KBSuggestionEngineTypeLatin:
        default:
            keepLatin = YES;
            break;
    }

    if (!keepLatin) { self.words = nil; }
    if (!keepEnglish) { self.englishWords = nil; }
    if (!keepSpanish) { self.spanishWords = nil; }
    if (!keepPortuguese) { self.portugueseWords = nil; }
    if (!keepIndonesian) { self.indonesianWords = nil; }
    if (!keepTraditionalWords) { self.traditionalChineseWords = nil; }
    if (!keepSimplifiedWords) { self.simplifiedChineseWords = nil; }
    if (!keepPinyinMap) { self.pinyinToTraditionalMap = nil; }
    if (!keepBopomofoMap) { self.bopomofoToChineseMap = nil; }
}
#pragma mark - Safety Filter
/// Removes unusable and sensitive entries from `items`, keeping at most `limit`.
///
/// Non-string objects, empty strings and anything flagged by
/// -kb_isSensitiveSuggestion: are dropped. Duplicates collapse to their first
/// occurrence and input order is preserved.
///
/// @param items Raw candidates from a suggestion helper.
/// @param limit Maximum number of suggestions to keep.
/// @return The filtered suggestions; never nil.
- (NSArray<NSString *> *)kb_filterSensitiveSuggestions:(NSArray<NSString *> *)items
                                                 limit:(NSUInteger)limit {
    if (limit == 0 || items.count == 0) {
        return @[];
    }
    NSMutableOrderedSet<NSString *> *accepted = [NSMutableOrderedSet orderedSet];
    for (id candidate in items) {
        BOOL usable = [candidate isKindOfClass:NSString.class]
            && [(NSString *)candidate length] > 0
            && ![self kb_isSensitiveSuggestion:(NSString *)candidate];
        if (!usable) {
            continue;
        }
        [accepted addObject:(NSString *)candidate];
        if (accepted.count >= limit) {
            break;
        }
    }
    return accepted.array ?: @[];
}
/// Decides whether a suggestion must be hidden.
///
/// The word is normalized first. It is sensitive when the normalized form is
/// empty, equals a blocked word, or contains any blocked fragment.
///
/// @param word The candidate suggestion.
/// @return YES when the suggestion should be suppressed.
- (BOOL)kb_isSensitiveSuggestion:(NSString *)word {
    NSString *token = [self kb_normalizedSuggestionToken:word];
    if (token.length == 0) {
        return YES;
    }
    if ([[self.class kb_blockedSuggestionWords] containsObject:token]) {
        return YES;
    }
    NSArray<NSString *> *fragments = [self.class kb_blockedSuggestionFragments];
    NSUInteger firstHit = [fragments indexOfObjectPassingTest:^BOOL(NSString *fragment, NSUInteger idx, BOOL *stop) {
        return [token containsString:fragment];
    }];
    return firstHit != NSNotFound;
}
/// Produces the canonical form of a suggestion for block-list matching.
///
/// Steps: trim surrounding whitespace, lowercase, strip diacritics, then trim
/// leading and trailing punctuation and symbol characters.
///
/// The punctuation/symbol set is built once and reused; it was previously
/// rebuilt on every call, and this method runs for every candidate.
///
/// @param word The raw suggestion.
/// @return The normalized token, or `@""` for non-string or blank input.
- (NSString *)kb_normalizedSuggestionToken:(NSString *)word {
    if (![word isKindOfClass:NSString.class]) { return @""; }

    static NSCharacterSet *edgeTrimSet = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        NSMutableCharacterSet *combined = [[NSCharacterSet punctuationCharacterSet] mutableCopy];
        [combined formUnionWithCharacterSet:[NSCharacterSet symbolCharacterSet]];
        edgeTrimSet = [combined copy];
    });

    NSString *value = [[word stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]
                       lowercaseString];
    if (value.length == 0) { return @""; }
    value = [value stringByFoldingWithOptions:NSDiacriticInsensitiveSearch
                                       locale:[NSLocale currentLocale]];
    return [value stringByTrimmingCharactersInSet:edgeTrimSet];
}
/// Returns the set of normalized words that must never be suggested.
/// The set is built once on first use and matched by exact equality in
/// -kb_isSensitiveSuggestion:.
///
/// NOTE(review): one literal reads as an empty string in the reviewed copy —
/// confirm the file's encoding is intact.
+ (NSSet<NSString *> *)kb_blockedSuggestionWords {
static NSSet<NSString *> *words = nil;
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
// App Store compliance first: filter common high-risk words covering adult content, explicit sexual acts, drugs, violence and weapons.
words = [NSSet setWithArray:@[
@"sex", @"sexy", @"porn", @"porno", @"xxx", @"nude", @"naked",
@"fuck", @"fucking", @"shit", @"bitch", @"penis", @"vagina",
@"boob", @"rape", @"cocaine", @"heroin", @"drug", @"drugs",
@"kill", @"murder", @"gun", @"weapon",
@"sexo", @"porno", @"pornografia", @"violacion", @"violacao",
@"drogas", @"cocaina", @"heroina", @"arma", @"matar", @"muerte",
@"pene",
@"色情", @"裸露", @"裸体", @"裸聊", @"裸照",
@"强奸", @"毒品", @"海洛因", @"可卡因",
@"", @"武器", @"杀人", @"谋杀"
]];
});
return words;
}
/// Returns the substrings that mark a suggestion as sensitive wherever they
/// occur inside the normalized word. The array is built once on first use.
+ (NSArray<NSString *> *)kb_blockedSuggestionFragments {
    static dispatch_once_t buildOnce;
    static NSArray<NSString *> *blockedFragments = nil;
    dispatch_once(&buildOnce, ^{
        NSArray<NSString *> *latinFragments = @[
            @"porn", @"fuck", @"rape", @"cocaine", @"heroin",
        ];
        NSArray<NSString *> *chineseFragments = @[
            @"色情", @"裸聊", @"裸照", @"强奸", @"毒品", @"杀人",
        ];
        blockedFragments = [latinFragments arrayByAddingObjectsFromArray:chineseFragments];
    });
    return blockedFragments;
}
@end