过滤敏感词

This commit is contained in:
2026-03-06 18:54:43 +08:00
parent 442d56decd
commit 987391953a

View File

@@ -43,26 +43,41 @@
/// Returns up to `limit` suggestions for `prefix` from the engine selected by
/// `self.engineType`, after passing them through the sensitive-word filter.
///
/// @param prefix The text typed so far. An empty (or nil) prefix yields `@[]`.
/// @param limit  Maximum number of suggestions to return. Zero yields `@[]`.
/// @return The filtered suggestions, at most `limit` entries.
- (NSArray<NSString *> *)suggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (prefix.length == 0 || limit == 0) { return @[]; }

    // Over-fetch for small limits so that entries dropped by the filter can be
    // replaced by later candidates: ask for 4x the limit, capped at 80.
    // Limits of 80 or more are passed through unchanged.
    NSUInteger fetchLimit = limit;
    if (fetchLimit < 80) {
        fetchLimit = MIN((NSUInteger)80, fetchLimit * 4);
    }

    // Every branch must assign `raw` and fall through to the filter below;
    // returning directly from a case would bypass the sensitive-word filter.
    NSArray<NSString *> *raw = nil;
    switch (self.engineType) {
        case KBSuggestionEngineTypeEnglish:
            raw = [self kb_englishSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeSpanish:
            raw = [self kb_spanishSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypePortuguese:
            raw = [self kb_portugueseSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeIndonesian:
            raw = [self kb_indonesianSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypePinyinTraditional:
            raw = [self kb_traditionalPinyinSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypePinyinSimplified:
            raw = [self kb_simplifiedPinyinSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeBopomofo:
            raw = [self kb_bopomofoSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeLatin:
        default:
            raw = [self kb_latinSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
    }

    // The filter also trims the over-fetched list back down to `limit`.
    return [self kb_filterSensitiveSuggestions:raw limit:limit];
}
- (void)recordSelection:(NSString *)word {
@@ -880,4 +895,80 @@
}
}
#pragma mark - Safety Filter
/// Filters a candidate list down to at most `limit` acceptable suggestions.
///
/// Non-string entries, empty strings and anything flagged by
/// `kb_isSensitiveSuggestion:` are skipped. Original order is kept and
/// duplicates collapse to their first occurrence.
///
/// @param items Candidate suggestions; may be nil or empty.
/// @param limit Maximum number of entries to return.
/// @return The surviving suggestions, or `@[]` when there is nothing to return.
- (NSArray<NSString *> *)kb_filterSensitiveSuggestions:(NSArray<NSString *> *)items
                                                 limit:(NSUInteger)limit {
    if (limit == 0 || items.count == 0) {
        return @[];
    }

    NSMutableOrderedSet<NSString *> *accepted = [[NSMutableOrderedSet alloc] init];
    for (id candidate in items) {
        // The array is typed as strings, but entries are still checked at runtime.
        BOOL usable = [candidate isKindOfClass:[NSString class]]
            && [(NSString *)candidate length] > 0
            && ![self kb_isSensitiveSuggestion:(NSString *)candidate];
        if (!usable) {
            continue;
        }
        [accepted addObject:(NSString *)candidate];
        if (accepted.count >= limit) {
            break;
        }
    }

    NSArray<NSString *> *output = accepted.array;
    return output != nil ? output : @[];
}
/// Decides whether a suggestion must be hidden from the user.
///
/// The word is normalized with `kb_normalizedSuggestionToken:` and then
/// rejected when (a) nothing is left after normalization, (b) the normalized
/// token is exactly one of the blocked words, or (c) it contains any blocked
/// fragment as a substring.
///
/// NOTE(review): substring matching in (c) also rejects harmless words that
/// merely contain a fragment (e.g. "grape" contains "rape", "heroine" contains
/// "heroin") — confirm this trade-off is intended.
///
/// @param word The raw suggestion text.
/// @return YES when the suggestion should be suppressed.
- (BOOL)kb_isSensitiveSuggestion:(NSString *)word {
    NSString *token = [self kb_normalizedSuggestionToken:word];
    if (token.length == 0) {
        return YES;
    }

    NSSet<NSString *> *exactMatches = [[self class] kb_blockedSuggestionWords];
    if ([exactMatches containsObject:token]) {
        return YES;
    }

    NSArray<NSString *> *fragments = [[self class] kb_blockedSuggestionFragments];
    NSUInteger hitIndex =
        [fragments indexOfObjectPassingTest:^BOOL(NSString *fragment, NSUInteger idx, BOOL *stop) {
            return [token containsString:fragment];
        }];
    return hitIndex != NSNotFound;
}
/// Produces the canonical form of a suggestion used for block-list lookups.
///
/// Steps, in order: trim surrounding whitespace/newlines, lower-case, strip
/// diacritics (using the current locale), then trim punctuation and symbol
/// characters from both ends. Characters inside the word are left untouched.
///
/// @param word The raw suggestion; nil or a non-string object yields `@""`.
/// @return The normalized token, possibly empty.
- (NSString *)kb_normalizedSuggestionToken:(NSString *)word {
    BOOL isString = [word isKindOfClass:[NSString class]];
    if (!isString) {
        return @"";
    }

    NSCharacterSet *blankSet = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *lowered = [[word stringByTrimmingCharactersInSet:blankSet] lowercaseString];
    if (lowered.length == 0) {
        return @"";
    }

    NSString *folded = [lowered stringByFoldingWithOptions:NSDiacriticInsensitiveSearch
                                                    locale:[NSLocale currentLocale]];

    // Only leading/trailing punctuation and symbols are removed here.
    NSMutableCharacterSet *edgeNoise = [[NSMutableCharacterSet alloc] init];
    [edgeNoise formUnionWithCharacterSet:[NSCharacterSet punctuationCharacterSet]];
    [edgeNoise formUnionWithCharacterSet:[NSCharacterSet symbolCharacterSet]];
    return [folded stringByTrimmingCharactersInSet:edgeNoise];
}
/// The set of suggestions that are blocked on an exact match.
///
/// The set is built once and shared. Entries are written lower-case and
/// without diacritics, so lookups should use an equally normalized token.
///
/// @return The shared, immutable set of blocked words.
+ (NSSet<NSString *> *)kb_blockedSuggestionWords {
    static NSSet<NSString *> *blockedWords = nil;
    static dispatch_once_t buildOnce;
    dispatch_once(&buildOnce, ^{
        // English terms.
        NSArray<NSString *> *englishTerms = @[
            @"sex", @"sexy", @"porn", @"porno", @"xxx", @"nude", @"naked",
            @"fuck", @"fucking", @"shit", @"bitch", @"penis", @"vagina",
            @"boob", @"rape", @"cocaine", @"heroin", @"drug", @"drugs",
            @"kill", @"murder", @"gun", @"weapon"
        ];
        // Spanish / Portuguese terms ("porno" repeats and collapses in the set).
        NSArray<NSString *> *iberianTerms = @[
            @"sexo", @"porno", @"pornografia", @"violacion", @"violacao",
            @"drogas", @"cocaina", @"heroina", @"arma", @"matar", @"muerte",
            @"pene"
        ];
        // Chinese terms.
        NSArray<NSString *> *chineseTerms = @[
            @"色情", @"裸露", @"裸体", @"裸聊", @"裸照",
            @"强奸", @"毒品", @"海洛因", @"可卡因",
            @"枪", @"武器", @"杀人", @"谋杀"
        ];

        NSMutableSet<NSString *> *merged = [NSMutableSet setWithArray:englishTerms];
        [merged addObjectsFromArray:iberianTerms];
        [merged addObjectsFromArray:chineseTerms];
        blockedWords = [merged copy];
    });
    return blockedWords;
}
/// Fragments that block a suggestion when they occur anywhere inside it
/// (`kb_isSensitiveSuggestion:` tests them with `containsString:`).
///
/// The list is built once and shared.
///
/// @return The shared list of blocked fragments, Latin-script entries first.
+ (NSArray<NSString *> *)kb_blockedSuggestionFragments {
    static NSArray<NSString *> *blockedFragments = nil;
    static dispatch_once_t buildOnce;
    dispatch_once(&buildOnce, ^{
        NSArray<NSString *> *latinFragments = @[
            @"porn", @"fuck", @"rape", @"cocaine", @"heroin"
        ];
        NSArray<NSString *> *chineseFragments = @[
            @"色情", @"裸聊", @"裸照", @"强奸", @"毒品", @"杀人"
        ];
        blockedFragments = [latinFragments arrayByAddingObjectsFromArray:chineseFragments];
    });
    return blockedFragments;
}
@end