过滤敏感词
This commit is contained in:
@@ -43,26 +43,41 @@
|
||||
|
||||
/// Returns up to `limit` suggestions for `prefix`, produced by the helper that
/// matches `self.engineType` and then passed through the sensitive-word filter.
///
/// @param prefix The text typed so far. An empty (or nil) prefix yields no suggestions.
/// @param limit  Maximum number of suggestions to return. Zero yields no suggestions.
/// @return The filtered suggestions, at most `limit` entries; never nil on the
///         empty-input path.
- (NSArray<NSString *> *)suggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (prefix.length == 0 || limit == 0) { return @[]; }

    // Leave headroom for filtering: ask the engine for more candidates than the
    // caller wants so that removing sensitive words does not leave too few.
    // Small limits are quadrupled, capped at 80; limits of 80+ are used as-is.
    static const NSUInteger kFetchCeiling = 80;
    NSUInteger fetchLimit = limit;
    if (fetchLimit < kFetchCeiling) {
        fetchLimit = MIN(kFetchCeiling, fetchLimit * 4);
    }

    // Every branch only collects raw candidates; none may return directly,
    // otherwise the filter below would be skipped.
    NSArray<NSString *> *raw = nil;
    switch (self.engineType) {
        case KBSuggestionEngineTypeEnglish:
            raw = [self kb_englishSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeSpanish:
            raw = [self kb_spanishSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypePortuguese:
            raw = [self kb_portugueseSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeIndonesian:
            raw = [self kb_indonesianSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypePinyinTraditional:
            raw = [self kb_traditionalPinyinSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypePinyinSimplified:
            raw = [self kb_simplifiedPinyinSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeBopomofo:
            raw = [self kb_bopomofoSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
        case KBSuggestionEngineTypeLatin:
        default:
            // Latin is also the fallback for any unrecognized engine type.
            raw = [self kb_latinSuggestionsForPrefix:prefix limit:fetchLimit];
            break;
    }

    // Trim back down to the caller's limit while dropping sensitive entries.
    return [self kb_filterSensitiveSuggestions:raw limit:limit];
}
|
||||
|
||||
- (void)recordSelection:(NSString *)word {
|
||||
@@ -880,4 +895,80 @@
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - Safety Filter
|
||||
|
||||
/// Keeps the first `limit` distinct, non-empty, non-sensitive strings from
/// `items`, preserving their original order.
///
/// @param items Candidate suggestions; entries that are not strings are skipped.
/// @param limit Maximum number of entries to keep.
/// @return The surviving suggestions, or an empty array when there is nothing
///         to filter or `limit` is zero.
- (NSArray<NSString *> *)kb_filterSensitiveSuggestions:(NSArray<NSString *> *)items
                                                 limit:(NSUInteger)limit {
    if (limit == 0 || items.count == 0) {
        return @[];
    }

    // An ordered set gives de-duplication and stable ordering in one container.
    NSMutableOrderedSet<NSString *> *kept = [NSMutableOrderedSet orderedSet];
    for (id candidate in items) {
        BOOL isUsableString = [candidate isKindOfClass:NSString.class]
            && [(NSString *)candidate length] > 0;
        if (isUsableString && ![self kb_isSensitiveSuggestion:(NSString *)candidate]) {
            [kept addObject:(NSString *)candidate];
            // Stop as soon as the quota is filled.
            if (kept.count >= limit) {
                break;
            }
        }
    }

    NSArray<NSString *> *filtered = kept.array;
    return filtered ?: @[];
}
|
||||
|
||||
/// Decides whether a suggestion must be hidden.
///
/// The word is normalized first. It is reported as sensitive when nothing is
/// left after normalization, when the normalized token is in the blocked-word
/// set, or when it contains any blocked fragment as a substring.
///
/// @param word The suggestion to check.
/// @return YES if the suggestion should be filtered out, NO otherwise.
- (BOOL)kb_isSensitiveSuggestion:(NSString *)word {
    NSString *token = [self kb_normalizedSuggestionToken:word];
    if (token.length == 0) {
        // A token with no content after normalization is rejected outright.
        return YES;
    }

    // Exact match against the blocked words first; fragments are only
    // consulted when that lookup misses.
    BOOL blocked = [[self.class kb_blockedSuggestionWords] containsObject:token];
    if (!blocked) {
        for (NSString *piece in [self.class kb_blockedSuggestionFragments]) {
            if ([token containsString:piece]) {
                blocked = YES;
                break;
            }
        }
    }
    return blocked;
}
|
||||
|
||||
/// Produces the canonical form of a suggestion used for blocklist lookups:
/// lowercased, with diacritics and full-width/half-width variants folded, and
/// with whitespace, punctuation and symbols stripped from both ends.
///
/// @param word The raw suggestion. Anything that is not an NSString (including
///             nil) normalizes to the empty string.
/// @return The normalized token; empty when no content remains.
- (NSString *)kb_normalizedSuggestionToken:(NSString *)word {
    if (![word isKindOfClass:NSString.class]) { return @""; }

    // The trim set never changes, and this method runs once per candidate, so
    // build it a single time instead of on every call.
    static NSCharacterSet *edgeNoise = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        NSMutableCharacterSet *set = [[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy];
        [set formUnionWithCharacterSet:[NSCharacterSet punctuationCharacterSet]];
        [set formUnionWithCharacterSet:[NSCharacterSet symbolCharacterSet]];
        edgeNoise = [set copy];
    });

    // Fold with a nil locale so the result does not vary with the user's
    // region settings; the blocklist entries are stored in plain folded form.
    // Width folding maps full-width Latin letters onto their ASCII forms.
    NSString *folded = [word.lowercaseString
        stringByFoldingWithOptions:(NSDiacriticInsensitiveSearch | NSWidthInsensitiveSearch)
                            locale:nil];

    // Trim whitespace together with punctuation/symbols in one pass. Trimming
    // them in separate passes leaves stray whitespace behind for inputs such
    // as "!word !", which then fails the exact-match lookup.
    return [folded stringByTrimmingCharactersInSet:edgeNoise] ?: @"";
}
|
||||
|
||||
/// Returns the set of normalized words that are blocked on exact match.
/// The set is assembled once, on first use, and reused afterwards.
///
/// @return The blocked-word set.
+ (NSSet<NSString *> *)kb_blockedSuggestionWords {
    static NSSet<NSString *> *blockedWords = nil;
    static dispatch_once_t buildOnce;
    dispatch_once(&buildOnce, ^{
        // App-store compliance comes first: filter common adult, explicit
        // sexual, drug, and violence/weapon related high-risk words.
        NSArray<NSString *> *englishWords = @[
            @"sex", @"sexy", @"porn", @"porno", @"xxx", @"nude", @"naked",
            @"fuck", @"fucking", @"shit", @"bitch", @"penis", @"vagina",
            @"boob", @"rape", @"cocaine", @"heroin", @"drug", @"drugs",
            @"kill", @"murder", @"gun", @"weapon"
        ];
        // Stored without diacritics. "porno" also appears in the list above;
        // the set collapses the duplicate.
        NSArray<NSString *> *spanishPortugueseWords = @[
            @"sexo", @"porno", @"pornografia", @"violacion", @"violacao",
            @"drogas", @"cocaina", @"heroina", @"arma", @"matar", @"muerte",
            @"pene"
        ];
        NSArray<NSString *> *chineseWords = @[
            @"色情", @"裸露", @"裸体", @"裸聊", @"裸照",
            @"强奸", @"毒品", @"海洛因", @"可卡因",
            @"枪", @"武器", @"杀人", @"谋杀"
        ];

        NSMutableSet<NSString *> *merged = [NSMutableSet setWithArray:englishWords];
        [merged addObjectsFromArray:spanishPortugueseWords];
        [merged addObjectsFromArray:chineseWords];
        blockedWords = [merged copy];
    });
    return blockedWords;
}
|
||||
|
||||
/// Returns the fragments that mark a normalized suggestion as sensitive when
/// they occur anywhere inside it (substring match). Built once and cached.
///
/// Only strings that are safe to match as substrings belong here; words that
/// commonly occur inside harmless words must be blocked by exact match in
/// `kb_blockedSuggestionWords` instead.
///
/// @return The blocked fragments.
+ (NSArray<NSString *> *)kb_blockedSuggestionFragments {
    static NSArray<NSString *> *fragments = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        // "rape" is deliberately NOT a fragment: as a substring it also hides
        // everyday words such as "grape", "drape", "scrape" and "therapeutic".
        // The exact word is still blocked through kb_blockedSuggestionWords.
        // NOTE(review): "heroin" also matches "heroine" — confirm whether that
        // over-blocking is acceptable before changing it.
        fragments = @[
            @"porn", @"fuck", @"cocaine", @"heroin",
            @"色情", @"裸聊", @"裸照", @"强奸", @"毒品", @"杀人"
        ];
    });
    return fragments;
}
|
||||
|
||||
@end
|
||||
|
||||
Reference in New Issue
Block a user