过滤敏感词
This commit is contained in:
@@ -43,26 +43,41 @@
|
|||||||
|
|
||||||
/// Returns up to `limit` suggestions for `prefix` from the engine selected by
/// `self.engineType`, after removing sensitive entries.
///
/// @param prefix The text typed so far. An empty prefix yields an empty array.
/// @param limit  Maximum number of suggestions to return. Zero yields an empty array.
/// @return The filtered suggestions, at most `limit` of them.
- (NSArray<NSString *> *)suggestionsForPrefix:(NSString *)prefix limit:(NSUInteger)limit {
    if (limit == 0 || prefix.length == 0) {
        return @[];
    }

    // Over-fetch so the filter has spare candidates and does not leave too few
    // results: requests below 80 are quadrupled (capped at 80); larger requests
    // are passed through unchanged.
    NSUInteger candidateLimit = limit;
    if (limit < 80) {
        candidateLimit = MIN((NSUInteger)80, limit * 4);
    }

    NSArray<NSString *> *candidates = nil;
    switch (self.engineType) {
        case KBSuggestionEngineTypeEnglish:
            candidates = [self kb_englishSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
        case KBSuggestionEngineTypeSpanish:
            candidates = [self kb_spanishSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
        case KBSuggestionEngineTypePortuguese:
            candidates = [self kb_portugueseSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
        case KBSuggestionEngineTypeIndonesian:
            candidates = [self kb_indonesianSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
        case KBSuggestionEngineTypePinyinTraditional:
            candidates = [self kb_traditionalPinyinSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
        case KBSuggestionEngineTypePinyinSimplified:
            candidates = [self kb_simplifiedPinyinSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
        case KBSuggestionEngineTypeBopomofo:
            candidates = [self kb_bopomofoSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
        case KBSuggestionEngineTypeLatin:
        default:
            // Unknown engine types share the Latin engine.
            candidates = [self kb_latinSuggestionsForPrefix:prefix limit:candidateLimit];
            break;
    }

    // The caller's original limit (not the over-fetch limit) caps the final result.
    return [self kb_filterSensitiveSuggestions:candidates limit:limit];
}
|
||||||
|
|
||||||
- (void)recordSelection:(NSString *)word {
|
- (void)recordSelection:(NSString *)word {
|
||||||
@@ -880,4 +895,80 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#pragma mark - Safety Filter
|
||||||
|
|
||||||
|
/// Filters a list of raw suggestions down to the ones that are safe to show.
///
/// Entries that are not strings, are empty, or are flagged by
/// `kb_isSensitiveSuggestion:` are dropped. Remaining entries are de-duplicated
/// while keeping first-seen order, and collection stops once `limit` entries
/// have been accepted.
///
/// @param items Candidate suggestions; may be nil or empty.
/// @param limit Maximum number of entries to return. Zero yields an empty array.
/// @return The accepted suggestions, at most `limit` of them.
- (NSArray<NSString *> *)kb_filterSensitiveSuggestions:(NSArray<NSString *> *)items
                                                 limit:(NSUInteger)limit {
    if (limit == 0 || items.count == 0) {
        return @[];
    }

    NSMutableOrderedSet<NSString *> *accepted = [NSMutableOrderedSet orderedSet];
    for (id candidate in items) {
        // The element type is checked at runtime rather than trusted from the generic annotation.
        BOOL usable = [candidate isKindOfClass:[NSString class]]
            && [(NSString *)candidate length] > 0
            && ![self kb_isSensitiveSuggestion:(NSString *)candidate];
        if (!usable) {
            continue;
        }

        [accepted addObject:(NSString *)candidate];
        if (accepted.count >= limit) {
            break;
        }
    }
    return accepted.array;
}
|
||||||
|
|
||||||
|
/// Decides whether a suggestion must be hidden.
///
/// The word is normalized with `kb_normalizedSuggestionToken:` and is treated as
/// sensitive when the normalized token is empty, equals an entry in
/// `+kb_blockedSuggestionWords`, or contains any entry of
/// `+kb_blockedSuggestionFragments` as a substring.
///
/// @param word The raw suggestion text.
/// @return YES if the suggestion should be filtered out, NO otherwise.
- (BOOL)kb_isSensitiveSuggestion:(NSString *)word {
    NSString *token = [self kb_normalizedSuggestionToken:word];

    // A token with nothing left after normalization is rejected outright.
    if (token.length == 0 || [[[self class] kb_blockedSuggestionWords] containsObject:token]) {
        return YES;
    }

    NSArray<NSString *> *fragments = [[self class] kb_blockedSuggestionFragments];
    NSUInteger hit = [fragments indexOfObjectPassingTest:^BOOL(NSString *fragment, NSUInteger idx, BOOL *stop) {
        return [token containsString:fragment];
    }];
    return hit != NSNotFound;
}
|
||||||
|
|
||||||
|
/// Produces the canonical form of a suggestion used for blocklist matching.
///
/// The word is lowercased, folded so that diacritics and full-width/half-width
/// differences disappear, and then stripped of leading and trailing whitespace,
/// punctuation and symbols. Interior characters are left untouched.
///
/// @param word The raw suggestion text; non-string input is tolerated.
/// @return The normalized token, or an empty string when nothing remains.
- (NSString *)kb_normalizedSuggestionToken:(NSString *)word {
    if (![word isKindOfClass:[NSString class]]) {
        return @"";
    }

    // Fold with a nil locale: the blocklist is fixed, so matching must not vary
    // with the user's current locale. Width folding maps full-width letters
    // (for example from CJK input) onto their ASCII counterparts.
    NSString *folded = [word.lowercaseString
        stringByFoldingWithOptions:(NSDiacriticInsensitiveSearch | NSWidthInsensitiveSearch)
                            locale:nil];

    // Trim whitespace, punctuation and symbols in a single pass so that
    // whitespace sitting behind leading punctuation (e.g. "! word") is removed too.
    NSMutableCharacterSet *edgeNoise = [[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy];
    [edgeNoise formUnionWithCharacterSet:[NSCharacterSet punctuationCharacterSet]];
    [edgeNoise formUnionWithCharacterSet:[NSCharacterSet symbolCharacterSet]];

    return [folded stringByTrimmingCharactersInSet:edgeNoise] ?: @"";
}
|
||||||
|
|
||||||
|
/// Exact-match blocklist, built once and shared.
///
/// Entries are compared against the normalized token (lowercase, diacritics
/// removed), so every entry here must already be in that form.
///
/// @return The set of normalized words that must never be suggested.
+ (NSSet<NSString *> *)kb_blockedSuggestionWords {
    static NSSet<NSString *> *words = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        // App Store compliance takes priority: filter common high-risk words about
        // adult content, explicit sexual acts, drugs, violence and weapons.
        words = [NSSet setWithArray:@[
            // English
            @"sex", @"sexy", @"porn", @"porno", @"xxx", @"nude", @"naked",
            @"fuck", @"fucking", @"shit", @"bitch", @"penis", @"vagina",
            @"boob", @"cocaine", @"heroin", @"drug", @"drugs",
            @"kill", @"murder", @"gun", @"weapon",
            // "rape" is matched as a whole word only (see the fragment list), so
            // its inflections are listed explicitly.
            @"rape", @"raped", @"rapes", @"raping", @"rapist", @"rapists",
            // Spanish / Portuguese (diacritics already folded)
            @"sexo", @"pornografia", @"violacion", @"violacao",
            @"drogas", @"cocaina", @"heroina", @"arma", @"matar", @"muerte",
            @"pene",
            // Chinese, Simplified
            @"色情", @"裸露", @"裸体", @"裸聊", @"裸照",
            @"强奸", @"毒品", @"海洛因", @"可卡因",
            @"枪", @"武器", @"杀人", @"谋杀",
            // Chinese, Traditional forms that differ from the Simplified ones
            @"裸體", @"強姦", @"槍", @"殺人", @"謀殺"
        ]];
    });
    return words;
}

/// Substring blocklist, built once and shared.
///
/// Any normalized token containing one of these fragments is rejected, so only
/// fragments that do not occur inside ordinary words belong here. "rape" and
/// "heroin" are deliberately absent: as substrings they would also reject
/// "grape", "drape", "therapeutic" and "heroine". They are handled by the
/// exact-match list instead.
///
/// @return The normalized fragments whose presence blocks a suggestion.
+ (NSArray<NSString *> *)kb_blockedSuggestionFragments {
    static NSArray<NSString *> *fragments = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        fragments = @[
            @"porn", @"fuck", @"cocaine",
            @"色情", @"裸聊", @"裸照", @"强奸", @"強姦", @"毒品", @"杀人", @"殺人"
        ];
    });
    return fragments;
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|||||||
Reference in New Issue
Block a user