Files
keyboard/keyBoard/Class/AiTalk/VM/Segmenter.m

149 lines
3.9 KiB
Mathematica
Raw Normal View History

//
// Segmenter.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "Segmenter.h"
@interface Segmenter ()

/// Text received through -appendToken: that has not been emitted as a segment yet.
@property(nonatomic, strong) NSMutableString *buffer;

/// Finished segments waiting to be handed out by -popReadySegments.
@property(nonatomic, strong) NSMutableArray<NSString *> *readySegments;

@end

@implementation Segmenter

- (instancetype)init {
  self = [super init];
  if (self) {
    _buffer = [[NSMutableString alloc] init];
    _readySegments = [[NSMutableArray alloc] init];
    // Default buffer length (NSString length, i.e. UTF-16 units) at which a
    // split is forced even though no sentence terminator has arrived.
    _maxCharacterThreshold = 30;
  }
  return self;
}

#pragma mark - Public Methods

/// Appends a piece of incoming text to the internal buffer and immediately
/// moves every complete segment it can find into the ready queue.
/// @param token Text to append. A nil or empty token is ignored.
- (void)appendToken:(NSString *)token {
  // Messaging nil yields 0, so this single check covers nil and @"".
  if (token.length == 0) {
    return;
  }
  [self.buffer appendString:token];
  [self checkAndSplit];
}

/// Returns the segments completed so far and removes them from the queue.
/// @return An immutable snapshot of the queued segments (empty when none).
- (NSArray<NSString *> *)popReadySegments {
  NSArray<NSString *> *segments = [self.readySegments copy];
  [self.readySegments removeAllObjects];
  return segments;
}

/// Empties the buffer and returns whatever unsplit text it still held.
/// @return The buffered text with surrounding whitespace and newlines removed,
///         or nil when nothing but whitespace (or nothing at all) was left.
- (NSString *)flushRemainingSegment {
  NSString *remaining = [self.buffer copy];
  [self.buffer setString:@""];
  remaining = [remaining
      stringByTrimmingCharactersInSet:[NSCharacterSet
                                          whitespaceAndNewlineCharacterSet]];
  return remaining.length > 0 ? remaining : nil;
}

/// Discards both the buffered text and any segments not yet popped.
- (void)reset {
  [self.buffer setString:@""];
  [self.readySegments removeAllObjects];
}

#pragma mark - Private Methods

/// Repeatedly cuts segments off the front of the buffer until neither split
/// rule applies any more:
///  1. If the buffer contains a sentence terminator, cut right after the first
///     one.
///  2. Otherwise, if the buffer has reached the forced-split limit, cut after
///     the best secondary break character, or at the limit itself when no
///     such character exists.
- (void)checkAndSplit {
  // NOTE(review): the set holds the CJK full stop next to ASCII '!' and '?'.
  // Confirm whether the full-width forms (U+FF01, U+FF1F) are meant to be
  // included as well.
  NSCharacterSet *sentenceEnders =
      [NSCharacterSet characterSetWithCharactersInString:@"。!?\n"];
  NSUInteger limit = [self forcedSplitLimit];

  // Every pass below removes at least one character, so the loop terminates.
  while (self.buffer.length > 0) {
    NSRange ender = [self.buffer rangeOfCharacterFromSet:sentenceEnders];
    if (ender.location != NSNotFound) {
      // Keep the terminator with the sentence it ends.
      [self emitSegmentUpToIndex:NSMaxRange(ender)];
      continue;
    }

    // A limit of 0 means "never force a split"; without this guard a zero
    // threshold would remove zero characters per pass and spin forever.
    if (limit == 0 || self.buffer.length < limit) {
      break;
    }

    NSRange breakRange = [self findBestBreakPoint:self.buffer];
    NSUInteger cutIndex = (breakRange.location != NSNotFound)
                              ? NSMaxRange(breakRange)
                              : [self hardCutIndexForLimit:limit];
    [self emitSegmentUpToIndex:cutIndex];
  }
}

/// Searches backwards from the end of `text` for a secondary break character
/// (comma, enumeration comma, semicolon, colon or space), never looking at
/// indexes below half of the forced-split limit so that the resulting segment
/// does not become too short.
/// @param text The text to scan.
/// @return A length-1 range at the break character, or {NSNotFound, 0}.
- (NSRange)findBestBreakPoint:(NSString *)text {
  // NOTE(review): ';' is listed twice in this literal, which suggests a
  // full-width variant may have been lost in transcoding. Confirm the
  // intended characters against the original file.
  NSCharacterSet *breakChars =
      [NSCharacterSet characterSetWithCharactersInString:@",、;;: "];
  NSUInteger lowerBound = [self forcedSplitLimit] / 2;

  // Unsigned countdown: `index` is tested before it is decremented, so it can
  // never wrap below zero even when lowerBound is 0.
  NSUInteger index = text.length;
  while (index > lowerBound) {
    index--;
    if ([breakChars characterIsMember:[text characterAtIndex:index]]) {
      return NSMakeRange(index, 1);
    }
  }
  return NSMakeRange(NSNotFound, 0);
}

/// The configured threshold as an unsigned length.
/// The explicit cast keeps comparisons against NSString lengths well defined
/// whether the property is declared signed or unsigned: a negative value turns
/// into a very large limit that is never reached, and 0 is treated by the
/// callers as "length-based splitting disabled".
- (NSUInteger)forcedSplitLimit {
  return (NSUInteger)self.maxCharacterThreshold;
}

/// Chooses where to cut the buffer when no break character is available.
/// The cut is placed at `limit`, moved so that it never lands inside a
/// surrogate pair or other composed character sequence.
/// @param limit Desired cut position; expected to be >= 1.
/// @return An index in 1...buffer.length for a non-empty buffer.
- (NSUInteger)hardCutIndexForLimit:(NSUInteger)limit {
  NSUInteger length = self.buffer.length;
  if (limit >= length) {
    // Cutting at the very end cannot split a character.
    return length;
  }
  NSRange sequence =
      [self.buffer rangeOfComposedCharacterSequenceAtIndex:limit];
  if (sequence.location == limit) {
    // `limit` already sits on a character boundary.
    return limit;
  }
  // `limit` falls inside a sequence: cut in front of it, unless the sequence
  // starts the buffer, in which case take it whole so progress is guaranteed.
  return sequence.location > 0 ? sequence.location : NSMaxRange(sequence);
}

/// Removes the first `endIndex` UTF-16 units from the buffer and queues them
/// as a segment after trimming surrounding whitespace and newlines. Text that
/// is empty after trimming is dropped but still removed from the buffer.
/// @param endIndex Number of leading units to consume; must be <= buffer length.
- (void)emitSegmentUpToIndex:(NSUInteger)endIndex {
  NSString *segment = [[self.buffer substringToIndex:endIndex]
      stringByTrimmingCharactersInSet:[NSCharacterSet
                                          whitespaceAndNewlineCharacterSet]];
  if (segment.length > 0) {
    [self.readySegments addObject:segment];
  }
  [self.buffer deleteCharactersInRange:NSMakeRange(0, endIndex)];
}

@end