添加语音websocket等,还没测试
This commit is contained in:
148
keyBoard/Class/AiTalk/VM/Segmenter.m
Normal file
148
keyBoard/Class/AiTalk/VM/Segmenter.m
Normal file
@@ -0,0 +1,148 @@
|
||||
//
|
||||
// Segmenter.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "Segmenter.h"
|
||||
|
||||
/// Private state of Segmenter, kept out of the public header.
@interface Segmenter ()

/// Text received through -appendToken: that has not been cut into a segment yet.
@property (nonatomic, strong) NSMutableString *buffer;

/// Segments already cut from the buffer, held until -popReadySegments drains them.
@property (nonatomic, strong) NSMutableArray<NSString *> *readySegments;

@end
|
||||
|
||||
@implementation Segmenter
|
||||
|
||||
/// Creates a segmenter with an empty text buffer, an empty ready queue and a
/// forced-split threshold of 30.
- (instancetype)init {
  if ((self = [super init])) {
    _readySegments = [NSMutableArray array];
    _buffer = [NSMutableString string];
    _maxCharacterThreshold = 30;
  }
  return self;
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
/// Appends a token to the pending text and then cuts off every segment that
/// has become complete. A nil or empty token is ignored.
///
/// @param token The text fragment to add.
- (void)appendToken:(NSString *)token {
  // Messaging nil yields 0, so this one test covers both nil and @"".
  if (token.length > 0) {
    [self.buffer appendString:token];
    // The buffer grew, so check whether it can be split now.
    [self checkAndSplit];
  }
}
|
||||
|
||||
/// Hands over every queued segment and empties the queue.
///
/// @return A snapshot of the ready segments, in the order they were cut.
- (NSArray<NSString *> *)popReadySegments {
  NSMutableArray<NSString *> *queue = self.readySegments;
  NSArray<NSString *> *drained = [queue copy];
  [queue removeAllObjects];
  return drained;
}
|
||||
|
||||
/// Empties the buffer and returns whatever text was still pending.
///
/// @return The leftover text with leading and trailing whitespace/newlines
///         removed, or nil when nothing but whitespace was left.
- (NSString *)flushRemainingSegment {
  // Snapshot first: the buffer is cleared before the text is returned.
  NSString *leftover = [self.buffer copy];
  [self.buffer setString:@""];

  // Strip leading and trailing whitespace.
  NSCharacterSet *blank = [NSCharacterSet whitespaceAndNewlineCharacterSet];
  NSString *trimmed = [leftover stringByTrimmingCharactersInSet:blank];
  if (trimmed.length == 0) {
    return nil;
  }
  return trimmed;
}
|
||||
|
||||
/// Discards all state: queued segments and any pending buffered text.
- (void)reset {
  [self.readySegments removeAllObjects];
  [self.buffer setString:@""];
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
/// Repeatedly cuts completed segments off the front of the buffer and queues
/// them in readySegments.
///
/// Each pass picks a cut position in this order:
///  1. just after the first sentence-ending character in the buffer;
///  2. otherwise, once the buffer length has reached maxCharacterThreshold,
///     just after the position reported by -findBestBreakPoint:;
///  3. otherwise (still over the threshold) a hard cut at the threshold.
/// The loop stops when none of these applies.
///
/// A threshold of zero or less disables forced splitting (cases 2 and 3);
/// a zero-length hard cut would otherwise never shrink the buffer and the
/// loop would not terminate.
- (void)checkAndSplit {
  // Sentence-ending characters: ideographic full stop, ASCII and full-width
  // (U+FF01, U+FF1F) exclamation/question marks, and line feed.
  NSCharacterSet *sentenceEnders = [NSCharacterSet
      characterSetWithCharactersInString:@"。!?\uFF01\uFF1F\n"];

  // Read the threshold once, as a signed value, so that a non-positive
  // setting is recognised regardless of the property's declared integer type.
  NSInteger configured = (NSInteger)self.maxCharacterThreshold;
  BOOL canForceSplit = configured > 0;
  NSUInteger threshold = canForceSplit ? (NSUInteger)configured : 0;

  NSMutableString *pending = self.buffer;
  while (pending.length > 0) {
    NSUInteger cutLength = 0;

    // Look for the first sentence-ending character.
    NSRange ender = [pending rangeOfCharacterFromSet:sentenceEnders];
    if (ender.location != NSNotFound) {
      cutLength = NSMaxRange(ender);
    } else if (canForceSplit && pending.length >= threshold) {
      // No terminator but the buffer is over the threshold: force a split,
      // preferably at a comma, space or similar break character.
      NSRange softBreak = [self findBestBreakPoint:pending];
      if (softBreak.location != NSNotFound) {
        cutLength = softBreak.location + 1;
      } else {
        // No break character either: cut at the threshold, but never inside
        // a surrogate pair or other composed character sequence, which
        // would leave broken text on both sides of the cut.
        cutLength = threshold;
        if (cutLength < pending.length) {
          NSRange glyph =
              [pending rangeOfComposedCharacterSequenceAtIndex:cutLength];
          if (glyph.location < cutLength) {
            cutLength = NSMaxRange(glyph);
          }
        }
      }
    }

    if (cutLength == 0) {
      // Nothing can be split off yet.
      break;
    }
    [self enqueueSegmentOfLength:cutLength];
  }
}

/// Removes the first `length` UTF-16 units from the buffer and, if they hold
/// anything other than whitespace, queues the trimmed text as a ready segment.
- (void)enqueueSegmentOfLength:(NSUInteger)length {
  NSCharacterSet *blank = [NSCharacterSet whitespaceAndNewlineCharacterSet];
  NSString *segment = [[self.buffer substringToIndex:length]
      stringByTrimmingCharactersInSet:blank];
  if (segment.length > 0) {
    [self.readySegments addObject:segment];
  }
  // Drop the consumed prefix even when it was only whitespace.
  [self.buffer deleteCharactersInRange:NSMakeRange(0, length)];
}
|
||||
|
||||
/// Finds the last soft break character (comma, enumeration comma, semicolon,
/// colon or space) in `text`, ignoring everything before index
/// maxCharacterThreshold / 2.
///
/// Searching from the end keeps as much text as possible in the segment that
/// is cut off; the lower bound stops the segment from becoming very short.
///
/// @param text The text to search.
/// @return The range of the break character, or {NSNotFound, 0} when there is
///         none in the searched part (including when `text` is shorter than
///         the lower bound or empty).
- (NSRange)findBestBreakPoint:(NSString *)text {
  // ASCII and full-width (U+FF0C, U+FF1B, U+FF1A) comma, semicolon and
  // colon, the ideographic enumeration comma, and the space.
  NSCharacterSet *breakChars = [NSCharacterSet
      characterSetWithCharactersInString:@",\uFF0C、;\uFF1B:\uFF1A "];

  // Work out the lower bound in signed arithmetic and clamp it at 0, so a
  // zero or negative threshold can never drive the search index below 0.
  NSInteger configured = (NSInteger)self.maxCharacterThreshold;
  NSUInteger lowerBound = configured > 0 ? (NSUInteger)configured / 2 : 0;
  if (lowerBound >= text.length) {
    return NSMakeRange(NSNotFound, 0);
  }

  NSRange searchRange = NSMakeRange(lowerBound, text.length - lowerBound);
  return [text rangeOfCharacterFromSet:breakChars
                               options:NSBackwardsSearch
                                 range:searchRange];
}
|
||||
|
||||
@end
|
||||
Reference in New Issue
Block a user