refactor(chat): 拆分聊天逻辑至独立 ChatService 并提取 LLM 配置

将 ChatController 中的聊天与向量搜索流程整体迁移到 ChatServiceImpl,
新增 AppConfig.LLmConfig 集中管理系统提示语与最大消息长度,
消除控制器层复杂逻辑,提升可维护性与配置动态化能力。
This commit is contained in:
2025-12-17 15:36:57 +08:00
parent 86738e3d1b
commit 2621321dea
4 changed files with 290 additions and 182 deletions

View File

@@ -1,6 +1,5 @@
package com.yolo.keyborad.controller;
import cn.dev33.satoken.context.mock.SaTokenContextMockUtil;
import cn.dev33.satoken.stp.StpUtil;
import cn.hutool.core.util.IdUtil;
import cn.hutool.core.util.StrUtil;
@@ -12,29 +11,19 @@ import com.yolo.keyborad.mapper.QdrantPayloadMapper;
import com.yolo.keyborad.model.dto.chat.ChatReq;
import com.yolo.keyborad.model.dto.chat.ChatSaveReq;
import com.yolo.keyborad.model.dto.chat.ChatStreamMessage;
import com.yolo.keyborad.model.entity.KeyboardCharacter;
import com.yolo.keyborad.model.entity.KeyboardUserCallLog;
import com.yolo.keyborad.service.KeyboardCharacterService;
import com.yolo.keyborad.service.KeyboardUserCallLogService;
import com.yolo.keyborad.service.ChatService;
import com.yolo.keyborad.service.impl.QdrantVectorService;
import io.qdrant.client.grpc.JsonWithInt;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.openai.OpenAiEmbeddingModel;
import org.springframework.http.codec.ServerSentEvent;
import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
import java.math.BigDecimal;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.Map;
/*
* @author: ziin
@@ -46,12 +35,6 @@ import java.util.concurrent.atomic.AtomicReference;
@Tag(name = "聊天", description = "聊天接口")
public class ChatController {
// Maximum allowed chat message length; talk() compares it against String.length() of the incoming message.
private static final int MAX_MESSAGE_LENGTH = 1000;
@Resource
private ChatClient client;
@Resource
private OpenAiEmbeddingModel embeddingModel;
@@ -59,174 +42,13 @@ public class ChatController {
private QdrantVectorService qdrantVectorService;
@Resource
private KeyboardCharacterService keyboardCharacterService;
@Resource
private KeyboardUserCallLogService callLogService;
private ChatService chatService;
/**
 * Chat "polish" endpoint: streams LLM reply chunks and a vector-search result to the client
 * as server-sent events.
 *
 * <p>Flow as written in this method:
 * <ol>
 *   <li>Validates the request (non-null body, non-null character id, non-blank message,
 *       message length not above {@code MAX_MESSAGE_LENGTH}).</li>
 *   <li>Loads the keyboard character by id and rejects the request when it does not exist.</li>
 *   <li>Streams the LLM response and re-emits its text as small {@code "llm_chunk"} messages;
 *       an LLM failure is converted into a single {@code "error"} message.</li>
 *   <li>Runs the vector search on the bounded-elastic scheduler and emits one
 *       {@code "search_result"} message (an empty list when the search fails).</li>
 *   <li>Merges both streams, appends a {@code "done"} message, and wraps every message in a
 *       {@link ServerSentEvent} whose event name is the message type.</li>
 *   <li>When the merged stream terminates, saves a {@code KeyboardUserCallLog} entry on the
 *       bounded-elastic scheduler.</li>
 * </ol>
 *
 * <p>NOTE(review): the body contains two {@code return} statements (the SSE pipeline and
 * {@code chatService.talk(chatReq)}); the second one is unreachable as written. This looks like
 * the old inline implementation and its replacement shown together — confirm which one is
 * meant to remain.
 *
 * @param chatReq request body carrying the character id and the user message
 * @return a stream of server-sent events wrapping {@link ChatStreamMessage} payloads
 * @throws BusinessException when the request is null, the character id is null, the message is
 *         blank or too long, or the character cannot be found
 */
@PostMapping("/talk")
@Operation(summary = "聊天润色接口", description = "聊天润色接口")
public Flux<ServerSentEvent<ChatStreamMessage>> talk(@RequestBody ChatReq chatReq){
// 1. Validate request parameters
if (chatReq == null) {
log.error("聊天请求参数为空");
throw new BusinessException(ErrorCode.PARAMS_ERROR);
}
if (chatReq.getCharacterId() == null) {
log.error("键盘人设ID为空");
throw new BusinessException(ErrorCode.CHAT_CHARACTER_ID_EMPTY);
}
if (StrUtil.isBlank(chatReq.getMessage())) {
log.error("聊天消息为空");
throw new BusinessException(ErrorCode.CHAT_MESSAGE_EMPTY);
}
if (chatReq.getMessage().length() > MAX_MESSAGE_LENGTH) {
log.error("聊天消息过长,长度: {}", chatReq.getMessage().length());
throw new BusinessException(ErrorCode.CHAT_MESSAGE_TOO_LONG);
}
// 2. Verify that the keyboard character (persona) exists
KeyboardCharacter character = keyboardCharacterService.getById(chatReq.getCharacterId());
if (character == null) {
log.error("键盘人设不存在ID: {}", chatReq.getCharacterId());
throw new BusinessException(ErrorCode.CHAT_CHARACTER_NOT_FOUND);
}
// Initialise the call-log state; atomics are used because the values are written inside
// the reactive callbacks below and read later in doFinally
String requestId = IdUtil.fastSimpleUUID();
long startTime = System.currentTimeMillis();
AtomicReference<String> modelRef = new AtomicReference<>();
AtomicInteger inputTokens = new AtomicInteger(0);
AtomicInteger outputTokens = new AtomicInteger(0);
AtomicReference<String> errorCodeRef = new AtomicReference<>();
// 3. Streamed LLM output
Flux<ChatStreamMessage> llmFlux = client
.prompt(character.getPrompt())
.system("""
Format rules:
- Return EXACTLY 3 replies.
- Use "<SPLIT>" as the separator.
- reply1<SPLIT>reply2<SPLIT>reply3
""")
.user(chatReq.getMessage())
.options(OpenAiChatOptions.builder()
.user(StpUtil.getLoginIdAsString())
.build())
.stream()
.chatResponse()
.concatMap(response -> {
// Extract metadata (model name and token usage) for the call log
if (response.getMetadata() != null) {
var metadata = response.getMetadata();
if (metadata.getModel() != null) {
modelRef.set(metadata.getModel());
}
if (metadata.getUsage() != null) {
var usage = metadata.getUsage();
if (usage.getPromptTokens() != null) {
inputTokens.set(usage.getPromptTokens());
}
if (usage.getCompletionTokens() != null) {
outputTokens.set(usage.getCompletionTokens());
}
}
}
// Read the text content of this response chunk; skip chunks without text
String content = response.getResult().getOutput().getText();
if (content == null || content.isEmpty()) {
return Flux.empty();
}
// Split into single code points so surrogate pairs are never cut in half
List<String> chars = content.codePoints()
.mapToObj(cp -> new String(Character.toChars(cp)))
.toList();
// Emit in batches: flush once the buffer length reaches 3
// (StringBuilder.length() counts UTF-16 units, so a supplementary code point counts as 2)
List<String> batched = new ArrayList<>();
StringBuilder sb = new StringBuilder();
for (String ch : chars) {
sb.append(ch);
if (sb.length() >= 3) {
batched.add(sb.toString());
sb.setLength(0);
}
}
// Flush whatever is left over as the final, shorter batch
if (!sb.isEmpty()) {
batched.add(sb.toString());
}
return Flux.fromIterable(batched)
.map(s -> new ChatStreamMessage("llm_chunk", s));
})
.doOnError(error -> {
log.error("LLM调用失败", error);
// Remember the failure so the call log is marked unsuccessful
errorCodeRef.set("LLM_ERROR");
})
// Replace the failure with a single "error" message instead of failing the stream
.onErrorResume(error ->
Flux.just(new ChatStreamMessage("error", "LLM服务暂时不可用请稍后重试"))
);
// 4. Vector search Flux: sends the search result once
Flux<ChatStreamMessage> searchFlux = Mono
.fromCallable(() -> qdrantVectorService.searchText(chatReq.getMessage()))
.subscribeOn(Schedulers.boundedElastic()) // avoid blocking the event loop
.map(list -> new ChatStreamMessage("search_result", list))
.doOnError(error -> log.error("向量搜索失败", error))
// On failure, fall back to an empty search result
.onErrorResume(error ->
Mono.just(new ChatStreamMessage("search_result", new ArrayList<>()))
)
.flux();
// 5. End-of-stream marker
Flux<ChatStreamMessage> doneFlux =
Flux.just(new ChatStreamMessage("done", null));
// 6. Merge the LLM and search streams, then append the end marker
Flux<ChatStreamMessage> merged =
Flux.merge(llmFlux, searchFlux)
.concatWith(doneFlux);
// Capture the token value here so the logging task below can use it
String tokenValue = StpUtil.getTokenValue();
// 7. Wrap as SSE and record the call log
return merged
.doFinally(signalType -> {
// Save the call log asynchronously
Mono.fromRunnable(() -> {
try {
KeyboardUserCallLog callLog = new KeyboardUserCallLog();
// NOTE(review): presumably sets up a mock Sa-Token context so the login id can be
// resolved from the captured token on this worker thread — confirm
SaTokenContextMockUtil.setMockContext(()->{
StpUtil.setTokenValueToStorage(tokenValue);
callLog.setUserId(StpUtil.getLoginIdAsLong());
});
callLog.setRequestId(requestId);
callLog.setFeature("chat_talk");
callLog.setModel(modelRef.get());
callLog.setInputTokens(inputTokens.get());
callLog.setOutputTokens(outputTokens.get());
callLog.setTotalTokens(inputTokens.get() + outputTokens.get());
// Success means no error code was recorded by the LLM stream
callLog.setSuccess(errorCodeRef.get() == null);
callLog.setLatencyMs((int) (System.currentTimeMillis() - startTime));
callLog.setErrorCode(errorCodeRef.get());
callLog.setCreatedAt(new Date());
callLogService.save(callLog);
} catch (Exception e) {
// Logging is best-effort: a failure here is logged and not propagated
log.error("保存调用日志失败", e);
}
}).subscribeOn(Schedulers.boundedElastic()).subscribe();
})
// Use the message type as the SSE event name
.map(msg ->
ServerSentEvent.builder(msg)
.event(msg.getType())
.build()
);
// NOTE(review): unreachable as written because of the return above — confirm whether the
// inline pipeline or this delegation is the intended body
return chatService.talk(chatReq);
}