feat(chat): 重构 LLM 流式输出并扩展 ChatSaveReq 字段

- 将原始整段 chunk 拆分为 3 字批次推送,降低前端卡顿
- ChatSaveReq 新增 userId、lang、liked 等 8 个字段并补充 Swagger 注解
- QdrantVectorService 改用 Map<String,JsonWithInt.Value> 载荷,新增 QdrantPayloadMapper 统一转换
This commit is contained in:
2025-12-09 14:49:14 +08:00
parent 39b19493e2
commit fba6f0d729
4 changed files with 134 additions and 15 deletions

View File

@@ -4,28 +4,32 @@ import cn.dev33.satoken.stp.StpUtil;
import cn.hutool.core.util.IdUtil;
import com.yolo.keyborad.common.BaseResponse;
import com.yolo.keyborad.common.ResultUtils;
import com.yolo.keyborad.mapper.QdrantPayloadMapper;
import com.yolo.keyborad.model.dto.chat.ChatReq;
import com.yolo.keyborad.model.dto.chat.ChatSaveReq;
import com.yolo.keyborad.model.dto.chat.ChatStreamMessage;
import com.yolo.keyborad.model.entity.KeyboardCharacter;
import com.yolo.keyborad.service.KeyboardCharacterService;
import com.yolo.keyborad.service.impl.QdrantVectorService;
import io.qdrant.client.grpc.JsonWithInt;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.embedding.EmbeddingResponse;
import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.openai.OpenAiEmbeddingModel;
import org.springframework.boot.context.properties.bind.DefaultValue;
import org.springframework.http.codec.ServerSentEvent;
import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/*
* @author: ziin
* @date: 2025/12/8 15:05
@@ -52,7 +56,7 @@ public class ChatController {
@PostMapping("/talk")
@Operation(summary = "聊天润色接口", description = "聊天润色接口")
public Flux<ServerSentEvent<ChatStreamMessage>> testTalk(@RequestBody ChatReq chatReq){
public Flux<ServerSentEvent<ChatStreamMessage>> talk(@RequestBody ChatReq chatReq){
KeyboardCharacter character = keyboardCharacterService.getById(chatReq.getCharacterId());
// 1. LLM 流式输出
Flux<ChatStreamMessage> llmFlux = client
@@ -69,7 +73,30 @@ public class ChatController {
.build())
.stream()
.content()
.map(chunk -> new ChatStreamMessage("llm_chunk", chunk));
// Re-emit every upstream LLM chunk as a sequence of smaller "llm_chunk" messages.
// Batching is done per chunk: the StringBuilder is local to the lambda, so a batch
// never spans two upstream chunks and the leftover of each chunk is flushed on its own.
.concatMap(chunk -> {
// Split the chunk into one String per Unicode code point, so surrogate pairs
// are never cut in half.
List<String> chars = chunk.codePoints()
.mapToObj(cp -> new String(Character.toChars(cp)))
.toList();
// Re-group the single characters into small batches before emitting.
// NOTE(review): the original note reads "35" characters, presumably meaning 3-5;
// the threshold actually used below is 3 — confirm the intended size.
List<String> batched = new ArrayList<>();
StringBuilder sb = new StringBuilder();
for (String ch : chars) {
sb.append(ch);
// sb.length() counts UTF-16 code units, not code points: a supplementary
// character contributes 2, so a batch may hold fewer than 3 code points.
if (sb.length() >= 3) { // the threshold 3 can be tuned
batched.add(sb.toString());
sb.setLength(0);
}
}
// Flush whatever is left over (fewer than 3 code units) as a final batch.
if (!sb.isEmpty()) {
batched.add(sb.toString());
}
// Wrap each batch in a ChatStreamMessage tagged "llm_chunk".
return Flux.fromIterable(batched)
.map(s -> new ChatStreamMessage("llm_chunk", s));
});
// .map(chunk -> new ChatStreamMessage("llm_chunk", chunk));
// 2. 向量搜索Flux一次性发送搜索结果
Flux<ChatStreamMessage> searchFlux = Mono
@@ -99,8 +126,10 @@ public class ChatController {
// Endpoint that embeds the user's text and stores it, with a payload built from
// the request, as a point in Qdrant.
// NOTE(review): the only parameter visible on this method is the @RequestBody ChatSaveReq;
// the @Parameter below (name "userInput") does not match it — confirm it is still wanted.
@Operation(summary = "保存润色后的句子", description = "保存润色后的句子")
@Parameter(name = "userInput",required = true,description = "测试聊天接口",example = "talk to something")
public BaseResponse<Boolean> testTalkWithVector(@RequestBody ChatSaveReq chatSaveReq) {
// NOTE(review): this view appears to interleave removed and added diff lines. The next
// two statements look like the pre-change version (`embed` is declared again further
// down, and the payload is passed as a single message) — confirm against the real file.
float[] embed = embeddingModel.embed(chatSaveReq.getUserInputMessage());
qdrantVectorService.upsertPoint(IdUtil.getSnowflakeNextId(), embed, chatSaveReq.getUserSelectMessage());
// Compute the embedding vector from the user's text.
float[] embed = embeddingModel.embed(chatSaveReq.getUserText());
// Overwrite any userId carried in the request body with the login id from StpUtil.
// NOTE(review): presumably this fails for unauthenticated callers — verify how that is handled.
chatSaveReq.setUserId(StpUtil.getLoginIdAsLong());
// Convert the request into the Map<String, JsonWithInt.Value> payload form.
Map<String, JsonWithInt.Value> map = QdrantPayloadMapper.toQdrantPayload(chatSaveReq);
// Upsert the vector and payload under a freshly generated snowflake id.
qdrantVectorService.upsertPoint(IdUtil.getSnowflakeNextId(), embed, map);
// Reports success whenever the calls above return without throwing.
return ResultUtils.success(true);
}
}