feat(chat): 集成 ElevenLabs TTS 并支持异步语音生成
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
package com.yolo.keyborad.config;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* ElevenLabs TTS 配置
|
||||
*
|
||||
* @author ziin
|
||||
*/
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "elevenlabs")
|
||||
public class ElevenLabsProperties {
|
||||
|
||||
/**
|
||||
* API Key
|
||||
*/
|
||||
private String apiKey;
|
||||
|
||||
/**
|
||||
* 基础 URL
|
||||
*/
|
||||
private String baseUrl = "https://api.elevenlabs.io/v1";
|
||||
|
||||
/**
|
||||
* 默认语音 ID
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 模型 ID
|
||||
*/
|
||||
private String modelId = "eleven_multilingual_v2";
|
||||
|
||||
/**
|
||||
* 输出格式
|
||||
*/
|
||||
private String outputFormat = "mp3_44100_128";
|
||||
|
||||
/**
|
||||
* 稳定性 (0-1)
|
||||
*/
|
||||
private Double stability = 0.5;
|
||||
|
||||
/**
|
||||
* 相似度增强 (0-1)
|
||||
*/
|
||||
private Double similarityBoost = 0.75;
|
||||
|
||||
/**
|
||||
* 风格 (0-1)
|
||||
*/
|
||||
private Double style = 0.0;
|
||||
|
||||
/**
|
||||
* 语速 (0.7-1.2)
|
||||
*/
|
||||
private Double speed = 1.0;
|
||||
|
||||
/**
|
||||
* 使用说话人增强
|
||||
*/
|
||||
private Boolean useSpeakerBoost = true;
|
||||
}
|
||||
@@ -109,7 +109,9 @@ public class SaTokenConfigure implements WebMvcConfigurer {
|
||||
"/themes/listAllStyles",
|
||||
"/wallet/transactions",
|
||||
"/themes/restore",
|
||||
"/chat/message"
|
||||
"/chat/message",
|
||||
"/chat/voice",
|
||||
"/chat/audio/*"
|
||||
};
|
||||
}
|
||||
@Bean
|
||||
|
||||
@@ -11,6 +11,9 @@ import com.yolo.keyborad.mapper.QdrantPayloadMapper;
|
||||
import com.yolo.keyborad.model.dto.chat.ChatReq;
|
||||
import com.yolo.keyborad.model.dto.chat.ChatSaveReq;
|
||||
import com.yolo.keyborad.model.dto.chat.ChatStreamMessage;
|
||||
import com.yolo.keyborad.model.vo.AudioTaskVO;
|
||||
import com.yolo.keyborad.model.vo.ChatMessageVO;
|
||||
import com.yolo.keyborad.model.vo.ChatVoiceVO;
|
||||
import com.yolo.keyborad.service.ChatService;
|
||||
import com.yolo.keyborad.service.impl.QdrantVectorService;
|
||||
import io.qdrant.client.grpc.JsonWithInt;
|
||||
@@ -46,19 +49,30 @@ public class ChatController {
|
||||
|
||||
|
||||
@PostMapping("/message")
|
||||
@Operation(summary = "同步对话", description = "发送消息给大模型,同步返回回复")
|
||||
public BaseResponse<String> message(@RequestParam("content") String content) {
|
||||
@Operation(summary = "同步对话", description = "发送消息给大模型,同步返回 AI 响应,异步生成音频")
|
||||
public BaseResponse<ChatMessageVO> message(@RequestParam("content") String content) {
|
||||
if (StrUtil.isBlank(content)) {
|
||||
throw new BusinessException(ErrorCode.PARAMS_ERROR, "消息内容不能为空");
|
||||
}
|
||||
|
||||
String userId = StpUtil.getLoginIdAsString();
|
||||
String response = chatService.message(content, userId);
|
||||
ChatMessageVO result = chatService.message(content, userId);
|
||||
|
||||
return ResultUtils.success(response);
|
||||
return ResultUtils.success(result);
|
||||
}
|
||||
|
||||
|
||||
@GetMapping("/audio/{audioId}")
|
||||
@Operation(summary = "查询音频状态", description = "根据音频 ID 查询音频生成状态和 URL")
|
||||
public BaseResponse<AudioTaskVO> getAudioTask(@PathVariable("audioId") String audioId) {
|
||||
if (StrUtil.isBlank(audioId)) {
|
||||
throw new BusinessException(ErrorCode.PARAMS_ERROR, "音频 ID 不能为空");
|
||||
}
|
||||
|
||||
AudioTaskVO result = chatService.getAudioTask(audioId);
|
||||
return ResultUtils.success(result);
|
||||
}
|
||||
|
||||
@PostMapping("/talk")
|
||||
@Operation(summary = "聊天润色接口", description = "聊天润色接口")
|
||||
public Flux<ServerSentEvent<ChatStreamMessage>> talk(@RequestBody ChatReq chatReq){
|
||||
|
||||
37
src/main/java/com/yolo/keyborad/model/vo/AudioTaskVO.java
Normal file
37
src/main/java/com/yolo/keyborad/model/vo/AudioTaskVO.java
Normal file
@@ -0,0 +1,37 @@
|
||||
package com.yolo.keyborad.model.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* 音频任务状态
|
||||
*
|
||||
* @author ziin
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Schema(description = "音频任务状态")
|
||||
public class AudioTaskVO {
|
||||
|
||||
@Schema(description = "音频任务 ID")
|
||||
private String audioId;
|
||||
|
||||
@Schema(description = "任务状态: pending/processing/completed/failed")
|
||||
private String status;
|
||||
|
||||
@Schema(description = "音频 URL (completed 时返回)")
|
||||
private String audioUrl;
|
||||
|
||||
@Schema(description = "错误信息 (failed 时返回)")
|
||||
private String errorMessage;
|
||||
|
||||
public static final String STATUS_PENDING = "pending";
|
||||
public static final String STATUS_PROCESSING = "processing";
|
||||
public static final String STATUS_COMPLETED = "completed";
|
||||
public static final String STATUS_FAILED = "failed";
|
||||
}
|
||||
29
src/main/java/com/yolo/keyborad/model/vo/ChatMessageVO.java
Normal file
29
src/main/java/com/yolo/keyborad/model/vo/ChatMessageVO.java
Normal file
@@ -0,0 +1,29 @@
|
||||
package com.yolo.keyborad.model.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* 消息响应(含异步音频)
|
||||
*
|
||||
* @author ziin
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Schema(description = "消息响应")
|
||||
public class ChatMessageVO {
|
||||
|
||||
@Schema(description = "AI 响应文本")
|
||||
private String aiResponse;
|
||||
|
||||
@Schema(description = "音频任务 ID,用于查询音频状态")
|
||||
private String audioId;
|
||||
|
||||
@Schema(description = "LLM 耗时(毫秒)")
|
||||
private Long llmDuration;
|
||||
}
|
||||
32
src/main/java/com/yolo/keyborad/model/vo/ChatVoiceVO.java
Normal file
32
src/main/java/com/yolo/keyborad/model/vo/ChatVoiceVO.java
Normal file
@@ -0,0 +1,32 @@
|
||||
package com.yolo.keyborad.model.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* 语音对话响应
|
||||
*
|
||||
* @author ziin
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Schema(description = "语音对话响应")
|
||||
public class ChatVoiceVO {
|
||||
|
||||
@Schema(description = "用户输入内容")
|
||||
private String content;
|
||||
|
||||
@Schema(description = "AI 响应文本")
|
||||
private String aiResponse;
|
||||
|
||||
@Schema(description = "AI 语音音频 URL (R2)")
|
||||
private String audioUrl;
|
||||
|
||||
@Schema(description = "处理耗时(毫秒)")
|
||||
private Long duration;
|
||||
}
|
||||
26
src/main/java/com/yolo/keyborad/model/vo/TextToSpeechVO.java
Normal file
26
src/main/java/com/yolo/keyborad/model/vo/TextToSpeechVO.java
Normal file
@@ -0,0 +1,26 @@
|
||||
package com.yolo.keyborad.model.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* TTS 语音合成结果
|
||||
*
|
||||
* @author ziin
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Schema(description = "TTS 语音合成结果")
|
||||
public class TextToSpeechVO {
|
||||
|
||||
@Schema(description = "音频 Base64")
|
||||
private String audioBase64;
|
||||
|
||||
@Schema(description = "音频 URL (R2)")
|
||||
private String audioUrl;
|
||||
}
|
||||
@@ -2,6 +2,9 @@ package com.yolo.keyborad.service;
|
||||
|
||||
import com.yolo.keyborad.model.dto.chat.ChatReq;
|
||||
import com.yolo.keyborad.model.dto.chat.ChatStreamMessage;
|
||||
import com.yolo.keyborad.model.vo.AudioTaskVO;
|
||||
import com.yolo.keyborad.model.vo.ChatMessageVO;
|
||||
import com.yolo.keyborad.model.vo.ChatVoiceVO;
|
||||
import org.springframework.http.codec.ServerSentEvent;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
@@ -13,11 +16,20 @@ public interface ChatService {
|
||||
Flux<ServerSentEvent<ChatStreamMessage>> talk(ChatReq chatReq);
|
||||
|
||||
/**
|
||||
* 同步对话
|
||||
* 同步对话(异步生成音频)
|
||||
*
|
||||
* @param content 用户消息内容
|
||||
* @param userId 用户ID
|
||||
* @return AI 响应
|
||||
* @return AI 响应 + 音频任务 ID
|
||||
*/
|
||||
String message(String content, String userId);
|
||||
ChatMessageVO message(String content, String userId);
|
||||
|
||||
/**
|
||||
* 查询音频任务状态
|
||||
*
|
||||
* @param audioId 音频任务 ID
|
||||
* @return 音频任务状态
|
||||
*/
|
||||
AudioTaskVO getAudioTask(String audioId);
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
package com.yolo.keyborad.service;
|
||||
|
||||
import com.yolo.keyborad.model.vo.TextToSpeechVO;
|
||||
|
||||
/**
|
||||
* ElevenLabs TTS 语音合成服务接口
|
||||
*
|
||||
* @author ziin
|
||||
*/
|
||||
public interface ElevenLabsService {
|
||||
|
||||
/**
|
||||
* 将文本转换为语音(带时间戳)
|
||||
*
|
||||
* @param text 要转换的文本
|
||||
* @return 语音合成结果,包含 base64 音频
|
||||
*/
|
||||
TextToSpeechVO textToSpeechWithTimestamps(String text);
|
||||
|
||||
/**
|
||||
* 将文本转换为语音(带时间戳),使用指定语音
|
||||
*
|
||||
* @param text 要转换的文本
|
||||
* @param voiceId 语音 ID
|
||||
* @return 语音合成结果
|
||||
*/
|
||||
TextToSpeechVO textToSpeechWithTimestamps(String text, String voiceId);
|
||||
}
|
||||
@@ -14,21 +14,34 @@ import com.yolo.keyborad.model.entity.KeyboardCharacter;
|
||||
import com.yolo.keyborad.model.entity.KeyboardUser;
|
||||
import com.yolo.keyborad.model.entity.KeyboardUserCallLog;
|
||||
import com.yolo.keyborad.model.entity.KeyboardUserQuotaTotal;
|
||||
import com.yolo.keyborad.model.vo.AudioTaskVO;
|
||||
import com.yolo.keyborad.model.vo.ChatMessageVO;
|
||||
import com.yolo.keyborad.model.vo.ChatVoiceVO;
|
||||
import com.yolo.keyborad.model.vo.TextToSpeechVO;
|
||||
import com.yolo.keyborad.service.*;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.dromara.x.file.storage.core.FileInfo;
|
||||
import org.dromara.x.file.storage.core.FileStorageService;
|
||||
import org.springframework.ai.chat.client.ChatClient;
|
||||
import org.springframework.ai.openai.OpenAiChatOptions;
|
||||
import org.springframework.data.redis.core.StringRedisTemplate;
|
||||
import org.springframework.http.codec.ServerSentEvent;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Schedulers;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Base64;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
@@ -61,6 +74,18 @@ public class ChatServiceImpl implements ChatService {
|
||||
@Resource
|
||||
private UserService userService;
|
||||
|
||||
@Resource
|
||||
private ElevenLabsService elevenLabsService;
|
||||
|
||||
@Resource
|
||||
private FileStorageService fileStorageService;
|
||||
|
||||
@Resource
|
||||
private StringRedisTemplate stringRedisTemplate;
|
||||
|
||||
private static final String AUDIO_TASK_PREFIX = "audio:task:";
|
||||
private static final long AUDIO_TASK_EXPIRE_SECONDS = 3600; // 1小时过期
|
||||
|
||||
private final NacosAppConfigCenter.DynamicAppConfig cfgHolder;
|
||||
|
||||
public ChatServiceImpl(NacosAppConfigCenter.DynamicAppConfig cfgHolder) {
|
||||
@@ -323,18 +348,43 @@ public class ChatServiceImpl implements ChatService {
|
||||
}
|
||||
|
||||
/**
|
||||
* 同步对话
|
||||
* 同步对话(异步生成音频)
|
||||
*
|
||||
* @param content 用户消息内容
|
||||
* @param userId 用户ID
|
||||
* @return AI 响应
|
||||
* @return AI 响应 + 音频任务 ID
|
||||
*/
|
||||
@Override
|
||||
public String message(String content, String userId) {
|
||||
public ChatMessageVO message(String content, String userId) {
|
||||
log.info("同步对话请求, userId: {}, content: {}", userId, content);
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
String response = callLLM(content);
|
||||
|
||||
long llmDuration = System.currentTimeMillis() - startTime;
|
||||
log.info("LLM 完成, userId: {}, 耗时: {}ms, 响应长度: {}", userId, llmDuration, response.length());
|
||||
|
||||
// 生成音频任务 ID
|
||||
String audioId = UUID.randomUUID().toString().replace("-", "");
|
||||
|
||||
// 初始化音频任务状态为 processing
|
||||
setAudioTaskStatus(audioId, AudioTaskVO.STATUS_PROCESSING, null, null);
|
||||
|
||||
// 异步执行 TTS + R2 上传
|
||||
CompletableFuture.runAsync(() -> processAudioAsync(audioId, response, userId));
|
||||
|
||||
return ChatMessageVO.builder()
|
||||
.aiResponse(response)
|
||||
.audioId(audioId)
|
||||
.llmDuration(llmDuration)
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* 调用 LLM 生成响应
|
||||
*/
|
||||
private String callLLM(String content) {
|
||||
String systemPrompt = """
|
||||
ROLE: You are "Tiffany", a 17-year-old popular high school student from Beverly Hills.
|
||||
You are sassy, judgmental, impatient, but secretly caring.
|
||||
@@ -347,16 +397,103 @@ public class ChatServiceImpl implements ChatService {
|
||||
Keep responses concise (2-3 sentences max) for real-time conversation.
|
||||
""";
|
||||
|
||||
String response = client
|
||||
return client
|
||||
.prompt()
|
||||
.system(systemPrompt)
|
||||
.user(content)
|
||||
.call()
|
||||
.content();
|
||||
}
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
log.info("同步对话完成, userId: {}, 耗时: {}ms, 响应长度: {}", userId, duration, response.length());
|
||||
/**
|
||||
* 异步处理音频:TTS 转换 + 上传 R2
|
||||
*/
|
||||
private void processAudioAsync(String audioId, String text, String userId) {
|
||||
try {
|
||||
log.info("开始异步音频处理, audioId: {}", audioId);
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
return response;
|
||||
// 1. TTS 转换
|
||||
long ttsStart = System.currentTimeMillis();
|
||||
TextToSpeechVO ttsResult = elevenLabsService.textToSpeechWithTimestamps(text);
|
||||
long ttsDuration = System.currentTimeMillis() - ttsStart;
|
||||
log.info("TTS 完成, audioId: {}, 耗时: {}ms", audioId, ttsDuration);
|
||||
|
||||
// 2. 上传到 R2
|
||||
long uploadStart = System.currentTimeMillis();
|
||||
String audioUrl = uploadAudioToR2(ttsResult.getAudioBase64(), userId);
|
||||
long uploadDuration = System.currentTimeMillis() - uploadStart;
|
||||
log.info("R2 上传完成, audioId: {}, 耗时: {}ms, URL: {}", audioId, uploadDuration, audioUrl);
|
||||
|
||||
// 3. 更新任务状态为完成
|
||||
setAudioTaskStatus(audioId, AudioTaskVO.STATUS_COMPLETED, audioUrl, null);
|
||||
|
||||
long totalDuration = System.currentTimeMillis() - startTime;
|
||||
log.info("异步音频处理完成, audioId: {}, 总耗时: {}ms (TTS: {}ms, Upload: {}ms)",
|
||||
audioId, totalDuration, ttsDuration, uploadDuration);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("异步音频处理失败, audioId: {}", audioId, e);
|
||||
setAudioTaskStatus(audioId, AudioTaskVO.STATUS_FAILED, null, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 设置音频任务状态
|
||||
*/
|
||||
private void setAudioTaskStatus(String audioId, String status, String audioUrl, String errorMessage) {
|
||||
String key = AUDIO_TASK_PREFIX + audioId;
|
||||
String value = status + "|" + (audioUrl != null ? audioUrl : "") + "|" + (errorMessage != null ? errorMessage : "");
|
||||
stringRedisTemplate.opsForValue().set(key, value, AUDIO_TASK_EXPIRE_SECONDS, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询音频任务状态
|
||||
*/
|
||||
@Override
|
||||
public AudioTaskVO getAudioTask(String audioId) {
|
||||
String key = AUDIO_TASK_PREFIX + audioId;
|
||||
String value = stringRedisTemplate.opsForValue().get(key);
|
||||
|
||||
if (cn.hutool.core.util.StrUtil.isBlank(value)) {
|
||||
return AudioTaskVO.builder()
|
||||
.audioId(audioId)
|
||||
.status(AudioTaskVO.STATUS_PENDING)
|
||||
.build();
|
||||
}
|
||||
|
||||
String[] parts = value.split("\\|", -1);
|
||||
return AudioTaskVO.builder()
|
||||
.audioId(audioId)
|
||||
.status(parts[0])
|
||||
.audioUrl(parts.length > 1 && !parts[1].isEmpty() ? parts[1] : null)
|
||||
.errorMessage(parts.length > 2 && !parts[2].isEmpty() ? parts[2] : null)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 上传音频到 R2
|
||||
*/
|
||||
private String uploadAudioToR2(String audioBase64, String userId) {
|
||||
if (cn.hutool.core.util.StrUtil.isBlank(audioBase64)) {
|
||||
throw new BusinessException(ErrorCode.SYSTEM_ERROR, "音频数据为空");
|
||||
}
|
||||
|
||||
byte[] audioBytes = Base64.getDecoder().decode(audioBase64);
|
||||
String fileName = UUID.randomUUID() + ".mp3";
|
||||
|
||||
FileInfo fileInfo = fileStorageService.of(new ByteArrayInputStream(audioBytes))
|
||||
.setPath(userId + "/")
|
||||
.setPlatform("cloudflare-r2")
|
||||
.setSaveFilename(fileName)
|
||||
.setOriginalFilename(fileName)
|
||||
.upload();
|
||||
|
||||
if (fileInfo == null || cn.hutool.core.util.StrUtil.isBlank(fileInfo.getUrl())) {
|
||||
throw new BusinessException(ErrorCode.SYSTEM_ERROR, "音频上传失败");
|
||||
}
|
||||
|
||||
return fileInfo.getUrl();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
package com.yolo.keyborad.service.impl;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.yolo.keyborad.common.ErrorCode;
|
||||
import com.yolo.keyborad.config.ElevenLabsProperties;
|
||||
import com.yolo.keyborad.exception.BusinessException;
|
||||
import com.yolo.keyborad.model.vo.TextToSpeechVO;
|
||||
import com.yolo.keyborad.service.ElevenLabsService;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* ElevenLabs TTS 语音合成服务实现
|
||||
* 参考: https://elevenlabs.io/docs/api-reference/text-to-speech/convert-with-timestamps
|
||||
*
|
||||
* @author ziin
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class ElevenLabsServiceImpl implements ElevenLabsService {
|
||||
|
||||
@Resource
|
||||
private ElevenLabsProperties elevenLabsProperties;
|
||||
|
||||
private static final int MAX_TEXT_LENGTH = 5000;
|
||||
|
||||
@Override
|
||||
public TextToSpeechVO textToSpeechWithTimestamps(String text) {
|
||||
return textToSpeechWithTimestamps(text, elevenLabsProperties.getVoiceId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TextToSpeechVO textToSpeechWithTimestamps(String text, String voiceId) {
|
||||
// 1. 参数验证
|
||||
if (StrUtil.isBlank(text)) {
|
||||
throw new BusinessException(ErrorCode.PARAMS_ERROR, "文本内容不能为空");
|
||||
}
|
||||
|
||||
if (text.length() > MAX_TEXT_LENGTH) {
|
||||
throw new BusinessException(ErrorCode.PARAMS_ERROR,
|
||||
"文本长度超出限制,最大支持 " + MAX_TEXT_LENGTH + " 字符");
|
||||
}
|
||||
|
||||
if (StrUtil.isBlank(voiceId)) {
|
||||
voiceId = elevenLabsProperties.getVoiceId();
|
||||
}
|
||||
|
||||
HttpURLConnection connection = null;
|
||||
try {
|
||||
// 2. 构建请求 URL
|
||||
String requestUrl = buildRequestUrl(voiceId);
|
||||
URL url = new URL(requestUrl);
|
||||
|
||||
// 3. 创建连接
|
||||
connection = (HttpURLConnection) url.openConnection();
|
||||
connection.setRequestMethod("POST");
|
||||
connection.setDoOutput(true);
|
||||
connection.setDoInput(true);
|
||||
connection.setConnectTimeout(30000);
|
||||
connection.setReadTimeout(60000);
|
||||
|
||||
// 4. 设置请求头
|
||||
connection.setRequestProperty("Content-Type", "application/json");
|
||||
connection.setRequestProperty("xi-api-key", elevenLabsProperties.getApiKey());
|
||||
|
||||
// 5. 构建请求体
|
||||
Map<String, Object> requestBody = buildRequestBody(text);
|
||||
String jsonBody = JSON.toJSONString(requestBody);
|
||||
|
||||
log.info("调用 ElevenLabs TTS API, voiceId: {}, 文本长度: {}", voiceId, text.length());
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
// 6. 发送请求
|
||||
try (OutputStream os = connection.getOutputStream()) {
|
||||
byte[] input = jsonBody.getBytes(StandardCharsets.UTF_8);
|
||||
os.write(input, 0, input.length);
|
||||
}
|
||||
|
||||
// 7. 获取响应
|
||||
int responseCode = connection.getResponseCode();
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
log.info("ElevenLabs TTS API 响应码: {}, 耗时: {}ms", responseCode, duration);
|
||||
|
||||
if (responseCode == HttpURLConnection.HTTP_OK) {
|
||||
// 读取响应 JSON
|
||||
try (InputStream is = connection.getInputStream();
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||
byte[] buffer = new byte[8192];
|
||||
int bytesRead;
|
||||
while ((bytesRead = is.read(buffer)) != -1) {
|
||||
baos.write(buffer, 0, bytesRead);
|
||||
}
|
||||
String responseJson = baos.toString(StandardCharsets.UTF_8);
|
||||
JSONObject jsonResponse = JSON.parseObject(responseJson);
|
||||
|
||||
String audioBase64 = jsonResponse.getString("audio_base64");
|
||||
|
||||
log.info("语音合成成功,Base64长度: {}", audioBase64.length());
|
||||
|
||||
return TextToSpeechVO.builder()
|
||||
.audioBase64(audioBase64)
|
||||
.build();
|
||||
}
|
||||
} else {
|
||||
// 读取错误信息
|
||||
String errorMsg = "";
|
||||
try (InputStream es = connection.getErrorStream()) {
|
||||
if (es != null) {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
byte[] buffer = new byte[1024];
|
||||
int bytesRead;
|
||||
while ((bytesRead = es.read(buffer)) != -1) {
|
||||
baos.write(buffer, 0, bytesRead);
|
||||
}
|
||||
errorMsg = baos.toString(StandardCharsets.UTF_8);
|
||||
}
|
||||
}
|
||||
log.error("ElevenLabs TTS API 调用失败, 状态码: {}, 错误信息: {}", responseCode, errorMsg);
|
||||
throw new BusinessException(ErrorCode.SYSTEM_ERROR, "语音合成服务异常: " + responseCode);
|
||||
}
|
||||
|
||||
} catch (BusinessException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
log.error("调用 ElevenLabs TTS API 发生异常", e);
|
||||
throw new BusinessException(ErrorCode.SYSTEM_ERROR, "语音合成服务异常: " + e.getMessage());
|
||||
} finally {
|
||||
if (connection != null) {
|
||||
connection.disconnect();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建 ElevenLabs TTS API 请求 URL(带时间戳)
|
||||
*/
|
||||
private String buildRequestUrl(String voiceId) {
|
||||
StringBuilder url = new StringBuilder(elevenLabsProperties.getBaseUrl());
|
||||
url.append("/text-to-speech/").append(voiceId).append("/with-timestamps");
|
||||
url.append("?output_format=").append(elevenLabsProperties.getOutputFormat());
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建请求体
|
||||
*/
|
||||
private Map<String, Object> buildRequestBody(String text) {
|
||||
Map<String, Object> requestBody = new HashMap<>();
|
||||
requestBody.put("text", text);
|
||||
requestBody.put("model_id", elevenLabsProperties.getModelId());
|
||||
|
||||
// 设置语音参数
|
||||
Map<String, Object> voiceSettings = new HashMap<>();
|
||||
voiceSettings.put("stability", elevenLabsProperties.getStability());
|
||||
voiceSettings.put("similarity_boost", elevenLabsProperties.getSimilarityBoost());
|
||||
voiceSettings.put("style", elevenLabsProperties.getStyle());
|
||||
voiceSettings.put("speed", elevenLabsProperties.getSpeed());
|
||||
voiceSettings.put("use_speaker_boost", elevenLabsProperties.getUseSpeakerBoost());
|
||||
requestBody.put("voice_settings", voiceSettings);
|
||||
|
||||
return requestBody;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user