import torch
from TTS.tts.models.xtts import XttsAudioConfig          # allowlist XttsAudioConfig
from TTS.tts.configs.xtts_config import XttsConfig       # allowlist XttsConfig
from TTS.config.shared_configs import BaseDatasetConfig  # allowlist BaseDatasetConfig
from TTS.tts.models.xtts import XttsArgs                 # allowlist XttsArgs

# Add XttsConfig, XttsAudioConfig, BaseDatasetConfig, and XttsArgs to PyTorch's safe globals
torch.serialization.add_safe_globals([XttsConfig, XttsAudioConfig, BaseDatasetConfig, XttsArgs])

# Force weights_only=False when loading checkpoints
def load_fsspec_fixed(*args, **kwargs):
    kwargs["weights_only"] = False  # key fix: newer PyTorch releases default to weights_only=True
    return torch.load(*args, **kwargs)

# Override Coqui TTS's load_fsspec helper
import TTS.utils.io
TTS.utils.io.load_fsspec = load_fsspec_fixed

from TTS.api import TTS
import os
from pydub import AudioSegment  # used to concatenate the audio clips

# Select the XTTS v2 multilingual model
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# Load the TTS model
print("Loading the pretrained TTS model, please wait...")
tts = TTS(MODEL_NAME)

# Specify the reference (speaker) audio
speaker_wav_zh = "example_speaker_zh.wav"  # you must provide this audio file
speaker_wav_en = "example_speaker_en.wav"  # you must provide this audio file

# Make sure the reference audio files exist
for speaker_wav in (speaker_wav_zh, speaker_wav_en):
    if not os.path.exists(speaker_wav):
        raise FileNotFoundError(f"Error: {speaker_wav} not found; XTTS v2 requires a reference audio file!")

# Split the text by language
text_cn1 = "你好,欢迎使用"      # "Hello, welcome to using"
text_en = "Coqui TTS"
text_cn2 = "进行中文语音合成!"  # "for Chinese speech synthesis!"
test_all = "Not long ago, my colleague Carlos Fenollosa made a bold claim in his book ‘La singularidad’: ChatGPT is AGI. My initial reaction was immediate dismissal. But knowing his careful reasoning, I kept reading. As I followed his argument, something unexpected happened — I found myself agreeing, though from an entirely different perspective. What I discovered challenges both those waiting for AGI’s arrival and those claiming it’s nowhere near: we might be asking the wrong question altogether."

# Generate the intermediate audio files
tts.tts_to_file(text=text_cn1, file_path="part1.wav", speaker_wav=speaker_wav_zh, language="zh-cn")
tts.tts_to_file(text=text_en, file_path="part2.wav", speaker_wav=speaker_wav_en, language="en")
tts.tts_to_file(text=text_cn2, file_path="part3.wav", speaker_wav=speaker_wav_zh, language="zh-cn")
# tts.tts_to_file(text=test_all, file_path="all.wav", speaker_wav=speaker_wav_en, language="en")

# Concatenate the audio clips
audio1 = AudioSegment.from_wav("part1.wav")
audio2 = AudioSegment.from_wav("part2.wav")
audio3 = AudioSegment.from_wav("part3.wav")
final_audio = audio1 + audio2 + audio3
final_audio.export("output.wav", format="wav")

print("✅ Speech synthesis complete! Saved to output.wav")
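
# Optional cleanup (a minimal sketch, not part of the original script): once output.wav
# has been written, the intermediate part*.wav files are no longer needed and can be
# removed. Assumes you do not want to keep the per-segment clips for debugging.
for temp_part in ("part1.wav", "part2.wav", "part3.wav"):
    if os.path.exists(temp_part):
        os.remove(temp_part)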