CoquiTTS/fromsoundcardpf32.py

182 lines
6.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pyaudio
import wave
import numpy as np
import time
from pydub import AudioSegment
# Recording parameters shared by the capture loop and the WAV writer.

# Sample format requested from PyAudio: 32-bit floating point.
FORMAT = pyaudio.paFloat32
# Number of channels; used both when opening the stream and in the WAV header.
CHANNELS = 1
# Sample rate in Hz; used both when opening the stream and in the WAV header.
RATE = 44100
# Frames per buffer and per `stream.read` call.
CHUNK = 1024
# Path of the raw (un-normalized) recording.
OUTPUT_FILENAME = "output.wav"
# Peak absolute sample value that counts as "sound"; chosen for the
# paFloat32 sample range of [-1.0, 1.0].
THRESHOLD = 0.008
# Seconds of continuous silence after which recording stops.
SILENCE_DURATION = 2
# List every input-capable audio device and explain what it is for.
def list_audio_devices():
    """Print a table of input-capable audio devices and return them.

    Each device reported by PyAudio with at least one input channel is
    labelled by matching keywords in its lower-cased name: "stereo mix" /
    "what you hear", then "loopback", then "mic" / "microphone"; anything
    else is labelled as a generic audio device.

    Returns:
        A list of ``(device_index, device_name, device_type_label)`` tuples,
        one per input-capable device, in enumeration order.
    """
    audio = pyaudio.PyAudio()
    device_info = []
    try:
        print("\n🎤 **可用的录音设备列表**\n")
        print(f"{'ID':<5}{'设备名称':<35}{'输入通道数':<15}{'设备类型'}")
        print("=" * 80)
        for i in range(audio.get_device_count()):
            dev = audio.get_device_info_by_index(i)
            channels = dev['maxInputChannels']
            if channels <= 0:
                # Output-only device: cannot be recorded from.
                continue

            # Classify the device by keywords in its name.
            name = dev['name'].lower()
            if "stereo mix" in name or "what you hear" in name:
                device_type = "✅ 立体声混音(推荐)"
            elif "loopback" in name:
                device_type = "🔄 环回录音(可选)"
            elif "mic" in name or "microphone" in name:
                device_type = "🎤 麦克风(不推荐)"
            else:
                device_type = "🎧 其他音频设备"

            print(f"{i:<5}{dev['name']:<35}{channels:<15}{device_type}")
            device_info.append((i, dev['name'], device_type))
    finally:
        # Always release PyAudio, even if a device query raises.
        audio.terminate()

    print("\n✅ **如果 '立体声混音' 存在,优先使用它**,否则尝试 '环回' 或手动选择。\n")
    return device_info
# Automatically pick the best recording device.
def get_best_device():
    """Choose the input device index to record system audio from.

    Selection order among devices that have at least one input channel:

    1. The first device whose name contains "stereo mix" or "what you hear"
       (the scan stops as soon as one is found).
    2. Otherwise the first device whose name contains "loopback".
    3. Otherwise the first device whose name does not contain "mic".
    4. Otherwise the device table is printed and the user is prompted for
       an index on stdin.

    Returns:
        int: The chosen device index.

    Raises:
        RuntimeError: If the manually entered device ID is not an integer.
    """
    audio = pyaudio.PyAudio()
    best_device = None
    fallback_device = None
    print("\n🔍 **正在尝试自动选择最佳录音设备...**")
    try:
        for i in range(audio.get_device_count()):
            dev = audio.get_device_info_by_index(i)
            if dev['maxInputChannels'] <= 0:
                # Output-only device: cannot be recorded from.
                continue
            name = dev['name'].lower()

            # First choice: a "stereo mix" style device; stop scanning.
            if "stereo mix" in name or "what you hear" in name:
                print(f"✅ 选择设备: {dev['name']} (ID: {i}) - 立体声混音(最佳)")
                best_device = i
                break

            # Second choice: the first loopback device. Scanning continues
            # so a later stereo-mix device can still take precedence.
            if "loopback" in name and best_device is None:
                print(f"🔄 选择设备: {dev['name']} (ID: {i}) - 环回录音")
                best_device = i

            # Last resort: remember the first device that does not look like
            # a microphone. Note that "mic" also matches any name containing
            # "microphone" or "microsoft".
            if "mic" not in name and "microphone" not in name and fallback_device is None:
                fallback_device = i
    finally:
        # Always release PyAudio, even if a device query raises.
        audio.terminate()

    if best_device is not None:
        return best_device
    if fallback_device is not None:
        print(f"⚠️ 没有找到 '立体声混音',使用默认设备 (ID: {fallback_device})")
        return fallback_device

    # Nothing suitable was detected: show the table and ask the user.
    print("\n❌ 没有检测到合适的设备,请手动选择一个设备 ID")
    list_audio_devices()
    selected_device = input("🔹 请输入你要使用的设备 ID数字: ")
    try:
        return int(selected_device)
    except ValueError as err:
        raise RuntimeError("❌ 设备 ID 无效,程序终止!") from err
# Show the table of available input devices.
device_list = list_audio_devices()
# Pick the device to record from.
device_index = get_best_device()

# Gain applied to every recorded chunk before it is stored.
volume_boost = 5.0

# Initialize PyAudio.
audio = pyaudio.PyAudio()
stream = None
try:
    try:
        # input_device_index=None makes PyAudio use the default input device,
        # so no separate branch is needed for a missing index.
        stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
                            input_device_index=device_index,
                            frames_per_buffer=CHUNK)
        print("🎤 只录制系统声音,麦克风已禁用!")
        print("🎤 等待声音触发录音...")

        # Wait until the peak of a raw chunk exceeds the threshold.
        while True:
            data = stream.read(CHUNK, exception_on_overflow=False)
            audio_data = np.frombuffer(data, dtype=np.float32)  # 32-bit float samples
            volume = np.max(np.abs(audio_data))
            if volume > THRESHOLD:
                print("🎙 检测到声音,开始录音...")
                break

        # Keep the triggering chunk, boosted like every later chunk so the
        # recording does not start with a quieter first buffer.
        frames = [np.clip(audio_data * volume_boost, -1.0, 1.0)]
        silent_start = None

        # Record until the signal stays below the threshold long enough.
        while True:
            data = stream.read(CHUNK, exception_on_overflow=False)
            audio_data = np.frombuffer(data, dtype=np.float32)
            # Amplify, clipping to the valid float range to avoid overflow.
            audio_data = np.clip(audio_data * volume_boost, -1.0, 1.0)
            frames.append(audio_data)

            # NOTE: silence is judged on the boosted signal, so the stop
            # threshold is effectively THRESHOLD / volume_boost on raw input.
            volume = np.max(np.abs(audio_data))
            if volume >= THRESHOLD:
                silent_start = None
            elif silent_start is None:
                silent_start = time.monotonic()
            elif time.monotonic() - silent_start >= SILENCE_DURATION:
                print(f"🤫 检测到静音超过 {SILENCE_DURATION} 秒,停止录音...")
                break

        print("🎼 录音结束,正在保存文件...")
    finally:
        # Release the stream and PyAudio exactly once, on success or failure.
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

    # Convert the float32 samples to 16-bit PCM (standard WAV format).
    pcm = (np.concatenate(frames) * 32767).astype(np.int16)

    # Save as WAV; the context manager closes the file even on error.
    with wave.open(OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(2)  # 16-bit PCM
        wf.setframerate(RATE)
        wf.writeframes(pcm.tobytes())
    print(f"✅ 录音已保存为 {OUTPUT_FILENAME}")

    # Normalize the volume. A distinct name is used so the PyAudio handle
    # `audio` is not shadowed by the AudioSegment.
    recording = AudioSegment.from_wav(OUTPUT_FILENAME)
    # Normalize against the peak level (max_dBFS). Using the RMS-based
    # `dBFS` would push the average level to full scale and clip heavily.
    peak_dbfs = recording.max_dBFS
    gain = 0.0 if peak_dbfs == float("-inf") else -peak_dbfs
    normalized_audio = recording.apply_gain(gain)
    normalized_audio.export("output_loud.wav", format="wav")
    print("✅ 录音音量已调整,保存为 output_loud.wav")
except Exception as e:
    print(f"❌ 录音失败: {e}")