"""Record audio from a system-playback capture device until it goes silent.

The script lists the available input devices, picks a "stereo mix" /
"what you hear" device (or a loopback device, or the first non-microphone
input), waits until the input peak exceeds ``THRESHOLD``, records until the
signal stays below ``THRESHOLD`` for ``SILENCE_DURATION`` seconds, writes the
result to ``OUTPUT_FILENAME`` as 16-bit PCM WAV, and finally exports a
gain-adjusted copy to ``NORMALIZED_FILENAME``.
"""
import time
import wave

import numpy as np
import pyaudio
from pydub import AudioSegment

# Recording parameters
FORMAT = pyaudio.paFloat32  # 32-bit float samples
CHANNELS = 1
RATE = 44100
CHUNK = 1024
OUTPUT_FILENAME = "output.wav"
THRESHOLD = 0.008  # peak level on the paFloat32 scale ([-1.0, 1.0])
SILENCE_DURATION = 2  # seconds of continuous silence that end the recording
VOLUME_BOOST = 5.0  # linear gain applied to every recorded chunk
NORMALIZED_FILENAME = "output_loud.wav"


def _is_stereo_mix(name: str) -> bool:
    """Return True if the lower-cased device name looks like a stereo-mix input."""
    return "stereo mix" in name or "what you hear" in name


def _describe_device(name: str) -> str:
    """Return the display label for an input device given its lower-cased name."""
    if _is_stereo_mix(name):
        return "✅ 立体声混音(推荐)"
    if "loopback" in name:
        return "🔄 环回录音(可选)"
    # "microphone" contains "mic", so a single substring test covers both.
    if "mic" in name:
        return "🎤 麦克风(不推荐)"
    return "🎧 其他音频设备"


def list_audio_devices() -> list:
    """Print a table of all input-capable devices and return their details.

    Returns:
        A list of ``(device_index, device_name, device_type_label)`` tuples,
        one per device that reports at least one input channel.
    """
    pa = pyaudio.PyAudio()
    device_info = []

    print("\n🎤 **可用的录音设备列表**\n")
    print(f"{'ID':<5}{'设备名称':<35}{'输入通道数':<15}{'设备类型'}")
    print("=" * 80)

    try:
        for i in range(pa.get_device_count()):
            dev = pa.get_device_info_by_index(i)
            channels = dev['maxInputChannels']
            if channels <= 0:
                continue  # output-only device

            device_type = _describe_device(dev['name'].lower())
            print(f"{i:<5}{dev['name']:<35}{channels:<15}{device_type}")
            device_info.append((i, dev['name'], device_type))
    finally:
        # Release PortAudio even if device enumeration raises.
        pa.terminate()

    print("\n✅ **如果 '立体声混音' 存在,优先使用它**,否则尝试 '环回' 或手动选择。\n")
    return device_info


def get_best_device() -> int:
    """Pick the input device index to record from.

    Preference order: a stereo-mix device (search stops at the first one),
    then the first loopback device, then the first input device whose name
    does not contain "mic". If none of these exist, the device table is
    printed and the user is asked to type a device ID.

    Returns:
        The chosen device index.

    Raises:
        RuntimeError: If the manually entered device ID is not an integer.
    """
    pa = pyaudio.PyAudio()
    best_device = None
    fallback_device = None

    print("\n🔍 **正在尝试自动选择最佳录音设备...**")

    try:
        for i in range(pa.get_device_count()):
            dev = pa.get_device_info_by_index(i)
            if dev['maxInputChannels'] <= 0:
                continue

            name = dev['name'].lower()

            # First choice: stereo mix. Nothing can beat it, so stop searching.
            if _is_stereo_mix(name):
                print(f"✅ 选择设备: {dev['name']} (ID: {i}) - 立体声混音(最佳)")
                best_device = i
                break

            # Second choice: loopback. Keep scanning in case a stereo mix follows.
            if "loopback" in name and best_device is None:
                print(f"🔄 选择设备: {dev['name']} (ID: {i}) - 环回录音")
                best_device = i

            # Last resort: remember the first input that is not a microphone.
            if "mic" not in name and fallback_device is None:
                fallback_device = i
    finally:
        pa.terminate()

    if best_device is not None:
        return best_device
    if fallback_device is not None:
        print(f"⚠️ 没有找到 '立体声混音',使用默认设备 (ID: {fallback_device})")
        return fallback_device

    print("\n❌ 没有检测到合适的设备,请手动选择一个设备 ID:")
    list_audio_devices()
    selected_device = input("🔹 请输入你要使用的设备 ID(数字): ")
    try:
        return int(selected_device)
    except ValueError as err:
        raise RuntimeError("❌ 设备 ID 无效,程序终止!") from err


def _read_chunk(stream) -> np.ndarray:
    """Read one CHUNK of frames from *stream* as a float32 array."""
    raw = stream.read(CHUNK, exception_on_overflow=False)
    return np.frombuffer(raw, dtype=np.float32)


def _peak(samples: np.ndarray) -> float:
    """Return the largest absolute sample value, or 0.0 for an empty chunk."""
    if samples.size == 0:
        return 0.0  # np.max would raise on an empty array
    return float(np.max(np.abs(samples)))


def _boost(samples: np.ndarray) -> np.ndarray:
    """Scale *samples* by VOLUME_BOOST, clipped to the valid [-1.0, 1.0] range."""
    return np.clip(samples * VOLUME_BOOST, -1.0, 1.0)


def _record_until_silence(stream) -> np.ndarray:
    """Wait for sound on *stream*, then record until it has gone quiet.

    Recording starts with the first chunk whose peak exceeds ``THRESHOLD``
    and stops once the boosted signal has stayed below ``THRESHOLD`` for
    ``SILENCE_DURATION`` seconds.

    Returns:
        All recorded samples (boosted and clipped) as one float32 array.
    """
    print("🎤 等待声音触发录音...")

    while True:
        samples = _read_chunk(stream)
        if _peak(samples) > THRESHOLD:
            print("🎙 检测到声音,开始录音...")
            break

    # The triggering chunk gets the same boost as every later chunk, so the
    # onset of the recording is not quieter than the rest.
    frames = [_boost(samples)]
    silent_start = None

    while True:
        boosted = _boost(_read_chunk(stream))
        frames.append(boosted)

        # Silence is judged on the boosted signal, as in the original logic.
        if _peak(boosted) < THRESHOLD:
            # monotonic() is immune to wall-clock adjustments.
            now = time.monotonic()
            if silent_start is None:
                silent_start = now
            elif now - silent_start >= SILENCE_DURATION:
                print(f"🤫 检测到静音超过 {SILENCE_DURATION} 秒,停止录音...")
                break
        else:
            silent_start = None

    return np.concatenate(frames)


def _save_wav(samples: np.ndarray, path: str) -> None:
    """Write float samples in [-1.0, 1.0] to *path* as 16-bit PCM WAV."""
    # Clip before scaling so out-of-range floats cannot wrap around in int16;
    # '<i2' forces the little-endian layout that WAV requires.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype('<i2')
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(2)  # 16-bit PCM
        wf.setframerate(RATE)
        wf.writeframes(pcm.tobytes())


def _export_normalized(src: str, dst: str) -> None:
    """Export a copy of WAV file *src* to *dst* with gain ``-dBFS`` applied.

    NOTE(review): pydub's ``dBFS`` appears to be an RMS-based level, so this
    gain may push peaks into clipping; kept as in the original -- confirm
    that this is the intended loudness target.
    """
    segment = AudioSegment.from_wav(src)
    level = segment.dBFS
    # A completely silent file reports -inf dBFS; skip the gain in that case.
    gain = -level if np.isfinite(level) else 0.0
    segment.apply_gain(gain).export(dst, format="wav")


def main() -> None:
    """Run the full flow: list devices, pick one, record, save, normalize."""
    list_audio_devices()
    device_index = get_best_device()

    pa = pyaudio.PyAudio()
    stream = None
    try:
        try:
            open_kwargs = dict(format=FORMAT, channels=CHANNELS, rate=RATE,
                               input=True, frames_per_buffer=CHUNK)
            if device_index is not None:
                open_kwargs["input_device_index"] = device_index
            stream = pa.open(**open_kwargs)

            print("🎤 只录制系统声音,麦克风已禁用!")
            samples = _record_until_silence(stream)
            print("🎼 录音结束,正在保存文件...")
        finally:
            # Always release the stream and PortAudio, exactly once, whether
            # recording succeeded, failed, or was interrupted.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            pa.terminate()

        _save_wav(samples, OUTPUT_FILENAME)
        print(f"✅ 录音已保存为 {OUTPUT_FILENAME}")

        _export_normalized(OUTPUT_FILENAME, NORMALIZED_FILENAME)
        print("✅ 录音音量已调整,保存为 output_loud.wav")
    except Exception as e:  # top-level boundary: report the failure and exit
        print(f"❌ 录音失败: {e}")


if __name__ == "__main__":
    main()