使用Python实现从麦克风获取音频并识别
作者:凤枭香
这篇文章详细介绍了如何使用Python从麦克风获取音频并进行语音识别,文中的示例代码讲解详细,感兴趣的小伙伴可以跟随小编一起学习一下。
python麦克风获取音频并识别
麦克风获取
# Open an input stream on the microphone using the recording parameters
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
设置参数
# Recording parameters
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1  # mono
RATE = 16000  # sample rate: 16 kHz
CHUNK = 1024  # size of each data block
RECORD_SECONDS = 5  # recording duration (seconds)
WAVE_OUTPUT_FILENAME = "output.wav"
读取数据块
# Read CHUNK-sized blocks in a loop until RECORD_SECONDS of audio is collected
total_chunks = int(RATE / CHUNK * RECORD_SECONDS)
for _ in range(total_chunks):
    frames.append(stream.read(CHUNK))
阿里语音识别模型加载
# Local directories of the ASR, VAD, punctuation and speaker models
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# Build the combined recognition pipeline on the CPU
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
阿里语音识别
# Run recognition on the recorded WAV file, passing '魔搭' as the hotword
res = model.generate(
    input=WAVE_OUTPUT_FILENAME,
    batch_size_s=16000,
    hotword='魔搭',
)
整体代码
"""Record microphone audio between two hotkeys and transcribe it with FunASR.

Press Ctrl to start recording and Alt to stop. On stop, the captured audio is
written to WAVE_OUTPUT_FILENAME and passed to the loaded model for
recognition. The start/stop cycle can be repeated until the program exits.
"""
import os
import threading
import wave

import keyboard
import pyaudio
from funasr import AutoModel

# Recording parameters
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1  # mono
RATE = 16000  # sample rate: 16 kHz
CHUNK = 1024  # size of each data block
WAVE_OUTPUT_FILENAME = "./wav_data/output.wav"

# Initialise PyAudio and the shared recording state
audio = pyaudio.PyAudio()
frames = []  # raw buffers captured for the current recording
stream = None  # open input stream while recording, otherwise None
recording = False  # flag polled by the reader thread
record_thread = None  # thread running record_audio(), if any

paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
print("加载模型完成!!!")


def start_recording():
    """Open the microphone stream and start the background reader thread.

    Does nothing if a recording is already in progress.
    """
    global stream, recording, record_thread
    if recording:
        return
    print("开始录音...")
    # Drop buffers left over from a previous recording so that each
    # recording only contains its own audio.
    frames.clear()
    # Open the stream before flipping the flag: if opening fails, the state
    # stays "not recording" and the hotkey can be pressed again.
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )
    recording = True
    # Daemon thread so a stuck read cannot keep the process alive on exit.
    record_thread = threading.Thread(target=record_audio, daemon=True)
    record_thread.start()


def stop_recording():
    """Stop recording, save the audio to disk and run recognition on it.

    Does nothing if no recording is in progress.
    """
    global stream, recording, record_thread
    if not recording:
        return
    print("录音结束.")
    recording = False
    # Wait for the reader thread to leave stream.read() before closing the
    # stream; closing it underneath an active read is a race.
    if record_thread is not None:
        record_thread.join()
        record_thread = None
    stream.stop_stream()
    stream.close()
    stream = None
    save_audio()
    # The PyAudio instance is intentionally NOT terminated here, otherwise
    # the next start_recording() could not open a new stream.
    print("开始识别!!!")
    res = model.generate(
        input=WAVE_OUTPUT_FILENAME,
        batch_size_s=16000,
        hotword='魔搭',
    )
    print("识别结束!!!")
    print("识别结果:", res)


def record_audio():
    """Append CHUNK-sized reads from the stream to frames while recording."""
    while recording:
        frames.append(stream.read(CHUNK))


def save_audio():
    """Write the captured frames to WAVE_OUTPUT_FILENAME as a WAV file."""
    # Create the target directory on first use; wave.open does not create it.
    out_dir = os.path.dirname(WAVE_OUTPUT_FILENAME)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # The context manager closes the file even if a write fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


# Ctrl starts a recording
keyboard.add_hotkey('ctrl', start_recording)
# Alt stops the recording and triggers recognition
keyboard.add_hotkey('alt', stop_recording)

print("按 Ctrl 开始录音,按 Alt 结束录音")
try:
    keyboard.wait()  # keep the program running
finally:
    # Release audio resources exactly once, when the program is exiting.
    recording = False
    if record_thread is not None:
        record_thread.join()
    if stream is not None:
        stream.close()
    audio.terminate()
到此这篇关于使用Python实现从麦克风获取音频并识别的文章就介绍到这了,更多相关Python麦克风获取音频内容请搜索脚本之家以前的文章或继续浏览下面的相关文章,希望大家以后多多支持脚本之家!