python使用pyaudio录音和格式转化方式
作者:沐岚浩
这篇文章主要介绍了python使用pyaudio录音和格式转化方式,具有很好的参考价值,希望对大家有所帮助。如有错误或未考虑完全的地方,望不吝赐教
使用pyaudio录音和格式转化
环境
pip3 install pyaudio numpy(wave 是 Python 标准库自带的模块,无需通过 pip 安装)
- linux 21.04
- python 3.7
代码(Record类)
#!/bin/python3
"""Recording and WAV -> PCM conversion bundled into one helper class."""
# The shebang above selects the Python interpreter used to run this script.
import contextlib
import os
import sys
import wave

import numpy as np
import pyaudio


@contextlib.contextmanager
def _suppress_stderr():
    """Temporarily redirect the stderr file descriptor to os.devnull.

    The descriptor is restored on exit, so tracebacks and later stderr
    output still work. If sys.stderr has no usable file descriptor the
    context manager does nothing.
    """
    try:
        stderr_fd = sys.stderr.fileno()
    except (AttributeError, OSError, ValueError):
        stderr_fd = None
    if stderr_fd is None:
        yield
        return

    sys.stderr.flush()
    saved_fd = os.dup(stderr_fd)
    devnull_fd = os.open(os.devnull, os.O_WRONLY)
    try:
        os.dup2(devnull_fd, stderr_fd)
        yield
    finally:
        # Always put the real stderr back, even if recording failed.
        os.dup2(saved_fd, stderr_fd)
        os.close(devnull_fd)
        os.close(saved_fd)


class Record():
    """Record audio from an input device to a WAV file and export it as PCM.

    Constructor arguments (all stored as attributes of the same name):
        WAVE_OUTPUT_FILENAME: path of the WAV file to write.
        CHUNK: frames read from the stream per call (default 1024).
        FORMAT: PyAudio sample format (default pyaudio.paInt16).
        CHANNELS: number of channels (default 1).
        RECORD_SECONDS: recording length in seconds (default 5).
        Input: passed as ``input=`` when opening the stream (default True).
        RATE: sampling rate in Hz (default 16000).
        PCMName: file name of the PCM output, written next to the WAV file.
        DataType: numpy dtype used to interpret the samples (default int16).
    """

    def __init__(self, WAVE_OUTPUT_FILENAME, CHUNK=1024,
                 FORMAT=pyaudio.paInt16, CHANNELS=1, RECORD_SECONDS=5,
                 Input=True, RATE=16000, PCMName="out.pcm",
                 DataType=np.int16):
        self.CHUNK = CHUNK
        self.FORMAT = FORMAT
        self.CHANNELS = CHANNELS
        self.RECORD_SECONDS = RECORD_SECONDS
        self.WAVE_OUTPUT_FILENAME = WAVE_OUTPUT_FILENAME
        self.Input = Input
        self.RATE = RATE
        self.PCMName = PCMName
        self.DataType = DataType

    def recording(self):
        """Record RECORD_SECONDS of audio and write it to WAVE_OUTPUT_FILENAME.

        stderr is silenced only while the audio device is in use (the
        original code hid error messages it described as harmless) and is
        restored afterwards instead of being closed for the whole process.
        """
        print("开始录音")
        with _suppress_stderr():
            p = pyaudio.PyAudio()
            try:
                # Query the sample width while the PyAudio instance is alive.
                sample_width = p.get_sample_size(self.FORMAT)
                stream = p.open(format=self.FORMAT,
                                channels=self.CHANNELS,
                                rate=self.RATE,
                                input=self.Input,  # True by default
                                frames_per_buffer=self.CHUNK)
                try:
                    chunk_count = int(
                        self.RATE / self.CHUNK * self.RECORD_SECONDS)
                    frames = [stream.read(self.CHUNK)
                              for _ in range(chunk_count)]
                    print("done")
                finally:
                    # Release the stream even if reading failed.
                    stream.stop_stream()
                    stream.close()
            finally:
                p.terminate()

        with wave.open(self.WAVE_OUTPUT_FILENAME, 'wb') as wf:
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(sample_width)
            wf.setframerate(self.RATE)
            wf.writeframes(b''.join(frames))

    def wav2pcm(self):
        """Convert the recorded WAV file to a raw PCM file.

        The audio frames are read through the ``wave`` module, so exactly
        the header is skipped (rather than a fixed 1024 bytes, which also
        discarded the beginning of the audio).

        Returns:
            Tuple ``(wav_path, pcm_path)``; the PCM file is PCMName placed
            in the same directory as the WAV file.
        """
        with wave.open(self.WAVE_OUTPUT_FILENAME, "rb") as wf:
            raw = wf.readframes(wf.getnframes())

        # Drop a trailing partial sample, as np.fromfile did in the original.
        itemsize = np.dtype(self.DataType).itemsize
        usable = len(raw) - len(raw) % itemsize
        data = np.frombuffer(raw[:usable], dtype=self.DataType)

        wav_dir = os.path.dirname(str(self.WAVE_OUTPUT_FILENAME))
        path = os.path.join(wav_dir, self.PCMName)
        data.tofile(path)
        print("结束")
        return (self.WAVE_OUTPUT_FILENAME, path)

    def run(self):
        """Record to WAV, convert to PCM, and return ``(wav_path, pcm_path)``."""
        self.recording()
        return self.wav2pcm()


# pcm2wav is deliberately kept outside the class so that it can be called
# without having to instantiate Record first.
# Standalone helper: it can be copied as-is into any class or file that needs it.
def pcm2wav(pcmfile, wavfile, channels=1, rate=16000, sampwidth=2):
    """Wrap a raw PCM file in a WAV container.

    Args:
        pcmfile: path of the raw PCM input file.
        wavfile: path of the WAV file to create.
        channels: number of channels written to the WAV header.
        rate: sampling rate in Hz written to the WAV header.
        sampwidth: sample width in bytes (default 2, i.e. 16-bit samples,
            which was previously hard-coded).
    """
    with open(pcmfile, 'rb') as fp:
        pcmdata = fp.read()
    with wave.open(wavfile, 'wb') as wav:
        wav.setnchannels(channels)
        wav.setsampwidth(sampwidth)
        wav.setframerate(rate)
        # Write the samples; the header is finalised when the file closes.
        wav.writeframes(pcmdata)


# Manual test
if __name__ == "__main__":
    wavepath = os.getcwd() + "/python/output1.wav"
    dev = Record(wavepath)
    # Uncomment to record from the microphone and convert the result:
    # dev.run()
    pcmfile = os.getcwd() + '/python/demo.pcm'
    wavfile = os.getcwd() + '/python/demo.wav'
    pcm2wav(pcmfile, wavfile)
pyaudio播放声音不清晰问题
在树莓派上使用pyaudio播放pcm文件的时候,发现明显的不清晰,感觉有轻微的断断续续,而使用aplay播放则非常清晰
测试文件:https://sis-sample-audio.obs.cn-north-1.myhuaweicloud.com/16k16bit.pcm
>>> import pyaudio >>> p = pyaudio.PyAudio() >>> stream = p.open(format=pyaudio.paInt16, channels=1, output=True) >>> with open("16k16bit.pcm", "rb") as f: ... stream.write(f.read())
# Play the raw PCM file as signed 16-bit little-endian, mono, 16 kHz
$ aplay -f S16_LE -c 1 -r 16000 16k16bit.pcm
最后发现是由于缓冲区的帧数过少导致播放不流畅,默认pyaudio缓冲区的帧数为1024。可以通过提高frames_per_buffer参数值来解决。
>>> import pyaudio >>> p = pyaudio.PyAudio() >>> stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, frames_per_buffer=4096, output=True) >>> with open("16k16bit.pcm", "rb") as f: ... stream.write(f.read())
总结
以上为个人经验,希望能给大家一个参考,也希望大家多多支持脚本之家。