Python 识别录音并转为文字的实现
作者:leader_ww
本文主要介绍了 Python 识别录音并转为文字的实现,文中通过示例代码介绍得非常详细,具有一定的参考价值,感兴趣的小伙伴们可以参考一下。
程式功能: 用 UI 界面,点击界面上的“开始识别”来录音(调用百度云语音接口),并自动将结果显示在界面的文本框中
Time: 2022/03/06
Author: Xiaohong
功能:Python 录音,并调用百度云语音接口将语音识别为文字
项目的文件结构方式:
1. PyQt5 UI 文件: My_Audio_Record_cloud.ui
2. PyQt5 UI 文件转换生成的 PY 文件: My_Audio_Record_cloud_Ui.py
3. PyQt5 UI 文件对应的 Class 文件: My_Audio_Record_cloud_class.py
4. 通用的消息显示 文件(在My_Audio_Record_cloud_class.py 中被调用): FangMessage.py
本例为实验性质的示例程序,不能直接用于正式产品,支持的语音录入时长也较短。
主程序界面如下:
主程序 My_Audio_Record_cloud_class.py:
# -*- coding: utf-8 -*-
"""Record speech from a PyQt5 window and show Baidu's transcription.

Clicking the "开始识别" button records a few seconds of audio from the
default input device, uploads the samples to the Baidu cloud speech
service and writes the recognized text into the window's text browser.

Time: 2022/03/06
Author: Xiaohong
"""
import io
import json
import math
import os
import sys
import time
import urllib
import urllib.parse
import urllib.request
import wave  # part of the standard library; no pip install is required

import pycurl
# pyaudio installed straight from pip may not support Python 3.7; if the
# installation fails, download the matching wheel from
# https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio
from pyaudio import PyAudio, paInt16
from PyQt5 import QtGui, QtCore, QtWidgets
from PyQt5.QtWidgets import (
    QApplication,
    QMainWindow,
    QDialog,
    QSplashScreen,
    QToolButton,
    QToolTip,
    QWidget,
    QMessageBox,
    QAction,
    QFileDialog,
)

import My_Audio_Record_cloud_Ui as my_audio_record_cloud
from FangMessage import FangMessage


class Audio_record_cloud_class(QMainWindow, my_audio_record_cloud.Ui_MainWindow):
    """Main window: records audio, sends it to Baidu and displays the text."""

    # Seconds allowed for connecting to / talking with the Baidu servers.
    NETWORK_TIMEOUT = 80

    def __init__(self, parent=None):
        """Build the UI and set the recording parameters.

        Args:
            parent: optional Qt parent widget, forwarded to ``QMainWindow``.
        """
        super().__init__(parent)
        # The widgets live on a separate Ui_MainWindow instance (``child``),
        # which is what every method below uses to reach them.
        self.child = my_audio_record_cloud.Ui_MainWindow()
        self.child.setupUi(self)

        self.file_name = ""
        self.child.pushButton.clicked.connect(self.my_start)

        # Recording parameters.
        self.framerate = 8000    # samples per second
        self.NUM_SAMPLES = 2000  # frames fetched per stream.read() call
        self.channels = 1
        self.sampwidth = 2       # bytes per sample (matches paInt16)
        # Recording length in seconds.
        self.TIME = 5
        # Playback parameter (not used by the code in this file).
        self.chunk = 1024

    def init_file_name(self):
        """Set ``self.file_name`` to ``<cwd>/test<timestamp>.wav`` and return it."""
        file_name = 'test' + self.get_timeseq() + '.wav'
        self.file_name = os.path.join(os.getcwd(), file_name)
        return self.file_name

    def get_timeseq(self):
        """Return the current local time as ``YYYY-mm-dd-HH-MM-SS``."""
        return time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())

    def Start_record(self):
        """Record ``self.TIME`` seconds of audio and save it as a WAV file.

        NOTE: this runs in the calling (GUI) thread, so the window does not
        process events while the recording is in progress.

        Raises:
            OSError: propagated from PyAudio / file access failures.
        """
        self.init_file_name()
        # Number of buffer reads that covers TIME seconds exactly.  The
        # previous ``count <= TIME * 4`` loop did one read too many and only
        # worked for framerate / NUM_SAMPLES == 4.
        reads_needed = math.ceil(self.TIME * self.framerate / self.NUM_SAMPLES)

        frames = []
        pa = PyAudio()
        try:
            stream = pa.open(
                format=paInt16,
                channels=self.channels,
                rate=self.framerate,
                input=True,
                frames_per_buffer=self.NUM_SAMPLES,
            )
            try:
                for _ in range(reads_needed):
                    frames.append(stream.read(self.NUM_SAMPLES))
                    print("..")
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            # Release the audio backend even when opening/reading fails.
            pa.terminate()

        self.save_wave_file(self.file_name, frames)
        FangMessage1 = FangMessage()
        FangMessage1.runY('完成', '已完成录音', 'OK')

    def save_wave_file(self, filename, data):
        """Write the recorded chunks to ``filename`` as a WAV file.

        Args:
            filename: path of the WAV file to create.
            data: iterable of ``bytes`` chunks holding raw PCM frames.
        """
        # The context manager closes the file even if a write fails.
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.sampwidth)
            wf.setframerate(self.framerate)
            wf.writeframes(b''.join(data))

    def dump_res(self, buf):
        """Parse the complete JSON reply from Baidu and display the text.

        Args:
            buf: the whole response body (``bytes`` or ``str``).
        """
        print(buf)
        try:
            reply = json.loads(buf)
        except ValueError:
            # Not JSON (or undecodable bytes): report instead of crashing.
            self.child.textBrowser.setText('识别失败: 无法解析服务器返回的数据')
            return

        results = reply.get('result') if isinstance(reply, dict) else None
        if not results:
            # Presumably failures carry an ``err_msg`` field - verify against
            # the Baidu API documentation; fall back to the raw reply.
            reason = reply.get('err_msg', reply) if isinstance(reply, dict) else reply
            self.child.textBrowser.setText(f'识别失败: {reason}')
            return

        self.child.textBrowser.setText(results[0])
        print(results[0])

    def get_token(self):
        """Request an access token from Baidu for this application's keys.

        Returns:
            The ``access_token`` string from the OAuth reply.

        Raises:
            OSError: the request failed (``URLError`` is an ``OSError``).
            KeyError: the reply contains no ``access_token``.
            ValueError: the reply is not valid JSON.
        """
        # Placeholders: fill in the keys of your own Baidu application and
        # keep real credentials out of version control.
        apiKey = "XXXXXXXXXXXXXXXXXXXXXXX"
        secretKey = "YYYYYYYYYYYYYYYYYYYYYYYYY"
        query = urllib.parse.urlencode(
            {
                'grant_type': 'client_credentials',
                'client_id': apiKey,
                'client_secret': secretKey,
            }
        )
        auth_url = "https://aip.baidubce.com/oauth/2.0/token?" + query
        with urllib.request.urlopen(auth_url, timeout=self.NETWORK_TIMEOUT) as res:
            json_data = res.read()
        return json.loads(json_data)['access_token']

    def use_cloud(self, token):
        """Upload the recorded WAV samples to Baidu and display the result.

        Args:
            token: access token as returned by :meth:`get_token`.

        Raises:
            pycurl.error: the HTTP transfer failed.
            OSError / wave.Error: the recording could not be read.
        """
        with wave.open(self.file_name, 'rb') as fp:
            nf = fp.getnframes()
            framerate = fp.getframerate()
            print('sampwidth:', fp.getsampwidth())
            print('framerate:', framerate)
            print('channels:', fp.getnchannels())
            audio_data = fp.readframes(nf)
        # Actual payload size; correct for any sample width / channel count.
        f_len = len(audio_data)

        cuid = "4d36e972-e325-11ce-bfc1-08002be10318"
        print('token:')
        print(token)
        # NOTE(review): the token travels over plain HTTP here - confirm
        # whether the service also accepts this endpoint over HTTPS.
        srv_url = 'http://vop.baidu.com/server_api?' + urllib.parse.urlencode(
            {'cuid': cuid, 'token': token}
        )
        http_header = [
            f'Content-Type:audio/pcm;rate={framerate}',
            f'Content-Length:{f_len}',
        ]

        # Collect the whole body first: the write callback may be invoked
        # several times with partial chunks, which cannot be parsed as JSON.
        response = io.BytesIO()
        c = pycurl.Curl()
        try:
            c.setopt(pycurl.URL, str(srv_url))
            c.setopt(c.HTTPHEADER, http_header)
            c.setopt(c.POST, 1)
            c.setopt(c.CONNECTTIMEOUT, self.NETWORK_TIMEOUT)
            c.setopt(c.TIMEOUT, self.NETWORK_TIMEOUT)
            c.setopt(c.WRITEFUNCTION, response.write)
            c.setopt(c.POSTFIELDS, audio_data)
            c.setopt(c.POSTFIELDSIZE, f_len)
            c.perform()
        finally:
            c.close()

        self.dump_res(response.getvalue())

    def my_start(self):
        """Slot for the start button: record, recognize, show the outcome."""
        print('OK')
        try:
            self.Start_record()
            self.use_cloud(self.get_token())
        except (OSError, pycurl.error, KeyError, ValueError) as err:
            # Show the failure in the window instead of letting the
            # exception escape from the Qt slot.
            self.child.textBrowser.setText(f'识别失败: {err}')


if __name__ == "__main__":
    app = QApplication(sys.argv)
    myWin = Audio_record_cloud_class()
    myWin.show()
    sys.exit(app.exec_())
由 UI 文件转换生成的 py 文件如下:My_Audio_Record_cloud_Ui.py
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'd:\vscode_2020\My_Audio\My_Audio\My_Audio_Record_cloud.ui'
#
# Created by: PyQt5 UI code generator 5.15.0
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_MainWindow(object):
    """Widget layout of the speech-recognition main window."""

    def setupUi(self, MainWindow):
        """Create all widgets on ``MainWindow`` and wire the close button.

        Args:
            MainWindow: the main-window object that receives the central
                widget, menu bar and status bar.
        """

        def place(widget, object_name, x, y, width, height):
            # Position a widget and give it its Qt object name.
            widget.setGeometry(QtCore.QRect(x, y, width, height))
            widget.setObjectName(object_name)
            return widget

        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(558, 525)

        central = QtWidgets.QWidget(MainWindow)
        central.setObjectName("centralwidget")
        self.centralwidget = central

        # Result area, start button and the explanatory labels.
        self.textBrowser = place(
            QtWidgets.QTextBrowser(central), "textBrowser", 30, 50, 501, 351
        )
        self.pushButton = place(
            QtWidgets.QPushButton(central), "pushButton", 40, 420, 75, 23
        )
        self.label = place(QtWidgets.QLabel(central), "label", 40, 460, 491, 16)
        self.label_2 = place(QtWidgets.QLabel(central), "label_2", 30, 30, 161, 16)

        # Heading label with a bold 18pt font.
        self.label_3 = place(QtWidgets.QLabel(central), "label_3", 180, 10, 111, 31)
        heading_font = QtGui.QFont()
        heading_font.setFamily("Agency FB")
        heading_font.setPointSize(18)
        heading_font.setBold(True)
        heading_font.setWeight(75)
        self.label_3.setFont(heading_font)

        # Version label and the button that closes the window.
        self.label_4 = place(QtWidgets.QLabel(central), "label_4", 480, 20, 54, 12)
        self.pushButton_2 = place(
            QtWidgets.QPushButton(central), "pushButton_2", 450, 420, 75, 23
        )

        MainWindow.setCentralWidget(central)

        self.menubar = place(QtWidgets.QMenuBar(MainWindow), "menubar", 0, 0, 558, 23)
        MainWindow.setMenuBar(self.menubar)

        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        self.pushButton_2.clicked.connect(MainWindow.close)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title and every widget caption.

        Args:
            MainWindow: the main-window object whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))

        captions = (
            (self.pushButton, _translate("MainWindow", "开始识别")),
            (
                self.label,
                _translate(
                    "MainWindow",
                    "说明:点击“开始识别”按钮来录音,并通过百度语音的功能,自动将结果显示在文本框中",
                ),
            ),
            (self.label_2, _translate("MainWindow", "语音识别的结果:")),
            (self.label_3, _translate("MainWindow", "语音识别")),
            (self.label_4, _translate("MainWindow", "v20220306")),
            (self.pushButton_2, _translate("MainWindow", "结束")),
        )
        for widget, caption in captions:
            widget.setText(caption)
到此,这篇关于 Python 识别录音并转为文字的实现的文章就介绍到这了。更多相关 Python 识别录音转为文字的内容,请搜索脚本之家以前的文章或继续浏览下面的相关文章,希望大家以后多多支持脚本之家!