Python实现语音启动电脑应用程序
作者:Atlas Shepherd
这篇文章主要为大家详细介绍了如何使用Python实现语音启动电脑应用程序功能,文中的示例代码讲解详细,感兴趣的小伙伴可以跟随小编一学习一下
实现思路
osk模型进行输入语音转换
txt字典导航程序路径
pyttsx3引擎进行语音打印输出
关键词=程序路径
完整代码
import os import json import queue import sounddevice as sd from vosk import Model, KaldiRecognizer import subprocess import time import pyttsx3 import threading # 初始化 pyttsx3 引擎 engine = pyttsx3.init() engine.setProperty('rate', 150) # 设置语速 engine.setProperty('volume', 1.0) # 设置音量 # 加载Vosk模型 model_path = r"D:\daku\yuyinshibie\vosk-model-small-cn-0.22" if not os.path.exists(model_path): print(f"模型路径不存在: {model_path}") engine.say(f"模型路径不存在: {model_path}") engine.runAndWait() exit(1) # 读取字典文件,格式为 "命令=程序路径" def load_app_dict(file_path): app_dict = {} if not os.path.exists(file_path): print(f"字典文件不存在: {file_path}") engine.say(f"字典文件不存在: {file_path}") engine.runAndWait() return app_dict with open(file_path, 'r', encoding='utf-8') as file: for line in file: parts = line.strip().split('=') if len(parts) == 2: keys, value = parts # 处理可能存在的别名情况,例如 "微信,weixin" for key in keys.split(','): app_dict[key.strip()] = value.strip() return app_dict # 启动应用程序 def launch_application(app_name, app_dict): if app_name in app_dict: app_path = app_dict[app_name] response = f"正在启动 {app_name}..." say(response) subprocess.Popen(app_path) time.sleep(2) # 等待2秒再继续监听 else: response = f"找不到与 '{app_name}' 对应的应用程序。" say(response) # 定义一个函数用于语音输出,并在说的时候暂停监听 def say(text): global stream, callback_func if stream is not None: with stream_lock: stream.callback = None # 移除回调函数以暂停监听 stream.stop() # 暂停音频流 engine.say(text) engine.runAndWait() if stream is not None: with stream_lock: stream.start() # 恢复音频流 stream.callback = callback_func # 重新设置回调函数 # 初始化模型和识别器 model = Model(model_path) rec = KaldiRecognizer(model, 16000) q = queue.Queue() last_partial_result = "" last_full_command = "" stream_lock = threading.Lock() stream = None callback_func = None def callback(indata, frames, time, status): if status: print(status, file=sys.stderr) q.put(bytes(indata)) # 主程序 if __name__ == "__main__": dict_file = r"D:\daku\yuyinshibie\zidian.txt" # 字典文件路径 app_dict = load_app_dict(dict_file) try: # 提前初始化音频流 callback_func = callback stream = sd.RawInputStream(samplerate=16000, blocksize=8000, dtype='int16', channels=1, callback=callback) stream.start() say("请说:") while True: data = q.get() if rec.AcceptWaveform(data): result = json.loads(rec.Result()) command = result['text'].strip() if command and command != last_full_command: print(f"你说的是: {command}") say(f"你说的是: {command}") if "打开" in command: app_to_open = command.replace("打开", "").strip() launch_application(app_to_open, app_dict) last_full_command = command elif rec.PartialResult(): partial_result = json.loads(rec.PartialResult())['partial'] if partial_result and "打开" in partial_result and partial_result != last_partial_result: print(f"部分结果: {partial_result}") say(f"部分结果: {partial_result}") last_partial_result = partial_result except KeyboardInterrupt: say("\n退出程序。") finally: if stream is not None: stream.stop() stream.close()
关键词部分,为了识别准确以及出现谐音内容可以增添多个关键词使用,作为分割
字典路径如果出现中文字符有可能会报错!
代码意义不大,如果考虑深入:可以尝试增加快捷键,以及相关应用接口可以更好控制
上班族打开电脑i第一件事情是启动相关应用,同样可以尝试多应用编组启动
到此这篇关于Python实现语音启动电脑应用程序的文章就介绍到这了,更多相关Python语音启动电脑内容请搜索脚本之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持脚本之家!