Developing a VSCode recording and real-time speech transcription extension that generates a local mp3 file in the workspace, with pitfall notes!
Author: 拾荒李
Preface
I recently received a requirement: implement a recording feature and generate an mp3 file in the local workspace. My first thought was to develop it inside the VSCode core code, but that is no small amount of work, and the deadline was tight. There are plenty of recording implementations out there whose code can be reused, so in the end I decided to write an extension. The catch: an extension's page runs in a browser environment, where generating an mp3 file on disk is impossible, so the audio data has to be handed over to the Node environment.
As of the current VSCode version, the authors have not opened up access to local media devices, so none of the voice-related extensions in the marketplace get VSCode's media permissions directly. This makes sense: VSCode is an open-source project with a huge extension marketplace, and if every permission were open, a malicious actor could easily harvest your personal information through an extension — turning on your microphone or camera, or reading your geolocation — without you noticing. So the authors locked those permissions down. If you were hoping to call local media devices purely from an extension, you can give up on that idea.
For those doing secondary development on the VSCode source, however, it is easy: just enable the corresponding permissions in the core code.
Here we focus on the extension implementation:
The corresponding directory structure and main files:
Local recording and generating the mp3 file
The corresponding demo page:
wavesurfer.js -> renders the waveform/spectrogram and plays back the recording
lame.min.js -> the mp3 encoder
record.js -> implements the recording
There are plenty of examples of these files on GitHub; they differ only in detail, and the core APIs are all the same.
Initialization
Let's first go over a few JavaScript audio APIs:

navigator.getUserMedia: provides access to media input devices such as cameras and microphones, as well as screen sharing.
AudioContext: represents an audio-processing graph built from audio modules linked together, each represented by an AudioNode. The audio context controls the creation of the nodes it contains and the execution of audio processing or decoding. You need to create an AudioContext before doing anything else, since everything happens inside a context. It is recommended to create one AudioContext and reuse it rather than initializing a new one each time; a single AudioContext can serve several different audio sources and pipelines at once.
createMediaStreamSource: creates a new MediaStreamAudioSourceNode from a MediaStream object (which you can obtain from navigator.getUserMedia); audio from the MediaStream can then be played and manipulated.
createScriptProcessor: processes the audio.
onaudioprocess: fires during recording, giving real-time access to the audio stream.
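Chained together, these APIs form the capture pipeline used throughout this article. A minimal sketch (createScriptProcessor is deprecated in favor of AudioWorklet, but it is what the libraries here build on; the 4096 buffer size is just my illustrative choice):

const audioCtx = new (window.AudioContext || window.webkitAudioContext)();

navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
  // MediaStream -> source node -> script processor -> destination
  const source = audioCtx.createMediaStreamSource(stream);
  const processor = audioCtx.createScriptProcessor(4096, 1, 1); // bufferSize, input/output channels
  processor.onaudioprocess = (event) => {
    // Float32Array of raw PCM samples for this chunk, channel 0
    const samples = event.inputBuffer.getChannelData(0);
    console.log('got', samples.length, 'samples');
  };
  source.connect(processor);
  processor.connect(audioCtx.destination); // keeps the processor node running
});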
The page
mounted() {
  this.wavesurfer = WaveSurfer.create({
    container: '#waveform',
    waveColor: 'black',
    interact: false,
    cursorWidth: 1,
    barWidth: 1,
    plugins: [
      WaveSurfer.microphone.create()
    ]
  });
  this.wavesurfer.microphone.on('deviceReady', function (stream) {
    console.log('Device ready!', stream);
  });
  this.wavesurfer.microphone.on('deviceError', function (code) {
    console.warn('Device error: ' + code);
  });
  this.recorder = new Recorder({
    sampleRate: 44100, // sample rate; defaults to 44100 Hz (the standard mp3 sample rate)
    bitRate: 128,      // bit rate; defaults to 128 kbps (standard mp3 quality)
    success: function () { // success callback
      console.log('success-->');
      // start.disabled = false;
    },
    error: function (msg) { // failure callback
      alert('msg-->' + msg);
    },
    fix: function (msg) {   // callback when HTML5 recording is not supported
      alert('msg--->' + msg);
    }
  });
}
Clicking to start and stop recording
start() {
  // start the microphone
  this.wavesurfer.microphone.start();
  // start recording
  this.recorder.start();
},
end() {
  // same as stopDevice() but also clears the wavesurfer canvas
  this.wavesurfer.microphone.stop();
  // stop recording
  this.recorder.stop();
  let that = this;
  this.recorder.getBlob(function (blob) {
    that.audioPath = URL.createObjectURL(blob);
    that.$refs.myAudio.load();
  });
}
recorder.js
// initialization
init: function () {
  navigator.getUserMedia = navigator.getUserMedia ||
    navigator.webkitGetUserMedia ||
    navigator.mozGetUserMedia ||
    navigator.msGetUserMedia;
  window.AudioContext = window.AudioContext || window.webkitAudioContext;
},
// access the media device
navigator.getUserMedia({
  audio: true // constraints object
}, function (stream) { // success callback
  var context = new AudioContext(),
      microphone = context.createMediaStreamSource(stream), // media-stream audio source
      processor = context.createScriptProcessor(0, 1, 1);   // JS audio processor
  // ...
});
// start recording
_this.start = function () {
  if (processor && microphone) {
    microphone.connect(processor);
    processor.connect(context.destination);
    Util.log('Recording started');
  }
};
// stop recording
_this.stop = function () {
  if (processor && microphone) {
    microphone.disconnect();
    processor.disconnect();
    Util.log('Recording stopped');
  }
};
// new Worker starts a background thread that encodes the data;
// I host the worker script online here to get around access restrictions
fetch(
  "https://lawdawn-download.oss-cn-beijing.aliyuncs.com/js/recorderWorker.js"
)
  .then((response) => response.blob())
  .then((blob) => {
    const url = URL.createObjectURL(blob);
    realTimeWorker = new Worker(url);
    realTimeWorker.onmessage = async function (e) {
      // ...
    };
  });
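If hosting the worker script remotely is not an option, the same blob-URL trick works with an inline string. A minimal sketch — the worker body below is a trivial stand-in, not the real encoder:

const workerSource = `
  self.onmessage = function (e) {
    // stand-in for the real encoding logic
    self.postMessage({ cmd: 'echo', data: e.data });
  };
`;
const blob = new Blob([workerSource], { type: 'application/javascript' });
const inlineWorker = new Worker(URL.createObjectURL(blob));
inlineWorker.onmessage = (e) => console.log(e.data);
inlineWorker.postMessage('ping');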
recorderWorker.js
// the background thread encodes the audio stream data it receives
(function () {
  'use strict';
  importScripts('https://lawdawn-download.oss-cn-beijing.aliyuncs.com/js/lame.min.js');
  var mp3Encoder, maxSamples = 1152, samplesMono, lame, config, dataBuffer;
  var clearBuffer = function () {
    dataBuffer = [];
  };
  var appendToBuffer = function (mp3Buf) {
    dataBuffer.push(new Int8Array(mp3Buf));
  };
  var init = function (prefConfig) {
    config = prefConfig || {};
    lame = new lamejs();
    mp3Encoder = new lame.Mp3Encoder(1, config.sampleRate || 44100, config.bitRate || 128);
    clearBuffer();
    self.postMessage({ cmd: 'init' });
  };
  var floatTo16BitPCM = function (input, output) {
    for (var i = 0; i < input.length; i++) {
      var s = Math.max(-1, Math.min(1, input[i]));
      output[i] = (s < 0 ? s * 0x8000 : s * 0x7FFF);
    }
  };
  var convertBuffer = function (arrayBuffer) {
    var data = new Float32Array(arrayBuffer);
    var out = new Int16Array(data.length);
    floatTo16BitPCM(data, out);
    return out;
  };
  var encode = function (arrayBuffer) {
    samplesMono = convertBuffer(arrayBuffer);
    var remaining = samplesMono.length;
    for (var i = 0; remaining >= 0; i += maxSamples) {
      var left = samplesMono.subarray(i, i + maxSamples);
      var mp3buf = mp3Encoder.encodeBuffer(left);
      appendToBuffer(mp3buf);
      remaining -= maxSamples;
    }
  };
  var finish = function () {
    appendToBuffer(mp3Encoder.flush());
    self.postMessage({ cmd: 'end', buf: dataBuffer });
    clearBuffer();
  };
  self.onmessage = function (e) {
    switch (e.data.cmd) {
      case 'init':
        init(e.data.config);
        break;
      case 'encode':
        encode(e.data.buf);
        break;
      case 'finish':
        finish();
        break;
    }
  };
})();
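A note on maxSamples = 1152 above: that is the number of samples in one MPEG-1 Layer III frame, which is the chunk size lamejs's encodeBuffer is designed around, so the encode loop feeds the encoder frame-sized slices of the PCM data.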
The complete recorder.js
(function (exports) {
  // shared helpers
  var Util = {
    // initialization
    init: function () {
      navigator.getUserMedia = navigator.getUserMedia ||
        navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia ||
        navigator.msGetUserMedia;
      window.AudioContext = window.AudioContext || window.webkitAudioContext;
    },
    // logging
    log: function () {
      console.log.apply(console, arguments);
    }
  };
  let realTimeWorker;
  var Recorder = function (config) {
    var _this = this;
    config = config || {}; // initialize the config object
    config.sampleRate = config.sampleRate || 44100; // sample rate, default 44100 Hz (standard mp3)
    config.bitRate = config.bitRate || 128;         // bit rate, default 128 kbps (standard mp3 quality)
    Util.init();
    if (navigator.getUserMedia) {
      navigator.getUserMedia({
        audio: true // constraints object
      }, function (stream) { // success callback
        var context = new AudioContext(),
            microphone = context.createMediaStreamSource(stream), // media-stream audio source
            processor = context.createScriptProcessor(0, 1, 1),   // JS audio processor
            successCallback,
            errorCallback;
        config.sampleRate = context.sampleRate;
        processor.onaudioprocess = function (event) { // fires during recording
          var array = event.inputBuffer.getChannelData(0);
          realTimeWorker.postMessage({ cmd: 'encode', buf: array });
        };
        fetch(
          "https://lawdawn-download.oss-cn-beijing.aliyuncs.com/js/recorderWorker.js"
        )
          .then((response) => response.blob())
          .then((blob) => {
            const url = URL.createObjectURL(blob);
            realTimeWorker = new Worker(url);
            realTimeWorker.onmessage = async function (e) { // main thread listens to the worker
              switch (e.data.cmd) {
                case 'init':
                  Util.log('Initialized');
                  if (config.success) {
                    config.success();
                  }
                  break;
                case 'end':
                  if (successCallback) {
                    var blob = new Blob(e.data.buf, { type: 'audio/mp3' });
                    let formData = new FormData();
                    formData.append('file', blob, 'main.mp3');
                    fetch("http://127.0.0.1:8840/microm", {
                      method: 'POST',
                      body: formData
                    });
                    successCallback(blob);
                    Util.log('mp3 size: ' + blob.size + '%cB', 'color:#0000EE');
                  }
                  break;
                case 'error':
                  Util.log('Error: ' + e.data.error);
                  if (errorCallback) {
                    errorCallback(e.data.error);
                  }
                  break;
                default:
                  Util.log('Unknown message: ' + e.data);
              }
            };
            // start recording
            _this.start = function () {
              if (processor && microphone) {
                microphone.connect(processor);
                processor.connect(context.destination);
                Util.log('Recording started');
              }
            };
            // stop recording
            _this.stop = function () {
              if (processor && microphone) {
                microphone.disconnect();
                processor.disconnect();
                Util.log('Recording stopped');
              }
            };
            // get the recording as a Blob
            _this.getBlob = function (onSuccess, onError) {
              successCallback = onSuccess;
              errorCallback = onError;
              realTimeWorker.postMessage({ cmd: 'finish' });
            };
            realTimeWorker.postMessage({
              cmd: 'init',
              config: {
                sampleRate: config.sampleRate,
                bitRate: config.bitRate
              }
            });
          });
        // var realTimeWorker = new Worker('js/recorderWorker.js'); // start the worker from a local path
      }, function (error) { // failure callback
        var msg;
        switch (error.code || error.name) {
          case 'PermissionDeniedError':
          case 'PERMISSION_DENIED':
          case 'NotAllowedError':
            msg = 'The user denied microphone access';
            break;
          case 'NOT_SUPPORTED_ERROR':
          case 'NotSupportedError':
            msg = 'The browser does not support the microphone';
            break;
          case 'MANDATORY_UNSATISFIED_ERROR':
          case 'MandatoryUnsatisfiedError':
            msg = 'No microphone device found';
            break;
          default:
            msg = 'Could not open the microphone. Error: ' + (error.code || error.name);
            break;
        }
        Util.log(msg);
        if (config.error) {
          config.error(msg);
        }
      });
    } else {
      Util.log('This browser does not support recording');
      if (config.fix) {
        config.fix('This browser does not support recording');
      }
    }
  };
  // module interface
  exports.Recorder = Recorder;
})(window);
Pitfalls!
The recording part went smoothly: recordings play back fine in the renderer process. Next came generating the local mp3 file.
First attempt: take the Blob obtained on the browser side and send it over with vscode.postMessage. What arrived on the other end was {} — an empty object.
Likely causes: the data was serialized in transit and lost, or Blob is a browser-only API that Node cannot receive.
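You can see why on the browser side alone: a Blob has no enumerable own properties, so any JSON-style serialization of it collapses to an empty object. A quick check in the devtools console:

const blob = new Blob([new Uint8Array([1, 2, 3])], { type: 'audio/mp3' });
console.log(JSON.stringify(blob)); // "{}" — the binary payload is not enumerable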
Second attempt: send the real-time audio data stream over instead. The data is a Float32Array, but after serialization it could not be restored to its original form on the other side. Another failure!
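The same serialization problem bites here, just differently: a typed array JSON-stringifies into an index-keyed plain object rather than an array, so the receiving side gets something shaped like this:

const samples = new Float32Array([0.5, -0.25]);
console.log(JSON.stringify(samples));
// '{"0":0.5,"1":-0.25}' — an object keyed by index, no longer a Float32Array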
After some hard thinking, I found a way.

Third attempt: start a local HTTP service inside extension.ts
export async function activate(context: vscode.ExtensionContext) {
  (global as any).audioWebview = null;
  context.subscriptions.push(VsAudioEditorProvider.register(context));
  const server = http.createServer((req, res) => {
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, PUT, PATCH, DELETE');
    res.setHeader('Access-Control-Allow-Headers', 'X-Requested-With,content-type');
    res.setHeader('Access-Control-Allow-Credentials', 'true');
    if (req.method === 'POST' && req.url === '/microm') {
      const stream = fs.createWriteStream(
        path.join(path.dirname((global as any).documentUri!.fsPath), 'main.mp3')
      );
      req.on("data", (chunk) => {
        stream.write(chunk);
      });
      req.on("end", () => {
        stream.end();
        vscode.window.showInformationMessage('Audio file generated!');
        res.writeHead(200);
        res.end('success!');
      });
    }
  }).listen(8840);
}
When recording ends on the browser side, this endpoint is called to send the data over.
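One caveat worth noting: the browser side posts a FormData, i.e. a multipart/form-data body, so streaming the raw request straight into main.mp3 also writes the multipart boundary lines into the file; mp3 players tend to skip leading junk before the first frame sync, which is presumably why it still plays. Posting the blob as the raw request body keeps the file byte-clean — a sketch of both halves, under that assumption:

// webview side: send the encoded blob as the raw request body
fetch('http://127.0.0.1:8840/microm', { method: 'POST', body: blob });

// extension side: pipe the body straight to disk; pipe ends the stream for us
req.pipe(stream);
stream.on('finish', () => {
  res.writeHead(200);
  res.end('success!');
});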
Tested both locally and in production with no problems — mission accomplished!
Real-time speech transcription
The real-time transcription calls iFLYTEK (科大讯飞)'s API; below is the JS from the demo page iFLYTEK provides.
/**
 * Created by lcw on 2023/5/20.
 *
 * Real-time speech transcription (RTASR) WebAPI example.
 * API docs (required reading): https://www.xfyun.cn/doc/asr/rtasr/API.html
 * Error codes:
 *   https://www.xfyun.cn/doc/asr/rtasr/API.html
 *   https://www.xfyun.cn/document/error-code (check when an error code is returned)
 */
if (typeof Worker === 'undefined') {
  // Web Workers not supported
  alert('Web Workers are not supported');
} else {
  // Web Workers supported
  console.log('Web Workers are supported');
}

// audio transcoding worker
let recorderWorker;
fetch(
  "https://lawdawn-download.oss-cn-beijing.aliyuncs.com/js/transformpcm.worker.js"
)
  .then((response) => response.blob())
  .then((blob) => {
    const url = URL.createObjectURL(blob);
    recorderWorker = new Worker(url);
    recorderWorker.onmessage = function (e) {
      buffer.push(...e.data.buffer);
    };
  });
// let recorderWorker = new Worker('transformpcm.worker.js');

// buffered audio awaiting upload
let buffer = [];
let AudioContext = window.AudioContext || window.webkitAudioContext;
navigator.getUserMedia = navigator.getUserMedia ||
  navigator.webkitGetUserMedia ||
  navigator.mozGetUserMedia ||
  navigator.msGetUserMedia;

class IatRecorder {
  constructor(config) {
    this.config = config;
    this.state = 'ing';
    // get these from the console under "My apps – real-time transcription"
    this.appId = 'xxxxx';
    this.apiKey = 'xxxxx';
  }

  start() {
    this.stop();
    if (navigator.getUserMedia && AudioContext) {
      this.state = 'ing';
      if (!this.recorder) {
        var context = new AudioContext();
        this.context = context;
        this.recorder = context.createScriptProcessor(0, 1, 1);
        var getMediaSuccess = (stream) => {
          var mediaStream = this.context.createMediaStreamSource(stream);
          this.mediaStream = mediaStream;
          this.recorder.onaudioprocess = (e) => {
            this.sendData(e.inputBuffer.getChannelData(0));
          };
          this.connectWebsocket();
        };
        var getMediaFail = (e) => {
          this.recorder = null;
          this.mediaStream = null;
          this.context = null;
          console.log('Microphone request failed');
        };
        if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
          navigator.mediaDevices.getUserMedia({
            audio: true,
            video: false
          }).then((stream) => {
            getMediaSuccess(stream);
          }).catch((e) => {
            getMediaFail(e);
          });
        } else {
          navigator.getUserMedia({
            audio: true,
            video: false
          }, (stream) => {
            getMediaSuccess(stream);
          }, function (e) {
            getMediaFail(e);
          });
        }
      } else {
        this.connectWebsocket();
      }
    }
  }

  stop() {
    this.state = 'end';
    try {
      this.mediaStream.disconnect(this.recorder);
      this.recorder.disconnect();
    } catch (e) { }
  }

  sendData(buffer) {
    recorderWorker.postMessage({
      command: 'transform',
      buffer: buffer
    });
  }

  // build the handshake parameters
  getHandShakeParams() {
    var appId = this.appId;
    var secretKey = this.apiKey;
    var ts = Math.floor(new Date().getTime() / 1000);
    var signa = hex_md5(appId + ts);
    var signatureSha = CryptoJSNew.HmacSHA1(signa, secretKey);
    var signature = CryptoJS.enc.Base64.stringify(signatureSha);
    signature = encodeURIComponent(signature);
    return "?appid=" + appId + "&ts=" + ts + "&signa=" + signature;
  }

  connectWebsocket() {
    // var url = 'wss://rtasr.xfyun.cn/v1/ws'
    var url = 'ws://xxxx.xxx.xx/xf-rtasr';
    var urlParam = this.getHandShakeParams();
    // url = `${url}${urlParam}`
    if ('WebSocket' in window) {
      this.ws = new WebSocket(url);
    } else if ('MozWebSocket' in window) {
      this.ws = new MozWebSocket(url);
    } else {
      alert(notSupportTip);
      return null;
    }
    this.ws.onopen = (e) => {
      if (e.isTrusted) {
        this.mediaStream.connect(this.recorder);
        this.recorder.connect(this.context.destination);
        setTimeout(() => {
          this.wsOpened(e);
        }, 500);
        this.config.onStart && this.config.onStart(e);
      } else {
        alert('Something went wrong');
      }
    };
    this.ws.onmessage = (e) => {
      // this.config.onMessage && this.config.onMessage(e)
      this.wsOnMessage(e);
    };
    this.ws.onerror = (e) => {
      console.log('err-->', e);
      this.stop();
      console.log('Connection closed: ws.onerror');
      this.config.onError && this.config.onError(e);
    };
    this.ws.onclose = (e) => {
      this.stop();
      console.log('Connection closed: ws.onclose');
      $('.start-button').attr('disabled', false);
      this.config.onClose && this.config.onClose(e);
    };
  }

  wsOpened() {
    if (this.ws.readyState !== 1) {
      return;
    }
    var audioData = buffer.splice(0, 1280);
    this.ws.send(new Int8Array(audioData));
    this.handlerInterval = setInterval(() => {
      // websocket not connected
      if (this.ws.readyState !== 1) {
        clearInterval(this.handlerInterval);
        return;
      }
      if (buffer.length === 0) {
        if (this.state === 'end') {
          this.ws.send("{\"end\": true}");
          console.log('End flag sent');
          clearInterval(this.handlerInterval);
        }
        return false;
      }
      var audioData = buffer.splice(0, 1280);
      if (audioData.length > 0) {
        this.ws.send(new Int8Array(audioData));
      }
    }, 40);
  }

  wsOnMessage(e) {
    let jsonData = JSON.parse(e.data);
    // if (jsonData.action == "started") {
    //   // handshake succeeded
    //   console.log("handshake ok");
    // } else if (jsonData.action == "result") {
    // transcription result
    if (this.config.onMessage && typeof this.config.onMessage == 'function') {
      this.config.onMessage(jsonData);
    }
    // } else if (jsonData.action == "error") {
    //   // connection error
    //   console.log("error:", jsonData);
    // }
  }

  ArrayBufferToBase64(buffer) {
    var binary = '';
    var bytes = new Uint8Array(buffer);
    var len = bytes.byteLength;
    for (var i = 0; i < len; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return window.btoa(binary);
  }
}

class IatTaste {
  constructor() {
    var iatRecorder = new IatRecorder({
      onClose: () => {
        this.stop();
        this.reset();
      },
      onError: (data) => {
        this.stop();
        this.reset();
        alert('WebSocket connection failed');
      },
      onMessage: (message) => {
        // this.setResult(JSON.parse(message))
        this.setResult(message);
      },
      onStart: () => {
        $('hr').addClass('hr');
        var dialect = $('.dialect-select').find('option:selected').text();
        $('.taste-content').css('display', 'none');
        $('.start-taste').addClass('flex-display-1');
        $('.dialect-select').css('display', 'none');
        $('.start-button').text('Stop transcribing');
        $('.time-box').addClass('flex-display-1');
        $('.dialect').text(dialect).css('display', 'inline-block');
        this.counterDown($('.used-time'));
      }
    });
    this.iatRecorder = iatRecorder;
    this.counterDownDOM = $('.used-time');
    this.counterDownTime = 0;
    this.text = {
      start: 'Start transcribing',
      stop: 'Stop transcribing'
    };
    this.resultText = '';
  }

  start() {
    this.iatRecorder.start();
  }

  stop() {
    $('hr').removeClass('hr');
    this.iatRecorder.stop();
  }

  reset() {
    this.counterDownTime = 0;
    clearTimeout(this.counterDownTimeout);
    buffer = [];
    $('.time-box').removeClass('flex-display-1').css('display', 'none');
    $('.start-button').text(this.text.start);
    $('.dialect').css('display', 'none');
    $('.dialect-select').css('display', 'inline-block');
    $('.taste-button').css('background', '#0b99ff');
  }

  init() {
    let self = this;
    // start
    $('#taste_button').click(function () {
      if (navigator.getUserMedia && AudioContext && recorderWorker) {
        self.start();
      } else {
        alert(notSupportTip);
      }
    });
    // stop
    $('.start-button').click(function () {
      if ($(this).text() === self.text.start && !$(this).prop('disabled')) {
        $('#result_output').text('');
        self.resultText = '';
        self.start();
        // console.log('button enabled, starting normally: ' + $(this).prop('disabled'))
      } else {
        // $('.taste-content').css('display', 'none')
        $('.start-button').attr('disabled', true);
        self.stop();
        // reset
        this.counterDownTime = 0;
        clearTimeout(this.counterDownTimeout);
        buffer = [];
        $('.time-box').removeClass('flex-display-1').css('display', 'none');
        $('.start-button').text('Stopping transcription...');
        $('.dialect').css('display', 'none');
        $('.taste-button').css('background', '#8E8E8E');
        $('.dialect-select').css('display', 'inline-block');
        // console.log('button enabled, stopping normally: ' + $(this).prop('disabled'))
      }
    });
  }

  setResult(data) {
    let rtasrResult = [];
    var currentText = $('#result_output').html();
    rtasrResult[data.seg_id] = data;
    rtasrResult.forEach(i => {
      let str = 'Real-time transcription';
      str += (i.cn.st.type == 0) ? ' [final] result: ' : ' [interim] result: ';
      i.cn.st.rt.forEach(j => {
        j.ws.forEach(k => {
          k.cw.forEach(l => {
            str += l.w;
          });
        });
      });
      if (currentText.length == 0) {
        $('#result_output').html(str);
      } else {
        $('#result_output').html(currentText + '<br>' + str);
      }
      var ele = document.getElementById('result_output');
      ele.scrollTop = ele.scrollHeight;
    });
  }

  counterDown() {
    /* // cap at 5 minutes
    if (this.counterDownTime === 300) {
      this.counterDownDOM.text('05: 00')
      this.stop()
    } else if (this.counterDownTime > 300) {
      this.reset()
      return false
    } else */
    if (this.counterDownTime >= 0 && this.counterDownTime < 10) {
      this.counterDownDOM.text('00: 0' + this.counterDownTime);
    } else if (this.counterDownTime >= 10 && this.counterDownTime < 60) {
      this.counterDownDOM.text('00: ' + this.counterDownTime);
    } else if (this.counterDownTime % 60 >= 0 && this.counterDownTime % 60 < 10) {
      this.counterDownDOM.text('0' + parseInt(this.counterDownTime / 60) + ': 0' + this.counterDownTime % 60);
    } else {
      this.counterDownDOM.text('0' + parseInt(this.counterDownTime / 60) + ': ' + this.counterDownTime % 60);
    }
    this.counterDownTime++;
    this.counterDownTimeout = setTimeout(() => {
      this.counterDown();
    }, 1000);
  }
}

var iatTaste = new IatTaste();
iatTaste.init();
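For reference, the handshake signature built in getHandShakeParams above (signa = MD5(appId + ts), then HmacSHA1 with the apiKey, base64- and URL-encoded) can be reproduced server-side with Node's built-in crypto module — a minimal sketch, assuming the same rtasr scheme as the demo code:

const crypto = require('crypto');

// reproduce getHandShakeParams() with Node's crypto module
function handshakeParams(appId, apiKey) {
  const ts = Math.floor(Date.now() / 1000);
  const signa = crypto.createHash('md5').update(appId + ts).digest('hex'); // hex_md5(appId + ts)
  const signature = encodeURIComponent(
    crypto.createHmac('sha1', apiKey).update(signa).digest('base64')       // HmacSHA1 + Base64
  );
  return `?appid=${appId}&ts=${ts}&signa=${signature}`;
}

console.log(handshakeParams('xxxxx', 'xxxxx'));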
That's all for this article on developing a VSCode recording and real-time transcription extension that generates a local mp3 file in the workspace, pitfalls included. For more on VSCode recording, search 脚本之家's earlier articles or keep browsing the related articles below, and please keep supporting 脚本之家!