

1. Check your Node version.

My project layout: the three files involved are transcode.worker.js, translation.js and translation.vue.


2. Add the following to package.json (the versions I used):

"dependencies": {
    enc": "^0.4.0",
    "jquery": "^3.4.1",
    
},
"devDependencies": {
    "crypto-js": "^4.0.0",
    "vconsole": "^3.3.4",
    "vue-template-compiler": "2.6.12",
    "worker-loader": "^2.0.0"
}

3. Configure vue.config.js

Without this configuration you get: TypeError: _transcodeWorker.default is not a constructor


Add the following to vue.config.js:

configureWebpack: config => {
    config.module.rules.push({
      test: /\.worker.js$/,
      use: {
        loader: 'worker-loader',
        options: { inline: true, name: 'workerName.[hash].js' }
      }
    })
  },

Or alternatively (my own vue.config.js is not written in the function form above, so I used the following chainWebpack form):

chainWebpack(config) {
    config.output.globalObject('this')

    config.module
      .rule('worker')
      .test(/\.worker.js$/)
      .use('worker-loader')
      .loader('worker-loader')
      .options({ inline: true, name: 'workerName.[hash].js' })

}

When you run the project, the console will report "window is undefined". This happens because there is no window object inside a worker thread, so it cannot be used directly; this has to be used instead. Add the following to vue.config.js:

chainWebpack: config => {
    config.output.globalObject('this')
 }

If the production build then fails, also add:

parallel: false

Putting it all together:

module.exports = {
  configureWebpack: config => {
    config.module.rules.push({
      test: /\.worker.js$/,
      use: {
        loader: 'worker-loader',
        options: { inline: true, name: 'workerName.[hash].js' }
      }
    })
  },
  parallel: false,
  chainWebpack: config => {
    config.output.globalObject('this')
  }
}

Once this configuration is in place the errors disappear and the project runs normally.
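If, like me, you prefer the chainWebpack form instead, the equivalent combined vue.config.js looks like this (assembled from the snippets above, nothing new):

module.exports = {
  parallel: false,
  chainWebpack: config => {
    // worker code must use `this` instead of `window` as its global object
    config.output.globalObject('this')

    config.module
      .rule('worker')
      .test(/\.worker\.js$/)
      .use('worker-loader')
      .loader('worker-loader')
      .options({ inline: true, name: 'workerName.[hash].js' })
  }
}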

4. Create the transcode.worker.js file (taken from the source of iFlytek's 语音听写(流式版) WebAPI JS demo)

// transcode.worker.js
// The worker receives raw Float32 samples from the main thread, downsamples them
// to 16 kHz, converts them to 16-bit PCM and posts the resulting bytes back.
self.onmessage = function (e) {
  transAudioData.transcode(e.data)
}
let transAudioData = {
  transcode(audioData) {
    let output = transAudioData.to16kHz(audioData)
    output = transAudioData.to16BitPCM(output)
    output = Array.from(new Uint8Array(output.buffer))
    self.postMessage(output)
  },
  // simple linear resampling from the 44.1 kHz capture rate down to the 16 kHz the API expects
  to16kHz(audioData) {
    var data = new Float32Array(audioData)
    var fitCount = Math.round(data.length * (16000 / 44100))
    var newData = new Float32Array(fitCount)
    var springFactor = (data.length - 1) / (fitCount - 1)
    newData[0] = data[0]
    for (let i = 1; i < fitCount - 1; i++) {
      var tmp = i * springFactor
      var before = Math.floor(tmp)
      var after = Math.ceil(tmp)
      var atPoint = tmp - before
      newData[i] = data[before] + (data[after] - data[before]) * atPoint
    }
    newData[fitCount - 1] = data[data.length - 1]
    return newData
  },
  // convert Float32 samples in [-1, 1] to little-endian signed 16-bit PCM
  to16BitPCM(input) {
    var dataLength = input.length * (16 / 8)
    var dataBuffer = new ArrayBuffer(dataLength)
    var dataView = new DataView(dataBuffer)
    var offset = 0
    for (var i = 0; i < input.length; i++, offset += 2) {
      var s = Math.max(-1, Math.min(1, input[i]))
      dataView.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true)
    }
    return dataView
  },
}
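With the worker-loader rule from step 3, importing this file yields a constructor (which is exactly why the missing rule produced the "is not a constructor" error). For reference, the main-thread side looks roughly like this; it is the same pattern translation.js uses below, and the import path assumes the worker sits next to the importing file:

// main thread – worker-loader turns the import into a constructor
import TransWorker from './transcode.worker.js'

const transWorker = new TransWorker()

// the worker posts back an array of 16 kHz / 16-bit PCM bytes
transWorker.onmessage = event => {
  console.log('received', event.data.length, 'bytes of PCM')
}

// send it a Float32Array of raw samples, e.g. from onaudioprocess
transWorker.postMessage(new Float32Array(4096))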

5. Create the translation.js file (taken from src\pages\index\index.js in the iFlytek demo source)

import CryptoJS from 'crypto-js'
import Enc from 'enc'
import VConsole from 'vconsole'
import $ from 'jquery'
import TransWorker from './js/transcode.worker.js'
import './index.css'

let transWorker = new TransWorker()
// APPID, APISecret and APIKey come from the console page: 我的应用 -> 语音听写(流式版)
const APPID = 'd7b51bcb'
const API_SECRET = 'ZTNmZjk3N2FkZTljZjg0YTYzMGZiNmZj'
const API_KEY = '4bc26e6fd3868195919a8c14054eac66'

/**
 * Build the websocket url.
 * This should really be provided by the backend; it is built on the frontend here only for convenience (see the backend sketch after this file).
 */
function getWebSocketUrl() {
  return new Promise((resolve, reject) => {
    // the request URL differs by language
    var url = 'wss://iat-api.xfyun.cn/v2/iat'
    var host = 'iat-api.xfyun.cn'
    var apiKey = API_KEY
    var apiSecret = API_SECRET
    var date = new Date().toGMTString()
    var algorithm = 'hmac-sha256'
    var headers = 'host date request-line'
    var signatureOrigin = `host: ${host}\ndate: ${date}\nGET /v2/iat HTTP/1.1`
    var signatureSha = CryptoJS.HmacSHA256(signatureOrigin, apiSecret)
    var signature = CryptoJS.enc.Base64.stringify(signatureSha)
    var authorizationOrigin = `api_key="${apiKey}", algorithm="${algorithm}", headers="${headers}", signature="${signature}"`
    var authorization = btoa(authorizationOrigin)
    url = `${url}?authorization=${authorization}&date=${date}&host=${host}`
    resolve(url)
  })
}
class IatRecorder {
  constructor({ language, accent, appId } = {}) {
    let self = this
    this.status = 'null'
    this.language = language || 'zh_cn'
    this.accent = accent || 'mandarin'
    this.appId = appId || APPID
    // buffer of processed audio data waiting to be sent
    this.audioData = []
    // accumulated dictation result
    this.resultText = ''
    // with wpgs enabled, an intermediate result is needed as scratch state
    this.resultTextTemp = ''
    transWorker.onmessage = function (event) {
      self.audioData.push(...event.data)
    }
  }
  // update the recording / dictation status
  setStatus(status) {
    this.onWillStatusChange && this.status !== status && this.onWillStatusChange(this.status, status)
    this.status = status
  }
  setResultText({ resultText, resultTextTemp } = {}) {
    this.onTextChange && this.onTextChange(resultTextTemp || resultText || '')
    resultText !== undefined && (this.resultText = resultText)
    resultTextTemp !== undefined && (this.resultTextTemp = resultTextTemp)
  }
  // update dictation parameters
  setParams({ language, accent } = {}) {
    language && (this.language = language)
    accent && (this.accent = accent)
  }
  // connect the websocket
  connectWebSocket() {
    return getWebSocketUrl().then(url => {
      console.log(url)
      let iatWS
      if ('WebSocket' in window) {
        iatWS = new WebSocket(url)
      } else if ('MozWebSocket' in window) {
        iatWS = new MozWebSocket(url)
      } else {
        alert('浏览器不支持WebSocket')
        return
      }
      this.webSocket = iatWS
      this.setStatus('init')
      iatWS.onopen = e => {
        this.setStatus('ing')
        // (re)start sending the recorded audio
        setTimeout(() => {
          this.webSocketSend()
        }, 100)
      }
      iatWS.onmessage = e => {
        this.result(e.data)
      }
      iatWS.onerror = e => {
        this.recorderStop()
      }
      iatWS.onclose = e => {
        this.recorderStop()
      }
    })
  }
  // initialise browser recording
  recorderInit() {
    navigator.getUserMedia =
      navigator.getUserMedia ||
      navigator.webkitGetUserMedia ||
      navigator.mozGetUserMedia ||
      navigator.msGetUserMedia

    // create the audio context
    try {
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)()
      this.audioContext.resume()
      if (!this.audioContext) {
        alert('浏览器不支持webAudioApi相关接口')
        return
      }
    } catch (e) {
      if (!this.audioContext) {
        alert('浏览器不支持webAudioApi相关接口')
        return
      }
    }

    // request microphone permission from the browser
    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      navigator.mediaDevices
        .getUserMedia({
          audio: true,
          video: false,
        })
        .then(stream => {
          getMediaSuccess(stream)
        })
        .catch(e => {
          getMediaFail(e)
        })
    } else if (navigator.getUserMedia) {
      navigator.getUserMedia(
        {
          audio: true,
          video: false,
        },
        stream => {
          getMediaSuccess(stream)
        },
        function(e) {
          getMediaFail(e)
        }
      )
    } else {
      if (navigator.userAgent.toLowerCase().match(/chrome/) && location.origin.indexOf('https://') < 0) {
        alert('chrome下获取浏览器录音功能,因为安全性问题,需要在localhost或127.0.0.1或https下才能获取权限')
      } else {
        alert('无法获取浏览器录音功能,请升级浏览器或使用chrome')
      }
      this.audioContext && this.audioContext.close()
      return
    }
    // callback when microphone permission is granted
    let getMediaSuccess = stream => {
      console.log('getMediaSuccess')
      // create a ScriptProcessorNode so the audio can be processed directly in JavaScript
      this.scriptProcessor = this.audioContext.createScriptProcessor(0, 1, 1)
      this.scriptProcessor.onaudioprocess = e => {
        // hand the audio data to the worker for processing
        if (this.status === 'ing') {
          transWorker.postMessage(e.inputBuffer.getChannelData(0))
        }
      }
      // create a MediaStreamAudioSourceNode so the audio from the MediaStream can be played and manipulated
      this.mediaSource = this.audioContext.createMediaStreamSource(stream)
      // connect the nodes
      this.mediaSource.connect(this.scriptProcessor)
      this.scriptProcessor.connect(this.audioContext.destination)
      this.connectWebSocket()
    }

    let getMediaFail = (e) => {
      alert('请求麦克风失败')
      console.log(e)
      this.audioContext && this.audioContext.close()
      this.audioContext = undefined
      // close the websocket
      if (this.webSocket && this.webSocket.readyState === 1) {
        this.webSocket.close()
      }
    }
  }
  recorderStart() {
    if (!this.audioContext) {
      // first start: initialise recording (the websocket is connected once permission is granted)
      this.recorderInit()
    } else {
      // subsequent starts: resume the existing audio context and reconnect
      this.audioContext.resume()
      this.connectWebSocket()
    }
  }
  // pause recording
  recorderStop() {
    // on Safari, resuming after a suspend records only silence, so skip the suspend there
    if (!(/Safari/.test(navigator.userAgent) && !/Chrome/.test(navigator.userAgent))){
      this.audioContext && this.audioContext.suspend()
    }
    this.setStatus('end')
  }
  // base64-encode the processed audio data before sending it
  toBase64(buffer) {
    var binary = ''
    var bytes = new Uint8Array(buffer)
    var len = bytes.byteLength
    for (var i = 0; i < len; i++) {
      binary += String.fromCharCode(bytes[i])
    }
    return window.btoa(binary)
  }
  // send data over the websocket
  webSocketSend() {
    if (this.webSocket.readyState !== 1) {
      return
    }
    let audioData = this.audioData.splice(0, 1280)
    console.log(audioData)
    var params = {
      common: {
        app_id: this.appId,
      },
      business: {
        language: this.language, // minority languages can be added for trial in the console under 语音听写(流式)--方言/语种
        domain: 'iat',
        accent: this.accent, // Chinese dialects can be added for trial in the console under 语音听写(流式)--方言/语种
        vad_eos: 60 * 60 * 1000,
        dwa: 'wpgs', // dynamic correction; it must be enabled (free) in the console to take effect
      },
      data: {
        status: 0,
        format: 'audio/L16;rate=16000',
        encoding: 'raw',
        audio: this.toBase64(audioData),
      },
    }
    this.webSocket.send(JSON.stringify(params))
    this.handlerInterval = setInterval(() => {
      // the websocket is no longer connected
      if (this.webSocket.readyState !== 1) {
        console.log('websocket is not connected')
        this.audioData = []
        clearInterval(this.handlerInterval)
        return
      }
      if (this.audioData.length === 0) {
        console.log("自动关闭",this.status)
        if (this.status === 'end') {
          this.webSocket.send(
            JSON.stringify({
              data: {
                status: 2,
                format: 'audio/L16;rate=16000',
                encoding: 'raw',
                audio: '',
              },
            })
          )
          this.audioData = []
          clearInterval(this.handlerInterval)
        }
        return false
      }
      audioData = this.audioData.splice(0, 1280)
      // intermediate audio frame
      this.webSocket.send(
        JSON.stringify({
          data: {
            status: 1,
            format: 'audio/L16;rate=16000',
            encoding: 'raw',
            audio: this.toBase64(audioData),
          },
        })
      )
    }, 40)
  }
  result(resultData) {
    // parse a recognition result frame
    let jsonData = JSON.parse(resultData)
    if (jsonData.data && jsonData.data.result) {
      let data = jsonData.data.result
      let str = ''
      let resultStr = ''
      let ws = data.ws
      for (let i = 0; i < ws.length; i++) {
        str = str + ws[i].cw[0].w
      }
      console.log("识别的结果为:",str)
      // the pgs field is present when wpgs is enabled (requires dynamic correction to be switched on in the console)
      // "apd" means this piece is appended to the previous final result; "rpl" means it replaces part of it, the range being given by the rg field
      if (data.pgs) {
        if (data.pgs === 'apd') {
          // promote resultTextTemp into resultText
          this.setResultText({
            resultText: this.resultTextTemp,
          })
        }
        // store the partial result in resultTextTemp
        this.setResultText({
          resultTextTemp: this.resultText + str,
        })
      } else {
        this.setResultText({
          resultText: this.resultText + str,
        })
      }
    }
    if (jsonData.code === 0 && jsonData.data.status === 2) {
      this.webSocket.close()
    }
    if (jsonData.code !== 0) {
      this.webSocket.close()
      console.log(`${jsonData.code}:${jsonData.message}`)
    }
  }
  start() {
    this.recorderStart()
    this.setResultText({ resultText: '', resultTextTemp: '' })
  }
  stop() {
    this.recorderStop()
  }
}

// ====================== usage ======================
var vConsole = new VConsole()
let iatRecorder = new IatRecorder()
let countInterval
// fired when the status changes
iatRecorder.onWillStatusChange = function(oldStatus, status) {
  // hook for page interactions: the 60 s countdown (dictation is limited to 60 s), the recording animation, button state, etc.
  // button label for each status
  let text = {
    null: '开始识别', // initial state
    init: '开始识别', // initialised
    ing: '结束识别', // recording in progress
    end: '开始识别', // finished
  }
  let seconds = 0
  $('.taste-button')
    .removeClass(`status-${oldStatus}`)
    .addClass(`status-${status}`)
    .text(text[status])
  if (status === 'ing') {
    $('hr').addClass('hr')
    $('.taste-content').css('display', 'none')
    $('.start-taste').addClass('flex-display-1')
    // countdown timer
    countInterval = setInterval(() => {
      seconds++
      $('.used-time').text(`0${Math.floor(seconds / 60)}:${Math.floor(seconds / 10)}${seconds % 10}`)
      /* if (seconds >= 60) {
        this.stop()
        clearInterval(countInterval)
      } */
    }, 1000)
  } else if (status === 'init') {
    $('.time-box').show()
    $('.used-time').text('00:00')
  } else {
    $('.time-box').hide()
    $('hr').removeClass('hr')
    clearInterval(countInterval)
  }
}


$(function () {
// listen for changes to the recognition result
  iatRecorder.onTextChange = function(text) {
    $('#result_output').text(text)
  }
  $('#taste_button, .taste-button').click(function() {
    if (iatRecorder.status === 'ing') {
      iatRecorder.stop()
    } else {
      iatRecorder.start()
    }
  })

});
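As the comment above getWebSocketUrl says, the signed URL should really come from your backend so that APISecret never ships to the browser. Below is a minimal Node/Express sketch of such an endpoint, using Node's built-in crypto and the same signing scheme as the frontend code above; the /iat-url route and the XFYUN_* environment variable names are placeholders of my own:

// server.js – hypothetical backend endpoint that signs the iat websocket URL
const crypto = require('crypto')
const express = require('express')

const app = express()
const HOST = 'iat-api.xfyun.cn'
const API_KEY = process.env.XFYUN_API_KEY       // placeholder env vars
const API_SECRET = process.env.XFYUN_API_SECRET

app.get('/iat-url', (req, res) => {
  const date = new Date().toUTCString()
  const signatureOrigin = `host: ${HOST}\ndate: ${date}\nGET /v2/iat HTTP/1.1`
  // HMAC-SHA256 with the APISecret, base64-encoded – same scheme as the CryptoJS code above
  const signature = crypto.createHmac('sha256', API_SECRET).update(signatureOrigin).digest('base64')
  const authorizationOrigin = `api_key="${API_KEY}", algorithm="hmac-sha256", headers="host date request-line", signature="${signature}"`
  const authorization = Buffer.from(authorizationOrigin).toString('base64')
  res.json({
    url: `wss://${HOST}/v2/iat?authorization=${authorization}&date=${encodeURIComponent(date)}&host=${HOST}`,
  })
})

app.listen(3000)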

6. Create the translation.vue file (taken from src\pages\index\index.html in the iFlytek demo source)

<template>
  <div class="service-item service-item-taste">
    <h2 class="service-item-title">产品体验-语音听写(流式版)WebAPI</h2>
    <a href="/doc" target="_blank">demo文档说明</a>
    <div class="service-item-content service-item-taste-content">
      <div class="taste-content">
        <button class="taste-button ready-button" id="taste_button">开始识别</button>
      </div>
      <div class="start-taste">
        <div class="start-taste-left">
          <div class="time-box">
            <span class="start-taste-line">
              <hr class="hr hr1">
              <hr class="hr hr2">
              <hr class="hr hr3">
              <hr class="hr hr4">
              <hr class="hr hr5">
              <hr class="hr hr6">
              <hr class="hr hr7">
              <hr class="hr hr8">
              <hr class="hr hr9">
              <hr class="hr hr10">
            </span>
            <span class="total-time"><span class="used-time">00: 00</span> / 01: 00</span>
          </div>
          <div class="start-taste-button">
            <button class="taste-button start-button">结束识别</button>
          </div>
        </div>
        <div class="output-box" id="result_output"></div>
      </div>
    </div>
  </div>

</template>
<script>


// translation.js has no exports; importing it simply runs the demo code (it creates the recorder and binds the jQuery handlers)
import translation from './translation.js'

export default {
  name: 'translation',
  data() {
    return {
      phone: '',
    }
  },
  // mixins: [translation],
  created() {
    // transWorker
    // translation.transW
  },
  mounted() {
  },
  methods: {

  },
  destroyed() {

  },
}

</script>

<style >
.service-item-taste button {
  cursor: pointer;
}

.service-item-taste .taste-button {
  background: #187cff;
  border: 1px solid;
  border-color: #478eea;
  color: #fff;
  text-align: center;
  border-radius: 3px;
}

.service-item-taste .taste-header .dialect-select {
  margin-left: 20px;
  height: 26px;
}

.service-item-taste .taste-header .dialect {
  margin-left: 20px;
  height: 26px;
  line-height: 26px;
  display: none;
}

.service-item-taste .taste-header a {
  border: none;
  border-radius: 4px;
  color: #fff;
  height: 26px;
  width: 100px;
  float: right;
  text-align: center;
  line-height: 26px;
}

.service-item-taste .taste-content {
  display: -ms-flexbox;
  display: flex;
  -ms-flex-align: center;
  align-items: center;
  margin-top: 100px;
}

.service-item-taste .start-taste {
  margin-top: 30px;
  display: none;
  -ms-flex-pack: justify;
  justify-content: space-between;
}
.service-item-taste .start-taste.flex-display-1{
  display: flex;
}

.service-item-taste .start-taste .start-taste-left {
  width: 40%;
  margin-left: 30px;
}

.service-item-taste .start-taste .start-taste-left .time-box {
  margin-top: 40px;
  display: -ms-flexbox;
  display: flex;
  -ms-flex-pack: center;
  justify-content: center;
}

.service-item-taste .start-taste .start-taste-left .time-box .total-time {
  margin-left: 20px;
}

.service-item-taste .start-taste .start-taste-left .time-box .start-taste-line {
  display: inline-block;
  margin-right: 20px;
}

.service-item-taste .start-taste .start-taste-left .time-box .start-taste-line hr {
  background-color: #187cff;
  width: 3px;
  height: 10px;
  margin: 0 5px;
  display: inline-block;
  border: none;
}

.service-item-taste .start-taste .start-taste-left .start-taste-button {
  display: -ms-flexbox;
  display: flex;
  margin-top: 70px;
}

.service-item-taste .start-taste .output-box {
  height: 200px;
  overflow: auto;
  background: #f0f0f0;
  width: 50%;
  line-height: 1.5;
  padding-left: 10px;
  padding-top: 10px;
}

.hr {
  animation: note 0.2s ease-in-out;
  animation-iteration-count: infinite;
  animation-direction: alternate;
}

.hr1 {
  animation-delay: -1s;
}

.hr2 {
  animation-delay: -0.9s;
}

.hr3 {
  animation-delay: -0.8s;
}

.hr4 {
  animation-delay: -0.7s;
}

.hr5 {
  animation-delay: -0.6s;
}

.hr6 {
  animation-delay: -0.5s;
}

.hr7 {
  animation-delay: -0.4s;
}

.hr8 {
  animation-delay: -0.3s;
}

.hr9 {
  animation-delay: -0.2s;
}

.hr10 {
  animation-delay: -0.1s;
}

@keyframes note {
  from {
    transform: scaleY(1);
  }
  to {
    transform: scaleY(4);
  }
}

.ready-button,
.start-button {
  margin: 0 auto;
  height: 40px;
  width: 160px;
  font-size: 16px;
  letter-spacing: 6px;
}

.taste-button:hover {
  background: #0b99ff;
}
</style>
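As pasted from the demo, translation.js drives the page with jQuery, which works but is not very idiomatic inside a Vue component. If you would rather drive the UI from Vue itself, one option is to have translation.js export the recorder class (e.g. add export default IatRecorder and drop the jQuery block at the bottom) and wire it up roughly like this; this is only a sketch, not the demo's own code:

// translation.vue <script> – assumes translation.js now does `export default IatRecorder`
import IatRecorder from './translation.js'

export default {
  name: 'translation',
  data() {
    return {
      recorder: null,
      status: 'null',
      resultText: '', // bind this to the output box in the template instead of using jQuery
    }
  },
  mounted() {
    this.recorder = new IatRecorder()
    this.recorder.onTextChange = text => { this.resultText = text }
    this.recorder.onWillStatusChange = (oldStatus, status) => { this.status = status }
  },
  methods: {
    // same start/stop toggle as the jQuery click handler above
    toggle() {
      if (this.status === 'ing') {
        this.recorder.stop()
      } else {
        this.recorder.start()
      }
    },
  },
  destroyed() {
    this.recorder && this.recorder.stop()
  },
}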

Tested successfully.
