【小沐学Python】Python实现语音识别（SpeechRecognition）

原创

爱看书的小沐 2024-03-14 08:55:21 博主文章分类：Python ©著作权

文章标签 python chrome json 文章分类 游戏开发 yyds干货盘点

©著作权归作者所有：来自51CTO博客作者爱看书的小沐的原创作品，请联系作者获取转载授权，否则将追究法律责任

1、简介

https://pypi.org/project/SpeechRecognition/ https://github.com/Uberi/speech_recognition

SpeechRecognition用于执行语音识别的库，支持多个引擎和 API，在线和离线。

在这里插入图片描述

2 测试

2.1 命令

python -m speech_recognition

在这里插入图片描述

2.2 fastapi

import json
import os
from pprint import pprint

import speech_recognition
import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import soundfile
import whisper
import vosk

class ResponseModel(BaseModel):
    path: str


app = FastAPI()


def get_path(req: ResponseModel):
    path = req.path
    if path == "":
        raise HTTPException(status_code=400, detail="No path provided")

    if not path.endswith(".wav"):
        raise HTTPException(status_code=400, detail="Invalid file type")

    if not os.path.exists(path):
        raise HTTPException(status_code=404, detail="File does not exist")

    return path


@app.get("/")
def root():
    return {"message": "speech-recognition api"}


@app.post("/recognize-google")
def recognize_google(req: ResponseModel):
    path = get_path(req)
    r = speech_recognition.Recognizer()

    with speech_recognition.AudioFile(path) as source:
        audio = r.record(source)

    return r.recognize_google(audio, language='ja-JP', show_all=True)


@app.post("/recognize-vosk")
def recognize_vosk(req: ResponseModel):
    path = get_path(req)
    r = speech_recognition.Recognizer()

    with speech_recognition.AudioFile(path) as source:
        audio = r.record(source)

    return json.loads(r.recognize_vosk(audio, language='ja'))


@app.post("/recognize-whisper")
def recognize_whisper(req: ResponseModel):
    path = get_path(req)
    r = speech_recognition.Recognizer()

    with speech_recognition.AudioFile(path) as source:
        audio = r.record(source)

    result = r.recognize_whisper(audio, language='ja')
    try:
        return json.loads(result)
    except:
        return {"text": result}


if __name__ == "__main__":
    host = os.environ.get('HOST', '0.0.0.0')
    port: int = os.environ.get('PORT', 8080)

    uvicorn.run("main:app", host=host, port=int(port))

2.3 google

import speech_recognition as sr
import webbrowser as wb
import speak

chrome_path = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s'

r = sr.Recognizer()

with sr.Microphone() as source:
    print ('Say Something!')
    audio = r.listen(source)
    print ('Done!')
 
try:
    text = r.recognize_google(audio)
    print('Google thinks you said:\n' + text)
    lang = 'en'

    speak.tts(text, lang)

    f_text = 'https://www.google.co.in/search?q=' + text
    wb.get(chrome_path).open(f_text)
 
except Exception as e:
    print (e)

2.4 recognize_sphinx


import logging
import speech_recognition as sr


def audio_Sphinx(filename):
    logging.info('开始识别语音文件...')
    # use the audio file as the audio source
    r = sr.Recognizer()
    with sr.AudioFile(filename) as source:
        audio = r.record(source)  # read the entire audio file

    # recognize speech using Sphinx
    try:
        print("Sphinx thinks you said: " + r.recognize_sphinx(audio, language='zh-cn'))
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))    

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    wav_num = 0
    while True:
        r = sr.Recognizer()
        #启用麦克风
        mic = sr.Microphone()
        logging.info('录音中...')
        with mic as source:
            #降噪
            r.adjust_for_ambient_noise(source)
            audio = r.listen(source)
        with open(f"00{wav_num}.wav", "wb") as f:
            #将麦克风录到的声音保存为wav文件
            f.write(audio.get_wav_data(convert_rate=16000))
        logging.info('录音结束，识别中...')

        target = audio_Sphinx(f"00{wav_num}.wav")
        wav_num += 1

2.5 语音生成音频文件

方法1：


import speech_recognition as sr
 
# Use SpeechRecognition to record 使用语音识别包录制音频
def my_record(rate=16000):
    r = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        print("please say something")
        audio = r.listen(source)
 
    with open("voices/myvoices.wav", "wb") as f:
        f.write(audio.get_wav_data())
    print("录音完成！")
 
my_record()

方法2：


import wave
from pyaudio import PyAudio, paInt16
 
framerate = 16000  # 采样率
num_samples = 2000  # 采样点
channels = 1  # 声道
sampwidth = 2  # 采样宽度2bytes
FILEPATH = 'voices/myvoices.wav'
 
 
def save_wave_file(filepath, data):
    wf = wave.open(filepath, 'wb')
    wf.setnchannels(channels)
    wf.setsampwidth(sampwidth)
    wf.setframerate(framerate)
    wf.writeframes(b''.join(data))
    wf.close()
 
 
#录音
def my_record():
    pa = PyAudio()
    #打开一个新的音频stream
    stream = pa.open(format=paInt16, channels=channels,
                     rate=framerate, input=True, frames_per_buffer=num_samples)
    my_buf = [] #存放录音数据
 
    t = time.time()
    print('正在录音...')
 
    while time.time() < t + 10:  # 设置录音时间（秒）
    	#循环read，每次read 2000frames
        string_audio_data = stream.read(num_samples)
        my_buf.append(string_audio_data)
    print('录音结束.')
    save_wave_file(FILEPATH, my_buf)
    stream.close()

结语

如果您觉得该方法或代码有一点点用处，可以给作者点个赞，或打赏杯咖啡；╮(￣▽￣)╭ 如果您感觉方法或代码不咋地//(ㄒoㄒ)//，就在评论处留言，作者继续改进；o_O??? 如果您需要相关功能的代码定制化开发，可以留言私信作者；(✿◡‿◡) 感谢各位大佬童鞋们的支持！( ´ ▽´ )ﾉ ( ´ ▽´)っ！！！