1. Introduction
https://pypi.org/project/SpeechRecognition/ https://github.com/Uberi/speech_recognition
SpeechRecognition is a library for performing speech recognition, with support for several engines and APIs, both online and offline.
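Before the service examples below, here is a minimal sketch of the basic workflow: wrap an audio source, record it into an AudioData object, and hand that to one of the recognize_* methods. The file name "sample.wav" is only a placeholder for any PCM WAV file you have locally.
import speech_recognition as sr

r = sr.Recognizer()
# "sample.wav" is a placeholder; any PCM WAV file will do
with sr.AudioFile("sample.wav") as source:
    audio = r.record(source)  # read the whole file into an AudioData object

# online engine: Google Web Speech API
print(r.recognize_google(audio))
# offline engines such as recognize_sphinx / recognize_whisper / recognize_vosk
# need their extra packages installed first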
2. Testing
2.1 Command
python -m speech_recognition
Running the package as a module starts a small built-in demo: it listens on the default microphone and prints what the Google Web Speech API recognizes, which is a quick way to verify that the microphone and PyAudio are set up correctly.
2.2 fastapi
import json
import os

import speech_recognition
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Optional back-end dependencies: whisper/torch/soundfile are needed by
# recognize_whisper, vosk by recognize_vosk. Importing them here fails fast
# if they are missing.
import torch
import soundfile
import whisper
import vosk


class ResponseModel(BaseModel):
    """Request body: path to a local WAV file."""
    path: str


app = FastAPI()


def get_path(req: ResponseModel) -> str:
    """Validate the requested file path and return it."""
    path = req.path
    if path == "":
        raise HTTPException(status_code=400, detail="No path provided")
    if not path.endswith(".wav"):
        raise HTTPException(status_code=400, detail="Invalid file type")
    if not os.path.exists(path):
        raise HTTPException(status_code=404, detail="File does not exist")
    return path


@app.get("/")
def root():
    return {"message": "speech-recognition api"}


@app.post("/recognize-google")
def recognize_google(req: ResponseModel):
    # online recognition via the Google Web Speech API
    path = get_path(req)
    r = speech_recognition.Recognizer()
    with speech_recognition.AudioFile(path) as source:
        audio = r.record(source)
    return r.recognize_google(audio, language='ja-JP', show_all=True)


@app.post("/recognize-vosk")
def recognize_vosk(req: ResponseModel):
    # offline recognition with Vosk; recognize_vosk returns a JSON string
    path = get_path(req)
    r = speech_recognition.Recognizer()
    with speech_recognition.AudioFile(path) as source:
        audio = r.record(source)
    return json.loads(r.recognize_vosk(audio, language='ja'))


@app.post("/recognize-whisper")
def recognize_whisper(req: ResponseModel):
    # offline recognition with OpenAI Whisper; the result is plain text
    path = get_path(req)
    r = speech_recognition.Recognizer()
    with speech_recognition.AudioFile(path) as source:
        audio = r.record(source)
    result = r.recognize_whisper(audio, language='ja')
    try:
        return json.loads(result)
    except (json.JSONDecodeError, TypeError):
        return {"text": result}


if __name__ == "__main__":
    host = os.environ.get('HOST', '0.0.0.0')
    port = int(os.environ.get('PORT', 8080))
    uvicorn.run("main:app", host=host, port=port)
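Once the service is running (port 8080 by default), it can be exercised from Python; this is a minimal sketch, and /path/to/sample.wav is only a placeholder for a real WAV file on the server:
import requests

resp = requests.post(
    "http://localhost:8080/recognize-whisper",
    json={"path": "/path/to/sample.wav"},  # placeholder path
)
print(resp.json())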
2.3 google
import speech_recognition as sr
import webbrowser as wb
import speak  # local text-to-speech helper module (not part of SpeechRecognition)

chrome_path = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s'

r = sr.Recognizer()
with sr.Microphone() as source:
    print('Say Something!')
    audio = r.listen(source)
    print('Done!')

try:
    # online recognition via the Google Web Speech API (defaults to English)
    text = r.recognize_google(audio)
    print('Google thinks you said:\n' + text)
    lang = 'en'
    speak.tts(text, lang)  # read the result back aloud
    # open a Google search for the recognized text in Chrome
    f_text = 'https://www.google.co.in/search?q=' + text
    wb.get(chrome_path).open(f_text)
except Exception as e:
    print(e)
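The speak module imported above is a local helper, not a published package. If it is not available, a minimal stand-in with the same tts(text, lang) interface could be written with pyttsx3 (a swapped-in offline TTS engine; the lang argument is ignored in this sketch):
# speak.py - hypothetical drop-in replacement using pyttsx3
import pyttsx3

def tts(text, lang):
    engine = pyttsx3.init()
    engine.say(text)     # queue the text; lang is not used here
    engine.runAndWait()  # block until speech finishes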
2.4 recognize_sphinx
import logging

import speech_recognition as sr


def audio_Sphinx(filename):
    logging.info('Recognizing audio file...')
    # use the audio file as the audio source
    r = sr.Recognizer()
    with sr.AudioFile(filename) as source:
        audio = r.record(source)  # read the entire audio file
    # recognize speech offline using CMU Sphinx
    # (requires the pocketsphinx package; non-English models such as zh-cn
    #  must be installed separately)
    try:
        text = r.recognize_sphinx(audio, language='zh-cn')
        print("Sphinx thinks you said: " + text)
        return text
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    wav_num = 0
    while True:
        r = sr.Recognizer()
        # use the microphone as the audio source
        mic = sr.Microphone()
        logging.info('Recording...')
        with mic as source:
            # calibrate for ambient noise before listening
            r.adjust_for_ambient_noise(source)
            audio = r.listen(source)
        # save the microphone recording as a 16 kHz WAV file
        with open(f"00{wav_num}.wav", "wb") as f:
            f.write(audio.get_wav_data(convert_rate=16000))
        logging.info('Recording finished, recognizing...')
        target = audio_Sphinx(f"00{wav_num}.wav")
        wav_num += 1
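recognize_sphinx works fully offline but needs the pocketsphinx package installed, and by default it ships with an English (en-US) model only, so a Chinese (zh-cn) model has to be added separately. With that in place, an existing 16 kHz WAV file can be passed to the function directly; a minimal sketch, where 000.wav stands in for the first file written by the loop above:
# assumes pocketsphinx and a zh-cn model are installed
text = audio_Sphinx("000.wav")
print(text)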
2.5 Recording speech to an audio file
- Method 1:
import os

import speech_recognition as sr


# use SpeechRecognition to record from the microphone
def my_record(rate=16000):
    r = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        print("please say something")
        audio = r.listen(source)
    # make sure the output directory exists before writing
    os.makedirs("voices", exist_ok=True)
    with open("voices/myvoices.wav", "wb") as f:
        f.write(audio.get_wav_data())
    print("Recording finished!")


my_record()
- Method 2:
import os
import time
import wave

from pyaudio import PyAudio, paInt16

framerate = 16000    # sample rate (Hz)
num_samples = 2000   # frames read per buffer
channels = 1         # mono
sampwidth = 2        # sample width in bytes (16-bit)
FILEPATH = 'voices/myvoices.wav'


def save_wave_file(filepath, data):
    """Write the recorded frames to a WAV file."""
    wf = wave.open(filepath, 'wb')
    wf.setnchannels(channels)
    wf.setsampwidth(sampwidth)
    wf.setframerate(framerate)
    wf.writeframes(b''.join(data))
    wf.close()


def my_record():
    """Record 10 seconds from the default input device with PyAudio."""
    pa = PyAudio()
    # open a new input stream
    stream = pa.open(format=paInt16, channels=channels,
                     rate=framerate, input=True, frames_per_buffer=num_samples)
    my_buf = []  # holds the recorded frames
    t = time.time()
    print('Recording...')
    while time.time() < t + 10:  # recording duration in seconds
        # read num_samples frames per iteration
        string_audio_data = stream.read(num_samples)
        my_buf.append(string_audio_data)
    print('Recording finished.')
    os.makedirs(os.path.dirname(FILEPATH), exist_ok=True)
    save_wave_file(FILEPATH, my_buf)
    stream.stop_stream()
    stream.close()
    pa.terminate()


my_record()
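Either method leaves a 16 kHz WAV file at voices/myvoices.wav, which can be fed straight back into the recognizers from the earlier sections. A minimal sketch; the engine and the language code are just example choices:
import speech_recognition as sr

r = sr.Recognizer()
with sr.AudioFile("voices/myvoices.wav") as source:
    audio = r.record(source)
# any recognize_* engine works here; the Google Web Speech API is shown as an example
print(r.recognize_google(audio, language='zh-CN'))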
Conclusion
If you found this method or code even slightly useful, you can give the author a like or buy them a cup of coffee;
╮( ̄▽ ̄)╭
If you think the method or code is not great
//(ㄒoㄒ)//, leave a comment and the author will keep improving;
o_O???
If you need custom development of related features, you can leave a message or contact the author privately;
(✿◡‿◡)
Thanks to everyone for your support!
( ´ ▽´ )ノ ( ´ ▽´)っ!!!