利用免费的百度语音识别及语音合成服务,以及百度机器人聊天服务,我们就可以用树莓派搭建自己的语音机器人。
ps:本来想用图灵机器人,但是最近貌似收费了,因此还是换成百度。有条件的可以用图灵机器人,百度机器人有点傻。
基本硬件:树莓派3B,麦克风(不需要带声卡,淘宝10块钱的就可以),有线音响.
基本框图
翻译文字代码:
# coding=utf-8
import sys
import json
import base64
import time
# True when running under Python 3; selects which urllib flavour and which
# timer function the rest of the script uses.
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    timer = time.perf_counter
else:
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode
    if sys.platform == "win32":
        timer = time.clock
    else:
        # On most other platforms the best timer is time.time()
        timer = time.time
# Baidu API key / secret key sent by fetch_token (values are masked in this listing).
API_KEY = '***'
SECRET_KEY = '***'
# Audio file to recognize
AUDIO_FILE = 'record_voice.wav' # only pcm/wav/amr are supported; the fast edition additionally supports m4a
# File format, taken from the last three characters of the file name
FORMAT = AUDIO_FILE[-3:] # the suffix must be pcm/wav/amr; the fast edition additionally supports m4a
CUID = '123456PYTHON'
# Sample rate
RATE = 16000 # fixed value
# Standard edition
DEV_PID = 1537 # 1537 means Mandarin recognition with the input-method model; choose the PID for language/model from the docs
ASR_URL = 'http://vop.baidu.com/server_api'
SCOPE = 'audio_voice_assistant_get' # this scope means ASR is available; if missing, enable it on the web console (very old apps may not have it)
# To test the self-training platform, enable the settings below. Once the model is online, step two of the
# platform shows "dedicated model parameters pid:8001, modelid:1234"; use them as dev_pid=8001, lm_id=1234
# DEV_PID = 8001 ;
# LM_ID = 1234 ;
# Fast edition: if you uncomment these, fill in your own appkey/appSecret and enable the fast edition on the web console (it may be charged once enabled)
# DEV_PID = 80001
# ASR_URL = 'http://vop.baidu.com/pro_api'
# SCOPE = 'brain_enhanced_asr' # this scope means the fast edition is available; if missing, enable it on the web console
# Skip the scope check (very old apps may not have the scope)
# SCOPE = False
class DemoError(Exception):
    """Error raised by this script for token, scope and audio-file failures."""
""" TOKEN start """
# HTTPS, so that API_KEY / SECRET_KEY are not posted in clear text.
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'


def fetch_token():
    """Request an OAuth access token using API_KEY and SECRET_KEY.

    Posts a ``client_credentials`` grant to TOKEN_URL, prints the raw and
    parsed response, and checks that the granted scopes contain SCOPE
    (the check is skipped when SCOPE is falsy).

    Returns:
        The ``access_token`` value from the token response.

    Raises:
        DemoError: if the request fails without an HTTP response, if the
            response lacks ``access_token`` or ``scope``, or if SCOPE is
            not among the granted scopes.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            # Python 2 responses are not context managers, so close by hand.
            f.close()
    except URLError as err:
        # Only HTTPError (a URLError subclass) carries a status code and a
        # body; a plain URLError (DNS failure, refused connection) has neither.
        if not hasattr(err, 'read'):
            raise DemoError('token request failed: ' + str(err))
        print('token http response http code : ' + str(err.code))
        result_str = err.read()
    if IS_PY3:
        result_str = result_str.decode()
    print(result_str)
    result = json.loads(result_str)
    print(result)
    if 'access_token' in result and 'scope' in result:
        print(SCOPE)
        # SCOPE = False disables the scope check.
        if SCOPE and SCOPE not in result['scope'].split(' '):
            raise DemoError('scope is not correct')
        print('SUCCESS WITH TOKEN: %s EXPIRES IN SECONDS: %s' % (result['access_token'], result.get('expires_in')))
        return result['access_token']
    raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')
def translation_text():
    """Send AUDIO_FILE to the ASR endpoint and return the recognition result.

    Reads AUDIO_FILE, base64-encodes it, posts it as JSON to ASR_URL with a
    token from fetch_token(), writes the raw response text to ``result.txt``
    and prints the ``result`` field.

    Returns:
        The value of the ``result`` key of the JSON response.

    Raises:
        DemoError: if the audio file is empty, if the request fails without
            an HTTP response, or if the response has no ``result`` key.
    """
    token = fetch_token()
    with open(AUDIO_FILE, 'rb') as speech_file:
        speech_data = speech_file.read()
    length = len(speech_data)
    if length == 0:
        raise DemoError('file %s length read 0 bytes' % AUDIO_FILE)
    speech = base64.b64encode(speech_data)
    if IS_PY3:
        speech = str(speech, 'utf-8')
    params = {'dev_pid': DEV_PID,
              # "lm_id": LM_ID,  # enable when testing the self-training platform
              'format': FORMAT,
              'rate': RATE,
              'token': token,
              'cuid': CUID,
              'channel': 1,
              'speech': speech,
              'len': length
              }
    post_data = json.dumps(params, sort_keys=False)
    req = Request(ASR_URL, post_data.encode('utf-8'))
    req.add_header('Content-Type', 'application/json')
    begin = timer()
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            # Python 2 responses are not context managers, so close by hand.
            f.close()
        print("Request time cost %f" % (timer() - begin))
    except URLError as err:
        # Only HTTPError (a URLError subclass) carries a status code and a
        # body; a plain URLError (DNS failure, refused connection) has neither.
        if not hasattr(err, 'read'):
            raise DemoError('asr request failed: ' + str(err))
        print('asr http response http code : ' + str(err.code))
        result_str = err.read()
    if IS_PY3:
        result_str = str(result_str, 'utf-8')
    # Persist the raw response before parsing so that a failed recognition
    # can still be inspected in result.txt.
    with open("result.txt", "w") as of:
        of.write(result_str)
    response = json.loads(result_str)
    if 'result' not in response:
        raise DemoError('asr response has no result: ' + result_str)
    recognized = response['result']
    print(recognized)
    return recognized
""" TOKEN end """
if __name__ == '__main__':
    # Run one recognition of AUDIO_FILE when executed as a script.
    translation_text()
语音输出代码:
# coding=utf-8
from aip import AipSpeech
import os
import sys
#import importlib
# reload(sys)
""" 你的 APPID AK SK """
# Application id, API key and secret key passed to AipSpeech in synath().
# NOTE(review): these look like real credentials committed in source —
# consider rotating them and loading them from the environment instead.
APP_ID = '25326941'
API_KEY = 'U9B7tKwMqwxhqvZ9oAgUEvD3'
SECRET_KEY = 'rpr1q0gC7ELOUIXIfXPHj4ATHzZnfUji'
def synath(speech):
    """Synthesize *speech* with AipSpeech and save the audio to ``au.mp3``.

    Args:
        speech: The text handed to ``client.synthesis``.

    Returns:
        True when audio was written to ``au.mp3``; False when the service
        returned a dict, which this code treats as an error. In that case
        the dict is printed and ``au.mp3`` is left untouched.
    """
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    result = client.synthesis(speech)
    # Success yields the audio binary; a dict is an error description.
    if isinstance(result, dict):
        # Report the failure instead of silently keeping a stale au.mp3.
        print('speech synthesis failed: %s' % (result,))
        return False
    with open('au.mp3', 'wb') as f:
        f.write(result)
    return True
if __name__ == '__main__':
    # Manual smoke test. APP_ID / API_KEY / SECRET_KEY are already defined
    # at module level with these same values, so they are not repeated here.
    synath('zjk')
遍历字典
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'遍历字典,查找键值'
# get_value.py 文件内容
def get_target_value(key, dic, tmp_list):
    """Collect every value stored under *key* anywhere inside *dic*.

    The search looks at *dic* itself and then walks its values in order,
    descending into nested dicts, lists and tuples.

    :param key: the key to look for
    :param dic: the (JSON-like) dict to search
    :param tmp_list: list that receives the matches; it is mutated in place
    :return: *tmp_list* with the matches appended. If *dic* is not a dict
        or *tmp_list* is not a list, an error message string is returned
        instead (legacy behaviour, kept for existing callers).
    """
    # Validate the argument types before touching them.
    if not isinstance(dic, dict) or not isinstance(tmp_list, list):
        return 'argv[1] not an dict or argv[-1] not an list '

    if key in dic:
        tmp_list.append(dic[key])  # a match at this level

    # Keep searching the values, including the one just matched.
    for value in dic.values():
        if isinstance(value, dict):
            get_target_value(key, value, tmp_list)
        elif isinstance(value, (list, tuple)):
            _get_value(key, value, tmp_list)
    return tmp_list


def _get_value(key, val, tmp_list):
    """Search each element of the list/tuple *val* for *key*.

    Dict elements are handed to get_target_value; nested lists and tuples
    are searched recursively. Matches are appended to *tmp_list*.
    """
    for val_ in val:
        if isinstance(val_, dict):
            get_target_value(key, val_, tmp_list)
        elif isinstance(val_, (list, tuple)):
            _get_value(key, val_, tmp_list)
主函数
import api
import record
import sys
import os
import speech
import datetime
import requests
import respond
import get_value
import json
# Credentials handed to respond.get_token for the chat service.
# NOTE(review): these look like real credentials committed in source —
# consider rotating them and loading them from the environment instead.
client_id = 'MWgD1pMT99mHuPk2wknoeMCf'
client_secret = 'Nfrg6LcuuTvArQRSQp5X6gjhU0fxHbI5'

# Step 1: record the question.
print('start,record')
pathename = record.record()

# Step 2: convert the recording to text.
# NOTE(review): api.k() is not defined in the recognition script shown in
# this article — confirm the entry point name.
print('beginning change')
result = api.k()

# Step 3: ask the chat service.
# The original bound the token to `r` but then used the undefined name
# `access_token` (NameError). This assumes respond.get_token returns the
# access-token string — TODO confirm.
access_token = respond.get_token(client_id, client_secret)
url = 'https://aip.baidubce.com/rpc/2.0/unit/service/v3/chat?access_token=' + access_token
post_data = {"version": "3.0", "service_id": "S62437", "session_id": "", "log_id": "7758521",
             "request": {"terminal_id": "88888", "query": ""}}
post_data["request"]["query"] = str(result)
print(type(result), result)
encoded_data = json.dumps(post_data).encode('utf-8')
# The body is JSON, so declare the full media type.
headers = {'content-type': 'application/json'}
responses = requests.post(url, data=encoded_data, headers=headers)

# Step 4: speak the answer. Check the HTTP status before parsing, and make
# sure a reply was found before indexing it.
if responses:
    f_zero_dict = responses.json()
    f = get_value.get_target_value("say", f_zero_dict, [])
    if isinstance(f, list) and f:
        speech.synath(f[0])
        os.system('mplayer au.mp3')
        print(f[0])
    else:
        print('no "say" field in chat response: %s' % (f_zero_dict,))
else:
    print('chat request failed with HTTP status %s' % responses.status_code)
百度的语音识别文字精准度很高,但是机器人有点傻,不过可以训练;有条件的可以用图灵机器人。
实例测试