利用免费的百度语音识别及合成服务、百度机器人聊天服务,我们就可以用树莓派搭建自己的语音机器人。

ps:本来想用图灵机器人,但是最近貌似收费,因此还是换成百度。有条件的可以用图灵机器人,百度机器人有点傻。

基本硬件:树莓派3B,麦克风(不需要带声卡,淘宝10块钱的就可以),有线音响.

         基本框图

树莓派 微信聊天机器人 树莓派 语音对话_python

语音转文字(语音识别)代码:

# coding=utf-8

import sys
import json
import base64
import time

IS_PY3 = sys.version_info.major == 3

if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    timer = time.perf_counter
else:
    from urllib2 import urlopen
    from urllib2 import Request
    from urllib2 import URLError
    from urllib import urlencode
    if sys.platform == "win32":
        timer = time.clock
    else:
        # On most other platforms the best timer is time.time()
        timer = time.time

API_KEY = '***'
SECRET_KEY = '***'

# Audio file to be recognized
AUDIO_FILE = 'record_voice.wav'  # only pcm/wav/amr are supported; the "pro" edition additionally supports m4a
# File format, taken from the last three characters of the file name
FORMAT = AUDIO_FILE[-3:]  # the suffix must be pcm/wav/amr; the "pro" edition additionally supports m4a

CUID = '123456PYTHON'
# Sample rate
RATE = 16000  # fixed value

# Standard edition

DEV_PID = 1537  # 1537 = Mandarin with the input-method model; pick the PID (language + model) from the Baidu docs
ASR_URL = 'http://vop.baidu.com/server_api'
SCOPE = 'audio_voice_assistant_get'  # this scope means the app has ASR enabled; if missing, tick it in the web console (very old apps may lack it)

# To test the self-training platform, enable the settings below. Once your model is online the platform shows
# "step 2: get the dedicated model parameters pid:8001, modelid:1234"; use those as dev_pid=8001, lm_id=1234
# DEV_PID = 8001 ;   
# LM_ID = 1234 ;

# "Pro" edition: if you uncomment this, fill in your own appkey/appSecret and enable the pro edition in the
# web console (it may be billed once enabled)

# DEV_PID = 80001
# ASR_URL = 'http://vop.baidu.com/pro_api'
# SCOPE = 'brain_enhanced_asr'  # this scope means the pro edition is enabled; if missing, enable it in the web console

# Skip the scope check (very old apps may not report a scope)
# SCOPE = False

class DemoError(Exception):
    """Error raised by this demo, e.g. for a bad token response or an empty audio file."""


"""  TOKEN start """

TOKEN_URL = 'http://aip.baidubce.com/oauth/2.0/token'


def fetch_token():
    """Request an OAuth access token using API_KEY and SECRET_KEY.

    The credentials are POSTed to TOKEN_URL with the ``client_credentials``
    grant type and the JSON response is validated.

    Returns:
        The ``access_token`` value from the token response.

    Raises:
        DemoError: if the endpoint cannot be reached, the response is not
            valid JSON, ``access_token``/``scope`` are missing from it, or
            SCOPE is set and is not among the granted scopes.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            # try/finally instead of ``with``: the Python 2 response object
            # is not a context manager.
            f.close()
    except URLError as err:
        # Only HTTPError (a URLError subclass) carries a status code and a
        # readable body; a plain URLError (DNS failure, refused connection)
        # has neither, so report it instead of hitting AttributeError.
        if getattr(err, 'code', None) is None:
            raise DemoError('token request failed: %s' % err)
        print('token http response http code : ' + str(err.code))
        result_str = err.read()
    if IS_PY3:
        result_str = result_str.decode()

    # NOTE(review): the prints below echo the access token to stdout.
    print(result_str)
    try:
        result = json.loads(result_str)
    except ValueError:
        raise DemoError('token response is not valid JSON: %r' % (result_str,))
    print(result)
    if 'access_token' in result and 'scope' in result:
        print(SCOPE)
        # SCOPE = False disables the scope check.
        if SCOPE and SCOPE not in result['scope'].split(' '):
            raise DemoError('scope is not correct')
        print('SUCCESS WITH TOKEN: %s  EXPIRES IN SECONDS: %s' % (result['access_token'], result['expires_in']))
        return result['access_token']
    raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: access_token or scope not found in token response')
def translation_text():
    """Send AUDIO_FILE to the ASR endpoint and return the recognition result.

    The audio is base64-encoded and POSTed as JSON to ASR_URL. The raw
    response body is also saved to ``result.txt``.

    Returns:
        The ``result`` field of the JSON response.

    Raises:
        DemoError: if the audio file is empty, the endpoint cannot be
            reached, the response is not valid JSON, or the response has no
            ``result`` field (i.e. recognition failed).
    """
    token = fetch_token()

    with open(AUDIO_FILE, 'rb') as speech_file:
        speech_data = speech_file.read()

    length = len(speech_data)
    if length == 0:
        raise DemoError('file %s length read 0 bytes' % AUDIO_FILE)
    speech = base64.b64encode(speech_data)
    if IS_PY3:
        speech = str(speech, 'utf-8')
    params = {'dev_pid': DEV_PID,
              # "lm_id": LM_ID,  # enable when testing the self-training platform
              'format': FORMAT,
              'rate': RATE,
              'token': token,
              'cuid': CUID,
              'channel': 1,
              'speech': speech,
              'len': length
              }
    post_data = json.dumps(params, sort_keys=False)
    req = Request(ASR_URL, post_data.encode('utf-8'))
    req.add_header('Content-Type', 'application/json')
    try:
        begin = timer()
        f = urlopen(req)
        try:
            raw_body = f.read()
        finally:
            # The Python 2 response object is not a context manager.
            f.close()
        print("Request time cost %f" % (timer() - begin))
    except URLError as err:
        # A plain URLError (no HTTP response at all) has neither ``code``
        # nor ``read()``; only HTTPError does.
        if getattr(err, 'code', None) is None:
            raise DemoError('asr request failed: %s' % err)
        print('asr http response http code : ' + str(err.code))
        raw_body = err.read()

    # Save the raw bytes so the dump does not depend on the locale encoding
    # and is kept even when recognition failed.
    with open("result.txt", "wb") as of:
        of.write(raw_body)

    # Decode and parse on both Python 2 and 3 (previously only done on 3,
    # leaving the return value undefined on Python 2).
    result_str = raw_body.decode('utf-8')
    try:
        response = json.loads(result_str)
    except ValueError:
        raise DemoError('asr response is not valid JSON: %r' % (result_str,))
    if not isinstance(response, dict) or 'result' not in response:
        raise DemoError('asr response has no result: %s' % result_str)

    recognized = response['result']
    print(recognized)
    return recognized

"""  TOKEN end """

# When executed as a script, run one recognition pass on AUDIO_FILE.
if __name__ == '__main__':
  translation_text()

语音输出代码:

# coding=utf-8
from aip import AipSpeech
import os
import sys
#import importlib 
# reload(sys)
""" 你的 APPID AK SK """
# Credentials passed to AipSpeech in synath().
# NOTE(review): these look like real keys committed in source — consider
# loading them from the environment or an untracked config file instead.
APP_ID = '25326941'
API_KEY = 'U9B7tKwMqwxhqvZ9oAgUEvD3'
SECRET_KEY = 'rpr1q0gC7ELOUIXIfXPHj4ATHzZnfUji'
def synath(speech):
    """Synthesize *speech* to audio and write it to ``au.mp3``.

    Args:
        speech: the text to synthesize.

    Returns:
        True if audio was written to ``au.mp3``; False if the service
        returned an error (in which case the file is left untouched and the
        error is printed).
    """
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    result = client.synthesis(speech)
    # On success the SDK returns the audio as binary data; on failure it
    # returns a dict describing the error.
    if isinstance(result, dict):
        # Report the failure instead of silently leaving a stale au.mp3
        # for the caller to play.
        print('speech synthesis failed: %s' % result)
        return False
    with open('au.mp3', 'wb') as f:
        f.write(result)
    return True
# Quick manual check: synthesize a short sample string.
# The module-level APP_ID / API_KEY / SECRET_KEY are used as-is; they were
# previously re-assigned here with identical values, which only created a
# second copy that could drift out of sync.
if __name__ == '__main__':
    synath('zjk')

遍历字典

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'遍历字典,查找键值'

# get_value.py 文件内容
def get_target_value(key, dic, tmp_list=None):
    """Collect every value stored under *key* anywhere inside nested JSON data.

    The search is depth-first: a match in *dic* itself is recorded first,
    then each value of *dic* is searched in iteration order, descending into
    nested dicts, lists and tuples.

    :param key: the key to look for
    :param dic: JSON data (must be a dict)
    :param tmp_list: list that receives the matches; a new list is created
        when omitted
    :return: *tmp_list* with the matching values appended
    :raises TypeError: if *dic* is not a dict or *tmp_list* is not a list
    """
    if tmp_list is None:
        tmp_list = []
    if not isinstance(dic, dict) or not isinstance(tmp_list, list):
        # Raise instead of returning an error string: callers index the
        # result as a list, so a string would be silently misused.
        raise TypeError('dic must be a dict and tmp_list must be a list')

    if key in dic:
        tmp_list.append(dic[key])

    # Keep searching below this level, including inside the matched value.
    for value in dic.values():
        if isinstance(value, dict):
            get_target_value(key, value, tmp_list)
        elif isinstance(value, (list, tuple)):
            _get_value(key, value, tmp_list)

    return tmp_list


def _get_value(key, val, tmp_list):
    """Search each element of the list/tuple *val* for *key*, appending matches to *tmp_list*."""
    for item in val:
        if isinstance(item, dict):
            get_target_value(key, item, tmp_list)
        elif isinstance(item, (list, tuple)):
            _get_value(key, item, tmp_list)  # nested sequence: recurse

主函数

import api
import record
import sys
import os
import speech
import datetime
import requests
import respond
import get_value
import json
# Credentials for the chat service token request.
# NOTE(review): hard-coded secrets — consider moving them out of the source.
client_id = 'MWgD1pMT99mHuPk2wknoeMCf'
client_secret = 'Nfrg6LcuuTvArQRSQp5X6gjhU0fxHbI5'

# Step 1: record audio.
print('start,record')
pathename = record.record()

# Step 2: presumably runs speech recognition on the recording — api.k() is
# not visible here; TODO confirm what it returns.
print('beginning change')
result = api.k()
print(type(result), result)

# If the recognizer hands back a list of strings, join it so the query is
# the text itself rather than the list's repr.
if isinstance(result, (list, tuple)):
    query = ''.join(str(part) for part in result)
else:
    query = str(result)

# Step 3: ask the chat service for a reply.
# Assumes respond.get_token() returns the access-token string — TODO confirm.
# The token must be bound to ``access_token`` because the URL below uses it.
access_token = respond.get_token(client_id, client_secret)
url = 'https://aip.baidubce.com/rpc/2.0/unit/service/v3/chat?access_token=' + access_token
post_data = {
    "version": "3.0",
    "service_id": "S62437",
    "session_id": "",
    "log_id": "7758521",
    "request": {"terminal_id": "88888", "query": query},
}
encoded_data = json.dumps(post_data).encode('utf-8')
# The body is JSON, so declare the full media type.
headers = {'content-type': 'application/json'}
# A timeout keeps the script from hanging forever on a stalled connection.
responses = requests.post(url, encoded_data, headers=headers, timeout=10)
f_zero_dict = responses.json()

# Step 4: speak the first "say" value found anywhere in the response.
f = get_value.get_target_value("say", f_zero_dict, [])
if responses and f:
    print(f[0])
    speech.synath(f[0])
    os.system('mplayer au.mp3')
else:
    # No usable reply: report it instead of failing on f[0] or playing a
    # stale au.mp3.
    print('no reply found in response: %s' % f_zero_dict)

百度的语音识别文字精准度很高,但是机器人有点傻,不过可以训练;有条件的话可以用图灵机器人。

实例测试

树莓派 微信聊天机器人 树莓派 语音对话_自然语言处理_02

 

树莓派 微信聊天机器人 树莓派 语音对话_百度_03