百度语音识别合成案例

本文截取了之前项目中涉及语音识别与语音合成的部分代码进行展示。其中读取语音的部分仍需优化:本 demo 读取的是保存在本地的音频文件,实际项目中建议改为直接处理音频流。

1 控制器示例代码

package com.cn.artifact.admin.controller;
import com.shenl.entry.startModel;
import com.shenl.model.Model2Ans;
import com.shenl.utils.HttpRequestUtils;
import com.shenl.voice.Sample;
import org.json.JSONException;
import org.json.JSONObject;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.PrintWriter;
import java.net.URLDecoder;
import java.util.Iterator;

/**
 * Created by SHENLIANG5 on 2017/6/13.
 *
 * <p>Demo controller that chains speech recognition and speech synthesis:
 * a local audio file is sent to the recognition service via {@link Sample},
 * the recognized text is handed to {@code Model2Ans}, and the answer text is
 * submitted to the Baidu text-to-speech endpoint. The client receives a small
 * JSON document describing the outcome.
 */
@Controller
public class VoicegenController {

    /** Baidu text-to-speech endpoint. */
    private static final String TTS_URL = "http://tsn.baidu.com/text2audio";

    /**
     * Last synthesis URL built by {@link #str2Voice}. Retained only because the
     * field was package-visible before; request handling itself no longer reads
     * it, since a shared field on a singleton controller is not safe to use as
     * per-request state.
     */
    String src;

    /**
     * Handles {@code /getVoice}: recognizes the demo audio file, asks the model
     * for an answer and requests speech synthesis for that answer.
     *
     * <p>Writes a JSON object to the response with {@code result} set to
     * {@code "succ"} (and {@code content} holding the synthesis URL) or
     * {@code "fail"} (and {@code content} holding the error code).
     *
     * @param request  the incoming HTTP request (not read)
     * @param response the HTTP response the JSON result is written to
     * @throws Exception if token retrieval, recognition or the synthesis call fails
     */
    @RequestMapping("/getVoice")
    @ResponseBody
    public void str2Voice(HttpServletRequest request, HttpServletResponse response) throws Exception {
        response.setContentType("text/html;charset=utf-8");
        response.setCharacterEncoding("utf-8");

        // method2 uses the token cached inside Sample, so it must be fetched first.
        Sample.getToken();
        String rs = Sample.method2("G:\\test3.wav");
        System.out.println(rs + "hello text");
        // For a quick test without audio, a hard-coded question can be passed here instead.
        // NOTE(review): mod2Ans presumably stores its answer in startModel.res - confirm.
        Model2Ans.mod2Ans(rs);
        // NOTE(review): replace("", "") is a no-op as written; the original search
        // string may have been an invisible character lost in transit - confirm.
        String ans = startModel.res.replace("", "");

        // NOTE(review): 'ans' is not URL-encoded here; whether httpPostVoiceGen
        // encodes the parameters itself cannot be seen from this file - confirm.
        String strParam = "tex=" + ans + "&lan=zh&cuid=" + "30-B4-9E-07-C7-E0" + "&ctp=1&tok=" + Sample.getRefreToken();
        String audioUrl = URLDecoder.decode(TTS_URL + "?" + strParam, "UTF-8");
        src = audioUrl;
        System.out.println(audioUrl + "hello there");

        JSONObject jsonResult = HttpRequestUtils.httpPostVoiceGen(TTS_URL, strParam, false);
        JSONObject result = new JSONObject();
        try {
            result = parseResult(jsonResult, audioUrl);
        } catch (JSONException e) {
            System.out.println("You got an error in Json Parse!");
            e.printStackTrace();
        }

        PrintWriter pw = response.getWriter();
        pw.print(result.toString());
        pw.flush();
        pw.close();
    }

    /**
     * Converts the synthesis service reply into the JSON returned to the client.
     *
     * @param jsonResult reply from the synthesis call; may be {@code null}
     * @param audioUrl   synthesis URL to report when the call succeeded
     * @return an object with {@code result}/{@code content} entries, or an empty
     *         object when the reply contains neither {@code succ} nor {@code err_no}
     * @throws JSONException if the reply cannot be read
     */
    private JSONObject parseResult(JSONObject jsonResult, String audioUrl) throws JSONException {
        JSONObject result = new JSONObject();
        if (jsonResult == null) {
            // Guard in case the HTTP helper yields no reply object at all.
            result.put("result", "fail");
            result.put("content", "empty response");
            return result;
        }

        Iterator<?> keys = jsonResult.keys();
        while (keys.hasNext()) {
            String key = (String) keys.next();
            if (key.equals("succ")) {
                result.put("result", "succ");
                result.put("content", audioUrl);
                break;
            }
            if (key.equals("err_no")) {
                result.put("result", "fail");
                // get() + valueOf works for both numeric and string error codes;
                // getString() rejects non-string values in current org.json.
                result.put("content", String.valueOf(jsonResult.get(key)));
                break;
            }
        }
        return result;
    }

}

2 音频合成、语音识别相关代码:

package com.shenl.voice;

import com.shenl.utils.set.JSONObject;

import javax.xml.bind.DatatypeConverter;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Iterator;
import java.net.URLEncoder;
import java.net.URLDecoder;

/**
 * Small client for the Baidu speech REST API: obtains an OAuth access token
 * and submits a local audio file for recognition, either as a JSON document
 * ({@link #voice2str}) or as a raw audio body ({@link #method2}).
 *
 * <p>The access token is cached in a static field; call {@link #getToken()}
 * (or {@link #getRefreToken()}) before any recognition call.
 */
public class Sample {

    private static final String serverURL = "http://vop.baidu.com/server_api";
    private static String token = "";
    // put your own params here
    // NOTE(review): credentials are committed in source; they should be loaded
    // from configuration or the environment and the committed pair rotated.
    private static final String apiKey = "J2pqeSuyxJehdFy4kuDaeWdx";
    private static final String secretKey = "fwiurrdjyxli5jd6wex3io8enxxkogje";
    private static final String cuid = "***";
    // NOTE(review): "zk" looks like a typo for "zh" - confirm before changing.
    private static final String lan = "zk";

    /**
     * Demo entry point: fetches a token and prints the recognition result for
     * a fixed local file.
     *
     * @param args unused
     * @throws Exception if the token request or the recognition request fails
     */
    public static void main(String[] args) throws Exception {
        getToken();
        System.out.println("result:\t:" + voice2str("G:/test3.wav"));
    }

    public Sample() {
        super();
    }

    /**
     * Requests an access token and caches it for later recognition calls.
     *
     * @throws Exception if the request fails or the reply contains no JSON object
     */
    public static void getToken() throws Exception {
        token = requestAccessToken();

        if (token == null) {
            System.out.println("空");
        }
    }

    /**
     * Requests a fresh access token, caches it and returns it.
     *
     * @return the newly issued access token
     * @throws Exception if the request fails or the reply contains no JSON object
     */
    public static String getRefreToken() throws Exception {
        token = requestAccessToken();
        return token;
    }

    /**
     * Performs the client-credentials token request and extracts
     * {@code access_token} from the reply.
     */
    private static String requestAccessToken() throws Exception {
        String getTokenURL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials"
                + "&client_id=" + apiKey + "&client_secret=" + secretKey + "&lan=" + lan;
        HttpURLConnection conn = (HttpURLConnection) new URL(getTokenURL).openConnection();
        String body = printResponse(conn);
        // Skip anything preceding the JSON object; an empty body means the
        // request did not return HTTP 200.
        int start = body.indexOf('{');
        if (start < 0) {
            throw new IOException("Token request returned no JSON body (HTTP error or empty response)");
        }
        return new JSONObject(body.substring(start).trim()).getString("access_token");
    }

    /**
     * Recognizes speech by posting the audio file, base64-encoded, inside a
     * JSON request.
     *
     * @param testFileName path of the audio file to recognize
     * @return the recognized text with brackets and quotes stripped, or an
     *         empty string when the reply carries no successful result
     * @throws Exception if the file cannot be read or the request fails
     */
    public static String voice2str(String testFileName) throws Exception {
        File pcmFile = new File(testFileName);
        byte[] audio = loadFile(pcmFile);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL).openConnection();

        // construct params
        JSONObject params = new JSONObject();
        params.put("format", "wav");
        params.put("rate", 8000);
        params.put("channel", "1");
        params.put("token", token);
        params.put("cuid", cuid);
        // Report the number of bytes actually read so it always matches "speech".
        params.put("len", (long) audio.length);
        params.put("speech", DatatypeConverter.printBase64Binary(audio));

        // add request header
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json; charset=utf-8");
        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request; encode explicitly instead of truncating chars to bytes
        sendBody(conn, params.toString().getBytes("UTF-8"));

        return extractResult(new JSONObject(printResponse(conn)));
    }

    /**
     * Reads a stream fully as text using the given charset name, terminating
     * every line with {@code '\n'}. Failures are printed and whatever was read
     * up to that point is returned. The stream is closed in all cases.
     *
     * @param is   the stream to read
     * @param code the charset name used for decoding
     * @return the decoded text, possibly empty
     */
    public static String getStrFromInsByCode(InputStream is, String code) {
        StringBuilder builder = new StringBuilder();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(is, code));
            String line;
            while ((line = reader.readLine()) != null) {
                builder.append(line).append('\n');
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                } else if (is != null) {
                    // Reader construction failed (e.g. unknown charset): close the raw stream.
                    is.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return builder.toString();
    }

    /**
     * Reads the response body as UTF-8 text and prints it for debugging.
     *
     * @param conn the connection whose response is read
     * @return the body with each line followed by {@code '\r'}, or an empty
     *         string when the status code is not 200
     * @throws Exception if reading the response fails
     */
    private static String printResponse(HttpURLConnection conn) throws Exception {
        if (conn.getResponseCode() != 200) {
            // request error
            return "";
        }
        StringBuffer response = new StringBuffer();
        // Decode explicitly as UTF-8 rather than with the platform default charset.
        BufferedReader rd = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
        try {
            String line;
            while ((line = rd.readLine()) != null) {
                response.append(line);
                response.append('\r');
            }
        } finally {
            rd.close();
        }

        String body = response.toString();
        // Print the response details; a body that is not a bare JSON object is
        // printed raw so that the debug output cannot abort the request.
        try {
            System.out.println(new JSONObject(body).toString(4));
        } catch (Exception e) {
            System.out.println(body);
        }
        return body;
    }

    /**
     * Loads a whole file into memory.
     *
     * @param file the file to read
     * @return the file contents
     * @throws IOException if the file is too large for a byte array, cannot be
     *                     read, or ends before the expected length
     */
    private static byte[] loadFile(File file) throws IOException {
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File too large to load into memory: " + file.getName());
        }
        byte[] bytes = new byte[(int) length];

        InputStream is = new FileInputStream(file);
        try {
            int offset = 0;
            int numRead;
            while (offset < bytes.length
                    && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
            return bytes;
        } finally {
            is.close();
        }
    }

    /**
     * Recognizes speech by posting the raw audio bytes as the request body,
     * with the device id and token passed in the query string.
     *
     * @param testFileName path of the audio file to recognize
     * @return the recognized text with brackets and quotes stripped, or an
     *         empty string when the reply carries no successful result
     * @throws Exception if the file cannot be read or the request fails
     */
    public static String method2(String testFileName) throws Exception {
        File pcmFile = new File(testFileName);
        byte[] audio = loadFile(pcmFile);
        HttpURLConnection conn = (HttpURLConnection) new URL(serverURL
                + "?cuid=" + cuid + "&token=" + token).openConnection();

        // add request header
        // NOTE(review): the demo callers pass a .wav file while the declared
        // type is audio/pcm - confirm which format the service should be told.
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "audio/pcm; rate=8000");
        conn.setRequestProperty("Charset", "utf-8");

        conn.setDoInput(true);
        conn.setDoOutput(true);

        // send request
        sendBody(conn, audio);

        // The response is already decoded as UTF-8, so no charset round-trip is needed.
        return extractResult(new JSONObject(printResponse(conn)));
    }

    /** Writes the request body and always closes the output stream. */
    private static void sendBody(HttpURLConnection conn, byte[] body) throws IOException {
        DataOutputStream wr = new DataOutputStream(conn.getOutputStream());
        try {
            wr.write(body);
            wr.flush();
        } finally {
            wr.close();
        }
    }

    /**
     * Pulls the recognized text out of a recognition reply: when a
     * {@code result} entry exists and {@code err_msg} equals {@code "success."},
     * the result is rendered as JSON text with brackets and quotes removed.
     */
    private static String extractResult(JSONObject reply) throws Exception {
        String text = "";
        for (Iterator<String> it = reply.keys(); it.hasNext();) {
            String key = it.next();
            if (key.equals("result") && "success.".equals(reply.get("err_msg"))) {
                text = JSONObject.valueToString(reply.get("result"));
                text = text.replaceAll("[\\[\\]\"]", "");
            }
        }
        return text;
    }
}