参考文章:http://www.qingfengju.com/article.asp?id=245


现在的网站,经常返回下面这样的字符串:
{"error":"\u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef"}
其中的\u7528等是汉字的Unicode码点(UTF-16编码单元)的转义写法,并不是UTF8编码。如何将其还原成相应的字符呢?

代码如下:

#include <string>
using std::string;

// Returns the numeric value (0-15) of a hexadecimal digit, or -1 when `ch`
// is not a hexadecimal digit.
static int HexDigitValue(char ch)
{
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
    if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    return -1;
}

// Converts the four hexadecimal digits at the start of `szCode` (the "XXXX"
// part of a "\uXXXX" escape) into the matching character, encoded in the
// system ANSI code page (CP_ACP).
//
// szCode  - text beginning with the four hex digits. At most four characters
//           are read and the buffer is never modified.
// returns - the encoded character, or an empty string when `szCode` is NULL,
//           does not start with four hex digits, or the conversion fails.
//
// Each call converts exactly one UTF-16 code unit, so surrogate pairs are not
// combined into a single character.
string Utf8Code2String(char* szCode)
{
    if (szCode == NULL)
    {
        return string();
    }

    // Validate and accumulate in one pass. A terminating NUL is not a hex
    // digit, so a short input is rejected before reading past its end.
    unsigned int iCode = 0;
    for (int i = 0; i < 4; i++)
    {
        const int iDigit = HexDigitValue(szCode[i]);
        if (iDigit < 0)
        {
            return string();
        }
        iCode = (iCode << 4) | static_cast<unsigned int>(iDigit);
    }

    const wchar_t wchChar = static_cast<wchar_t>(iCode);

    // One byte of the zero-filled buffer is held back so the result is always
    // NUL-terminated.
    char szAnsi[8] = {0};
    const int nWritten = WideCharToMultiByte(CP_ACP, 0, &wchChar, 1,
                                             szAnsi, sizeof(szAnsi) - 1, NULL, NULL);
    if (nWritten <= 0)
    {
        return string();
    }

    return string(szAnsi);
}


// Tells whether `ch` is one of 0-9, a-f or A-F.
static bool IsEscapeHexDigit(char ch)
{
    return (ch >= '0' && ch <= '9')
        || (ch >= 'a' && ch <= 'f')
        || (ch >= 'A' && ch <= 'F');
}

// Replaces every "\uXXXX" escape (backslash, 'u', four hex digits) in
// `szUtf8Code` with the character produced by Utf8Code2String and copies all
// other characters through unchanged.
//
// Example input: {"error":"\u7528\u6237\u4e0d\u5b58\u5728"}
//
// szUtf8Code - NUL-terminated text to decode. It is only read, never
//              modified, so string literals and shared buffers are safe.
// returns    - the decoded text; an empty string when `szUtf8Code` is NULL.
//
// A backslash that does not start a complete "\uXXXX" sequence is kept as is.
string MyUnEscape(char* szUtf8Code)
{
    string strRet;
    if (szUtf8Code == NULL)
    {
        return strRet;
    }

    char* pCur = szUtf8Code;
    while (*pCur != '\0')
    {
        // The && chain stops at the first mismatch (the terminating NUL is
        // never a match), so no character beyond the terminator is read.
        const bool bEscape = pCur[0] == '\\' && pCur[1] == 'u'
            && IsEscapeHexDigit(pCur[2]) && IsEscapeHexDigit(pCur[3])
            && IsEscapeHexDigit(pCur[4]) && IsEscapeHexDigit(pCur[5]);

        if (bEscape)
        {
            strRet += Utf8Code2String(pCur + 2);
            pCur += 6;
        }
        else
        {
            strRet += *pCur;
            ++pCur;
        }
    }
    return strRet;
}


// 调用例子
MyUnEscape("test \u7528\u6237\u4e0d\u5b58\u5728\u6216\u5bc6\u7801\u9519\u8bef hahah ok");

-----------------------------------------------------------------------------------------

贴下参考文章的代码:

#include <string.h>

#include <iostream>
#include <string>
using namespace std;

#include <atlconv.h>

// Modes accepted by the `flag` parameter of unicode_bytes().
enum
{
    UNICODE_CALC_SIZE = 1,  // only count the wide characters that would be produced
    UNICODE_GET_BYTES = 2   // decode and store the wide characters into `bytes`
};

// Returns the numeric value (0-15) of a hexadecimal digit, or -1 when `ch`
// is not a hexadecimal digit.
static int unicode_hex_value(char ch)
{
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
    return -1;
}

// Converts a string containing "\uXXXX" escape sequences into wide characters.
//
// p_unicode_escape_chars - NUL-terminated input; it is only read.
// bytes                  - output buffer, used only with UNICODE_GET_BYTES. It
//                          must hold at least as many elements as a
//                          UNICODE_CALC_SIZE call returns for the same input.
//                          No terminator is written.
// flag                   - UNICODE_CALC_SIZE or UNICODE_GET_BYTES.
// returns                - the number of wide characters produced; 0 for a
//                          NULL input, an unknown flag, or a NULL `bytes` in
//                          UNICODE_GET_BYTES mode.
//
// A backslash followed by 'u' and exactly four hex digits becomes one wide
// character holding that value. Every other input char, including a backslash
// that does not start a complete escape, becomes one wide character holding
// its unsigned byte value.
//
// Typical use:
//   int n = unicode_bytes(text, NULL, UNICODE_CALC_SIZE);
//   wchar_t* buf = new wchar_t[n + 1];
//   unicode_bytes(text, buf, UNICODE_GET_BYTES);
//   buf[n] = 0;
//   ...
//   delete[] buf;
int unicode_bytes(char* p_unicode_escape_chars, wchar_t* bytes, int flag)
{
    const bool store = (flag == UNICODE_GET_BYTES);
    if (!store && flag != UNICODE_CALC_SIZE)
    {
        return 0;
    }
    if (p_unicode_escape_chars == NULL || (store && bytes == NULL))
    {
        return 0;
    }

    const char* src = p_unicode_escape_chars;
    const size_t length = strlen(src);

    int unicode_count = 0;
    size_t pos = 0;
    while (pos < length)
    {
        // Default: pass the current char through as a single wide character.
        unsigned int code = static_cast<unsigned char>(src[pos]);
        size_t consumed = 1;

        // An escape needs six chars ("\uXXXX"); checking the remaining length
        // first keeps truncated escapes from reading past the terminator.
        if (src[pos] == '\\' && length - pos >= 6 && src[pos + 1] == 'u')
        {
            unsigned int value = 0;
            bool valid = true;
            for (size_t k = 2; k < 6; ++k)
            {
                const int digit = unicode_hex_value(src[pos + k]);
                if (digit < 0)
                {
                    valid = false;
                    break;
                }
                value = (value << 4) | static_cast<unsigned int>(digit);
            }
            if (valid)
            {
                code = value;
                consumed = 6;
            }
        }

        // Both modes share this path so the count always matches what a
        // UNICODE_GET_BYTES call writes.
        if (store)
        {
            bytes[unicode_count] = static_cast<wchar_t>(code);
        }
        ++unicode_count;
        pos += consumed;
    }

    return unicode_count;
}

string UnEscape(char* p_unicode_escape_chars)
{
int nBytes=unicode_bytes(p_unicode_escape_chars,NULL,UNICODE_CALC_SIZE);

wchar_t *p_bytes=new wchar_t[nBytes+sizeof(wchar_t)];
unicode_bytes(p_unicode_escape_chars,p_bytes,UNICODE_GET_BYTES);
p_bytes[nBytes]=0;

USES_CONVERSION;
string cs_return=W2A((wchar_t*)p_bytes);

delete[] p_bytes;

return cs_return;
}

// Demo entry point: decodes one escaped sample string and prints the result
// to standard output.
int _tmain(int argc, _TCHAR* argv[])
{
    // Sample input: the escaped form of a short Chinese status message
    // (roughly "sent successfully, 1 item"):
    // \u53d1\u9001\u6210\u529f1\u6761
    // It is stored in a writable array because UnEscape takes a non-const
    // char*, and binding a string literal to char* is ill-formed since C++11.
    char p_unicode_escape_chars[] = "\\u53d1\\u9001\\u6210\\u529f1\\u6761";
    cout << UnEscape(p_unicode_escape_chars) << endl;
    return 0;
}