作者:朱金灿

 

           发现开源代码的可利用资源真多,从sqlite3的源码中抠出了几个字符转换接口,稍微改造下了发现还挺好用的。下面是实现代码:


/*
** Convert a UTF-8 string to microsoft unicode (UTF-16?).
**
** Space to hold the returned string is obtained from malloc.
*/
static WCHAR *utf8ToUnicode(const char *zFilename){

int nChar;
WCHAR *zWideFilename;

nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);
zWideFilename = static_cast<WCHAR *>(malloc(nChar*sizeof(zWideFilename[0])));
if( zWideFilename==0 ){

return 0;

}
nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nChar);
if( nChar==0 ){

free(zWideFilename);
zWideFilename = 0;

}
return zWideFilename;

}

/*
** Convert microsoft unicode to UTF-8. Space to hold the returned string is
** obtained from malloc().
*/
static char *unicodeToUtf8(const WCHAR *zWideFilename){

int nByte;
char *zFilename;

nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);
zFilename = static_cast<char*>(malloc( nByte ));
if( zFilename==0 ){

return 0;

}
nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte,
0, 0);
if( nByte == 0 )
{
free(zFilename);
zFilename = 0;
}
return zFilename;

}

/*
** Convert an ansi string to microsoft unicode, based on the
** current codepage settings for file apis.
**
** Space to hold the returned string is obtained
** from malloc.
*/
static WCHAR *mbcsToUnicode(const char *zFilename){

int nByte;
WCHAR *zMbcsFilename;
int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, NULL,0)*sizeof(WCHAR);
zMbcsFilename = static_cast<WCHAR*>(malloc( nByte*sizeof(zMbcsFilename[0])));
if( zMbcsFilename==0 ){

return 0;

}
nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, nByte);
if( nByte==0 )
{
free(zMbcsFilename);
zMbcsFilename = 0;
}
return zMbcsFilename;

}

/*
** Convert microsoft unicode to multibyte character string, based on the
** user's Ansi codepage.
**
** Space to hold the returned string is obtained from
** malloc().
*/
static char* unicodeToMbcs(const WCHAR* zWideFilename){

int nByte;
char *zFilename;
int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0);
zFilename = static_cast<char*>(malloc(nByte ));
if( zFilename==0 ){

return 0;

}
nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename, nByte,
0, 0);
if( nByte == 0 ){

free(zFilename);
zFilename = 0;
}
return zFilename;

}

/*
** Convert multibyte character string to UTF-8. Space to hold the
** returned string is obtained from malloc().
*/
static char* mbcsToUtf8(const char *zFilename){

char *zFilenameUtf8;
WCHAR *zTmpWide;

zTmpWide = mbcsToUnicode(zFilename);
if( zTmpWide==0 ){

return 0;

}
zFilenameUtf8 = unicodeToUtf8(zTmpWide);
free(zTmpWide);
return zFilenameUtf8;
}

/*
** Convert UTF-8 to multibyte character string. Space to hold the
** returned string is obtained from malloc().
*/
static char* utf8ToMbcs(const char *zFilename){

char *zFilenameMbcs;
WCHAR* zTmpWide;

zTmpWide = utf8ToUnicode(zFilename);
if( zTmpWide==0 ){

return 0;

}
zFilenameMbcs = unicodeToMbcs(zTmpWide);
free(zTmpWide);
return zFilenameMbcs;
}

std::string MbcsToUtf8( const char* pszMbcs )
{
std::string str;
WCHAR *pwchar=0;
CHAR *pchar=0;
int len=0;
int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
len=MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL,0);
pwchar=new WCHAR[len];
if(pwchar!=0)
{
len = MultiByteToWideChar(codepage, 0, pszMbcs, -1, pwchar, len);
if( len!=0 )
{
len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, 0, 0, 0, 0);
pchar=new CHAR[len];
if(pchar!=0)
{
len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, pchar, len,0, 0);
if(len!=0)
{
str = pchar;
}
delete pchar;
}
delete pwchar;
}
}
return str;
}

         要测试这些接口,为此我写了一个测试工程,是读取一个xml文件把里面的字符进行转换的,测试工程的代码下载地址如下:

unicode字符和多字节字符的相互转换接口及测试工程