unicode字符和多字节字符的相互转换接口

原创

explorer101 2021-12-15 11:36:10 博主文章分类：C/C++ ©著作权

©著作权归作者所有：来自51CTO博客作者explorer101的原创作品，请联系作者获取转载授权，否则将追究法律责任

作者：朱金灿

发现开源代码中可利用的资源真多：从sqlite3的源码中抠出了几个字符转换接口，稍微改造了一下，发现还挺好用的。下面是实现代码：

/*
** Translate a NUL-terminated UTF-8 string into a Windows wide-character
** (UTF-16) string.
**
** The result is allocated with malloc(); the caller releases it with free().
** Returns 0 when the allocation or the conversion fails.
*/
static WCHAR *utf8ToUnicode(const char *zFilename){
  /* Pass 1: a zero-sized output buffer makes the API report the number of
  ** wide characters required (terminator included, since the length is -1). */
  int cchWide = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);

  WCHAR *zWide = static_cast<WCHAR *>(malloc(cchWide*sizeof(WCHAR)));
  if( !zWide ){
    return 0;
  }

  /* Pass 2: convert into the freshly allocated buffer. */
  cchWide = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWide, cchWide);
  if( cchWide!=0 ){
    return zWide;
  }

  /* Conversion failed: do not hand back a half-initialised buffer. */
  free(zWide);
  return 0;
}

/*
** Translate a NUL-terminated Windows wide-character string into UTF-8.
**
** The result is allocated with malloc(); the caller releases it with free().
** Returns 0 when the allocation or the conversion fails.
*/
static char *unicodeToUtf8(const WCHAR *zWideFilename){
  /* Pass 1: query the number of bytes the UTF-8 form needs. */
  int cbUtf8 = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);

  char *zUtf8 = static_cast<char*>(malloc( cbUtf8 ));
  if( zUtf8 ){
    /* Pass 2: convert; a zero return signals failure. */
    cbUtf8 = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1,
                                 zUtf8, cbUtf8, 0, 0);
    if( cbUtf8==0 ){
      free(zUtf8);
      zUtf8 = 0;
    }
  }
  return zUtf8;
}

/*
** Convert a NUL-terminated multibyte (ANSI or OEM) string to a Windows
** wide-character string.  The source code page follows the process-wide
** file API setting: CP_ACP when AreFileApisANSI() is true, CP_OEMCP
** otherwise.
**
** The result is allocated with malloc(); the caller releases it with free().
** Returns 0 when the size query, the allocation, or the conversion fails.
*/
static WCHAR *mbcsToUnicode(const char *zFilename){
  const int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

  /* Number of wide characters required, terminator included.  This is a
  ** character count, not a byte count: it is scaled to bytes exactly once,
  ** in the malloc() call below. */
  const int nChar = MultiByteToWideChar(codepage, 0, zFilename, -1, NULL, 0);
  if( nChar<=0 ){
    return 0;
  }

  WCHAR *zWide = static_cast<WCHAR*>(malloc( nChar*sizeof(WCHAR) ));
  if( zWide==0 ){
    return 0;
  }

  /* The last argument is the buffer capacity in wide characters. */
  if( MultiByteToWideChar(codepage, 0, zFilename, -1, zWide, nChar)==0 ){
    free(zWide);
    return 0;
  }
  return zWide;
}

/*
** Translate a NUL-terminated Windows wide-character string into a multibyte
** string.  The target code page is CP_ACP when AreFileApisANSI() reports
** true and CP_OEMCP otherwise.
**
** The result is allocated with malloc(); the caller releases it with free().
** Returns 0 when the allocation or the conversion fails.
*/
static char* unicodeToMbcs(const WCHAR* zWideFilename){
  int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

  /* Pass 1: query the number of bytes the multibyte form needs. */
  int cbMbcs = WideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0);

  char *zMbcs = static_cast<char*>(malloc( cbMbcs ));
  if( !zMbcs ){
    return 0;
  }

  /* Pass 2: convert; a zero return signals failure. */
  cbMbcs = WideCharToMultiByte(codepage, 0, zWideFilename, -1,
                               zMbcs, cbMbcs, 0, 0);
  if( cbMbcs!=0 ){
    return zMbcs;
  }

  free(zMbcs);
  return 0;
}

/*
** Convert multibyte character string to UTF-8.  Space to hold the
** returned string is obtained from malloc().
*/
static char* mbcsToUtf8(const char *zFilename){

  char *zFilenameUtf8;
  WCHAR *zTmpWide;

  zTmpWide = mbcsToUnicode(zFilename);
  if( zTmpWide==0 ){

    return 0;

  }
  zFilenameUtf8 = unicodeToUtf8(zTmpWide);
  free(zTmpWide);
  return zFilenameUtf8;
}

/*
** Convert UTF-8 to multibyte character string.  Space to hold the 
** returned string is obtained from malloc().
*/
static char* utf8ToMbcs(const char *zFilename){

  char *zFilenameMbcs;
  WCHAR* zTmpWide;

  zTmpWide = utf8ToUnicode(zFilename);
  if( zTmpWide==0 ){

    return 0;

  }
  zFilenameMbcs = unicodeToMbcs(zTmpWide);
  free(zTmpWide);
  return zFilenameMbcs;
}

/*
** Convert a NUL-terminated multibyte (ANSI or OEM) string to UTF-8 and
** return it as a std::string.  The source code page is CP_ACP when
** AreFileApisANSI() is true and CP_OEMCP otherwise.
**
** Returns an empty string when pszMbcs is null or when any conversion step
** fails.  All temporary storage is owned by std::wstring / std::string, so
** nothing leaks on the failure paths and no array is released with a
** mismatched scalar delete.
*/
std::string MbcsToUtf8( const char* pszMbcs )
{
  std::string utf8;
  if( pszMbcs==0 )
  {
    return utf8;
  }

  const int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

  /* Step 1: multibyte -> wide.  The size query counts the terminator. */
  const int cchWide = MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL, 0);
  if( cchWide<=0 )
  {
    return utf8;
  }
  std::wstring wide(static_cast<std::wstring::size_type>(cchWide), L'\0');
  if( MultiByteToWideChar(codepage, 0, pszMbcs, -1, &wide[0], cchWide)==0 )
  {
    return utf8;
  }

  /* Step 2: wide -> UTF-8.  Again the size query counts the terminator. */
  const int cbUtf8 = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, 0, 0, 0, 0);
  if( cbUtf8<=0 )
  {
    return utf8;
  }
  utf8.resize(static_cast<std::string::size_type>(cbUtf8));
  const int cbWritten = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1,
                                            &utf8[0], cbUtf8, 0, 0);
  if( cbWritten<=0 )
  {
    utf8.clear();
    return utf8;
  }

  /* Drop the terminating NUL the API wrote; std::string tracks its own length. */
  utf8.resize(static_cast<std::string::size_type>(cbWritten - 1));
  return utf8;
}