// The iconv family of functions (iconv_open / iconv / iconv_close) is the native
// character-set conversion API on *nix-based operating systems. It is poorly
// documented, and there is little sample code showing how to use it. A version
// can be found online, but it might not work on Mac. Here is my version.
#include <iconv.h>
#include <stdlib.h>

#include <iostream>
#include <string>
#include <vector>
#define _MAC_OS_X
#ifdef _MAC_OS_X
namespace
{

// Owns one iconv conversion descriptor for its whole lifetime: the descriptor
// is opened in the constructor and closed in the destructor.
class iconv_wrapper
{
public:
    // Opens a descriptor that converts text from `fromcode` to `tocode`.
    // A failed iconv_open() is not reported here; query is_open(), or rely on
    // __iconv() returning (size_t)-1.
    iconv_wrapper( const char *tocode, const char *fromcode )
        : cd_( iconv_open( tocode, fromcode ) )
    {
    }

    ~iconv_wrapper()
    {
        // Only a descriptor that iconv_open() really handed out may be
        // closed; the (iconv_t)-1 failure value is not one.
        if ( is_open() )
            iconv_close( cd_ );
    }

    // True when iconv_open() succeeded in the constructor.
    bool is_open() const
    {
        // (iconv_t)-1 is the failure value exactly as POSIX spells it.
        return (iconv_t)-1 != cd_;
    }

    // Forwards to iconv() using the owned descriptor.
    // Returns whatever iconv() returns, or (size_t)-1 without calling it
    // when the descriptor never opened.
    size_t __iconv ( char **inbuf, size_t *inbytesleft,
                     char **outbuf, size_t *outbytesleft )
    {
        if ( !is_open() )
            return static_cast<size_t>( -1 );
        return iconv( cd_, inbuf, inbytesleft, outbuf, outbytesleft );
    }

private:
    // Not copyable (declared, never defined): two owners would close the
    // same descriptor twice.
    iconv_wrapper( const iconv_wrapper & );
    iconv_wrapper &operator=( const iconv_wrapper & );

    iconv_t cd_;
};

// Shared engine for both directions.
//
// Converts `in_bytes` bytes starting at `in` between UTF-8 and the in-memory
// wchar_t encoding (`to_utf8` selects the direction), storing at most
// `out_bytes` bytes at `out`. When `written` is non-null it receives the
// number of bytes stored, also after a failure part-way through.
//
// Returns iconv()'s result, or (size_t)-1 when no usable converter exists.
static size_t convert_text( bool to_utf8, const char *in, size_t in_bytes,
                            char *out, size_t out_bytes, size_t *written )
{
    // Names tried, in order, for the wchar_t side. "UCS-4-INTERNAL" is the
    // spelling this code always used; "WCHAR_T" is the fallback for iconv
    // implementations that do not know the first name.
    // NOTE(review): both names assume wchar_t holds native-endian UCS-4.
    static const char *const wide_names[] = { "UCS-4-INTERNAL", "WCHAR_T" };
    const size_t name_count = sizeof( wide_names ) / sizeof( wide_names[0] );

    if ( written )
        *written = 0;

    for ( size_t i = 0; i < name_count; ++i )
    {
        iconv_wrapper conv( to_utf8 ? "UTF8" : wide_names[i],
                            to_utf8 ? wide_names[i] : "UTF8" );
        if ( !conv.is_open() )
            continue;

        // iconv() takes a non-const input pointer although it only reads
        // through it, hence the const_cast.
        char *src = const_cast<char *>( in );
        char *dst = out;
        size_t src_left = in_bytes;
        size_t dst_left = out_bytes;
        const size_t ret = conv.__iconv( &src, &src_left, &dst, &dst_left );
        if ( written )
            *written = out_bytes - dst_left;
        return ret;
    }
    return static_cast<size_t>( -1 );
}

// Converts a wchar_t buffer to UTF-8.
//
//   in      - first wide character to convert
//   in_len  - size of the input in BYTES (characters * sizeof(wchar_t));
//             exactly this much is converted, no terminator is searched for
//   out     - destination buffer
//   out_len - capacity of `out` in bytes
//   written - optional; receives the number of bytes stored in `out`
//
// The output is NUL-terminated only if the input range contained a NUL.
// Returns iconv()'s result, (size_t)-1 on failure.
static size_t __wcstoutf8( const wchar_t *in, size_t in_len,
                           char *out, size_t out_len, size_t *written = 0 )
{
    return convert_text( true, reinterpret_cast<const char *>( in ), in_len,
                         out, out_len, written );
}

// Converts a wide string to UTF-8, embedded NUL characters included.
// `out` is replaced on success and left untouched on failure.
// Returns iconv()'s result, (size_t)-1 on failure.
static size_t __wcstoutf8( const std::wstring &in, std::string &out )
{
    // UTF8 at most will be 6 bytes at length; the extra byte keeps the
    // buffer non-empty for an empty input.
    std::vector<char> buffer( in.size() * 6 + 1 );
    size_t written = 0;
    const size_t ret = __wcstoutf8( in.data(), in.size() * sizeof( wchar_t ),
                                    &buffer[0], buffer.size(), &written );
    if ( static_cast<size_t>( -1 ) != ret )
        out.assign( &buffer[0], written );
    return ret;
}

// Converts a UTF-8 buffer to wchar_t.
//
//   in      - first UTF-8 byte to convert
//   in_len  - size of the input in bytes
//   out     - destination buffer
//   out_len - capacity of `out` in BYTES (not in wide characters)
//   written - optional; receives the number of bytes stored in `out`
//
// Returns iconv()'s result, (size_t)-1 on failure.
static size_t __utf8towcs( const char *in, size_t in_len,
                           wchar_t *out, size_t out_len, size_t *written = 0 )
{
    return convert_text( false, in, in_len,
                         reinterpret_cast<char *>( out ), out_len, written );
}

// Converts a UTF-8 string to a wide string, embedded NUL bytes included.
// `out` is replaced on success and left untouched on failure.
// Returns iconv()'s result, (size_t)-1 on failure.
static size_t __utf8towcs( const std::string &in, std::wstring &out )
{
    // A char in UTF8 may be in ASCII range, so there can be as many wide
    // characters as input bytes; the extra slot keeps the buffer non-empty.
    std::vector<wchar_t> buffer( in.size() + 1 );
    size_t written = 0;
    const size_t ret = __utf8towcs( in.data(), in.size(), &buffer[0],
                                    buffer.size() * sizeof( wchar_t ), &written );
    if ( static_cast<size_t>( -1 ) != ret )
        out.assign( &buffer[0], written / sizeof( wchar_t ) );
    return ret;
}
}
#endif
// Demo program: converts a wide string to UTF-8 and back, printing the result
// of each step. Returns EXIT_FAILURE as soon as a conversion reports an error,
// EXIT_SUCCESS otherwise.
int main (int argc, char * const argv[]) {
    // The command line is not used.
    (void)argc;
    (void)argv;

    // Both converters report failure as (size_t)-1.
    const size_t failed = static_cast<size_t>( -1 );
    const std::wstring original = L"china 你";

    // Demo1: unicode (4 bytes on Mac, Unix, Linux) to UTF8
    std::string utf8;
    if ( failed == __wcstoutf8( original, utf8 ) ) {
        std::cerr << "wchar_t to UTF8 conversion failed" << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << "UTF8:" << utf8 << std::endl;

    // Demo2: UTF8 to unicode (4 bytes on Mac, Unix, Linux).
    std::wstring wide;
    if ( failed == __utf8towcs( utf8, wide ) ) {
        std::cerr << "UTF8 to wchar_t conversion failed" << std::endl;
        return EXIT_FAILURE;
    }
    // NOTE(review): whether std::wcout can render the non-ASCII character
    // depends on the active locale, which this program never sets - confirm
    // on the target platform.
    std::wcout << L"UCS-4-INTERNAL:" << wide << std::endl;
    return EXIT_SUCCESS;
}