Added: More comprehensive codepage conversion for tstring

This commit is contained in:
kaetemi 2019-05-01 05:01:30 +08:00
parent 39c313aeb6
commit 8efe1d91c4
4 changed files with 381 additions and 58 deletions

View file

@ -303,23 +303,23 @@ inline sint nlstricmp(const std::string &lhs, const std::string &rhs) { return s
// Case-insensitive comparison of a std::string against a C string.
inline sint nlstricmp(const std::string &lhs, const char *rhs)
{
    const char *const left = lhs.c_str();
    return stricmp(left, rhs);
}
// Case-insensitive comparison of a C string against a std::string.
inline sint nlstricmp(const char *lhs, const std::string &rhs)
{
    const char *const right = rhs.c_str();
    return stricmp(lhs, right);
}
// TODO: Can we prefix these with 'nl' like other methods?
// TODO: Can we prefix these with 'nl' like other macros?
// Helper macros to convert between UTF-8 std::string and wchar_t*.
// wideToUtf8(str): reinterprets str as ucchar*, wraps it in a ucstring and yields toUtf8().
// utf8ToWide(str): yields the c_str() of the ucstring built by ucstring::makeFromUtf8(str), cast to wchar_t*.
// NOTE(review): makeFromUtf8 presumably returns a temporary, in which case the pointer
// is only valid until the end of the enclosing full expression - confirm before storing it.
#define wideToUtf8(str) (ucstring((ucchar*)str).toUtf8())
#define utf8ToWide(str) ((wchar_t*)ucstring::makeFromUtf8(str).c_str())
// #define wideToUtf8(str) (ucstring((ucchar*)str).toUtf8())
// #define utf8ToWide(str) ((wchar_t*)ucstring::makeFromUtf8(str).c_str())
// Helper macros to convert between UTF-8 std::string and TCHAR*.
// The definitions are selected by _UNICODE: wide (ucstring based) when it is defined,
// narrow pass-through otherwise.
#ifdef _UNICODE
// TCHAR is treated as wide here: go through ucstring for the UTF-8 <-> wide conversion
#define tStrToUtf8(str) (ucstring((ucchar*)(LPCWSTR)str).toUtf8())
// NOTE(review): pointer comes from c_str() on the value returned by makeFromUtf8;
// presumably a temporary, so it should not outlive the full expression - confirm.
#define utf8ToTStr(str) ((const wchar_t *)ucstring::makeFromUtf8(str).c_str())
#define tstring wstring
// #define tstring wstring
#else
// FIXME: This is not accurate, it should be a conversion between local charset and utf8
// Narrow build: the bytes are copied or passed through without any charset conversion
#define tStrToUtf8(str) (std::string((LPCSTR)str))
// Overloads letting utf8ToTStr accept either a C string or a std::string
inline const char *nlutf8ToTStr(const char *str) { return str; }
inline const char *nlutf8ToTStr(const std::string &str) { return str.c_str(); }
#define utf8ToTStr(str) NLMISC::nlutf8ToTStr(str)
#define tstring string
// #define tstring string
#endif
#if (NL_COMP_VC_VERSION <= 90)

View file

@ -246,55 +246,7 @@ inline bool fromString(const std::string &str, double &val) { bool ret = sscanf(
// (str[0] == '1' || (str[0] & 0xD2) == 0x50)
// - Kaetemi
// Parse a boolean from text.
// Accepts the single characters 1/t/T/y/Y (true) and 0/f/F/n/N (false), or the
// case-insensitive words "true"/"yes" and "false"/"no".
// Returns false and sets val to false when the text is not recognised.
inline bool fromString(const std::string &str, bool &val)
{
    bool parsed = false;
    bool recognised = true;

    if (str.length() == 1)
    {
        switch (str[0])
        {
        case '1': case 't': case 'T': case 'y': case 'Y':
            parsed = true;
            break;
        case '0': case 'f': case 'F': case 'n': case 'N':
            break;
        default:
            recognised = false;
            break;
        }
    }
    else
    {
        const std::string lowered = toLower(str);
        if (lowered == "true" || lowered == "yes")
            parsed = true;
        else if (lowered != "false" && lowered != "no")
            recognised = false;
    }

    val = parsed;
    return recognised;
}
bool fromString(const std::string &str, bool &val);
// String-to-string "conversion": copies the input verbatim and always succeeds.
inline bool fromString(const std::string &str, std::string &val)
{
    val.assign(str);
    return true;
}
@ -305,6 +257,107 @@ inline bool fromString(const std::string &str, uint &val) { return sscanf(str.c_
// Parse a signed integer with sscanf("%d"); succeeds only when exactly one field was read.
inline bool fromString(const std::string &str, sint &val)
{
    const int fieldsRead = sscanf(str.c_str(), "%d", &val);
    return fieldsRead == 1;
}
#endif // NL_COMP_VC6
// Character set conversion functions.
// For every pointer overload below, len is the number of code units in str; the
// default of 0 means str is null-terminated and its length is measured by the callee.
// All functions return a new string by value.

// Convert local codepage to UTF-8
// On Windows, the local codepage is undetermined
// On Linux, the local codepage is always UTF-8 (no-op)
std::string mbcsToUtf8(const char *str, size_t len = 0);
std::string mbcsToUtf8(const std::string &str);
// Convert wide character set to UTF-8
// On Windows, the wide character set is UTF-16
// On Linux, the wide character set is UTF-32
std::string wideToUtf8(const wchar_t *str, size_t len = 0);
std::string wideToUtf8(const std::wstring &str);
// Convert UTF-8 to wide character set
std::wstring utf8ToWide(const char *str, size_t len = 0);
std::wstring utf8ToWide(const std::string &str);
// Convert UTF-8 to local multibyte character set
std::string utf8ToMbcs(const char *str, size_t len = 0);
std::string utf8ToMbcs(const std::string &str);
// Convert wide to local multibyte character set
std::string wideToMbcs(const wchar_t *str, size_t len = 0);
std::string wideToMbcs(const std::wstring &str);
// Convert local multibyte to wide character set
std::wstring mbcsToWide(const char *str, size_t len = 0);
std::wstring mbcsToWide(const std::string &str);
// Uniform access to the underlying C string of either a raw pointer or a string object.
// The pointer overloads are identity functions; the string overloads return c_str(),
// so the result stays owned by the argument.
inline const char* asCStr(const char *str)
{
    return str;
}
inline const char* asCStr(const std::string &str)
{
    const char *const chars = str.c_str();
    return chars;
}
inline const wchar_t* asCStr(const wchar_t *str)
{
    return str;
}
inline const wchar_t* asCStr(const std::wstring &str)
{
    const wchar_t *const chars = str.c_str();
    return chars;
}
// Expression-scope conversion helpers yielding a C string.
// The variants ending in .c_str() point into a temporary string returned by value
// from the conversion function; that temporary is destroyed at the end of the
// enclosing full expression, so the pointer must be consumed immediately
// (e.g. passed as a function argument) and never stored.
#if defined(NL_OS_WINDOWS)
#define nlUtf8ToMbcs(str) (NLMISC::utf8ToMbcs(str).c_str())
#define nlMbcsToUtf8(str) (NLMISC::mbcsToUtf8(str).c_str())
#else
// No conversion on other platforms: hands back the argument's own buffer
#define nlUtf8ToMbcs(str) (NLMISC::asCStr(str))
#define nlMbcsToUtf8(str) (NLMISC::asCStr(str))
#endif
#define nlWideToUtf8(str) (NLMISC::wideToUtf8(str).c_str())
#define nlUtf8ToWide(str) (NLMISC::utf8ToWide(str).c_str())
#define nlWideToMbcs(str) (NLMISC::wideToMbcs(str).c_str())
#define nlMbcsToWide(str) (NLMISC::mbcsToWide(str).c_str())
// On Windows, tstring is either local multibyte or utf-16 wide
// On Linux, tstring is always utf-8
//
// Each branch below provides the same set of names:
//   tStrToUtf8 / tStrToWide / tStrToMbcs   convert from tstring (or tchar*)
//   utf8ToTStr / wideToTStr / mbcsToTStr   convert to tstring
//   nlTStrTo* / nl*ToTStr                  macro forms yielding a C string
// The macro forms ending in .c_str() point into a temporary returned by value and
// must be consumed within the same full expression.
#if defined(NL_OS_WINDOWS) && (defined(UNICODE) || defined(_UNICODE))
// Windows Unicode build: tstring and tchar are the wide types
typedef std::wstring tstring;
typedef wchar_t tchar;
#define nltmain wmain
// From tstring: the wide form is a plain copy, the others go through the wide converters
inline std::string tStrToUtf8(const tchar *str) { return wideToUtf8((const wchar_t *)str); }
inline std::string tStrToUtf8(const tstring &str) { return wideToUtf8((const std::wstring &)str); }
inline std::wstring tStrToWide(const tchar *str) { return (const wchar_t *)str; }
inline std::wstring tStrToWide(const tstring &str) { return (const std::wstring &)str; }
inline std::string tStrToMbcs(const tchar *str) { return wideToMbcs((const wchar_t *)str); }
inline std::string tStrToMbcs(const tstring &str) { return wideToMbcs((const std::wstring &)str); }
#define nlTStrToUtf8(str) (NLMISC::tStrToUtf8(str).c_str())
// tstring is already wide: no conversion, returns the argument's own buffer
#define nlTStrToWide(str) ((const wchar_t *)NLMISC::asCStr(str))
#define nlTStrToMbcs(str) (NLMISC::tStrToMbcs(str).c_str())
// To tstring: the wide form is a plain copy, the others go through the wide converters
inline tstring utf8ToTStr(const char *str) {return (const tstring &)utf8ToWide(str); }
inline tstring utf8ToTStr(const std::string &str) { return (const tstring &)utf8ToWide(str); }
inline tstring wideToTStr(const wchar_t *str) { return (const tchar *)str; }
inline tstring wideToTStr(const std::wstring &str) { return (const tstring &)str; }
inline tstring mbcsToTStr(const char *str) { return (const tstring &)mbcsToWide(str); }
inline tstring mbcsToTStr(const std::string &str) { return (const tstring &)mbcsToWide(str); }
#define nlUtf8ToTStr(str) (NLMISC::utf8ToTStr(str).c_str())
// Already wide: no conversion, returns the argument's own buffer
#define nlWideToTStr(str) ((const tchar *)NLMISC::asCStr(str))
#define nlMbcsToTStr(str) (NLMISC::mbcsToTStr(str).c_str())
#else
// Every other configuration: tstring and tchar are the narrow (local multibyte) types
typedef std::string tstring;
typedef char tchar;
#define nltmain main
// From tstring: the multibyte form is a plain copy, the others go through the mbcs converters
inline std::string tStrToUtf8(const tchar *str) { return mbcsToUtf8((const char *)str); }
inline std::string tStrToUtf8(const tstring &str) { return mbcsToUtf8((const std::string &)str); }
inline std::wstring tStrToWide(const tchar *str) { return mbcsToWide((const char *)str); }
inline std::wstring tStrToWide(const tstring &str) { return mbcsToWide((const std::string &)str); }
inline std::string tStrToMbcs(const tchar *str) { return (const char *)str; }
inline std::string tStrToMbcs(const tstring &str) { return (const std::string &)str; }
#if defined(NL_OS_WINDOWS)
// Windows narrow build: a real conversion into a temporary
#define nlTStrToUtf8(str) (NLMISC::tStrToUtf8(str).c_str())
#else
// Non-Windows: no conversion, returns the argument's own buffer
#define nlTStrToUtf8(str) ((const char *)NLMISC::asCStr(str))
#endif
#define nlTStrToWide(str) (NLMISC::tStrToWide(str).c_str())
#define nlTStrToMbcs(str) ((const char *)NLMISC::asCStr(str))
// To tstring: the multibyte form is a plain copy, the others go through the mbcs converters
inline tstring utf8ToTStr(const char *str) { return (const tstring &)utf8ToMbcs(str); }
inline tstring utf8ToTStr(const std::string &str) { return (const tstring &)utf8ToMbcs(str); }
inline tstring wideToTStr(const wchar_t *str) { return (const tstring &)wideToMbcs(str); }
inline tstring wideToTStr(const std::wstring &str) { return (const tstring &)wideToMbcs(str); }
inline tstring mbcsToTStr(const char *str) { return (const tchar *)str; }
inline tstring mbcsToTStr(const std::string &str) { return (const tstring &)str; }
#if defined(NL_OS_WINDOWS)
// Windows narrow build: a real conversion into a temporary
#define nlUtf8ToTStr(str) (NLMISC::utf8ToTStr(str).c_str())
#else
// Non-Windows: no conversion, returns the argument's own buffer
#define nlUtf8ToTStr(str) ((const tchar *)NLMISC::asCStr(str))
#endif
#define nlWideToTStr(str) (NLMISC::wideToTStr(str).c_str())
#define nlMbcsToTStr(str) ((const tchar *)NLMISC::asCStr(str))
#endif
} // NLMISC

View file

@ -173,6 +173,12 @@
# define NL_NO_EXCEPTION_SPECS
#endif
// nlmove(v): expands to std::move(v) on Visual C++ 14.0 and later, and to a plain
// parenthesised (v) on every other compiler or older Visual C++ version.
#if !defined(NL_COMP_VC) || (NL_COMP_VC_VERSION < 140)
#define nlmove(v) (v)
#else
#define nlmove(v) std::move(v)
#endif
// gcc 3.4 introduced ISO C++ with tough template rules
//
// NL_ISO_SYNTAX can be used using #if NL_ISO_SYNTAX or #if !NL_ISO_SYNTAX

View file

@ -17,23 +17,24 @@
#include "stdmisc.h"
#include "nel/misc/string_common.h"
#include "nel/misc/sstring.h"
using namespace std;
#ifdef DEBUG_NEW
#define new DEBUG_NEW
#define new DEBUG_NEW
#endif
namespace NLMISC
{
string addSlashR (const string &str)
string addSlashR(const string &str)
{
string formatedStr;
// replace \n with \r\n
for (uint i = 0; i < str.size(); i++)
{
if (str[i] == '\n' && i > 0 && str[i-1] != '\r')
if (str[i] == '\n' && i > 0 && str[i - 1] != '\r')
{
formatedStr += '\r';
}
@ -42,7 +43,7 @@ string addSlashR (const string &str)
return formatedStr;
}
string removeSlashR (const string &str)
string removeSlashR(const string &str)
{
string formatedStr;
// remove \r
@ -54,4 +55,267 @@ string removeSlashR (const string &str)
return formatedStr;
}
// Parse a boolean from text.
// Single characters: 1/t/T/y/Y mean true, 0/f/F/n/N mean false.
// Longer text is lower-cased and compared to "true"/"yes" and "false"/"no".
// On unrecognised input val is set to false and false is returned.
bool fromString(const std::string &str, bool &val)
{
    // Every failing or "false" path leaves val cleared
    val = false;

    if (str.length() == 1)
    {
        const char ch = str[0];
        if (ch == '1' || ch == 't' || ch == 'T' || ch == 'y' || ch == 'Y')
        {
            val = true;
            return true;
        }
        return ch == '0' || ch == 'f' || ch == 'F' || ch == 'n' || ch == 'N';
    }

    const std::string lowered = toLower(str);
    if (lowered == "true" || lowered == "yes")
    {
        val = true;
        return true;
    }
    return lowered == "false" || lowered == "no";
}
#if defined(NL_OS_WINDOWS)
// Convert a UTF-16 string to the given Windows codepage.
// str: source characters; len: number of wchar_t to convert, 0 = measure with wcslen.
// Returns an empty string for null/empty input or when the conversion fails.
// Exactly len units are read from str (no terminator is assumed when len is given)
// and the result is sized from the converter's own count, so neither the input
// over-read nor the unterminated scratch buffer of a fixed "len * 4" guess can occur.
std::string winWideToCp(const wchar_t *str, size_t len, UINT cp)
{
    if (!str)
        return std::string();
    if (!len)
        len = wcslen(str);
    if (!len)
        return std::string();

    // The Win32 API takes int lengths; refuse input that does not fit
    const int srcLen = (int)len;
    if (srcLen <= 0 || (size_t)srcLen != len)
        return std::string();

    // First pass: ask for the required size in bytes
    const int dstLen = WideCharToMultiByte(cp, 0, str, srcLen, NULL, 0, NULL, NULL);
    if (dstLen <= 0)
        return std::string();

    // Second pass: convert straight into the result buffer
    std::string res((size_t)dstLen, '\0');
    const int written = WideCharToMultiByte(cp, 0, str, srcLen, &res[0], dstLen, NULL, NULL);
    if (written <= 0)
        return std::string();
    res.resize((size_t)written);
    return res;
}
// Convert between two Windows codepages by going through UTF-16.
// str: source bytes; len: number of bytes to convert, 0 = measure with strlen.
// Returns an empty string for null/empty input or when either conversion fails.
// dwFlags is 0 rather than MB_PRECOMPOSED: precomposed is already the default, and
// MultiByteToWideChar documents any flag other than 0 / MB_ERR_INVALID_CHARS as
// invalid for CP_UTF8.
std::string winCpToCp(const char *str, size_t len, UINT srcCp, UINT dstCp)
{
    if (!str)
        return std::string();
    if (!len)
        len = strlen(str);
    if (!len)
        return std::string();

    // The Win32 API takes int lengths; refuse input that does not fit
    const int srcLen = (int)len;
    if (srcLen <= 0 || (size_t)srcLen != len)
        return std::string();

    // First convert from source codepage to wide, sizing the buffer from the API
    const int wideLen = MultiByteToWideChar(srcCp, 0, str, srcLen, NULL, 0);
    if (wideLen <= 0)
        return std::string();
    std::wstring wide((size_t)wideLen, L'\0');
    const int written = MultiByteToWideChar(srcCp, 0, str, srcLen, &wide[0], wideLen);
    if (written <= 0)
        return std::string();

    // Then convert from wide to the destination codepage
    return winWideToCp(wide.c_str(), (size_t)written, dstCp);
}
// Convert a string in the given Windows codepage to UTF-16.
// str: source bytes; len: number of bytes to convert, 0 = measure with strlen.
// Returns an empty string for null/empty input or when the conversion fails.
// dwFlags is 0 rather than MB_PRECOMPOSED: precomposed is already the default, and
// MultiByteToWideChar documents any flag other than 0 / MB_ERR_INVALID_CHARS as
// invalid for CP_UTF8. The output capacity handed to the API is the real element
// count of the buffer, and exactly len bytes are read from str.
std::wstring winCpToWide(const char *str, size_t len, UINT cp)
{
    if (!str)
        return std::wstring();
    if (!len)
        len = strlen(str);
    if (!len)
        return std::wstring();

    // The Win32 API takes int lengths; refuse input that does not fit
    const int srcLen = (int)len;
    if (srcLen <= 0 || (size_t)srcLen != len)
        return std::wstring();

    // First pass: ask for the required size in wchar_t
    const int dstLen = MultiByteToWideChar(cp, 0, str, srcLen, NULL, 0);
    if (dstLen <= 0)
        return std::wstring();

    // Second pass: convert straight into the result buffer
    std::wstring res((size_t)dstLen, L'\0');
    const int written = MultiByteToWideChar(cp, 0, str, srcLen, &res[0], dstLen);
    if (written <= 0)
        return std::wstring();
    res.resize((size_t)written);
    return res;
}
#endif
// Convert local codepage to UTF-8
// On Windows, the local codepage is undetermined
// On Linux, the local codepage is always UTF-8 (no-op)
// str: source bytes (null yields an empty string)
// len: number of bytes to use, 0 = str is null-terminated
// The pass-through paths honour len too, so the result covers the same range of
// the input whether or not a real conversion takes place.
std::string mbcsToUtf8(const char *str, size_t len)
{
    if (!str)
        return std::string();
#if defined(NL_OS_WINDOWS)
    // Windows 10 allows setting the local codepage to UTF-8
    if (GetACP() == CP_UTF8) /* 65001 */
        return len ? std::string(str, len) : std::string(str);
    return winCpToCp(str, len, CP_ACP, CP_UTF8);
#else
    return len ? std::string(str, len) : std::string(str); /* no-op */
#endif
}
// std::string overload of the local codepage -> UTF-8 conversion.
// Returns the input unchanged when it is empty, when the Windows ANSI codepage is
// already UTF-8, or on non-Windows platforms.
std::string mbcsToUtf8(const std::string &str)
{
#if !defined(NL_OS_WINDOWS)
    return str; /* no-op */
#else
    // Windows 10 allows setting the local codepage to UTF-8 (65001)
    const bool passThrough = str.empty() || GetACP() == CP_UTF8;
    if (passThrough)
        return str;
    return winCpToCp(str.c_str(), str.size(), CP_ACP, CP_UTF8);
#endif
}
// Convert wide character set to UTF-8
// On Windows, the wide character set is UTF-16
// On Linux, the wide character set is UTF-32
// str: source characters (null yields an empty string)
// len: number of wchar_t to convert, 0 = str is null-terminated
// The non-Windows branch encodes each code point directly. UTF-16 surrogate pairs
// are combined so the code also works where wchar_t is 16 bits wide; lone
// surrogates and values above U+10FFFF become U+FFFD.
std::string wideToUtf8(const wchar_t *str, size_t len)
{
#if defined(NL_OS_WINDOWS)
    return winWideToCp(str, len, CP_UTF8);
#else
    std::string res;
    if (!str)
        return res;
    if (!len)
    {
        while (str[len])
            ++len;
    }
    res.reserve(len);

    for (size_t i = 0; i < len; ++i)
    {
        unsigned long cp = (unsigned long)str[i];

        // Combine a high/low surrogate pair into one code point
        if (cp >= 0xD800 && cp <= 0xDBFF && i + 1 < len)
        {
            const unsigned long low = (unsigned long)str[i + 1];
            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
                ++i;
            }
        }

        // Anything that is not a Unicode scalar value is replaced
        if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
            cp = 0xFFFD;

        if (cp < 0x80)
        {
            res += (char)cp;
        }
        else if (cp < 0x800)
        {
            res += (char)(0xC0 | (cp >> 6));
            res += (char)(0x80 | (cp & 0x3F));
        }
        else if (cp < 0x10000)
        {
            res += (char)(0xE0 | (cp >> 12));
            res += (char)(0x80 | ((cp >> 6) & 0x3F));
            res += (char)(0x80 | (cp & 0x3F));
        }
        else
        {
            res += (char)(0xF0 | (cp >> 18));
            res += (char)(0x80 | ((cp >> 12) & 0x3F));
            res += (char)(0x80 | ((cp >> 6) & 0x3F));
            res += (char)(0x80 | (cp & 0x3F));
        }
    }
    return res;
#endif
}
// std::wstring overload: forwards the string's buffer and size to the pointer overload.
std::string wideToUtf8(const std::wstring &str)
{
    const wchar_t *const chars = str.c_str();
    const size_t count = str.size();
    return wideToUtf8(chars, count);
}
// Convert UTF-8 to wide character set
// On Windows, the wide character set is UTF-16
// On Linux, the wide character set is UTF-32
// str: source bytes (null yields an empty string)
// len: number of bytes to convert, 0 = str is null-terminated
// The non-Windows branch decodes UTF-8 directly. Malformed input (stray or missing
// continuation bytes, overlong forms, surrogates, values above U+10FFFF) produces
// U+FFFD and decoding resumes at the next byte.
std::wstring utf8ToWide(const char *str, size_t len)
{
#if defined(NL_OS_WINDOWS)
    return winCpToWide(str, len, CP_UTF8);
#else
    std::wstring res;
    if (!str)
        return res;
    if (!len)
    {
        while (str[len])
            ++len;
    }
    res.reserve(len);

    // Smallest code point that legitimately needs N continuation bytes
    static const unsigned long minForExtra[4] = { 0x0, 0x80, 0x800, 0x10000 };

    size_t i = 0;
    while (i < len)
    {
        const unsigned char lead = (unsigned char)str[i++];
        unsigned long cp = 0;
        size_t extra = 0;

        if (lead < 0x80)
        {
            cp = lead;
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            cp = lead & 0x1F;
            extra = 1;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            cp = lead & 0x0F;
            extra = 2;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            cp = lead & 0x07;
            extra = 3;
        }
        else
        {
            // Stray continuation byte or invalid lead byte
            res += (wchar_t)0xFFFD;
            continue;
        }

        bool valid = true;
        for (size_t k = 0; k < extra; ++k)
        {
            // A byte that is not a continuation is left in place so that it is
            // decoded as the start of the next sequence
            if (i >= len || ((unsigned char)str[i] & 0xC0) != 0x80)
            {
                valid = false;
                break;
            }
            cp = (cp << 6) | ((unsigned char)str[i] & 0x3F);
            ++i;
        }

        if (!valid || cp < minForExtra[extra] || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        {
            res += (wchar_t)0xFFFD;
            continue;
        }

        if (sizeof(wchar_t) >= 4 || cp < 0x10000)
        {
            res += (wchar_t)cp;
        }
        else
        {
            // 16-bit wchar_t: emit a surrogate pair
            const unsigned long v = cp - 0x10000;
            res += (wchar_t)(0xD800 + (v >> 10));
            res += (wchar_t)(0xDC00 + (v & 0x3FF));
        }
    }
    return res;
#endif
}
// std::string overload: forwards the string's buffer and size to the pointer overload.
std::wstring utf8ToWide(const std::string &str)
{
    const char *const bytes = str.c_str();
    const size_t count = str.size();
    return utf8ToWide(bytes, count);
}
// Convert UTF-8 to local multibyte character set
// On Windows this converts to the ANSI codepage unless that codepage is UTF-8;
// elsewhere the local multibyte character set is UTF-8 and the bytes are copied.
// str: source bytes (null yields an empty string)
// len: number of bytes to use, 0 = str is null-terminated
// The pass-through paths honour len too, so the result covers the same range of
// the input whether or not a real conversion takes place.
std::string utf8ToMbcs(const char *str, size_t len)
{
    if (!str)
        return std::string();
#if defined(NL_OS_WINDOWS)
    // Windows 10 allows setting the local codepage to UTF-8
    if (GetACP() == CP_UTF8) /* 65001 */
        return len ? std::string(str, len) : std::string(str);
    return winCpToCp(str, len, CP_UTF8, CP_ACP);
#else
    return len ? std::string(str, len) : std::string(str); /* no-op */
#endif
}
// std::string overload of the UTF-8 -> local multibyte conversion.
// Returns the input unchanged when it is empty, when the Windows ANSI codepage is
// already UTF-8, or on non-Windows platforms.
std::string utf8ToMbcs(const std::string &str)
{
#if !defined(NL_OS_WINDOWS)
    return str; /* no-op */
#else
    // Windows 10 allows setting the local codepage to UTF-8 (65001)
    const bool passThrough = str.empty() || GetACP() == CP_UTF8;
    if (passThrough)
        return str;
    return winCpToCp(str.c_str(), str.size(), CP_UTF8, CP_ACP);
#endif
}
// Convert wide to local multibyte character set
std::string wideToMbcs(const wchar_t *str, size_t len)
{
#if defined(NL_OS_WINDOWS)
return winWideToCp(str, len, CP_ACP);
#else
return wideToUTf8(str, len);
#endif
}
std::string wideToMbcs(const std::wstring &str)
{
#if defined(NL_OS_WINDOWS)
return winWideToCp(str.c_str(), str.size(), CP_ACP);
#else
return wideToUTf8(str);
#endif
}
// Convert local multibyte to wide character set
// Non-Windows platforms forward to utf8ToWide; Windows converts from the ANSI codepage.
std::wstring mbcsToWide(const char *str, size_t len)
{
#if !defined(NL_OS_WINDOWS)
    return utf8ToWide(str, len);
#else
    return winCpToWide(str, len, CP_ACP);
#endif
}
// std::string overload of the local multibyte -> wide conversion.
// Non-Windows platforms forward to utf8ToWide; Windows converts from the ANSI codepage.
std::wstring mbcsToWide(const std::string &str)
{
#if !defined(NL_OS_WINDOWS)
    return utf8ToWide(str);
#else
    const char *const bytes = str.c_str();
    const size_t count = str.size();
    return winCpToWide(bytes, count, CP_ACP);
#endif
}
}