sqlite中文乱码问题原因分析及解决
作者:
打开数据库,插入,查询数据库等,如果操作接口输入参数包含中文字符,会导致操作异常,这是由于sqlite数据库使用的是UTF-8编码方式
在VC++中通过sqlite3.dll接口对sqlite数据库进行操作,包括打开数据库,插入,查询数据库等,如果操作接口输入参数包含中文字符,会导致操作异常。例如调用sqlite3_open打开数据库文件,如果文件路径出现中文,就会导致打开失败。sqlite3_exec执行sql语句,如果包含中文对应字符就会变成乱码。
这是由于sqlite数据库使用的是UTF-8编码方式,而传入的字符串是ASCII编码或Unicode编码,导致字符串格式错误。解决方案是在调用sqlite接口之前,先将字符串转换成UTF-8编码,以下提供各种字符串编码转换函数。
//UTF-8转Unicode
std::wstring Utf82Unicode(const std::string& utf8string)
{
int widesize = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, NULL, 0);
if (widesize == ERROR_NO_UNICODE_TRANSLATION)
{
throw std::exception("Invalid UTF-8 sequence.");
}
if (widesize == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<wchar_t> resultstring(widesize);
int convresult = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, &resultstring[0], widesize);
if (convresult != widesize)
{
throw std::exception("La falla!");
}
return std::wstring(&resultstring[0]);
}
//unicode 转为 ascii
string WideByte2Acsi(wstring& wstrcode)
{
int asciisize = ::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, NULL, 0, NULL, NULL);
if (asciisize == ERROR_NO_UNICODE_TRANSLATION)
{
throw std::exception("Invalid UTF-8 sequence.");
}
if (asciisize == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<char> resultstring(asciisize);
int convresult =::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, &resultstring[0], asciisize, NULL, NULL);
if (convresult != asciisize)
{
throw std::exception("La falla!");
}
return std::string(&resultstring[0]);
}
//utf-8 转 ascii
string UTF_82ASCII(string& strUtf8Code)
{
string strRet("");
//先把 utf8 转为 unicode
wstring wstr = Utf82Unicode(strUtf8Code);
//最后把 unicode 转为 ascii
strRet = WideByte2Acsi(wstr);
return strRet;
}
///////////////////////////////////////////////////////////////////////
//ascii 转 Unicode
wstring Acsi2WideByte(string& strascii)
{
int widesize = MultiByteToWideChar (CP_ACP, 0, (char*)strascii.c_str(), -1, NULL, 0);
if (widesize == ERROR_NO_UNICODE_TRANSLATION)
{
throw std::exception("Invalid UTF-8 sequence.");
}
if (widesize == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<wchar_t> resultstring(widesize);
int convresult = MultiByteToWideChar (CP_ACP, 0, (char*)strascii.c_str(), -1, &resultstring[0], widesize);
if (convresult != widesize)
{
throw std::exception("La falla!");
}
return std::wstring(&resultstring[0]);
}
//Unicode 转 Utf8
std::string Unicode2Utf8(const std::wstring& widestring)
{
int utf8size = ::WideCharToMultiByte(CP_UTF8, 0, widestring.c_str(), -1, NULL, 0, NULL, NULL);
if (utf8size == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<char> resultstring(utf8size);
int convresult = ::WideCharToMultiByte(CP_UTF8, 0, widestring.c_str(), -1, &resultstring[0], utf8size, NULL, NULL);
if (convresult != utf8size)
{
throw std::exception("La falla!");
}
return std::string(&resultstring[0]);
}
//ascii 转 Utf8
string ASCII2UTF_8(string& strAsciiCode)
{
string strRet("");
//先把 ascii 转为 unicode
wstring wstr = Acsi2WideByte(strAsciiCode);
//最后把 unicode 转为 utf8
strRet = Unicode2Utf8(wstr);
return strRet;
}
这是由于sqlite数据库使用的是UTF-8编码方式,而传入的字符串是ASCII编码或Unicode编码,导致字符串格式错误。解决方案是在调用sqlite接口之前,先将字符串转换成UTF-8编码,以下提供各种字符串编码转换函数。
复制代码 代码如下:
//UTF-8转Unicode
std::wstring Utf82Unicode(const std::string& utf8string)
{
int widesize = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, NULL, 0);
if (widesize == ERROR_NO_UNICODE_TRANSLATION)
{
throw std::exception("Invalid UTF-8 sequence.");
}
if (widesize == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<wchar_t> resultstring(widesize);
int convresult = ::MultiByteToWideChar(CP_UTF8, 0, utf8string.c_str(), -1, &resultstring[0], widesize);
if (convresult != widesize)
{
throw std::exception("La falla!");
}
return std::wstring(&resultstring[0]);
}
//unicode 转为 ascii
string WideByte2Acsi(wstring& wstrcode)
{
int asciisize = ::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, NULL, 0, NULL, NULL);
if (asciisize == ERROR_NO_UNICODE_TRANSLATION)
{
throw std::exception("Invalid UTF-8 sequence.");
}
if (asciisize == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<char> resultstring(asciisize);
int convresult =::WideCharToMultiByte(CP_OEMCP, 0, wstrcode.c_str(), -1, &resultstring[0], asciisize, NULL, NULL);
if (convresult != asciisize)
{
throw std::exception("La falla!");
}
return std::string(&resultstring[0]);
}
//utf-8 转 ascii
string UTF_82ASCII(string& strUtf8Code)
{
string strRet("");
//先把 utf8 转为 unicode
wstring wstr = Utf82Unicode(strUtf8Code);
//最后把 unicode 转为 ascii
strRet = WideByte2Acsi(wstr);
return strRet;
}
///////////////////////////////////////////////////////////////////////
//ascii 转 Unicode
wstring Acsi2WideByte(string& strascii)
{
int widesize = MultiByteToWideChar (CP_ACP, 0, (char*)strascii.c_str(), -1, NULL, 0);
if (widesize == ERROR_NO_UNICODE_TRANSLATION)
{
throw std::exception("Invalid UTF-8 sequence.");
}
if (widesize == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<wchar_t> resultstring(widesize);
int convresult = MultiByteToWideChar (CP_ACP, 0, (char*)strascii.c_str(), -1, &resultstring[0], widesize);
if (convresult != widesize)
{
throw std::exception("La falla!");
}
return std::wstring(&resultstring[0]);
}
//Unicode 转 Utf8
std::string Unicode2Utf8(const std::wstring& widestring)
{
int utf8size = ::WideCharToMultiByte(CP_UTF8, 0, widestring.c_str(), -1, NULL, 0, NULL, NULL);
if (utf8size == 0)
{
throw std::exception("Error in conversion.");
}
std::vector<char> resultstring(utf8size);
int convresult = ::WideCharToMultiByte(CP_UTF8, 0, widestring.c_str(), -1, &resultstring[0], utf8size, NULL, NULL);
if (convresult != utf8size)
{
throw std::exception("La falla!");
}
return std::string(&resultstring[0]);
}
//ascii 转 Utf8
string ASCII2UTF_8(string& strAsciiCode)
{
string strRet("");
//先把 ascii 转为 unicode
wstring wstr = Acsi2WideByte(strAsciiCode);
//最后把 unicode 转为 utf8
strRet = Unicode2Utf8(wstr);
return strRet;
}