0000-007F | 0xxxxxxx #UTF-8规定,若1字符=1字节,首位须为00080-07FF | 110xxxxx 10xxxxxx #UTF-8规定,若1字符=2字节,高位前3位为110,低位前2位为100800-FFFF | 1110xxxx 10xxxxxx 10xxxxxx #UTF-8规定,若1字符=3字节,高位前4位为1110,后面低位前2位均为10 比如,张三的UTF-8编码为: E5 BC A0 E4 B8 89 E5 ----- 1110 0101 BC ----- 1011 1100 A0 ----- 1010 0000 E4 ----- 1110 0100 B8 ----- 1011 1000 89 ----- 1000 1001
#include#include using namespace std;std::string string_To_UTF8(const std::string & str){ int nwLen = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, NULL, 0); wchar_t * pwBuf = new wchar_t[nwLen + 1];//一定要加1,不然会出现尾巴 ZeroMemory(pwBuf, nwLen * 2 + 2); ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), str.length(), pwBuf, nwLen); int nLen = ::WideCharToMultiByte(CP_UTF8, 0, pwBuf, -1, NULL, NULL, NULL, NULL); char * pBuf = new char[nLen + 1]; ZeroMemory(pBuf, nLen + 1); ::WideCharToMultiByte(CP_UTF8, 0, pwBuf, nwLen, pBuf, nLen, NULL, NULL); std::string retStr(pBuf); delete []pwBuf; delete []pBuf; pwBuf = NULL; pBuf = NULL; return retStr;}std::string UTF8_To_string(const std::string & str){ int nwLen = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0); wchar_t * pwBuf = new wchar_t[nwLen + 1];//一定要加1,不然会出现尾巴 memset(pwBuf, 0, nwLen * 2 + 2); MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), pwBuf, nwLen); int nLen = WideCharToMultiByte(CP_ACP, 0, pwBuf, -1, NULL, NULL, NULL, NULL); char * pBuf = new char[nLen + 1]; memset(pBuf, 0, nLen + 1); WideCharToMultiByte(CP_ACP, 0, pwBuf, nwLen, pBuf, nLen, NULL, NULL); std::string retStr = pBuf; delete []pBuf; delete []pwBuf; pBuf = NULL; pwBuf = NULL; return retStr;}int main(){ string str1("迪丽热巴·阿凡提13800000000"); string str2 = string_To_UTF8(str1); string str3 = UTF8_To_string(str2); cout< <