mirror of
https://github.com/zeux/pugixml.git
synced 2024-12-26 21:04:25 +08:00
Minor UTF8 conversion refactoring, added as_utf8 and as_wide overloads with string arguments
git-svn-id: http://pugixml.googlecode.com/svn/trunk@749 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
parent
ac31030886
commit
a590a69bda
148
src/pugixml.cpp
148
src/pugixml.cpp
@ -1272,6 +1272,74 @@ namespace
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t as_utf8_begin(const wchar_t* str, size_t length)
|
||||
{
|
||||
STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
|
||||
|
||||
// get length in utf8 characters
|
||||
return sizeof(wchar_t) == 2 ?
|
||||
utf_decoder<utf8_counter>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, 0) :
|
||||
utf_decoder<utf8_counter>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, 0);
|
||||
}
|
||||
|
||||
void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
|
||||
{
|
||||
STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
|
||||
|
||||
// convert to utf8
|
||||
uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
|
||||
uint8_t* end = sizeof(wchar_t) == 2 ?
|
||||
utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, begin) :
|
||||
utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, begin);
|
||||
|
||||
assert(begin + size == end);
|
||||
(void)!end;
|
||||
|
||||
// zero-terminate
|
||||
buffer[size] = 0;
|
||||
}
|
||||
|
||||
#ifndef PUGIXML_NO_STL
|
||||
std::string as_utf8_impl(const wchar_t* str, size_t length)
|
||||
{
|
||||
// first pass: get length in utf8 characters
|
||||
size_t size = as_utf8_begin(str, length);
|
||||
|
||||
// allocate resulting string
|
||||
std::string result;
|
||||
result.resize(size);
|
||||
|
||||
// second pass: convert to utf8
|
||||
if (size > 0) as_utf8_end(&result[0], size, str, length);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::wstring as_wide_impl(const char* str, size_t size)
|
||||
{
|
||||
const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
|
||||
|
||||
// first pass: get length in wchar_t units
|
||||
size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
|
||||
|
||||
// allocate resulting string
|
||||
std::wstring result;
|
||||
result.resize(length);
|
||||
|
||||
// second pass: convert to wchar_t
|
||||
if (length > 0)
|
||||
{
|
||||
wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
|
||||
wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
|
||||
|
||||
assert(begin + length == end);
|
||||
(void)!end;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
inline bool strcpy_insitu_allow(size_t length, uintptr_t allocated, char_t* target)
|
||||
{
|
||||
assert(target);
|
||||
@ -3096,33 +3164,16 @@ namespace
|
||||
{
|
||||
assert(str);
|
||||
|
||||
STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
|
||||
|
||||
size_t length = wcslen(str);
|
||||
|
||||
// first pass: get length in utf8 characters
|
||||
size_t size = sizeof(wchar_t) == 2 ?
|
||||
utf_decoder<utf8_counter>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, 0) :
|
||||
utf_decoder<utf8_counter>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, 0);
|
||||
size_t length = wcslen(str);
|
||||
size_t size = as_utf8_begin(str, length);
|
||||
|
||||
// allocate resulting string
|
||||
char* result = static_cast<char*>(global_allocate(size + 1));
|
||||
if (!result) return 0;
|
||||
|
||||
// second pass: convert to utf8
|
||||
if (size > 0)
|
||||
{
|
||||
uint8_t* begin = reinterpret_cast<uint8_t*>(result);
|
||||
uint8_t* end = sizeof(wchar_t) == 2 ?
|
||||
utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, begin) :
|
||||
utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, begin);
|
||||
|
||||
assert(begin + size == end);
|
||||
(void)!end;
|
||||
}
|
||||
|
||||
// zero-terminate
|
||||
result[size] = 0;
|
||||
as_utf8_end(result, size, str, length);
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -4504,59 +4555,24 @@ namespace pugi
|
||||
{
|
||||
assert(str);
|
||||
|
||||
STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
|
||||
return as_utf8_impl(str, wcslen(str));
|
||||
}
|
||||
|
||||
size_t length = wcslen(str);
|
||||
|
||||
// first pass: get length in utf8 characters
|
||||
size_t size = sizeof(wchar_t) == 2 ?
|
||||
utf_decoder<utf8_counter>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, 0) :
|
||||
utf_decoder<utf8_counter>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, 0);
|
||||
|
||||
// allocate resulting string
|
||||
std::string result;
|
||||
result.resize(size);
|
||||
|
||||
// second pass: convert to utf8
|
||||
if (size > 0)
|
||||
{
|
||||
uint8_t* begin = reinterpret_cast<uint8_t*>(&result[0]);
|
||||
uint8_t* end = sizeof(wchar_t) == 2 ?
|
||||
utf_decoder<utf8_writer>::decode_utf16_block(reinterpret_cast<const uint16_t*>(str), length, begin) :
|
||||
utf_decoder<utf8_writer>::decode_utf32_block(reinterpret_cast<const uint32_t*>(str), length, begin);
|
||||
|
||||
assert(begin + result.size() == end);
|
||||
(void)!end;
|
||||
}
|
||||
|
||||
return result;
|
||||
std::string PUGIXML_FUNCTION as_utf8(const std::wstring& str)
|
||||
{
|
||||
return as_utf8_impl(str.c_str(), str.size());
|
||||
}
|
||||
|
||||
std::wstring PUGIXML_FUNCTION as_wide(const char* str)
|
||||
{
|
||||
assert(str);
|
||||
|
||||
const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
|
||||
size_t size = strlen(str);
|
||||
|
||||
// first pass: get length in wchar_t
|
||||
size_t length = utf_decoder<wchar_counter>::decode_utf8_block(data, size, 0);
|
||||
|
||||
// allocate resulting string
|
||||
std::wstring result;
|
||||
result.resize(length);
|
||||
|
||||
// second pass: convert to wchar_t
|
||||
if (length > 0)
|
||||
{
|
||||
wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
|
||||
wchar_writer::value_type end = utf_decoder<wchar_writer>::decode_utf8_block(data, size, begin);
|
||||
|
||||
assert(begin + result.size() == end);
|
||||
(void)!end;
|
||||
}
|
||||
|
||||
return result;
|
||||
return as_wide_impl(str, strlen(str));
|
||||
}
|
||||
|
||||
std::wstring PUGIXML_FUNCTION as_wide(const std::string& str)
|
||||
{
|
||||
return as_wide_impl(str.c_str(), str.size());
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -713,7 +713,6 @@ namespace pugi
|
||||
// Destructor, invalidates all node/attribute handles to this document
|
||||
~xml_document();
|
||||
|
||||
public:
|
||||
#ifndef PUGIXML_NO_STL
|
||||
// Load document from stream.
|
||||
xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto);
|
||||
@ -1036,9 +1035,11 @@ namespace pugi
|
||||
#ifndef PUGIXML_NO_STL
|
||||
// Convert wide string to UTF8
|
||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
|
||||
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
|
||||
|
||||
// Convert UTF8 to wide string
|
||||
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
|
||||
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
|
||||
#endif
|
||||
|
||||
// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
|
||||
|
@ -69,6 +69,13 @@ TEST(as_wide_invalid)
|
||||
CHECK(b5 == L"\nbcd");
|
||||
}
|
||||
|
||||
TEST(as_wide_string)
|
||||
{
|
||||
std::string s = "abcd";
|
||||
|
||||
CHECK(as_wide(s) == L"abcd");
|
||||
}
|
||||
|
||||
TEST(as_utf8_empty)
|
||||
{
|
||||
CHECK(as_utf8(L"") == "");
|
||||
@ -134,4 +141,11 @@ TEST(as_utf8_invalid)
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
TEST(as_utf8_string)
|
||||
{
|
||||
std::wstring s = L"abcd";
|
||||
|
||||
CHECK(as_utf8(s) == "abcd");
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user