mirror of
https://github.com/zeux/pugixml.git
synced 2024-12-26 21:04:25 +08:00
5f996eba6d
Previously we omitted extra whitespace for single PCDATA/CDATA children, but in mixed content there was extra indentation before/after text nodes. One of the problems with that is that the text that you saved is not exactly the same as the parsing result using default flags (parse_trim_pcdata helps). Another problem is that parse-format cycles do not have a fixed point for mixed content - the result expands indefinitely. Some XML libraries, like Python minidom, have the same issue, but this is definitely a problem. Pretty-printing mixed content is hard. It seems that the only other sensible choice is to switch mixed content nodes to raw formatting. In a way the code in this change is a weaker version of that - it removes indentation around text nodes but still keeps it around element siblings/children. Thus we can switch to mixed-raw formatting at some point later, which will be a superset of the current behavior. To do this we have to either switch at the first text node (.NET XmlDocument does that), or scan the children of each element for a possible text node and switch before we output the first child. The former behavior seems non-intuitive (and a bit broken); unfortunately, the latter behavior can cost up to 20% of the output time for trees *without* mixed content. Fixes #13.
80 lines
2.5 KiB
C++
80 lines
2.5 KiB
C++
#include "writer_string.hpp"

#include "test.hpp"

#include <string.h>
|
|
|
|
// Compares a serialized result against the first `length` bytes of `expected`.
//
// result   - bytes produced by the writer
// expected - reference bytes; may contain embedded zero characters
// length   - number of bytes of `expected` to compare against
//
// Returns true only if `result` is byte-for-byte equal to the reference.
static bool test_narrow(const std::string& result, const char* expected, size_t length)
{
	const std::string reference(expected, expected + length);

	// check result
	if (result != reference) return false;

	// check comparison operator (incorrect implementation can theoretically early-out on zero terminators...)
	if (length > 0)
	{
		// Build a string that differs from the reference in the last character only.
		// The substitute must differ from the real last character; otherwise a
		// correct result that happens to end in '?' would be reported as a failure.
		std::string altered = reference;
		char& last = altered[length - 1];
		last = (last == '?') ? '!' : '?';

		if (result == altered) return false;
	}

	return true;
}
|
|
|
|
void xml_writer_string::write(const void* data, size_t size)
|
|
{
|
|
contents.append(static_cast<const char*>(data), size);
|
|
}
|
|
|
|
std::string xml_writer_string::as_narrow() const
|
|
{
|
|
return contents;
|
|
}
|
|
|
|
std::basic_string<wchar_t> xml_writer_string::as_wide() const
|
|
{
|
|
CHECK(contents.size() % sizeof(wchar_t) == 0);
|
|
|
|
// round-trip pointer through void* to avoid pointer alignment warnings; contents data should be heap allocated => safe to cast
|
|
return std::basic_string<wchar_t>(static_cast<const wchar_t*>(static_cast<const void*>(contents.data())), contents.size() / sizeof(wchar_t));
|
|
}
|
|
|
|
std::basic_string<pugi::char_t> xml_writer_string::as_string() const
|
|
{
|
|
#ifdef PUGIXML_WCHAR_MODE // to avoid "condition is always true" warning in BCC
|
|
CHECK(contents.size() % sizeof(pugi::char_t) == 0);
|
|
#endif
|
|
|
|
// round-trip pointer through void* to avoid pointer alignment warnings; contents data should be heap allocated => safe to cast
|
|
return std::basic_string<pugi::char_t>(static_cast<const pugi::char_t*>(static_cast<const void*>(contents.data())), contents.size() / sizeof(pugi::char_t));
|
|
}
|
|
|
|
// Saves `doc` into an in-memory writer, using a tab as the indentation string
// and the given formatting flags and encoding, and returns the raw output bytes.
std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding)
{
	xml_writer_string sink;
	doc.save(sink, STR("\t"), flags, encoding);

	return sink.as_narrow();
}
|
|
|
|
// Saves `doc` with save_narrow and reports whether the output is byte-for-byte
// equal to the first `length` bytes of `expected`.
bool test_save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length)
{
	const std::string actual = save_narrow(doc, flags, encoding);

	return test_narrow(actual, expected, length);
}
|
|
|
|
std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding)
|
|
{
|
|
xml_writer_string writer;
|
|
|
|
node.print(writer, STR("\t"), flags, encoding);
|
|
|
|
return writer.as_narrow();
|
|
}
|
|
|
|
// Prints `node` with write_narrow and reports whether the output is
// byte-for-byte equal to the first `length` bytes of `expected`.
bool test_write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length)
{
	const std::string actual = write_narrow(node, flags, encoding);

	return test_narrow(actual, expected, length);
}
|
|
|
|
std::basic_string<wchar_t> write_wide(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding)
|
|
{
|
|
xml_writer_string writer;
|
|
|
|
node.print(writer, STR("\t"), flags, encoding);
|
|
|
|
return writer.as_wide();
|
|
}
|