0
0
mirror of https://github.com/zeux/pugixml.git synced 2025-01-04 02:25:23 +08:00
pugixml/tests/writer_string.cpp
Arseny Kapoulkine 5f996eba6d Do not emit surrounding whitespace for text nodes
Previously we omitted extra whitespace for single PCDATA/CDATA children, but in
mixed content there was extra indentation before/after text nodes.

One of the problems with that is that the text that you saved is not exactly
the same as the parsing result using default flags (parse_trim_pcdata helps).

Another problem is that parse-format cycles do not have a fixed point for mixed
content - the result expands indefinitely. Some XML libraries, like Python
minidom, have the same issue, but this is definitely a problem.

Pretty-printing mixed content is hard. It seems that the only other sensible
choice is to switch mixed content nodes to raw formatting. In a way the code in
this change is a weaker version of that - it removes indentation around text
nodes but still keeps it around element siblings/children.

Thus we can switch to mixed-raw formatting at some point later, which will be
a superset of the current behavior.

To do this we have to either switch at the first text node (.NET XmlDocument
does that), or scan the children of each element for a possible text node and
switch before we output the first child.

The former behavior seems non-intuitive (and a bit broken); unfortunately, the
latter behavior can cost up to 20% of the output time for trees *without* mixed
content.

Fixes #13.
2015-03-18 09:59:17 -07:00

80 lines
2.5 KiB
C++

#include "writer_string.hpp"
#include "test.hpp"
// Compares a narrow result against an expected byte sequence.
// expected may contain embedded zero bytes, so its extent is defined by length rather than by a terminator.
// Returns true only if result matches the expected bytes exactly and the string comparison also
// tells it apart from a copy of the expected data that differs solely in the last byte.
static bool test_narrow(const std::string& result, const char* expected, size_t length)
{
	// check result
	if (result != std::string(expected, expected + length)) return false;

	// an empty expectation has no last byte to perturb
	if (length == 0) return true;

	// check comparison operator (incorrect implementation can theoretically early-out on zero terminators...)
	// the replacement byte has to differ from the real last byte: otherwise expected data ending in '?'
	// is identical to its "perturbed" copy and a correct result gets rejected
	const char replacement = (expected[length - 1] == '?') ? '!' : '?';

	std::string perturbed(expected, expected + length - 1);
	perturbed += replacement;

	return !(result == perturbed);
}
// Writer callback: appends the size bytes starting at data to the accumulated contents.
void xml_writer_string::write(const void* data, size_t size)
{
	const char* bytes = static_cast<const char*>(data);

	contents.append(bytes, size);
}
// Returns a copy of everything written so far, as a narrow (byte) string.
std::string xml_writer_string::as_narrow() const
{
	std::string narrow = contents;

	return narrow;
}
// Returns the accumulated output bytes reinterpreted as a sequence of wchar_t units.
// The byte count is expected to be a multiple of sizeof(wchar_t); this is verified with CHECK.
std::basic_string<wchar_t> xml_writer_string::as_wide() const
{
	CHECK(contents.size() % sizeof(wchar_t) == 0);

	const size_t length = contents.size() / sizeof(wchar_t);
	std::basic_string<wchar_t> result(length, wchar_t());

	// copy the raw bytes into correctly typed storage instead of reading the byte buffer through a wchar_t pointer:
	// writing through char* is always permitted, and nothing is assumed about the alignment of the source buffer
	// (contents is presumed to be a std::string, since as_narrow() returns it directly)
	if (length > 0)
		contents.copy(static_cast<char*>(static_cast<void*>(&result[0])), length * sizeof(wchar_t));

	return result;
}
std::basic_string<pugi::char_t> xml_writer_string::as_string() const
{
#ifdef PUGIXML_WCHAR_MODE // to avoid "condition is always true" warning in BCC
CHECK(contents.size() % sizeof(pugi::char_t) == 0);
#endif
// round-trip pointer through void* to avoid pointer alignment warnings; contents data should be heap allocated => safe to cast
return std::basic_string<pugi::char_t>(static_cast<const pugi::char_t*>(static_cast<const void*>(contents.data())), contents.size() / sizeof(pugi::char_t));
}
// Saves the document through a string-backed writer, using a tab as the indentation string,
// and returns the produced bytes as a narrow string.
std::string save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding)
{
	xml_writer_string sink;

	doc.save(sink, STR("\t"), flags, encoding);

	return sink.as_narrow();
}
// Saves the document with the given flags/encoding and checks the output against
// the expected byte sequence of the given length.
bool test_save_narrow(const pugi::xml_document& doc, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length)
{
	const std::string saved = save_narrow(doc, flags, encoding);

	return test_narrow(saved, expected, length);
}
// Prints the node through a string-backed writer, using a tab as the indentation string,
// and returns the produced bytes as a narrow string.
std::string write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding)
{
	xml_writer_string sink;

	node.print(sink, STR("\t"), flags, encoding);

	return sink.as_narrow();
}
// Prints the node with the given flags/encoding and checks the output against
// the expected byte sequence of the given length.
bool test_write_narrow(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding, const char* expected, size_t length)
{
	const std::string written = write_narrow(node, flags, encoding);

	return test_narrow(written, expected, length);
}
// Prints the node through a string-backed writer, using a tab as the indentation string,
// and returns the produced bytes viewed as a wide string.
std::basic_string<wchar_t> write_wide(pugi::xml_node node, unsigned int flags, pugi::xml_encoding encoding)
{
	xml_writer_string sink;

	node.print(sink, STR("\t"), flags, encoding);

	return sink.as_wide();
}