#include "common.hpp"
#include "writer_string.hpp"
TEST(parse_pi_skip)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR(""), flags));
CHECK(!doc.first_child());
CHECK(doc.load_string(STR(" value?>"), flags));
CHECK(!doc.first_child());
}
}
TEST(parse_pi_parse)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));
xml_node pi1 = doc.first_child();
xml_node pi2 = doc.last_child();
CHECK(pi1 != pi2);
CHECK(pi1.type() == node_pi);
CHECK_STRING(pi1.name(), STR("pi1"));
CHECK_STRING(pi1.value(), STR(""));
CHECK(pi2.type() == node_pi);
CHECK_STRING(pi2.name(), STR("pi2"));
CHECK_STRING(pi2.value(), STR("value"));
}
TEST(parse_pi_parse_spaces)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));
xml_node pi = doc.first_child();
CHECK(pi.type() == node_pi);
CHECK_STRING(pi.name(), STR("target"));
CHECK_STRING(pi.value(), STR("value "));
}
TEST(parse_pi_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("?"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR(">"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("#?>"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
CHECK(doc.load_string(STR(" "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR(""), parse_fragment | parse_pi).status == status_bad_pi);
CHECK(doc.load_string(STR(""), parse_fragment | parse_pi).status == status_bad_pi);
CHECK(doc.load_string(STR(""), parse_fragment | parse_pi).status == status_bad_pi);
}
TEST(parse_comments_skip)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment));
CHECK(!doc.first_child());
}
TEST(parse_comments_parse)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_comments));
xml_node c1 = doc.first_child();
xml_node c2 = doc.last_child();
CHECK(c1 != c2);
CHECK(c1.type() == node_comment);
CHECK_STRING(c1.name(), STR(""));
CHECK_STRING(c1.value(), STR(""));
CHECK(c2.type() == node_comment);
CHECK_STRING(c2.name(), STR(""));
CHECK_STRING(c2.value(), STR("value"));
}
TEST(parse_comments_parse_no_eol)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_comments));
xml_node c = doc.first_child();
CHECK(c.type() == node_comment);
CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}
TEST(parse_comments_parse_eol)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_comments | parse_eol));
xml_node c = doc.first_child();
CHECK(c.type() == node_comment);
CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}
TEST(parse_comments_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_comments, parse_fragment | parse_comments | parse_eol};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR(""), flags).status == status_bad_comment);
CHECK(doc.load_string(STR(""), flags).status == status_bad_comment);
CHECK(doc.load_string(STR(""), flags).status == status_bad_comment);
}
}
TEST(parse_cdata_skip)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment));
CHECK(!doc.first_child());
}
TEST(parse_cdata_skip_contents)
{
xml_document doc;
CHECK(doc.load_string(STR("hello, world!"), parse_fragment));
CHECK_NODE(doc, STR("hello, world!"));
}
TEST(parse_cdata_parse)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_cdata));
xml_node c1 = doc.first_child();
xml_node c2 = doc.last_child();
CHECK(c1 != c2);
CHECK(c1.type() == node_cdata);
CHECK_STRING(c1.name(), STR(""));
CHECK_STRING(c1.value(), STR(""));
CHECK(c2.type() == node_cdata);
CHECK_STRING(c2.name(), STR(""));
CHECK_STRING(c2.value(), STR("value"));
}
TEST(parse_cdata_parse_no_eol)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_cdata));
xml_node c = doc.first_child();
CHECK(c.type() == node_cdata);
CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}
TEST(parse_cdata_parse_eol)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_cdata | parse_eol));
xml_node c = doc.first_child();
CHECK(c.type() == node_cdata);
CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}
TEST(parse_cdata_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_cdata, parse_fragment | parse_cdata | parse_eol};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR(""), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR(""), flags).status == status_bad_cdata);
}
}
TEST(parse_ws_pcdata_skip)
{
xml_document doc;
CHECK(doc.load_string(STR(" "), parse_fragment));
CHECK(!doc.first_child());
CHECK(doc.load_string(STR(" "), parse_minimal));
xml_node root = doc.child(STR("root"));
CHECK(root.first_child() == root.last_child());
CHECK(!root.first_child().first_child());
}
TEST(parse_ws_pcdata_parse)
{
xml_document doc;
CHECK(doc.load_string(STR(" "), parse_minimal | parse_ws_pcdata));
xml_node root = doc.child(STR("root"));
xml_node c1 = root.first_child();
xml_node c2 = c1.next_sibling();
xml_node c3 = c2.next_sibling();
CHECK(c3 == root.last_child());
CHECK(c1.type() == node_pcdata);
CHECK_STRING(c1.value(), STR(" "));
CHECK(c3.type() == node_pcdata);
CHECK_STRING(c3.value(), STR(" "));
CHECK(c2.first_child() == c2.last_child());
CHECK(c2.first_child().type() == node_pcdata);
CHECK_STRING(c2.first_child().value(), STR(" "));
}
static int get_tree_node_count(xml_node n)
{
int result = 1;
for (xml_node c = n.first_child(); c; c = c.next_sibling())
result += get_tree_node_count(c);
return result;
}
TEST(parse_ws_pcdata_permutations)
{
struct test_data_t
{
unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
const pugi::char_t* source;
const pugi::char_t* result;
int nodes; // negative if parsing should fail
};
test_data_t test_data[] =
{
// external pcdata should be discarded (whitespace or not)
{7, STR("ext1"), STR(""), 2},
{7, STR("ext1ext2"), STR(""), 2},
{7, STR(" "), STR(""), 2},
{7, STR(" "), STR(""), 2},
{7, STR(" "), STR(""), 2},
// inner pcdata should be preserved
{7, STR("inner"), STR("inner"), 3},
{7, STR("inner1inner2"), STR("inner1inner2"), 5},
{7, STR("inner1deepinner2"), STR("inner1deepinner2"), 6},
// empty pcdata nodes should never be created
{7, STR("inner1inner2"), STR("inner1inner2"), 5},
{7, STR("inner2"), STR("inner2"), 4},
{7, STR("inner1"), STR("inner1"), 4},
{7, STR(""), STR(""), 3},
// comments, pi or other nodes should not cause pcdata creation either
{7, STR(""), STR(""), 4},
// leading/trailing pcdata whitespace should be preserved (note: this will change if parse_ws_pcdata_trim is introduced)
{7, STR("\t \tinner1 deep \t\ninner2\n\t"), STR("\t \tinner1 deep \t\ninner2\n\t"), 6},
// whitespace-only pcdata preservation depends on the parsing mode
{1, STR("\n\t \n\t \n\t\n\t"), STR(""), 5},
{2, STR("\n\t \n\t \n\t\n\t"), STR("\n\t \n\t \n\t\n\t"), 13},
{4, STR("\n\t \n\t \n\t\n\t"), STR(" "), 7},
// current implementation of parse_ws_pcdata_single has an unfortunate bug; reproduce it here
{4, STR("\t\t\n\n"), STR("\n\n"), 3},
// error case: terminate PCDATA in the middle
{7, STR("abcdef"), STR("abcdef"), -3},
{5, STR(" "), STR(""), -2},
{2, STR(" "), STR(" "), -3},
// error case: terminate PCDATA as early as possible
{7, STR(""), STR(""), -2},
{7, STR("a"), STR("a"), -3},
{5, STR(" "), STR(""), -2},
{2, STR(" "), STR(" "), -3},
};
for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
{
const test_data_t& td = test_data[i];
for (int flag = 0; flag < 3; ++flag)
{
if (td.mask & (1 << flag))
{
unsigned int flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};
xml_document doc;
CHECK((td.nodes > 0) == doc.load_string(td.source, flags[flag]));
CHECK_NODE(doc, td.result);
int nodes = get_tree_node_count(doc);
CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
}
}
}
}
TEST(parse_ws_pcdata_fragment_permutations)
{
struct test_data_t
{
unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
const pugi::char_t* source;
const pugi::char_t* result;
int nodes; // negative if parsing should fail
};
test_data_t test_data[] =
{
// external pcdata should be preserved
{7, STR("ext1"), STR("ext1"), 2},
{5, STR(" "), STR(""), 1},
{2, STR(" "), STR(" "), 2},
{7, STR("ext1"), STR("ext1"), 3},
{7, STR("ext2"), STR("ext2"), 3},
{7, STR("ext1ext2"), STR("ext1ext2"), 4},
{7, STR("ext1ext2ext3"), STR("ext1ext2ext3"), 6},
{5, STR(" "), STR(""), 2},
{2, STR(" "), STR(" "), 3},
{5, STR(" "), STR(""), 2},
{2, STR(" "), STR(" "), 3},
{5, STR(" "), STR(""), 2},
{2, STR(" "), STR(" "), 4},
{5, STR(" "), STR(""), 3},
{2, STR(" "), STR(" "), 6},
};
for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
{
const test_data_t& td = test_data[i];
for (int flag = 0; flag < 3; ++flag)
{
if (td.mask & (1 << flag))
{
unsigned int flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};
xml_document doc;
CHECK((td.nodes > 0) == doc.load_string(td.source, flags[flag] | parse_fragment));
CHECK_NODE(doc, td.result);
int nodes = get_tree_node_count(doc);
CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
}
}
}
}
TEST(parse_pcdata_no_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("\r\rval1\rval2\r\nval3\nval4\r\r"), parse_minimal));
CHECK_STRING(doc.child_value(STR("root")), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}
TEST(parse_pcdata_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("\r\rval1\rval2\r\nval3\nval4\r\r"), parse_minimal | parse_eol));
CHECK_STRING(doc.child_value(STR("root")), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}
TEST(parse_pcdata_skip_ext)
{
xml_document doc;
CHECK(doc.load_string(STR("prepost"), parse_minimal));
CHECK(doc.first_child() == doc.last_child());
CHECK(doc.first_child().type() == node_element);
}
TEST(parse_pcdata_error)
{
xml_document doc;
CHECK(doc.load_string(STR("pcdata"), parse_minimal).status == status_end_element_mismatch);
}
TEST(parse_pcdata_trim)
{
struct test_data_t
{
const pugi::char_t* source;
const pugi::char_t* result;
unsigned int flags;
};
test_data_t test_data[] =
{
{ STR(" text"), STR("text"), 0 },
{ STR("\t\n text"), STR("text"), 0 },
{ STR("text "), STR("text"), 0 },
{ STR("text \t\n"), STR("text"), 0 },
{ STR("\r\n\t text \t\n\r"), STR("text"), 0 },
{ STR(" text"), STR("text"), parse_fragment },
{ STR("\t\n text"), STR("text"), parse_fragment },
{ STR("text "), STR("text"), parse_fragment },
{ STR("text \t\n"), STR("text"), parse_fragment },
{ STR("\r\n\t text \t\n\r"), STR("text"), parse_fragment },
{ STR("\r\n\t text \t\n\r more \r\n\t"), STR("text \t\n\r more"), 0 },
{ STR("\r\n\t text \t\n\r more \r\n\t"), STR("text \t\n\n more"), parse_eol },
{ STR("\r\n\t text \r\n\r\n\r\n\r\n\r\n\r\n\r\n more \r\n\t"), STR("text \n\n\n\n\n\n\n more"), parse_eol },
{ STR(" test&&&&&&& "), STR("test&&&&&&&"), 0 },
{ STR(" test&&&&&&& "), STR("test&&&&&&&"), parse_escapes },
{ STR(" test&&&&&&& "), STR("test&&&&&&&"), parse_fragment | parse_escapes },
{ STR("\r\n\t text \t\n\r m&&e \r\n\t"), STR("text \t\n\n m&&e"), parse_eol | parse_escapes }
};
for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
{
const test_data_t& td = test_data[i];
xml_document doc;
CHECK(doc.load_string(td.source, td.flags | parse_trim_pcdata));
const pugi::char_t* value = doc.child(STR("node")) ? doc.child_value(STR("node")) : doc.text().get();
CHECK_STRING(value, td.result);
}
}
TEST(parse_pcdata_trim_empty)
{
unsigned int flags[] = { 0, parse_ws_pcdata, parse_ws_pcdata_single, parse_ws_pcdata | parse_ws_pcdata_single };
for (size_t i = 0; i < sizeof(flags) / sizeof(flags[0]); ++i)
{
xml_document doc;
CHECK(doc.load_string(STR(" "), flags[i] | parse_trim_pcdata));
xml_node node = doc.child(STR("node"));
CHECK(node);
CHECK(!node.first_child());
}
}
TEST(parse_escapes_skip)
{
xml_document doc;
CHECK(doc.load_string(STR("<>&'""), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'""));
}
TEST(parse_escapes_parse)
{
xml_document doc;
CHECK(doc.load_string(STR("<>&'""), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("<>&'\""));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'\""));
}
TEST(parse_escapes_code)
{
xml_document doc;
CHECK(doc.load_string(STR(" "), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("\01 "));
}
TEST(parse_escapes_code_exhaustive_dec)
{
xml_document doc;
CHECK(doc.load_string(STR("/; :;a;A;
"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("/;\x1\x2\x3\x4\x5\x6\x7\x8\x9:;a;A;
"));
}
TEST(parse_escapes_code_exhaustive_hex)
{
xml_document doc;
CHECK(doc.load_string(STR("/; :;@;
G;`;
g;"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("/;\x1\x2\x3\x4\x5\x6\x7\x8\x9:;@;\xa\xb\xc\xd\xe\xfG;`;\xa\xb\xc\xd\xe\xfg;"));
}
TEST(parse_escapes_code_restore)
{
xml_document doc;
CHECK(doc.load_string(STR(" - - "), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR(" - - "));
}
TEST(parse_escapes_char_restore)
{
xml_document doc;
CHECK(doc.load_string(STR("&q &qu &quo " "), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&q &qu &quo " "));
CHECK(doc.load_string(STR("&a &ap &apo &apos "), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&a &ap &apo &apos "));
CHECK(doc.load_string(STR("&a &am & "), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&a &am & "));
CHECK(doc.load_string(STR("&l < "), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&l < "));
CHECK(doc.load_string(STR("&g > "), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&g > "));
}
TEST(parse_escapes_unicode)
{
xml_document doc;
CHECK(doc.load_string(STR("γγ𤭢"), parse_minimal | parse_escapes));
#ifdef PUGIXML_WCHAR_MODE
const pugi::char_t* v = doc.child_value(STR("node"));
size_t wcharsize = sizeof(wchar_t);
CHECK(v[0] == 0x3b3 && v[1] == 0x3b3 && (wcharsize == 2 ? v[2] == wchar_cast(0xd852) && v[3] == wchar_cast(0xdf62) : v[2] == wchar_cast(0x24b62)));
#else
CHECK_STRING(doc.child_value(STR("node")), "\xce\xb3\xce\xb3\xf0\xa4\xad\xa2");
#endif
}
TEST(parse_escapes_error)
{
xml_document doc;
CHECK(doc.load_string(STR("g;ab;""), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("g;ab;""));
CHECK(!doc.load_string(STR("&;-;-;"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&;-;-;"));
}
TEST(parse_escapes_attribute)
{
xml_document doc;
for (int wnorm = 0; wnorm < 2; ++wnorm)
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
{
unsigned int flags = parse_escapes;
flags |= (wnorm ? parse_wnorm_attribute : 0);
flags |= (eol ? parse_eol : 0);
flags |= (wconv ? parse_wconv_attribute : 0);
CHECK(doc.load_string(STR(""), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("\""));
}
}
TEST(parse_attribute_spaces)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id3")).value(), STR("v3"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id4")).value(), STR("v4"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id5")).value(), STR("v5"));
}
TEST(parse_attribute_quot)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2"));
}
TEST(parse_attribute_no_eol_no_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\r\rval1 \rval2\r\nval3\nval4\r\r"));
}
TEST(parse_attribute_eol_no_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal | parse_eol));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\n\nval1 \nval2\nval3\nval4\n\n"));
}
TEST(parse_attribute_no_eol_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal | parse_wconv_attribute));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 "));
}
TEST(parse_attribute_eol_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal | parse_eol | parse_wconv_attribute));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 "));
}
TEST(parse_attribute_wnorm)
{
xml_document doc;
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
{
unsigned int flags = parse_minimal | parse_wnorm_attribute | (eol ? parse_eol : 0) | (wconv ? parse_wconv_attribute : 0);
CHECK(doc.load_string(STR(""), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("val1 val2 val3 val4"));
}
}
TEST(parse_attribute_variations)
{
xml_document doc;
for (int wnorm = 0; wnorm < 2; ++wnorm)
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
for (int escapes = 0; escapes < 2; ++escapes)
{
unsigned int flags = parse_minimal;
flags |= (wnorm ? parse_wnorm_attribute : 0);
flags |= (eol ? parse_eol : 0);
flags |= (wconv ? parse_wconv_attribute : 0);
flags |= (escapes ? parse_escapes : 0);
CHECK(doc.load_string(STR(""), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("1"));
}
}
TEST(parse_attribute_error)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
}
TEST(parse_attribute_termination_error)
{
xml_document doc;
for (int wnorm = 0; wnorm < 2; ++wnorm)
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
{
unsigned int flags = parse_minimal;
flags |= (wnorm ? parse_wnorm_attribute : 0);
flags |= (eol ? parse_eol : 0);
flags |= (wconv ? parse_wconv_attribute : 0);
CHECK(doc.load_string(STR(""), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("\""));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("'"));
}
}
TEST(parse_tag_single)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal));
CHECK_NODE(doc, STR(""));
}
TEST(parse_tag_hierarchy)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_minimal));
CHECK_NODE(doc, STR(""));
}
TEST(parse_tag_error)
{
xml_document doc;
CHECK(doc.load_string(STR("<"), parse_minimal).status == status_unrecognized_tag);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR(" node>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR(" node>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("<"), parse_minimal).status == status_unrecognized_tag);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR(">"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR(">"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_end_element);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
}
TEST(parse_declaration_cases)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));
CHECK(!doc.first_child());
}
TEST(parse_declaration_attr_cases)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));
CHECK(!doc.first_child());
}
TEST(parse_declaration_skip)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR(""), flags));
CHECK(!doc.first_child());
CHECK(doc.load_string(STR(" ?>"), flags));
CHECK(!doc.first_child());
}
}
TEST(parse_declaration_parse)
{
xml_document doc;
CHECK(doc.load_string(STR(""), parse_fragment | parse_declaration));
xml_node d1 = doc.first_child();
xml_node d2 = doc.last_child();
CHECK(d1 != d2);
CHECK(d1.type() == node_declaration);
CHECK_STRING(d1.name(), STR("xml"));
CHECK(d2.type() == node_declaration);
CHECK_STRING(d2.name(), STR("xml"));
CHECK_STRING(d2.attribute(STR("version")).value(), STR("1.0"));
}
TEST(parse_declaration_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
}
CHECK(doc.load_string(STR(""), parse_fragment | parse_declaration).status == status_bad_attribute);
CHECK(doc.load_string(STR(""), parse_fragment | parse_declaration).status == status_bad_pi);
}
TEST(parse_empty)
{
xml_document doc;
CHECK(doc.load_string(STR("")).status == status_no_document_element && !doc.first_child());
CHECK(doc.load_string(STR(""), parse_fragment) && !doc.first_child());
}
TEST(parse_out_of_memory)
{
test_runner::_memory_fail_threshold = 256;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_string(STR("")).status == status_out_of_memory));
CHECK(!doc.first_child());
}
TEST(parse_out_of_memory_halfway_node)
{
const unsigned int count = 10000;
static char_t text[count * 4];
for (unsigned int i = 0; i < count; ++i)
{
text[4*i + 0] = '<';
text[4*i + 1] = 'n';
text[4*i + 2] = '/';
text[4*i + 3] = '>';
}
test_runner::_memory_fail_threshold = 65536;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, count * 4).status == status_out_of_memory));
CHECK_NODE(doc.first_child(), STR(""));
}
TEST(parse_out_of_memory_halfway_attr)
{
const unsigned int count = 10000;
static char_t text[count * 5 + 4];
text[0] = '<';
text[1] = 'n';
for (unsigned int i = 0; i < count; ++i)
{
text[5*i + 2] = ' ';
text[5*i + 3] = 'a';
text[5*i + 4] = '=';
text[5*i + 5] = '"';
text[5*i + 6] = '"';
}
text[5 * count + 2] = '/';
text[5 * count + 3] = '>';
test_runner::_memory_fail_threshold = 65536;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, count * 5 + 4).status == status_out_of_memory));
CHECK_STRING(doc.first_child().name(), STR("n"));
CHECK_STRING(doc.first_child().first_attribute().name(), STR("a"));
CHECK_STRING(doc.first_child().last_attribute().name(), STR("a"));
}
TEST(parse_out_of_memory_conversion)
{
test_runner::_memory_fail_threshold = 1;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("", 7, parse_default, encoding_latin1).status == status_out_of_memory));
CHECK(!doc.first_child());
}
#ifdef PUGIXML_WCHAR_MODE
TEST(parse_out_of_memory_conversion_wchar)
{
test_runner::_memory_fail_threshold = 1;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("", 7).status == status_out_of_memory));
CHECK(!doc.first_child());
}
#endif
TEST(parse_out_of_memory_allocator_state_sync)
{
const unsigned int count = 10000;
static char_t text[count * 4];
for (unsigned int i = 0; i < count; ++i)
{
text[4*i + 0] = '<';
text[4*i + 1] = 'n';
text[4*i + 2] = '/';
text[4*i + 3] = '>';
}
test_runner::_memory_fail_threshold = 65536;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, count * 4).status == status_out_of_memory));
CHECK_NODE(doc.first_child(), STR(""));
test_runner::_memory_fail_threshold = 0;
for (unsigned int j = 0; j < count; ++j)
CHECK(doc.append_child(STR("n")));
}
static bool test_offset(const char_t* contents, unsigned int options, pugi::xml_parse_status status, ptrdiff_t offset)
{
xml_document doc;
xml_parse_result res = doc.load_string(contents, options);
return res.status == status && res.offset == offset;
}
#define CHECK_OFFSET(contents, options, status, offset) CHECK(test_offset(STR(contents), options, status, offset))
TEST(parse_error_offset)
{
CHECK_OFFSET("", parse_default, status_ok, 0);
test_runner::_memory_fail_threshold = 1;
CHECK_ALLOC_FAIL(CHECK_OFFSET("", parse_default, status_out_of_memory, 0));
test_runner::_memory_fail_threshold = 0;
CHECK_OFFSET("<3d/>", parse_default, status_unrecognized_tag, 1);
CHECK_OFFSET(" <3d/>", parse_default, status_unrecognized_tag, 2);
CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 1);
CHECK_OFFSET("", parse_default, status_bad_start_element, 5);
CHECK_OFFSET("", parse_default, status_bad_attribute, 8);
CHECK_OFFSET("valuevalue1value2outertext"));
CHECK_NODE_EX(doc, STR("\nvalue\n\nvalue1\nvalue2outer\ntext\n\n\n"), STR("\t"), 0);
CHECK_NODE_EX(doc, STR("\n\tvalue\n\t\n\t\tvalue1\n\t\tvalue2outer\n\ttext\n\t\n\n"), STR("\t"), format_indent);
}
}
TEST(parse_encoding_detect)
{
char test[] = "";
xml_document doc;
CHECK(doc.load_buffer(test, sizeof(test)));
}
TEST(parse_encoding_detect_latin1)
{
char test0[] = "";
char test1[] = "";
char test2[] = "";
char test3[] = "";
char test4[] = "";
xml_document doc;
CHECK(doc.load_buffer(test0, sizeof(test0)).encoding == encoding_utf8);
CHECK(doc.load_buffer(test1, sizeof(test1)).encoding == encoding_latin1);
CHECK(doc.load_buffer(test2, sizeof(test2)).encoding == encoding_latin1);
CHECK(doc.load_buffer(test3, sizeof(test3)).encoding == encoding_latin1);
CHECK(doc.load_buffer(test4, sizeof(test4)).encoding == encoding_latin1);
}
TEST(parse_encoding_detect_auto)
{
struct data_t
{
const char* contents;
size_t size;
xml_encoding encoding;
};
const data_t data[] =
{
// BOM
{ "\x00\x00\xfe\xff", 4, encoding_utf32_be },
{ "\xff\xfe\x00\x00", 4, encoding_utf32_le },
{ "\xfe\xff ", 4, encoding_utf16_be },
{ "\xff\xfe ", 4, encoding_utf16_le },
{ "\xef\xbb\xbf ", 4, encoding_utf8 },
// automatic tag detection for < or
{ "\x00\x00\x00<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>", 16, encoding_utf32_be },
{ "<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 16, encoding_utf32_le },
{ "\x00<\x00?\x00n\x00?\x00>", 10, encoding_utf16_be },
{ "<\x00?\x00n\x00?\x00>\x00", 10, encoding_utf16_le },
{ "\x00<\x00n\x00/\x00>", 8, encoding_utf16_be },
{ "<\x00n\x00/\x00>\x00", 8, encoding_utf16_le },
// ", 25, encoding_latin1 },
};
for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
{
xml_document doc;
xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment);
CHECK(result);
CHECK(result.encoding == data[i].encoding);
}
}
TEST(parse_encoding_detect_auto_incomplete)
{
struct data_t
{
const char* contents;
size_t size;
xml_encoding encoding;
};
const data_t data[] =
{
// BOM
{ "\x00\x00\xfe ", 4, encoding_utf8 },
{ "\x00\x00 ", 4, encoding_utf8 },
{ "\xff\xfe\x00 ", 4, encoding_utf16_le },
{ "\xfe ", 4, encoding_utf8 },
{ "\xff ", 4, encoding_utf8 },
{ "\xef\xbb ", 4, encoding_utf8 },
{ "\xef ", 4, encoding_utf8 },
// automatic tag detection for < or
{ "\x00\x00\x00 ", 4, encoding_utf8 },
{ "<\x00\x00n/\x00>\x00", 8, encoding_utf16_le },
{ "\x00", 8, encoding_utf16_be },
{ "<\x00?n/\x00>\x00", 8, encoding_utf16_le },
{ "\x00 ", 8, encoding_utf8 },
// ", 25, encoding_utf8 },
{ "", 25, encoding_utf8 },
{ "", 25, encoding_utf8 },
{ "<_ABC encoding='latin1'/>", 25, encoding_utf8 },
};
for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
{
xml_document doc;
xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment);
CHECK(result);
CHECK(result.encoding == data[i].encoding);
}
}