#include "test.hpp"
#include "writer_string.hpp"

using namespace pugi;

// Parser tests, part 1: processing instructions, comments, CDATA, whitespace-only PCDATA,
// PCDATA end-of-line handling and trimming, and character/entity escapes.
//
// NOTE(review): this copy of the file looks damaged. Many inputs are empty or truncated string
// literals: for example parse_pi_skip expects STR("") to load successfully with parse_fragment, while
// parse_pi_error expects the very same call to return status_bad_pi. The XML markup inside the
// literals appears to have been stripped; restore the literals from the upstream source before
// relying on any of these tests.

// With parse_pi disabled (with and without parse_declaration), expects both inputs to load and to
// leave the document without children.
TEST(parse_pi_skip)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(""), flags));
        CHECK(!doc.first_child());

        CHECK(doc.load_string(STR(" value?>"), flags));
        CHECK(!doc.first_child());
    }
}

// With parse_pi enabled, expects two distinct node_pi children: "pi1" with an empty value and
// "pi2" with the value "value".
TEST(parse_pi_parse)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));

    xml_node pi1 = doc.first_child();
    xml_node pi2 = doc.last_child();

    CHECK(pi1 != pi2);
    CHECK(pi1.type() == node_pi);
    CHECK_STRING(pi1.name(), STR("pi1"));
    CHECK_STRING(pi1.value(), STR(""));
    CHECK(pi2.type() == node_pi);
    CHECK_STRING(pi2.name(), STR("pi2"));
    CHECK_STRING(pi2.value(), STR("value"));
}

// Expects the first child to be a node_pi named "target" whose value is "value " (trailing space kept).
TEST(parse_pi_parse_spaces)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));

    xml_node pi = doc.first_child();

    CHECK(pi.type() == node_pi);
    CHECK_STRING(pi.name(), STR("target"));
    CHECK_STRING(pi.value(), STR("value "));
}

// Expects every input to be rejected with status_bad_pi, both with and without parse_pi.
TEST(parse_pi_error)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" "), flags).status == status_bad_pi);

        // NOTE(review): the three checks below do not use `flags`, and only one closing brace follows
        // them although two scopes (the loop and the test body) are open. Text seems to be missing
        // around here; the braces are left exactly as found.
        CHECK(doc.load_string(STR(""), parse_fragment | parse_pi).status == status_bad_pi);
        CHECK(doc.load_string(STR(""), parse_fragment | parse_pi).status == status_bad_pi);
        CHECK(doc.load_string(STR(""), parse_fragment | parse_pi).status == status_bad_pi);
    }

// NOTE(review): this test is truncated. The initializer of buf2 runs straight into the tail of what
// looks like a different test (a load with parse_fragment followed by an empty-document check), and
// `doc` is never declared. Everything in between appears to be lost; do not try to repair it by hand.
TEST(parse_pi_error_buffer_boundary)
{
    char buf1[] = "";
    char buf2[] = ""), parse_fragment));
    CHECK(!doc.first_child());
}

// With parse_comments enabled, expects two distinct node_comment children with empty names; the
// first has an empty value and the second has the value "value".
TEST(parse_comments_parse)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_comments));

    xml_node c1 = doc.first_child();
    xml_node c2 = doc.last_child();

    CHECK(c1 != c2);
    CHECK(c1.type() == node_comment);
    CHECK_STRING(c1.name(), STR(""));
    CHECK_STRING(c1.value(), STR(""));
    CHECK(c2.type() == node_comment);
    CHECK_STRING(c2.name(), STR(""));
    CHECK_STRING(c2.value(), STR("value"));
}

// Without parse_eol, expects the comment value to keep its \r and \r\n sequences unchanged.
TEST(parse_comments_parse_no_eol)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_comments));

    xml_node c = doc.first_child();
    CHECK(c.type() == node_comment);
    CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}

// With parse_eol, expects the comment value to contain only \n line endings.
TEST(parse_comments_parse_eol)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_comments | parse_eol));

    xml_node c = doc.first_child();
    CHECK(c.type() == node_comment);
    CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}

// Expects every input to be rejected with status_bad_comment for each of the three flag sets.
TEST(parse_comments_error)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_comments, parse_fragment | parse_comments | parse_eol};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(""), flags).status == status_bad_comment);
        CHECK(doc.load_string(STR(""), flags).status == status_bad_comment);
        CHECK(doc.load_string(STR(""), flags).status == status_bad_comment);
    }
}

// With parse_cdata disabled, expects the load to succeed and the document to have no children.
TEST(parse_cdata_skip)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment));
    CHECK(!doc.first_child());
}

// With parse_cdata disabled, checks the loaded document against "hello, world!" via CHECK_NODE.
TEST(parse_cdata_skip_contents)
{
    xml_document doc;
    CHECK(doc.load_string(STR("hello, world!"), parse_fragment));
    CHECK_NODE(doc, STR("hello, world!"));
}

// With parse_cdata enabled, expects two distinct node_cdata children with empty names; the first
// has an empty value and the second has the value "value".
TEST(parse_cdata_parse)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_cdata));

    xml_node c1 = doc.first_child();
    xml_node c2 = doc.last_child();

    CHECK(c1 != c2);
    CHECK(c1.type() == node_cdata);
    CHECK_STRING(c1.name(), STR(""));
    CHECK_STRING(c1.value(), STR(""));
    CHECK(c2.type() == node_cdata);
    CHECK_STRING(c2.name(), STR(""));
    CHECK_STRING(c2.value(), STR("value"));
}

// Without parse_eol, expects the CDATA value to keep its \r and \r\n sequences unchanged.
TEST(parse_cdata_parse_no_eol)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_cdata));

    xml_node c = doc.first_child();
    CHECK(c.type() == node_cdata);
    CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}

// With parse_eol, expects the CDATA value to contain only \n line endings.
TEST(parse_cdata_parse_eol)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_cdata | parse_eol));

    xml_node c = doc.first_child();
    CHECK(c.type() == node_cdata);
    CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}

// Expects both inputs to be rejected with status_bad_cdata for each of the three flag sets.
TEST(parse_cdata_error)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_cdata, parse_fragment | parse_cdata | parse_eol};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(""), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(""), flags).status == status_bad_cdata);
    }
}

// Without parse_ws_pcdata: a whitespace-only fragment yields no children, and under parse_minimal
// the "root" element has at most one child, which itself has no children.
TEST(parse_ws_pcdata_skip)
{
    xml_document doc;
    CHECK(doc.load_string(STR(" "), parse_fragment));

    CHECK(!doc.first_child());

    CHECK(doc.load_string(STR(" "), parse_minimal));

    xml_node root = doc.child(STR("root"));

    CHECK(root.first_child() == root.last_child());
    CHECK(!root.first_child().first_child());
}

// With parse_ws_pcdata: expects "root" to have exactly three children, the first and last being
// node_pcdata with value " ", and the middle one holding a single node_pcdata child with value " ".
TEST(parse_ws_pcdata_parse)
{
    xml_document doc;
    CHECK(doc.load_string(STR(" "), parse_minimal | parse_ws_pcdata));

    xml_node root = doc.child(STR("root"));

    xml_node c1 = root.first_child();
    xml_node c2 = c1.next_sibling();
    xml_node c3 = c2.next_sibling();

    CHECK(c3 == root.last_child());

    CHECK(c1.type() == node_pcdata);
    CHECK_STRING(c1.value(), STR(" "));
    CHECK(c3.type() == node_pcdata);
    CHECK_STRING(c3.value(), STR(" "));

    CHECK(c2.first_child() == c2.last_child());
    CHECK(c2.first_child().type() == node_pcdata);
    CHECK_STRING(c2.first_child().value(), STR(" "));
}

// Returns the number of nodes in the subtree rooted at n, counting n itself.
static int get_tree_node_count(xml_node n)
{
    int result = 1;

    for (xml_node c = n.first_child(); c; c = c.next_sibling())
        result += get_tree_node_count(c);

    return result;
}

// Table-driven check of whitespace PCDATA handling. For every row, each parse mode selected by
// `mask` loads `source`; the load must succeed exactly when `nodes` is positive, the document is
// checked against `result` via CHECK_NODE, and the node count of the whole tree (document node
// included) must equal the absolute value of `nodes`.
TEST(parse_ws_pcdata_permutations)
{
    struct test_data_t
    {
        unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
        const char_t* source;
        const char_t* result;
        int nodes; // negative if parsing should fail
    };

    test_data_t test_data[] =
    {
        // external pcdata should be discarded (whitespace or not)
        {7, STR("ext1"), STR(""), 2},
        {7, STR("ext1ext2"), STR(""), 2},
        {7, STR(" "), STR(""), 2},
        {7, STR(" "), STR(""), 2},
        {7, STR(" "), STR(""), 2},
        // inner pcdata should be preserved
        {7, STR("inner"), STR("inner"), 3},
        {7, STR("inner1inner2"), STR("inner1inner2"), 5},
        {7, STR("inner1deepinner2"), STR("inner1deepinner2"), 6},
        // empty pcdata nodes should never be created
        {7, STR("inner1inner2"), STR("inner1inner2"), 5},
        {7, STR("inner2"), STR("inner2"), 4},
        {7, STR("inner1"), STR("inner1"), 4},
        {7, STR(""), STR(""), 3},
        // comments, pi or other nodes should not cause pcdata creation either
        {7, STR(""), STR(""), 4},
        // leading/trailing pcdata whitespace should be preserved (note: this will change if parse_ws_pcdata_trim is introduced)
        {7, STR("\t \tinner1 deep \t\ninner2\n\t"), STR("\t \tinner1 deep \t\ninner2\n\t"), 6},
        // whitespace-only pcdata preservation depends on the parsing mode
        {1, STR("\n\t \n\t \n\t\n\t"), STR(""), 5},
        {2, STR("\n\t \n\t \n\t\n\t"), STR("\n\t \n\t \n\t\n\t"), 13},
        {4, STR("\n\t \n\t \n\t\n\t"), STR(" "), 7},
        // current implementation of parse_ws_pcdata_single has an unfortunate bug; reproduce it here
        {4, STR("\t\t\n\n"), STR("\n\n"), 3},
        // error case: terminate PCDATA in the middle
        {7, STR("abcdef"), STR("abcdef"), -3},
        {5, STR(" "), STR(""), -2},
        {2, STR(" "), STR(" "), -3},
        // error case: terminate PCDATA as early as possible
        {7, STR(""), STR(""), -2},
        {7, STR("a"), STR("a"), -3},
        {5, STR(" "), STR(""), -2},
        {2, STR(" "), STR(" "), -3},
    };

    for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
    {
        const test_data_t& td = test_data[i];

        // bit `flag` of the mask selects entry `flag` of the flags array below
        for (int flag = 0; flag < 3; ++flag)
        {
            if (td.mask & (1 << flag))
            {
                unsigned int flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};

                xml_document doc;
                CHECK((td.nodes > 0) == doc.load_string(td.source, flags[flag]));
                CHECK_NODE(doc, td.result);

                int nodes = get_tree_node_count(doc);
                CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
            }
        }
    }
}

// Same table-driven scheme as parse_ws_pcdata_permutations, but every load additionally passes
// parse_fragment.
TEST(parse_ws_pcdata_fragment_permutations)
{
    struct test_data_t
    {
        unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
        const char_t* source;
        const char_t* result;
        int nodes; // negative if parsing should fail
    };

    test_data_t test_data[] =
    {
        // external pcdata should be preserved
        {7, STR("ext1"), STR("ext1"), 2},
        {5, STR(" "), STR(""), 1},
        {2, STR(" "), STR(" "), 2},
        {7, STR("ext1"), STR("ext1"), 3},
        {7, STR("ext2"), STR("ext2"), 3},
        {7, STR("ext1ext2"), STR("ext1ext2"), 4},
        {7, STR("ext1ext2ext3"), STR("ext1ext2ext3"), 6},
        {5, STR(" "), STR(""), 2},
        {2, STR(" "), STR(" "), 3},
        {5, STR(" "), STR(""), 2},
        {2, STR(" "), STR(" "), 3},
        {5, STR(" "), STR(""), 2},
        {2, STR(" "), STR(" "), 4},
        {5, STR(" "), STR(""), 3},
        {2, STR(" "), STR(" "), 6},
    };

    for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
    {
        const test_data_t& td = test_data[i];

        for (int flag = 0; flag < 3; ++flag)
        {
            if (td.mask & (1 << flag))
            {
                unsigned int flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};

                xml_document doc;
                CHECK((td.nodes > 0) == doc.load_string(td.source, flags[flag] | parse_fragment));
                CHECK_NODE(doc, td.result);

                int nodes = get_tree_node_count(doc);
                CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
            }
        }
    }
}

// Without parse_eol, expects the child value of "root" to keep \r and \r\n unchanged.
TEST(parse_pcdata_no_eol)
{
    xml_document doc;
    CHECK(doc.load_string(STR("\r\rval1\rval2\r\nval3\nval4\r\r"), parse_minimal));

    CHECK_STRING(doc.child_value(STR("root")), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}

// With parse_eol, expects the child value of "root" to contain only \n line endings.
TEST(parse_pcdata_eol)
{
    xml_document doc;
    CHECK(doc.load_string(STR("\r\rval1\rval2\r\nval3\nval4\r\r"), parse_minimal | parse_eol));

    CHECK_STRING(doc.child_value(STR("root")), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}

// Under parse_minimal, expects the document to have a single child and that child to be an element.
TEST(parse_pcdata_skip_ext)
{
    xml_document doc;
    CHECK(doc.load_string(STR("prepost"), parse_minimal));
    CHECK(doc.first_child() == doc.last_child());
    CHECK(doc.first_child().type() == node_element);
}

// Expects the input to be rejected with status_end_element_mismatch.
TEST(parse_pcdata_error)
{
    xml_document doc;
    CHECK(doc.load_string(STR("pcdata"), parse_minimal).status == status_end_element_mismatch);
}

// Table-driven check of parse_trim_pcdata: each `source` is loaded with `flags | parse_trim_pcdata`
// and the resulting text must equal `result`. The text is read from the "node" child when one
// exists, otherwise from the document's own text().
TEST(parse_pcdata_trim)
{
    struct test_data_t
    {
        const char_t* source;
        const char_t* result;
        unsigned int flags;
    };

    test_data_t test_data[] =
    {
        { STR(" text"), STR("text"), 0 },
        { STR("\t\n text"), STR("text"), 0 },
        { STR("text "), STR("text"), 0 },
        { STR("text \t\n"), STR("text"), 0 },
        { STR("\r\n\t text \t\n\r"), STR("text"), 0 },
        { STR(" text"), STR("text"), parse_fragment },
        { STR("\t\n text"), STR("text"), parse_fragment },
        { STR("text "), STR("text"), parse_fragment },
        { STR("text \t\n"), STR("text"), parse_fragment },
        { STR("\r\n\t text \t\n\r"), STR("text"), parse_fragment },
        { STR("\r\n\t text \t\n\r more \r\n\t"), STR("text \t\n\r more"), 0 },
        { STR("\r\n\t text \t\n\r more \r\n\t"), STR("text \t\n\n more"), parse_eol },
        { STR("\r\n\t text \r\n\r\n\r\n\r\n\r\n\r\n\r\n more \r\n\t"), STR("text \n\n\n\n\n\n\n more"), parse_eol },
        { STR(" test&&&&&&& "), STR("test&&&&&&&"), 0 },
        { STR(" test&&&&&&& "), STR("test&&&&&&&"), parse_escapes },
        { STR(" test&&&&&&& "), STR("test&&&&&&&"), parse_fragment | parse_escapes },
        { STR("\r\n\t text \t\n\r m&&e \r\n\t"), STR("text \t\n\n m&&e"), parse_eol | parse_escapes }
    };

    for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
    {
        const test_data_t& td = test_data[i];

        xml_document doc;
        CHECK(doc.load_string(td.source, td.flags | parse_trim_pcdata));

        const char_t* value = doc.child(STR("node")) ? doc.child_value(STR("node")) : doc.text().get();
        CHECK_STRING(value, td.result);
    }
}

// For each whitespace-PCDATA flag combination together with parse_trim_pcdata, expects a "node"
// child to exist and to have no children.
TEST(parse_pcdata_trim_empty)
{
    unsigned int flags[] = { 0, parse_ws_pcdata, parse_ws_pcdata_single, parse_ws_pcdata | parse_ws_pcdata_single };

    for (size_t i = 0; i < sizeof(flags) / sizeof(flags[0]); ++i)
    {
        xml_document doc;
        CHECK(doc.load_string(STR(" "), flags[i] | parse_trim_pcdata));

        xml_node node = doc.child(STR("node"));
        CHECK(node);
        CHECK(!node.first_child());
    }
}

// Without parse_escapes, compares the "id" attribute value of "node" with the expected literal.
// NOTE(review): both literals below have unbalanced double quotes as they stand, so this test
// cannot compile in its current form; the original text of the literals appears to be lost.
TEST(parse_escapes_skip)
{
    xml_document doc;
    CHECK(doc.load_string(STR("<>&'""), parse_minimal));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'""));
}

// With parse_escapes, expects both the child value and the "id" attribute of "node" to be <>&'".
// NOTE(review): the input literal has unbalanced double quotes as it stands.
TEST(parse_escapes_parse)
{
    xml_document doc;
    CHECK(doc.load_string(STR("<>&'""), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("<>&'\""));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'\""));
}

// With parse_escapes, expects the child value of "node" to be the character with code 1 followed
// by a space.
TEST(parse_escapes_code)
{
    xml_document doc;
    CHECK(doc.load_string(STR(" "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("\01 "));
}

// With parse_escapes, expects the references &#/; &#:; &#a; &#A; to stay as written, surrounded by
// the characters with codes 1 through 9.
TEST(parse_escapes_code_exhaustive_dec)
{
    xml_document doc;
    CHECK(doc.load_string(STR("&#/; &#:;&#a;&#A; "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&#/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#:;&#a;&#A; "));
}

// Hexadecimal counterpart of the test above: &#x/; &#x:; &#x@; &#xG; &#x`; &#xg; are expected to
// stay as written, with the characters 0x1-0x9 and two runs of 0xa-0xf in between.
TEST(parse_escapes_code_exhaustive_hex)
{
    xml_document doc;
    CHECK(doc.load_string(STR("&#x/; &#x:;&#x@; &#xG;&#x`; &#xg;"), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&#x/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#x:;&#x@;\xa\xb\xc\xd\xe\xf&#xG;&#x`;\xa\xb\xc\xd\xe\xf&#xg;"));
}

// With parse_escapes, expects the child value of "node" to equal the input text.
TEST(parse_escapes_code_restore)
{
    xml_document doc;
    CHECK(doc.load_string(STR("  - - "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("  - - "));
}
// Parser tests, part 2: entity escapes, attribute parsing, tags, declarations, out-of-memory
// handling, error offsets, embedded/merged PCDATA and encoding detection.
//
// NOTE(review): this copy of the file looks damaged. Many inputs are empty or truncated string
// literals: for example parse_empty expects STR("") to load successfully with parse_fragment, while
// parse_declaration_error expects the very same call to return status_bad_pi. The XML markup inside
// the literals appears to have been stripped; restore the literals from the upstream source before
// relying on any of these tests.

// With parse_escapes, expects incomplete entity names (&q, &qu, &quo, &a, &ap, ...) to be kept as
// written in the child value of "node".
// NOTE(review): the first pair of literals has unbalanced double quotes as it stands.
TEST(parse_escapes_char_restore)
{
    xml_document doc;

    CHECK(doc.load_string(STR("&q &qu &quo " "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&q &qu &quo " "));

    CHECK(doc.load_string(STR("&a &ap &apo &apos "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&a &ap &apo &apos "));

    CHECK(doc.load_string(STR("&a &am & "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&a &am & "));

    CHECK(doc.load_string(STR("&l < "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&l < "));

    CHECK(doc.load_string(STR("&g > "), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&g > "));
}

// With parse_escapes, expects the child value of "node" to be U+03B3, U+03B3, U+24B62. In wchar
// mode the last character is accepted either as one code unit or, when wchar_t is 2 bytes wide,
// as the surrogate pair 0xd852 0xdf62; otherwise the UTF-8 byte sequence is compared.
TEST(parse_escapes_unicode)
{
    xml_document doc;
    CHECK(doc.load_string(STR("γγ𤭢"), parse_minimal | parse_escapes));

#ifdef PUGIXML_WCHAR_MODE
    const char_t* v = doc.child_value(STR("node"));

    size_t wcharsize = sizeof(wchar_t);

    CHECK(v[0] == 0x3b3 && v[1] == 0x3b3 && (wcharsize == 2 ? v[2] == wchar_cast(0xd852) && v[3] == wchar_cast(0xdf62) : v[2] == wchar_cast(0x24b62)));
#else
    CHECK_STRING(doc.child_value(STR("node")), "\xce\xb3\xce\xb3\xf0\xa4\xad\xa2");
#endif
}

// With parse_escapes, expects the first input to load with its text kept as written, and the
// second input to fail to load while the child value of "node" still holds the text as written.
// NOTE(review): the first pair of literals has unbalanced double quotes as it stands.
TEST(parse_escapes_error)
{
    xml_document doc;
    CHECK(doc.load_string(STR("g;&#ab;""), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("g;&#ab;""));

    CHECK(!doc.load_string(STR("&#;&#x;&;&#x-;&#-;"), parse_minimal | parse_escapes));
    CHECK_STRING(doc.child_value(STR("node")), STR("&#;&#x;&;&#x-;&#-;"));
}

// For all eight combinations of parse_wnorm_attribute / parse_eol / parse_wconv_attribute on top
// of parse_escapes, expects the "id" attribute of "node" to be a single double quote.
TEST(parse_escapes_attribute)
{
    xml_document doc;

    for (int wnorm = 0; wnorm < 2; ++wnorm)
        for (int eol = 0; eol < 2; ++eol)
            for (int wconv = 0; wconv < 2; ++wconv)
            {
                unsigned int flags = parse_escapes;

                flags |= (wnorm ? parse_wnorm_attribute : 0);
                flags |= (eol ? parse_eol : 0);
                flags |= (wconv ? parse_wconv_attribute : 0);

                CHECK(doc.load_string(STR(""), flags));
                CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("\""));
            }
}

// Expects attributes id1..id5 of "node" to have the values v1..v5.
TEST(parse_attribute_spaces)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1"));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2"));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id3")).value(), STR("v3"));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id4")).value(), STR("v4"));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id5")).value(), STR("v5"));
}

// Expects attributes id1 and id2 of "node" to have the values v1 and v2.
TEST(parse_attribute_quot)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1"));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2"));
}

// With neither parse_eol nor parse_wconv_attribute, expects tabs, \r and \r\n to be kept in the
// attribute value.
TEST(parse_attribute_no_eol_no_wconv)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\r\rval1 \rval2\r\nval3\nval4\r\r"));
}

// With parse_eol only, expects line endings in the attribute value to become \n while the tab is kept.
TEST(parse_attribute_eol_no_wconv)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal | parse_eol));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\n\nval1 \nval2\nval3\nval4\n\n"));
}

// With parse_wconv_attribute only, expects an attribute value that contains spaces as its only
// whitespace characters.
TEST(parse_attribute_no_eol_wconv)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal | parse_wconv_attribute));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 "));
}

// With parse_eol and parse_wconv_attribute, expects an attribute value that contains spaces as its
// only whitespace characters.
TEST(parse_attribute_eol_wconv)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal | parse_eol | parse_wconv_attribute));
    CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 "));
}

// With parse_wnorm_attribute, regardless of parse_eol / parse_wconv_attribute, expects the value
// "val1 val2 val3 val4" with no leading or trailing whitespace.
TEST(parse_attribute_wnorm)
{
    xml_document doc;

    for (int eol = 0; eol < 2; ++eol)
        for (int wconv = 0; wconv < 2; ++wconv)
        {
            unsigned int flags = parse_minimal | parse_wnorm_attribute | (eol ? parse_eol : 0) | (wconv ? parse_wconv_attribute : 0);
            CHECK(doc.load_string(STR(""), flags));
            CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("val1 val2 val3 val4"));
        }
}

// For all sixteen combinations of the four attribute-related flags, expects the "id" attribute of
// "node" to be "1".
TEST(parse_attribute_variations)
{
    xml_document doc;

    for (int wnorm = 0; wnorm < 2; ++wnorm)
        for (int eol = 0; eol < 2; ++eol)
            for (int wconv = 0; wconv < 2; ++wconv)
                for (int escapes = 0; escapes < 2; ++escapes)
                {
                    unsigned int flags = parse_minimal;

                    flags |= (wnorm ? parse_wnorm_attribute : 0);
                    flags |= (eol ? parse_eol : 0);
                    flags |= (wconv ? parse_wconv_attribute : 0);
                    flags |= (escapes ? parse_escapes : 0);

                    CHECK(doc.load_string(STR(""), flags));
                    CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("1"));
                }
}

// Expects each input to be rejected with status_bad_attribute or status_bad_start_element.
// NOTE(review): every input is now the same empty literal, so the different expected statuses
// cannot all hold; the original inputs appear to be lost.
TEST(parse_attribute_error)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
}

// For all eight flag combinations, expects attribute id1 to be a double quote and id2 to be an
// apostrophe.
TEST(parse_attribute_termination_error)
{
    xml_document doc;

    for (int wnorm = 0; wnorm < 2; ++wnorm)
        for (int eol = 0; eol < 2; ++eol)
            for (int wconv = 0; wconv < 2; ++wconv)
            {
                unsigned int flags = parse_minimal;

                flags |= (wnorm ? parse_wnorm_attribute : 0);
                flags |= (eol ? parse_eol : 0);
                flags |= (wconv ? parse_wconv_attribute : 0);

                CHECK(doc.load_string(STR(""), flags));
                CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("\""));
                CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("'"));
            }
}

// Loads with parse_wnorm_attribute, with and without parse_escapes, and checks the document via
// CHECK_NODE.
TEST(parse_attribute_wnorm_coverage)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_wnorm_attribute));
    CHECK_NODE(doc, STR(""));

    CHECK(doc.load_string(STR(""), parse_wnorm_attribute | parse_escapes));
    CHECK_NODE(doc, STR(""));
}

// Loads with parse_wconv_attribute, with and without parse_escapes, and checks the document via
// CHECK_NODE.
TEST(parse_attribute_wconv_coverage)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_wconv_attribute));
    CHECK_NODE(doc, STR(""));

    CHECK(doc.load_string(STR(""), parse_wconv_attribute | parse_escapes));
    CHECK_NODE(doc, STR(""));
}

// Loads with parse_eol, with and without parse_escapes, and checks the document via CHECK_NODE.
TEST(parse_attribute_eol_coverage)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_eol));
    CHECK_NODE(doc, STR(""));

    CHECK(doc.load_string(STR(""), parse_eol | parse_escapes));
    CHECK_NODE(doc, STR(""));
}

// Loads under parse_minimal and checks the document via CHECK_NODE.
TEST(parse_tag_single)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal));
    CHECK_NODE(doc, STR(""));
}

// Loads under parse_minimal and checks the document via CHECK_NODE.
TEST(parse_tag_hierarchy)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_minimal));
    CHECK_NODE(doc, STR(""));
}

// Expects each input to be rejected with the listed tag-related status.
// NOTE(review): apart from the two "<" inputs every literal is now empty, so the different
// expected statuses cannot all hold; the original inputs appear to be lost.
TEST(parse_tag_error)
{
    xml_document doc;
    CHECK(doc.load_string(STR("<"), parse_minimal).status == status_unrecognized_tag);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR("<"), parse_minimal).status == status_unrecognized_tag);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_end_element_mismatch);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_end_element);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
    CHECK(doc.load_string(STR(""), parse_minimal).status == status_bad_start_element);
}

// With parse_pi but without parse_declaration, expects the load to succeed and produce no children.
TEST(parse_declaration_cases)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));
    CHECK(!doc.first_child());
}

// With parse_pi but without parse_declaration, expects the load to succeed and produce no children.
TEST(parse_declaration_attr_cases)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_pi));
    CHECK(!doc.first_child());
}

// With parse_declaration disabled (with and without parse_pi), expects both inputs to load and to
// leave the document without children.
TEST(parse_declaration_skip)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(""), flags));
        CHECK(!doc.first_child());

        CHECK(doc.load_string(STR(" ?>"), flags));
        CHECK(!doc.first_child());
    }
}

// With parse_declaration enabled, expects two distinct node_declaration children named "xml", the
// second carrying a "version" attribute with the value "1.0".
TEST(parse_declaration_parse)
{
    xml_document doc;
    CHECK(doc.load_string(STR(""), parse_fragment | parse_declaration));

    xml_node d1 = doc.first_child();
    xml_node d2 = doc.last_child();

    CHECK(d1 != d2);
    CHECK(d1.type() == node_declaration);
    CHECK_STRING(d1.name(), STR("xml"));
    CHECK(d2.type() == node_declaration);
    CHECK_STRING(d2.name(), STR("xml"));
    CHECK_STRING(d2.attribute(STR("version")).value(), STR("1.0"));
}

// Expects status_bad_pi for the inputs in the loop (with and without parse_declaration), then
// status_bad_attribute and status_bad_pi for two further inputs parsed with parse_declaration.
TEST(parse_declaration_error)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(""), flags).status == status_bad_pi);
    }

    CHECK(doc.load_string(STR(""), parse_fragment | parse_declaration).status == status_bad_attribute);
    CHECK(doc.load_string(STR(""), parse_fragment | parse_declaration).status == status_bad_pi);
}

// An empty string yields status_no_document_element with default options, but loads successfully
// with parse_fragment; in both cases the document has no children.
TEST(parse_empty)
{
    xml_document doc;

    CHECK(doc.load_string(STR("")).status == status_no_document_element && !doc.first_child());
    CHECK(doc.load_string(STR(""), parse_fragment) && !doc.first_child());
}

// Sets the test runner's memory fail threshold to 256 and expects load_string to report
// status_out_of_memory, leaving the document without children.
TEST(parse_out_of_memory)
{
    test_runner::_memory_fail_threshold = 256;

    xml_document doc;
    CHECK_ALLOC_FAIL(CHECK(doc.load_string(STR("")).status == status_out_of_memory));
    CHECK(!doc.first_child());
}

// Fills a static buffer with 10000 repetitions of the four characters '<' 'n' '/' '>' and expects
// the in-place load to run out of memory at a threshold of 65536; the first child is then checked
// via CHECK_NODE.
TEST(parse_out_of_memory_halfway_node)
{
    const unsigned int count = 10000;
    static char_t text[count * 4];

    for (unsigned int i = 0; i < count; ++i)
    {
        text[4*i + 0] = '<';
        text[4*i + 1] = 'n';
        text[4*i + 2] = '/';
        text[4*i + 3] = '>';
    }

    test_runner::_memory_fail_threshold = 65536;

    xml_document doc;
    CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, sizeof(text)).status == status_out_of_memory));
    CHECK_NODE(doc.first_child(), STR(""));
}

// Builds one element "n" carrying 10000 attributes of the form  a=""  and expects the in-place load
// to run out of memory at a threshold of 65536, with the element and its first and last attribute
// still named "n" and "a".
TEST(parse_out_of_memory_halfway_attr)
{
    const unsigned int count = 10000;
    static char_t text[count * 5 + 4];

    text[0] = '<';
    text[1] = 'n';

    for (unsigned int i = 0; i < count; ++i)
    {
        text[5*i + 2] = ' ';
        text[5*i + 3] = 'a';
        text[5*i + 4] = '=';
        text[5*i + 5] = '"';
        text[5*i + 6] = '"';
    }

    text[5 * count + 2] = '/';
    text[5 * count + 3] = '>';

    test_runner::_memory_fail_threshold = 65536;

    xml_document doc;
    CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, sizeof(text)).status == status_out_of_memory));
    CHECK_STRING(doc.first_child().name(), STR("n"));
    CHECK_STRING(doc.first_child().first_attribute().name(), STR("a"));
    CHECK_STRING(doc.first_child().last_attribute().name(), STR("a"));
}

// With a memory fail threshold of 1, expects a latin1 load_buffer call to report
// status_out_of_memory and leave the document without children.
// NOTE(review): the buffer literal is empty but the size passed is 7, so the call would read past
// the end of the literal as written; the original contents appear to be lost.
TEST(parse_out_of_memory_conversion)
{
    test_runner::_memory_fail_threshold = 1;

    xml_document doc;
    CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("", 7, parse_default, encoding_latin1).status == status_out_of_memory));
    CHECK(!doc.first_child());
}

// Same setup as parse_out_of_memory_halfway_node; afterwards the threshold is reset to 0 and 10000
// append_child calls on the same document are all expected to succeed.
TEST(parse_out_of_memory_allocator_state_sync)
{
    const unsigned int count = 10000;
    static char_t text[count * 4];

    for (unsigned int i = 0; i < count; ++i)
    {
        text[4*i + 0] = '<';
        text[4*i + 1] = 'n';
        text[4*i + 2] = '/';
        text[4*i + 3] = '>';
    }

    test_runner::_memory_fail_threshold = 65536;

    xml_document doc;
    CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, sizeof(text)).status == status_out_of_memory));
    CHECK_NODE(doc.first_child(), STR(""));

    test_runner::_memory_fail_threshold = 0;

    for (unsigned int j = 0; j < count; ++j)
        CHECK(doc.append_child(STR("n")));
}

// Loads `contents` with `options` into a fresh document and returns true when both the reported
// status and the reported offset equal the expected values.
static bool test_offset(const char_t* contents, unsigned int options, xml_parse_status status, ptrdiff_t offset)
{
    xml_document doc;
    xml_parse_result res = doc.load_string(contents, options);

    return res.status == status && res.offset == offset;
}

// Wraps test_offset in CHECK and applies STR() to the contents literal.
#define CHECK_OFFSET(contents, options, status, offset) CHECK(test_offset(STR(contents), options, status, offset))

// Checks the (status, offset) pair reported for a series of inputs.
TEST(parse_error_offset)
{
    CHECK_OFFSET("", parse_default, status_ok, 0);

    test_runner::_memory_fail_threshold = 1;
    CHECK_ALLOC_FAIL(CHECK_OFFSET("", parse_default, status_out_of_memory, 0));
    test_runner::_memory_fail_threshold = 0;

    CHECK_OFFSET("<3d/>", parse_default, status_unrecognized_tag, 1);
    CHECK_OFFSET(" <3d/>", parse_default, status_unrecognized_tag, 2);
    CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 1);

    CHECK_OFFSET("", parse_default, status_bad_start_element, 5);
    CHECK_OFFSET("", parse_default, status_bad_attribute, 8);

    // NOTE(review): this test is truncated. The CHECK_OFFSET call below has a single argument and an
    // extra closing parenthesis, `doc` is not declared in this test, and two closing braces follow.
    // It looks like the remainder of this test and the head of a later one were lost; the text is
    // left exactly as found.
    CHECK_OFFSET("valuevalue1value2outertext"));
    CHECK_NODE_EX(doc, STR("\nvalue\n\nvalue1\nvalue2outer\ntext\n\n\n"), STR("\t"), 0);
    CHECK_NODE_EX(doc, STR("\n\tvalue\n\t\n\t\tvalue1\n\t\tvalue2outer\n\ttext\n\t\n\n"), STR("\t"), format_indent);
    }
}

// With parse_fragment | parse_embed_pcdata, expects the first child to be a node_pcdata with the
// value "text".
TEST_XML_FLAGS(parse_embed_pcdata_fragment, "text", parse_fragment | parse_embed_pcdata)
{
    CHECK_NODE(doc, STR("text"));
    CHECK(doc.first_child().type() == node_pcdata);
    CHECK_STRING(doc.first_child().value(), STR("text"));
}

// With parse_embed_pcdata, expects the last child of "n" to be a node_pcdata with the value "text".
TEST_XML_FLAGS(parse_embed_pcdata_child, "text", parse_embed_pcdata)
{
    xml_node n = doc.child(STR("n"));

    CHECK_NODE(doc, STR("text"));
    CHECK(n.last_child().type() == node_pcdata);
    CHECK_STRING(n.last_child().value(), STR("text"));
}

// With parse_embed_pcdata, expects "n" itself to carry the value "text1" and to have exactly one
// child, a node_pcdata with the value "text2".
TEST_XML_FLAGS(parse_embed_pcdata_comment, "text1text2", parse_embed_pcdata)
{
    xml_node n = doc.child(STR("n"));

    CHECK_NODE(doc, STR("text1text2"));
    CHECK_STRING(n.value(), STR("text1"));
    CHECK(n.first_child() == n.last_child());
    CHECK(n.last_child().type() == node_pcdata);
    CHECK_STRING(n.last_child().value(), STR("text2"));
}

// Combines parse_merge_pcdata with each of parse_cdata / parse_pi / parse_comments /
// parse_declaration and checks, per case, the text of the first child of "node", the type of the
// node that follows it, and that the last child is always a node_pcdata.
TEST(parse_merge_pcdata)
{
    const unsigned int default_parse = parse_escapes | parse_wconv_attribute | parse_eol;

    unsigned int flag_sets[] = {parse_cdata, parse_pi, parse_comments, parse_declaration};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        xml_document doc;
        const unsigned int flags = default_parse | parse_merge_pcdata | flag_sets[i];
        xml_parse_result res = doc.load_string(STR("First textSecond textsome more textLast text"), flags);
        CHECK(res);

        xml_node child = doc.child(STR("node"));

        if (flags & parse_comments)
        {
            // the first text run ends where the comment node begins
            CHECK_STRING(doc.child(STR("node")).text().get(), STR("First text"));
            CHECK_STRING(child.first_child().value(), STR("First text"));
            CHECK(child.first_child().next_sibling().type() == node_comment);
            CHECK_NODE(doc, STR("First textSecond textsome more textLast text"));
        }
        else if (flags & parse_cdata)
        {
            // the first text run ends where the CDATA node begins
            CHECK_STRING(doc.child(STR("node")).text().get(), STR("First textSecond text"));
            CHECK_STRING(child.first_child().value(), STR("First textSecond text"));
            CHECK(child.first_child().next_sibling().type() == node_cdata);
            CHECK_NODE(doc, STR("First textSecond textsome more textLast text"));
        }
        else if (flags & parse_pi)
        {
            // the first text run ends where the processing instruction node begins
            CHECK_STRING(doc.child(STR("node")).text().get(), STR("First textSecond textsome more text"));
            CHECK_STRING(child.first_child().value(), STR("First textSecond textsome more text"));
            CHECK(child.first_child().next_sibling().type() == node_pi);
            CHECK_NODE(doc, STR("First textSecond textsome more textLast text"));
        }
        else
        {
            // remaining case (parse_declaration): all text ends up in a single pcdata child
            CHECK(child.first_child() == child.last_child());
            CHECK(child.first_child().type() == node_pcdata);
            CHECK_NODE(doc, STR("First textSecond textsome more textLast text"));
        }

        CHECK(child.last_child().type() == node_pcdata);
    }
}

// Expects load_buffer to succeed on the test buffer with default options.
TEST(parse_encoding_detect)
{
    char test[] = "";

    xml_document doc;
    CHECK(doc.load_buffer(test, sizeof(test)));
}

// Expects the detected encoding to be encoding_utf8 for test0 and encoding_latin1 for test1..test4.
// NOTE(review): all five buffers are now identical empty literals, so they cannot produce
// different encodings; the original contents appear to be lost.
TEST(parse_encoding_detect_latin1)
{
    char test0[] = "";
    char test1[] = "";
    char test2[] = "";
    char test3[] = "";
    char test4[] = "";

    xml_document doc;
    CHECK(doc.load_buffer(test0, sizeof(test0)).encoding == encoding_utf8);
    CHECK(doc.load_buffer(test1, sizeof(test1)).encoding == encoding_latin1);
    CHECK(doc.load_buffer(test2, sizeof(test2)).encoding == encoding_latin1);
    CHECK(doc.load_buffer(test3, sizeof(test3)).encoding == encoding_latin1);
    CHECK(doc.load_buffer(test4, sizeof(test4)).encoding == encoding_latin1);
}

// Table-driven: each buffer is loaded with parse_fragment; the load must succeed and the reported
// encoding must equal the row's `encoding`.
TEST(parse_encoding_detect_auto)
{
    struct data_t
    {
        const char* contents;
        size_t size;
        xml_encoding encoding;
    };

    const data_t data[] =
    {
        // BOM
        { "\x00\x00\xfe\xff", 4, encoding_utf32_be },
        { "\xff\xfe\x00\x00", 4, encoding_utf32_le },
        { "\xfe\xff ", 4, encoding_utf16_be },
        { "\xff\xfe ", 4, encoding_utf16_le },
        { "\xef\xbb\xbf ", 4, encoding_utf8 },
        // automatic tag detection for < or
        // NOTE(review): the row fragment on the next line has lost its opening brace and string
        // contents (and the comment above has lost its ending); left exactly as found.
        ", 16, encoding_utf32_be },
        { "<\x00\x00\x00n\x00\x00\x00/\x00\x00\x00>\x00\x00\x00", 16, encoding_utf32_le },
        { "\x00<\x00?\x00n\x00?\x00>", 10, encoding_utf16_be },
        { "<\x00?\x00n\x00?\x00>\x00", 10, encoding_utf16_le },
        { "\x00<\x00n\x00/\x00>", 8, encoding_utf16_be },
        { "<\x00n\x00/\x00>\x00", 8, encoding_utf16_le },
        // NOTE(review): the section comment that stood here and the rows after it appear to be
        // lost; only the tail of one row remains on the next line, left exactly as found.
        ", 25, encoding_latin1 },
    };

    for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
    {
        xml_document doc;
        xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment);

        CHECK(result);
        CHECK(result.encoding == data[i].encoding);
    }
}

// Table-driven, same scheme as parse_encoding_detect_auto, for buffers that begin like a BOM or a
// tag but do not complete it.
TEST(parse_encoding_detect_auto_incomplete)
{
    struct data_t
    {
        const char* contents;
        size_t size;
        xml_encoding encoding;
    };

    const data_t data[] =
    {
        // BOM
        { "\x00\x00\xfe ", 4, encoding_utf8 },
        { "\x00\x00 ", 4, encoding_utf8 },
        { "\xff\xfe\x00 ", 4, encoding_utf16_le },
        { "\xfe ", 4, encoding_utf8 },
        { "\xff ", 4, encoding_utf8 },
        { "\xef\xbb ", 4, encoding_utf8 },
        { "\xef ", 4, encoding_utf8 },
        // automatic tag detection for < or
        // NOTE(review): the row fragment on the next line has lost its opening brace and the start
        // of its string, and in the rows after it the declared sizes (8, 25) no longer match the
        // remaining literal contents; left exactly as found.
        \x00", 8, encoding_utf16_le },
        { "\x00", 8, encoding_utf16_be },
        { "<\x00?n/\x00>\x00", 8, encoding_utf16_le },
        { "\x00 ", 2, encoding_utf8 },
        // NOTE(review): the section comment that stood here appears to be lost together with the
        // start of the row fragment on the next line.
        ", 25, encoding_utf8 },
        { "", 25, encoding_utf8 },
        { "", 25, encoding_utf8 },
        { "<_ABC encoding='latin1'/>", 25, encoding_utf8 },
    };

    for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); ++i)
    {
        xml_document doc;
        xml_parse_result result = doc.load_buffer(data[i].contents, data[i].size, parse_fragment);

        CHECK(result);
        CHECK(result.encoding == data[i].encoding);
    }
}