From c6607740a0ebc9abfc74169d54236e5a3c5b84f3 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Wed, 11 Sep 2019 21:35:03 -0700
Subject: [PATCH] Never escape > in attribute values

According to XML spec, > sometimes needs to be escaped in PCDATA (when it
occurs as a ]]> pattern), but it doesn't need to be escaped in attribute
values.

Contributes to #272.
---
 src/pugixml.cpp      | 4 ++--
 tests/test_write.cpp | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index afe2321..90c48b2 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -1861,7 +1861,7 @@ PUGI__NS_BEGIN
 	enum chartypex_t
 	{
 		ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
-		ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, >, ", '
+		ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, ", '
 		ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
 		ctx_digit = 8, // 0-9
 		ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
@@ -1872,7 +1872,7 @@ PUGI__NS_BEGIN
 		3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15
 		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
 		0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
-		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
+		24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63
 
 		0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
 		20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
diff --git a/tests/test_write.cpp b/tests/test_write.cpp
index 797ddd0..0410e82 100644
--- a/tests/test_write.cpp
+++ b/tests/test_write.cpp
@@ -193,8 +193,8 @@ TEST_XML(write_escape, "<node attr=''>text</node>")
 	doc.child(STR("node")).attribute(STR("attr")) = STR("<>'\"&\x04\r\n\t");
 	doc.child(STR("node")).first_child().set_value(STR("<>'\"&\x04\r\n\t"));
 
-	CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\r\n\t</node>"));
-	CHECK_NODE_EX(doc, STR("<node attr='&lt;&gt;&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\r\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
+	CHECK_NODE(doc, STR("<node attr=\"&lt;>'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\r\n\t</node>"));
+	CHECK_NODE_EX(doc, STR("<node attr='&lt;>&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\r\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
 }
 
 TEST_XML(write_escape_roundtrip, "<node attr=''>text</node>")
@@ -208,8 +208,8 @@ TEST_XML(write_escape_roundtrip, "<node attr=''>text</node>")
 
 	// Note: this string is almost identical to the string from write_escape with the exception of \r
 	// \r in PCDATA doesn't roundtrip because it has to go through newline conversion (which could be disabled, but is active by default)
-	CHECK_NODE(doc, STR("<node attr=\"&lt;&gt;'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\n\t</node>"));
-	CHECK_NODE_EX(doc, STR("<node attr='&lt;&gt;&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
+	CHECK_NODE(doc, STR("<node attr=\"&lt;>'&quot;&amp;&#04;&#13;&#10;&#09;\">&lt;&gt;'\"&amp;&#04;\n\t</node>"));
+	CHECK_NODE_EX(doc, STR("<node attr='&lt;>&apos;\"&amp;&#04;&#13;&#10;&#09;'>&lt;&gt;'\"&amp;&#04;\n\t</node>"), STR(""), format_raw | format_attribute_single_quote);
 }
 
 TEST_XML(write_escape_unicode, "<node attr='&#x3c00;'/>")