0
0
mirror of https://github.com/zeux/pugixml.git synced 2025-01-15 10:37:57 +08:00

Refactored PI/declaration parsing; non-top-level declarations now result in parsing errors

git-svn-id: http://pugixml.googlecode.com/svn/trunk@515 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
arseny.kapoulkine 2010-06-12 07:30:13 +00:00
parent f2050e5170
commit 0ed895d79c
2 changed files with 60 additions and 83 deletions

View File

@ -2044,105 +2044,79 @@ namespace
// parse node contents, starting with question mark // parse node contents, starting with question mark
++s; ++s;
if (!IS_CHARTYPE(*s, ct_start_symbol)) // bad PI // read PI target
THROW_ERROR(status_bad_pi, s); char_t* target = s;
else if (OPTSET(parse_pi) || OPTSET(parse_declaration))
if (!IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_pi, s);
SCANWHILE(IS_CHARTYPE(*s, ct_symbol));
CHECK_ERROR(status_bad_pi, s);
// determine node type; stricmp / strcasecmp is not portable
bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
if (declaration ? OPTSET(parse_declaration) : OPTSET(parse_pi))
{ {
char_t* mark = s; if (declaration)
SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Read PI target
CHECK_ERROR(status_bad_pi, s);
if (!IS_CHARTYPE(*s, ct_space) && *s != '?') // Target has to end with space or ?
THROW_ERROR(status_bad_pi, s);
ENDSEG();
if (*s == 0 && endch != '>') THROW_ERROR(status_bad_pi, s);
if (ch == '?') // nothing except target present
{ {
if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s); // disallow non top-level declarations
s += (*s == '>'); if ((cursor->header & xml_memory_page_type_mask) != node_document) THROW_ERROR(status_bad_pi, s);
// stricmp / strcasecmp is not portable PUSHNODE(node_declaration);
if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
&& (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
{
if (OPTSET(parse_declaration))
{
PUSHNODE(node_declaration);
cursor->name = mark;
POPNODE();
}
}
else if (OPTSET(parse_pi))
{
PUSHNODE(node_pi); // Append a new node on the tree.
cursor->name = mark;
POPNODE();
}
}
// stricmp / strcasecmp is not portable
else if ((mark[0] == 'x' || mark[0] == 'X') && (mark[1] == 'm' || mark[1] == 'M')
&& (mark[2] == 'l' || mark[2] == 'L') && mark[3] == 0)
{
if (OPTSET(parse_declaration))
{
PUSHNODE(node_declaration);
cursor->name = mark;
// scan for tag end
mark = s;
SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
CHECK_ERROR(status_bad_pi, s);
// replace ending ? with / to terminate properly
*s = '/';
// parse attributes
s = mark;
// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
}
} }
else else
{ {
if (OPTSET(parse_pi)) PUSHNODE(node_pi);
{ }
PUSHNODE(node_pi); // Append a new node on the tree.
cursor->name = mark; cursor->name = target;
}
// ch is a whitespace character, skip whitespaces ENDSEG();
// parse value/attributes
if (ch == '?')
{
// empty node
if (!ENDSWITH(*s, '>')) THROW_ERROR(status_bad_pi, s);
s += (*s == '>');
POPNODE();
}
else if (IS_CHARTYPE(ch, ct_space))
{
SKIPWS(); SKIPWS();
// scan for tag end
char_t* value = s;
SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
CHECK_ERROR(status_bad_pi, s); CHECK_ERROR(status_bad_pi, s);
mark = s; if (declaration)
SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'.
CHECK_ERROR(status_bad_pi, s);
ENDSEG();
s += (*s == '>'); // Step over >
if (OPTSET(parse_pi))
{ {
cursor->value = mark; // replace ending ? with / so that 'element' terminates properly
*s = '/';
// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
s = value;
}
else
{
// store value and step over >
cursor->value = value;
POPNODE(); POPNODE();
ENDSEG();
s += (*s == '>');
} }
} }
else THROW_ERROR(status_bad_pi, s);
} }
else // not parsing PI else
{ {
SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>')); // Look for '?>'. // scan for tag end
SCANFOR(s[0] == '?' && ENDSWITH(s[1], '>'));
CHECK_ERROR(status_bad_pi, s); CHECK_ERROR(status_bad_pi, s);
s += (s[1] == '>' ? 2 : 1); s += (s[1] == '>' ? 2 : 1);

View File

@ -513,10 +513,13 @@ TEST_XML(dom_node_copy_crossdoc, "<node/>")
CHECK_NODE(newdoc, STR("<node />")); CHECK_NODE(newdoc, STR("<node />"));
} }
TEST_XML_FLAGS(dom_node_copy_types, "<root><?xml version='1.0'?><?pi value?><!--comment--><node id='1'>pcdata<![CDATA[cdata]]></node></root>", parse_default | parse_pi | parse_comments | parse_declaration) TEST_XML_FLAGS(dom_node_copy_types, "<?xml version='1.0'?><root><?pi value?><!--comment--><node id='1'>pcdata<![CDATA[cdata]]></node></root>", parse_default | parse_pi | parse_comments | parse_declaration)
{ {
doc.append_copy(doc.child(STR("root"))); doc.append_copy(doc.child(STR("root")));
CHECK_NODE(doc, STR("<root><?xml version=\"1.0\"?><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?xml version=\"1.0\"?><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>")); CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
doc.insert_copy_before(doc.first_child(), doc.first_child());
CHECK_NODE(doc, STR("<?xml version=\"1.0\"?><?xml version=\"1.0\"?><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root><root><?pi value?><!--comment--><node id=\"1\">pcdata<![CDATA[cdata]]></node></root>"));
} }
TEST_XML(dom_attr_assign_large_number, "<node attr1='' attr2='' />") TEST_XML(dom_attr_assign_large_number, "<node attr1='' attr2='' />")