From e31d977c8096fd9566bda50d16e843042fe36a50 Mon Sep 17 00:00:00 2001 From: "arseny.kapoulkine" Date: Thu, 20 May 2010 22:15:23 +0000 Subject: [PATCH] Optimized debug mode parsing/saving by order of magnitude git-svn-id: http://pugixml.googlecode.com/svn/trunk@440 99668b35-9821-0410-8761-19e4c4f06640 --- src/pugixml.cpp | 78 +++++++++++++++++++++-------------------------- src/pugixpath.cpp | 53 +++++++++++++++----------------- 2 files changed, 58 insertions(+), 73 deletions(-) diff --git a/src/pugixml.cpp b/src/pugixml.cpp index e6784ae..75db295 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -922,16 +922,11 @@ namespace 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 }; - inline bool is_chartype(char_t c, chartype_t ct) - { - #ifdef PUGIXML_WCHAR_MODE - unsigned int ch = static_cast<unsigned int>(c); - - return !!((ch < 128 ? chartype_table[ch] : chartype_table[128]) & ct); - #else - return !!(chartype_table[static_cast<unsigned char>(c)] & ct); - #endif - } +#ifdef PUGIXML_WCHAR_MODE + #define IS_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ? chartype_table[static_cast<unsigned int>(c)] : chartype_table[128]) & (ct)) +#else + #define IS_CHARTYPE(c, ct) (chartype_table[static_cast<unsigned char>(c)] & (ct)) +#endif enum output_chartype_t { @@ -961,16 +956,11 @@ namespace 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - inline bool is_output_chartype(char_t c, output_chartype_t ct) - { - #ifdef PUGIXML_WCHAR_MODE - unsigned int ch = static_cast<unsigned int>(c); - - return !!((ch < 128 ? output_chartype_table[ch] : output_chartype_table[128]) & ct); - #else - return !!(output_chartype_table[static_cast<unsigned char>(c)] & ct); - #endif - } +#ifdef PUGIXML_WCHAR_MODE + #define IS_OUTPUT_CHARTYPE(c, ct) ((static_cast<unsigned int>(c) < 128 ?
output_chartype_table[static_cast<unsigned int>(c)] : output_chartype_table[128]) & (ct)) +#else + #define IS_OUTPUT_CHARTYPE(c, ct) (output_chartype_table[static_cast<unsigned char>(c)] & (ct)) +#endif template <bool _1> struct opt1_to_type { @@ -1488,7 +1478,7 @@ namespace while (true) { - while (!is_chartype(*s, ct_parse_comment)) ++s; + while (!IS_CHARTYPE(*s, ct_parse_comment)) ++s; if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair { @@ -1518,7 +1508,7 @@ namespace while (true) { - while (!is_chartype(*s, ct_parse_cdata)) ++s; + while (!IS_CHARTYPE(*s, ct_parse_cdata)) ++s; if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair { @@ -1553,7 +1543,7 @@ namespace while (true) { - while (!is_chartype(*s, ct_parse_pcdata)) ++s; + while (!IS_CHARTYPE(*s, ct_parse_pcdata)) ++s; if (*s == '<') // PCDATA ends here { @@ -1608,19 +1598,19 @@ namespace gap g; // trim leading whitespaces - if (opt_wnorm && is_chartype(*s, ct_space)) + if (opt_wnorm && IS_CHARTYPE(*s, ct_space)) { char_t* str = s; do ++str; - while (is_chartype(*str, ct_space)); + while (IS_CHARTYPE(*str, ct_space)); g.push(s, str - s); } while (true) { - while (!is_chartype(*s, (opt_wnorm || opt_wconv) ? ct_parse_attr_ws : ct_parse_attr)) ++s; + while (!IS_CHARTYPE(*s, (opt_wnorm || opt_wconv) ? ct_parse_attr_ws : ct_parse_attr)) ++s; if (*s == end_quote) { @@ -1629,25 +1619,25 @@ namespace if (opt_wnorm) { do *str-- = 0; - while (is_chartype(*str, ct_space)); + while (IS_CHARTYPE(*str, ct_space)); } else *str = 0; return s + 1; } - else if (opt_wnorm && is_chartype(*s, ct_space)) + else if (opt_wnorm && IS_CHARTYPE(*s, ct_space)) { *s++ = ' '; - if (is_chartype(*s, ct_space)) + if (IS_CHARTYPE(*s, ct_space)) { char_t* str = s + 1; - while (is_chartype(*str, ct_space)) ++str; + while (IS_CHARTYPE(*str, ct_space)) ++str; g.push(s, str - s); } } - else if (opt_wconv && is_chartype(*s, ct_space)) + else if (opt_wconv && IS_CHARTYPE(*s, ct_space)) { if (opt_eol) { @@ -1719,7 +1709,7 @@ namespace xml_allocator alloc; // Parser utilities.
- #define SKIPWS() { while (is_chartype(*s, ct_space)) ++s; } + #define SKIPWS() { while (IS_CHARTYPE(*s, ct_space)) ++s; } #define OPTSET(OPT) ( optmsk & OPT ) #define PUSHNODE(TYPE) { cursor = append_node(cursor, alloc, TYPE); } #define POPNODE() { cursor = cursor->parent; } @@ -1962,15 +1952,15 @@ namespace // parse node contents, starting with question mark ++s; - if (!is_chartype(*s, ct_start_symbol)) // bad PI + if (!IS_CHARTYPE(*s, ct_start_symbol)) // bad PI THROW_ERROR(status_bad_pi, s); else if (OPTSET(parse_pi) || OPTSET(parse_declaration)) { char_t* mark = s; - SCANWHILE(is_chartype(*s, ct_symbol)); // Read PI target + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Read PI target CHECK_ERROR(status_bad_pi, s); - if (!is_chartype(*s, ct_space) && *s != '?') // Target has to end with space or ? + if (!IS_CHARTYPE(*s, ct_space) && *s != '?') // Target has to end with space or ? THROW_ERROR(status_bad_pi, s); ENDSEG(); @@ -2091,38 +2081,38 @@ namespace ++s; LOC_TAG: - if (is_chartype(*s, ct_start_symbol)) // '<#...' + if (IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' { PUSHNODE(node_element); // Append a new node to the tree. cursor->name = s; - SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator. + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. ENDSEG(); // Save char in 'ch', terminate & step over. if (ch == '>') { // end of tag } - else if (is_chartype(ch, ct_space)) + else if (IS_CHARTYPE(ch, ct_space)) { LOC_ATTRIBUTES: while (true) { SKIPWS(); // Eat any whitespace. - if (is_chartype(*s, ct_start_symbol)) // <... #... + if (IS_CHARTYPE(*s, ct_start_symbol)) // <... #... { xml_attribute_struct* a = append_attribute_ll(cursor, alloc); // Make space for this attribute. a->name = s; // Save the offset. - SCANWHILE(is_chartype(*s, ct_symbol)); // Scan for a terminator. + SCANWHILE(IS_CHARTYPE(*s, ct_symbol)); // Scan for a terminator. CHECK_ERROR(status_bad_attribute, s); ENDSEG(); // Save char in 'ch', terminate & step over. 
CHECK_ERROR(status_bad_attribute, s); - if (is_chartype(ch, ct_space)) + if (IS_CHARTYPE(ch, ct_space)) { SKIPWS(); // Eat any whitespace. CHECK_ERROR(status_bad_attribute, s); @@ -2148,7 +2138,7 @@ namespace // After this line the loop continues from the start; // Whitespaces, / and > are ok, symbols and EOF are wrong, // everything else will be detected - if (is_chartype(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); + if (IS_CHARTYPE(*s, ct_start_symbol)) THROW_ERROR(status_bad_attribute, s); } else THROW_ERROR(status_bad_attribute, s); } @@ -2210,7 +2200,7 @@ namespace char_t* name = cursor->name; if (!name) THROW_ERROR(status_end_element_mismatch, s); - while (is_chartype(*s, ct_symbol)) + while (IS_CHARTYPE(*s, ct_symbol)) { if (*s++ != *name++) THROW_ERROR(status_end_element_mismatch, s); } @@ -2681,7 +2671,7 @@ namespace const char_t* prev = s; // While *s is a usual symbol - while (!is_output_chartype(*s, type)) ++s; + while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s; writer.write(prev, static_cast<size_t>(s - prev)); diff --git a/src/pugixpath.cpp b/src/pugixpath.cpp index bc51b4d..e5bf051 100644 --- a/src/pugixpath.cpp +++ b/src/pugixpath.cpp @@ -82,16 +82,11 @@ namespace 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10 }; - inline bool is_chartypex(char_t c, chartypex ct) - { - #ifdef PUGIXML_WCHAR_MODE - unsigned int ch = static_cast<unsigned int>(c); - - return !!((ch < 128 ? chartypex_table[ch] : chartypex_table[128]) & ct); - #else - return !!(chartypex_table[static_cast<unsigned char>(c)] & ct); - #endif - } +#ifdef PUGIXML_WCHAR_MODE + #define IS_CHARTYPEX(c, ct) ((static_cast<unsigned int>(c) < 128 ?
chartypex_table[static_cast<unsigned int>(c)] : chartypex_table[128]) & (ct)) +#else + #define IS_CHARTYPEX(c, ct) (chartypex_table[static_cast<unsigned char>(c)] & (ct)) +#endif bool starts_with(const char_t* string, const char_t* pattern) { @@ -401,7 +396,7 @@ namespace bool check_string_to_number_format(const char_t* string) { // parse leading whitespace - while (is_chartypex(*string, ctx_space)) ++string; + while (IS_CHARTYPEX(*string, ctx_space)) ++string; // parse sign if (*string == '-') ++string; @@ -409,21 +404,21 @@ namespace if (!*string) return false; // if there is no integer part, there should be a decimal part with at least one digit - if (!is_chartypex(string[0], ctx_digit) && (string[0] != '.' || !is_chartypex(string[1], ctx_digit))) return false; + if (!IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !IS_CHARTYPEX(string[1], ctx_digit))) return false; // parse integer part - while (is_chartypex(*string, ctx_digit)) ++string; + while (IS_CHARTYPEX(*string, ctx_digit)) ++string; // parse decimal part if (*string == '.') { ++string; - while (is_chartypex(*string, ctx_digit)) ++string; + while (IS_CHARTYPEX(*string, ctx_digit)) ++string; } // parse trailing whitespace - while (is_chartypex(*string, ctx_space)) ++string; + while (IS_CHARTYPEX(*string, ctx_space)) ++string; return *string == 0; } @@ -919,7 +914,7 @@ namespace pugi { contents_clear(); - while (is_chartypex(*m_cur, ctx_space)) ++m_cur; + while (IS_CHARTYPEX(*m_cur, ctx_space)) ++m_cur; switch (*m_cur) { @@ -1050,13 +1045,13 @@ namespace pugi m_cur += 2; m_cur_lexeme = lex_double_dot; } - else if (is_chartypex(*(m_cur+1), ctx_digit)) + else if (IS_CHARTYPEX(*(m_cur+1), ctx_digit)) { m_cur_lexeme_contents.begin = m_cur; // .
++m_cur; - while (is_chartypex(*m_cur, ctx_digit)) m_cur++; + while (IS_CHARTYPEX(*m_cur, ctx_digit)) m_cur++; m_cur_lexeme_contents.end = m_cur; @@ -1110,28 +1105,28 @@ namespace pugi break; default: - if (is_chartypex(*m_cur, ctx_digit)) + if (IS_CHARTYPEX(*m_cur, ctx_digit)) { m_cur_lexeme_contents.begin = m_cur; - while (is_chartypex(*m_cur, ctx_digit)) m_cur++; + while (IS_CHARTYPEX(*m_cur, ctx_digit)) m_cur++; - if (*m_cur == '.' && is_chartypex(*(m_cur+1), ctx_digit)) + if (*m_cur == '.' && IS_CHARTYPEX(*(m_cur+1), ctx_digit)) { m_cur++; - while (is_chartypex(*m_cur, ctx_digit)) m_cur++; + while (IS_CHARTYPEX(*m_cur, ctx_digit)) m_cur++; } m_cur_lexeme_contents.end = m_cur; m_cur_lexeme = lex_number; } - else if (is_chartypex(*m_cur, ctx_start_symbol)) + else if (IS_CHARTYPEX(*m_cur, ctx_start_symbol)) { m_cur_lexeme_contents.begin = m_cur; - while (is_chartypex(*m_cur, ctx_symbol)) m_cur++; + while (IS_CHARTYPEX(*m_cur, ctx_symbol)) m_cur++; if (m_cur[0] == ':') { @@ -1139,17 +1134,17 @@ namespace pugi { m_cur += 2; // :* } - else if (is_chartypex(m_cur[1], ctx_symbol)) // namespace test qname + else if (IS_CHARTYPEX(m_cur[1], ctx_symbol)) // namespace test qname { m_cur++; // : - while (is_chartypex(*m_cur, ctx_symbol)) m_cur++; + while (IS_CHARTYPEX(*m_cur, ctx_symbol)) m_cur++; } } m_cur_lexeme_contents.end = m_cur; - while (is_chartypex(*m_cur, ctx_space)) ++m_cur; + while (IS_CHARTYPEX(*m_cur, ctx_space)) ++m_cur; m_cur_lexeme = lex_string; } @@ -2276,7 +2271,7 @@ namespace pugi for (string_t::const_iterator it = s.begin(); it != s.end(); ++it) { - if (is_chartypex(*it, ctx_space)) + if (IS_CHARTYPEX(*it, ctx_space)) { if (!r.empty() && r[r.size() - 1] != ' ') r += ' '; @@ -3342,7 +3337,7 @@ namespace pugi // This is either a function call, or not - if not, we shall proceed with location path const char_t* state = m_lexer.state(); - while (is_chartypex(*state, ctx_space)) ++state; + while (IS_CHARTYPEX(*state, ctx_space)) ++state; if (*state != '(') 
return parse_location_path();