mirror of
https://github.com/zeux/pugixml.git
synced 2024-12-31 00:13:01 +08:00
Refactoring: Merged two chartype tables
git-svn-id: http://pugixml.googlecode.com/svn/trunk@672 99668b35-9821-0410-8761-19e4c4f06640
This commit is contained in:
parent
0e6d53c9e5
commit
86f9ea3c2c
@ -909,61 +909,35 @@ namespace
|
||||
192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
|
||||
};
|
||||
|
||||
enum chartypex
|
||||
enum chartypex_t
|
||||
{
|
||||
ctx_space = 1, // \r, \n, space, tab
|
||||
ctx_start_symbol = 2, // Any symbol > 127, a-z, A-Z, _
|
||||
ctx_digit = 4, // 0-9
|
||||
ctx_symbol = 8 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
|
||||
ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
|
||||
ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
|
||||
ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
|
||||
ctx_digit = 8, // 0-9
|
||||
ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
|
||||
};
|
||||
|
||||
const unsigned char chartypex_table[256] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0-15
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
|
||||
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, // 32-47
|
||||
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, // 48-63
|
||||
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 64-79
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 10, // 80-95
|
||||
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 96-111
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, // 112-127
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
|
||||
0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
|
||||
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 128+
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
|
||||
};
|
||||
|
||||
enum output_chartype_t
|
||||
{
|
||||
oct_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
|
||||
oct_special_attr = 2 // Any symbol >= 0 and < 32 (except \t), &, <, >, "
|
||||
};
|
||||
0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
|
||||
0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
|
||||
|
||||
const unsigned char output_chartype_table[256] =
|
||||
{
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
|
||||
0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32-47
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, // 48-63
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64-127
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128+
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
|
||||
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
|
||||
};
|
||||
|
||||
#ifdef PUGIXML_WCHAR_MODE
|
||||
@ -974,7 +948,6 @@ namespace
|
||||
|
||||
#define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
|
||||
#define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
|
||||
#define IS_OUTPUT_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, output_chartype_table)
|
||||
|
||||
bool is_little_endian()
|
||||
{
|
||||
@ -2708,14 +2681,14 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void text_output_escaped(xml_buffered_writer& writer, const char_t* s, output_chartype_t type)
|
||||
void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
|
||||
{
|
||||
while (*s)
|
||||
{
|
||||
const char_t* prev = s;
|
||||
|
||||
// While *s is a usual symbol
|
||||
while (!IS_OUTPUT_CHARTYPE(*s, type)) ++s;
|
||||
while (!IS_CHARTYPEX(*s, type)) ++s;
|
||||
|
||||
writer.write(prev, static_cast<size_t>(s - prev));
|
||||
|
||||
@ -2781,7 +2754,7 @@ namespace
|
||||
writer.write(a.name()[0] ? a.name() : default_name);
|
||||
writer.write('=', '"');
|
||||
|
||||
text_output_escaped(writer, a.value(), oct_special_attr);
|
||||
text_output_escaped(writer, a.value(), ctx_special_attr);
|
||||
|
||||
writer.write('"');
|
||||
}
|
||||
@ -2834,7 +2807,7 @@ namespace
|
||||
{
|
||||
writer.write('>');
|
||||
|
||||
text_output_escaped(writer, node.first_child().value(), oct_special_pcdata);
|
||||
text_output_escaped(writer, node.first_child().value(), ctx_special_pcdata);
|
||||
|
||||
writer.write('<', '/');
|
||||
writer.write(name);
|
||||
@ -2859,7 +2832,7 @@ namespace
|
||||
}
|
||||
|
||||
case node_pcdata:
|
||||
text_output_escaped(writer, node.value(), oct_special_pcdata);
|
||||
text_output_escaped(writer, node.value(), ctx_special_pcdata);
|
||||
if ((flags & format_raw) == 0) writer.write('\n');
|
||||
break;
|
||||
|
||||
@ -5153,7 +5126,7 @@ namespace
|
||||
bool check_string_to_number_format(const char_t* string)
|
||||
{
|
||||
// parse leading whitespace
|
||||
while (IS_CHARTYPEX(*string, ctx_space)) ++string;
|
||||
while (IS_CHARTYPE(*string, ct_space)) ++string;
|
||||
|
||||
// parse sign
|
||||
if (*string == '-') ++string;
|
||||
@ -5175,7 +5148,7 @@ namespace
|
||||
}
|
||||
|
||||
// parse trailing whitespace
|
||||
while (IS_CHARTYPEX(*string, ctx_space)) ++string;
|
||||
while (IS_CHARTYPE(*string, ct_space)) ++string;
|
||||
|
||||
return *string == 0;
|
||||
}
|
||||
@ -5319,10 +5292,10 @@ namespace
|
||||
{
|
||||
char_t ch = *it++;
|
||||
|
||||
if (IS_CHARTYPEX(ch, ctx_space))
|
||||
if (IS_CHARTYPE(ch, ct_space))
|
||||
{
|
||||
// replace whitespace sequence with single space
|
||||
while (IS_CHARTYPEX(*it, ctx_space)) it++;
|
||||
while (IS_CHARTYPE(*it, ct_space)) it++;
|
||||
|
||||
// avoid leading spaces
|
||||
if (write != buffer) *write++ = ' ';
|
||||
@ -5331,7 +5304,7 @@ namespace
|
||||
}
|
||||
|
||||
// remove trailing space
|
||||
if (write != buffer && IS_CHARTYPEX(write[-1], ctx_space)) write--;
|
||||
if (write != buffer && IS_CHARTYPE(write[-1], ct_space)) write--;
|
||||
|
||||
// zero-terminate
|
||||
*write = 0;
|
||||
@ -5760,7 +5733,7 @@ namespace pugi
|
||||
{
|
||||
const char_t* cur = _cur;
|
||||
|
||||
while (IS_CHARTYPEX(*cur, ctx_space)) ++cur;
|
||||
while (IS_CHARTYPE(*cur, ct_space)) ++cur;
|
||||
|
||||
// save lexeme position for error reporting
|
||||
_cur_lexeme_pos = cur;
|
||||
@ -7915,7 +7888,7 @@ namespace pugi
|
||||
// This is either a function call, or not - if not, we shall proceed with location path
|
||||
const char_t* state = _lexer.state();
|
||||
|
||||
while (IS_CHARTYPEX(*state, ctx_space)) ++state;
|
||||
while (IS_CHARTYPE(*state, ct_space)) ++state;
|
||||
|
||||
if (*state != '(') return parse_location_path();
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user