2017-06-22 20:41:08 -07:00
# include "test.hpp"
2010-07-19 09:57:32 +00:00
2014-02-11 06:45:27 +00:00
# include "writer_string.hpp"
2017-06-22 20:41:08 -07:00
using namespace pugi ;
2010-07-19 09:57:32 +00:00
// Loads PI-only input under flag sets that do not include parse_pi and checks
// that the document ends up with no child nodes.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_pi_skip)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(" <?pi?><?pi value?> "), flags));
        CHECK(!doc.first_child());

        // A tag-like sequence inside the PI body must not produce a node either.
        CHECK(doc.load_string(STR(" <?pi <tag/> value?> "), flags));
        CHECK(!doc.first_child());
    }
}
// Loads two processing instructions with parse_pi set and checks that the
// first and last children are distinct node_pi nodes with the expected
// name/value pairs.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_pi_parse)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <?pi1?><?pi2 value?> "), parse_fragment | parse_pi));

    xml_node pi1 = doc.first_child();
    xml_node pi2 = doc.last_child();

    CHECK(pi1 != pi2);

    CHECK(pi1.type() == node_pi);
    CHECK_STRING(pi1.name(), STR(" pi1 "));
    CHECK_STRING(pi1.value(), STR(" "));

    CHECK(pi2.type() == node_pi);
    CHECK_STRING(pi2.name(), STR(" pi2 "));
    CHECK_STRING(pi2.value(), STR(" value "));
}
2010-08-04 13:07:35 +00:00
// Loads a PI whose target and value are separated by mixed whitespace
// (\r, \n, \t, blanks) and checks the resulting node_pi name and value.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_pi_parse_spaces)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <?target \r \n \t value ?> "), parse_fragment | parse_pi));

    xml_node pi = doc.first_child();

    CHECK(pi.type() == node_pi);
    CHECK_STRING(pi.name(), STR(" target "));
    CHECK_STRING(pi.value(), STR(" value "));
}
2010-07-19 09:57:32 +00:00
// Feeds malformed/truncated processing instructions to the parser and checks
// that every load reports status_bad_pi. The first group is run both with and
// without parse_pi; the trailing three inputs are run with parse_pi only.
// NOTE(review): literals kept byte-for-byte. Several inputs below are now
// textually identical, which suggests whitespace inside the literals was
// collapsed/padded during extraction - confirm against upstream.
TEST(parse_pi_error)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(" <? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?> "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?#?> "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name> "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name ? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name value "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name value "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name value "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name value ? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name value ? "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name value ? > "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name value ? > "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name& "), flags).status == status_bad_pi);
        CHECK(doc.load_string(STR(" <?name&? "), flags).status == status_bad_pi);
    }

    // These inputs are only exercised with parse_pi set.
    CHECK(doc.load_string(STR(" <?xx#?> "), parse_fragment | parse_pi).status == status_bad_pi);
    CHECK(doc.load_string(STR(" <?name&?> "), parse_fragment | parse_pi).status == status_bad_pi);
    CHECK(doc.load_string(STR(" <?name& x?> "), parse_fragment | parse_pi).status == status_bad_pi);
}
2017-02-01 21:07:46 -08:00
// Parses the first 8 bytes of two in-place buffers: buf1 is expected to load
// successfully, buf2 is expected to fail with status_bad_pi.
// NOTE(review): literals kept byte-for-byte. The hard-coded length 8
// presumably matched the unpadded literals ("<?name?>" is 8 characters); the
// blanks padding them look like an extraction artifact - confirm against
// upstream.
TEST(parse_pi_error_buffer_boundary)
{
    char buf1[] = " <?name?> ";
    char buf2[] = " <?name?x ";

    xml_document doc;

    CHECK(doc.load_buffer_inplace(buf1, 8, parse_fragment | parse_pi));
    CHECK(doc.load_buffer_inplace(buf2, 8, parse_fragment | parse_pi).status == status_bad_pi);
}
2010-07-19 09:57:32 +00:00
// Loads comment-only input without parse_comments and checks that the
// document has no child nodes.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_comments_skip)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <!----><!--value--> "), parse_fragment));
    CHECK(!doc.first_child());
}
// Loads two comments with parse_comments set and checks that the first and
// last children are distinct node_comment nodes with the expected name/value.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_comments_parse)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <!----><!--value--> "), parse_fragment | parse_comments));

    xml_node c1 = doc.first_child();
    xml_node c2 = doc.last_child();

    CHECK(c1 != c2);

    CHECK(c1.type() == node_comment);
    CHECK_STRING(c1.name(), STR(" "));
    CHECK_STRING(c1.value(), STR(" "));

    CHECK(c2.type() == node_comment);
    CHECK_STRING(c2.name(), STR(" "));
    CHECK_STRING(c2.value(), STR(" value "));
}
// Loads a comment containing \r, \r\n-style and \n sequences without
// parse_eol and checks that the comment value keeps them unchanged.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_comments_parse_no_eol)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <!-- \r \r val1 \r val2 \r \n val3 \n val4 \r \r --> "), parse_fragment | parse_comments));

    xml_node c = doc.first_child();

    CHECK(c.type() == node_comment);
    CHECK_STRING(c.value(), STR(" \r \r val1 \r val2 \r \n val3 \n val4 \r \r "));
}
// Loads a comment containing mixed line endings with parse_eol set and checks
// the comment value against a string that uses \n only.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_comments_parse_eol)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <!-- \r \r val1 \r val2 \r \n val3 \n val4 \r \r --> "), parse_fragment | parse_comments | parse_eol));

    xml_node c = doc.first_child();

    CHECK(c.type() == node_comment);
    CHECK_STRING(c.value(), STR(" \n \n val1 \n val2 \n val3 \n val4 \n \n "));
}
// Feeds malformed/unterminated comments to the parser under three flag sets
// (comments skipped, parsed, parsed with parse_eol) and checks that every load
// reports status_bad_comment.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_comments_error)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_comments, parse_fragment | parse_comments | parse_eol};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(" <!- "), flags).status == status_bad_comment);
        CHECK(doc.load_string(STR(" <!-- "), flags).status == status_bad_comment);
        CHECK(doc.load_string(STR(" <!--v "), flags).status == status_bad_comment);
        CHECK(doc.load_string(STR(" <!--> "), flags).status == status_bad_comment);
        CHECK(doc.load_string(STR(" <!---> "), flags).status == status_bad_comment);
        CHECK(doc.load_string(STR(" <!-- <!-- --><!- --> "), flags).status == status_bad_comment);
    }
}
// Loads CDATA-only input without parse_cdata and checks that the document has
// no child nodes.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_cdata_skip)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <![CDATA[]]><![CDATA[value]]> "), parse_fragment));
    CHECK(!doc.first_child());
}
2010-08-04 10:01:16 +00:00
// Loads an element mixing CDATA sections with plain text, without
// parse_cdata, and checks the document against the expected serialization
// in which only the plain text remains.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_cdata_skip_contents)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node><![CDATA[]]>hello<![CDATA[value]]>, world!</node> "), parse_fragment));
    CHECK_NODE(doc, STR(" <node>hello, world!</node> "));
}
2010-07-19 09:57:32 +00:00
// Loads two CDATA sections with parse_cdata set and checks that the first and
// last children are distinct node_cdata nodes with the expected name/value.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_cdata_parse)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <![CDATA[]]><![CDATA[value]]> "), parse_fragment | parse_cdata));

    xml_node c1 = doc.first_child();
    xml_node c2 = doc.last_child();

    CHECK(c1 != c2);

    CHECK(c1.type() == node_cdata);
    CHECK_STRING(c1.name(), STR(" "));
    CHECK_STRING(c1.value(), STR(" "));

    CHECK(c2.type() == node_cdata);
    CHECK_STRING(c2.name(), STR(" "));
    CHECK_STRING(c2.value(), STR(" value "));
}
// Loads a CDATA section containing mixed line endings without parse_eol and
// checks that the node value keeps them unchanged.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_cdata_parse_no_eol)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <![CDATA[ \r \r val1 \r val2 \r \n val3 \n val4 \r \r ]]> "), parse_fragment | parse_cdata));

    xml_node c = doc.first_child();

    CHECK(c.type() == node_cdata);
    CHECK_STRING(c.value(), STR(" \r \r val1 \r val2 \r \n val3 \n val4 \r \r "));
}
// Loads a CDATA section containing mixed line endings with parse_eol set and
// checks the node value against a string that uses \n only.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_cdata_parse_eol)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <![CDATA[ \r \r val1 \r val2 \r \n val3 \n val4 \r \r ]]> "), parse_fragment | parse_cdata | parse_eol));

    xml_node c = doc.first_child();

    CHECK(c.type() == node_cdata);
    CHECK_STRING(c.value(), STR(" \n \n val1 \n val2 \n val3 \n val4 \n \n "));
}
// Feeds truncated/unterminated CDATA sections to the parser under three flag
// sets (CDATA skipped, parsed, parsed with parse_eol) and checks that every
// load reports status_bad_cdata.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_cdata_error)
{
    xml_document doc;

    unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_cdata, parse_fragment | parse_cdata | parse_eol};

    for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
    {
        unsigned int flags = flag_sets[i];

        CHECK(doc.load_string(STR(" <![ "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![C "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CD "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDA "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDAT "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA[ "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA[] "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA[data "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA[data] "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA[data]] "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA[> "), flags).status == status_bad_cdata);
        CHECK(doc.load_string(STR(" <![CDATA[ <![CDATA[]]><![CDATA ]]> "), flags).status == status_bad_cdata);
    }
}
// Without parse_ws_pcdata: a blank-only fragment must yield no children, and
// in a nested document <root> must have exactly one child (first == last)
// which itself has no children.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_ws_pcdata_skip)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" "), parse_fragment));
    CHECK(!doc.first_child());

    CHECK(doc.load_string(STR(" <root> <node> </node> </root> "), parse_minimal));

    xml_node root = doc.child(STR(" root "));

    CHECK(root.first_child() == root.last_child());
    CHECK(!root.first_child().first_child());
}
// With parse_ws_pcdata: <root> is expected to have three children (pcdata,
// element, pcdata), and the middle element is expected to have a single
// pcdata child; all pcdata values are compared against a blank string.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_ws_pcdata_parse)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <root> <node> </node> </root> "), parse_minimal | parse_ws_pcdata));

    xml_node root = doc.child(STR(" root "));

    xml_node c1 = root.first_child();
    xml_node c2 = c1.next_sibling();
    xml_node c3 = c2.next_sibling();

    CHECK(c3 == root.last_child());

    CHECK(c1.type() == node_pcdata);
    CHECK_STRING(c1.value(), STR(" "));

    CHECK(c3.type() == node_pcdata);
    CHECK_STRING(c3.value(), STR(" "));

    CHECK(c2.first_child() == c2.last_child());
    CHECK(c2.first_child().type() == node_pcdata);
    CHECK_STRING(c2.first_child().value(), STR(" "));
}
2011-12-09 05:24:07 +00:00
// Returns the number of nodes in the subtree rooted at n: n itself (counted
// as 1) plus, recursively, every child reachable through
// first_child()/next_sibling().
static int get_tree_node_count(xml_node n)
{
    int result = 1;

    for (xml_node c = n.first_child(); c; c = c.next_sibling())
        result += get_tree_node_count(c);

    return result;
}
// Table-driven check of PCDATA/whitespace handling under three parsing modes:
// parse_default, parse_default | parse_ws_pcdata and
// parse_default | parse_ws_pcdata_single. Each row is run under every mode
// selected by its mask; the load result, the serialized tree (CHECK_NODE) and
// the value of get_tree_node_count(doc) are compared against the row.
// NOTE(review): literals kept byte-for-byte. Several rows are now textually
// identical, which suggests whitespace inside the literals was collapsed or
// padded during extraction - confirm against upstream.
TEST(parse_ws_pcdata_permutations)
{
    struct test_data_t
    {
        unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
        const char_t* source;
        const char_t* result;
        int nodes; // negative if parsing should fail
    };

    test_data_t test_data[] =
    {
        // external pcdata should be discarded (whitespace or not)
        {7, STR(" ext1<node/> "), STR(" <node/> "), 2},
        {7, STR(" ext1<node/>ext2 "), STR(" <node/> "), 2},
        {7, STR(" <node/> "), STR(" <node/> "), 2},
        {7, STR(" <node/> "), STR(" <node/> "), 2},
        {7, STR(" <node/> "), STR(" <node/> "), 2},
        // inner pcdata should be preserved
        {7, STR(" <node>inner</node> "), STR(" <node>inner</node> "), 3},
        {7, STR(" <node>inner1<child/>inner2</node> "), STR(" <node>inner1<child/>inner2</node> "), 5},
        {7, STR(" <node>inner1<child>deep</child>inner2</node> "), STR(" <node>inner1<child>deep</child>inner2</node> "), 6},
        // empty pcdata nodes should never be created
        {7, STR(" <node>inner1<child></child>inner2</node> "), STR(" <node>inner1<child/>inner2</node> "), 5},
        {7, STR(" <node><child></child>inner2</node> "), STR(" <node><child/>inner2</node> "), 4},
        {7, STR(" <node>inner1<child></child></node> "), STR(" <node>inner1<child/></node> "), 4},
        {7, STR(" <node><child></child></node> "), STR(" <node><child/></node> "), 3},
        // comments, pi or other nodes should not cause pcdata creation either
        {7, STR(" <node><!----><child><?pi?></child><![CDATA[x]]></node> "), STR(" <node><child/><![CDATA[x]]></node> "), 4},
        // leading/trailing pcdata whitespace should be preserved (note: this will change if parse_ws_pcdata_trim is introduced)
        {7, STR(" <node> \t \t inner1<child> deep </child> \t \n inner2 \n \t </node> "), STR(" <node> \t \t inner1<child> deep </child> \t \n inner2 \n \t </node> "), 6},
        // whitespace-only pcdata preservation depends on the parsing mode
        {1, STR(" <node> \n \t <child> </child> \n \t <child> <deep> </deep> </child> \n \t <!----> \n \t </node> "), STR(" <node><child/><child><deep/></child></node> "), 5},
        {2, STR(" <node> \n \t <child> </child> \n \t <child> <deep> </deep> </child> \n \t <!----> \n \t </node> "), STR(" <node> \n \t <child> </child> \n \t <child> <deep> </deep> </child> \n \t \n \t </node> "), 13},
        {4, STR(" <node> \n \t <child> </child> \n \t <child> <deep> </deep> </child> \n \t <!----> \n \t </node> "), STR(" <node><child> </child><child><deep> </deep></child></node> "), 7},
        // current implementation of parse_ws_pcdata_single has an unfortunate bug; reproduce it here
        {4, STR(" <node> \t \t <!----> \n \n </node> "), STR(" <node> \n \n </node> "), 3},
        // error case: terminate PCDATA in the middle
        {7, STR(" <node>abcdef "), STR(" <node>abcdef</node> "), -3},
        {5, STR(" <node> "), STR(" <node/> "), -2},
        {2, STR(" <node> "), STR(" <node> </node> "), -3},
        // error case: terminate PCDATA as early as possible
        {7, STR(" <node> "), STR(" <node/> "), -2},
        {7, STR(" <node>a "), STR(" <node>a</node> "), -3},
        {5, STR(" <node> "), STR(" <node/> "), -2},
        {2, STR(" <node> "), STR(" <node> </node> "), -3},
    };

    // Loop-invariant: bit N of test_data_t::mask selects mode_flags[N].
    const unsigned int mode_flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};

    for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
    {
        const test_data_t& td = test_data[i];

        for (int flag = 0; flag < 3; ++flag)
        {
            if (td.mask & (1 << flag))
            {
                xml_document doc;

                // A positive node count means the load is expected to succeed.
                CHECK((td.nodes > 0) == doc.load_string(td.source, mode_flags[flag]));
                CHECK_NODE(doc, td.result);

                // The sign of td.nodes only encodes success/failure; compare magnitudes.
                int nodes = get_tree_node_count(doc);
                CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
            }
        }
    }
}
2014-02-11 06:45:27 +00:00
// Table-driven check of PCDATA/whitespace handling in fragment mode. Each row
// is run under every mode selected by its mask (parse_default,
// parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single),
// always OR-ed with parse_fragment; the load result, the serialized tree and
// the value of get_tree_node_count(doc) are compared against the row.
// NOTE(review): literals kept byte-for-byte. Several rows are now textually
// identical, which suggests whitespace inside the literals was collapsed or
// padded during extraction - confirm against upstream.
TEST(parse_ws_pcdata_fragment_permutations)
{
    struct test_data_t
    {
        unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
        const char_t* source;
        const char_t* result;
        int nodes; // negative if parsing should fail
    };

    test_data_t test_data[] =
    {
        // external pcdata should be preserved
        {7, STR(" ext1 "), STR(" ext1 "), 2},
        {5, STR(" "), STR(" "), 1},
        {2, STR(" "), STR(" "), 2},
        {7, STR(" ext1<node/> "), STR(" ext1<node/> "), 3},
        {7, STR(" <node/>ext2 "), STR(" <node/>ext2 "), 3},
        {7, STR(" ext1<node/>ext2 "), STR(" ext1<node/>ext2 "), 4},
        {7, STR(" ext1<node1/>ext2<node2/>ext3 "), STR(" ext1<node1/>ext2<node2/>ext3 "), 6},
        {5, STR(" <node/> "), STR(" <node/> "), 2},
        {2, STR(" <node/> "), STR(" <node/> "), 3},
        {5, STR(" <node/> "), STR(" <node/> "), 2},
        {2, STR(" <node/> "), STR(" <node/> "), 3},
        {5, STR(" <node/> "), STR(" <node/> "), 2},
        {2, STR(" <node/> "), STR(" <node/> "), 4},
        {5, STR(" <node1/> <node2/> "), STR(" <node1/><node2/> "), 3},
        {2, STR(" <node1/> <node2/> "), STR(" <node1/> <node2/> "), 6},
    };

    // Loop-invariant: bit N of test_data_t::mask selects mode_flags[N].
    const unsigned int mode_flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};

    for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
    {
        const test_data_t& td = test_data[i];

        for (int flag = 0; flag < 3; ++flag)
        {
            if (td.mask & (1 << flag))
            {
                xml_document doc;

                // A positive node count means the load is expected to succeed.
                CHECK((td.nodes > 0) == doc.load_string(td.source, mode_flags[flag] | parse_fragment));
                CHECK_NODE(doc, td.result);

                // The sign of td.nodes only encodes success/failure; compare magnitudes.
                int nodes = get_tree_node_count(doc);
                CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
            }
        }
    }
}
2010-07-19 09:57:32 +00:00
// Loads element text containing mixed line endings with parse_minimal only and
// checks that child_value keeps them unchanged.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_pcdata_no_eol)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <root> \r \r val1 \r val2 \r \n val3 \n val4 \r \r </root> "), parse_minimal));

    CHECK_STRING(doc.child_value(STR(" root ")), STR(" \r \r val1 \r val2 \r \n val3 \n val4 \r \r "));
}
// Loads element text containing mixed line endings with parse_eol set and
// checks child_value against a string that uses \n only.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_pcdata_eol)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <root> \r \r val1 \r val2 \r \n val3 \n val4 \r \r </root> "), parse_minimal | parse_eol));

    CHECK_STRING(doc.child_value(STR(" root ")), STR(" \n \n val1 \n val2 \n val3 \n val4 \n \n "));
}
// Loads an element surrounded by document-level text with parse_minimal and
// checks that the document has a single child, which is an element.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_pcdata_skip_ext)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" pre<root/>post "), parse_minimal));

    CHECK(doc.first_child() == doc.last_child());
    CHECK(doc.first_child().type() == node_element);
}
// Loads an element whose text runs to end of input without a closing tag and
// checks that the load reports status_end_element_mismatch.
// NOTE(review): literal kept byte-for-byte; its padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_pcdata_error)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <root>pcdata "), parse_minimal).status == status_end_element_mismatch);
}
2014-02-25 03:41:57 +00:00
// Table-driven check of parse_trim_pcdata: each row is loaded with its own
// flags OR-ed with parse_trim_pcdata, and the resulting text is compared
// against the row's expected string. The text is read from the "node" child
// when that child exists, otherwise from the document's own text().
// NOTE(review): literals kept byte-for-byte. The blanks padding them and the
// bare '&' runs look like extraction artifacts (whitespace injection / entity
// decoding) - confirm against upstream.
TEST(parse_pcdata_trim)
{
    struct test_data_t
    {
        const char_t* source;
        const char_t* result;
        unsigned int flags;
    };

    test_data_t test_data[] =
    {
        {STR(" <node> text</node> "), STR(" text "), 0},
        {STR(" <node> \t \n text</node> "), STR(" text "), 0},
        {STR(" <node>text </node> "), STR(" text "), 0},
        {STR(" <node>text \t \n </node> "), STR(" text "), 0},
        {STR(" <node> \r \n \t text \t \n \r </node> "), STR(" text "), 0},
        {STR(" text "), STR(" text "), parse_fragment},
        {STR(" \t \n text "), STR(" text "), parse_fragment},
        {STR(" text "), STR(" text "), parse_fragment},
        {STR(" text \t \n "), STR(" text "), parse_fragment},
        {STR(" \r \n \t text \t \n \r "), STR(" text "), parse_fragment},
        {STR(" <node> \r \n \t text \t \n \r more \r \n \t </node> "), STR(" text \t \n \r more "), 0},
        {STR(" <node> \r \n \t text \t \n \r more \r \n \t </node> "), STR(" text \t \n \n more "), parse_eol},
        {STR(" <node> \r \n \t text \r \n \r \n \r \n \r \n \r \n \r \n \r \n more \r \n \t </node> "), STR(" text \n \n \n \n \n \n \n more "), parse_eol},
        {STR(" <node> test&&&&&&& </node> "), STR(" test&&&&&&& "), 0},
        {STR(" <node> test&&&&&&& </node> "), STR(" test&&&&&&& "), parse_escapes},
        {STR(" test&&&&&&& "), STR(" test&&&&&&& "), parse_fragment | parse_escapes},
        {STR(" <node> \r \n \t text \t \n \r m&&e \r \n \t </node> "), STR(" text \t \n \n m&&e "), parse_eol | parse_escapes}
    };

    for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
    {
        const test_data_t& td = test_data[i];

        xml_document doc;
        CHECK(doc.load_string(td.source, td.flags | parse_trim_pcdata));

        // Fragment rows have no wrapping element, so fall back to the document text.
        const char_t* value = doc.child(STR(" node ")) ? doc.child_value(STR(" node ")) : doc.text().get();
        CHECK_STRING(value, td.result);
    }
}
// For every combination of the whitespace-PCDATA flags listed below, OR-ed
// with parse_trim_pcdata, loads an element containing only a blank and checks
// that the element exists and has no children.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_pcdata_trim_empty)
{
    unsigned int flags[] = {0, parse_ws_pcdata, parse_ws_pcdata_single, parse_ws_pcdata | parse_ws_pcdata_single};

    for (size_t i = 0; i < sizeof(flags) / sizeof(flags[0]); ++i)
    {
        xml_document doc;
        CHECK(doc.load_string(STR(" <node> </node> "), flags[i] | parse_trim_pcdata));

        xml_node node = doc.child(STR(" node "));
        CHECK(node);
        CHECK(!node.first_child());
    }
}
2010-07-19 09:57:32 +00:00
// Loads a node with parse_minimal (no parse_escapes) and compares the value of
// its "id" attribute against the expected string.
// NOTE(review): the literals below contain raw quote characters and look
// entity-decoded, and the date-like lines are not C++; presumably extraction
// residue - confirm against upstream before relying on this test.
TEST ( parse_escapes_skip )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node id='<>&'"'><>&'"</node> " ) , parse_minimal ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child ( STR ( " node " ) ) . attribute ( STR ( " id " ) ) . value ( ) , STR ( " <>&'" " ) ) ;
}
// Loads a node with parse_escapes set and compares both the element text and
// the "id" attribute value against the expected string.
// NOTE(review): the source literal contains raw quote characters and looks
// entity-decoded, and the date-like lines are not C++; presumably extraction
// residue - confirm against upstream before relying on this test.
TEST ( parse_escapes_parse )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node id='<>&'"'><>&'"</node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " <>&' \" " ) ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . attribute ( STR ( " id " ) ) . value ( ) , STR ( " <>&' \" " ) ) ;
}
// Loads a node with parse_escapes set and compares its text against a string
// containing the \01 character.
// NOTE(review): the source literal presumably held numeric character
// references that were decoded during extraction (it may now contain an
// invisible control character), and the date-like lines are not C++ - confirm
// against upstream.
TEST ( parse_escapes_code )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>  </node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " \01 " ) ) ;
}
// Loads a node containing a run of decimal-style character references with
// parse_escapes set and compares the element text against the expected string.
// NOTE(review): both literals now span physical lines and look partially
// entity-decoded (raw tab/newline inside the strings, "\ x1"-style escapes
// split by blanks), and the date-like lines are not C++; presumably extraction
// residue - confirm against upstream before relying on this test.
TEST ( parse_escapes_code_exhaustive_dec )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>&#/;	&#:;&#a;&#A;
</node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " &#/; \ x1 \ x2 \ x3 \ x4 \ x5 \ x6 \ x7 \ x8 \ x9&#:;&#a;&#A;
 " ) ) ;
}
// Loads a node containing a run of hexadecimal-style character references with
// parse_escapes set and compares the element text against the expected string.
// NOTE(review): the source literal now spans several physical lines and looks
// partially entity-decoded (raw control characters inside the string), the
// expected literal has "\ xa"-style escapes split by blanks, and the date-like
// lines are not C++; presumably extraction residue - confirm against upstream
// before relying on this test.
TEST ( parse_escapes_code_exhaustive_hex )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>&#x/;	&#x:;&#x@;

&#xG;&#x`;

&#xg;</node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " &#x/; \ x1 \ x2 \ x3 \ x4 \ x5 \ x6 \ x7 \ x8 \ x9&#x:;&#x@; \ xa \ xb \ xc \ xd \ xe \ xf&#xG;&#x`; \ xa \ xb \ xc \ xd \ xe \ xf&#xg; " ) ) ;
}
// Loads a node with parse_escapes set and compares the element text against
// the expected string.
// NOTE(review): judging by the test name this presumably checks that
// incomplete character references are left untouched, but the literals look
// entity-decoded (the references are no longer visible) and the date-like
// lines are not C++ - confirm against upstream.
TEST ( parse_escapes_code_restore )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>  - - </node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( "   - - " ) ) ;
}
// For each named-entity family (quot, apos, amp, lt, gt) loads a node holding
// truncated prefixes of the entity name with parse_escapes set, and compares
// the element text against the expected string.
// NOTE(review): the literals look entity-decoded (e.g. a raw '"' inside the
// first pair of strings) and the date-like lines are not C++; presumably
// extraction residue - confirm against upstream before relying on this test.
TEST ( parse_escapes_char_restore )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>&q &qu &quo " </node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " &q &qu &quo " " ) ) ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>&a &ap &apo &apos </node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " &a &ap &apo &apos " ) ) ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>&a &am & </node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " &a &am & " ) ) ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>&l < </node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " &l < " ) ) ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>&g > </node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " &g > " ) ) ;
}
// Loads a node holding three non-ASCII characters with parse_escapes set and
// checks the decoded text. In PUGIXML_WCHAR_MODE the code units are compared
// individually (a surrogate pair when sizeof(wchar_t) is 2, a single code
// point otherwise); in char mode the text is compared against the UTF-8 byte
// sequence.
// NOTE(review): the source literal presumably held numeric character
// references that were decoded during extraction, and the date-like lines are
// not C++ - confirm against upstream.
TEST ( parse_escapes_unicode )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>γγ𤭢</node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
# ifdef PUGIXML_WCHAR_MODE
2017-06-22 20:33:02 -07:00
const char_t * v = doc . child_value ( STR ( " node " ) ) ;
2010-07-19 09:57:32 +00:00
size_t wcharsize = sizeof ( wchar_t ) ;
2012-03-06 06:13:10 +00:00
CHECK ( v [ 0 ] = = 0x3b3 & & v [ 1 ] = = 0x3b3 & & ( wcharsize = = 2 ? v [ 2 ] = = wchar_cast ( 0xd852 ) & & v [ 3 ] = = wchar_cast ( 0xdf62 ) : v [ 2 ] = = wchar_cast ( 0x24b62 ) ) ) ;
2010-07-19 09:57:32 +00:00
# else
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , " \xce \xb3 \xce \xb3 \xf0 \xa4 \xad \xa2 " ) ;
# endif
}
// First loads a node with parse_escapes set and compares its text against the
// expected string; then checks that a series of inputs ending inside an
// unterminated attribute value all fail to load (default flags).
// NOTE(review): the literals look entity-decoded (raw '"' inside the first
// pair of strings, possibly invisible control characters) and the date-like
// lines are not C++; presumably extraction residue - confirm against upstream
// before relying on this test.
TEST ( parse_escapes_error )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node>g;&#ab;"</node> " ) , parse_minimal | parse_escapes ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child_value ( STR ( " node " ) ) , STR ( " g;&#ab;" " ) ) ;
2014-11-17 19:52:23 -08:00
CHECK ( ! doc . load_string ( STR ( " <node id=' " ) ) ) ;
CHECK ( ! doc . load_string ( STR ( " <node id='&g " ) ) ) ;
CHECK ( ! doc . load_string ( STR ( " <node id='> " ) ) ) ;
CHECK ( ! doc . load_string ( STR ( " <node id='&l " ) ) ) ;
CHECK ( ! doc . load_string ( STR ( " <node id='< " ) ) ) ;
CHECK ( ! doc . load_string ( STR ( " <node id='&a " ) ) ) ;
CHECK ( ! doc . load_string ( STR ( " <node id='& " ) ) ) ;
CHECK ( ! doc . load_string ( STR ( " <node id='&apos " ) ) ) ;
2010-07-19 09:57:32 +00:00
}
// Loads a node holding malformed character references (no digits, or a '-'
// where digits are expected) with parse_escapes set and checks that the
// element text equals the same reference text, i.e. nothing was decoded.
// NOTE(review): literals kept byte-for-byte; their padding blanks look like an
// extraction artifact - confirm against upstream.
TEST(parse_escapes_code_invalid)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node>&#;&#x;&;&#x-;&#-;</node> "), parse_minimal | parse_escapes));

    CHECK_STRING(doc.child_value(STR(" node ")), STR(" &#;&#x;&;&#x-;&#-; "));
}
2010-08-04 10:01:16 +00:00
// For every on/off combination of parse_wnorm_attribute, parse_eol and
// parse_wconv_attribute (always with parse_escapes), loads a node and compares
// its "id" attribute value against a string holding a double quote.
// NOTE(review): the source literal contains a raw '"' and looks
// entity-decoded, operators appear split by blanks ("+ +", "| ="), and the
// date-like lines are not C++; presumably extraction residue - confirm against
// upstream before relying on this test.
TEST ( parse_escapes_attribute )
{
xml_document doc ;
for ( int wnorm = 0 ; wnorm < 2 ; + + wnorm )
for ( int eol = 0 ; eol < 2 ; + + eol )
for ( int wconv = 0 ; wconv < 2 ; + + wconv )
{
unsigned int flags = parse_escapes ;
flags | = ( wnorm ? parse_wnorm_attribute : 0 ) ;
flags | = ( eol ? parse_eol : 0 ) ;
flags | = ( wconv ? parse_wconv_attribute : 0 ) ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node id='"'/> " ) , flags ) ) ;
2010-08-04 10:01:16 +00:00
CHECK_STRING ( doc . child ( STR ( " node " ) ) . attribute ( STR ( " id " ) ) . value ( ) , STR ( " \" " ) ) ;
}
}
2010-07-19 09:57:32 +00:00
// Loads a node whose attributes have varying whitespace around '=' (none,
// before, after, both, and mixed \n/\r/\t) and checks each attribute value.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_attribute_spaces)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node id1='v1' id2 ='v2' id3= 'v3' id4 = 'v4' id5 \n \r \t = \r \t \n 'v5' /> "), parse_minimal));

    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id1 ")).value(), STR(" v1 "));
    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id2 ")).value(), STR(" v2 "));
    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id3 ")).value(), STR(" v3 "));
    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id4 ")).value(), STR(" v4 "));
    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id5 ")).value(), STR(" v5 "));
}
// Loads a node with one single-quoted and one double-quoted attribute and
// checks both values.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_attribute_quot)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node id1='v1' id2= \" v2 \" /> "), parse_minimal));

    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id1 ")).value(), STR(" v1 "));
    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id2 ")).value(), STR(" v2 "));
}
// Loads an attribute value containing tabs and mixed line endings with
// parse_minimal only and checks that the value is kept unchanged.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_attribute_no_eol_no_wconv)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node id=' \t \r \r val1 \r val2 \r \n val3 \n val4 \r \r '/> "), parse_minimal));

    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id ")).value(), STR(" \t \r \r val1 \r val2 \r \n val3 \n val4 \r \r "));
}
// Loads an attribute value containing tabs and mixed line endings with
// parse_eol set (no parse_wconv_attribute) and checks the value against a
// string that keeps the tab and uses \n for every line ending.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_attribute_eol_no_wconv)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node id=' \t \r \r val1 \r val2 \r \n val3 \n val4 \r \r '/> "), parse_minimal | parse_eol));

    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id ")).value(), STR(" \t \n \n val1 \n val2 \n val3 \n val4 \n \n "));
}
// Loads an attribute value containing tabs and mixed line endings with
// parse_wconv_attribute set (no parse_eol) and compares the value against the
// expected string.
// NOTE(review): literals kept byte-for-byte; whitespace inside them appears to
// have been collapsed/padded during extraction, so the expected string may not
// reflect the original whitespace-conversion result - confirm against upstream.
TEST(parse_attribute_no_eol_wconv)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node id=' \t \r \r val1 \r val2 \r \n val3 \n val4 \r \r '/> "), parse_minimal | parse_wconv_attribute));

    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id ")).value(), STR(" val1 val2 val3 val4 "));
}
// Loads an attribute value containing tabs and mixed line endings with both
// parse_eol and parse_wconv_attribute set and compares the value against the
// expected string.
// NOTE(review): literals kept byte-for-byte; whitespace inside them appears to
// have been collapsed/padded during extraction, so the expected string may not
// reflect the original whitespace-conversion result - confirm against upstream.
TEST(parse_attribute_eol_wconv)
{
    xml_document doc;

    CHECK(doc.load_string(STR(" <node id=' \t \r \r val1 \r val2 \r \n val3 \n val4 \r \r '/> "), parse_minimal | parse_eol | parse_wconv_attribute));

    CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id ")).value(), STR(" val1 val2 val3 val4 "));
}
// With parse_wnorm_attribute always set, iterates over every on/off
// combination of parse_eol and parse_wconv_attribute and checks that the
// attribute value matches the same expected string in all four cases.
// NOTE(review): literals kept byte-for-byte; the blanks inside/around them
// look like an extraction artifact - confirm against upstream.
TEST(parse_attribute_wnorm)
{
    xml_document doc;

    for (int eol = 0; eol < 2; ++eol)
        for (int wconv = 0; wconv < 2; ++wconv)
        {
            unsigned int flags = parse_minimal | parse_wnorm_attribute | (eol ? parse_eol : 0) | (wconv ? parse_wconv_attribute : 0);

            CHECK(doc.load_string(STR(" <node id=' \t \r \r val1 \r val2 \r \n val3 \n val4 \r \r '/> "), flags));

            CHECK_STRING(doc.child(STR(" node ")).attribute(STR(" id ")).value(), STR(" val1 val2 val3 val4 "));
        }
}
// Runs every combination of parse_wnorm_attribute, parse_eol, parse_wconv_attribute and
// parse_escapes (16 flag sets) on top of parse_minimal and checks that a plain
// attribute value of 1 is read back unchanged each time.
TEST ( parse_attribute_variations )
{
xml_document doc ;
for ( int wnorm = 0 ; wnorm < 2 ; + + wnorm )
for ( int eol = 0 ; eol < 2 ; + + eol )
for ( int wconv = 0 ; wconv < 2 ; + + wconv )
for ( int escapes = 0 ; escapes < 2 ; + + escapes )
{
// Each loop variable switches exactly one optional flag on or off.
unsigned int flags = parse_minimal ;
2010-08-04 10:01:16 +00:00
flags | = ( wnorm ? parse_wnorm_attribute : 0 ) ;
flags | = ( eol ? parse_eol : 0 ) ;
flags | = ( wconv ? parse_wconv_attribute : 0 ) ;
flags | = ( escapes ? parse_escapes : 0 ) ;
2010-07-19 09:57:32 +00:00
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node id='1'/> " ) , flags ) ) ;
2010-07-19 09:57:32 +00:00
CHECK_STRING ( doc . child ( STR ( " node " ) ) . attribute ( STR ( " id " ) ) . value ( ) , STR ( " 1 " ) ) ;
}
}
// Malformed attribute syntax must be rejected with the right status code.
// Inputs in which an attribute name has been started (missing '=', missing value,
// unterminated or mismatched quotes, no whitespace between two attributes, '&' inside
// the name) expect status_bad_attribute. Inputs in which '#' or '&' appears right
// after the element name or where an attribute name should begin expect
// status_bad_start_element.
// NOTE(review): the first four inputs read identically in this listing; presumably they
// are meant to differ (e.g. in trailing whitespace) - confirm against the upstream file.
TEST ( parse_attribute_error )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node id " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id/ " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id/> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id?/> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id=/> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id='/> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id= \" /> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id= \" '/> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id=' \" /> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id=' \" /> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
// From here on the error is reported against the start tag rather than an attribute,
// except for the two inputs that already contain a started attribute name.
CHECK ( doc . load_string ( STR ( " <node #/> " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node#/> " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node id1='1'id2='2'/> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node id&='1'/> " ) , parse_minimal ) . status = = status_bad_attribute ) ;
CHECK ( doc . load_string ( STR ( " <node &='1'/> " ) , parse_minimal ) . status = = status_bad_start_element ) ;
2010-07-19 09:57:32 +00:00
}
2010-08-04 10:01:16 +00:00
// An attribute value whose closing quote is missing must yield status_bad_attribute
// for all eight combinations of parse_wnorm_attribute, parse_eol and
// parse_wconv_attribute on top of parse_minimal.
TEST ( parse_attribute_termination_error )
{
xml_document doc ;
for ( int wnorm = 0 ; wnorm < 2 ; + + wnorm )
for ( int eol = 0 ; eol < 2 ; + + eol )
for ( int wconv = 0 ; wconv < 2 ; + + wconv )
{
unsigned int flags = parse_minimal ;
flags | = ( wnorm ? parse_wnorm_attribute : 0 ) ;
flags | = ( eol ? parse_eol : 0 ) ;
flags | = ( wconv ? parse_wconv_attribute : 0 ) ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node id='value " ) , flags ) . status = = status_bad_attribute ) ;
2010-08-04 10:01:16 +00:00
}
}
// A double quote inside a single-quoted value (id1) and a single quote inside a
// double-quoted value (id2) must be read back as the attribute values.
// Unlike the neighbouring tests, the base flag set here is parse_escapes; it is
// combined with all eight wnorm / eol / wconv combinations.
TEST ( parse_attribute_quot_inside )
{
xml_document doc ;
for ( int wnorm = 0 ; wnorm < 2 ; + + wnorm )
for ( int eol = 0 ; eol < 2 ; + + eol )
for ( int wconv = 0 ; wconv < 2 ; + + wconv )
{
unsigned int flags = parse_escapes ;
flags | = ( wnorm ? parse_wnorm_attribute : 0 ) ;
flags | = ( eol ? parse_eol : 0 ) ;
flags | = ( wconv ? parse_wconv_attribute : 0 ) ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node id1=' \" ' id2= \" ' \" /> " ) , flags ) ) ;
2010-08-04 10:01:16 +00:00
CHECK_STRING ( doc . child ( STR ( " node " ) ) . attribute ( STR ( " id1 " ) ) . value ( ) , STR ( " \" " ) ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . attribute ( STR ( " id2 " ) ) . value ( ) , STR ( " ' " ) ) ;
}
}
2017-01-31 20:36:59 -08:00
// Loads the same five-attribute element with parse_wnorm_attribute, once without and
// once with parse_escapes, and checks the resulting document with CHECK_NODE.
// NOTE(review): CHECK_NODE is defined outside this section; presumably it compares the
// serialized document with the given markup - confirm in the test harness.
TEST ( parse_attribute_wnorm_coverage )
{
xml_document doc ;
CHECK ( doc . load_string ( STR ( " <n a1='v' a2=' ' a3='x y' a4='x y' a5='x y' /> " ) , parse_wnorm_attribute ) ) ;
CHECK_NODE ( doc , STR ( " <n a1= \" v \" a2= \" \" a3= \" x y \" a4= \" x y \" a5= \" x y \" /> " ) ) ;
// Same input again with parse_escapes added; the expected markup is the same.
CHECK ( doc . load_string ( STR ( " <n a1='v' a2=' ' a3='x y' a4='x y' a5='x y' /> " ) , parse_wnorm_attribute | parse_escapes ) ) ;
CHECK_NODE ( doc , STR ( " <n a1= \" v \" a2= \" \" a3= \" x y \" a4= \" x y \" a5= \" x y \" /> " ) ) ;
}
// Loads attribute values made of CR / LF characters with parse_wconv_attribute, once
// without and once with parse_escapes, and checks the resulting document with
// CHECK_NODE. The expected markup contains no CR or LF characters.
TEST ( parse_attribute_wconv_coverage )
{
xml_document doc ;
CHECK ( doc . load_string ( STR ( " <n a1='v' a2=' \r ' a3=' \r \n \n ' a4=' \n ' /> " ) , parse_wconv_attribute ) ) ;
CHECK_NODE ( doc , STR ( " <n a1= \" v \" a2= \" \" a3= \" \" a4= \" \" /> " ) ) ;
// Same input again with parse_escapes added; the expected markup is the same.
CHECK ( doc . load_string ( STR ( " <n a1='v' a2=' \r ' a3=' \r \n \n ' a4=' \n ' /> " ) , parse_wconv_attribute | parse_escapes ) ) ;
CHECK_NODE ( doc , STR ( " <n a1= \" v \" a2= \" \" a3= \" \" a4= \" \" /> " ) ) ;
}
// Loads attribute values made of CR / LF characters with parse_eol, once without and
// once with parse_escapes, and checks the resulting document with CHECK_NODE.
// NOTE(review): the expected markup shows no line-break characters in this listing;
// how CHECK_NODE renders line breaks inside attribute values should be confirmed.
TEST ( parse_attribute_eol_coverage )
{
xml_document doc ;
CHECK ( doc . load_string ( STR ( " <n a1='v' a2=' \r ' a3=' \r \n \n ' a4=' \n ' /> " ) , parse_eol ) ) ;
CHECK_NODE ( doc , STR ( " <n a1= \" v \" a2= \" \" a3= \" \" a4= \" \" /> " ) ) ;
// Same input again with parse_escapes added; the expected markup is the same.
CHECK ( doc . load_string ( STR ( " <n a1='v' a2=' \r ' a3=' \r \n \n ' a4=' \n ' /> " ) , parse_eol | parse_escapes ) ) ;
CHECK_NODE ( doc , STR ( " <n a1= \" v \" a2= \" \" a3= \" \" a4= \" \" /> " ) ) ;
}
2010-07-19 09:57:32 +00:00
// Three spellings of an empty element - with nothing, with a space and with a line
// break before the closing "/>" - are loaded with parse_minimal and are all expected
// to be written back as <node/>.
TEST ( parse_tag_single )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node/><node /><node \n /> " ) , parse_minimal ) ) ;
2016-04-14 00:30:24 -07:00
CHECK_NODE ( doc , STR ( " <node/><node/><node/> " ) ) ;
2010-07-19 09:57:32 +00:00
}
// Nested elements loaded with parse_minimal. The expected markup writes the explicit
// <n5></n5> pair as <n5/>, and the CR LF placed before the '>' of the n3 end tag must
// not prevent a successful load.
TEST ( parse_tag_hierarchy )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <node><n1><n2/></n1><n3><n4><n5></n5></n4></n3 \r \n ></node> " ) , parse_minimal ) ) ;
2016-04-14 00:30:24 -07:00
CHECK_NODE ( doc , STR ( " <node><n1><n2/></n1><n3><n4><n5/></n4></n3></node> " ) ) ;
2010-07-19 09:57:32 +00:00
}
// Malformed or truncated tags must be rejected with the right status code:
//   status_unrecognized_tag     - '<' followed by nothing usable as a tag
//   status_bad_start_element    - a start tag that is truncated or contains a stray character
//   status_end_element_mismatch - an end tag that does not match an open element, or a
//                                 start tag that is never closed
//   status_bad_end_element      - a matching end tag that is truncated or has extra content
// NOTE(review): several adjacent inputs read identically in this listing; presumably they
// are meant to differ in whitespace - confirm against the upstream file.
TEST ( parse_tag_error )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " < " ) , parse_minimal ) . status = = status_unrecognized_tag ) ;
CHECK ( doc . load_string ( STR ( " <! " ) , parse_minimal ) . status = = status_unrecognized_tag ) ;
CHECK ( doc . load_string ( STR ( " <!D " ) , parse_minimal ) . status = = status_unrecognized_tag ) ;
CHECK ( doc . load_string ( STR ( " <# " ) , parse_minimal ) . status = = status_unrecognized_tag ) ;
CHECK ( doc . load_string ( STR ( " <node# " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node/ " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node / " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node / " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node / > " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node/ > " ) , parse_minimal ) . status = = status_bad_start_element ) ;
// End tags with no element open report a mismatch, even when they are also truncated.
CHECK ( doc . load_string ( STR ( " </ node> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " </node " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " </node " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " <node></ node> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
// With <node> open, a truncated but matching end tag is a bad end element instead.
CHECK ( doc . load_string ( STR ( " <node></node " ) , parse_minimal ) . status = = status_bad_end_element ) ;
CHECK ( doc . load_string ( STR ( " <node></node " ) , parse_minimal ) . status = = status_bad_end_element ) ;
CHECK ( doc . load_string ( STR ( " <node></nodes> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " <node> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " <node/>< " ) , parse_minimal ) . status = = status_unrecognized_tag ) ;
CHECK ( doc . load_string ( STR ( " <node attr='value'> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " </></node> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " </node> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " </> " ) , parse_minimal ) . status = = status_end_element_mismatch ) ;
CHECK ( doc . load_string ( STR ( " <node></node v> " ) , parse_minimal ) . status = = status_bad_end_element ) ;
CHECK ( doc . load_string ( STR ( " <node&/> " ) , parse_minimal ) . status = = status_bad_start_element ) ;
CHECK ( doc . load_string ( STR ( " <node& v='1'/> " ) , parse_minimal ) . status = = status_bad_start_element ) ;
2010-07-19 09:57:32 +00:00
}
// Processing instructions whose target is "xml" in each of the eight upper/lower-case
// spellings are loaded with parse_fragment | parse_pi (parse_declaration is not set).
// The load must succeed and must not add any node to the document.
TEST ( parse_declaration_cases )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <?xml?><?xmL?><?xMl?><?xML?><?Xml?><?XmL?><?XMl?><?XML?> " ) , parse_fragment | parse_pi ) ) ;
2010-07-19 09:57:32 +00:00
CHECK ( ! doc . first_child ( ) ) ;
}
// Same eight case variants of the "xml" target as in parse_declaration_cases, here with
// whitespace between the target and "?>". With parse_fragment | parse_pi the load must
// succeed and must not add any node to the document.
TEST ( parse_declaration_attr_cases )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <?xml ?><?xmL ?><?xMl ?><?xML ?><?Xml ?><?XmL ?><?XMl ?><?XML ?> " ) , parse_fragment | parse_pi ) ) ;
2010-07-19 09:57:32 +00:00
CHECK ( ! doc . first_child ( ) ) ;
}
// With parse_declaration absent (flag sets: parse_fragment alone and
// parse_fragment | parse_pi), <?xml ...?> declarations must load successfully while
// leaving the document without children.
TEST ( parse_declaration_skip )
{
xml_document doc ;
2014-02-11 06:45:27 +00:00
unsigned int flag_sets [ ] = { parse_fragment , parse_fragment | parse_pi } ;
2010-07-19 09:57:32 +00:00
for ( unsigned int i = 0 ; i < sizeof ( flag_sets ) / sizeof ( flag_sets [ 0 ] ) ; + + i )
{
unsigned int flags = flag_sets [ i ] ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <?xml?><?xml version='1.0'?> " ) , flags ) ) ;
2010-07-19 09:57:32 +00:00
CHECK ( ! doc . first_child ( ) ) ;
2014-11-17 19:52:23 -08:00
// Markup-like text inside the declaration must not produce an element either.
CHECK ( doc . load_string ( STR ( " <?xml <tag/> ?> " ) , flags ) ) ;
2010-07-19 09:57:32 +00:00
CHECK ( ! doc . first_child ( ) ) ;
}
}
// With parse_fragment | parse_declaration, two consecutive declarations must become two
// distinct node_declaration children named "xml"; the second one carries the
// version attribute with value 1.0.
TEST ( parse_declaration_parse )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <?xml?><?xml version='1.0'?> " ) , parse_fragment | parse_declaration ) ) ;
2010-07-19 09:57:32 +00:00
xml_node d1 = doc . first_child ( ) ;
xml_node d2 = doc . last_child ( ) ;
// First and last child differ, i.e. both declarations were kept.
CHECK ( d1 ! = d2 ) ;
CHECK ( d1 . type ( ) = = node_declaration ) ;
CHECK_STRING ( d1 . name ( ) , STR ( " xml " ) ) ;
CHECK ( d2 . type ( ) = = node_declaration ) ;
CHECK_STRING ( d2 . name ( ) , STR ( " xml " ) ) ;
CHECK_STRING ( d2 . attribute ( STR ( " version " ) ) . value ( ) , STR ( " 1.0 " ) ) ;
}
// Truncated or unterminated declarations must report status_bad_pi, both with and
// without parse_declaration. Two further cases are checked with parse_declaration only.
TEST ( parse_declaration_error )
{
xml_document doc ;
2014-02-11 06:45:27 +00:00
unsigned int flag_sets [ ] = { parse_fragment , parse_fragment | parse_declaration } ;
2010-07-19 09:57:32 +00:00
for ( unsigned int i = 0 ; i < sizeof ( flag_sets ) / sizeof ( flag_sets [ 0 ] ) ; + + i )
{
unsigned int flags = flag_sets [ i ] ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " <?xml " ) , flags ) . status = = status_bad_pi ) ;
CHECK ( doc . load_string ( STR ( " <?xml? " ) , flags ) . status = = status_bad_pi ) ;
CHECK ( doc . load_string ( STR ( " <?xml> " ) , flags ) . status = = status_bad_pi ) ;
CHECK ( doc . load_string ( STR ( " <?xml version='1> " ) , flags ) . status = = status_bad_pi ) ;
2010-07-19 09:57:32 +00:00
}
2016-01-24 14:03:02 +01:00
2014-11-17 19:52:23 -08:00
// A terminated declaration with an unterminated attribute value is an attribute error.
CHECK ( doc . load_string ( STR ( " <?xml version='1?> " ) , parse_fragment | parse_declaration ) . status = = status_bad_attribute ) ;
// A declaration nested inside an element is rejected as a bad processing instruction.
CHECK ( doc . load_string ( STR ( " <foo><?xml version='1'?></foo> " ) , parse_fragment | parse_declaration ) . status = = status_bad_pi ) ;
2010-07-19 09:57:32 +00:00
}
// An empty input loaded with default options must report status_no_document_element;
// loaded with parse_fragment it must succeed. In both cases the document has no children.
TEST ( parse_empty )
{
xml_document doc ;
2015-03-05 10:09:54 -08:00
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " " ) ) . status = = status_no_document_element & & ! doc . first_child ( ) ) ;
CHECK ( doc . load_string ( STR ( " " ) , parse_fragment ) & & ! doc . first_child ( ) ) ;
2010-07-19 09:57:32 +00:00
}
// Sets the test runner's memory-failure threshold to 256 and expects loading a tiny
// document to report status_out_of_memory and to leave the document without children.
// NOTE(review): test_runner::_memory_fail_threshold and CHECK_ALLOC_FAIL are defined by
// the test harness outside this section; presumably the threshold caps the amount of
// memory that may be allocated - confirm there.
TEST ( parse_out_of_memory )
{
test_runner : : _memory_fail_threshold = 256 ;
xml_document doc ;
2015-04-11 22:46:08 -07:00
CHECK_ALLOC_FAIL ( CHECK ( doc . load_string ( STR ( " <foo a='1'/> " ) ) . status = = status_out_of_memory ) ) ;
2010-07-19 09:57:32 +00:00
CHECK ( ! doc . first_child ( ) ) ;
}
2014-10-23 05:46:44 +00:00
// Allocation failure part-way through a document made of many elements: the load must
// report status_out_of_memory, and the first element parsed must still be present.
TEST ( parse_out_of_memory_halfway_node )
2010-07-19 09:57:32 +00:00
{
2010-08-03 08:03:23 +00:00
const unsigned int count = 10000 ;
// Static buffer holding `count` four-character elements, filled in below; it is not
// NUL-terminated and is passed to the parser by size.
static char_t text [ count * 4 ] ;
2010-07-19 09:57:32 +00:00
for ( unsigned int i = 0 ; i < count ; + + i )
{
text [ 4 * i + 0 ] = ' < ' ;
text [ 4 * i + 1 ] = ' n ' ;
text [ 4 * i + 2 ] = ' / ' ;
text [ 4 * i + 3 ] = ' > ' ;
}
// NOTE(review): the threshold is interpreted by the test harness (not shown here);
// presumably it makes allocation fail before all `count` nodes can be created.
test_runner : : _memory_fail_threshold = 65536 ;
xml_document doc ;
2018-03-16 21:33:26 -07:00
CHECK_ALLOC_FAIL ( CHECK ( doc . load_buffer_inplace ( text , sizeof ( text ) ) . status = = status_out_of_memory ) ) ;
2016-04-14 00:30:24 -07:00
// The part of the tree built before the failure is kept.
CHECK_NODE ( doc . first_child ( ) , STR ( " <n/> " ) ) ;
2010-07-19 09:57:32 +00:00
}
2014-10-23 05:46:44 +00:00
// Allocation failure part-way through a single element carrying many attributes: the
// load must report status_out_of_memory, and the element together with its first and
// last parsed attribute (all named "a") must still be accessible.
TEST ( parse_out_of_memory_halfway_attr )
{
const unsigned int count = 10000 ;
// Layout: 2 characters for the tag opening, 5 per attribute, 2 for the closing "/>".
static char_t text [ count * 5 + 4 ] ;
text [ 0 ] = ' < ' ;
text [ 1 ] = ' n ' ;
for ( unsigned int i = 0 ; i < count ; + + i )
{
// One attribute per iteration: separator, name 'a', '=', and an empty quoted value.
text [ 5 * i + 2 ] = ' ' ;
text [ 5 * i + 3 ] = ' a ' ;
text [ 5 * i + 4 ] = ' = ' ;
text [ 5 * i + 5 ] = ' " ' ;
text [ 5 * i + 6 ] = ' " ' ;
}
text [ 5 * count + 2 ] = ' / ' ;
text [ 5 * count + 3 ] = ' > ' ;
// NOTE(review): the threshold is interpreted by the test harness (not shown here).
test_runner : : _memory_fail_threshold = 65536 ;
xml_document doc ;
2018-03-16 21:33:26 -07:00
CHECK_ALLOC_FAIL ( CHECK ( doc . load_buffer_inplace ( text , sizeof ( text ) ) . status = = status_out_of_memory ) ) ;
2014-10-23 05:46:44 +00:00
CHECK_STRING ( doc . first_child ( ) . name ( ) , STR ( " n " ) ) ;
CHECK_STRING ( doc . first_child ( ) . first_attribute ( ) . name ( ) , STR ( " a " ) ) ;
CHECK_STRING ( doc . first_child ( ) . last_attribute ( ) . name ( ) , STR ( " a " ) ) ;
}
2015-04-11 00:16:39 -07:00
// Allocation failure while loading a buffer declared as encoding_latin1 that contains
// the non-ASCII byte 0x90. With the memory-failure threshold set to 1 the load must
// report status_out_of_memory and leave the document without children.
// NOTE(review): the test name suggests the failing allocation is the one made for
// encoding conversion; that is not visible in this section - confirm in the library.
TEST ( parse_out_of_memory_conversion )
{
2017-01-31 19:19:04 -08:00
test_runner : : _memory_fail_threshold = 1 ;
2015-04-11 00:16:39 -07:00
xml_document doc ;
2015-04-11 22:46:08 -07:00
CHECK_ALLOC_FAIL ( CHECK ( doc . load_buffer ( " <foo \x90 /> " , 7 , parse_default , encoding_latin1 ) . status = = status_out_of_memory ) ) ;
2015-04-11 00:16:39 -07:00
CHECK ( ! doc . first_child ( ) ) ;
}
2015-04-12 21:27:12 -07:00
// After a load that fails with status_out_of_memory, the document must remain usable:
// once the memory-failure threshold is reset to 0, `count` further append_child calls
// on the same document must all succeed.
TEST ( parse_out_of_memory_allocator_state_sync )
{
const unsigned int count = 10000 ;
// Static buffer holding `count` four-character elements, passed to the parser by size.
static char_t text [ count * 4 ] ;
for ( unsigned int i = 0 ; i < count ; + + i )
{
text [ 4 * i + 0 ] = ' < ' ;
text [ 4 * i + 1 ] = ' n ' ;
text [ 4 * i + 2 ] = ' / ' ;
text [ 4 * i + 3 ] = ' > ' ;
}
test_runner : : _memory_fail_threshold = 65536 ;
xml_document doc ;
2018-03-16 21:33:26 -07:00
CHECK_ALLOC_FAIL ( CHECK ( doc . load_buffer_inplace ( text , sizeof ( text ) ) . status = = status_out_of_memory ) ) ;
2016-04-14 00:30:24 -07:00
CHECK_NODE ( doc . first_child ( ) , STR ( " <n/> " ) ) ;
2015-04-12 21:27:12 -07:00
// NOTE(review): 0 presumably disables the failure threshold - confirm in the harness.
test_runner : : _memory_fail_threshold = 0 ;
2015-05-22 08:43:36 -07:00
for ( unsigned int j = 0 ; j < count ; + + j )
2015-04-12 21:27:12 -07:00
CHECK ( doc . append_child ( STR ( " n " ) ) ) ;
}
2017-06-22 20:33:02 -07:00
static bool test_offset ( const char_t * contents , unsigned int options , xml_parse_status status , ptrdiff_t offset )
2010-07-19 09:57:32 +00:00
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
xml_parse_result res = doc . load_string ( contents , options ) ;
2010-07-19 09:57:32 +00:00
return res . status = = status & & res . offset = = offset ;
}
// Wraps the `contents` literal in STR() and asserts, via CHECK, that test_offset()
// reports the given status and offset when parsing it with `options`.
# define CHECK_OFFSET(contents, options, status, offset) CHECK(test_offset(STR(contents), options, status, offset))
// Checks the offset reported in the parse result (through CHECK_OFFSET) for a successful
// parse, for a forced allocation failure, and for each class of syntax error.
// NOTE(review): a few adjacent inputs read identically in this listing yet expect
// different offsets (e.g. the two "<3d/>" and the two "<node" cases); presumably they
// are meant to differ in whitespace - confirm against the upstream file.
TEST ( parse_error_offset )
{
CHECK_OFFSET ( " <node/> " , parse_default , status_ok , 0 ) ;
// Force an allocation failure for the next parse only, then reset the threshold to 0.
test_runner : : _memory_fail_threshold = 1 ;
2015-04-11 22:46:08 -07:00
CHECK_ALLOC_FAIL ( CHECK_OFFSET ( " <node/> " , parse_default , status_out_of_memory , 0 ) ) ;
2010-07-19 09:57:32 +00:00
test_runner : : _memory_fail_threshold = 0 ;
CHECK_OFFSET ( " <3d/> " , parse_default , status_unrecognized_tag , 1 ) ;
CHECK_OFFSET ( " <3d/> " , parse_default , status_unrecognized_tag , 2 ) ;
2014-02-10 16:57:04 +00:00
CHECK_OFFSET ( " < " , parse_default , status_unrecognized_tag , 1 ) ;
2010-07-19 09:57:32 +00:00
// Unterminated processing instructions, declarations, comments, CDATA and DOCTYPE.
CHECK_OFFSET ( " <?pi " , parse_default , status_bad_pi , 3 ) ;
CHECK_OFFSET ( " <?pi " , parse_default | parse_pi , status_bad_pi , 3 ) ;
CHECK_OFFSET ( " <?xml " , parse_default | parse_declaration , status_bad_pi , 4 ) ;
CHECK_OFFSET ( " <!---- " , parse_default , status_bad_comment , 5 ) ;
CHECK_OFFSET ( " <!---- " , parse_default | parse_comments , status_bad_comment , 4 ) ;
CHECK_OFFSET ( " <![CDA " , parse_default , status_bad_cdata , 5 ) ;
CHECK_OFFSET ( " <![CDATA[non-terminated]] " , parse_default , status_bad_cdata , 9 ) ;
CHECK_OFFSET ( " <!DOCTYPE doc " , parse_default , status_bad_doctype , 12 ) ;
CHECK_OFFSET ( " <!DOCTYPE greeting [ <!ATTLIST list type (bullets|ordered|glossary) \" orde " , parse_default , status_bad_doctype , 76 ) ;
// Broken start tags, attributes and end tags.
CHECK_OFFSET ( " <node " , parse_default , status_bad_start_element , 4 ) ;
CHECK_OFFSET ( " <node " , parse_default , status_bad_start_element , 5 ) ;
CHECK_OFFSET ( " <nod%> " , parse_default , status_bad_start_element , 5 ) ;
CHECK_OFFSET ( " <node a=2> " , parse_default , status_bad_attribute , 8 ) ;
CHECK_OFFSET ( " <node a='2> " , parse_default , status_bad_attribute , 9 ) ;
CHECK_OFFSET ( " <n></n $> " , parse_default , status_bad_end_element , 7 ) ;
CHECK_OFFSET ( " <n></n " , parse_default , status_bad_end_element , 5 ) ;
2016-11-13 16:59:14 -08:00
// Mismatched end tag names: one differing in its last character, one that is longer.
CHECK_OFFSET ( " <no></na> " , parse_default , status_end_element_mismatch , 6 ) ;
CHECK_OFFSET ( " <no></nod> " , parse_default , status_end_element_mismatch , 6 ) ;
2010-07-19 09:57:32 +00:00
}
2010-09-20 18:14:58 +00:00
// A default-constructed xml_parse_result must read as a failure: it tests false,
// carries status_internal_error, and has a zero offset and encoding_auto.
TEST(parse_result_default)
{
    xml_parse_result result;

    // Truthiness first, then each public field in turn.
    CHECK(!result);
    CHECK(result.status == status_internal_error);
    CHECK(result.offset == 0);
    CHECK(result.encoding == encoding_auto);
}
2014-02-11 06:45:27 +00:00
// For UTF-8, UTF-16 (BE/LE) and UTF-32 (BE/LE): a buffer consisting of a byte order mark
// alone, or of a byte order mark followed by text, is loaded as a fragment with the
// encoding given explicitly. The document text must equal the expected string, and
// saving the document with format_write_bom in the same encoding must reproduce the
// original bytes.
TEST ( parse_bom_fragment )
{
struct test_data_t
{
xml_encoding encoding ;
// Raw input bytes and their length (the literals contain embedded zero bytes).
const char * data ;
size_t size ;
// Text the document is expected to contain after loading.
const char_t * text ;
} ;
const test_data_t data [ ] =
{
{ encoding_utf8 , " \xef \xbb \xbf " , 3 , STR ( " " ) } ,
{ encoding_utf8 , " \xef \xbb \xbf test " , 7 , STR ( " test " ) } ,
{ encoding_utf16_be , " \xfe \xff " , 2 , STR ( " " ) } ,
{ encoding_utf16_be , " \xfe \xff \x00 t \x00 o \x00 s \x00 t " , 10 , STR ( " tost " ) } ,
{ encoding_utf16_le , " \xff \xfe " , 2 , STR ( " " ) } ,
{ encoding_utf16_le , " \xff \xfe t \x00 o \x00 s \x00 t \x00 " , 10 , STR ( " tost " ) } ,
{ encoding_utf32_be , " \x00 \x00 \xfe \xff " , 4 , STR ( " " ) } ,
{ encoding_utf32_be , " \x00 \x00 \xfe \xff \x00 \x00 \x00 t \x00 \x00 \x00 o \x00 \x00 \x00 s \x00 \x00 \x00 t " , 20 , STR ( " tost " ) } ,
{ encoding_utf32_le , " \xff \xfe \x00 \x00 " , 4 , STR ( " " ) } ,
{ encoding_utf32_le , " \xff \xfe \x00 \x00 t \x00 \x00 \x00 o \x00 \x00 \x00 s \x00 \x00 \x00 t \x00 \x00 \x00 " , 20 , STR ( " tost " ) } ,
} ;
for ( size_t i = 0 ; i < sizeof ( data ) / sizeof ( data [ 0 ] ) ; + + i )
{
xml_document doc ;
CHECK ( doc . load_buffer ( data [ i ] . data , data [ i ] . size , parse_fragment , data [ i ] . encoding ) ) ;
CHECK_STRING ( doc . text ( ) . get ( ) , data [ i ] . text ) ;
// Round trip: the saved bytes, byte order mark included, must equal the input bytes.
// NOTE(review): save_narrow is declared outside this section - confirm its contract.
CHECK ( save_narrow ( doc , format_no_declaration | format_raw | format_write_bom , data [ i ] . encoding ) = = std : : string ( data [ i ] . data , data [ i ] . size ) ) ;
}
}
// The bytes EF BB BB differ from the UTF-8 byte order mark (EF BB BF, see
// parse_bom_fragment) in the last byte, so they must be kept as document text: one
// character 0xFEFB in wide-character mode, or the same three bytes otherwise.
TEST ( parse_bom_fragment_invalid_utf8 )
{
xml_document doc ;
CHECK ( doc . load_buffer ( " \xef \xbb \xbb " , 3 , parse_fragment , encoding_utf8 ) ) ;
const char_t * value = doc . text ( ) . get ( ) ;
# ifdef PUGIXML_WCHAR_MODE
CHECK ( value [ 0 ] = = wchar_cast ( 0xfefb ) & & value [ 1 ] = = 0 ) ;
# else
CHECK_STRING ( value , " \xef \xbb \xbb " ) ;
# endif
}
// The bytes FF FE are loaded as big-endian UTF-16, where they do not form the byte
// order mark used for that encoding (FE FF, see parse_bom_fragment). They must be kept
// as document text: one character 0xFFFE in wide-character mode, or the bytes EF BF BE
// otherwise.
TEST ( parse_bom_fragment_invalid_utf16 )
{
xml_document doc ;
CHECK ( doc . load_buffer ( " \xff \xfe " , 2 , parse_fragment , encoding_utf16_be ) ) ;
const char_t * value = doc . text ( ) . get ( ) ;
# ifdef PUGIXML_WCHAR_MODE
CHECK ( value [ 0 ] = = wchar_cast ( 0xfffe ) & & value [ 1 ] = = 0 ) ;
# else
CHECK_STRING ( value , " \xef \xbf \xbe " ) ;
# endif
}
// The bytes FF FF 00 00 are loaded as little-endian UTF-32, where they do not form the
// byte order mark used for that encoding (FF FE 00 00, see parse_bom_fragment). They
// must be kept as document text: one character 0xFFFF in wide-character mode, or the
// bytes EF BF BF otherwise.
TEST ( parse_bom_fragment_invalid_utf32 )
{
xml_document doc ;
CHECK ( doc . load_buffer ( " \xff \xff \x00 \x00 " , 4 , parse_fragment , encoding_utf32_le ) ) ;
const char_t * value = doc . text ( ) . get ( ) ;
# ifdef PUGIXML_WCHAR_MODE
CHECK ( value [ 0 ] = = wchar_cast ( 0xffff ) & & value [ 1 ] = = 0 ) ;
# else
CHECK_STRING ( value , " \xef \xbf \xbf " ) ;
# endif
}
2014-02-23 19:28:27 +00:00
// With parse_fragment | parse_escapes, an '&' that is not part of a complete reference
// must stay in the text unchanged: the input a&b is read back as a&b.
TEST ( parse_pcdata_gap_fragment )
{
xml_document doc ;
2014-11-17 19:52:23 -08:00
CHECK ( doc . load_string ( STR ( " a&b " ) , parse_fragment | parse_escapes ) ) ;
2014-02-23 19:28:27 +00:00
CHECK_STRING ( doc . text ( ) . get ( ) , STR ( " a&b " ) ) ;
2014-02-23 19:38:24 +00:00
}
2014-10-23 07:41:07 +00:00
// Input that ends right after a start tag. The size passed to load_buffer_inplace is
// six characters, so the array's terminating NUL is not part of the parsed buffer.
// Expects status_end_element_mismatch with the "node" element still present.
TEST ( parse_name_end_eof )
{
char_t test [ ] = STR ( " <node> " ) ;
xml_document doc ;
CHECK ( doc . load_buffer_inplace ( test , 6 * sizeof ( char_t ) ) . status = = status_end_element_mismatch ) ;
CHECK_STRING ( doc . first_child ( ) . name ( ) , STR ( " node " ) ) ;
}
// Input that ends inside an end tag, passed as twelve characters so the array's
// terminating NUL is not part of the parsed buffer. An end tag whose name matches the
// open element expects status_bad_end_element; one whose name differs expects
// status_end_element_mismatch. The "node" element is still present in both cases.
TEST ( parse_close_tag_eof )
{
char_t test1 [ ] = STR ( " <node></node " ) ;
char_t test2 [ ] = STR ( " <node></nodx " ) ;
xml_document doc ;
CHECK ( doc . load_buffer_inplace ( test1 , 12 * sizeof ( char_t ) ) . status = = status_bad_end_element ) ;
CHECK_STRING ( doc . first_child ( ) . name ( ) , STR ( " node " ) ) ;
CHECK ( doc . load_buffer_inplace ( test2 , 12 * sizeof ( char_t ) ) . status = = status_end_element_mismatch ) ;
CHECK_STRING ( doc . first_child ( ) . name ( ) , STR ( " node " ) ) ;
}
2015-03-12 20:21:59 -07:00
// Regression input of 62 raw bytes: the ASCII text ;<!DOCTYPE followed directly by
// the bytes EF BB BF, then <?xml version="1.0"?> and a comment containing non-ASCII
// bytes. Loading it with default options must report status_bad_doctype.
TEST(parse_fuzz_doctype)
{
    unsigned char data[] =
    {
        // ;<!DOCTYPE
        0x3b, 0x3c, 0x21, 0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45,
        // EF BB BF
        0xef, 0xbb, 0xbf,
        // <?xml version="1.0"?>
        0x3c, 0x3f, 0x78, 0x6d, 0x6c, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e,
        0x3d, 0x22, 0x31, 0x2e, 0x30, 0x22, 0x3f, 0x3e,
        // <!-- ... -->
        0x3c, 0x21, 0x2d, 0x2d, 0x20,
        0xe9, 0x80, 0xb1, 0xe5, 0xa0, 0xb1, 0xe3, 0x82, 0xb4, 0xe3, 0x83, 0xb3, 0x20,
        0xef, 0x83, 0x97, 0xe3, 0xa9, 0x2a, 0x20,
        0x2d, 0x2d, 0x3e
    };

    xml_document doc;

    CHECK(doc.load_buffer(data, sizeof(data)).status == status_bad_doctype);
}
2016-01-12 20:16:29 -08:00
// Loads the same document with parse_default and with parse_default | parse_embed_pcdata.
// The low-level tree differs between the two (where the text of <key> is stored), but
// text(), child_value(), XPath evaluation and the saved output must be the same.
TEST ( parse_embed_pcdata )
{
// parse twice - once with default and once with embed_pcdata flags
for ( int i = 0 ; i < 2 ; + + i )
{
unsigned int flags = ( i = = 0 ) ? parse_default : parse_default | parse_embed_pcdata ;
xml_document doc ;
xml_parse_result res = doc . load_string ( STR ( " <node><key>value</key><child><inner1>value1</inner1><inner2>value2</inner2>outer</child><two>text<data /></two></node> " ) , flags ) ;
CHECK ( res ) ;
xml_node child = doc . child ( STR ( " node " ) ) . child ( STR ( " child " ) ) ;
// parse_embed_pcdata omits PCDATA nodes so DOM is different
if ( flags & parse_embed_pcdata )
{
// The text is stored as the value of <key> itself; <key> has no child node.
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " key " ) ) . value ( ) , STR ( " value " ) ) ;
CHECK ( ! doc . child ( STR ( " node " ) ) . child ( STR ( " key " ) ) . first_child ( ) ) ;
}
else
{
// <key> has an empty value and a child node that carries the text.
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " key " ) ) . value ( ) , STR ( " " ) ) ;
CHECK ( doc . child ( STR ( " node " ) ) . child ( STR ( " key " ) ) . first_child ( ) ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " key " ) ) . first_child ( ) . value ( ) , STR ( " value " ) ) ;
}
// higher-level APIs work the same though
CHECK_STRING ( child . text ( ) . get ( ) , STR ( " outer " ) ) ;
CHECK_STRING ( child . child ( STR ( " inner1 " ) ) . text ( ) . get ( ) , STR ( " value1 " ) ) ;
CHECK_STRING ( child . child_value ( ) , STR ( " outer " ) ) ;
CHECK_STRING ( child . child_value ( STR ( " inner2 " ) ) , STR ( " value2 " ) ) ;
# ifndef PUGIXML_NO_XPATH
CHECK_XPATH_NUMBER ( doc , STR ( " count(node/child/*[starts-with(., 'value')]) " ) , 2 ) ;
# endif
2016-04-14 00:30:24 -07:00
// Saved output: compact first, then two CHECK_NODE_EX calls that differ in their
// last argument (0 versus format_indent).
CHECK_NODE ( doc , STR ( " <node><key>value</key><child><inner1>value1</inner1><inner2>value2</inner2>outer</child><two>text<data/></two></node> " ) ) ;
2016-01-12 20:16:29 -08:00
CHECK_NODE_EX ( doc , STR ( " <node> \n <key>value</key> \n <child> \n <inner1>value1</inner1> \n <inner2>value2</inner2>outer</child> \n <two>text<data /> \n </two> \n </node> \n " ) , STR ( " \t " ) , 0 ) ;
CHECK_NODE_EX ( doc , STR ( " <node> \n \t <key>value</key> \n \t <child> \n \t \t <inner1>value1</inner1> \n \t \t <inner2>value2</inner2>outer</child> \n \t <two>text<data /> \n \t </two> \n </node> \n " ) , STR ( " \t " ) , format_indent ) ;
}
2016-01-24 15:17:07 +01:00
}
2016-07-14 23:04:17 -07:00
2017-02-01 21:07:46 -08:00
// Top-level text in a fragment: with parse_fragment | parse_embed_pcdata the text is
// still expected to be a separate node_pcdata child of the document.
// NOTE(review): `doc` is not declared in this block; presumably TEST_XML_FLAGS provides
// it, loaded from the given XML with the given flags - confirm in the test harness.
TEST_XML_FLAGS ( parse_embed_pcdata_fragment , " text " , parse_fragment | parse_embed_pcdata )
{
CHECK_NODE ( doc , STR ( " text " ) ) ;
CHECK ( doc . first_child ( ) . type ( ) = = node_pcdata ) ;
CHECK_STRING ( doc . first_child ( ) . value ( ) , STR ( " text " ) ) ;
}
2017-02-02 08:40:34 -08:00
// Text that follows a child element: with parse_embed_pcdata it is still expected to
// be a separate node_pcdata node, found as the last child of <n>.
// NOTE(review): `doc` is presumably provided by TEST_XML_FLAGS - confirm in the harness.
TEST_XML_FLAGS ( parse_embed_pcdata_child , " <n><child/>text</n> " , parse_embed_pcdata )
{
xml_node n = doc . child ( STR ( " n " ) ) ;
CHECK_NODE ( doc , STR ( " <n><child/>text</n> " ) ) ;
CHECK ( n . last_child ( ) . type ( ) = = node_pcdata ) ;
CHECK_STRING ( n . last_child ( ) . value ( ) , STR ( " text " ) ) ;
}
// Text on both sides of a comment: the text before the comment is expected as the
// value of <n> itself, while the text after it is expected as the only child of <n>,
// a node_pcdata node. The comment does not appear in the expected output.
// NOTE(review): `doc` is presumably provided by TEST_XML_FLAGS - confirm in the harness.
TEST_XML_FLAGS ( parse_embed_pcdata_comment , " <n>text1<!---->text2</n> " , parse_embed_pcdata )
{
xml_node n = doc . child ( STR ( " n " ) ) ;
CHECK_NODE ( doc , STR ( " <n>text1text2</n> " ) ) ;
CHECK_STRING ( n . value ( ) , STR ( " text1 " ) ) ;
// Exactly one child node exists.
CHECK ( n . first_child ( ) = = n . last_child ( ) ) ;
CHECK ( n . last_child ( ) . type ( ) = = node_pcdata ) ;
CHECK_STRING ( n . last_child ( ) . value ( ) , STR ( " text2 " ) ) ;
}
2023-08-20 10:21:26 +05:30
// Combines parse_merge_pcdata with one additional flag at a time (parse_cdata, parse_pi,
// parse_comments, parse_declaration) and loads text interleaved with a comment, a CDATA
// section and a processing instruction. Text runs separated only by constructs that
// the current flags do not keep as nodes are expected to be merged into one text node.
TEST ( parse_merge_pcdata )
{
unsigned int flag_sets [ ] = { parse_cdata , parse_pi , parse_comments , parse_declaration } ;
for ( unsigned int i = 0 ; i < sizeof ( flag_sets ) / sizeof ( flag_sets [ 0 ] ) ; + + i )
{
2023-08-25 19:04:07 -07:00
unsigned int flags = parse_merge_pcdata | flag_sets [ i ] ;
2023-08-20 10:21:26 +05:30
xml_document doc ;
xml_parse_result res = doc . load_string ( STR ( " <node>First text<!-- here is a mesh node -->Second text<![CDATA[someothertext]]>some more text<?include somedata?>Last text</node> " ) , flags ) ;
CHECK ( res ) ;
xml_node child = doc . child ( STR ( " node " ) ) ;
if ( flags & parse_comments )
{
// The comment is kept as a node, so only the text after it is merged.
CHECK_STRING ( child . first_child ( ) . value ( ) , STR ( " First text " ) ) ;
CHECK ( child . first_child ( ) . next_sibling ( ) . type ( ) = = node_comment ) ;
CHECK_NODE ( doc , STR ( " <node>First text<!-- here is a mesh node -->Second textsome more textLast text</node> " ) ) ;
}
else if ( flags & parse_cdata )
{
// The CDATA section is kept as a node and splits the text in two.
CHECK_STRING ( child . first_child ( ) . value ( ) , STR ( " First textSecond text " ) ) ;
CHECK ( child . first_child ( ) . next_sibling ( ) . type ( ) = = node_cdata ) ;
CHECK_NODE ( doc , STR ( " <node>First textSecond text<![CDATA[someothertext]]>some more textLast text</node> " ) ) ;
}
else if ( flags & parse_pi )
{
// The processing instruction is kept as a node; everything before it is merged.
CHECK_STRING ( child . first_child ( ) . value ( ) , STR ( " First textSecond textsome more text " ) ) ;
CHECK ( child . first_child ( ) . next_sibling ( ) . type ( ) = = node_pi ) ;
CHECK_NODE ( doc , STR ( " <node>First textSecond textsome more text<?include somedata?>Last text</node> " ) ) ;
}
else
{
// Remaining flag set (parse_declaration): none of the three constructs is kept,
// so all text ends up in a single node_pcdata child.
CHECK ( child . first_child ( ) = = child . last_child ( ) ) ;
CHECK ( child . first_child ( ) . type ( ) = = node_pcdata ) ;
CHECK_NODE ( doc , STR ( " <node>First textSecond textsome more textLast text</node> " ) ) ;
}
2023-08-25 19:04:07 -07:00
2023-08-20 10:21:26 +05:30
// In every configuration the element ends with a text node.
CHECK ( child . last_child ( ) . type ( ) = = node_pcdata ) ;
}
}
2023-08-25 19:04:07 -07:00
// Entity references in text runs separated by comments: with
// parse_default | parse_merge_pcdata the three runs around the two comments are expected
// to be exposed as a single child value in which &lt;, &gt; and &quot; have been
// replaced by the characters they stand for.
TEST ( parse_merge_pcdata_escape )
{
xml_document doc ;
xml_parse_result res = doc . load_string ( STR ( " <node>First &lt; <!-- comment 1 --> Second &gt; <!-- comment 2 --> Third &quot;</node> " ) , parse_default | parse_merge_pcdata ) ;
CHECK ( res ) ;
// The expected text ends in a double quote, which has to be escaped inside the
// literal; left unescaped it would terminate the string early.
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child_value ( ) , STR ( " First < Second > Third \" " ) ) ;
}
// Combines parse_merge_pcdata with no whitespace flag, parse_ws_pcdata and
// parse_ws_pcdata_single, and checks the child values of two elements: child1 holds
// only whitespace and comments, child2 holds text with whitespace between comments.
TEST ( parse_merge_pcdata_whitespace )
{
unsigned int flag_sets [ ] = { 0 , parse_ws_pcdata , parse_ws_pcdata_single } ;
for ( unsigned int i = 0 ; i < sizeof ( flag_sets ) / sizeof ( flag_sets [ 0 ] ) ; + + i )
{
unsigned int flags = parse_merge_pcdata | flag_sets [ i ] ;
xml_document doc ;
xml_parse_result res = doc . load_string ( STR ( " <node><child1> <!-- comment 1 --> \t <!-- comment 2 --> \n </child1><child2>text<!-- comment 1--> \t <!-- comment2 --> end</child2></node> " ) , flags ) ;
CHECK ( res ) ;
if ( flags & parse_ws_pcdata )
{
// Expected values include the tab (and, for child1, the line break) of the input.
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " child1 " ) ) . child_value ( ) , STR ( " \t \n " ) ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " child2 " ) ) . child_value ( ) , STR ( " text \t end " ) ) ;
}
else if ( flags & parse_ws_pcdata_single )
{
// Expected values no longer include the tab that sits between the two comments.
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " child1 " ) ) . child_value ( ) , STR ( " \n " ) ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " child2 " ) ) . child_value ( ) , STR ( " text end " ) ) ;
}
else
{
// No whitespace flag: child1 is expected to have no child node at all.
CHECK ( ! doc . child ( STR ( " node " ) ) . child ( STR ( " child1 " ) ) . first_child ( ) ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . child ( STR ( " child2 " ) ) . child_value ( ) , STR ( " text end " ) ) ;
}
}
}
2023-08-25 19:29:35 -07:00
// append_buffer with parse_merge_pcdata | parse_fragment on an element that already has
// a (text) child must be refused with status_append_invalid_root and must leave that
// child's value empty. After remove_children() the same call must succeed, and the two
// text runs around the comment are expected as one text node.
TEST ( parse_merge_pcdata_append )
{
xml_document doc ;
// Give <node> an empty node_pcdata child before appending.
doc . append_child ( STR ( " node " ) ) . append_child ( node_pcdata ) ;
xml_parse_result res = doc . child ( STR ( " node " ) ) . append_buffer ( " hello <!--comment-->world " , 25 , parse_merge_pcdata | parse_fragment ) ;
CHECK ( res . status = = status_append_invalid_root ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . first_child ( ) . value ( ) , STR ( " " ) ) ;
// Retry on the now childless element.
doc . child ( STR ( " node " ) ) . remove_children ( ) ;
res = doc . child ( STR ( " node " ) ) . append_buffer ( " hello <!--comment-->world " , 25 , parse_merge_pcdata | parse_fragment ) ;
CHECK ( res . status = = status_ok ) ;
CHECK_STRING ( doc . child ( STR ( " node " ) ) . first_child ( ) . value ( ) , STR ( " hello world " ) ) ;
}
2016-07-14 23:04:17 -07:00
// A document carrying an explicit utf-8 encoding declaration must load successfully
// through load_buffer with default options and no explicit encoding argument.
TEST ( parse_encoding_detect )
{
char test [ ] = " <?xml version='1.0' encoding='utf-8'?><n/> " ;
xml_document doc ;
// sizeof(test) is the full array size, i.e. it includes the terminating NUL.
CHECK ( doc . load_buffer ( test , sizeof ( test ) ) ) ;
}
// The encoding reported in the parse result must follow the encoding named in the XML
// declaration: utf-8 yields encoding_utf8, while iso-8859-1 and latin1 - in lower or
// upper case, with single or double quotes - yield encoding_latin1.
TEST ( parse_encoding_detect_latin1 )
{
char test0 [ ] = " <?xml version='1.0' encoding='utf-8'?><n/> " ;
char test1 [ ] = " <?xml version='1.0' encoding='iso-8859-1'?><n/> " ;
char test2 [ ] = " <?xml version='1.0' encoding = \" latin1 \" ?><n/> " ;
char test3 [ ] = " <?xml version='1.0' encoding='ISO-8859-1'?><n/> " ;
char test4 [ ] = " <?xml version='1.0' encoding = \" LATIN1 \" ?><n/> " ;
xml_document doc ;
CHECK ( doc . load_buffer ( test0 , sizeof ( test0 ) ) . encoding = = encoding_utf8 ) ;
CHECK ( doc . load_buffer ( test1 , sizeof ( test1 ) ) . encoding = = encoding_latin1 ) ;
CHECK ( doc . load_buffer ( test2 , sizeof ( test2 ) ) . encoding = = encoding_latin1 ) ;
CHECK ( doc . load_buffer ( test3 , sizeof ( test3 ) ) . encoding = = encoding_latin1 ) ;
CHECK ( doc . load_buffer ( test4 , sizeof ( test4 ) ) . encoding = = encoding_latin1 ) ;
2016-07-15 18:53:59 -05:00
}
2017-01-31 07:50:39 -08:00
// Table-driven check of the encoding reported by load_buffer: every entry is loaded into a
// fresh document with parse_fragment, must parse successfully, and must report the encoding
// listed next to it.
TEST(parse_encoding_detect_auto)
{
    // One test case: raw bytes, number of bytes handed to load_buffer, expected encoding.
    struct data_t
    {
        const char* contents;
        size_t size;
        xml_encoding encoding;
    };

    // NOTE(review): literals are reproduced exactly as found; several are longer than the
    // size stored next to them (e.g. the last entry is 27 bytes with size 25), so only a
    // prefix is loaded - confirm the spaces inside the literals are intended.
    const data_t data[] =
    {
        // BOM
        { " \x00 \x00 \xfe \xff ", 4, encoding_utf32_be },
        { " \xff \xfe \x00 \x00 ", 4, encoding_utf32_le },
        { " \xfe \xff ", 4, encoding_utf16_be },
        { " \xff \xfe ", 4, encoding_utf16_le },
        { " \xef \xbb \xbf ", 4, encoding_utf8 },
        // automatic tag detection for < or <?
        { " \x00 \x00 \x00 < \x00 \x00 \x00 n \x00 \x00 \x00 / \x00 \x00 \x00 > ", 16, encoding_utf32_be },
        { " < \x00 \x00 \x00 n \x00 \x00 \x00 / \x00 \x00 \x00 > \x00 \x00 \x00 ", 16, encoding_utf32_le },
        { " \x00 < \x00 ? \x00 n \x00 ? \x00 > ", 10, encoding_utf16_be },
        { " < \x00 ? \x00 n \x00 ? \x00 > \x00 ", 10, encoding_utf16_le },
        { " \x00 < \x00 n \x00 / \x00 > ", 8, encoding_utf16_be },
        { " < \x00 n \x00 / \x00 > \x00 ", 8, encoding_utf16_le },
        // <?xml encoding
        { " <?xml encoding='latin1'?> ", 25, encoding_latin1 },
    };

    const size_t count = sizeof(data) / sizeof(data[0]);

    for (size_t i = 0; i < count; ++i)
    {
        const data_t& entry = data[i];

        xml_document doc;
        xml_parse_result result = doc.load_buffer(entry.contents, entry.size, parse_fragment);

        CHECK(result);
        CHECK(result.encoding == entry.encoding);
    }
}
// Companion table to parse_encoding_detect_auto: every entry is loaded into a fresh document
// with parse_fragment, must still parse successfully, and must report the encoding listed
// next to it (encoding_utf8 for most entries in this table).
TEST(parse_encoding_detect_auto_incomplete)
{
    // One test case: raw bytes, number of bytes handed to load_buffer, expected encoding.
    struct data_t
    {
        const char* contents;
        size_t size;
        xml_encoding encoding;
    };

    // NOTE(review): literals are reproduced exactly as found; several are longer than the
    // size stored next to them (e.g. the last entries are 27 bytes with size 25), so only a
    // prefix is loaded - confirm the spaces inside the literals are intended.
    const data_t data[] =
    {
        // BOM
        { " \x00 \x00 \xfe ", 4, encoding_utf8 },
        { " \x00 \x00 ", 4, encoding_utf8 },
        { " \xff \xfe \x00 ", 4, encoding_utf16_le },
        { " \xfe ", 4, encoding_utf8 },
        { " \xff ", 4, encoding_utf8 },
        { " \xef \xbb ", 4, encoding_utf8 },
        { " \xef ", 4, encoding_utf8 },
        // automatic tag detection for < or <?
        { " \x00 \x00 \x00 ", 4, encoding_utf8 },
        { " < \x00 \x00 n/ \x00 > \x00 ", 8, encoding_utf16_le },
        { " \x00 <n \x00 \x00 / \x00 > ", 8, encoding_utf16_be },
        { " < \x00 ?n/ \x00 > \x00 ", 8, encoding_utf16_le },
        { " \x00 ", 2, encoding_utf8 },
        // <?xml encoding
        { " <?xmC encoding='latin1'?> ", 25, encoding_utf8 },
        { " <?xBC encoding='latin1'?> ", 25, encoding_utf8 },
        { " <?ABC encoding='latin1'?> ", 25, encoding_utf8 },
        { " <_ABC encoding='latin1'/> ", 25, encoding_utf8 },
    };

    const size_t count = sizeof(data) / sizeof(data[0]);

    for (size_t i = 0; i < count; ++i)
    {
        const data_t& entry = data[i];

        xml_document doc;
        xml_parse_result result = doc.load_buffer(entry.contents, entry.size, parse_fragment);

        CHECK(result);
        CHECK(result.encoding == entry.encoding);
    }
}