tpl/lang/perl/tplxml
2013-03-12 16:38:58 -04:00

307 lines
11 KiB
Perl
Executable File

#!/usr/bin/perl
# tplxml
# by Troy Hanson 27 Feb 2006
# convert between tpl and XML
# Copyright (c) 2005-2006, Troy Hanson http://tpl.sourceforge.net
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
use strict;
use warnings;
use XML::Parser;
use FindBin;
use lib "$FindBin::Bin"; #locate Tpl.pm in same directory as tplxml
use Tpl;
use bytes;
sub quote_chars {
my $str = shift;
$$str =~ s/&/&/g; #order matters
$$str =~ s/</&lt;/g;
$$str =~ s/>/&gt;/g;
}
sub unquote_chars {
my $str = shift;
$$str =~ s/&lt;/</g;
$$str =~ s/&gt;/>/g;
$$str =~ s/&amp;/&/g;
}
sub hex_chars {
my $str = shift;
my $hex;
for(my $i=0; $i < length $$str; $i++) {
my $byte = unpack("C",substr($$str,$i,1));
$hex .= sprintf("%02x", $byte);
}
$$str = $hex;
}
sub unhex_chars {
my $str = shift;
my $bytes;
for(my $i=0; $i < length $$str; $i+=2) {
my $hexbyte = substr($$str,$i,2);
$bytes .= pack("C", hex($hexbyte));
}
$$str= $bytes;
}
sub tpl2xml {
my $src = shift;
my (@out,@args);
# build list of references to hold output of unpacking
my ($fmt,@fxlens) = peek_fmt($src);
for(my ($i,$j,$k)=(0,0,0);$i<length($fmt);$i++) {
push @args, [] if substr($fmt,$i,2) =~ /^[iucfIU]\#$/; # octothorpic
push @args, \$out[$j++] if substr($fmt,$i,2) =~ /^[iuBscfIU][^\#]*$/;
push @args, $fxlens[$k++] if substr($fmt,$i,1) eq "#";
}
my $tpl = Tpl->tpl_map($fmt,@args);
$tpl->tpl_load($src);
$tpl->tpl_unpack(0);
# construct xml preamble
my $pre = qq{<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE tplxml [
<!ELEMENT tplxml (A|i|u|I|U|B|s|c|f|fx)*>
<!ATTLIST tplxml
format CDATA #REQUIRED
fxlens CDATA #REQUIRED
>
<!ELEMENT i (#PCDATA)>
<!ELEMENT u (#PCDATA)>
<!ELEMENT I (#PCDATA)>
<!ELEMENT U (#PCDATA)>
<!ELEMENT B (#PCDATA)>
<!ELEMENT s (#PCDATA)>
<!ELEMENT c (#PCDATA)>
<!ELEMENT f (#PCDATA)>
<!ELEMENT A (el)*>
<!ELEMENT el (A|i|u|I|U|B|s|c|f|fx)+>
<!ELEMENT fx (i|u|I|U|c|f)*>
]>\n};
print $pre;
my $fxattr = join ",", @fxlens;
print qq{<tplxml format="$fmt" fxlens="$fxattr">\n};
tpl2xml_node($tpl,"A0",1);
print qq{</tplxml>\n};
}
sub tpl2xml_node {
my $tpl = shift;
my $node = shift;
my $indent = shift;
my $i = " " x $indent;
for my $c (@{ $tpl->{$node} }) {
if (ref($c)) {
my ($type,$addr,$fxlen) = @$c;
quote_chars $addr if $type eq 's';
hex_chars $addr if $type eq 'B';
if (not defined $fxlen) {
print qq{$i<$type>$$addr</$type>\n}; # singleton
} else {
# all elements of octothorpic fixed-len array
print qq{$i<fx>\n};
print qq{$i <$type>$addr->[$_]</$type>\n} for (0..$fxlen-1);
print qq{$i</fx>\n};
}
} else {
# A node
print qq{$i<A>\n};
my $idx = $1 if $c =~ /^A(\d+)$/;
while($tpl->tpl_unpack($idx) > 0) {
print qq{$i<el>\n};
tpl2xml_node($tpl,$c,$indent+1);
print qq{$i</el>\n};
}
print qq{$i</A>\n};
}
}
}
sub xml2tpl {
my $src = shift;
my $p = new XML::Parser( Style => 'Tree' );
my $tree = $p->parse($$src);
die "not a tpl xml document" unless $tree->[0] eq 'tplxml';
die "no format attribute" unless defined $tree->[1][0]->{format};
my $fmt = $tree->[1][0]->{format};
die "no fxlens attribute" unless defined $tree->[1][0]->{fxlens};
my @fxlens = split /,/, $tree->[1][0]->{fxlens};
# build list of references to variables for use in packing
my (@args,@out);
for(my ($i,$j,$k)=(0,0,0);$i<length($fmt);$i++) {
push @args, [] if substr($fmt,$i,2) =~ /^[iucfIU]\#$/; # octothorpic
push @args, \$out[$j++] if substr($fmt,$i,2) =~ /^[iuBscfIU][^\#]*$/;
push @args, $fxlens[$k++] if substr($fmt,$i,1) eq "#";
}
my $tpl = Tpl->tpl_map($fmt,@args);
xml2tpl_dfs($tpl,$tree->[1]);
$tpl->tpl_pack(0);
print $tpl->tpl_dump;
}
sub xml2tpl_dfs {
my $tpl = shift;
my $xml = shift;
my @next = @$xml; # ($attr,@tagvals) = $$xml;
shift @next; # discard <tplxml> attributes
my @tpltoks = @{ $tpl->{"A0"} }; #expected tokens when parsing
TAG: while (@next) {
my $xmltag = shift @next;
my $xmlval = shift @next;
# skip whitespace/newlines embedded between tags
next TAG if ($xmltag eq "0" and $xmlval =~ /^\s+$/);
# pack if necessary. consume tokens by look-ahead until non-pack token.
while (@tpltoks > 0 and $tpltoks[0] =~ /^P(\d+)$/) {
shift @tpltoks;
$tpl->tpl_pack($1);
}
# If tpl format specifies a non-array type should appear at this point
# in the XML tree, then validate the type matches the format and assign
# the value from the XML to the variable from which it'll be packed
my $tpltoken = shift @tpltoks;
my $octothorpic=0;
if (ref $tpltoken) {
my ($tpltype,$tpladdr,$fxlen) = @$tpltoken;
# This block is how we handle octothorpic (fixed length) arrays.
# If $fxlen is defined then an octothorpic <fx> node is expected.
# After finding the <fx> node we put its subnodes (the array elements)
# onto the @next array for immediate parsing and we use $fxlen:$remaining
# as a signet version of the $fxlen to induce the element-processing loop.
if (defined $fxlen) {
if ($fxlen =~ /^(\d+):(\d+)$/) { # $1==orig $fxlen, $2==remain $fxlen
$octothorpic=1;
unshift @tpltoks, [$tpltype, $tpladdr, $1.":".($2-1)] if $2 > 1;
} else { # octothorpic array expected; look for <fx> parent node
die "expected '<fx>' but got '<$xmltag>'" unless $xmltag eq 'fx';
@{ $tpladdr } = (); # Empty accumulator array for octothorpic values
unshift @tpltoks, [$tpltype, $tpladdr, "$fxlen:$fxlen"]; # x:x signet
shift @$xmlval; # discard 'A' attributes
unshift @next, @$xmlval; #parse xml subtree now (dfs)
next TAG; # proceed to children of <fx> node
}
}
if ($tpltype ne $xmltag) {
die "mismatch: xml has '$xmltag' where format specifies '$tpltype'";
}
# expect @$xmlval to be ({},0,'value') i.e. a single, terminal text node
if (@$xmlval > 3 || $xmlval->[1] ne '0') {
die "error: xml tag '$xmltag' cannot enclose sub-tags";
}
if ($octothorpic) {
push @{ $tpladdr }, $xmlval->[2];
} else {
$$tpladdr = $xmlval->[2];
}
unquote_chars $tpladdr if $tpltype eq 's';
unhex_chars $tpladdr if $tpltype eq 'B';
} elsif ($tpltoken =~ /^A(\d+)$/) {
# tpl format specifies an array should appear at this point in the XML
if ($xmltag ne 'A') {
die "mismatch: xml has '$xmltag' where format specifies 'A'";
}
shift @$xmlval; # discard 'A' attributes
# form token that means "replace me with tokens from A(n), x times"
# (where x is the number of elements contained by this array).
my $array_count=0;
for(my $i=0; $i < @$xmlval; $i+=2) {
$array_count++ if $xmlval->[$i] eq 'el';
}
unshift @tpltoks, "N$1:$array_count" if $array_count > 0;
unshift @next, @$xmlval; #parse xml subtree now (dfs)
} elsif ($tpltoken =~ /^N(\d+):(\d+)$/) {
if ($xmltag ne "el") {
die "mismatch: xml has '$xmltag' where array 'el' is expected";
}
# prepend A$1's tokens (and decremented N:count) to expected tokens
my ($n,$elsleft) = ($1, ($2 - 1));
unshift @tpltoks, "N$n:$elsleft" if $elsleft > 0;
unshift @tpltoks, "P$n"; # "pack me now" token
unshift @tpltoks, @{ $tpl->{"A$1"} };
shift @$xmlval; # discard 'el' attributes
unshift @next, @$xmlval; # proceed to parse el subtree (dfs)
} else {
die "internal error, unexpected token $tpltoken";
}
}
# pack if necessary. consume tokens by look-ahead until non-pack token.
while (@tpltoks > 0 and $tpltoks[0] =~ /^P(\d+)$/) {
shift @tpltoks;
$tpl->tpl_pack($1);
}
if (@tpltoks > 0) {
die "error: end of xml document reached but format requires more data";
}
}
sub peek_fmt {
my $buf = shift;
die "invalid tpl file" unless ($$buf =~ /^tpl/);
my $flags = CORE::unpack("C", substr($$buf,3,1));
my $UF = ($flags & 1) ? "N" : "V"; # big or little endian fxlens
my $fmt = (CORE::unpack("Z*", substr($$buf,8)));
my $num_octothorpes = scalar (my @o = ($fmt =~ /#/g));
my @fxlens;
my $fx = 8 + length($fmt) + 1;
for(my $i=0; $i < $num_octothorpes; $i++) {
my $fxlen_bytes = substr($$buf,$fx,4);
my $fxlen = unpack($UF, $fxlen_bytes);
push @fxlens, $fxlen;
$fx += 4;
}
return ($fmt,@fxlens);
}
##########################################################################
# Slurp input file, auto-detect if conversion is to tpl or XML, and run.
##########################################################################
undef $/;
my $src = <>;
our $to = (substr($src,0,3) eq "tpl") ? "xml" : "tpl";
xml2tpl(\$src) if $to eq "tpl";
tpl2xml(\$src) if $to eq "xml";