3227992e71
All checks were successful
rpcrypto-build / build (Debug, himix200.toolchain.cmake) (push) Successful in 1m2s
rpcrypto-build / build (Debug, hisiv510.toolchain.cmake) (push) Successful in 1m3s
rpcrypto-build / build (Release, himix200.toolchain.cmake) (push) Successful in 1m14s
linux-hisiv500-gcc / linux-gcc-hisiv500 (push) Successful in 1m15s
rpcrypto-build / build (Release, hisiv510.toolchain.cmake) (push) Successful in 1m20s
linux-mips64-gcc / linux-gcc-mips64el (push) Successful in 1m39s
linux-x64-gcc / linux-gcc (push) Successful in 1m51s
rpcrypto-build / build (Debug, hisiv510.toolchain.cmake) (pull_request) Successful in 57s
rpcrypto-build / build (Release, himix200.toolchain.cmake) (pull_request) Successful in 1m2s
rpcrypto-build / build (Debug, himix200.toolchain.cmake) (pull_request) Successful in 1m10s
rpcrypto-build / build (Release, hisiv510.toolchain.cmake) (pull_request) Successful in 1m5s
linux-hisiv500-gcc / linux-gcc-hisiv500 (pull_request) Successful in 1m16s
linux-mips64-gcc / linux-gcc-mips64el (pull_request) Successful in 1m32s
linux-x64-gcc / linux-gcc (pull_request) Successful in 1m51s
1187 lines
34 KiB
C++
1187 lines
34 KiB
C++
|
|
/*************************************************************************\
|
|
* BNF Lite is a C++ template library for lightweight grammar parsers *
|
|
* Copyright (c) 2017 by Alexander A. Semjonov. ALL RIGHTS RESERVED. *
|
|
* *
|
|
* Permission is hereby granted, free of charge, to any person *
|
|
* obtaining a copy of this software and associated documentation *
|
|
* files (the "Software"), to deal in the Software without restriction, *
|
|
* including without limitation the rights to use, copy, modify, merge, *
|
|
* publish, distribute, sublicense, and/or sell copies of the Software, *
|
|
* and to permit persons to whom the Software is furnished to do so, *
|
|
* subject to the following conditions: *
|
|
* *
|
|
* The above copyright notice and this permission notice shall be *
|
|
* included in all copies or substantial portions of the Software. *
|
|
* *
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, *
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF *
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY *
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, *
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH *
|
|
* THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *
|
|
\*************************************************************************/
|
|
|
|
#ifndef BNFLITE_H
|
|
#define BNFLITE_H
|
|
|
|
#include <string.h>
|
|
#include <string>
|
|
#include <list>
|
|
#include <vector>
|
|
#include <bitset>
|
|
#include <map>
|
|
#include <algorithm>
|
|
#include <typeinfo>
|
|
|
|
namespace bnf {
|
|
// BNF (Backus-Naur form) is a notation for describing syntax of computer languages
|
|
// BNF Lite is the source code template library implementing the way to support BNF specifications
|
|
// BNF Terms:
|
|
// * Production rule is formal BNF expression which is a conjunction of a series
|
|
// of more concrete rules:
|
|
// production_rule ::= <rule_1>...<rule_n> | <rule_n_1>...<rule_m>;
|
|
// * e.g.
|
|
// <digit> ::= <0> | <1> | <2> | <3> | <4> | <5> | <6> | <7> | <8> | <9>
|
|
// <number> ::= <digit> | <digit> <number>
|
|
// where the number is just a digit or another number with one more digit;
|
|
// Now this example can be presented in C++ friendly notation:
|
|
// Lexem Digit = Token("0") | "1" | "2" | "4" | "5" | "6" | "7" | "8" | "9";
|
|
// RULE(Number) = Digit | Digit + Number;
|
|
// where:
|
|
// * Token is a terminal production;
|
|
// * Lexem (or LEXEM) is a lexical production;
|
|
// * Rule (or RULE) is used here as synonym of syntax production
|
|
// To parse any number (e.g. 532) it is just enough to call the bnf::Analyze(Number, "532")
|
|
|
|
enum Limits {
|
|
maxCharNum = 256,
|
|
maxLexemLength = 1024,
|
|
maxRepeate = 4096,
|
|
maxEmptyStack = 16
|
|
};
|
|
|
|
enum Status {
|
|
eNone = 0,
|
|
eOk = 1,
|
|
eRet = 0x8,
|
|
e1st = 0x10,
|
|
eSkip = 0x20,
|
|
eTry = 0x40,
|
|
eNull = 0x80,
|
|
eRest = 0x0100,
|
|
eNoData = 0x0200,
|
|
eOver = 0x0400,
|
|
eEof = 0x0800,
|
|
eBadRule = 0x1000,
|
|
eBadLexem = 0x2000,
|
|
eSyntax = 0x4000,
|
|
eError = ((~(unsigned int) 0) >> 1) + 1
|
|
};
|
|
|
|
class _Tie;
|
|
class _And;
|
|
class _Or;
|
|
class _Cycle;
|
|
|
|
/* context class to support the first kind of callback */
|
|
class _Base// base parser class
|
|
{
|
|
public:
|
|
std::vector<const char *> cntxV;// public for internal extensions
|
|
protected:
|
|
friend class Token;
|
|
friend class Lexem;
|
|
friend class Rule;
|
|
friend class _And;
|
|
friend class _Or;
|
|
friend class _Cycle;
|
|
int level;
|
|
const char *pstop;
|
|
|
|
int _chk_stack()
|
|
{
|
|
static const char *org;
|
|
static int cnt;
|
|
if (org != cntxV.back()) {
|
|
org = cntxV.back();
|
|
cnt = 0;
|
|
} else if (++cnt > maxEmptyStack)
|
|
return eOver | eError;
|
|
return 0;
|
|
}
|
|
|
|
const char *(*zero_parse)(const char *);
|
|
|
|
int catch_error(const char *ptr)// attempt to catch general syntax error
|
|
{
|
|
return eSyntax | eError;
|
|
}
|
|
|
|
virtual void _erase(int low, int up = 0)
|
|
{
|
|
cntxV.erase(cntxV.begin() + low, up ? cntxV.begin() + up : cntxV.end());
|
|
}
|
|
|
|
virtual std::pair<void *, int> _pre_call(void *callback)
|
|
{
|
|
return std::make_pair((void *) 0, 0);
|
|
}
|
|
|
|
virtual void _post_call(std::pair<void *, int> up){};
|
|
virtual void _do_call(std::pair<void *, int> up,
|
|
void *callback,
|
|
size_t org,
|
|
const char *name){};
|
|
virtual void _stub_call(size_t org, const char *name){};
|
|
|
|
public:
|
|
int _analyze(_Tie &root, const char *text, size_t *);
|
|
_Base(const char *(*pre)(const char *) )
|
|
: level(1),
|
|
pstop(0),
|
|
zero_parse(pre ? pre : base_parser){};
|
|
virtual ~_Base(){};
|
|
|
|
// default pre-parser procedure to skip special symbols
|
|
static const char *base_parser(const char *ptr)
|
|
{
|
|
for (char cc = *ptr; cc != 0; cc = *++ptr) {
|
|
if (cc != ' ' && cc != '\t' && cc != '\n' && cc != '\r') { break; }
|
|
}
|
|
return ptr;
|
|
}
|
|
};
|
|
|
|
#if !defined(_MSC_VER)
|
|
#define _NAME_OFF 0
|
|
#else
|
|
#define _NAME_OFF 6
|
|
#endif
|
|
|
|
/* internal base class to support multiform relationships between different BNFlite elements */
|
|
class _Tie {
|
|
bool _is_compound();
|
|
|
|
protected:
|
|
friend class _Base;
|
|
friend class ExtParser;
|
|
friend class _And;
|
|
friend class _Or;
|
|
friend class _Cycle;
|
|
friend class Token;
|
|
friend class Lexem;
|
|
friend class Rule;
|
|
|
|
bool inner;
|
|
mutable std::vector<const _Tie *> use;
|
|
mutable std::list<const _Tie *> usage;
|
|
std::string name;
|
|
|
|
template<class T>
|
|
static void _setname(T *t, const char *name = 0)
|
|
{
|
|
static int cnt = 0;
|
|
if (name) {
|
|
t->name = name;
|
|
} else {
|
|
t->name = typeid(*t).name() + _NAME_OFF;
|
|
for (int i = ++cnt; i != 0; i /= 10) {
|
|
t->name += '0' + i - (i / 10) * 10;
|
|
}
|
|
}
|
|
}
|
|
|
|
void _clone(const _Tie *lnk)
|
|
{
|
|
usage.swap(lnk->usage);
|
|
for (std::list<const _Tie *>::const_iterator usg = usage.begin();
|
|
usg != usage.end(); ++usg) {
|
|
for (size_t i = 0; i < (*usg)->use.size(); i++) {
|
|
if ((*usg)->use[i] == lnk) { (*usg)->use[i] = this; }
|
|
}
|
|
}
|
|
use.swap(lnk->use);
|
|
for (size_t i = 0; i < use.size(); i++) {
|
|
if (!use[i]) continue;
|
|
std::list<const _Tie *>::iterator itr =
|
|
std::find(use[i]->usage.begin(), use[i]->usage.end(), lnk);
|
|
*itr = this;
|
|
}
|
|
if (lnk->inner) { delete lnk; }
|
|
}
|
|
|
|
_Tie(std::string nm = "") : inner(false), name(nm){};
|
|
|
|
explicit _Tie(const _Tie *lnk) : inner(true), name(lnk->name)
|
|
{
|
|
_clone(lnk);
|
|
}
|
|
|
|
_Tie(const _Tie &link) : inner(link.inner), name(link.name)
|
|
{
|
|
_clone(&link);
|
|
}
|
|
|
|
virtual ~_Tie()
|
|
{
|
|
for (size_t i = 0; i < use.size(); i++) {
|
|
const _Tie *lnk = use[i];
|
|
if (lnk) {
|
|
lnk->usage.remove(this);
|
|
for (size_t j = 0; j < use.size(); j++) {
|
|
if (use[j] == lnk) { use[j] = 0; }
|
|
}
|
|
if (lnk->inner && lnk->usage.size() == 0) { delete lnk; }
|
|
}
|
|
}
|
|
}
|
|
|
|
static int call_1st(const _Tie *lnk, _Base *parser)
|
|
{
|
|
return lnk->_parse(parser);
|
|
}
|
|
|
|
void _clue(const _Tie &link)
|
|
{
|
|
if (!use.size() || _is_compound()) {
|
|
use.push_back(&link);
|
|
} else {
|
|
if (use[0]) {
|
|
use[0]->usage.remove(this);
|
|
if (use[0]->inner && use[0]->usage.size() == 0) {
|
|
delete use[0];
|
|
}
|
|
}
|
|
use[0] = &link;
|
|
}
|
|
link.usage.push_back(this);
|
|
}
|
|
|
|
template<class T>
|
|
static T *_safe_delete(T *t)
|
|
{
|
|
if (t->usage.size() != 0) {
|
|
if (!t->inner) { return new T(t); }
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
virtual int _parse(_Base *parser) const throw() = 0;
|
|
|
|
public:
|
|
void setName(const char *name) { this->name = name; }
|
|
|
|
const char *getName() { return name.c_str(); }
|
|
|
|
_And operator+(const _Tie &link);
|
|
_And operator+(const char *s);
|
|
_And operator+(bool (*f)(const char *, size_t));
|
|
friend _And operator+(const char *s, const _Tie &lnk);
|
|
friend _And operator+(bool (*f)(const char *, size_t), const _Tie &lnk);
|
|
_Or operator|(const _Tie &link);
|
|
_Or operator|(const char *s);
|
|
_Or operator|(bool (*f)(const char *, size_t));
|
|
friend _Or operator|(const char *s, const _Tie &lnk);
|
|
friend _Or operator|(bool (*f)(const char *, size_t), const _Tie &lnk);
|
|
|
|
// Support Augmented BNF constructions like "<a>*<b><element>" to implement repetition;
|
|
// In ABNF <a> and <b> imply at least <a> and at most <b> occurrences of the element;
|
|
// e.g *<element> allows any number(from 0 to infinity, 1*<element> requires at least one;
|
|
// 3*3<element> allows exactly 3 and 1*2<element> allows one or two.
|
|
_Cycle operator()(int at_least,
|
|
int total);// ABNF case <a>.<b>*<element> as element(a,b)
|
|
_Cycle operator*(); // ABNF case *<element> (from 0 to infinity)
|
|
_Cycle
|
|
operator!();// ABNF case <0>.<1>*<element> or <1><element> (at least one)
|
|
};
|
|
|
|
/* implementation of parsing control rules */
|
|
template<const unsigned int flg, const char cc>
|
|
class _Ctrl : public _Tie {
|
|
protected:
|
|
friend class _Tie;
|
|
|
|
virtual int _parse(_Base *parser) const throw() { return flg; }
|
|
|
|
explicit _Ctrl(const _Ctrl *ctrl) : _Tie(ctrl){};
|
|
_Ctrl(const _Ctrl &control) : _Tie(control){};
|
|
|
|
public:
|
|
explicit _Ctrl() : _Tie(std::string(1, cc)){};
|
|
|
|
~_Ctrl() { _safe_delete(this); }
|
|
};
|
|
|
|
/* Null operation, immediate successful return */
|
|
typedef _Ctrl<eOk, 'N'> Null;// stub for some constructions (e.g. "zero-or-one")
|
|
|
|
/* Force Return, immediate return from conjunction rule to impact disjunction rule */
|
|
typedef _Ctrl<eOk | eRet, 'R'> Return;
|
|
|
|
/* Switch to use "Accept First" strategy for disjunction rule instead "Accept Best" */
|
|
typedef _Ctrl<e1st, '1'> AcceptFirst;
|
|
|
|
/* Try to catch syntax error in current conjunction rule */
|
|
typedef _Ctrl<eOk | eTry, 'T'> Try;
|
|
|
|
/* Check but do not accept next statement for conjunction rule */
|
|
typedef _Ctrl<eOk | eSkip, 'S'> Skip;
|
|
|
|
/* Force syntax error */
|
|
typedef _Ctrl<eError | eSyntax, 'E'> Catch;
|
|
|
|
/* interface class for tokens */
|
|
class Token : public _Tie {
|
|
Token &operator=(const _Tie &);
|
|
explicit Token(const _Tie &);
|
|
|
|
public:
|
|
class interval_set : protected std::map<wchar_t, bool> {
|
|
public:
|
|
interval_set()
|
|
{
|
|
insert(std::make_pair(0, false));
|
|
insert(std::make_pair(WCHAR_MAX, false));
|
|
}
|
|
|
|
bool test(wchar_t key) const { return (--upper_bound(key))->second; }
|
|
|
|
void reset(wchar_t key) { set(key, 0, false); }
|
|
|
|
void set(wchar_t key, size_t rep = 0, bool val = true)
|
|
{
|
|
wchar_t key_end = key + rep + 1;
|
|
if (key == 0 || key_end == WCHAR_MAX) return;
|
|
std::map<wchar_t, bool>::iterator right_begin = lower_bound(key);
|
|
std::map<wchar_t, bool>::iterator left_begin = right_begin;
|
|
--left_begin;
|
|
std::map<wchar_t, bool>::iterator right_end = upper_bound(key_end);
|
|
std::map<wchar_t, bool>::iterator left_end = right_end;
|
|
--left_end;
|
|
if (left_end->second == val)
|
|
if (left_end->first >= key_end && right_begin == left_end)
|
|
erase(right_begin);
|
|
else
|
|
erase(right_begin, right_end);
|
|
else {
|
|
std::map<wchar_t, bool>::iterator itr =
|
|
insert(std::make_pair(key_end, left_end->second)).first;
|
|
if (right_begin->first < itr->first) erase(right_begin, itr);
|
|
}
|
|
if (left_begin->second != val) insert(std::make_pair(key, val));
|
|
}
|
|
|
|
void flip()
|
|
{
|
|
for (std::map<wchar_t, bool>::iterator itr = begin(); itr != end();
|
|
++itr)
|
|
itr->second = !itr->second;
|
|
}
|
|
};
|
|
|
|
protected:
|
|
friend class _Tie;
|
|
#if defined(BNFLITE_WIDE)
|
|
interval_set match;
|
|
#else
|
|
std::bitset<bnf::maxCharNum> match;
|
|
#endif
|
|
explicit Token(const Token *tkn) : _Tie(tkn), match(tkn->match){};
|
|
|
|
virtual int _parse(_Base *parser) const throw()
|
|
{
|
|
const char *cc = parser->cntxV.back();
|
|
if (parser->level) cc = parser->zero_parse(cc);
|
|
char c = *((unsigned char *) cc);
|
|
if (match.test(c)) {
|
|
if (parser->level) {
|
|
parser->cntxV.push_back(cc);
|
|
parser->_stub_call(parser->cntxV.size() - 1, name.c_str());
|
|
}
|
|
parser->cntxV.push_back(++cc);
|
|
return c ? eOk : eOk | eEof;
|
|
}
|
|
return c ? eNone : eEof;
|
|
}
|
|
|
|
public:
|
|
Token(const char c) : _Tie(std::string(1, c))
|
|
{
|
|
Add(c, 0);
|
|
};// create single char token
|
|
|
|
Token(int fst, int lst) : _Tie(std::string(1, fst).append("-") += lst)
|
|
{
|
|
Add(fst, lst);
|
|
};// create token by ASCII charactes in range
|
|
|
|
Token(const char *s) : _Tie(std::string(s))
|
|
{
|
|
Add(s);
|
|
};// create token by C string sample
|
|
|
|
Token(const char *s, const Token &token)
|
|
: _Tie(std::string(s)),
|
|
match(token.match)
|
|
{
|
|
Add(s);
|
|
};// create token by both C string sample and another token set
|
|
|
|
Token(const Token &token) : _Tie(token), match(token.match){};
|
|
|
|
virtual ~Token() { _safe_delete(this); }
|
|
|
|
void
|
|
Add(int fst,
|
|
int lst = 0,
|
|
const char *sample =
|
|
"")// add characters in range fst...lst exept mentioned in sample;
|
|
{
|
|
switch (lst) {// lst == 0|1: add single | upper&lower case character(s)
|
|
case 1:
|
|
if (fst >= 'A' && fst <= 'Z')
|
|
match.set(fst - 'A' + 'a');
|
|
else if (fst >= 'a' && fst <= 'z')
|
|
match.set(fst - 'a' + 'A');
|
|
case 0:
|
|
match.set((unsigned char) fst);
|
|
break;
|
|
default:
|
|
for (int i = fst; i <= lst; i++) { match.set((unsigned char) i); }
|
|
Remove(sample);
|
|
}
|
|
}
|
|
|
|
void Add(const char *sample)
|
|
{
|
|
while (*sample) { match.set((unsigned char) *sample++); }
|
|
}
|
|
|
|
void Remove(int fst, int lst = 0)
|
|
{
|
|
for (int i = fst; i <= (lst ? lst : fst); i++) {
|
|
match.reset((unsigned char) i);
|
|
}
|
|
}
|
|
|
|
void Remove(const char *sample)
|
|
{
|
|
while (*sample) { match.reset((unsigned char) *sample++); }
|
|
}
|
|
|
|
int GetSymbol(int next = 1)// get first short symbol
|
|
{
|
|
for (unsigned int i = next; i < maxCharNum; i++) {
|
|
if (match.test(i)) return i;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
Token &Invert()// invert token to build construction to not match
|
|
{
|
|
match.flip();
|
|
return *this;
|
|
}
|
|
};
|
|
#if __cplusplus > 199711L
|
|
inline Token
|
|
operator""_T(const char *sample, size_t len)
|
|
{
|
|
return Token(std::string(sample, len).c_str());
|
|
}
|
|
#endif
|
|
|
|
/* standalone callback wrapper class */
|
|
class Action : public _Tie {
|
|
bool (*action)(const char *lexem, size_t len);
|
|
Action(_Tie &);
|
|
|
|
protected:
|
|
friend class _Tie;
|
|
explicit Action(const Action *a) : _Tie(a), action(a->action){};
|
|
|
|
int _parse(_Base *parser) const throw()
|
|
{
|
|
std::vector<const char *>::reverse_iterator itr =
|
|
parser->cntxV.rbegin() + 1;
|
|
return (*action)(*itr, parser->cntxV.back() - *itr);
|
|
}
|
|
|
|
public:
|
|
Action(bool (*action)(const char *lexem, size_t len), const char *name = "")
|
|
: _Tie(name),
|
|
action(action){};
|
|
|
|
virtual ~Action() { _safe_delete(this); }
|
|
};
|
|
|
|
/* internal class to support conjunction constructions of BNFlite elements */
|
|
class _And : public _Tie {
|
|
protected:
|
|
friend class _Tie;
|
|
friend class Lexem;
|
|
|
|
_And(const _Tie &b1, const _Tie &b2) : _Tie("")
|
|
{
|
|
(name = b1.name).append("+") += b2.name;
|
|
_clue(b1);
|
|
_clue(b2);
|
|
}
|
|
|
|
explicit _And(const _And *rl) : _Tie(rl){};
|
|
|
|
virtual int _parse(_Base *parser) const throw()
|
|
{
|
|
int stat = 0;
|
|
size_t save = 0;
|
|
size_t size = parser->cntxV.size();
|
|
for (unsigned i = 0; i < use.size(); i++, stat &= ~(eSkip | eOk)) {
|
|
stat |= use[i]->_parse(parser);
|
|
if (!(stat & eOk) || (stat & eError)
|
|
|| ((stat & eEof)
|
|
&& (parser->cntxV.back() == parser->cntxV[size - 1]))) {
|
|
if (parser->level && (stat & eTry) && !(stat & eError)
|
|
&& !save) {
|
|
stat |= parser->catch_error(parser->cntxV.back());
|
|
}
|
|
parser->_erase(size);
|
|
return stat & ~(eTry | eSkip | eOk);
|
|
} else {
|
|
if (save) {
|
|
parser->cntxV.resize(save);
|
|
save = 0;
|
|
}
|
|
if (stat & eSkip) { save = parser->cntxV.size(); }
|
|
}
|
|
}
|
|
return eOk | (stat & ~(eTry | eSkip));
|
|
}
|
|
|
|
public:
|
|
~_And() { _safe_delete(this); }
|
|
|
|
_And &operator+(const _Tie &rule2)
|
|
{
|
|
name.append("+") += rule2.name;
|
|
_clue(rule2);
|
|
return *this;
|
|
}
|
|
|
|
_And &operator+(const char *s)
|
|
{
|
|
name.append("+") += s;
|
|
_clue(Token(s));
|
|
return *this;
|
|
}
|
|
|
|
_And &operator+(bool (*f)(const char *, size_t))
|
|
{
|
|
name += "+()";
|
|
_clue(Action(f));
|
|
return *this;
|
|
}
|
|
|
|
friend _And operator+(const char *s, const _Tie &link);
|
|
friend _And operator+(bool (*f)(const char *, size_t), const _Tie &link);
|
|
};
|
|
|
|
inline _And
|
|
_Tie::operator+(const _Tie &rule2)
|
|
{
|
|
return _And(*this, rule2);
|
|
}
|
|
|
|
inline _And
|
|
_Tie::operator+(const char *s)
|
|
{
|
|
return _And(*this, Token(s));
|
|
}
|
|
|
|
inline _And
|
|
_Tie::operator+(bool (*f)(const char *, size_t))
|
|
{
|
|
return _And(*this, Action(f));
|
|
}
|
|
|
|
inline _And
|
|
operator+(const char *s, const _Tie &link)
|
|
{
|
|
return _And(Token(s), link);
|
|
}
|
|
|
|
inline _And
|
|
operator+(bool (*f)(const char *, size_t), const _Tie &link)
|
|
{
|
|
return _And(Action(f), link);
|
|
}
|
|
|
|
/* internal class to support disjunction constructions of BNFlite elements */
|
|
class _Or : public _Tie {
|
|
protected:
|
|
friend class _Tie;
|
|
|
|
_Or(const _Tie &b1, const _Tie &b2) : _Tie("")
|
|
{
|
|
(name = b1.name).append("|") += b2.name;
|
|
_clue(b1);
|
|
_clue(b2);
|
|
}
|
|
|
|
explicit _Or(const _Or *rl) : _Tie(rl){};
|
|
|
|
virtual int _parse(_Base *parser) const throw()
|
|
{
|
|
int stat = 0;
|
|
int tstat = 0;
|
|
int max = 0;
|
|
int tmp = -1;
|
|
size_t size = parser->cntxV.size();
|
|
for (unsigned i = 0; i < use.size();
|
|
i++, stat &= ~(eOk | eRet | eEof | eError)) {
|
|
size_t msize = parser->cntxV.size();
|
|
if (msize > size) {
|
|
parser->cntxV.push_back(parser->cntxV[size - 1]);
|
|
}
|
|
stat |= use[i]->_parse(parser);
|
|
if (stat & (eOk | eError)) {
|
|
tmp = parser->cntxV.back() - parser->cntxV[size - 1];
|
|
if ((tmp > max) || (tmp > 0 && (stat & (eRet | e1st)))
|
|
|| (tmp >= 0 && (stat & eError))) {
|
|
max = tmp;
|
|
tstat = stat;
|
|
if (msize > size) { parser->_erase(size, msize + 1); }
|
|
if (stat & (eRet | e1st | eError)) { break; }
|
|
continue;
|
|
}
|
|
}
|
|
if (parser->cntxV.size() > msize) { parser->_erase(msize); }
|
|
}
|
|
return (max || tmp >= 0 ? tstat | eOk : tstat & ~eOk) & ~(e1st | eRet);
|
|
}
|
|
|
|
public:
|
|
~_Or() { _safe_delete(this); }
|
|
|
|
_Or &operator|(const _Tie &rule2)
|
|
{
|
|
name.append("|") += rule2.name;
|
|
_clue(rule2);
|
|
return *this;
|
|
}
|
|
|
|
_Or &operator|(const char *s)
|
|
{
|
|
name.append("|") += s;
|
|
_clue(Token(s));
|
|
return *this;
|
|
}
|
|
|
|
_Or &operator|(bool (*f)(const char *, size_t))
|
|
{
|
|
name += "|()";
|
|
_clue(Action(f));
|
|
return *this;
|
|
}
|
|
|
|
friend _Or operator|(const char *s, const _Tie &link);
|
|
friend _Or operator|(bool (*f)(const char *, size_t), const _Tie &link);
|
|
};
|
|
|
|
inline _Or
|
|
_Tie::operator|(const _Tie &rule2)
|
|
{
|
|
return _Or(*this, rule2);
|
|
}
|
|
|
|
inline _Or
|
|
_Tie::operator|(const char *s)
|
|
{
|
|
return _Or(*this, Token(s));
|
|
}
|
|
|
|
inline _Or
|
|
_Tie::operator|(bool (*f)(const char *, size_t))
|
|
{
|
|
return _Or(*this, Action(f));
|
|
}
|
|
|
|
inline _Or
|
|
operator|(const char *s, const _Tie &link)
|
|
{
|
|
return _Or(Token(s), link);
|
|
}
|
|
|
|
inline _Or
|
|
operator|(bool (*f)(const char *, size_t), const _Tie &link)
|
|
{
|
|
return _Or(Action(f), link);
|
|
}
|
|
|
|
inline bool
|
|
_Tie::_is_compound()
|
|
{
|
|
return dynamic_cast<_And *>(this) || dynamic_cast<_Or *>(this);
|
|
}
|
|
|
|
/* interface class for lexem */
|
|
class Lexem : public _Tie {
|
|
Lexem &operator=(const class Rule &);
|
|
Lexem(const Rule &rule);
|
|
|
|
protected:
|
|
friend class _Tie;
|
|
explicit Lexem(Lexem *lxm) : _Tie(lxm){};
|
|
|
|
virtual int _parse(_Base *parser) const throw()
|
|
{
|
|
if (!use.size()) return eError | eBadLexem;
|
|
if (!parser->level || dynamic_cast<const Action *>(use[0]))
|
|
return use[0]->_parse(parser);
|
|
size_t size = parser->cntxV.size();
|
|
parser->cntxV.push_back(parser->zero_parse(parser->cntxV.back()));
|
|
parser->level--;
|
|
int stat = use[0]->_parse(parser);
|
|
parser->level++;
|
|
if ((stat & eOk) && parser->cntxV.size() - size > 1) {
|
|
parser->_stub_call(size - 1, name.c_str());
|
|
if (parser->cntxV.back() > parser->pstop)
|
|
parser->pstop = parser->cntxV.back();
|
|
parser->cntxV[(++size)++] = parser->cntxV.back();
|
|
}
|
|
parser->cntxV.resize(size);
|
|
return stat;
|
|
}
|
|
|
|
public:
|
|
Lexem(const char *literal, bool cs = 0) : _Tie()
|
|
{
|
|
int size = strlen(literal);
|
|
switch (size) {
|
|
case 1:
|
|
this->operator=(Token(literal[0], cs));
|
|
case 0:
|
|
break;
|
|
default: {
|
|
_And _and(Token(literal[0], cs), Token(literal[1], cs));
|
|
for (int i = 2; i < size; i++) {
|
|
_and.operator+((const _Tie &) Token(literal[i], cs));
|
|
}
|
|
this->operator=(_and);
|
|
}
|
|
}
|
|
_setname(this, literal);
|
|
}
|
|
|
|
explicit Lexem() : _Tie() { _setname(this); }
|
|
|
|
virtual ~Lexem() { _safe_delete(this); }
|
|
|
|
Lexem(const _Tie &link) : _Tie()
|
|
{
|
|
_setname(this, 0);
|
|
_clue(link);
|
|
}
|
|
|
|
Lexem &operator=(const Lexem &lexem)
|
|
{
|
|
if (&lexem != this) _clue(lexem);
|
|
return *this;
|
|
}
|
|
|
|
Lexem &operator=(const _Tie &link)
|
|
{
|
|
_clue(link);
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
/* interface class for BNF rules */
|
|
class Rule : public _Tie {
|
|
void *callback;
|
|
|
|
protected:
|
|
friend class _Tie;
|
|
friend class _And;
|
|
explicit Rule(const Rule *rl) : _Tie(rl), callback(rl->callback){};
|
|
|
|
virtual int _parse(_Base *parser) const throw()
|
|
{
|
|
if (!use.size() || !parser->level) return eError | eBadRule;
|
|
if (dynamic_cast<const Action *>(use[0])) {
|
|
return use[0]->_parse(parser);
|
|
}
|
|
size_t size = parser->cntxV.size();
|
|
std::pair<void *, int> up = parser->_pre_call(callback);
|
|
int stat = use[0]->_parse(parser);
|
|
if ((stat & eOk) && parser->cntxV.size() - size > 1) {
|
|
parser->_do_call(up, callback, size, name.c_str());
|
|
if (parser->cntxV.back() > parser->pstop)
|
|
parser->pstop = parser->cntxV.back();
|
|
parser->cntxV[(++size)++] = parser->cntxV.back();
|
|
}
|
|
parser->cntxV.resize(size);
|
|
parser->_post_call(up);
|
|
return stat;
|
|
}
|
|
|
|
public:
|
|
explicit Rule() : _Tie(), callback(0) { _setname(this); }
|
|
|
|
virtual ~Rule() { _safe_delete(this); }
|
|
|
|
Rule(const _Tie &link) : _Tie(), callback(0)
|
|
{
|
|
const Rule *rl = dynamic_cast<const Rule *>(&link);
|
|
if (rl) {
|
|
_clone(&link);
|
|
callback = rl->callback;
|
|
name = rl->name;
|
|
} else {
|
|
_clue(link);
|
|
callback = 0;
|
|
_setname(this);
|
|
}
|
|
}
|
|
|
|
Rule &operator=(const _Tie &link)
|
|
{
|
|
_clue(link);
|
|
return *this;
|
|
}
|
|
|
|
Rule &operator=(const Rule &rule)
|
|
{
|
|
if (&rule == this) return *this;
|
|
return this->operator=((const _Tie &) rule);
|
|
}
|
|
template<class U>
|
|
friend Rule &Bind(Rule &rule, U (*callback)(std::vector<U> &));
|
|
template<class U>
|
|
Rule &operator[](U (*callback)(std::vector<U> &));
|
|
};
|
|
|
|
/* friendly debug interface */
|
|
#define LEXEM(lexem) \
|
|
Lexem lexem; \
|
|
lexem.setName(#lexem); \
|
|
lexem
|
|
#define RULE(rule) \
|
|
Rule rule; \
|
|
rule.setName(#rule); \
|
|
rule
|
|
|
|
/* internal class to support repeat constructions of BNF elements */
|
|
class _Cycle : public _Tie {
|
|
unsigned int min, max;
|
|
int flag;
|
|
|
|
protected:
|
|
friend class _Tie;
|
|
explicit _Cycle(const _Cycle *u)
|
|
: _Tie(u),
|
|
min(u->min),
|
|
max(u->max),
|
|
flag(u->flag){};
|
|
_Cycle(const _Cycle &w) : _Tie(w), min(w.min), max(w.max), flag(w.flag){};
|
|
|
|
int _parse(_Base *parser) const throw()
|
|
{
|
|
int stat;
|
|
unsigned int i;
|
|
for (stat = 0, i = 0; i < max;
|
|
i++, stat &= ~(e1st | eTry | eSkip | eRet | eOk)) {
|
|
stat |= use[0]->_parse(parser);
|
|
if ((stat & (eOk | eError)) == eOk) continue;
|
|
return i < min ? stat & ~eOk : stat | parser->_chk_stack() | eOk;
|
|
}
|
|
return stat | flag | eOk;
|
|
}
|
|
|
|
_Cycle(int at_least,
|
|
const _Tie &link,
|
|
int total = maxRepeate,
|
|
int limit = maxRepeate)
|
|
: _Tie(std::string("@")),
|
|
min(at_least),
|
|
max(total),
|
|
flag(total < limit ? eNone : eOver | eError)
|
|
{
|
|
_clue(link);
|
|
}
|
|
|
|
public:
|
|
~_Cycle() { _safe_delete(this); }
|
|
|
|
friend _Cycle operator*(int at_least, const _Tie &link);
|
|
friend _Cycle Repeat(int at_least, const Rule &rule, int total, int limit);
|
|
friend _Cycle
|
|
Iterate(int at_least, const Lexem &lexem, int total, int limit);
|
|
friend _Cycle
|
|
Series(int at_least, const Token &token, int total, int limit);
|
|
};
|
|
|
|
inline _Cycle
|
|
_Tie::operator*()
|
|
{
|
|
return _Cycle(0, *this);
|
|
}
|
|
|
|
inline _Cycle
|
|
_Tie::operator!()
|
|
{
|
|
return _Cycle(0, *this, 1);
|
|
}
|
|
|
|
inline _Cycle
|
|
_Tie::operator()(int at_least, int total)
|
|
{
|
|
return _Cycle(at_least, *this, total);
|
|
}
|
|
|
|
inline _Cycle
|
|
operator*(int at_least, const _Tie &link)
|
|
{
|
|
return _Cycle(at_least, link);
|
|
}
|
|
|
|
inline _Cycle
|
|
Repeat(int at_least,
|
|
const Rule &rule,
|
|
int total = maxLexemLength,
|
|
int limit = maxRepeate)
|
|
{
|
|
return _Cycle(at_least, rule, total, limit);
|
|
}
|
|
|
|
inline _Cycle
|
|
Iterate(int at_least,
|
|
const Lexem &lexem,
|
|
int total = maxLexemLength,
|
|
int limit = maxLexemLength)
|
|
{
|
|
return _Cycle(at_least, lexem, total, limit);
|
|
}
|
|
|
|
inline _Cycle
|
|
Series(int at_least,
|
|
const Token &token,
|
|
int total = maxLexemLength,
|
|
int limit = maxCharNum)
|
|
{
|
|
return _Cycle(at_least, token, total, limit);
|
|
}
|
|
|
|
/* context class to support the second kind of callback */
|
|
template<class U>
|
|
class _Parser : public _Base {
|
|
protected:
|
|
std::vector<U> *cntxU;
|
|
unsigned int off;
|
|
|
|
void _erase(int low, int up = 0)
|
|
{
|
|
cntxV.erase(cntxV.begin() + low, up ? cntxV.begin() + up : cntxV.end());
|
|
if (cntxU && level)
|
|
cntxU->erase(cntxU->begin() + (low - off) / 2,
|
|
up ? cntxU->begin() + (up - off) / 2 : cntxU->end());
|
|
}
|
|
|
|
virtual std::pair<void *, int> _pre_call(void *callback)
|
|
{
|
|
std::pair<void *, int> up = std::make_pair(cntxU, off);
|
|
cntxU = callback ? new std::vector<U> : 0;
|
|
off = callback ? cntxV.size() : 0;
|
|
return up;
|
|
}
|
|
|
|
virtual void _post_call(std::pair<void *, int> up)
|
|
{
|
|
if (cntxU) { delete cntxU; }
|
|
cntxU = (std::vector<U> *) up.first;
|
|
off = up.second;
|
|
}
|
|
|
|
virtual void _do_call(std::pair<void *, int> up,
|
|
void *callback,
|
|
size_t org,
|
|
const char *name)
|
|
{
|
|
if (callback) {
|
|
if (up.first) {
|
|
((std::vector<U> *) up.first)
|
|
->push_back(U(reinterpret_cast<U (*)(std::vector<U> &)>(
|
|
callback)(*cntxU),
|
|
cntxV[org], cntxV.back() - cntxV[org], name));
|
|
} else {
|
|
reinterpret_cast<U (*)(std::vector<U> &)>(callback)(*cntxU);
|
|
}
|
|
} else if (up.first) {
|
|
((std::vector<U> *) up.first)
|
|
->push_back(U(cntxV[org], cntxV.back() - cntxV[org], name));
|
|
}
|
|
}
|
|
|
|
virtual void _stub_call(size_t org, const char *name)
|
|
{
|
|
if (cntxU) {
|
|
cntxU->push_back(U(cntxV[org], cntxV.back() - cntxV[org], name));
|
|
}
|
|
}
|
|
|
|
public:
|
|
_Parser(const char *(*f)(const char *), std::vector<U> *v)
|
|
: _Base(f),
|
|
cntxU(v),
|
|
off(0){};
|
|
virtual ~_Parser(){};
|
|
|
|
int _get_result(U &u)
|
|
{
|
|
if (cntxU && cntxU->size()) {
|
|
u.data = cntxU->front().data;
|
|
return 0;
|
|
} else
|
|
return eNull;
|
|
}
|
|
template<class W>
|
|
friend Rule &Bind(Rule &rule, W (*callback)(std::vector<W> &));
|
|
};
|
|
|
|
inline int
|
|
_Base::_analyze(_Tie &root, const char *text, size_t *plen)
|
|
{
|
|
cntxV.push_back(text);
|
|
cntxV.push_back(text);
|
|
int stat = root._parse(this);
|
|
const char *ptr = zero_parse(pstop > cntxV.back() ? pstop : cntxV.back());
|
|
if (plen) *plen = ptr - text;
|
|
return stat | (*ptr ? eError | eRest : 0);
|
|
}
|
|
|
|
/* User interface template to support the second kind of callback */
|
|
/* The user need to specify own 'Foo' abstract type to develop own callbaks */
|
|
/* like: Interface<Foo> CallBack(std::vector<Interface<Foo>>& res); */
|
|
template<typename Data = bool>
|
|
struct Interface {
|
|
Data data; // user data element
|
|
const char *text;// pointer to parsed text according to bound Rule
|
|
size_t length; // length of parsed text according to bound Rule
|
|
const char *name;// the name of bound Rule
|
|
Interface(const Interface &ifc,
|
|
const char *text,
|
|
size_t length,
|
|
const char *name)
|
|
: data(ifc.data),
|
|
text(text),
|
|
length(length),
|
|
name(
|
|
name){};// mandatory constructor with user data to be called from library
|
|
Interface(const char *text, size_t length, const char *name)
|
|
: data(),
|
|
text(text),
|
|
length(length),
|
|
name(
|
|
name){};// mandatory default constructor to be called from library
|
|
Interface(Data data, std::vector<Interface> &res, const char *name = "")
|
|
: data(data),
|
|
text(res.size() ? res[0].text : ""),
|
|
length(res.size() ? res[res.size() - 1].text - res[0].text
|
|
+ res[res.size() - 1].length
|
|
: 0),
|
|
name(
|
|
name){};// constructor to pass data from user's callback to library
|
|
Interface(const Interface &front,
|
|
const Interface &back,
|
|
const char *name = "")
|
|
: data(),
|
|
text(front.text),
|
|
length(back.text - front.text + back.length),
|
|
name(
|
|
name){};// constructor to pass data from user's callback to library
|
|
Interface() : data(), text(0), length(0), name(0){};// default constructor
|
|
|
|
static Interface
|
|
ByPass(std::vector<Interface> &res)// simplest user callback example
|
|
{
|
|
return res.size() ? res[0] : Interface();
|
|
}// just to pass data to upper level
|
|
|
|
int _get_pstop(const char **pstop)
|
|
{
|
|
if (pstop) *pstop = text + length;
|
|
return length ? eNone : eNull;
|
|
}
|
|
};
|
|
|
|
/* Private parsing interface */
|
|
template<class U>
|
|
inline int
|
|
_Analyze(_Tie &root, U &u, const char *(*pre_parse)(const char *) )
|
|
{
|
|
if (typeid(U) == typeid(Interface<>)) {
|
|
_Base base(pre_parse);
|
|
return base._analyze(root, u.text, &u.length);
|
|
} else {
|
|
std::vector<U> v;
|
|
_Parser<U> parser(pre_parse, &v);
|
|
return parser._analyze(root, u.text, &u.length) | parser._get_result(u);
|
|
}
|
|
}
|
|
|
|
/* Primary interface set to start parsing of text against constructed rules */
|
|
template<class U>
|
|
inline int
|
|
Analyze(_Tie &root,
|
|
const char *text,
|
|
const char **pstop,
|
|
U &u,
|
|
const char *(*pre_parse)(const char *) = 0)
|
|
{
|
|
u.text = text;
|
|
return _Analyze(root, u, pre_parse) | u._get_pstop(pstop);
|
|
}
|
|
|
|
template<class U>
|
|
inline int
|
|
Analyze(_Tie &root,
|
|
const char *text,
|
|
U &u,
|
|
const char *(*pre_parse)(const char *) = 0)
|
|
{
|
|
u.text = text;
|
|
return _Analyze(root, u, pre_parse) | u._get_pstop(0);
|
|
}
|
|
|
|
inline int
|
|
Analyze(_Tie &root,
|
|
const char *text,
|
|
const char **pstop = 0,
|
|
const char *(*pre_parse)(const char *) = 0)
|
|
{
|
|
Interface<> u;
|
|
u.text = text;
|
|
return _Analyze(root, u, pre_parse) | u._get_pstop(pstop);
|
|
}
|
|
|
|
/* Create association between Rule and user's callback */
|
|
template<class U>
|
|
inline Rule &
|
|
Bind(Rule &rule, U (*callback)(std::vector<U> &))
|
|
{
|
|
rule.callback = reinterpret_cast<void *>(callback);
|
|
return rule;
|
|
}
|
|
|
|
template<class U>
|
|
inline Rule &
|
|
Rule::operator[](U (*callback)(std::vector<U> &))// for C++11
|
|
{
|
|
this->callback = reinterpret_cast<void *>(callback);
|
|
return *this;
|
|
}
|
|
|
|
}; // namespace bnf
|
|
#endif// BNFLITE_H
|