diff --git a/3party/bnflite/bnflite.h b/3party/bnflite/bnflite.h new file mode 100644 index 0000000..bba4a8d --- /dev/null +++ b/3party/bnflite/bnflite.h @@ -0,0 +1,1186 @@ + +/*************************************************************************\ +* BNF Lite is a C++ template library for lightweight grammar parsers * +* Copyright (c) 2017 by Alexander A. Semjonov. ALL RIGHTS RESERVED. * +* * +* Permission is hereby granted, free of charge, to any person * +* obtaining a copy of this software and associated documentation * +* files (the "Software"), to deal in the Software without restriction, * +* including without limitation the rights to use, copy, modify, merge, * +* publish, distribute, sublicense, and/or sell copies of the Software, * +* and to permit persons to whom the Software is furnished to do so, * +* subject to the following conditions: * +* * +* The above copyright notice and this permission notice shall be * +* included in all copies or substantial portions of the Software. * +* * +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.* +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH * +* THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
* +\*************************************************************************/ + +#ifndef BNFLITE_H +#define BNFLITE_H + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace bnf { +// BNF (Backus-Naur form) is a notation for describing syntax of computer languages +// BNF Lite is the source code template library implementing the way to support BNF specifications +// BNF Terms: +// * Production rule is formal BNF expression which is a conjunction of a series +// of more concrete rules: +// production_rule ::= ... | ...; +// * e.g. +// <digit> ::= <0> | <1> | <2> | <3> | <4> | <5> | <6> | <7> | <8> | <9> +// <number> ::= <digit> | <digit> <number> +// where the number is just a digit or another number with one more digit; +// Now this example can be presented in C++ friendly notation: +// Lexem Digit = Token("0") | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; +// RULE(Number) = Digit | Digit + Number; +// where: +// * Token is a terminal production; +// * Lexem (or LEXEM) is a lexical production; +// * Rule (or RULE) is used here as synonym of syntax production +// To parse any number (e.g. 
532) it is just enough to call the bnf::Analyze(Number, "532") + +enum Limits { + maxCharNum = 256, + maxLexemLength = 1024, + maxRepeate = 4096, + maxEmptyStack = 16 +}; + +enum Status { + eNone = 0, + eOk = 1, + eRet = 0x8, + e1st = 0x10, + eSkip = 0x20, + eTry = 0x40, + eNull = 0x80, + eRest = 0x0100, + eNoData = 0x0200, + eOver = 0x0400, + eEof = 0x0800, + eBadRule = 0x1000, + eBadLexem = 0x2000, + eSyntax = 0x4000, + eError = ((~(unsigned int) 0) >> 1) + 1 +}; + +class _Tie; +class _And; +class _Or; +class _Cycle; + +/* context class to support the first kind of callback */ +class _Base// base parser class +{ +public: + std::vector cntxV;// public for internal extensions +protected: + friend class Token; + friend class Lexem; + friend class Rule; + friend class _And; + friend class _Or; + friend class _Cycle; + int level; + const char *pstop; + + int _chk_stack() + { + static const char *org; + static int cnt; + if (org != cntxV.back()) { + org = cntxV.back(); + cnt = 0; + } else if (++cnt > maxEmptyStack) + return eOver | eError; + return 0; + } + + const char *(*zero_parse)(const char *); + + int catch_error(const char *ptr)// attempt to catch general syntax error + { + return eSyntax | eError; + } + + virtual void _erase(int low, int up = 0) + { + cntxV.erase(cntxV.begin() + low, up ? cntxV.begin() + up : cntxV.end()); + } + + virtual std::pair _pre_call(void *callback) + { + return std::make_pair((void *) 0, 0); + } + + virtual void _post_call(std::pair up){}; + virtual void _do_call(std::pair up, + void *callback, + size_t org, + const char *name){}; + virtual void _stub_call(size_t org, const char *name){}; + +public: + int _analyze(_Tie &root, const char *text, size_t *); + _Base(const char *(*pre)(const char *) ) + : level(1), + pstop(0), + zero_parse(pre ? 
pre : base_parser){}; + virtual ~_Base(){}; + + // default pre-parser procedure to skip special symbols + static const char *base_parser(const char *ptr) + { + for (char cc = *ptr; cc != 0; cc = *++ptr) { + if (cc != ' ' && cc != '\t' && cc != '\n' && cc != '\r') { break; } + } + return ptr; + } +}; + +#if !defined(_MSC_VER) +#define _NAME_OFF 0 +#else +#define _NAME_OFF 6 +#endif + +/* internal base class to support multiform relationships between different BNFlite elements */ +class _Tie { + bool _is_compound(); + +protected: + friend class _Base; + friend class ExtParser; + friend class _And; + friend class _Or; + friend class _Cycle; + friend class Token; + friend class Lexem; + friend class Rule; + + bool inner; + mutable std::vector use; + mutable std::list usage; + std::string name; + + template + static void _setname(T *t, const char *name = 0) + { + static int cnt = 0; + if (name) { + t->name = name; + } else { + t->name = typeid(*t).name() + _NAME_OFF; + for (int i = ++cnt; i != 0; i /= 10) { + t->name += '0' + i - (i / 10) * 10; + } + } + } + + void _clone(const _Tie *lnk) + { + usage.swap(lnk->usage); + for (std::list::const_iterator usg = usage.begin(); + usg != usage.end(); ++usg) { + for (size_t i = 0; i < (*usg)->use.size(); i++) { + if ((*usg)->use[i] == lnk) { (*usg)->use[i] = this; } + } + } + use.swap(lnk->use); + for (size_t i = 0; i < use.size(); i++) { + if (!use[i]) continue; + std::list::iterator itr = + std::find(use[i]->usage.begin(), use[i]->usage.end(), lnk); + *itr = this; + } + if (lnk->inner) { delete lnk; } + } + + _Tie(std::string nm = "") : inner(false), name(nm){}; + + explicit _Tie(const _Tie *lnk) : inner(true), name(lnk->name) + { + _clone(lnk); + } + + _Tie(const _Tie &link) : inner(link.inner), name(link.name) + { + _clone(&link); + } + + virtual ~_Tie() + { + for (size_t i = 0; i < use.size(); i++) { + const _Tie *lnk = use[i]; + if (lnk) { + lnk->usage.remove(this); + for (size_t j = 0; j < use.size(); j++) { + if (use[j] 
== lnk) { use[j] = 0; } + } + if (lnk->inner && lnk->usage.size() == 0) { delete lnk; } + } + } + } + + static int call_1st(const _Tie *lnk, _Base *parser) + { + return lnk->_parse(parser); + } + + void _clue(const _Tie &link) + { + if (!use.size() || _is_compound()) { + use.push_back(&link); + } else { + if (use[0]) { + use[0]->usage.remove(this); + if (use[0]->inner && use[0]->usage.size() == 0) { + delete use[0]; + } + } + use[0] = &link; + } + link.usage.push_back(this); + } + + template + static T *_safe_delete(T *t) + { + if (t->usage.size() != 0) { + if (!t->inner) { return new T(t); } + } + return 0; + } + + virtual int _parse(_Base *parser) const throw() = 0; + +public: + void setName(const char *name) { this->name = name; } + + const char *getName() { return name.c_str(); } + + _And operator+(const _Tie &link); + _And operator+(const char *s); + _And operator+(bool (*f)(const char *, size_t)); + friend _And operator+(const char *s, const _Tie &lnk); + friend _And operator+(bool (*f)(const char *, size_t), const _Tie &lnk); + _Or operator|(const _Tie &link); + _Or operator|(const char *s); + _Or operator|(bool (*f)(const char *, size_t)); + friend _Or operator|(const char *s, const _Tie &lnk); + friend _Or operator|(bool (*f)(const char *, size_t), const _Tie &lnk); + + // Support Augmented BNF constructions like "<a>*<b><element>" to implement repetition; + // In ABNF <a> and <b> imply at least <a> and at most <b> occurrences of the element; + // e.g. *<element> allows any number (from 0 to infinity), 1*<element> requires at least one; + // 3*3<element> allows exactly 3 and 1*2<element> allows one or two. 
+ _Cycle operator()(int at_least, + int total);// ABNF case <a>*<b><element> as element(a,b) + _Cycle operator*(); // ABNF case *<element> (from 0 to infinity) + _Cycle + operator!();// ABNF case 0*1<element> (zero or one, i.e. optional) +}; + +/* implementation of parsing control rules */ +template +class _Ctrl : public _Tie { +protected: + friend class _Tie; + + virtual int _parse(_Base *parser) const throw() { return flg; } + + explicit _Ctrl(const _Ctrl *ctrl) : _Tie(ctrl){}; + _Ctrl(const _Ctrl &control) : _Tie(control){}; + +public: + explicit _Ctrl() : _Tie(std::string(1, cc)){}; + + ~_Ctrl() { _safe_delete(this); } +}; + +/* Null operation, immediate successful return */ +typedef _Ctrl Null;// stub for some constructions (e.g. "zero-or-one") + +/* Force Return, immediate return from conjunction rule to impact disjunction rule */ +typedef _Ctrl Return; + +/* Switch to use "Accept First" strategy for disjunction rule instead of "Accept Best" */ +typedef _Ctrl AcceptFirst; + +/* Try to catch syntax error in current conjunction rule */ +typedef _Ctrl Try; + +/* Check but do not accept next statement for conjunction rule */ +typedef _Ctrl Skip; + +/* Force syntax error */ +typedef _Ctrl Catch; + +/* interface class for tokens */ +class Token : public _Tie { + Token &operator=(const _Tie &); + explicit Token(const _Tie &); + +public: + class interval_set : protected std::map { + public: + interval_set() + { + insert(std::make_pair(0, false)); + insert(std::make_pair(WCHAR_MAX, false)); + } + + bool test(wchar_t key) const { return (--upper_bound(key))->second; } + + void reset(wchar_t key) { set(key, 0, false); } + + void set(wchar_t key, size_t rep = 0, bool val = true) + { + wchar_t key_end = key + rep + 1; + if (key == 0 || key_end == WCHAR_MAX) return; + std::map::iterator right_begin = lower_bound(key); + std::map::iterator left_begin = right_begin; + --left_begin; + std::map::iterator right_end = upper_bound(key_end); + std::map::iterator left_end = right_end; + --left_end; + if 
(left_end->second == val) + if (left_end->first >= key_end && right_begin == left_end) + erase(right_begin); + else + erase(right_begin, right_end); + else { + std::map::iterator itr = + insert(std::make_pair(key_end, left_end->second)).first; + if (right_begin->first < itr->first) erase(right_begin, itr); + } + if (left_begin->second != val) insert(std::make_pair(key, val)); + } + + void flip() + { + for (std::map::iterator itr = begin(); itr != end(); + ++itr) + itr->second = !itr->second; + } + }; + +protected: + friend class _Tie; +#if defined(BNFLITE_WIDE) + interval_set match; +#else + std::bitset match; +#endif + explicit Token(const Token *tkn) : _Tie(tkn), match(tkn->match){}; + + virtual int _parse(_Base *parser) const throw() + { + const char *cc = parser->cntxV.back(); + if (parser->level) cc = parser->zero_parse(cc); + char c = *((unsigned char *) cc); + if (match.test(c)) { + if (parser->level) { + parser->cntxV.push_back(cc); + parser->_stub_call(parser->cntxV.size() - 1, name.c_str()); + } + parser->cntxV.push_back(++cc); + return c ? eOk : eOk | eEof; + } + return c ? 
eNone : eEof; + } + +public: + Token(const char c) : _Tie(std::string(1, c)) + { + Add(c, 0); + };// create single char token + + Token(int fst, int lst) : _Tie(std::string(1, fst).append("-") += lst) + { + Add(fst, lst); + };// create token by ASCII characters in range + + Token(const char *s) : _Tie(std::string(s)) + { + Add(s); + };// create token by C string sample + + Token(const char *s, const Token &token) + : _Tie(std::string(s)), + match(token.match) + { + Add(s); + };// create token by both C string sample and another token set + + Token(const Token &token) : _Tie(token), match(token.match){}; + + virtual ~Token() { _safe_delete(this); } + + void + Add(int fst, + int lst = 0, + const char *sample = + "")// add characters in range fst...lst except mentioned in sample; + { + switch (lst) {// lst == 0|1: add single | upper&lower case character(s) + case 1: + if (fst >= 'A' && fst <= 'Z') + match.set(fst - 'A' + 'a'); + else if (fst >= 'a' && fst <= 'z') + match.set(fst - 'a' + 'A'); + case 0: + match.set((unsigned char) fst); + break; + default: + for (int i = fst; i <= lst; i++) { match.set((unsigned char) i); } + Remove(sample); + } + } + + void Add(const char *sample) + { + while (*sample) { match.set((unsigned char) *sample++); } + } + + void Remove(int fst, int lst = 0) + { + for (int i = fst; i <= (lst ? 
lst : fst); i++) { + match.reset((unsigned char) i); + } + } + + void Remove(const char *sample) + { + while (*sample) { match.reset((unsigned char) *sample++); } + } + + int GetSymbol(int next = 1)// get first short symbol + { + for (unsigned int i = next; i < maxCharNum; i++) { + if (match.test(i)) return i; + } + return 0; + } + + Token &Invert()// invert token to build construction to not match + { + match.flip(); + return *this; + } +}; +#if __cplusplus > 199711L +inline Token +operator""_T(const char *sample, size_t len) +{ + return Token(std::string(sample, len).c_str()); +} +#endif + +/* standalone callback wrapper class */ +class Action : public _Tie { + bool (*action)(const char *lexem, size_t len); + Action(_Tie &); + +protected: + friend class _Tie; + explicit Action(const Action *a) : _Tie(a), action(a->action){}; + + int _parse(_Base *parser) const throw() + { + std::vector::reverse_iterator itr = + parser->cntxV.rbegin() + 1; + return (*action)(*itr, parser->cntxV.back() - *itr); + } + +public: + Action(bool (*action)(const char *lexem, size_t len), const char *name = "") + : _Tie(name), + action(action){}; + + virtual ~Action() { _safe_delete(this); } +}; + +/* internal class to support conjunction constructions of BNFlite elements */ +class _And : public _Tie { +protected: + friend class _Tie; + friend class Lexem; + + _And(const _Tie &b1, const _Tie &b2) : _Tie("") + { + (name = b1.name).append("+") += b2.name; + _clue(b1); + _clue(b2); + } + + explicit _And(const _And *rl) : _Tie(rl){}; + + virtual int _parse(_Base *parser) const throw() + { + int stat = 0; + size_t save = 0; + size_t size = parser->cntxV.size(); + for (unsigned i = 0; i < use.size(); i++, stat &= ~(eSkip | eOk)) { + stat |= use[i]->_parse(parser); + if (!(stat & eOk) || (stat & eError) + || ((stat & eEof) + && (parser->cntxV.back() == parser->cntxV[size - 1]))) { + if (parser->level && (stat & eTry) && !(stat & eError) + && !save) { + stat |= 
parser->catch_error(parser->cntxV.back()); + } + parser->_erase(size); + return stat & ~(eTry | eSkip | eOk); + } else { + if (save) { + parser->cntxV.resize(save); + save = 0; + } + if (stat & eSkip) { save = parser->cntxV.size(); } + } + } + return eOk | (stat & ~(eTry | eSkip)); + } + +public: + ~_And() { _safe_delete(this); } + + _And &operator+(const _Tie &rule2) + { + name.append("+") += rule2.name; + _clue(rule2); + return *this; + } + + _And &operator+(const char *s) + { + name.append("+") += s; + _clue(Token(s)); + return *this; + } + + _And &operator+(bool (*f)(const char *, size_t)) + { + name += "+()"; + _clue(Action(f)); + return *this; + } + + friend _And operator+(const char *s, const _Tie &link); + friend _And operator+(bool (*f)(const char *, size_t), const _Tie &link); +}; + +inline _And +_Tie::operator+(const _Tie &rule2) +{ + return _And(*this, rule2); +} + +inline _And +_Tie::operator+(const char *s) +{ + return _And(*this, Token(s)); +} + +inline _And +_Tie::operator+(bool (*f)(const char *, size_t)) +{ + return _And(*this, Action(f)); +} + +inline _And +operator+(const char *s, const _Tie &link) +{ + return _And(Token(s), link); +} + +inline _And +operator+(bool (*f)(const char *, size_t), const _Tie &link) +{ + return _And(Action(f), link); +} + +/* internal class to support disjunction constructions of BNFlite elements */ +class _Or : public _Tie { +protected: + friend class _Tie; + + _Or(const _Tie &b1, const _Tie &b2) : _Tie("") + { + (name = b1.name).append("|") += b2.name; + _clue(b1); + _clue(b2); + } + + explicit _Or(const _Or *rl) : _Tie(rl){}; + + virtual int _parse(_Base *parser) const throw() + { + int stat = 0; + int tstat = 0; + int max = 0; + int tmp = -1; + size_t size = parser->cntxV.size(); + for (unsigned i = 0; i < use.size(); + i++, stat &= ~(eOk | eRet | eEof | eError)) { + size_t msize = parser->cntxV.size(); + if (msize > size) { + parser->cntxV.push_back(parser->cntxV[size - 1]); + } + stat |= use[i]->_parse(parser); 
+ if (stat & (eOk | eError)) { + tmp = parser->cntxV.back() - parser->cntxV[size - 1]; + if ((tmp > max) || (tmp > 0 && (stat & (eRet | e1st))) + || (tmp >= 0 && (stat & eError))) { + max = tmp; + tstat = stat; + if (msize > size) { parser->_erase(size, msize + 1); } + if (stat & (eRet | e1st | eError)) { break; } + continue; + } + } + if (parser->cntxV.size() > msize) { parser->_erase(msize); } + } + return (max || tmp >= 0 ? tstat | eOk : tstat & ~eOk) & ~(e1st | eRet); + } + +public: + ~_Or() { _safe_delete(this); } + + _Or &operator|(const _Tie &rule2) + { + name.append("|") += rule2.name; + _clue(rule2); + return *this; + } + + _Or &operator|(const char *s) + { + name.append("|") += s; + _clue(Token(s)); + return *this; + } + + _Or &operator|(bool (*f)(const char *, size_t)) + { + name += "|()"; + _clue(Action(f)); + return *this; + } + + friend _Or operator|(const char *s, const _Tie &link); + friend _Or operator|(bool (*f)(const char *, size_t), const _Tie &link); +}; + +inline _Or +_Tie::operator|(const _Tie &rule2) +{ + return _Or(*this, rule2); +} + +inline _Or +_Tie::operator|(const char *s) +{ + return _Or(*this, Token(s)); +} + +inline _Or +_Tie::operator|(bool (*f)(const char *, size_t)) +{ + return _Or(*this, Action(f)); +} + +inline _Or +operator|(const char *s, const _Tie &link) +{ + return _Or(Token(s), link); +} + +inline _Or +operator|(bool (*f)(const char *, size_t), const _Tie &link) +{ + return _Or(Action(f), link); +} + +inline bool +_Tie::_is_compound() +{ + return dynamic_cast<_And *>(this) || dynamic_cast<_Or *>(this); +} + +/* interface class for lexem */ +class Lexem : public _Tie { + Lexem &operator=(const class Rule &); + Lexem(const Rule &rule); + +protected: + friend class _Tie; + explicit Lexem(Lexem *lxm) : _Tie(lxm){}; + + virtual int _parse(_Base *parser) const throw() + { + if (!use.size()) return eError | eBadLexem; + if (!parser->level || dynamic_cast(use[0])) + return use[0]->_parse(parser); + size_t size = 
parser->cntxV.size(); + parser->cntxV.push_back(parser->zero_parse(parser->cntxV.back())); + parser->level--; + int stat = use[0]->_parse(parser); + parser->level++; + if ((stat & eOk) && parser->cntxV.size() - size > 1) { + parser->_stub_call(size - 1, name.c_str()); + if (parser->cntxV.back() > parser->pstop) + parser->pstop = parser->cntxV.back(); + parser->cntxV[(++size)++] = parser->cntxV.back(); + } + parser->cntxV.resize(size); + return stat; + } + +public: + Lexem(const char *literal, bool cs = 0) : _Tie() + { + int size = strlen(literal); + switch (size) { + case 1: + this->operator=(Token(literal[0], cs)); + case 0: + break; + default: { + _And _and(Token(literal[0], cs), Token(literal[1], cs)); + for (int i = 2; i < size; i++) { + _and.operator+((const _Tie &) Token(literal[i], cs)); + } + this->operator=(_and); + } + } + _setname(this, literal); + } + + explicit Lexem() : _Tie() { _setname(this); } + + virtual ~Lexem() { _safe_delete(this); } + + Lexem(const _Tie &link) : _Tie() + { + _setname(this, 0); + _clue(link); + } + + Lexem &operator=(const Lexem &lexem) + { + if (&lexem != this) _clue(lexem); + return *this; + } + + Lexem &operator=(const _Tie &link) + { + _clue(link); + return *this; + } +}; + +/* interface class for BNF rules */ +class Rule : public _Tie { + void *callback; + +protected: + friend class _Tie; + friend class _And; + explicit Rule(const Rule *rl) : _Tie(rl), callback(rl->callback){}; + + virtual int _parse(_Base *parser) const throw() + { + if (!use.size() || !parser->level) return eError | eBadRule; + if (dynamic_cast(use[0])) { + return use[0]->_parse(parser); + } + size_t size = parser->cntxV.size(); + std::pair up = parser->_pre_call(callback); + int stat = use[0]->_parse(parser); + if ((stat & eOk) && parser->cntxV.size() - size > 1) { + parser->_do_call(up, callback, size, name.c_str()); + if (parser->cntxV.back() > parser->pstop) + parser->pstop = parser->cntxV.back(); + parser->cntxV[(++size)++] = parser->cntxV.back(); + 
} + parser->cntxV.resize(size); + parser->_post_call(up); + return stat; + } + +public: + explicit Rule() : _Tie(), callback(0) { _setname(this); } + + virtual ~Rule() { _safe_delete(this); } + + Rule(const _Tie &link) : _Tie(), callback(0) + { + const Rule *rl = dynamic_cast(&link); + if (rl) { + _clone(&link); + callback = rl->callback; + name = rl->name; + } else { + _clue(link); + callback = 0; + _setname(this); + } + } + + Rule &operator=(const _Tie &link) + { + _clue(link); + return *this; + } + + Rule &operator=(const Rule &rule) + { + if (&rule == this) return *this; + return this->operator=((const _Tie &) rule); + } + template + friend Rule &Bind(Rule &rule, U (*callback)(std::vector &)); + template + Rule &operator[](U (*callback)(std::vector &)); +}; + +/* friendly debug interface */ +#define LEXEM(lexem) \ + Lexem lexem; \ + lexem.setName(#lexem); \ + lexem +#define RULE(rule) \ + Rule rule; \ + rule.setName(#rule); \ + rule + +/* internal class to support repeat constructions of BNF elements */ +class _Cycle : public _Tie { + unsigned int min, max; + int flag; + +protected: + friend class _Tie; + explicit _Cycle(const _Cycle *u) + : _Tie(u), + min(u->min), + max(u->max), + flag(u->flag){}; + _Cycle(const _Cycle &w) : _Tie(w), min(w.min), max(w.max), flag(w.flag){}; + + int _parse(_Base *parser) const throw() + { + int stat; + unsigned int i; + for (stat = 0, i = 0; i < max; + i++, stat &= ~(e1st | eTry | eSkip | eRet | eOk)) { + stat |= use[0]->_parse(parser); + if ((stat & (eOk | eError)) == eOk) continue; + return i < min ? stat & ~eOk : stat | parser->_chk_stack() | eOk; + } + return stat | flag | eOk; + } + + _Cycle(int at_least, + const _Tie &link, + int total = maxRepeate, + int limit = maxRepeate) + : _Tie(std::string("@")), + min(at_least), + max(total), + flag(total < limit ? 
eNone : eOver | eError) + { + _clue(link); + } + +public: + ~_Cycle() { _safe_delete(this); } + + friend _Cycle operator*(int at_least, const _Tie &link); + friend _Cycle Repeat(int at_least, const Rule &rule, int total, int limit); + friend _Cycle + Iterate(int at_least, const Lexem &lexem, int total, int limit); + friend _Cycle + Series(int at_least, const Token &token, int total, int limit); +}; + +inline _Cycle +_Tie::operator*() +{ + return _Cycle(0, *this); +} + +inline _Cycle +_Tie::operator!() +{ + return _Cycle(0, *this, 1); +} + +inline _Cycle +_Tie::operator()(int at_least, int total) +{ + return _Cycle(at_least, *this, total); +} + +inline _Cycle +operator*(int at_least, const _Tie &link) +{ + return _Cycle(at_least, link); +} + +inline _Cycle +Repeat(int at_least, + const Rule &rule, + int total = maxLexemLength, + int limit = maxRepeate) +{ + return _Cycle(at_least, rule, total, limit); +} + +inline _Cycle +Iterate(int at_least, + const Lexem &lexem, + int total = maxLexemLength, + int limit = maxLexemLength) +{ + return _Cycle(at_least, lexem, total, limit); +} + +inline _Cycle +Series(int at_least, + const Token &token, + int total = maxLexemLength, + int limit = maxCharNum) +{ + return _Cycle(at_least, token, total, limit); +} + +/* context class to support the second kind of callback */ +template +class _Parser : public _Base { +protected: + std::vector *cntxU; + unsigned int off; + + void _erase(int low, int up = 0) + { + cntxV.erase(cntxV.begin() + low, up ? cntxV.begin() + up : cntxV.end()); + if (cntxU && level) + cntxU->erase(cntxU->begin() + (low - off) / 2, + up ? cntxU->begin() + (up - off) / 2 : cntxU->end()); + } + + virtual std::pair _pre_call(void *callback) + { + std::pair up = std::make_pair(cntxU, off); + cntxU = callback ? new std::vector : 0; + off = callback ? 
cntxV.size() : 0; + return up; + } + + virtual void _post_call(std::pair up) + { + if (cntxU) { delete cntxU; } + cntxU = (std::vector *) up.first; + off = up.second; + } + + virtual void _do_call(std::pair up, + void *callback, + size_t org, + const char *name) + { + if (callback) { + if (up.first) { + ((std::vector *) up.first) + ->push_back(U(reinterpret_cast &)>( + callback)(*cntxU), + cntxV[org], cntxV.back() - cntxV[org], name)); + } else { + reinterpret_cast &)>(callback)(*cntxU); + } + } else if (up.first) { + ((std::vector *) up.first) + ->push_back(U(cntxV[org], cntxV.back() - cntxV[org], name)); + } + } + + virtual void _stub_call(size_t org, const char *name) + { + if (cntxU) { + cntxU->push_back(U(cntxV[org], cntxV.back() - cntxV[org], name)); + } + } + +public: + _Parser(const char *(*f)(const char *), std::vector *v) + : _Base(f), + cntxU(v), + off(0){}; + virtual ~_Parser(){}; + + int _get_result(U &u) + { + if (cntxU && cntxU->size()) { + u.data = cntxU->front().data; + return 0; + } else + return eNull; + } + template + friend Rule &Bind(Rule &rule, W (*callback)(std::vector &)); +}; + +inline int +_Base::_analyze(_Tie &root, const char *text, size_t *plen) +{ + cntxV.push_back(text); + cntxV.push_back(text); + int stat = root._parse(this); + const char *ptr = zero_parse(pstop > cntxV.back() ? pstop : cntxV.back()); + if (plen) *plen = ptr - text; + return stat | (*ptr ? 
eError | eRest : 0); +} + +/* User interface template to support the second kind of callback */ +/* The user needs to specify their own 'Foo' abstract type to develop their own callbacks */ +/* like: Interface<Foo> CallBack(std::vector<Interface<Foo> >& res); */ +template +struct Interface { + Data data; // user data element + const char *text;// pointer to parsed text according to bound Rule + size_t length; // length of parsed text according to bound Rule + const char *name;// the name of bound Rule + Interface(const Interface &ifc, + const char *text, + size_t length, + const char *name) + : data(ifc.data), + text(text), + length(length), + name( + name){};// mandatory constructor with user data to be called from library + Interface(const char *text, size_t length, const char *name) + : data(), + text(text), + length(length), + name( + name){};// mandatory default constructor to be called from library + Interface(Data data, std::vector &res, const char *name = "") + : data(data), + text(res.size() ? res[0].text : ""), + length(res.size() ? res[res.size() - 1].text - res[0].text + + res[res.size() - 1].length + : 0), + name( + name){};// constructor to pass data from user's callback to library + Interface(const Interface &front, + const Interface &back, + const char *name = "") + : data(), + text(front.text), + length(back.text - front.text + back.length), + name( + name){};// constructor to pass data from user's callback to library + Interface() : data(), text(0), length(0), name(0){};// default constructor + + static Interface + ByPass(std::vector &res)// simplest user callback example + { + return res.size() ? res[0] : Interface(); + }// just to pass data to upper level + + int _get_pstop(const char **pstop) + { + if (pstop) *pstop = text + length; + return length ? 
eNone : eNull; + } +}; + +/* Private parsing interface */ +template +inline int +_Analyze(_Tie &root, U &u, const char *(*pre_parse)(const char *) ) +{ + if (typeid(U) == typeid(Interface<>)) { + _Base base(pre_parse); + return base._analyze(root, u.text, &u.length); + } else { + std::vector v; + _Parser parser(pre_parse, &v); + return parser._analyze(root, u.text, &u.length) | parser._get_result(u); + } +} + +/* Primary interface set to start parsing of text against constructed rules */ +template +inline int +Analyze(_Tie &root, + const char *text, + const char **pstop, + U &u, + const char *(*pre_parse)(const char *) = 0) +{ + u.text = text; + return _Analyze(root, u, pre_parse) | u._get_pstop(pstop); +} + +template +inline int +Analyze(_Tie &root, + const char *text, + U &u, + const char *(*pre_parse)(const char *) = 0) +{ + u.text = text; + return _Analyze(root, u, pre_parse) | u._get_pstop(0); +} + +inline int +Analyze(_Tie &root, + const char *text, + const char **pstop = 0, + const char *(*pre_parse)(const char *) = 0) +{ + Interface<> u; + u.text = text; + return _Analyze(root, u, pre_parse) | u._get_pstop(pstop); +} + +/* Create association between Rule and user's callback */ +template +inline Rule & +Bind(Rule &rule, U (*callback)(std::vector &)) +{ + rule.callback = reinterpret_cast(callback); + return rule; +} + +template +inline Rule & +Rule::operator[](U (*callback)(std::vector &))// for C++11 +{ + this->callback = reinterpret_cast(callback); + return *this; +} + +}; // namespace bnf +#endif// BNFLITE_H diff --git a/CMakeLists.txt b/CMakeLists.txt index 87ca1e3..3a858d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,8 @@ target_link_libraries(${PROJECT_NAME} PUBLIC fmt::fmt jsoncpp_static target_compile_definitions(${PROJECT_NAME} PRIVATE ULIB_LIBRARY_IMPL) target_include_directories( ${PROJECT_NAME} - PUBLIC 3party/inja + PUBLIC 3party/bnflite + 3party/inja 3party/mongoose 3party/nlohmann 3party/nonstd