From 8a32d9844333d66ca8b92e84b9fb726eea97c7d3 Mon Sep 17 00:00:00 2001 From: Daniel Sipka Date: Fri, 24 Apr 2015 00:35:13 +0200 Subject: [PATCH] refactor tokenizer with delimiter change support --- src/template_type.cpp | 75 ++++++++----------- src/template_type.hpp | 9 +-- src/token.cpp | 4 +- src/token.hpp | 2 +- test/data/changing_delimiters.hpp | 4 + .../changing_delimiters.mustache | 0 .../{delimiter => }/changing_delimiters.txt | 0 test/data/delimiter/changing_delimiters.js | 4 - test/data/delimiter/delimiters.js | 6 -- test/data/delimiters.hpp | 6 ++ test/data/{delimiter => }/delimiters.mustache | 0 test/data/{delimiter => }/delimiters.txt | 0 test/headerize.cpp | 42 ++++------- test/test_main.cpp | 2 + 14 files changed, 65 insertions(+), 89 deletions(-) create mode 100644 test/data/changing_delimiters.hpp rename test/data/{delimiter => }/changing_delimiters.mustache (100%) rename test/data/{delimiter => }/changing_delimiters.txt (100%) delete mode 100644 test/data/delimiter/changing_delimiters.js delete mode 100644 test/data/delimiter/delimiters.js create mode 100644 test/data/delimiters.hpp rename test/data/{delimiter => }/delimiters.mustache (100%) rename test/data/{delimiter => }/delimiters.txt (100%) diff --git a/src/template_type.cpp b/src/template_type.cpp index 7b40872..fabc4f0 100644 --- a/src/template_type.cpp +++ b/src/template_type.cpp @@ -7,54 +7,39 @@ template_type::template_type(const std::string& str) { strip_whitespace(); } -void template_type::tokenize(const std::string& t) { - std::string delim_start{"{{"}; - std::string delim_end{"}}"}; - std::string::const_iterator tok_end, tok_start = t.begin(); - parse_state pstate = parse_state::start; - unsigned int del_pos = 0; - for (std::string::const_iterator it = t.begin(); it != t.end(); ++it) { - if (pstate == parse_state::start) { - if (*it == delim_start[0]) { - pstate = parse_state::in_del_start; - tok_end = it; - del_pos = 1; - } else if (*it == '\n') { - tokens.push_back({{tok_start, it + 1}}); - tok_start = it + 1; - } - } else if (pstate == parse_state::in_del_start) { - if (*it == delim_start[del_pos] && ++del_pos == delim_start.size()) - pstate = parse_state::in_del; - else - pstate = parse_state::start; - } else if (pstate == parse_state::in_del) { - if (*it == '{') - pstate = parse_state::in_esccontent; - else if (*it == delim_end[0] && (del_pos = 1)) - pstate = parse_state::in_del_end; - else - pstate = parse_state::in_content; - } else if (pstate == parse_state::in_esccontent && *it == '}') { - pstate = parse_state::in_content; - } else if (pstate == parse_state::in_content && *it == delim_end[0]) { - pstate = parse_state::in_del_end; - del_pos = 1; - } else if (pstate == parse_state::in_del_end) { - if (*it == delim_end[del_pos] && ++del_pos == delim_end.size()) { - pstate = parse_state::start; - tokens.push_back({{tok_start, tok_end}}); - tokens.push_back( - {{tok_end, it + 1}, - delim_start.size(), - delim_end.size()}); - tok_start = it + 1; - } else { - pstate = parse_state::start; +void template_type::process_text(citer begin, citer end) { + if (begin == end) + return; + auto start = begin; + for (auto it = begin; it != end; ++it) + if (*it == '\n' || it == end - 1) { + tokens.push_back({{start, it + 1}}); + start = it + 1; + } +} + +void template_type::tokenize(const std::string& tmplt) { + std::string open{"{{"}, close{"}}"}; + citer beg = tmplt.begin(); + for (unsigned long pos = 0; pos < tmplt.size();) { + auto to = tmplt.find(open, pos); + auto tc = tmplt.find(close, (to == std::string::npos)?to:(to + 1)); + if (tc != std::string::npos && to != std::string::npos) { + if (*(beg + to + open.size()) == '{' && *(beg + tc + close.size()) == '}') + ++tc; + process_text(beg + pos, beg + to); + pos = tc + close.size(); + tokens.push_back({{beg + to, beg + tc + close.size()}, + open.size(), close.size()}); + if (*(beg + to + open.size()) == '=' && *(beg + tc - 1) == '=') { + open = {beg + to + open.size() + 1, beg + tmplt.find(' ', to)}; + close = {beg + tmplt.find(' ', to) + 1, beg + tc - 1}; } + } else { + process_text(beg + pos, tmplt.end()); + pos = tc; } } - tokens.push_back({{tok_start, t.end()}}); } void template_type::strip_whitespace() { diff --git a/src/template_type.hpp b/src/template_type.hpp index bca45d1..cc7b6aa 100644 --- a/src/template_type.hpp +++ b/src/template_type.hpp @@ -4,6 +4,7 @@ #include #include "token.hpp" +#include "utils.hpp" namespace mstch { @@ -16,12 +17,10 @@ class template_type { void operator<<(const token& token) { tokens.push_back(token); } private: - enum class parse_state { - start, in_del_start, in_del, in_content, in_esccontent, in_del_end - }; - void tokenize(const std::string& str); - void strip_whitespace(); std::vector tokens; + void strip_whitespace(); + void process_text(citer beg, citer end); + void tokenize(const std::string& tmplt); }; } diff --git a/src/token.cpp b/src/token.cpp index de58143..d025ac3 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -19,7 +19,9 @@ token::token(const std::string& str, std::size_t left, std::size_t right): m_raw(str), m_eol(false), m_ws_only(false) { if (left != 0 && right != 0) { - if (str[left] == '{' && str[str.size() - right - 1] == '}') { + if (str[left] == '=' && str[str.size() - right - 1] == '=') { + m_type = type::delimiter_change; + } else if (str[left] == '{' && str[str.size() - right - 1] == '}') { m_type = type::unescaped_variable; m_name = {first_not_ws(str.begin() + left + 1, str.end() - right), first_not_ws(str.rbegin() + 1 + right, str.rend() - left) + 1}; diff --git a/src/token.hpp b/src/token.hpp index 39971b6..2c86dd4 100644 --- a/src/token.hpp +++ b/src/token.hpp @@ -8,7 +8,7 @@ class token { public: enum class type { text, variable, section_open, section_close, inverted_section_open, - unescaped_variable, comment, partial + unescaped_variable, comment, partial, delimiter_change }; token(const std::string& str, std::size_t left = 0, std::size_t right = 0); type token_type() const { return m_type; }; diff --git a/test/data/changing_delimiters.hpp b/test/data/changing_delimiters.hpp new file mode 100644 index 0000000..1322543 --- /dev/null +++ b/test/data/changing_delimiters.hpp @@ -0,0 +1,4 @@ +const mstch::node changing_delimiters_data = mstch::map{ + {"foo", std::string{"foooooooooooooo"}}, + {"bar", std::string{"bar!"}} +}; diff --git a/test/data/delimiter/changing_delimiters.mustache b/test/data/changing_delimiters.mustache similarity index 100% rename from test/data/delimiter/changing_delimiters.mustache rename to test/data/changing_delimiters.mustache diff --git a/test/data/delimiter/changing_delimiters.txt b/test/data/changing_delimiters.txt similarity index 100% rename from test/data/delimiter/changing_delimiters.txt rename to test/data/changing_delimiters.txt diff --git a/test/data/delimiter/changing_delimiters.js b/test/data/delimiter/changing_delimiters.js deleted file mode 100644 index b808f4c..0000000 --- a/test/data/delimiter/changing_delimiters.js +++ /dev/null @@ -1,4 +0,0 @@ -({ - "foo": "foooooooooooooo", - "bar": "bar!" -}) diff --git a/test/data/delimiter/delimiters.js b/test/data/delimiter/delimiters.js deleted file mode 100644 index 365d01e..0000000 --- a/test/data/delimiter/delimiters.js +++ /dev/null @@ -1,6 +0,0 @@ -({ - first: "It worked the first time.", - second: "And it worked the second time.", - third: "Then, surprisingly, it worked the third time.", - fourth: "Fourth time also fine!." -}) diff --git a/test/data/delimiters.hpp b/test/data/delimiters.hpp new file mode 100644 index 0000000..0e560ab --- /dev/null +++ b/test/data/delimiters.hpp @@ -0,0 +1,6 @@ +const mstch::node delimiters_data = mstch::map{ + {"first", std::string{"It worked the first time."}}, + {"second", std::string{"And it worked the second time."}}, + {"third", std::string{"Then, surprisingly, it worked the third time."}}, + {"fourth", std::string{"Fourth time also fine!."}} +}; diff --git a/test/data/delimiter/delimiters.mustache b/test/data/delimiters.mustache similarity index 100% rename from test/data/delimiter/delimiters.mustache rename to test/data/delimiters.mustache diff --git a/test/data/delimiter/delimiters.txt b/test/data/delimiters.txt similarity index 100% rename from test/data/delimiter/delimiters.txt rename to test/data/delimiters.txt diff --git a/test/headerize.cpp b/test/headerize.cpp index b976eb4..d8b8750 100644 --- a/test/headerize.cpp +++ b/test/headerize.cpp @@ -6,20 +6,10 @@ #include #include -void wrap_code(std::istream& input, std::ostream& output) { - std::string line; - while (std::getline(input, line)) { - output << line; - if (!input.eof()) - output << std::endl; - } - output << std::endl; -} - void wrap_string(std::istream& input, std::ostream& output, const std::string& variable_name) { - output << "const std::string " << variable_name << "{" << std::endl;; + output << "const std::string " << variable_name << "{\n"; std::string line; while (std::getline(input, line)) { boost::replace_all(line, "\\", "\\\\"); @@ -27,9 +17,9 @@ void wrap_string(std::istream& input, std::ostream& output, output << " \"" << line; if (!input.eof()) output << "\\n"; - output << "\"" << std::endl; + output << "\"\n"; } - output << "};" << std::endl; + output << "};\n"; } int main(int argc, char* argv[]) { @@ -54,34 +44,32 @@ int main(int argc, char* argv[]) { } if (!vm.count("output")) { - std::cout << "Output file not set" << std::endl; + std::cerr << "Output file not set" << std::endl; return 1; } std::ofstream output(vm["output"].as(), std::ios::out); if (vm.count("namespace")) - output << "namespace " << vm["namespace"].as() << " {" << std::endl; + output << "namespace " << vm["namespace"].as() << " {\n"; - if (vm.count("input-string")) { - for (auto& string_filename: vm["input-string"].as>()) { - std::ifstream input(string_filename, std::ios::in); - wrap_string(input, output, - boost::replace_all_copy(string_filename, ".", "_")); + if (vm.count("input-string")) + for (auto& filename: vm["input-string"].as>()) { + std::ifstream input(filename, std::ios::in); + wrap_string(input, output, boost::replace_all_copy(filename, ".", "_")); input.close(); } - } - if (vm.count("input-code")) { - for (auto& data_filename: vm["input-code"].as>()) { - std::ifstream input(data_filename, std::ios::in); - wrap_code(input, output); + if (vm.count("input-code")) + for (auto& filename: vm["input-code"].as>()) { + std::ifstream input(filename, std::ios::in); + output << std::string{(std::istreambuf_iterator(input)), + std::istreambuf_iterator()} << std::endl; input.close(); } - } if (vm.count("namespace")) - output << "}" << std::endl; + output << "}\n"; output.close(); diff --git a/test/test_main.cpp b/test/test_main.cpp index f1a7a81..e9b7a4e 100644 --- a/test/test_main.cpp +++ b/test/test_main.cpp @@ -20,9 +20,11 @@ MSTCH_TEST(array_of_strings) MSTCH_TEST(backslashes) MSTCH_TEST(bug_11_eating_whitespace) MSTCH_TEST(bug_length_property) +MSTCH_TEST(changing_delimiters) MSTCH_TEST(comments) MSTCH_TEST(complex) MSTCH_TEST(context_lookup) +MSTCH_TEST(delimiters) MSTCH_TEST(disappearing_whitespace) MSTCH_TEST(dot_notation) MSTCH_TEST(double_render)