From bb21e64a51009b7a867987b21fdb7631c78e07de Mon Sep 17 00:00:00 2001 From: Daniel Sipka Date: Wed, 15 Apr 2015 01:42:51 +0200 Subject: [PATCH] optimizations --- src/CMakeLists.txt | 3 +- src/mstch.cpp | 2 +- src/render_context.cpp | 91 +++++++++++++++++++++---------- src/render_context.hpp | 12 ++-- src/state/in_inverted_section.hpp | 4 +- src/state/outside_section.hpp | 2 +- src/token.cpp | 22 +------- src/token.hpp | 17 ++++-- src/utils.cpp | 18 ------ src/utils.hpp | 1 - test/benchmark_main.cpp | 24 -------- 11 files changed, 88 insertions(+), 108 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9d8cb99..95fc2e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -find_package(Boost 1.54 COMPONENTS regex REQUIRED) +find_package(Boost 1.54 REQUIRED) include_directories( ${CMAKE_SOURCE_DIR}/include @@ -18,4 +18,3 @@ set(SRC visitor/render_section.cpp) add_library(mstch STATIC ${SRC}) -target_link_libraries(mstch ${Boost_REGEX_LIBRARY}) \ No newline at end of file diff --git a/src/mstch.cpp b/src/mstch.cpp index e0514b2..f023cbc 100644 --- a/src/mstch.cpp +++ b/src/mstch.cpp @@ -11,5 +11,5 @@ std::string mstch::render( const object& root, const std::map& partials) { - return render_context(root, partials).render(strip_whitespace(tmplt)); + return render_context(root, partials).render(tmplt); } diff --git a/src/render_context.cpp b/src/render_context.cpp index e235cbd..60a8ea7 100644 --- a/src/render_context.cpp +++ b/src/render_context.cpp @@ -9,8 +9,9 @@ const mstch::node render_context::null_node; render_context::push::push(render_context& context, const mstch::object& obj): context(context) { - context.objects.push_front(obj); - context.push_state(); + context.objects.emplace_front(obj); + context.state.push(std::unique_ptr( + new state::outside_section)); } render_context::push::~push() { @@ -26,11 +27,11 @@ render_context::render_context( const mstch::object& object, const std::map& partials): partials{partials}, - objects{object} + objects{object}, + delim_start{"{{"}, + delim_end{"}}"} { - push_state(); - for(auto& partial: this->partials) - partial.second = strip_whitespace(partial.second); + state.push(std::unique_ptr(new state::outside_section)); } const mstch::node& render_context::find_node( @@ -54,54 +55,86 @@ const mstch::node& render_context::get_node(const std::string& token) { return find_node(token, objects); } -enum class parse_state { - start, in_del_start, in_del, in_content, in_escaped_content, in_del_end -}; - -std::string render_context::render(const std::string& t) { - const std::string delim_start{"{{"}; - const std::string delim_end{"}}"}; - std::string out; +void render_context::tokenize(const std::string& t, std::vector& toks) { std::string::const_iterator tok_end, tok_start = t.begin(); parse_state pstate = parse_state::start; - unsigned int delim_p = 0; + unsigned int del_pos = 0; + bool ws_only = true; for (std::string::const_iterator it = t.begin(); it != t.end(); ++it) { - if(pstate == parse_state::start && *it == delim_start[0]) { - pstate = parse_state::in_del_start; - tok_end = it; - delim_p = 1; + if(pstate == parse_state::start) { + if(*it == delim_start[0]) { + pstate = parse_state::in_del_start; + tok_end = it; + del_pos = 1; + } else if(*it == '\n') { + toks.push_back({false, true, ws_only, {tok_start, it + 1}}); + ws_only = true; + tok_start = it + 1; + } else if (*it != ' ' && *it != '\t') { + ws_only = false; + } } else if(pstate == parse_state::in_del_start) { - if (*it == delim_start[delim_p] && ++delim_p == delim_start.size()) + if (*it == delim_start[del_pos] && ++del_pos == delim_start.size()) pstate = parse_state::in_del; else pstate = parse_state::start; } else if(pstate == parse_state::in_del) { if (*it== '{') { - pstate = parse_state::in_escaped_content; + pstate = parse_state::in_esccontent; } else if (*it == delim_end[0]) { pstate = parse_state::in_del_end; - delim_p = 1; + del_pos = 1; } else { pstate = parse_state::in_content; } - } else if(pstate == parse_state::in_escaped_content && *it == '}') { + } else if(pstate == parse_state::in_esccontent && *it == '}') { pstate = parse_state::in_content; } else if(pstate == parse_state::in_content && *it == delim_end[0]) { pstate = parse_state::in_del_end; - delim_p = 1; + del_pos = 1; } else if(pstate == parse_state::in_del_end) { - if (*it == delim_end[delim_p] && ++delim_p == delim_end.size()) { + if (*it == delim_end[del_pos] && ++del_pos == delim_end.size()) { pstate = parse_state::start; - out += state.top()->render(*this, {false, {tok_start,tok_end}}); - out += state.top()->render(*this, {true, {tok_end, it + 1}}); + toks.push_back({false, false, ws_only, {tok_start, tok_end}}); + toks.push_back({true, false, false, {tok_end, it + 1}}); + ws_only = true; tok_start = it + 1; } else { pstate = parse_state::start; } } } - out += state.top()->render(*this, {false, {tok_start, t.end()}}); - return out; + toks.push_back({false, false, ws_only, {tok_start, t.end()}}); +} + +void render_context::strip_whitespace(std::vector& tokens) { + auto line_begin = tokens.begin(); + bool has_tag = false, non_space = false; + for(auto it = tokens.begin(); it != tokens.end(); ++it) { + auto type = (*it).token_type(); + if(type != token::type::text && type != token::type::variable && + type != token::type::unescaped_variable) + has_tag = true; + else if(!(*it).is_ws_only()) + non_space = true; + if((*it).is_eol()) { + if(has_tag && !non_space) + for (auto line_it = line_begin; line_it != it + 1; ++line_it) + if ((*line_it).is_ws_only()) + (*line_it).mark(); + non_space = has_tag = false; + line_begin = it + 1; + } + } + for(auto it = tokens.begin(); it != tokens.end();) + ((*it).is_marked())?(it = tokens.erase(it)):(++it); +} + +std::string render_context::render(const std::string& tmplt) { + std::vector tokens; + tokenize(tmplt, tokens); + strip_whitespace(tokens); + return render(tokens); } std::string render_context::render(const std::vector& tokens) { diff --git a/src/render_context.hpp b/src/render_context.hpp index de2c816..9a12253 100644 --- a/src/render_context.hpp +++ b/src/render_context.hpp @@ -31,20 +31,22 @@ namespace mstch { state.top() = std::unique_ptr( new T(std::forward(args)...)); } - template - void push_state(Args&&... args) { - state.push(std::unique_ptr( - new T(std::forward(args)...))); - } private: + enum class parse_state { + start, in_del_start, in_del, in_content, in_esccontent, in_del_end + }; static const mstch::node null_node; const mstch::node& find_node( const std::string& token, const std::deque& current_objects); + void tokenize(const std::string& tmplt, std::vector& tokens); + void strip_whitespace(std::vector& tokens); std::string render(const std::vector& tokens); std::map partials; std::deque objects; std::stack> state; + std::string delim_start; + std::string delim_end; }; } diff --git a/src/state/in_inverted_section.hpp b/src/state/in_inverted_section.hpp index a6eef12..663f9e0 100644 --- a/src/state/in_inverted_section.hpp +++ b/src/state/in_inverted_section.hpp @@ -9,9 +9,9 @@ namespace mstch { namespace state { class in_inverted_section: public render_state { public: - in_inverted_section(const std::string §ion_name); + in_inverted_section(const std::string& section_name); std::string render( - render_context &context, const token &token) override; + render_context& context, const token& token) override; private: const std::string section_name; std::vector section_tokens; diff --git a/src/state/outside_section.hpp b/src/state/outside_section.hpp index 3ef406d..9c213ca 100644 --- a/src/state/outside_section.hpp +++ b/src/state/outside_section.hpp @@ -8,7 +8,7 @@ namespace mstch { class outside_section: public render_state { public: std::string render( - render_context &context, const token &token) override; + render_context& context, const token& token) override; }; } } diff --git a/src/token.cpp b/src/token.cpp index 0e219cc..cb6f974 100644 --- a/src/token.cpp +++ b/src/token.cpp @@ -19,10 +19,10 @@ std::tuple token::token_info(const std::string& inside) { } } -token::token(bool is_tag_val, const std::string& raw_val): - raw_val(raw_val), is_tag_val(is_tag_val) +token::token(bool is_tag, bool eol, bool ws_only, const std::string& raw_val): + raw_val(raw_val), eol(eol), ws_only(ws_only), marked(false) { - if(is_tag_val) { + if(is_tag) { std::string inside{raw_val.substr(2, raw_val.size() - 4)}; boost::trim(inside); if (inside.size() > 0) { @@ -36,19 +36,3 @@ token::token(bool is_tag_val, const std::string& raw_val): content_val = raw_val; } } - -token::type token::token_type() const { - return type_val; -} - -std::string token::content() const { - return content_val; -} - -std::string token::raw() const { - return raw_val; -} - -bool token::is_tag() const { - return is_tag_val; -} diff --git a/src/token.hpp b/src/token.hpp index c9e55f2..2e3e6a5 100644 --- a/src/token.hpp +++ b/src/token.hpp @@ -10,16 +10,21 @@ namespace mstch { text, variable, section_open, section_close, inverted_section_open, unescaped_variable, comment, partial }; - token(bool is_tag_val, const std::string& raw_val); - type token_type() const; - std::string content() const; - std::string raw() const; - bool is_tag() const; + token(bool is_tag, bool eol, bool ws_only, const std::string& raw_val); + type token_type() const { return type_val; }; + const std::string& content() const { return content_val; }; + const std::string& raw() const { return raw_val; }; + bool is_eol() const { return eol; } + bool is_ws_only() const { return ws_only; } + bool is_marked() const { return marked; } + void mark() { marked = true; }; private: type type_val; std::string content_val; std::string raw_val; - bool is_tag_val; + bool eol; + bool ws_only; + bool marked; std::tuple token_info(const std::string& inside); }; } diff --git a/src/utils.cpp b/src/utils.cpp index 70a9a9e..558c2c7 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,25 +1,7 @@ #include "utils.hpp" -#include #include -std::string mstch::strip_whitespace(const std::string& tmplt) { - boost::regex comment_match("\\{\\{![^\\}]*\\}\\}"); - boost::regex tag_match("\\{{2}[ ]*[#|/|^|!|>]{1}[^\\}]*\\}{2}"); - boost::regex whitespace_match("^\\s*$"); - std::ostringstream out; - std::istringstream in(boost::regex_replace(tmplt, comment_match, "{{!}}")); - for(std::string line; std::getline(in, line);) { - std::string no_tags = boost::regex_replace(line, tag_match, ""); - if (no_tags != line && boost::regex_match(no_tags, whitespace_match)) - out << boost::regex_replace(line, boost::regex("\\s"), ""); - else - out << line << (in.eof()?"":"\n"); - } - return out.str(); - return tmplt; -} - std::string mstch::html_escape(std::string str) { boost::replace_all(str, "&", "&"); boost::replace_all(str, "'", "'"); diff --git a/src/utils.hpp b/src/utils.hpp index ab8d6bd..ff7cdaa 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -4,7 +4,6 @@ #include namespace mstch { - std::string strip_whitespace(const std::string& tmplt); std::string html_escape(std::string str); } diff --git a/test/benchmark_main.cpp b/test/benchmark_main.cpp index 76578c5..7b657d1 100644 --- a/test/benchmark_main.cpp +++ b/test/benchmark_main.cpp @@ -4,30 +4,6 @@ #include int main() { - std::string complex_html{ - "

{{header}}

\n" - "{{#list}}\n" - "
    \n" - " {{#item}}\n" - " {{#current}}\n" - "
  • {{name}}
  • \n" - " {{/current}}\n" - " {{#link}}\n" - "
  • {{name}}
  • \n" - " {{/link}}\n" - " {{/item}}\n" - "
\n" - "{{/list}}\n" - "{{#empty}}\n" - "

The list is empty.

\n" - "{{/empty}}\n" - "{{^empty}}\n" - "

The list is not empty.

\n" - "{{/empty}}" - }; - - - std::string comment_tmp{ "

{{header}}

    " "{{#comments}}
  • {{name}}
    "