refactor tokenizer with delimiter change support
This commit is contained in:
parent
7bdde0783f
commit
8a32d98443
@ -7,54 +7,39 @@ template_type::template_type(const std::string& str) {
|
|||||||
strip_whitespace();
|
strip_whitespace();
|
||||||
}
|
}
|
||||||
|
|
||||||
void template_type::tokenize(const std::string& t) {
|
void template_type::process_text(citer begin, citer end) {
|
||||||
std::string delim_start{"{{"};
|
if (begin == end)
|
||||||
std::string delim_end{"}}"};
|
return;
|
||||||
std::string::const_iterator tok_end, tok_start = t.begin();
|
auto start = begin;
|
||||||
parse_state pstate = parse_state::start;
|
for (auto it = begin; it != end; ++it)
|
||||||
unsigned int del_pos = 0;
|
if (*it == '\n' || it == end - 1) {
|
||||||
for (std::string::const_iterator it = t.begin(); it != t.end(); ++it) {
|
tokens.push_back({{start, it + 1}});
|
||||||
if (pstate == parse_state::start) {
|
start = it + 1;
|
||||||
if (*it == delim_start[0]) {
|
}
|
||||||
pstate = parse_state::in_del_start;
|
}
|
||||||
tok_end = it;
|
|
||||||
del_pos = 1;
|
void template_type::tokenize(const std::string& tmplt) {
|
||||||
} else if (*it == '\n') {
|
std::string open{"{{"}, close{"}}"};
|
||||||
tokens.push_back({{tok_start, it + 1}});
|
citer beg = tmplt.begin();
|
||||||
tok_start = it + 1;
|
for (unsigned long pos = 0; pos < tmplt.size();) {
|
||||||
}
|
auto to = tmplt.find(open, pos);
|
||||||
} else if (pstate == parse_state::in_del_start) {
|
auto tc = tmplt.find(close, (to == std::string::npos)?to:(to + 1));
|
||||||
if (*it == delim_start[del_pos] && ++del_pos == delim_start.size())
|
if (tc != std::string::npos && to != std::string::npos) {
|
||||||
pstate = parse_state::in_del;
|
if (*(beg + to + open.size()) == '{' && *(beg + tc + close.size()) == '}')
|
||||||
else
|
++tc;
|
||||||
pstate = parse_state::start;
|
process_text(beg + pos, beg + to);
|
||||||
} else if (pstate == parse_state::in_del) {
|
pos = tc + close.size();
|
||||||
if (*it == '{')
|
tokens.push_back({{beg + to, beg + tc + close.size()},
|
||||||
pstate = parse_state::in_esccontent;
|
open.size(), close.size()});
|
||||||
else if (*it == delim_end[0] && (del_pos = 1))
|
if (*(beg + to + open.size()) == '=' && *(beg + tc - 1) == '=') {
|
||||||
pstate = parse_state::in_del_end;
|
open = {beg + to + open.size() + 1, beg + tmplt.find(' ', to)};
|
||||||
else
|
close = {beg + tmplt.find(' ', to) + 1, beg + tc - 1};
|
||||||
pstate = parse_state::in_content;
|
|
||||||
} else if (pstate == parse_state::in_esccontent && *it == '}') {
|
|
||||||
pstate = parse_state::in_content;
|
|
||||||
} else if (pstate == parse_state::in_content && *it == delim_end[0]) {
|
|
||||||
pstate = parse_state::in_del_end;
|
|
||||||
del_pos = 1;
|
|
||||||
} else if (pstate == parse_state::in_del_end) {
|
|
||||||
if (*it == delim_end[del_pos] && ++del_pos == delim_end.size()) {
|
|
||||||
pstate = parse_state::start;
|
|
||||||
tokens.push_back({{tok_start, tok_end}});
|
|
||||||
tokens.push_back(
|
|
||||||
{{tok_end, it + 1},
|
|
||||||
delim_start.size(),
|
|
||||||
delim_end.size()});
|
|
||||||
tok_start = it + 1;
|
|
||||||
} else {
|
|
||||||
pstate = parse_state::start;
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
process_text(beg + pos, tmplt.end());
|
||||||
|
pos = tc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tokens.push_back({{tok_start, t.end()}});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void template_type::strip_whitespace() {
|
void template_type::strip_whitespace() {
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "token.hpp"
|
#include "token.hpp"
|
||||||
|
#include "utils.hpp"
|
||||||
|
|
||||||
namespace mstch {
|
namespace mstch {
|
||||||
|
|
||||||
@ -16,12 +17,10 @@ class template_type {
|
|||||||
void operator<<(const token& token) { tokens.push_back(token); }
|
void operator<<(const token& token) { tokens.push_back(token); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum class parse_state {
|
|
||||||
start, in_del_start, in_del, in_content, in_esccontent, in_del_end
|
|
||||||
};
|
|
||||||
void tokenize(const std::string& str);
|
|
||||||
void strip_whitespace();
|
|
||||||
std::vector<token> tokens;
|
std::vector<token> tokens;
|
||||||
|
void strip_whitespace();
|
||||||
|
void process_text(citer beg, citer end);
|
||||||
|
void tokenize(const std::string& tmplt);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,9 @@ token::token(const std::string& str, std::size_t left, std::size_t right):
|
|||||||
m_raw(str), m_eol(false), m_ws_only(false)
|
m_raw(str), m_eol(false), m_ws_only(false)
|
||||||
{
|
{
|
||||||
if (left != 0 && right != 0) {
|
if (left != 0 && right != 0) {
|
||||||
if (str[left] == '{' && str[str.size() - right - 1] == '}') {
|
if (str[left] == '=' && str[str.size() - right - 1] == '=') {
|
||||||
|
m_type = type::delimiter_change;
|
||||||
|
} else if (str[left] == '{' && str[str.size() - right - 1] == '}') {
|
||||||
m_type = type::unescaped_variable;
|
m_type = type::unescaped_variable;
|
||||||
m_name = {first_not_ws(str.begin() + left + 1, str.end() - right),
|
m_name = {first_not_ws(str.begin() + left + 1, str.end() - right),
|
||||||
first_not_ws(str.rbegin() + 1 + right, str.rend() - left) + 1};
|
first_not_ws(str.rbegin() + 1 + right, str.rend() - left) + 1};
|
||||||
|
@ -8,7 +8,7 @@ class token {
|
|||||||
public:
|
public:
|
||||||
enum class type {
|
enum class type {
|
||||||
text, variable, section_open, section_close, inverted_section_open,
|
text, variable, section_open, section_close, inverted_section_open,
|
||||||
unescaped_variable, comment, partial
|
unescaped_variable, comment, partial, delimiter_change
|
||||||
};
|
};
|
||||||
token(const std::string& str, std::size_t left = 0, std::size_t right = 0);
|
token(const std::string& str, std::size_t left = 0, std::size_t right = 0);
|
||||||
type token_type() const { return m_type; };
|
type token_type() const { return m_type; };
|
||||||
|
4
test/data/changing_delimiters.hpp
Normal file
4
test/data/changing_delimiters.hpp
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
const mstch::node changing_delimiters_data = mstch::map{
|
||||||
|
{"foo", std::string{"foooooooooooooo"}},
|
||||||
|
{"bar", std::string{"<b>bar!</b>"}}
|
||||||
|
};
|
@ -1,4 +0,0 @@
|
|||||||
({
|
|
||||||
"foo": "foooooooooooooo",
|
|
||||||
"bar": "<b>bar!</b>"
|
|
||||||
})
|
|
@ -1,6 +0,0 @@
|
|||||||
({
|
|
||||||
first: "It worked the first time.",
|
|
||||||
second: "And it worked the second time.",
|
|
||||||
third: "Then, surprisingly, it worked the third time.",
|
|
||||||
fourth: "Fourth time also fine!."
|
|
||||||
})
|
|
6
test/data/delimiters.hpp
Normal file
6
test/data/delimiters.hpp
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
const mstch::node delimiters_data = mstch::map{
|
||||||
|
{"first", std::string{"It worked the first time."}},
|
||||||
|
{"second", std::string{"And it worked the second time."}},
|
||||||
|
{"third", std::string{"Then, surprisingly, it worked the third time."}},
|
||||||
|
{"fourth", std::string{"Fourth time also fine!."}}
|
||||||
|
};
|
@ -6,20 +6,10 @@
|
|||||||
#include <boost/program_options/variables_map.hpp>
|
#include <boost/program_options/variables_map.hpp>
|
||||||
#include <boost/program_options/parsers.hpp>
|
#include <boost/program_options/parsers.hpp>
|
||||||
|
|
||||||
void wrap_code(std::istream& input, std::ostream& output) {
|
|
||||||
std::string line;
|
|
||||||
while (std::getline(input, line)) {
|
|
||||||
output << line;
|
|
||||||
if (!input.eof())
|
|
||||||
output << std::endl;
|
|
||||||
}
|
|
||||||
output << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void wrap_string(std::istream& input, std::ostream& output,
|
void wrap_string(std::istream& input, std::ostream& output,
|
||||||
const std::string& variable_name)
|
const std::string& variable_name)
|
||||||
{
|
{
|
||||||
output << "const std::string " << variable_name << "{" << std::endl;;
|
output << "const std::string " << variable_name << "{\n";
|
||||||
std::string line;
|
std::string line;
|
||||||
while (std::getline(input, line)) {
|
while (std::getline(input, line)) {
|
||||||
boost::replace_all(line, "\\", "\\\\");
|
boost::replace_all(line, "\\", "\\\\");
|
||||||
@ -27,9 +17,9 @@ void wrap_string(std::istream& input, std::ostream& output,
|
|||||||
output << " \"" << line;
|
output << " \"" << line;
|
||||||
if (!input.eof())
|
if (!input.eof())
|
||||||
output << "\\n";
|
output << "\\n";
|
||||||
output << "\"" << std::endl;
|
output << "\"\n";
|
||||||
}
|
}
|
||||||
output << "};" << std::endl;
|
output << "};\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
@ -54,34 +44,32 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!vm.count("output")) {
|
if (!vm.count("output")) {
|
||||||
std::cout << "Output file not set" << std::endl;
|
std::cerr << "Output file not set" << std::endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ofstream output(vm["output"].as<std::string>(), std::ios::out);
|
std::ofstream output(vm["output"].as<std::string>(), std::ios::out);
|
||||||
|
|
||||||
if (vm.count("namespace"))
|
if (vm.count("namespace"))
|
||||||
output << "namespace " << vm["namespace"].as<std::string>() << " {" << std::endl;
|
output << "namespace " << vm["namespace"].as<std::string>() << " {\n";
|
||||||
|
|
||||||
if (vm.count("input-string")) {
|
if (vm.count("input-string"))
|
||||||
for (auto& string_filename: vm["input-string"].as<std::vector<std::string>>()) {
|
for (auto& filename: vm["input-string"].as<std::vector<std::string>>()) {
|
||||||
std::ifstream input(string_filename, std::ios::in);
|
std::ifstream input(filename, std::ios::in);
|
||||||
wrap_string(input, output,
|
wrap_string(input, output, boost::replace_all_copy(filename, ".", "_"));
|
||||||
boost::replace_all_copy(string_filename, ".", "_"));
|
|
||||||
input.close();
|
input.close();
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (vm.count("input-code")) {
|
if (vm.count("input-code"))
|
||||||
for (auto& data_filename: vm["input-code"].as<std::vector<std::string>>()) {
|
for (auto& filename: vm["input-code"].as<std::vector<std::string>>()) {
|
||||||
std::ifstream input(data_filename, std::ios::in);
|
std::ifstream input(filename, std::ios::in);
|
||||||
wrap_code(input, output);
|
output << std::string{(std::istreambuf_iterator<char>(input)),
|
||||||
|
std::istreambuf_iterator<char>()} << std::endl;
|
||||||
input.close();
|
input.close();
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (vm.count("namespace"))
|
if (vm.count("namespace"))
|
||||||
output << "}" << std::endl;
|
output << "}\n";
|
||||||
|
|
||||||
output.close();
|
output.close();
|
||||||
|
|
||||||
|
@ -20,9 +20,11 @@ MSTCH_TEST(array_of_strings)
|
|||||||
MSTCH_TEST(backslashes)
|
MSTCH_TEST(backslashes)
|
||||||
MSTCH_TEST(bug_11_eating_whitespace)
|
MSTCH_TEST(bug_11_eating_whitespace)
|
||||||
MSTCH_TEST(bug_length_property)
|
MSTCH_TEST(bug_length_property)
|
||||||
|
MSTCH_TEST(changing_delimiters)
|
||||||
MSTCH_TEST(comments)
|
MSTCH_TEST(comments)
|
||||||
MSTCH_TEST(complex)
|
MSTCH_TEST(complex)
|
||||||
MSTCH_TEST(context_lookup)
|
MSTCH_TEST(context_lookup)
|
||||||
|
MSTCH_TEST(delimiters)
|
||||||
MSTCH_TEST(disappearing_whitespace)
|
MSTCH_TEST(disappearing_whitespace)
|
||||||
MSTCH_TEST(dot_notation)
|
MSTCH_TEST(dot_notation)
|
||||||
MSTCH_TEST(double_render)
|
MSTCH_TEST(double_render)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user