diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3e8e8d1..01e1356 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -185,14 +185,15 @@ endif(SLED_BUILD_TESTS)
 
 if(SLED_BUILD_FUZZ)
-  macro(add_fuzz_test name sources)
+  function(add_fuzz_test name sources)
     add_executable(${name} ${sources})
     target_link_libraries(${name} PRIVATE sled)
     target_compile_options(${name} PRIVATE -g -O1 -fsanitize=fuzzer,address -fsanitize-coverage=trace-cmp)
     target_link_options(${name} PRIVATE -fsanitize=fuzzer,address -fsanitize-coverage=trace-cmp)
-  endmacro()
+  endfunction()
 
   add_fuzz_test(base64_fuzz src/sled/strings/base64_fuzz.cc)
+  add_fuzz_test(uri_fuzz src/sled/uri_fuzz.cc)
 endif(SLED_BUILD_FUZZ)
diff --git a/src/sled/filesystem/path.cc b/src/sled/filesystem/path.cc
index e7607f1..143dd3a 100644
--- a/src/sled/filesystem/path.cc
+++ b/src/sled/filesystem/path.cc
@@ -9,6 +9,13 @@
 #define PATH_MAX_SZ PATH_MAX
 #endif
 
+#if defined(_WIN32)
+#include <direct.h>
+#define getcwd _getcwd
+#else
+#include <unistd.h>
+#endif
+
 namespace sled {
 Path
 Path::Current()
diff --git a/src/sled/log/log.h b/src/sled/log/log.h
index 355edbd..2bbcedd 100644
--- a/src/sled/log/log.h
+++ b/src/sled/log/log.h
@@ -116,6 +116,8 @@ void Log(LogLevel level, const char *tag, const char *fmt, const char *file_name
 #define LOGF(tag, fmt, ...) SLOG(sled::LogLevel::kFatal, tag, fmt, ##__VA_ARGS__)
 
 #define ASSERT(cond, fmt, ...) SLOG_ASSERT(cond, "ASSERT", fmt, ##__VA_ARGS__)
+#define SLED_ASSERT(cond, fmt, ...) SLOG_ASSERT(cond, "ASSERT", fmt, ##__VA_ARGS__)
+#define SLED_DASSERT(cond, fmt, ...) SLOG_ASSERT(cond, "ASSERT", fmt, ##__VA_ARGS__)
 
 #define __LOG_EVERY_N(n, level, tag, fmt, ...)                                                     \
     do {                                                                                           \
diff --git a/src/sled/ref_counter.h b/src/sled/ref_counter.h
index 28daaf2..3f262b4 100644
--- a/src/sled/ref_counter.h
+++ b/src/sled/ref_counter.h
@@ -22,19 +22,13 @@ public:
 
     sled::RefCountReleaseStatus DecRef()
     {
-        int ref_count_after_subtract =
-            ref_count_.fetch_sub(1, std::memory_order_acq_rel) - 1;
+        int ref_count_after_subtract = ref_count_.fetch_sub(1, std::memory_order_acq_rel) - 1;
 
-        if (ref_count_after_subtract == 0) {
-            return sled::RefCountReleaseStatus::kDroppedLastRef;
-        }
+        if (ref_count_after_subtract == 0) { return sled::RefCountReleaseStatus::kDroppedLastRef; }
         return sled::RefCountReleaseStatus::kOtherRefsRemained;
     }
 
-    bool HasOneRef() const
-    {
-        return ref_count_.load(std::memory_order_acquire) == 1;
-    }
+    bool HasOneRef() const { return ref_count_.load(std::memory_order_acquire) == 1; }
 
 private:
     std::atomic<int> ref_count_;
diff --git a/src/sled/sanitizer_test.cc b/src/sled/sanitizer_test.cc
index 2180533..e69de29 100644
--- a/src/sled/sanitizer_test.cc
+++ b/src/sled/sanitizer_test.cc
@@ -1 +0,0 @@
-#include
diff --git a/src/sled/sled.h b/src/sled/sled.h
index aed7dc6..ae01126 100644
--- a/src/sled/sled.h
+++ b/src/sled/sled.h
@@ -86,6 +86,9 @@ namespace async {}
 #include "sled/time_utils.h"
 #include "sled/variant.h"
 
+// uri
+#include "sled/uri.h"
+
 // testing
 #include "sled/testing/test.h"
 #endif// SLED_SLED_H
diff --git a/src/sled/system/pid.h b/src/sled/system/pid.h
index 07d6d61..9d56be4 100644
--- a/src/sled/system/pid.h
+++ b/src/sled/system/pid.h
@@ -2,7 +2,13 @@
 #ifndef SLED_SYSTEM_PID_H
 #define SLED_SYSTEM_PID_H
 
+#ifdef _WIN32
+#include <windows.h>
+#define getpid() static_cast<pid_t>(GetCurrentProcessId())
+typedef unsigned long pid_t;
+#else
 #include <unistd.h>
+#endif
 
 namespace sled {
 pid_t GetCachedPID();
diff --git a/src/sled/uri.cc b/src/sled/uri.cc
index 0109ac2..1b2876f 100644
--- a/src/sled/uri.cc
+++ b/src/sled/uri.cc
@@ -1,553 +1,161 @@
 #include "sled/uri.h"
+#include "sled/log/log.h"
 #include "sled/strings/utils.h"
 #include
 #include
+#include <regex>
 #include
#include #include #include -namespace detail { -class uri { - /* URIs are broadly divided into two categories: hierarchical and - * non-hierarchical. Both hierarchical URIs and non-hierarchical URIs have a - * few elements in common; all URIs have a scheme of one or more alphanumeric - * characters followed by a colon, and they all may optionally have a query - * component preceded by a question mark, and a fragment component preceded by - * an octothorpe (hash mark: '#'). The query consists of stanzas separated by - * either ampersands ('&') or semicolons (';') (but only one or the other), - * and each stanza consists of a key and an optional value; if the value - * exists, the key and value must be divided by an equals sign. - * - * The following is an example from Wikipedia of a hierarchical URI: - * scheme:[//[user:password@]domain[:port]][/]path[?query][#fragment] - */ - -public: - enum class scheme_category { Hierarchical, NonHierarchical }; - - enum class component { Scheme, Content, Username, Password, Host, Port, Path, Query, Fragment }; - - enum class query_argument_separator { ampersand, semicolon }; - - uri(char const *uri_text, - scheme_category category = scheme_category::Hierarchical, - query_argument_separator separator = query_argument_separator::ampersand) - : m_category(category), - m_port(0), - m_path_is_rooted(false), - m_separator(separator) - { - setup(std::string(uri_text), category); - }; - - uri(std::string const &uri_text, - scheme_category category = scheme_category::Hierarchical, - query_argument_separator separator = query_argument_separator::ampersand) - : m_category(category), - m_port(0), - m_path_is_rooted(false), - m_separator(separator) - { - setup(uri_text, category); - }; - - uri(std::map const &components, - scheme_category category, - bool rooted_path, - query_argument_separator separator = query_argument_separator::ampersand) - : m_category(category), - m_path_is_rooted(rooted_path), - m_separator(separator) - { - if 
(components.count(component::Scheme)) { - if (components.at(component::Scheme).length() == 0) { - throw std::invalid_argument("Scheme cannot be empty."); - } - m_scheme = components.at(component::Scheme); - } else { - throw std::invalid_argument("A URI must have a scheme."); - } - - if (category == scheme_category::Hierarchical) { - if (components.count(component::Content)) { - throw std::invalid_argument("The content component is only for use in non-hierarchical URIs."); - } - - bool has_username = components.count(component::Username); - bool has_password = components.count(component::Password); - if (has_username && has_password) { - m_username = components.at(component::Username); - m_password = components.at(component::Password); - } else if ((has_username && !has_password) || (!has_username && has_password)) { - throw std::invalid_argument("If a username or password is supplied, both must be provided."); - } - - if (components.count(component::Host)) { m_host = components.at(component::Host); } - - if (components.count(component::Port)) { m_port = std::stoul(components.at(component::Port)); } - - if (components.count(component::Path)) { - m_path = components.at(component::Path); - } else { - throw std::invalid_argument("A path is required on a hierarchical URI, even an empty path."); - } - } else { - if (components.count(component::Username) || components.count(component::Password) - || components.count(component::Host) || components.count(component::Port) - || components.count(component::Path)) { - throw std::invalid_argument( - "None of the hierarchical components are allowed in a non-hierarchical URI."); - } - - if (components.count(component::Content)) { - m_content = components.at(component::Content); - } else { - throw std::invalid_argument( - "Content is a required component for a non-hierarchical URI, even an empty string."); - } - } - - if (components.count(component::Query)) { m_query = components.at(component::Query); } - - if 
(components.count(component::Fragment)) { m_fragment = components.at(component::Fragment); } - } - - uri(uri const &other, std::map const &replacements) - : m_category(other.m_category), - m_path_is_rooted(other.m_path_is_rooted), - m_separator(other.m_separator) - { - m_scheme = (replacements.count(component::Scheme)) ? replacements.at(component::Scheme) : other.m_scheme; - - if (m_category == scheme_category::Hierarchical) { - m_username - = (replacements.count(component::Username)) ? replacements.at(component::Username) : other.m_username; - - m_password - = (replacements.count(component::Password)) ? replacements.at(component::Password) : other.m_password; - - m_host = (replacements.count(component::Host)) ? replacements.at(component::Host) : other.m_host; - - m_port - = (replacements.count(component::Port)) ? std::stoul(replacements.at(component::Port)) : other.m_port; - - m_path = (replacements.count(component::Path)) ? replacements.at(component::Path) : other.m_path; - } else { - m_content - = (replacements.count(component::Content)) ? replacements.at(component::Content) : other.m_content; - } - - m_query = (replacements.count(component::Query)) ? replacements.at(component::Query) : other.m_query; - - m_fragment - = (replacements.count(component::Fragment)) ? replacements.at(component::Fragment) : other.m_fragment; - } - - // Copy constructor; just use the copy assignment operator internally. 
- uri(uri const &other) { *this = other; }; - - // Copy assignment operator - uri &operator=(uri const &other) - { - if (this != &other) { - m_scheme = other.m_scheme; - m_content = other.m_content; - m_username = other.m_username; - m_password = other.m_password; - m_host = other.m_host; - m_path = other.m_path; - m_query = other.m_query; - m_fragment = other.m_fragment; - m_query_dict = other.m_query_dict; - m_category = other.m_category; - m_port = other.m_port; - m_path_is_rooted = other.m_path_is_rooted; - m_separator = other.m_separator; - } - return *this; - } - - ~uri(){}; - - std::string const &get_scheme() const { return m_scheme; }; - - scheme_category get_scheme_category() const { return m_category; }; - - std::string const &get_content() const - { - if (m_category != scheme_category::NonHierarchical) { - throw std::domain_error("The content component is only valid for non-hierarchical URIs."); - } - return m_content; - }; - - std::string const &get_username() const - { - if (m_category != scheme_category::Hierarchical) { - throw std::domain_error("The username component is only valid for hierarchical URIs."); - } - return m_username; - }; - - std::string const &get_password() const - { - if (m_category != scheme_category::Hierarchical) { - throw std::domain_error("The password component is only valid for hierarchical URIs."); - } - return m_password; - }; - - std::string const &get_host() const - { - if (m_category != scheme_category::Hierarchical) { - throw std::domain_error("The host component is only valid for hierarchical URIs."); - } - return m_host; - }; - - unsigned long get_port() const - { - if (m_category != scheme_category::Hierarchical) { - throw std::domain_error("The port component is only valid for hierarchical URIs."); - } - return m_port; - }; - - std::string const &get_path() const - { - if (m_category != scheme_category::Hierarchical) { - throw std::domain_error("The path component is only valid for hierarchical URIs."); - } - return 
m_path; - }; - - std::string const &get_query() const { return m_query; }; - - std::map const &get_query_dictionary() const { return m_query_dict; }; - - std::string const &get_fragment() const { return m_fragment; }; - - std::string to_string() const - { - std::string full_uri; - full_uri.append(m_scheme); - full_uri.append(":"); - - if (m_content.length() > m_path.length()) { - full_uri.append("//"); - if (!(m_username.empty() || m_password.empty())) { - full_uri.append(m_username); - full_uri.append(":"); - full_uri.append(m_password); - full_uri.append("@"); - } - - full_uri.append(m_host); - - if (m_port != 0) { - full_uri.append(":"); - full_uri.append(std::to_string(m_port)); - } - } - - if (m_path_is_rooted) { full_uri.append("/"); } - full_uri.append(m_path); - - if (!m_query.empty()) { - full_uri.append("?"); - full_uri.append(m_query); - } - - if (!m_fragment.empty()) { - full_uri.append("#"); - full_uri.append(m_fragment); - } - - return full_uri; - }; - -private: - void setup(std::string const &uri_text, scheme_category category) - { - size_t const uri_length = uri_text.length(); - - if (uri_length == 0) { throw std::invalid_argument("URIs cannot be of zero length."); } - - std::string::const_iterator cursor = parse_scheme(uri_text, uri_text.begin()); - // After calling parse_scheme, *cursor == ':'; none of the following parsers - // expect a separator character, so we advance the cursor upon calling them. - cursor = parse_content(uri_text, (cursor + 1)); - - if ((cursor != uri_text.end()) && (*cursor == '?')) { cursor = parse_query(uri_text, (cursor + 1)); } - - if ((cursor != uri_text.end()) && (*cursor == '#')) { cursor = parse_fragment(uri_text, (cursor + 1)); } - - init_query_dictionary();// If the query string is empty, this will be empty too. 
- }; - - std::string::const_iterator parse_scheme(std::string const &uri_text, std::string::const_iterator scheme_start) - { - std::string::const_iterator scheme_end = scheme_start; - while ((scheme_end != uri_text.end()) && (*scheme_end != ':')) { - if (!(std::isalnum(*scheme_end) || (*scheme_end == '-') || (*scheme_end == '+') || (*scheme_end == '.'))) { - throw std::invalid_argument( - "Invalid character found in the scheme component. Supplied URI was: \"" + uri_text + "\"."); - } - ++scheme_end; - } - - if (scheme_end == uri_text.end()) { - throw std::invalid_argument( - "End of URI found while parsing the scheme. Supplied URI was: \"" + uri_text + "\"."); - } - - if (scheme_start == scheme_end) { - throw std::invalid_argument( - "Scheme component cannot be zero-length. Supplied URI was: \"" + uri_text + "\"."); - } - - m_scheme = std::string(scheme_start, scheme_end); - return scheme_end; - }; - - std::string::const_iterator parse_content(std::string const &uri_text, std::string::const_iterator content_start) - { - std::string::const_iterator content_end = content_start; - while ((content_end != uri_text.end()) && (*content_end != '?') && (*content_end != '#')) { ++content_end; } - - m_content = std::string(content_start, content_end); - - if ((m_category == scheme_category::Hierarchical) && (m_content.length() > 0)) { - // If it's a hierarchical URI, the content should be parsed for the hierarchical components. - std::string::const_iterator path_start = m_content.begin(); - std::string::const_iterator path_end = m_content.end(); - if (!m_content.compare(0, 2, "//")) { - // In this case an authority component is present. 
- std::string::const_iterator authority_cursor = (m_content.begin() + 2); - if (m_content.find_first_of('@') != std::string::npos) { - std::string::const_iterator userpass_divider - = parse_username(uri_text, m_content, authority_cursor); - authority_cursor = parse_password(uri_text, m_content, (userpass_divider + 1)); - // After this call, *authority_cursor == '@', so we skip over it. - ++authority_cursor; - } - - authority_cursor = parse_host(uri_text, m_content, authority_cursor); - - if ((authority_cursor != m_content.end()) && (*authority_cursor == ':')) { - authority_cursor = parse_port(uri_text, m_content, (authority_cursor + 1)); - } - - if ((authority_cursor != m_content.end()) && (*authority_cursor == '/')) { - // Then the path is rooted, and we should note this. - m_path_is_rooted = true; - path_start = authority_cursor + 1; - } - - // If we've reached the end and no path is present then set path_start - // to the end. - if (authority_cursor == m_content.end()) { path_start = m_content.end(); } - } else if (!m_content.compare(0, 1, "/")) { - m_path_is_rooted = true; - ++path_start; - } - - // We can now build the path based on what remains in the content string, - // since that's all that exists after the host and optional port component. - m_path = std::string(path_start, path_end); - } - return content_end; - }; - - std::string::const_iterator - parse_username(std::string const &uri_text, std::string const &content, std::string::const_iterator username_start) - { - std::string::const_iterator username_end = username_start; - // Since this is only reachable when '@' was in the content string, we can - // ignore the end-of-string case. - while (*username_end != ':') { - if (*username_end == '@') { - throw std::invalid_argument( - "Username must be followed by a password. 
Supplied URI was: \"" + uri_text + "\"."); - } - ++username_end; - } - m_username = std::string(username_start, username_end); - return username_end; - }; - - std::string::const_iterator - parse_password(std::string const &uri_text, std::string const &content, std::string::const_iterator password_start) - { - std::string::const_iterator password_end = password_start; - while (*password_end != '@') { ++password_end; } - - m_password = std::string(password_start, password_end); - return password_end; - }; - - std::string::const_iterator - parse_host(std::string const &uri_text, std::string const &content, std::string::const_iterator host_start) - { - std::string::const_iterator host_end = host_start; - // So, the host can contain a few things. It can be a domain, it can be an - // IPv4 address, it can be an IPv6 address, or an IPvFuture literal. In the - // case of those last two, it's of the form [...] where what's between the - // brackets is a matter of which IPv?? version it is. - while (host_end != content.end()) { - if (*host_end == '[') { - // We're parsing an IPv6 or IPvFuture address, so we should handle that - // instead of the normal procedure. - while ((host_end != content.end()) && (*host_end != ']')) { ++host_end; } - - if (host_end == content.end()) { - throw std::invalid_argument( - "End of content component encountered " - "while parsing the host component. " - "Supplied URI was: \"" - + uri_text + "\"."); - } - - ++host_end; - break; - // We can stop looping, we found the end of the IP literal, which is the - // whole of the host component when one's in use. 
- } else if ((*host_end == ':') || (*host_end == '/')) { - break; - } else { - ++host_end; - } - } - - m_host = std::string(host_start, host_end); - return host_end; - }; - - std::string::const_iterator - parse_port(std::string const &uri_text, std::string const &content, std::string::const_iterator port_start) - { - std::string::const_iterator port_end = port_start; - while ((port_end != content.end()) && (*port_end != '/')) { - if (!std::isdigit(*port_end)) { - throw std::invalid_argument( - "Invalid character while parsing the port. " - "Supplied URI was: \"" - + uri_text + "\"."); - } - - ++port_end; - } - - m_port = std::stoul(std::string(port_start, port_end)); - return port_end; - }; - - std::string::const_iterator parse_query(std::string const &uri_text, std::string::const_iterator query_start) - { - std::string::const_iterator query_end = query_start; - while ((query_end != uri_text.end()) && (*query_end != '#')) { - // Queries can contain almost any character except hash, which is reserved - // for the start of the fragment. - ++query_end; - } - m_query = std::string(query_start, query_end); - return query_end; - }; - - std::string::const_iterator parse_fragment(std::string const &uri_text, std::string::const_iterator fragment_start) - { - m_fragment = std::string(fragment_start, uri_text.end()); - return uri_text.end(); - }; - - void init_query_dictionary() - { - if (!m_query.empty()) { - // Loop over the query string looking for '&'s, then check each one for - // an '=' to find keys and values; if there's not an '=' then the key - // will have an empty value in the map. - char separator = (m_separator == query_argument_separator::ampersand) ? '&' : ';'; - size_t carat = 0; - size_t stanza_end = m_query.find_first_of(separator); - do { - std::string stanza - = m_query.substr(carat, - ((stanza_end != std::string::npos) ? 
(stanza_end - carat) : std::string::npos));
-                size_t key_value_divider = stanza.find_first_of('=');
-                std::string key = stanza.substr(0, key_value_divider);
-                std::string value;
-                if (key_value_divider != std::string::npos) { value = stanza.substr((key_value_divider + 1)); }
-
-                if (m_query_dict.count(key) != 0) { throw std::invalid_argument("Bad key in the query string!"); }
-
-                m_query_dict.emplace(key, value);
-                carat = ((stanza_end != std::string::npos) ? (stanza_end + 1) : std::string::npos);
-                stanza_end = m_query.find_first_of(separator, carat);
-            } while ((stanza_end != std::string::npos) || (carat != std::string::npos));
-        }
-    }
-
-    std::string m_scheme;
-    std::string m_content;
-    std::string m_username;
-    std::string m_password;
-    std::string m_host;
-    std::string m_path;
-    std::string m_query;
-    std::string m_fragment;
-
-    std::map<std::string, std::string> m_query_dict;
-
-    scheme_category m_category;
-    unsigned long m_port;
-    bool m_path_is_rooted;
-    query_argument_separator m_separator;
-};
-}// namespace detail
-
 namespace sled {
-URI
+sled::StatusOr<URI>
 URI::ParseURI(const std::string &uri_str)
 {
+    static const std::regex uri_regex(
+        R"((([a-zA-Z][a-zA-Z0-9+.-]*):)?)"// scheme:
+        R"(([^?#]*))"                     // authority and path
+        R"((?:\?([^#]*))?)"               // ?query
+        R"((?:#(.*))?)",
+        std::regex::ECMAScript);
+    static const std::regex authority_and_path_regex(R"(//([^/]*)(/.*)?)", std::regex::ECMAScript);
+    // static const std::regex authority_only_regex(R"(([^/?#]*))", std::regex::ECMAScript);// only for the disabled fallback below
+    static const std::regex authority_regex(
+        R"((?:([^@:]*)(?::([^@]*))?@)?)"// user:pass@ or user:@ or :pass@
+        R"((\[[^\]]*\]|[^\[:]*))"       // [::1] or 127.0.0.1 or domain
+        R"((?::([0-9]*))?)"             // port
+        ,
+        std::regex::ECMAScript);
+
     URI uri;
-    detail::uri uri_impl(uri_str.c_str(), detail::uri::scheme_category::Hierarchical);
-    uri.set_scheme(uri_impl.get_scheme());
-    // uri.set_content(uri_impl.get_content());
-    uri.set_username(uri_impl.get_username());
-    uri.set_password(uri_impl.get_password());
-    uri.set_host(uri_impl.get_host());
-    uri.set_port(uri_impl.get_port());
-    uri.set_path(std::string("/") + uri_impl.get_path());
-    uri.set_query(uri_impl.get_query_dictionary());
-    uri.set_anchor(uri_impl.get_fragment());
+    std::smatch match;
+    if (!std::regex_match(uri_str, match, uri_regex)) {
+        return sled::MakeStatusOr<URI>(sled::StatusCode::kInvalidArgument, "Invalid URI format");
+    }
+    uri.set_scheme(sled::ToLower(match[2]));
+
+    // Capture groups of uri_regex: match[2] = scheme, match[3] = authority + path,
+    // match[4] = query (text after '?'), match[5] = fragment (text after '#').
+
+    std::smatch authority_and_path_match;
+    std::string authority_and_path = match[3].str();
+    if (!std::regex_match(authority_and_path, authority_and_path_match, authority_and_path_regex)) {
+        /*
+        std::smatch authority_only_match;
+        if (std::regex_match(authority_and_path, authority_only_regex)
+            && std::regex_match(authority_and_path, authority_only_match, authority_regex)) {
+            // not find //
+            uri.has_authority_ = false;
+            // bob@example.com
+            // example.com
+            if (!authority_only_match[4].str().empty()) { uri.set_port(std::stoi(authority_only_match[4])); }
+            uri.set_username(authority_only_match[1]);
+            uri.set_password(authority_only_match[2]);
+            uri.set_host(authority_only_match[3]);
+            return uri;
+        } else {
+        */
+        uri.set_path(std::move(authority_and_path));
+        // }
+    } else {
+        // authority_and_path_match[1] = authority between "//" and the next '/' (may be empty),
+        // authority_and_path_match[2] = rooted path starting at that '/' (may be empty).
+        std::string authority = authority_and_path_match[1];
+        std::smatch authority_match;
+        if (!std::regex_match(authority, authority_match, authority_regex)) {
+            return sled::MakeStatusOr<URI>(sled::StatusCode::kInvalidArgument, "Invalid URI Authority");
+        }
+        // has //
+        uri.has_authority_ = true;
+        // authority_match[1] = username, authority_match[2] = password,
+        // authority_match[3] = host (bracketed literal or name), authority_match[4] = port digits.
+        std::string path = authority_and_path_match[2];
+
+        if (!authority_match[4].str().empty()) {
+            try {
+                uri.set_port(std::stoi(authority_match[4]));
+            } catch (const std::out_of_range &) {
+                return sled::MakeStatusOr<URI>(sled::StatusCode::kInvalidArgument, "Invalid URI Port");
+            }
+        }
+        uri.set_username(authority_match[1]);
+        uri.set_password(authority_match[2]);
+        uri.set_host(authority_match[3]);
+        uri.set_path(std::move(path));
+    }
+
+    // An unmatched optional group converts to an empty string, so a missing query/fragment stays empty.
+    uri.set_query(match[4]);
+    uri.set_anchor(match[5]);
     return std::move(uri);
 }
 
-URI::URI(const std::string &uri_str) { *this = ParseURI(uri_str); }
+URI::URI(const std::string &uri_str)
+{
+    auto uri_or = ParseURI(uri_str);
+    ASSERT(uri_or.ok(), "{}", uri_or.status());
+    *this = std::move(uri_or.value());
+}
+
+static std::map<std::string, std::string>
+ParseQueryMap(const std::string &query)
+{
+
+    std::map<std::string, std::string> params;
+    const auto pairs = sled::StrSplit(query, "&", /*ignore_empty=*/true);
+    for (const auto &pair_text : pairs) {
+        // Split on the first '=' only, so a value such as "abc==" keeps its own '=' characters.
+        const auto eq_pos = pair_text.find('=');
+        if (eq_pos == 0) { continue; }// "=value" has no key: skipped
+        if (eq_pos == std::string::npos) {
+            params[pair_text] = "";
+        } else {
+            params[pair_text.substr(0, eq_pos)] = pair_text.substr(eq_pos + 1);
+        }
+    }
+    return params;
+}
+
+void
+URI::set_query(std::string const &v)
+{
+    query_ = v;
+    query_param_ = ParseQueryMap(query_);
+}
+
+void
+URI::set_query(std::string &&v)
+{
+    query_ = std::move(v);
+    query_param_ = ParseQueryMap(query_);
+}
 
 std::string
 URI::href() const
 {
     std::stringstream ss;
     if (!scheme().empty()) { ss << scheme() << ":"; }
-    if (!user_info().empty()) { ss << user_info() << "@"; }
-    if (!authority().empty()) { ss << authority(); }
+    const std::string auth = authority();
+    if (has_authority_ || !auth.empty()) { ss << "//" << auth; }
     ss << path();
-    ss << "?" << query_string();
-    ss << "#" << anchor();
+    if (!query().empty()) { ss << "?" << query(); }
+    if (!anchor().empty()) { ss << "#" << anchor(); }
     return ss.str();
 }
 
 std::string
 URI::authority() const
 {
-    if (port() == 0) {
-        return host();
-    } else {
-        return host() + ":" + std::to_string(port());
-    }
+    std::stringstream ss;
+    if (!username().empty()) { ss << username(); }
+    if (!password().empty()) { ss << ":" << password(); }
+    if (!username().empty() || !password().empty()) { ss << "@"; }
+    ss << host();
+    if (port() != 0) { ss << ":" << port(); }
+
+    return ss.str();
 }
 
 std::string
@@ -558,16 +166,4 @@ URI::user_info() const
     return username() + ":" + password();
 }
 
-std::string
-URI::query_string() const
-{
-    std::stringstream ss;
-    for (auto item : query()) {
-        std::string key = item.first;
-        std::string value = item.second;
-        if (key.empty()) { return value; }
-        ss << key + "=" + value;
-    }
-    return ss.str();
-}
 }// namespace sled
diff --git a/src/sled/uri.h b/src/sled/uri.h
index a0aaf03..1658988 100644
--- a/src/sled/uri.h
+++ b/src/sled/uri.h
@@ -1,18 +1,23 @@
 #pragma once
-#include <map>
 #ifndef SLED_URI_H
 #define SLED_URI_H
+#include "sled/status_or.h"
+#include <map>
 #include <string>
 
 namespace sled {
 namespace internal {
-#define __SLED_URI_GETTER_AND_SETTER(type, name)                                                   \
+#define __SLED_URI_GETTER(type, name)                                                              \
     type &name() & { return name##_; }                                                             \
     type &&name() && { return std::move(name##_); }                                                \
-    type const &name() const & { return name##_; }                                                 \
+    type const &name() const & { return name##_; }
+
+#define __SLED_URI_SETTER(type, name)                                                              \
     void set_##name(type const &v) { name##_ = v; }                                                \
     void set_##name(type &&v) { name##_ = std::move(v); }
 
+#define __SLED_URI_GETTER_AND_SETTER(type, name) __SLED_URI_GETTER(type, name) __SLED_URI_SETTER(type, name)
+
 }// namespace internal
 
 class URI {
@@ -23,13 +28,13 @@ public:
     // static URI ParseAbsoluteURI(const std::string &uri_str);
 
     // http://xxx.com/index.html?field=value#download
-    static URI ParseURI(const std::string &uri_str);
+    static sled::StatusOr<URI> ParseURI(const std::string &uri_str);
 
     // http://xxx.com/index.html
     // static URI ParseURIReference(const std::string &uri_str);
 
     URI() = default;
-    URI(const std::string &uri_str);
+    SLED_DEPRECATED URI(const std::string &uri_str);
 
     // setter getter
     __SLED_URI_GETTER_AND_SETTER(std::string, scheme)
@@ -39,13 +44,17 @@ public:
     __SLED_URI_GETTER_AND_SETTER(std::string, host)
     __SLED_URI_GETTER_AND_SETTER(unsigned long, port)
     __SLED_URI_GETTER_AND_SETTER(std::string, path)
-    __SLED_URI_GETTER_AND_SETTER(ParamMap, query)
+    // __SLED_URI_GETTER_AND_SETTER(std::string, query)
+    __SLED_URI_GETTER(std::string, query)
+    void set_query(std::string const &v);
+    void set_query(std::string &&v);
     __SLED_URI_GETTER_AND_SETTER(std::string, anchor)
 
+    __SLED_URI_GETTER_AND_SETTER(ParamMap, query_param)
+
     std::string href() const;
     std::string authority() const;
     std::string user_info() const;
-    std::string query_string() const;
 
 private:
     std::string scheme_;
@@ -53,10 +62,13 @@ private:
     std::string username_;
     std::string password_;
     std::string host_;
-    unsigned long port_;
+    unsigned long port_ = 0;
     std::string path_;
-    ParamMap query_;
+    std::string query_;
     std::string anchor_;
+    ParamMap query_param_;
+
+    bool has_authority_ = false;
 };
 
 }// namespace sled
diff --git a/src/sled/uri_fuzz.cc b/src/sled/uri_fuzz.cc
new file mode 100644
index 0000000..7821d08
--- /dev/null
+++ b/src/sled/uri_fuzz.cc
@@ -0,0 +1,21 @@
+#include
+#include
+
+extern "C" int
+LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+    auto uri_or = sled::URI::ParseURI(std::string(reinterpret_cast<const char *>(data), size));
+    if (!uri_or.ok()) { return 0; }
+    auto uri = std::move(uri_or.value());
+    // sled::URI uri(std::string(reinterpret_cast<const char *>(data), size));
+    int cnt = 0;
+    if (!uri.scheme().empty()) { cnt += 1 << 12; }
+    if (!uri.username().empty()) { cnt += 1 << 11; }
+    if (!uri.password().empty()) { cnt += 1 << 10; }
+    if (!uri.host().empty()) { cnt += 1 << 9; }
+    if (uri.port() != 0) { cnt += 1 << 8; }
+    if (!uri.path().empty()) { cnt += 1 << 7; }
+    if (!uri.query().empty()) { cnt += uri.query().size(); }
+    if (!uri.anchor().empty()) { cnt += 1 << 5; }
+    return 0;
+}
diff --git a/src/sled/uri_test.cc b/src/sled/uri_test.cc
index baea0b4..c0c2baa 100644
--- a/src/sled/uri_test.cc
+++ b/src/sled/uri_test.cc
@@ -1,13 +1,126 @@
+#include
 #include
 
-TEST_CASE("")
+TEST_SUITE("URI")
 {
-    sled::URI uri("http://example.com:1234/dir1/dir2/file?a=1#anchor");
-    CHECK_EQ(uri.scheme(), "http");
-    CHECK_EQ(uri.host(), "example.com");
-    CHECK_EQ(uri.port(), 1234);
-    CHECK_EQ(uri.path(), "/dir1/dir2/file");
-    CHECK_EQ(uri.query().size(), 1);
-    CHECK_EQ(uri.query()["a"], "1");
-    CHECK_EQ(uri.anchor(), "anchor");
+    TEST_CASE("Base")
+    {
+        sled::URI uri("http://user:pass@example.com:1234/dir1/dir2/file?a=1&b=1#anchor");
+        CHECK_EQ(uri.scheme(), "http");
+        CHECK_EQ(uri.host(), "example.com");
+        CHECK_EQ(uri.port(), 1234);
+        CHECK_EQ(uri.path(), "/dir1/dir2/file");
+        CHECK_EQ(uri.query(), "a=1&b=1");
+        CHECK_EQ(uri.query_param()["a"], "1");
+        CHECK_EQ(uri.anchor(), "anchor");
+    }
+    TEST_CASE("scheme://authority - With domain")
+    {
+        auto host_and_port = sled::URI("http://baidu.com:443");
+        CHECK_EQ(host_and_port.scheme(), "http");
+        CHECK_EQ(host_and_port.host(), "baidu.com");
+        CHECK_EQ(host_and_port.port(), 443);
+
+        auto host = sled::URI("http://baidu.com");
+        CHECK_EQ(host.scheme(), "http");
+        CHECK_EQ(host.host(), "baidu.com");
+
+        auto host2 = sled::URI("http://baidu.com/");
+        CHECK_EQ(host2.scheme(), "http");
+        CHECK_EQ(host2.host(), "baidu.com");
+        CHECK_EQ(host2.path(), "/");
+
+        auto user_info = sled::URI("http://user:pass@example.com");
+        CHECK_EQ(user_info.scheme(), "http");
+        CHECK_EQ(user_info.username(), "user");
+        CHECK_EQ(user_info.password(), "pass");
+        CHECK_EQ(user_info.host(), "example.com");
+
+        auto user_info2 = sled::URI("http://a_.!~*\'(-)n0123Di%25%26:pass;:&=+$,word@www.zend.com");
+        CHECK_EQ(user_info2.scheme(), "http");
+        CHECK_EQ(user_info2.username(), "a_.!~*\'(-)n0123Di%25%26");
+        CHECK_EQ(user_info2.password(), "pass;:&=+$,word");
+        CHECK_EQ(user_info2.host(), "www.zend.com");
+    }
+
+    TEST_CASE("scheme://auhtority - With IPV6")
+    {
+        auto ipv6 = sled::URI("http://[::1]:443");
+        CHECK_EQ(ipv6.scheme(), "http");
+        CHECK_EQ(ipv6.host(), "[::1]");
+        CHECK_EQ(ipv6.port(), 443);
+        auto ipv6_2 = sled::URI("http://[::1]");
+        CHECK_EQ(ipv6_2.scheme(), "http");
+        CHECK_EQ(ipv6_2.host(), "[::1]");
+        auto ipv6_3 = sled::URI("http://[::1]/");
+        CHECK_EQ(ipv6_3.scheme(), "http");
+        CHECK_EQ(ipv6_3.host(), "[::1]");
+        CHECK_EQ(ipv6_3.path(), "/");
+    }
+
+    TEST_CASE("scheme:/")
+    {
+        auto root = sled::URI("http:/");
+        CHECK_EQ(root.scheme(), "http");
+        CHECK_EQ(root.path(), "/");
+    }
+
+    TEST_CASE("scheme:///")
+    {
+        auto root = sled::URI("http:///");
+        CHECK_EQ(root.scheme(), "http");
+        CHECK_EQ(root.path(), "/");
+    }
+
+    TEST_CASE("?queryOnly")
+    {
+        CHECK_EQ(sled::URI("?a=1&b=2").query(), "a=1&b=2");
+        CHECK_EQ(sled::URI("?").query(), "");
+    }
+
+    TEST_CASE("query_param keeps '=' inside values")
+    {
+        CHECK_EQ(sled::URI("?token=abc==&flag").query_param()["token"], "abc==");
+        CHECK_EQ(sled::URI("?token=abc==&flag").query_param()["flag"], "");
+    }
+
+    TEST_CASE("#fragmentOnly")
+    {
+        CHECK_EQ(sled::URI("#anchor").anchor(), "anchor");
+        CHECK_EQ(sled::URI("#").anchor(), "");
+    }
+
+    TEST_CASE("authorityOnly")
+    {
+        CHECK_EQ(sled::URI("//bob@example.com").host(), "example.com");
+        CHECK_EQ(sled::URI("//bob@example.com").username(), "bob");
+        CHECK_EQ(sled::URI("mailto:bob@example.com").scheme(), "mailto");
+        CHECK_EQ(sled::URI("mailto:bob@example.com").path(), "bob@example.com");
+        CHECK_EQ(sled::URI("example.com").path(), "example.com");
+        CHECK_EQ(sled::URI("example.com:").scheme(), "example.com");
+        CHECK_EQ(sled::URI("example.com:1234").scheme(), "example.com");
+        CHECK_EQ(sled::URI("example.com:1234").path(), "1234");
+    }
+
+    TEST_CASE("invliad uri")
+    {
+        // auto invalid = sled::URI("http:");
+        // auto invalid2 = sled::URI("http://");
+        // auto invalid3 = sled::URI("http://");
+        // auto invalid4 = sled::URI("[[::1]]");
+        CHECK_EQ(sled::URI("[[2620:0:1cfe:face:b00c::3]]").host(), "");
+        CHECK_EQ(sled::URI("[[2620:0:1cfe:face:b00c::3]]").path(), "[[2620:0:1cfe:face:b00c::3]]");
+    }
+
+    TEST_CASE("href")
+    {
+        CHECK_EQ(sled::URI("http://example.com").href(), "http://example.com");
+        CHECK_EQ(sled::URI("http://example.com:1234").href(), "http://example.com:1234");
+        CHECK_EQ(sled::URI("http://example.com:1234/").href(), "http://example.com:1234/");
+        CHECK_EQ(sled::URI("http://example.com:1234/dir1/dir2/file?a=1&b=1#anchor").href(),
+                 "http://example.com:1234/dir1/dir2/file?a=1&b=1#anchor");
+        CHECK_EQ(sled::URI("mailto:bob@example.com").href(), "mailto:bob@example.com");
+        CHECK_EQ(sled::URI("bob@example.com").href(), "bob@example.com");
+        CHECK_EQ(sled::URI("bob@example.com:999").href(), "bob@example.com:999");
+    }
 }