From 0f1d8e4e182a48d366f06633b797a5dfbbfea407 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 28 Feb 2009 13:40:01 +0100 Subject: [PATCH] Auxiliary functions to parse and validate parts of URIs. To support URI syntax in the manner of RFC 3986, this adds a self-contained set of functions to parse (parts of) URIs and to validate such parts using basic syntax checks. To avoid blocking while resolving DNS strings, strings that do not have the characteristics of native IPv4/v6 address strings always pass; a wrong hostname will then be detected later when connecting. Note: strictly speaking, this does not deal with URIs, since the 'scheme' part is missing and only the "locator" part is parsed. It is possible to extend this later, using something like dccp://host[:port], http://host[:port], or udp://host[:port] to specify the transport protocols of paraslash. --- net.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ net.h | 21 +++++++++++ 2 files changed, 130 insertions(+) diff --git a/net.c b/net.c index b510dc18..2a3fc72b 100644 --- a/net.c +++ b/net.c @@ -20,6 +20,7 @@ #endif #include +#include #include "para.h" #include "error.h" @@ -85,6 +86,114 @@ void disable_crypt(int fd) crypt_data_array[fd].private_data = NULL; } +/** + * Match string as a candidate IPv4 address. + * + * \param address The string to match. + * \return True if \a address has "dot-quad" format. + */ +static bool is_v4_dot_quad(const char *address) +{ + bool result; + regex_t r; + + assert(!regcomp(&r, "^([0-9]+\\.){3}[0-9]+$", REG_EXTENDED|REG_NOSUB)); + result = regexec(&r, address, 0, NULL, 0) == 0; + regfree(&r); + return result; +} + +/** + * Perform basic syntax checking on the host-part of a URL: + * + * - Since ':' is invalid in IPv4 addresses and DNS names, the + * presence of ':' causes interpretation as IPv6 address; + * - next the first-match-wins algorithm from RFC 3986 is applied; + * - else the string is considered as DNS name, to be resolved later. 
+ * + * \param host The host string to check. + * \return True if \a host passes the syntax checks. + * + * \sa RFC 3986, 3.2.2; RFC 1123, 2.1; RFC 1034, 3.5 + */ +static bool host_string_ok(const char *host) +{ + if (host == NULL || *host == '\0') + return false; + if (strchr(host, ':') != NULL) + return is_valid_ipv6_address(host); + if (is_v4_dot_quad(host)) + return is_valid_ipv4_address(host); + return true; +} + +/** + * Parse and validate URL string. + * + * The URL syntax is loosely based on RFC 3986, supporting one of + * - "["host"]"[:port] for native IPv6 addresses and + * - host[:port] for IPv4 addresses and DNS names. + * + * Native IPv6 addresses must be enclosed in square brackets, since + * otherwise there is an ambiguity with the port separator `:'. + * The 'port' part is always considered to be a number; if absent, + * it is set to -1, to indicate that a default port is to be used. + * + * The following are valid examples: + * - 10.10.1.1 + * - 10.10.1.2:8000 + * - localhost + * - localhost:8001 + * - [::1]:8000 + * - [badc0de::1] + * + * \param url The URL string to take apart. + * \param host To return the copied host part of \a url. + * \param hostlen The size of the \a host buffer, including the final NUL. + * \param port To return the port number (if any) of \a url. + * + * \return Pointer to \a host, or NULL if failed. + * If NULL is returned, \a host and \a port are undefined. If no + * port number was present in \a url, \a port is set to -1. + * + * \sa RFC 3986, 3.2.2/3.2.3 + */ +char *parse_url(const char *url, + char *host, ssize_t hostlen, + int32_t *port) +{ + const char *o = url; + char *c = host, *end = c + (hostlen - 1); + + *port = -1; + + if (o == NULL || hostlen < 1) + goto failed; + + if (*o == '[') { + for (++o; (*c = *o == ']' ? '\0' : *o); c++, o++) + if (c == end) + goto failed; + + if (*o++ != ']' || (*o != '\0' && *o != ':')) + goto failed; + } else { + for (; (*c = *o == ':'? 
'\0' : *o); c++, o++) + if (c == end) + goto failed; + } + + if (*o == ':') + if (para_atoi32(++o, port) < 0 || + *port < 0 || *port > 0xffff) + goto failed; + + if (host_string_ok(host)) + return host; +failed: + *host = '\0'; + return NULL; +} /** * Determine the socket type for a given layer-4 protocol. diff --git a/net.h b/net.h index 8ec9fa52..11b1708f 100644 --- a/net.h +++ b/net.h @@ -25,6 +25,12 @@ #endif /** \endcond */ + +/** + * Functions to parse and validate (parts of) URLs. + */ +extern char *parse_url(const char *url, + char *host, ssize_t hostlen, int32_t *port); /** * Ensure that string conforms to the IPv4 address format. * @@ -39,6 +45,21 @@ _static_inline_ bool is_valid_ipv4_address(const char *address) return inet_pton(AF_INET, address, &test_it) != 0; } +/** + * Ensure that string conforms to IPv6 address format. + * + * \param address The address string to check. + * + * \return True if string has a valid IPv6 address syntax, false if not. + * \sa RFC 4291 + */ +_static_inline_ bool is_valid_ipv6_address(const char *address) +{ + struct in6_addr test_it; + + return inet_pton(AF_INET6, address, &test_it) != 0; +} + /** * Generic socket creation (passive and active sockets). */ -- 2.39.5