#endif
#include <dirent.h>
+#include <regex.h>
#include "para.h"
#include "error.h"
crypt_data_array[fd].private_data = NULL;
}
+/**
+ * Match string as a candidate IPv4 address.
+ *
+ * \param address The string to match.
+ * \return True if \a address has "dot-quad" format.
+ */
+static bool is_v4_dot_quad(const char *address)
+{
+ bool result;
+ regex_t r;
+
+ assert(!regcomp(&r, "^([0-9]+\\.){3}[0-9]+$", REG_EXTENDED|REG_NOSUB));
+ result = regexec(&r, address, 0, NULL, 0) == 0;
+ regfree(&r);
+ return result;
+}
+
+/**
+ * Perform basic syntax checking on the host-part of an URL:
+ *
+ * - Since ':' is invalid in IPv4 addresses and DNS names, the
+ * presence of ':' causes interpretation as IPv6 address;
+ * - next the first-match-wins algorithm from RFC 3986 is applied;
+ * - else the string is considered as DNS name, to be resolved later.
+ *
+ * \param host The host string to check.
+ * \return True if \a host passes the syntax checks.
+ *
+ * \sa RFC 3986, 3.2.2; RFC 1123, 2.1; RFC 1034, 3.5
+ */
+static bool host_string_ok(const char *host)
+{
+ if (host == NULL || *host == '\0')
+ return false;
+ if (strchr(host, ':') != NULL)
+ return is_valid_ipv6_address(host);
+ if (is_v4_dot_quad(host))
+ return is_valid_ipv4_address(host);
+ return true;
+}
+
+/**
+ * Parse and validate URL string.
+ *
+ * The URL syntax is loosely based on RFC 3986, supporting one of
+ * - "["host"]"[:port] for native IPv6 addresses and
+ * - host[:port] for IPv4 hostnames and DNS names.
+ *
+ * Native IPv6 addresses must be enclosed in square brackets, since
+ * otherwise there is an ambiguity with the port separator `:'.
+ * The 'port' part is always considered to be a number; if absent,
+ * it is set to -1, to indicate that a default port is to be used.
+ *
+ * The following are valid examples:
+ * - 10.10.1.1
+ * - 10.10.1.2:8000
+ * - localhost
+ * - localhost:8001
+ * - [::1]:8000
+ * - [badc0de::1]
+ *
+ * \param url The URL string to take apart.
+ * \param host To return the copied host part of \a url.
+ * \param hostlen The maximum length of \a host.
+ * \param port To return the port number (if any) of \a url.
+ *
+ * \return Pointer to \a host, or NULL if failed.
+ * If NULL is returned, \a host and \a portnum are undefined. If no
+ * port number was present in \a url, \a portnum is set to -1.
+ *
+ * \sa RFC 3986, 3.2.2/3.2.3
+ */
+char *parse_url(const char *url,
+ char *host, ssize_t hostlen,
+ int32_t *port)
+{
+ const char *o = url;
+ char *c = host, *end = c + (hostlen - 1);
+
+ *port = -1;
+
+ if (o == NULL || hostlen < 1)
+ goto failed;
+
+ if (*o == '[') {
+ for (++o; (*c = *o == ']' ? '\0' : *o); c++, o++)
+ if (c == end)
+ goto failed;
+
+ if (*o++ != ']' || (*o != '\0' && *o != ':'))
+ goto failed;
+ } else {
+ for (; (*c = *o == ':'? '\0' : *o); c++, o++)
+ if (c == end)
+ goto failed;
+ }
+
+ if (*o == ':')
+ if (para_atoi32(++o, port) < 0 ||
+ *port < 0 || *port > 0xffff)
+ goto failed;
+
+ if (host_string_ok(host))
+ return host;
+failed:
+ *host = '\0';
+ return NULL;
+}
/**
* Determine the socket type for a given layer-4 protocol.
#endif
/** \endcond */
+
+/**
+ * Functions to parse and validate (parts of) URLs.
+ */
+extern char *parse_url(const char *url,
+ char *host, ssize_t hostlen, int32_t *port);
/**
* Ensure that string conforms to the IPv4 address format.
*
return inet_pton(AF_INET, address, &test_it) != 0;
}
+/**
+ * Ensure that string conforms to IPv6 address format.
+ *
+ * \param address The address string to check.
+ *
+ * \return 1 if string has a valid IPv6 address syntax, 0 if not.
+ * \sa RFC 4291
+ */
+_static_inline_ bool is_valid_ipv6_address(const char *address)
+{
+ struct in6_addr test_it;
+
+ return inet_pton(AF_INET6, address, &test_it) != 0;
+}
+
/**
* Generic socket creation (passive and active sockets).
*/