Revise evhttp_uri_parse implementation to handle more of RFC3986

This commit is contained in:
Nick Mathewson 2010-10-19 11:26:59 -04:00
parent fadbfd4e6e
commit eaa5f1d9ed
3 changed files with 417 additions and 94 deletions

360
http.c
View File

@ -2317,6 +2317,9 @@ static const char uri_chars[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
#define CHAR_IS_UNRESERVED(c) \
(uri_chars[(unsigned char)(c)])
/*
* Helper functions to encode/decode a string for inclusion in a URI.
* The returned string must be freed by the caller.
@ -2337,7 +2340,7 @@ evhttp_uriencode(const char *uri, ev_ssize_t len, int space_as_plus)
end = uri+strlen(uri);
for (p = uri; p < end; p++) {
if (uri_chars[(unsigned char)(*p)]) {
if (CHAR_IS_UNRESERVED(*p)) {
evbuffer_add(buf, p, 1);
} else if (*p == ' ' && space_as_plus) {
evbuffer_add(buf, "+", 1);
@ -3334,16 +3337,208 @@ bind_socket(const char *address, ev_uint16_t port, int reuse)
return (fd);
}
/* Return true of the string starting at s and ending immediately before eos
* is a valid URI scheme according to RFC3986
*/
static int
scheme_ok(const char *s, const char *eos)
{
/* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
EVUTIL_ASSERT(eos >= s);
if (s == eos)
return 0;
if (!EVUTIL_ISALPHA(*s))
return 0;
while (++s < eos) {
if (! EVUTIL_ISALNUM(*s) &&
*s != '+' && *s != '-' && *s != '.')
return 0;
}
return 1;
}
#define SUBDELIMS "!$&'()*+,;="
/* Return true iff [s..eos) is a valid userinfo */
static int
userinfo_ok(const char *s, const char *eos)
{
while (s < eos) {
if (CHAR_IS_UNRESERVED(*s) ||
strchr(SUBDELIMS, *s) ||
*s == ':')
++s;
else if (*s == '%' && s+2 < eos &&
EVUTIL_ISXDIGIT(s[1]) &&
EVUTIL_ISXDIGIT(s[2]))
s += 3;
else
return 0;
}
return 1;
}
static int
regname_ok(const char *s, const char *eos)
{
while (s && s<eos) {
if (CHAR_IS_UNRESERVED(*s) ||
strchr(SUBDELIMS, *s))
++s;
else if (*s == '%' &&
EVUTIL_ISXDIGIT(s[1]) &&
EVUTIL_ISXDIGIT(s[2]))
s += 3;
else
return 0;
}
return 1;
}
static int
parse_port(const char *s, const char *eos)
{
int portnum = 0;
if (s == eos)
return 0; /* The RFC allows an empty port. */
while (s < eos) {
if (! EVUTIL_ISDIGIT(*s))
return -1;
portnum = (portnum * 10) + (*s - '0');
++s;
}
return portnum;
}
/* returns 0 for bad, 1 for ipv6, 2 for IPvFuture */
static int
bracket_addr_ok(const char *s, const char *eos)
{
if (s + 3 > eos || *s != '[' || *(eos-1) != ']')
return 0;
if (s[1] == 'v') {
/* IPvFuture, or junk.
"v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
*/
s += 2; /* skip [v */
--eos;
if (!EVUTIL_ISXDIGIT(*s)) /*require at least one*/
return 0;
while (s < eos && *s != '.') {
if (EVUTIL_ISXDIGIT(*s))
++s;
else
return 0;
}
if (*s != '.')
return 0;
++s;
while (s < eos) {
if (CHAR_IS_UNRESERVED(*s) ||
strchr(SUBDELIMS, *s) ||
*s == ':')
++s;
else
return 0;
}
return 2;
} else {
/* IPv6, or junk */
char buf[64];
int n_chars = eos-s-2;
struct in6_addr in6;
if (n_chars >= 64) /* way too long */
return 0;
memcpy(buf, s+1, n_chars);
buf[n_chars]='\0';
return (evutil_inet_pton(AF_INET6,buf,&in6)==1) ? 1 : 0;
}
}
static int
parse_authority(struct evhttp_uri *uri, char *s, char *eos)
{
char *cp, *port;
EVUTIL_ASSERT(eos);
if (eos == s) {
uri->host = mm_strdup("");
return 0;
}
/* Optionally, we start with "userinfo@" */
cp = strchr(s, '@');
if (cp && cp < eos) {
if (! userinfo_ok(s,cp))
return -1;
*cp++ = '\0';
uri->userinfo = mm_strdup(s);
} else {
cp = s;
}
/* Optionally, we end with ":port" */
for (port=eos-1; port >= cp && EVUTIL_ISDIGIT(*port); --port)
;
if (port >= cp && *port == ':') {
if ((uri->port = parse_port(port+1, eos))<0)
return -1;
eos = port;
}
/* Now, cp..eos holds the "host" port, which can be an IPv4Address,
* an IP-Literal, or a reg-name */
EVUTIL_ASSERT(eos >= cp);
if (*cp == '[' && eos >= cp+2 && *(eos-1) == ']') {
/* IPv6address, IP-Literal, or junk. */
if (! bracket_addr_ok(cp, eos))
return -1;
} else {
/* Make sure the host part is ok. */
if (! regname_ok(cp,eos)) /* Match IPv4Address or reg-name */
return -1;
}
uri->host = mm_malloc(eos-cp+1);
memcpy(uri->host, cp, eos-cp);
uri->host[eos-cp] = '\0';
return 0;
}
/* Return the character after the longest prefix of 'cp' that matches...
* *pchar / "/" if allow_qchars is false, or
* *(pchar / "/" / "?") if allow_chars is true.
*/
static char *
end_of_path(char *cp, int allow_qchars)
{
while (*cp) {
if (CHAR_IS_UNRESERVED(*cp) ||
strchr(SUBDELIMS, *cp) ||
*cp == ':' || *cp == '@' || *cp == '/')
++cp;
else if (*cp == '%' && EVUTIL_ISXDIGIT(cp[1]) &&
EVUTIL_ISXDIGIT(cp[2]))
cp += 3;
else if (*cp == '?' && allow_qchars)
++cp;
else
return cp;
}
return cp;
}
struct evhttp_uri *
evhttp_uri_parse(const char *source_uri)
{
char *readbuf = NULL, *readp = NULL, *token = NULL, *query = NULL, *host = NULL, *port = NULL;
char *readbuf = NULL, *readp = NULL, *token = NULL, *query = NULL;
char *path = NULL, *fragment = NULL;
int got_authority = 0;
struct evhttp_uri *uri = mm_calloc(1, sizeof(struct evhttp_uri));
if (uri == NULL) {
event_err(1, "%s: calloc", __func__);
goto err;
}
uri->port = -1;
readbuf = mm_strdup(source_uri);
if (readbuf == NULL) {
@ -3354,57 +3549,79 @@ evhttp_uri_parse(const char *source_uri)
readp = readbuf;
token = NULL;
/* 1. scheme:// */
token = strstr(readp, "://");
if (!token) {
/* unsupported uri */
/* We try to follow RFC3986 here as much as we can, and match
the productions
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
relative-ref = relative-part [ "?" query ] [ "#" fragment ]
*/
/* 1. scheme: */
token = strchr(readp, ':');
if (token && scheme_ok(readp,token)) {
*token = '\0';
uri->scheme = mm_strdup(readp);
readp = token+1; /* eat : */
}
/* 2. Optionally, "//" then an 'authority' part. */
if (readp[0]=='/' && readp[1] == '/') {
char *authority;
readp += 2;
authority = readp;
path = strchr(readp, '/'); /*XXXX path can be empty; we can
* have a query though */
if (!path)
path = strchr(readp, '\0');
if (parse_authority(uri, authority, path) < 0)
goto err;
readp = path;
got_authority = 1;
}
/* 3. Query: path-abempty, path-absolute, path-rootless, or path-empty
*/
path = readp;
readp = end_of_path(path, 0);
/* Query */
if (*readp == '?') {
*readp = '\0';
++readp;
query = readp;
readp = end_of_path(readp, 1);
}
/* fragment */
if (*readp == '#') {
*readp = '\0';
++readp;
fragment = readp;
readp = end_of_path(readp, 1);
}
if (*readp != '\0') {
goto err;
}
*token = '\0';
uri->scheme = mm_strdup(readp);
/* If you didn't get an authority, the path can't begin with "//" */
if (!got_authority && path[0]=='/' && path[1]=='/')
goto err;
/* If you did get an authority, the path must begin with "/" or be
* empty. */
if (got_authority && path[0] != '/' && path[0] != '\0')
goto err;
readp = token;
readp += 3; /* eat :// */
/* 2. query */
query = strchr(readp, '/');
if (query) {
char *fragment = strchr(query, '#');
if (fragment) {
*fragment++ = '\0'; /* eat '#' */
uri->fragment = mm_strdup(fragment);
}
if (path)
uri->path = mm_strdup(path);
else
uri->path = mm_strdup("");
if (query)
uri->query = mm_strdup(query);
*query = '\0'; /* eat '/' */
}
/* 3. user:pass@host:port */
host = strchr(readp, '@');
if (host) {
char *pass = 0;
/* got user:pass@host:port */
*host++ = '\0'; /* eat @ */;
pass = strchr(readp, ':');
if (pass) {
*pass++ = '\0'; /* eat ':' */
uri->pass = mm_strdup(pass);
}
uri->user = mm_strdup(readp);
readp = host;
}
/* 4. host:port */
port = strchr(readp, ':');
if (port) {
*port++ = '\0'; /* eat ':' */
uri->port = atoi(port);
}
/* 5. host */
uri->host = mm_strdup(readp);
if (fragment)
uri->fragment = mm_strdup(fragment);
mm_free(readbuf);
@ -3426,8 +3643,7 @@ evhttp_uri_free(struct evhttp_uri *uri)
}
_URI_FREE_STR(scheme);
_URI_FREE_STR(user);
_URI_FREE_STR(pass);
_URI_FREE_STR(userinfo);
_URI_FREE_STR(host);
_URI_FREE_STR(query);
_URI_FREE_STR(fragment);
@ -3442,40 +3658,42 @@ evhttp_uri_join(struct evhttp_uri *uri, char *buf, size_t limit)
{
struct evbuffer *tmp = 0;
size_t joined_size = 0;
char *output = NULL;
#define _URI_ADD(f) evbuffer_add(tmp, uri->f, strlen(uri->f))
if (!uri || !uri->scheme || !buf || !limit)
if (!uri || !buf || !limit)
return NULL;
tmp = evbuffer_new();
if (!tmp)
return NULL;
_URI_ADD(scheme);
evbuffer_add(tmp, "://", 3);
if (uri->host && *uri->host) {
if (uri->user && *uri->user) {
_URI_ADD(user);
if (uri->pass && *uri->pass) {
evbuffer_add(tmp, ":", 1);
_URI_ADD(pass);
}
evbuffer_add(tmp, "@", 1);
}
if (uri->scheme) {
_URI_ADD(scheme);
evbuffer_add(tmp, ":", 1);
}
if (uri->host) {
evbuffer_add(tmp, "//", 2);
if (uri->userinfo)
evbuffer_add_printf(tmp,"%s@", uri->userinfo);
_URI_ADD(host);
if (uri->port >= 0)
evbuffer_add_printf(tmp,":%d", uri->port);
if (uri->port > 0)
evbuffer_add_printf(tmp,":%u", uri->port);
if (uri->path && uri->path[0] != '/' && uri->path[0] != '\0')
goto err;
}
if (uri->query && *uri->query)
if (uri->path)
_URI_ADD(path);
if (uri->query) {
evbuffer_add(tmp, "?", 1);
_URI_ADD(query);
}
if (uri->fragment && *uri->fragment) {
if (!uri->query || !*uri->query)
evbuffer_add(tmp, "/", 1);
if (uri->fragment) {
evbuffer_add(tmp, "#", 1);
_URI_ADD(fragment);
}
@ -3491,8 +3709,10 @@ evhttp_uri_join(struct evhttp_uri *uri, char *buf, size_t limit)
}
evbuffer_remove(tmp, buf, joined_size);
output = buf;
err:
evbuffer_free(tmp);
return (char *)buf;
return output;
#undef _URI_ADD
}

View File

@ -123,11 +123,11 @@ struct {
*/
struct evhttp_uri {
char *scheme; /* scheme; e.g http, ftp etc */
char *host; /* hostname, or NULL */
char *user; /* usename, or NULL */
char *pass; /* password, or NULL */
char *host; /* hostname, IP address, or NULL */
char *userinfo; /* userinfo (typically username:pass), or NULL */
int port; /* port, or zero */
char *query; /* path + query: e.g. /path/to?param=foo, or NULL */
char *path; /* path, or NULL */
char *query; /* query, or NULL */
char *fragment; /* fragment or NULL */
};

View File

@ -1725,15 +1725,23 @@ http_parse_uri_test(void *ptr)
tt_want(evhttp_uri_join(0, url_tmp, sizeof(url_tmp)) == NULL);
tt_want(evhttp_uri_join(uri, url_tmp, sizeof(url_tmp)) == NULL);
tt_want(evhttp_uri_parse("mailto:foo@bar") == NULL);
uri = evhttp_uri_parse("mailto:foo@bar");
tt_want(uri != NULL);
tt_want(uri->host == NULL);
tt_want(uri->userinfo == NULL);
tt_want(uri->port == -1);
tt_want(!strcmp(uri->scheme, "mailto"));
tt_want(!strcmp(uri->path, "foo@bar"));
tt_want(uri->query == NULL);
tt_want(uri->fragment == NULL);
uri = evhttp_uri_parse("http://www.test.com/?q=test");
tt_want(strcmp(uri->scheme, "http") == 0);
tt_want(strcmp(uri->host, "www.test.com") == 0);
tt_want(strcmp(uri->query, "/?q=test") == 0);
tt_want(uri->user == NULL);
tt_want(uri->pass == NULL);
tt_want(uri->port == 0);
tt_want(strcmp(uri->path, "/") == 0);
tt_want(strcmp(uri->query, "q=test") == 0);
tt_want(uri->userinfo == NULL);
tt_want(uri->port == -1);
tt_want(uri->fragment == NULL);
TT_URI("http://www.test.com/?q=test");
evhttp_uri_free(uri);
@ -1741,45 +1749,140 @@ http_parse_uri_test(void *ptr)
uri = evhttp_uri_parse("ftp://www.test.com/?q=test");
tt_want(strcmp(uri->scheme, "ftp") == 0);
tt_want(strcmp(uri->host, "www.test.com") == 0);
tt_want(strcmp(uri->query, "/?q=test") == 0);
tt_want(uri->user == NULL);
tt_want(uri->pass == NULL);
tt_want(uri->port == 0);
tt_want(strcmp(uri->path, "/") == 0);
tt_want(strcmp(uri->query, "q=test") == 0);
tt_want(uri->userinfo == NULL);
tt_want(uri->port == -1);
tt_want(uri->fragment == NULL);
TT_URI("ftp://www.test.com/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("ftp://[::1]:999/?q=test");
tt_want(strcmp(uri->scheme, "ftp") == 0);
tt_want(strcmp(uri->host, "[::1]") == 0);
tt_want(strcmp(uri->path, "/") == 0);
tt_want(strcmp(uri->query, "q=test") == 0);
tt_want(uri->userinfo == NULL);
tt_want(uri->port == 999);
tt_want(uri->fragment == NULL);
TT_URI("ftp://[::1]:999/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("ftp://[ff00::127.0.0.1]/?q=test");
tt_want(strcmp(uri->scheme, "ftp") == 0);
tt_want(strcmp(uri->host, "[ff00::127.0.0.1]") == 0);
tt_want(strcmp(uri->path, "/") == 0);
tt_want(strcmp(uri->query, "q=test") == 0);
tt_want(uri->userinfo == NULL);
tt_want(uri->port == -1);
tt_want(uri->fragment == NULL);
TT_URI("ftp://[ff00::127.0.0.1]/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("ftp://[v99.not_anytime_soon]/?q=test");
tt_want(strcmp(uri->scheme, "ftp") == 0);
tt_want(strcmp(uri->host, "[v99.not_anytime_soon]") == 0);
tt_want(strcmp(uri->path, "/") == 0);
tt_want(strcmp(uri->query, "q=test") == 0);
tt_want(uri->userinfo == NULL);
tt_want(uri->port == -1);
tt_want(uri->fragment == NULL);
TT_URI("ftp://[v99.not_anytime_soon]/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment");
tt_want(strcmp(uri->scheme, "scheme") == 0);
tt_want(strcmp(uri->user, "user") == 0);
tt_want(strcmp(uri->pass, "pass") == 0);
tt_want(strcmp(uri->userinfo, "user:pass") == 0);
tt_want(strcmp(uri->host, "foo.com") == 0);
tt_want(uri->port == 42);
tt_want(strcmp(uri->query, "/?q=test&s=some+thing") == 0);
tt_want(strcmp(uri->path, "/") == 0);
tt_want(strcmp(uri->query, "q=test&s=some+thing") == 0);
tt_want(strcmp(uri->fragment, "fragment") == 0);
TT_URI("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("scheme://user@foo.com/#fragment");
tt_want(strcmp(uri->scheme, "scheme") == 0);
tt_want(strcmp(uri->user, "user") == 0);
tt_want(uri->pass == NULL);
tt_want(strcmp(uri->userinfo, "user") == 0);
tt_want(strcmp(uri->host, "foo.com") == 0);
tt_want(uri->port == 0);
tt_want(strcmp(uri->query, "/") == 0);
tt_want(uri->port == -1);
tt_want(strcmp(uri->path, "/") == 0);
tt_want(uri->query == NULL);
tt_want(strcmp(uri->fragment, "fragment") == 0);
TT_URI("scheme://user@foo.com/#fragment");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("file:///some/path/to/the/file");
tt_want(strcmp(uri->scheme, "file") == 0);
tt_want(uri->user == NULL);
tt_want(uri->pass == NULL);
tt_want(uri->userinfo == NULL);
tt_want(strcmp(uri->host, "") == 0);
tt_want(uri->port == 0);
tt_want(strcmp(uri->query, "/some/path/to/the/file") == 0);
tt_want(uri->port == -1);
tt_want(strcmp(uri->path, "/some/path/to/the/file") == 0);
tt_want(uri->query == NULL);
tt_want(uri->fragment == NULL);
TT_URI("file:///some/path/to/the/file");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("///some/path/to/the-file");
tt_want(uri != NULL);
tt_want(uri->scheme == NULL);
tt_want(uri->userinfo == NULL);
tt_want(strcmp(uri->host, "") == 0);
tt_want(uri->port == -1);
tt_want(strcmp(uri->path, "/some/path/to/the-file") == 0);
tt_want(uri->query == NULL);
tt_want(uri->fragment == NULL);
TT_URI("///some/path/to/the-file");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("/s:ome/path/to/the-file?q=99#fred");
tt_want(uri != NULL);
tt_want(uri->scheme == NULL);
tt_want(uri->userinfo == NULL);
tt_want(uri->host == NULL);
tt_want(uri->port == -1);
tt_want(strcmp(uri->path, "/s:ome/path/to/the-file") == 0);
tt_want(strcmp(uri->query, "q=99") == 0);
tt_want(strcmp(uri->fragment, "fred") == 0);
TT_URI("/s:ome/path/to/the-file?q=99#fred");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("relative/path/with/co:lon");
tt_want(uri != NULL);
tt_want(uri->scheme == NULL);
tt_want(uri->userinfo == NULL);
tt_want(uri->host == NULL);
tt_want(uri->port == -1);
tt_want(strcmp(uri->path, "relative/path/with/co:lon") == 0);
tt_want(uri->query == NULL);
tt_want(uri->fragment == NULL);
TT_URI("relative/path/with/co:lon");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("bob?q=99&q2=q?33#fr?ed");
tt_want(uri != NULL);
tt_want(uri->scheme == NULL);
tt_want(uri->userinfo == NULL);
tt_want(uri->host == NULL);
tt_want(uri->port == -1);
tt_want(strcmp(uri->path, "bob") == 0);
tt_want(strcmp(uri->query, "q=99&q2=q?33") == 0);
tt_want(strcmp(uri->fragment, "fr?ed") == 0);
TT_URI("bob?q=99&q2=q?33#fr?ed");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("#fr?ed");
tt_want(uri != NULL);
tt_want(uri->scheme == NULL);
tt_want(uri->userinfo == NULL);
tt_want(uri->host == NULL);
tt_want(uri->port == -1);
tt_want(strcmp(uri->path, "") == 0);
tt_want(uri->query == NULL);
tt_want(strcmp(uri->fragment, "fr?ed") == 0);
TT_URI("#fr?ed");
evhttp_uri_free(uri);
}
static void