From a5a76e689c185fb9f6f430fd8e989645b627bdb3 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 19 Oct 2010 12:35:50 -0400 Subject: [PATCH] Add a huge pile of tests for the new URI functions, and make them pass. --- http.c | 52 +++++++++++--- test/regress_http.c | 162 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 193 insertions(+), 21 deletions(-) diff --git a/http.c b/http.c index 883c675f..2bf48498 100644 --- a/http.c +++ b/http.c @@ -3399,12 +3399,12 @@ static int parse_port(const char *s, const char *eos) { int portnum = 0; - if (s == eos) - return 0; /* The RFC allows an empty port. */ while (s < eos) { if (! EVUTIL_ISDIGIT(*s)) return -1; portnum = (portnum * 10) + (*s - '0'); + if (portnum < 0) + return -1; ++s; } return portnum; @@ -3480,7 +3480,10 @@ parse_authority(struct evhttp_uri *uri, char *s, char *eos) for (port=eos-1; port >= cp && EVUTIL_ISDIGIT(*port); --port) ; if (port >= cp && *port == ':') { - if ((uri->port = parse_port(port+1, eos))<0) + if (port+1 == eos) /* Leave port unspecified; the RFC allows a + * nil port */ + uri->port = -1; + else if ((uri->port = parse_port(port+1, eos))<0) return -1; eos = port; } @@ -3503,6 +3506,17 @@ parse_authority(struct evhttp_uri *uri, char *s, char *eos) } +static char * +end_of_authority(char *cp) +{ + while (*cp) { + if (*cp == '?' || *cp == '#' || *cp == '/') + return cp; + ++cp; + } + return cp; +} + /* Return the character after the longest prefix of 'cp' that matches... * *pchar / "/" if allow_qchars is false, or * *(pchar / "/" / "?") if allow_chars is true. @@ -3526,6 +3540,19 @@ end_of_path(char *cp, int allow_qchars) return cp; } +static int +path_matches_noscheme(const char *cp) +{ + while (*cp) { + if (*cp == ':') + return 0; + else if (*cp == '/') + return 1; + ++cp; + } + return 1; +} + struct evhttp_uri * evhttp_uri_parse(const char *source_uri) { @@ -3572,10 +3599,7 @@ evhttp_uri_parse(const char *source_uri) char *authority; readp += 2; authority = readp; - path = strchr(readp, '/'); /*XXXX path can be empty; we can - * have a query though */ - if (!path) - path = strchr(readp, '\0'); + path = end_of_authority(readp); if (parse_authority(uri, authority, path) < 0) goto err; readp = path; @@ -3605,6 +3629,8 @@ evhttp_uri_parse(const char *source_uri) goto err; } + /* These next two cases may be unreachable; I'm leaving them + * in to be defensive. */ /* If you didn't get an authority, the path can't begin with "//" */ if (!got_authority && path[0]=='/' && path[1]=='/') goto err; @@ -3612,11 +3638,15 @@ evhttp_uri_parse(const char *source_uri) * empty. */ if (got_authority && path[0] != '/' && path[0] != '\0') goto err; + /* (End of maybe-unreachable cases) */ - if (path) - uri->path = mm_strdup(path); - else - uri->path = mm_strdup(""); + /* If there was no scheme, the first part of the path (if any) must + * have no colon in it. */ + if (! uri->scheme && !path_matches_noscheme(path)) + goto err; + + EVUTIL_ASSERT(path); + uri->path = mm_strdup(path); if (query) uri->query = mm_strdup(query); diff --git a/test/regress_http.c b/test/regress_http.c index fee9a263..34d2781e 100644 --- a/test/regress_http.c +++ b/test/regress_http.c @@ -1717,13 +1717,66 @@ http_parse_uri_test(void *ptr) char *ret = evhttp_uri_join(uri, url_tmp, sizeof(url_tmp)); \ tt_want(ret != NULL); \ tt_want(ret == url_tmp); \ - tt_want(strcmp(ret, want) == 0); \ + if (strcmp(ret,want) != 0) \ + TT_FAIL(("\"%s\" != \"%s\"",ret,want)); \ } while(0) - tt_want(evhttp_uri_join(0, 0, 0) == NULL); - tt_want(evhttp_uri_join(0, url_tmp, 0) == NULL); - tt_want(evhttp_uri_join(0, url_tmp, sizeof(url_tmp)) == NULL); + tt_want(evhttp_uri_join(NULL, 0, 0) == NULL); + tt_want(evhttp_uri_join(NULL, url_tmp, 0) == NULL); + tt_want(evhttp_uri_join(NULL, url_tmp, sizeof(url_tmp)) == NULL); + + /* bad URIs: parsing */ +#define BAD(s) do { \ + if (evhttp_uri_parse(s) != NULL) \ + TT_FAIL(("Expected error parsing \"%s\"",s)); \ + } while(0) + BAD("http://www.test.com/ why hello"); + BAD("http://www.test.com/why-hello\x01"); + BAD("http://www.test.com/why-hello?\x01"); + BAD("http://www.test.com/why-hello#\x01"); + BAD("http://www.\x01.test.com/why-hello"); + BAD("http://www.%7test.com/why-hello"); + BAD("http://www.test.com/why-hell%7o"); + BAD("h%3ttp://www.test.com/why-hello"); + BAD("http://www.test.com/why-hello%7"); + BAD("http://www.test.com/why-hell%7o"); + BAD("http://www.test.com/foo?ba%r"); + BAD("http://www.test.com/foo#ba%r"); + BAD("99:99/foo"); + BAD("http://www.test.com:999x/"); + BAD("http://www.test.com:x/"); + BAD("http://[hello-there]/"); + BAD("http://[::1]]/"); + BAD("http://[::1/"); + BAD("http://[foob/"); + BAD("http://[/"); + BAD("http://[ffff:ffff:ffff:ffff:Ffff:ffff:ffff:" + "ffff:ffff:ffff:ffff:ffff:ffff:ffff]/"); + BAD("http://[vX.foo]/"); + BAD("http://[vX.foo]/"); + BAD("http://[v.foo]/"); + BAD("http://[v5.fo%o]/"); + BAD("http://[v5X]/"); + BAD("http://[v5]/"); + BAD("http://[]/"); + BAD("http://f\x01red@www.example.com/"); + BAD("http://f%0red@www.example.com/"); + BAD("http://www.example.com:9999999999999999999999999999999999999/"); + BAD("http://www.example.com:hihi/"); + BAD("://www.example.com/"); + + /* bad URIs: joining */ + uri = calloc(sizeof(struct evhttp_uri),1); + uri->host = (char*)"www.example.com"; + tt_want(evhttp_uri_join(uri, url_tmp, sizeof(url_tmp)) != NULL); + /* not enough space: */ + tt_want(evhttp_uri_join(uri, url_tmp, 3) == NULL); + /* host is set, but path doesn't start with "/": */ + uri->path = (char*)"hi_mom"; tt_want(evhttp_uri_join(uri, url_tmp, sizeof(url_tmp)) == NULL); + tt_want(evhttp_uri_join(uri, NULL, sizeof(url_tmp))==NULL); + tt_want(evhttp_uri_join(uri, url_tmp, 0)==NULL); + free(uri); uri = evhttp_uri_parse("mailto:foo@bar"); tt_want(uri != NULL); @@ -1734,16 +1787,95 @@ http_parse_uri_test(void *ptr) tt_want(!strcmp(uri->path, "foo@bar")); tt_want(uri->query == NULL); tt_want(uri->fragment == NULL); + TT_URI("mailto:foo@bar"); + evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://www.test.com/?q=test"); + uri = evhttp_uri_parse("http://www.test.com/?q=t%33est"); tt_want(strcmp(uri->scheme, "http") == 0); tt_want(strcmp(uri->host, "www.test.com") == 0); tt_want(strcmp(uri->path, "/") == 0); + tt_want(strcmp(uri->query, "q=t%33est") == 0); + tt_want(uri->userinfo == NULL); + tt_want(uri->port == -1); + tt_want(uri->fragment == NULL); + TT_URI("http://www.test.com/?q=t%33est"); + evhttp_uri_free(uri); + + uri = evhttp_uri_parse("http://%77ww.test.com"); + tt_want(strcmp(uri->scheme, "http") == 0); + tt_want(strcmp(uri->host, "%77ww.test.com") == 0); + tt_want(strcmp(uri->path, "") == 0); + tt_want(uri->query == NULL); + tt_want(uri->userinfo == NULL); + tt_want(uri->port == -1); + tt_want(uri->fragment == NULL); + TT_URI("http://%77ww.test.com"); + evhttp_uri_free(uri); + + uri = evhttp_uri_parse("http://www.test.com?q=test"); + tt_want(strcmp(uri->scheme, "http") == 0); + tt_want(strcmp(uri->host, "www.test.com") == 0); + tt_want(strcmp(uri->path, "") == 0); tt_want(strcmp(uri->query, "q=test") == 0); tt_want(uri->userinfo == NULL); tt_want(uri->port == -1); tt_want(uri->fragment == NULL); - TT_URI("http://www.test.com/?q=test"); + TT_URI("http://www.test.com?q=test"); + evhttp_uri_free(uri); + + uri = evhttp_uri_parse("http://www.test.com#fragment"); + tt_want(strcmp(uri->scheme, "http") == 0); + tt_want(strcmp(uri->host, "www.test.com") == 0); + tt_want(strcmp(uri->path, "") == 0); + tt_want(uri->query == NULL); + tt_want(uri->userinfo == NULL); + tt_want(uri->port == -1); + tt_want_str_op(uri->fragment, ==, "fragment"); + TT_URI("http://www.test.com#fragment"); + evhttp_uri_free(uri); + + uri = evhttp_uri_parse("http://8000/"); + tt_want(strcmp(uri->scheme, "http") == 0); + tt_want(strcmp(uri->host, "8000") == 0); + tt_want(strcmp(uri->path, "/") == 0); + tt_want(uri->query == NULL); + tt_want(uri->userinfo == NULL); + tt_want(uri->port == -1); + tt_want(uri->fragment == NULL); + TT_URI("http://8000/"); + evhttp_uri_free(uri); + + uri = evhttp_uri_parse("http://:8000/"); + tt_want(strcmp(uri->scheme, "http") == 0); + tt_want(strcmp(uri->host, "") == 0); + tt_want(strcmp(uri->path, "/") == 0); + tt_want(uri->query == NULL); + tt_want(uri->userinfo == NULL); + tt_want(uri->port == 8000); + tt_want(uri->fragment == NULL); + TT_URI("http://:8000/"); + evhttp_uri_free(uri); + + uri = evhttp_uri_parse("http://www.test.com:/"); /* empty port */ + tt_want(strcmp(uri->scheme, "http") == 0); + tt_want(strcmp(uri->host, "www.test.com") == 0); + tt_want_str_op(uri->path, ==, "/"); + tt_want(uri->query == NULL); + tt_want(uri->userinfo == NULL); + tt_want(uri->port == -1); + tt_want(uri->fragment == NULL); + TT_URI("http://www.test.com/"); + evhttp_uri_free(uri); + + uri = evhttp_uri_parse("http://www.test.com:"); /* empty port 2 */ + tt_want(strcmp(uri->scheme, "http") == 0); + tt_want(strcmp(uri->host, "www.test.com") == 0); + tt_want(strcmp(uri->path, "") == 0); + tt_want(uri->query == NULL); + tt_want(uri->userinfo == NULL); + tt_want(uri->port == -1); + tt_want(uri->fragment == NULL); + TT_URI("http://www.test.com"); evhttp_uri_free(uri); uri = evhttp_uri_parse("ftp://www.test.com/?q=test"); @@ -1779,15 +1911,15 @@ http_parse_uri_test(void *ptr) TT_URI("ftp://[ff00::127.0.0.1]/?q=test"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("ftp://[v99.not_anytime_soon]/?q=test"); + uri = evhttp_uri_parse("ftp://[v99.not_(any:time)_soon]/?q=test"); tt_want(strcmp(uri->scheme, "ftp") == 0); - tt_want(strcmp(uri->host, "[v99.not_anytime_soon]") == 0); + tt_want(strcmp(uri->host, "[v99.not_(any:time)_soon]") == 0); tt_want(strcmp(uri->path, "/") == 0); tt_want(strcmp(uri->query, "q=test") == 0); tt_want(uri->userinfo == NULL); tt_want(uri->port == -1); tt_want(uri->fragment == NULL); - TT_URI("ftp://[v99.not_anytime_soon]/?q=test"); + TT_URI("ftp://[v99.not_(any:time)_soon]/?q=test"); evhttp_uri_free(uri); uri = evhttp_uri_parse("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment"); @@ -1812,6 +1944,17 @@ http_parse_uri_test(void *ptr) TT_URI("scheme://user@foo.com/#fragment"); evhttp_uri_free(uri); + uri = evhttp_uri_parse("scheme://%75ser@foo.com/#frag@ment"); + tt_want(strcmp(uri->scheme, "scheme") == 0); + tt_want(strcmp(uri->userinfo, "%75ser") == 0); + tt_want(strcmp(uri->host, "foo.com") == 0); + tt_want(uri->port == -1); + tt_want(strcmp(uri->path, "/") == 0); + tt_want(uri->query == NULL); + tt_want(strcmp(uri->fragment, "frag@ment") == 0); + TT_URI("scheme://%75ser@foo.com/#frag@ment"); + evhttp_uri_free(uri); + uri = evhttp_uri_parse("file:///some/path/to/the/file"); tt_want(strcmp(uri->scheme, "file") == 0); tt_want(uri->userinfo == NULL); @@ -1882,7 +2025,6 @@ http_parse_uri_test(void *ptr) tt_want(strcmp(uri->fragment, "fr?ed") == 0); TT_URI("#fr?ed"); evhttp_uri_free(uri); - } static void