Merge branch 'http_parse'

This commit is contained in:
Nick Mathewson 2010-10-18 14:20:06 -04:00
commit f13e449b53
4 changed files with 320 additions and 59 deletions

129
http.c
View File

@ -178,7 +178,7 @@ static void evhttp_read_cb(struct bufferevent *, void *);
static void evhttp_write_cb(struct bufferevent *, void *);
static void evhttp_error_cb(struct bufferevent *bufev, short what, void *arg);
static int evhttp_decode_uri_internal(const char *uri, size_t length,
char *ret, int always_decode_plus);
char *ret, int decode_plus);
#ifndef _EVENT_HAVE_STRSEP
/* strsep replacement for platforms that lack it. Only works if
@ -2295,12 +2295,13 @@ evhttp_send_page(struct evhttp_request *req, struct evbuffer *databuf)
}
static const char uri_chars[256] = {
/* 0 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* 64 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
@ -2317,49 +2318,70 @@ static const char uri_chars[256] = {
};
/*
* Helper functions to encode/decode a URI.
* Helper functions to encode/decode a string for inclusion in a URI.
* The returned string must be freed by the caller.
*/
char *
evhttp_encode_uri(const char *uri)
evhttp_uriencode(const char *uri, ev_ssize_t len, int space_as_plus)
{
struct evbuffer *buf = evbuffer_new();
char *p;
const char *p, *end;
char *result;
if (buf == NULL)
return (NULL);
for (p = (char *)uri; *p != '\0'; p++) {
if (len >= 0)
end = uri+len;
else
end = uri+strlen(uri);
for (p = uri; p < end; p++) {
if (uri_chars[(unsigned char)(*p)]) {
evbuffer_add(buf, p, 1);
} else if (*p == ' ' && space_as_plus) {
evbuffer_add(buf, "+", 1);
} else {
evbuffer_add_printf(buf, "%%%02X", (unsigned char)(*p));
}
}
evbuffer_add(buf, "", 1);
p = mm_strdup((char*)evbuffer_pullup(buf, -1));
evbuffer_add(buf, "", 1); /* NUL-terminator. */
result = mm_malloc(evbuffer_get_length(buf));
if (!result)
return NULL;
evbuffer_remove(buf, result, evbuffer_get_length(buf));
evbuffer_free(buf);
return (p);
return (result);
}
/*
 * Convenience wrapper around evhttp_uriencode() for NUL-terminated
 * strings.  The returned string must be freed by the caller.
 */
char *
evhttp_encode_uri(const char *str)
{
	/* -1: take the length from the NUL terminator.
	 *  0: do not turn spaces into '+'. */
	char *encoded = evhttp_uriencode(str, -1, 0);

	return (encoded);
}
/*
* @param always_decode_plus: when true we transform plus to space even
* if we have not seen a ?.
* @param decode_plus_ctl: if 1, we decode plus into space. If 0, we don't.
* If -1, we transform plus to space only after we've seen a ?.
* -1 is deprecated.
* @return the number of bytes written to 'ret'.
*/
static int
evhttp_decode_uri_internal(
const char *uri, size_t length, char *ret, int always_decode_plus)
const char *uri, size_t length, char *ret, int decode_plus_ctl)
{
char c;
int j, in_query = always_decode_plus;
int j;
int decode_plus = (decode_plus_ctl == 1) ? 1: 0;
unsigned i;
for (i = j = 0; i < length; i++) {
c = uri[i];
if (c == '?') {
in_query = 1;
} else if (c == '+' && in_query) {
if (decode_plus_ctl < 0)
decode_plus = 1;
} else if (c == '+' && decode_plus) {
c = ' ';
} else if (c == '%' && EVUTIL_ISXDIGIT(uri[i+1]) &&
EVUTIL_ISXDIGIT(uri[i+2])) {
@ -2377,6 +2399,7 @@ evhttp_decode_uri_internal(
return (j);
}
/* deprecated */
char *
evhttp_decode_uri(const char *uri)
{
@ -2389,7 +2412,30 @@ evhttp_decode_uri(const char *uri)
}
evhttp_decode_uri_internal(uri, strlen(uri),
ret, 0 /*always_decode_plus*/);
ret, -1 /*always_decode_plus*/);
return (ret);
}
/*
 * Decode a URI-escaped, NUL-terminated string into a newly allocated
 * buffer of strlen(uri) + 1 bytes.
 *
 * @param uri the escaped input string
 * @param decode_plus if nonzero, '+' characters are decoded to spaces;
 *    if zero they are left unchanged
 * @param size_out if not NULL, receives the number of bytes the decoder
 *    reports having written
 * @return the decoded buffer, which must be freed by the caller, or NULL
 *    if the allocation fails
 */
char *
evhttp_uridecode(const char *uri, int decode_plus, size_t *size_out)
{
	const size_t uri_len = strlen(uri);
	char *decoded = mm_malloc(uri_len + 1);
	int written;

	if (decoded == NULL) {
		event_warn("%s: malloc(%lu)", __func__,
		    (unsigned long)(uri_len + 1));
		return (NULL);
	}

	/* Normalize the flag to exactly 0 or 1 so that a caller-supplied
	 * negative value can never select the deprecated -1 mode. */
	written = evhttp_decode_uri_internal(uri, uri_len, decoded,
	    decode_plus ? 1 : 0);

	if (size_out != NULL) {
		EVUTIL_ASSERT(written >= 0);
		*size_out = (size_t)written;
	}

	return (decoded);
}
@ -2399,26 +2445,25 @@ evhttp_decode_uri(const char *uri)
* The arguments are separated by key and value.
*/
void
evhttp_parse_query(const char *uri, struct evkeyvalq *headers)
int
evhttp_parse_query__checked_20(const char *uri, struct evkeyvalq *headers)
{
char *line;
char *argument;
char *p;
int result = -1;
TAILQ_INIT(headers);
/* No arguments - we are done */
if (strchr(uri, '?') == NULL)
return;
return 0;
if ((line = mm_strdup(uri)) == NULL) {
/* TODO(niels): does this function need to return -1 */
event_warn("%s: strdup", __func__);
return;
return -1;
}
argument = line;
/* We already know that there has to be a ? */
@ -2431,13 +2476,13 @@ evhttp_parse_query(const char *uri, struct evkeyvalq *headers)
value = argument;
key = strsep(&value, "=");
if (value == NULL || *key == '\0')
if (value == NULL || *key == '\0') {
goto error;
}
if ((decoded_value = mm_malloc(strlen(value) + 1)) == NULL) {
/* TODO(niels): do we need to return -1 here? */
event_warn("%s: mm_malloc", __func__);
break;
goto error;
}
evhttp_decode_uri_internal(value, strlen(value),
decoded_value, 1 /*always_decode_plus*/);
@ -2446,10 +2491,29 @@ evhttp_parse_query(const char *uri, struct evkeyvalq *headers)
mm_free(decoded_value);
}
error:
result = 0;
goto done;
error:
evhttp_clear_headers(headers);
done:
mm_free(line);
return result;
}
#undef evhttp_parse_query
void evhttp_parse_query(const char *uri, struct evkeyvalq *headers);
/*
 * ABI-compatibility shim.  We define this here so as to avoid changing
 * the ABI for evhttp_parse_query in 2.0.8: the exported symbol keeps its
 * void-returning signature and forwards to the checked variant,
 * discarding the status code.  The next time we break ABI compatibility,
 * the checked function above can go back to being called
 * evhttp_parse_query.
 */
void
evhttp_parse_query(const char *uri, struct evkeyvalq *headers)
{
	(void)evhttp_parse_query__checked_20(uri, headers);
}
static struct evhttp_cb *
evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req)
{
@ -2466,7 +2530,7 @@ evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req)
if ((translated = mm_malloc(offset + 1)) == NULL)
return (NULL);
offset = evhttp_decode_uri_internal(req->uri, offset,
translated, 0 /* always_decode_plus */);
translated, 0 /* decode_plus */);
TAILQ_FOREACH(cb, callbacks, next) {
int res = 0;
@ -2994,12 +3058,17 @@ evhttp_request_set_chunked_cb(struct evhttp_request *req,
*/
const char *
evhttp_request_get_uri(struct evhttp_request *req) {
evhttp_request_get_uri(const struct evhttp_request *req) {
/* A missing URI is only reported through event_debug(); the NULL is
 * still returned, so callers must be prepared to receive it. */
if (req->uri == NULL)
event_debug(("%s: request %p has no uri\n", __func__, req));
return (req->uri);
}
enum evhttp_cmd_type
evhttp_request_get_command(const struct evhttp_request *req) {
return (req->type);
}
/** Returns the input headers */
struct evkeyvalq *evhttp_request_get_input_headers(struct evhttp_request *req)
{

View File

@ -450,7 +450,9 @@ void evhttp_cancel_request(struct evhttp_request *req);
/** Returns the request URI */
const char *evhttp_request_get_uri(struct evhttp_request *req);
const char *evhttp_request_get_uri(const struct evhttp_request *req);
/** Returns the request command */
enum evhttp_cmd_type evhttp_request_get_command(const struct evhttp_request *req);
/** Returns the input headers */
struct evkeyvalq *evhttp_request_get_input_headers(struct evhttp_request *req);
/** Returns the output headers */
@ -506,26 +508,67 @@ void evhttp_clear_headers(struct evkeyvalq *headers);
/**
Helper function to encode a URI.
Helper function to encode a string for inclusion in a URI. All
characters are replaced by their hex-escaped (%22) equivalents,
except for characters explicitly unreserved by RFC3986 -- that is,
ASCII alphanumeric characters, hyphen, dot, underscore, and tilde.
The returned string must be freed by the caller.
The returned string must be freed by the caller.
@param uri an unencoded URI
@return a newly allocated URI-encoded string or NULL on failure
@param str an unencoded string
@return a newly allocated URI-encoded string or NULL on failure
*/
char *evhttp_encode_uri(const char *uri);
char *evhttp_encode_uri(const char *str);
/**
Helper function to decode a URI.
As evhttp_encode_uri, but if 'size' is nonnegative, treat the string
as being 'size' bytes long. This allows you to encode strings that
may contain 0-valued bytes.
The returned string must be freed by the caller.
The returned string must be freed by the caller.
@param str an unencoded string
@param size the length of the string to encode, or -1 if the string
is NUL-terminated
@param space_to_plus if true, space characters in 'str' are encoded
as +, not %20.
@return a newly allocated URI-encoded string, or NULL on failure.
*/
char *evhttp_uriencode(const char *str, ev_ssize_t size, int space_to_plus);
/**
Helper function to sort of decode a URI-encoded string. Unlike
evhttp_uridecode, it decodes all plus characters that appear
_after_ the first question mark character, but no plusses that occur
before. This is not a good way to decode URIs in whole or in part.
The returned string must be freed by the caller.
@deprecated This function is deprecated; you probably want to use
evhttp_uridecode instead.
@param uri an encoded URI
@return a newly allocated unencoded URI or NULL on failure
*/
char *evhttp_decode_uri(const char *uri);
/**
Helper function to decode a URI-escaped string or HTTP parameter.
If 'decode_plus' is 1, then we decode the string as an HTTP parameter
value, and convert all plus ('+') characters to spaces. If
'decode_plus' is 0, we leave all plus characters unchanged.
The returned string must be freed by the caller.
@param uri a URI-encoded string
@param decode_plus determines whether we convert '+' to space.
@param size_out if size_out is not NULL, *size_out is set to the size of the
returned string
@return a newly allocated unencoded URI or NULL on failure
*/
char *evhttp_uridecode(const char *uri, int decode_plus,
size_t *size_out);
/**
Helper function to parse out arguments in a query.
@ -541,9 +584,15 @@ char *evhttp_decode_uri(const char *uri);
@param uri the request URI
@param headers the head of the evkeyval queue
@return 0 on success, -1 on failure
*/
void evhttp_parse_query(const char *uri, struct evkeyvalq *headers);
#define evhttp_parse_query(uri, headers) \
evhttp_parse_query__checked_20((uri), (headers))
/* Do not call this function directly; it is a temporary alias introduced
* to avoid changing the old signature for evhttp_parse_query
*/
int evhttp_parse_query__checked_20(const char *uri, struct evkeyvalq *headers);
/**
* Escape HTML character entities in a string.

View File

@ -25,8 +25,6 @@
*
*/
#include "event2/event-config.h"
#include <sys/types.h>
#include <sys/stat.h>
#ifdef WIN32
@ -44,15 +42,12 @@
#include <string.h>
#include <errno.h>
#include <event.h>
#include <evutil.h>
#include <evhttp.h>
#include <event2/event.h>
#include <event2/buffer.h>
#include <event2/util.h>
#include <event2/http.h>
#include <event2/thread.h>
#ifdef WIN32
#include "iocp-internal.h"
#endif
static void http_basic_cb(struct evhttp_request *req, void *arg);
static char *content;
@ -71,8 +66,7 @@ http_basic_cb(struct evhttp_request *req, void *arg)
evbuffer_free(evb);
}
/* cheasy way of detecting evbuffer_add_reference */
#ifdef _EVENT2_EVENT_H_
#if LIBEVENT_VERSION_NUMBER >= 0x02000200
static void
http_ref_cb(struct evhttp_request *req, void *arg)
{
@ -90,6 +84,7 @@ http_ref_cb(struct evhttp_request *req, void *arg)
int
main(int argc, char **argv)
{
struct event_config *cfg = event_config_new();
struct event_base *base;
struct evhttp *http;
int i;
@ -105,8 +100,6 @@ main(int argc, char **argv)
return (1);
#endif
base = event_base_new();
for (i = 1; i < argc; ++i) {
if (*argv[i] != '-')
continue;
@ -133,7 +126,7 @@ main(int argc, char **argv)
case 'i':
use_iocp = 1;
evthread_use_windows_threads();
event_base_start_iocp(base, 0);
event_config_set_flag(cfg,EVENT_BASE_FLAG_STARTUP_IOCP);
break;
#endif
default:
@ -142,6 +135,12 @@ main(int argc, char **argv)
}
}
base = event_base_new_with_config(cfg);
if (!base) {
fprintf(stderr, "creating event_base failed. Exiting.\n");
return 1;
}
http = evhttp_new(base);
content = malloc(content_len);

View File

@ -1639,29 +1639,172 @@ static void
http_parse_query_test(void *ptr)
{
/* Exercises evhttp_parse_query(): checks both the parsed key/value
 * pairs and the 0 / -1 status it returns for well-formed and malformed
 * query strings. */
struct evkeyvalq headers;
int r;
TAILQ_INIT(&headers);
/* Single key=value pair. */
evhttp_parse_query("http://www.test.com/?q=test", &headers);
r = evhttp_parse_query("http://www.test.com/?q=test", &headers);
tt_want(validate_header(&headers, "q", "test") == 0);
tt_int_op(r, ==, 0);
evhttp_clear_headers(&headers);
/* Two pairs separated by '&'. */
evhttp_parse_query("http://www.test.com/?q=test&foo=bar", &headers);
r = evhttp_parse_query("http://www.test.com/?q=test&foo=bar", &headers);
tt_want(validate_header(&headers, "q", "test") == 0);
tt_want(validate_header(&headers, "foo", "bar") == 0);
tt_int_op(r, ==, 0);
evhttp_clear_headers(&headers);
/* '+' in a value is expected to decode to a space. */
evhttp_parse_query("http://www.test.com/?q=test+foo", &headers);
r = evhttp_parse_query("http://www.test.com/?q=test+foo", &headers);
tt_want(validate_header(&headers, "q", "test foo") == 0);
tt_int_op(r, ==, 0);
evhttp_clear_headers(&headers);
/* %0A is expected to decode to a newline. */
evhttp_parse_query("http://www.test.com/?q=test%0Afoo", &headers);
r = evhttp_parse_query("http://www.test.com/?q=test%0Afoo", &headers);
tt_want(validate_header(&headers, "q", "test\nfoo") == 0);
tt_int_op(r, ==, 0);
evhttp_clear_headers(&headers);
/* %0D is expected to decode to a carriage return. */
evhttp_parse_query("http://www.test.com/?q=test%0Dfoo", &headers);
r = evhttp_parse_query("http://www.test.com/?q=test%0Dfoo", &headers);
tt_want(validate_header(&headers, "q", "test\rfoo") == 0);
tt_int_op(r, ==, 0);
evhttp_clear_headers(&headers);
/* An empty argument ("&&") and a trailing key without '=' must be
 * reported as a failure. */
r = evhttp_parse_query("http://www.test.com/?q=test&&q2", &headers);
tt_int_op(r, ==, -1);
evhttp_clear_headers(&headers);
r = evhttp_parse_query("http://www.test.com/?q=test+this", &headers);
tt_want(validate_header(&headers, "q", "test this") == 0);
tt_int_op(r, ==, 0);
evhttp_clear_headers(&headers);
r = evhttp_parse_query("http://www.test.com/?q=test&q2=foo", &headers);
tt_int_op(r, ==, 0);
tt_want(validate_header(&headers, "q", "test") == 0);
tt_want(validate_header(&headers, "q2", "foo") == 0);
evhttp_clear_headers(&headers);
/* A key with no '=' is an error wherever it appears: first, last,
 * or in the middle of the argument list. */
r = evhttp_parse_query("http://www.test.com/?q&q2=foo", &headers);
tt_int_op(r, ==, -1);
evhttp_clear_headers(&headers);
r = evhttp_parse_query("http://www.test.com/?q=foo&q2", &headers);
tt_int_op(r, ==, -1);
evhttp_clear_headers(&headers);
r = evhttp_parse_query("http://www.test.com/?q=foo&q2&q3=x", &headers);
tt_int_op(r, ==, -1);
evhttp_clear_headers(&headers);
/* A key followed by '=' and an empty value is accepted and yields
 * an empty string. */
r = evhttp_parse_query("http://www.test.com/?q=&q2=&q3=", &headers);
tt_int_op(r, ==, 0);
tt_want(validate_header(&headers, "q", "") == 0);
tt_want(validate_header(&headers, "q2", "") == 0);
tt_want(validate_header(&headers, "q3", "") == 0);
evhttp_clear_headers(&headers);
/* Reached directly when a tt_int_op check fails; releases whatever the
 * last parse left in 'headers'. */
end:
evhttp_clear_headers(&headers);
}
/*
 * Regression test for evhttp_uriencode(), evhttp_uridecode() and the
 * deprecated evhttp_decode_uri(): round-trips a set of strings through
 * encode and decode, then feeds the decoders input that the encoder
 * never produces (malformed escapes, lowercase hex, embedded NULs).
 */
static void
http_uriencode_test(void *ptr)
{
	char *s=NULL, *s2=NULL;
	size_t sz;

	/* ENC: encode 'from', expect 'want', then decode the result again
	 * and expect to get 'from' back with the matching length. */
#define ENC(from,want,plus) do {				\
		s = evhttp_uriencode((from), -1, (plus));	\
		tt_assert(s);					\
		tt_str_op(s,==,(want));				\
		sz = -1;					\
		s2 = evhttp_uridecode((s), (plus), &sz);	\
		tt_assert(s2);					\
		tt_str_op(s2,==,(from));			\
		tt_int_op(sz,==,strlen(from));			\
		free(s);					\
		free(s2);					\
		s = s2 = NULL;					\
	} while (0)

	/* DEC: decode 'from' with decode_plus == 'dp' and expect 'want'. */
#define DEC(from,want,dp) do {					\
		s = evhttp_uridecode((from),(dp),&sz);		\
		tt_assert(s);					\
		tt_str_op(s,==,(want));				\
		tt_int_op(sz,==,strlen(want));			\
		free(s);					\
		s = NULL;					\
	} while (0)

	/* OLD_DEC: same check against the deprecated evhttp_decode_uri. */
#define OLD_DEC(from,want)  do {				\
		s = evhttp_decode_uri((from));			\
		tt_assert(s);					\
		tt_str_op(s,==,(want));				\
		free(s);					\
		s = NULL;					\
	} while (0)

	ENC("Hello", "Hello",0);
	ENC("99", "99",0);
	ENC("", "",0);
	ENC(
	 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-.~_",
	 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-.~_",0);
	ENC(" ", "%20",0);
	ENC(" ", "+",1);
	ENC("\xff\xf0\xe0", "%FF%F0%E0",0);
	ENC("\x01\x19", "%01%19",1);
	ENC("http://www.ietf.org/rfc/rfc3986.txt",
	    "http%3A%2F%2Fwww.ietf.org%2Frfc%2Frfc3986.txt",1);

	ENC("1+2=3", "1%2B2%3D3",1);
	ENC("1+2=3", "1%2B2%3D3",0);

	/* Now try encoding with internal NULs. */
	s = evhttp_uriencode("hello\0world", 11, 0);
	tt_assert(s);
	tt_str_op(s,==,"hello%00world");
	free(s);
	s = NULL;

	/* Now try out some decoding cases that we don't generate with
	 * encode_uri: Make sure that malformed stuff doesn't crash... */
	DEC("%%xhello th+ere \xff",
	    "%%xhello th+ere \xff", 0);
	/* Make sure plus decoding works */
	DEC("plus+should%20work+", "plus should work ",1);
	/* Try some lowercase hex */
	DEC("%f0%a0%b0", "\xf0\xa0\xb0",1);

	/* Try an internal NUL. */
	sz = 0;
	s = evhttp_uridecode("%00%00x%00%00", 1, &sz);
	/* Check the pointer before memcmp() dereferences it. */
	tt_assert(s);
	tt_int_op(sz,==,5);
	tt_assert(!memcmp(s, "\0\0x\0\0", 5));
	free(s);
	s = NULL;

	/* Try with size == NULL */
	sz = 0;
	s = evhttp_uridecode("%00%00x%00%00", 1, NULL);
	/* With no size to compare first, a failed allocation would
	 * otherwise reach memcmp() as a NULL pointer. */
	tt_assert(s);
	tt_assert(!memcmp(s, "\0\0x\0\0", 5));
	free(s);
	s = NULL;

	/* Test out the crazy old behavior of the deprecated
	 * evhttp_decode_uri */
	OLD_DEC("http://example.com/normal+path/?key=val+with+spaces",
	        "http://example.com/normal+path/?key=val with spaces");

end:
	if (s)
		free(s);
	if (s2)
		free(s2);
#undef ENC
#undef DEC
#undef OLD_DEC
}
static void
@ -2658,6 +2801,7 @@ struct testcase_t http_testcases[] = {
{ "base", http_base_test, TT_FORK|TT_NEED_BASE, NULL, NULL },
{ "bad_headers", http_bad_header_test, 0, NULL, NULL },
{ "parse_query", http_parse_query_test, 0, NULL, NULL },
{ "uriencode", http_uriencode_test, 0, NULL, NULL },
HTTP_LEGACY(basic),
HTTP_LEGACY(cancel),
HTTP_LEGACY(virtual_host),