From 09c35acb091f95183ca22c483f7f1ab657a321af Mon Sep 17 00:00:00 2001 From: Hitalo Souza <63821277+enghitalo@users.noreply.github.com> Date: Fri, 16 Feb 2024 05:36:22 -0400 Subject: [PATCH] thirdparty: update picohttpparser (#20843) --- .../picohttpparser/src/picohttpparser.c | 94 +++++++++++++------ .../picohttpparser/src/picohttpparser.h | 11 ++- 2 files changed, 74 insertions(+), 31 deletions(-) diff --git a/thirdparty/picohttpparser/src/picohttpparser.c b/thirdparty/picohttpparser/src/picohttpparser.c index 74ccc3ef34..680039b422 100644 --- a/thirdparty/picohttpparser/src/picohttpparser.c +++ b/thirdparty/picohttpparser/src/picohttpparser.c @@ -241,6 +241,41 @@ static const char *is_complete(const char *buf, const char *buf_end, size_t last *valp_ += res_; \ } while (0) +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char, + int *ret) +{ + /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128 + * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */ + static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */ + "\"\"" /* 0x22 */ + "()" /* 0x28,0x29 */ + ",," /* 0x2c */ + "//" /* 0x2f */ + ":@" /* 0x3a-0x40 */ + "[]" /* 0x5b-0x5d */ + "{\xff"; /* 0x7b-0xff */ + const char *buf_start = buf; + int found; + buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found); + if (!found) { + CHECK_EOF(); + } + while (1) { + if (*buf == next_char) { + break; + } else if (!token_char_map[(unsigned char)*buf]) { + *ret = -1; + return NULL; + } + ++buf; + CHECK_EOF(); + } + *token = buf_start; + *token_len = buf - buf_start; + return buf; +} + /* returned pointer is always within [buf, buf_end), or null */ static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) { @@ -280,31 +315,10 @@ static const char *parse_headers(const char *buf, const char *buf_end, struct ph if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { /* parsing name, but do not discard SP before colon, see * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ - headers[*num_headers].name = buf; - static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */ - "\"\"" /* 0x22 */ - "()" /* 0x28,0x29 */ - ",," /* 0x2c */ - "//" /* 0x2f */ - ":@" /* 0x3a-0x40 */ - "[]" /* 0x5b-0x5d */ - "{\377"; /* 0x7b-0xff */ - int found; - buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); - if (!found) { - CHECK_EOF(); + if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) { + return NULL; } - while (1) { - if (*buf == ':') { - break; - } else if (!token_char_map[(unsigned char)*buf]) { - *ret = -1; - return NULL; - } - ++buf; - CHECK_EOF(); - } - if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) { + if (headers[*num_headers].name_len == 0) { *ret = -1; return NULL; } @@ -352,13 +366,17 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha } /* parse request line */ - ADVANCE_TOKEN(*method, *method_len); + if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) { + return NULL; + } do { ++buf; + CHECK_EOF(); } while (*buf == ' '); ADVANCE_TOKEN(*path, *path_len); do { ++buf; + CHECK_EOF(); } while (*buf == ' '); if (*method_len == 0 || *path_len == 0) { *ret = -1; @@ -422,6 +440,7 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min } do { ++buf; + CHECK_EOF(); } while (*buf == ' '); /* parse status code, we want at least [:digit:][:digit:][:digit:] to try to parse */ if (buf_end - buf < 4) { @@ -430,14 +449,15 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min } PARSE_INT_3(status); - /* get message includig preceding space */ + /* get message including preceding space */ if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { return NULL; } if (*msg_len == 0) { /* ok */ } else if (**msg == ' ') { - /* remove preceding space */ + /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP + * before running past the end of the given buffer. */ do { ++*msg; --*msg_len; @@ -525,6 +545,8 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_ size_t dst = 0, src = 0, bufsz = *_bufsz; ssize_t ret = -2; /* incomplete */ + decoder->_total_read += bufsz; + while (1) { switch (decoder->_state) { case CHUNKED_IN_CHUNK_SIZE: @@ -537,6 +559,18 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_ ret = -1; goto Exit; } + /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */ + switch (buf[src]) { + case ' ': + case '\011': + case ';': + case '\012': + case '\015': + break; + default: + ret = -1; + goto Exit; + } break; } if (decoder->_hex_count == sizeof(size_t) * 2) { @@ -632,6 +666,12 @@ Exit: if (dst != src) memmove(buf + dst, buf + src, bufsz - src); *_bufsz = dst; + /* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */ + if (ret == -2) { + decoder->_total_overhead += bufsz - dst; + if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4) + ret = -1; + } return ret; } diff --git a/thirdparty/picohttpparser/src/picohttpparser.h b/thirdparty/picohttpparser/src/picohttpparser.h index 8121b128f1..13bc855b6d 100644 --- a/thirdparty/picohttpparser/src/picohttpparser.h +++ b/thirdparty/picohttpparser/src/picohttpparser.h @@ -27,6 +27,7 @@ #ifndef picohttpparser_h #define picohttpparser_h +#include #include #ifdef _MSC_VER @@ -39,12 +40,12 @@ extern "C" { /* contains name and value of a header (name == NULL if is a continuing line * of a multiline header */ -typedef struct phr_header { +struct phr_header { const char *name; size_t name_len; const char *value; size_t value_len; -}phr_header; +}; /* returns number of bytes consumed if successful, -2 if request is partial, * -1 if failed */ @@ -64,6 +65,8 @@ struct phr_chunked_decoder { char consume_trailer; /* if trailing headers should be consumed */ char _hex_count; char _state; + uint64_t _total_read; + uint64_t _total_overhead; }; /* the function rewrites the buffer given as (buf, bufsz) removing the chunked- @@ -72,8 +75,8 @@ struct phr_chunked_decoder { * repeatedly call the function while it returns -2 (incomplete) every time * supplying newly arrived data. If the end of the chunked-encoded data is * found, the function returns a non-negative number indicating the number of - * octets left undecoded at the tail of the supplied buffer. Returns -1 on - * error. + * octets left undecoded, that starts from the offset returned by `*bufsz`. + * Returns -1 on error. */ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);