thirdparty: update picohttpparser (#20843)

This commit is contained in:
Hitalo Souza 2024-02-16 05:36:22 -04:00 committed by GitHub
parent a21658b9fb
commit 09c35acb09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 74 additions and 31 deletions

View File

@ -241,6 +241,41 @@ static const char *is_complete(const char *buf, const char *buf_end, size_t last
*valp_ += res_; \
} while (0)
/* returned pointer is always within [buf, buf_end), or null */
static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
int *ret)
{
/* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
* bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */
"\"\"" /* 0x22 */
"()" /* 0x28,0x29 */
",," /* 0x2c */
"//" /* 0x2f */
":@" /* 0x3a-0x40 */
"[]" /* 0x5b-0x5d */
"{\xff"; /* 0x7b-0xff */
const char *buf_start = buf;
int found;
buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
if (!found) {
CHECK_EOF();
}
while (1) {
if (*buf == next_char) {
break;
} else if (!token_char_map[(unsigned char)*buf]) {
*ret = -1;
return NULL;
}
++buf;
CHECK_EOF();
}
*token = buf_start;
*token_len = buf - buf_start;
return buf;
}
/* returned pointer is always within [buf, buf_end), or null */
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
{
@ -280,31 +315,10 @@ static const char *parse_headers(const char *buf, const char *buf_end, struct ph
if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
/* parsing name, but do not discard SP before colon, see
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
headers[*num_headers].name = buf;
static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
"\"\"" /* 0x22 */
"()" /* 0x28,0x29 */
",," /* 0x2c */
"//" /* 0x2f */
":@" /* 0x3a-0x40 */
"[]" /* 0x5b-0x5d */
"{\377"; /* 0x7b-0xff */
int found;
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
if (!found) {
CHECK_EOF();
}
while (1) {
if (*buf == ':') {
break;
} else if (!token_char_map[(unsigned char)*buf]) {
*ret = -1;
if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
return NULL;
}
++buf;
CHECK_EOF();
}
if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
if (headers[*num_headers].name_len == 0) {
*ret = -1;
return NULL;
}
@ -352,13 +366,17 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
}
/* parse request line */
ADVANCE_TOKEN(*method, *method_len);
if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
return NULL;
}
do {
++buf;
CHECK_EOF();
} while (*buf == ' ');
ADVANCE_TOKEN(*path, *path_len);
do {
++buf;
CHECK_EOF();
} while (*buf == ' ');
if (*method_len == 0 || *path_len == 0) {
*ret = -1;
@ -422,6 +440,7 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
}
do {
++buf;
CHECK_EOF();
} while (*buf == ' ');
/* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
if (buf_end - buf < 4) {
@ -430,14 +449,15 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
}
PARSE_INT_3(status);
/* get message includig preceding space */
/* get message including preceding space */
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
return NULL;
}
if (*msg_len == 0) {
/* ok */
} else if (**msg == ' ') {
/* remove preceding space */
/* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
* before running past the end of the given buffer. */
do {
++*msg;
--*msg_len;
@ -525,6 +545,8 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
size_t dst = 0, src = 0, bufsz = *_bufsz;
ssize_t ret = -2; /* incomplete */
decoder->_total_read += bufsz;
while (1) {
switch (decoder->_state) {
case CHUNKED_IN_CHUNK_SIZE:
@ -537,6 +559,18 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
ret = -1;
goto Exit;
}
/* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
switch (buf[src]) {
case ' ':
case '\011':
case ';':
case '\012':
case '\015':
break;
default:
ret = -1;
goto Exit;
}
break;
}
if (decoder->_hex_count == sizeof(size_t) * 2) {
@ -632,6 +666,12 @@ Exit:
if (dst != src)
memmove(buf + dst, buf + src, bufsz - src);
*_bufsz = dst;
/* if incomplete but the overhead of the chunked encoding is >=100KB and >80%, signal an error */
if (ret == -2) {
decoder->_total_overhead += bufsz - dst;
if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
ret = -1;
}
return ret;
}

View File

@ -27,6 +27,7 @@
#ifndef picohttpparser_h
#define picohttpparser_h
#include <stdint.h>
#include <sys/types.h>
#ifdef _MSC_VER
@ -39,12 +40,12 @@ extern "C" {
/* contains name and value of a header (name == NULL if is a continuing line
* of a multiline header */
typedef struct phr_header {
struct phr_header {
const char *name;
size_t name_len;
const char *value;
size_t value_len;
}phr_header;
};
/* returns number of bytes consumed if successful, -2 if request is partial,
* -1 if failed */
@ -64,6 +65,8 @@ struct phr_chunked_decoder {
char consume_trailer; /* if trailing headers should be consumed */
char _hex_count;
char _state;
uint64_t _total_read;
uint64_t _total_overhead;
};
/* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
@ -72,8 +75,8 @@ struct phr_chunked_decoder {
* repeatedly call the function while it returns -2 (incomplete) every time
* supplying newly arrived data. If the end of the chunked-encoded data is
* found, the function returns a non-negative number indicating the number of
* octets left undecoded at the tail of the supplied buffer. Returns -1 on
* error.
* octets left undecoded, that starts from the offset returned by `*bufsz`.
* Returns -1 on error.
*/
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);