thirdparty: update picohttpparser (#20843)

2025-09-12 17:07:11 -04:00 · 2024-02-16 05:36:22 -04:00 · 2024-02-16 05:36:22 -04:00 · 09c35acb09
commit 09c35acb09
parent a21658b9fb
2 changed files with 74 additions and 31 deletions
--- a/thirdparty/picohttpparser/src/picohttpparser.c
+++ b/thirdparty/picohttpparser/src/picohttpparser.c
@ -241,6 +241,41 @@ static const char *is_complete(const char *buf, const char *buf_end, size_t last
        *valp_ += res_;                                                                                                            \
    } while (0)
 /* Scans a token that begins at `buf` and is terminated by the first occurrence of `next_char`.
 *
 * On success, `*token` is set to the start of the token, `*token_len` to its length, and the returned pointer points at the
 * terminating `next_char` (which is not consumed). A zero-length token is not rejected here; callers that require a non-empty
 * token must check `*token_len` themselves.
 *
 * If a byte is met that is neither `next_char` nor accepted by `token_char_map`, `*ret` is set to -1 and NULL is returned.
 * NOTE(review): `CHECK_EOF` is defined outside this view; it presumably sets `*ret` and returns NULL once `buf` reaches
 * `buf_end` -- confirm against the macro definition.
 *
 * The returned pointer is always within [buf, buf_end), or null. */
 static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
                               int *ret)
 {
    /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
     * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
    static const char ALIGNED(16) ranges[] = "\x00 "  /* control chars and up to SP */
                                             "\"\""   /* 0x22 */
                                             "()"     /* 0x28,0x29 */
                                             ",,"     /* 0x2c */
                                             "//"     /* 0x2f */
                                             ":@"     /* 0x3a-0x40 */
                                             "[]"     /* 0x5b-0x5d */
                                             "{\xff"; /* 0x7b-0xff */
    /* remember where the token starts; `buf` itself is advanced below */
    const char *buf_start = buf;
    int found;
    /* Fast path. `sizeof(ranges) - 1` excludes the string literal's terminating NUL.
     * NOTE(review): `findchar_fast` is defined elsewhere; presumably it advances `buf` to the first byte that falls into one of
     * `ranges` and reports via `found` whether such a byte was hit -- confirm. */
    buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
    if (!found) {
        CHECK_EOF();
    }
    /* Slow loop: examine one byte at a time until `next_char` is reached. */
    while (1) {
        if (*buf == next_char) {
            /* terminator reached; `buf` is left pointing at it */
            break;
        } else if (!token_char_map[(unsigned char)*buf]) {
            /* not a permitted token character: report a parse error */
            *ret = -1;
            return NULL;
        }
        ++buf;
        CHECK_EOF();
    }
    *token = buf_start;
    *token_len = buf - buf_start;
    return buf;
 }
 /* returned pointer is always within [buf, buf_end), or null */
 static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
 {
@ -280,31 +315,10 @@ static const char *parse_headers(const char *buf, const char *buf_end, struct ph
        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
            /* parsing name, but do not discard SP before colon, see
             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
-            headers[*num_headers].name = buf;
+            if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
-            static const char ALIGNED(16) ranges1[] = "\x00 "  /* control chars and up to SP */
+                return NULL;
                                                      "\"\""   /* 0x22 */
                                                      "()"     /* 0x28,0x29 */
                                                      ",,"     /* 0x2c */
                                                      "//"     /* 0x2f */
                                                      ":@"     /* 0x3a-0x40 */
                                                      "[]"     /* 0x5b-0x5d */
                                                      "{\377"; /* 0x7b-0xff */
            int found;
            buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
            if (!found) {
                CHECK_EOF();
            }
-            while (1) {
+            if (headers[*num_headers].name_len == 0) {
                if (*buf == ':') {
                    break;
                } else if (!token_char_map[(unsigned char)*buf]) {
                    *ret = -1;
                    return NULL;
                }
                ++buf;
                CHECK_EOF();
            }
            if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
                *ret = -1;
                return NULL;
            }
@ -352,13 +366,17 @@ static const char *parse_request(const char *buf, const char *buf_end, const cha
    }
    /* parse request line */
-    ADVANCE_TOKEN(*method, *method_len);
+    if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
        return NULL;
    }
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    ADVANCE_TOKEN(*path, *path_len);
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
@ -422,6 +440,7 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
    }
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
@ -430,14 +449,15 @@ static const char *parse_response(const char *buf, const char *buf_end, int *min
    }
    PARSE_INT_3(status);
-    /* get message includig preceding space */
+    /* get message including preceding space */
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
        return NULL;
    }
    if (*msg_len == 0) {
        /* ok */
    } else if (**msg == ' ') {
-        /* remove preceding space */
+        /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
         * before running past the end of the given buffer. */
        do {
            ++*msg;
            --*msg_len;
@ -525,6 +545,8 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
    size_t dst = 0, src = 0, bufsz = *_bufsz;
    ssize_t ret = -2; /* incomplete */
    decoder->_total_read += bufsz;
    while (1) {
        switch (decoder->_state) {
        case CHUNKED_IN_CHUNK_SIZE:
@ -537,6 +559,18 @@ ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_
                        ret = -1;
                        goto Exit;
                    }
                    /* the only characters that may appear after the chunk size are BWS, semicolon, or CRLF */
                    switch (buf[src]) {
                    case ' ':
                    case '\011':
                    case ';':
                    case '\012':
                    case '\015':
                        break;
                    default:
                        ret = -1;
                        goto Exit;
                    }
                    break;
                }
                if (decoder->_hex_count == sizeof(size_t) * 2) {
@ -632,6 +666,12 @@ Exit:
    if (dst != src)
        memmove(buf + dst, buf + src, bufsz - src);
    *_bufsz = dst;
    /* if incomplete but the overhead of the chunked encoding is >=100KB and >75% of the bytes read, signal an error */
    if (ret == -2) {
        decoder->_total_overhead += bufsz - dst;
        if (decoder->_total_overhead >= 100 * 1024 && decoder->_total_read - decoder->_total_overhead < decoder->_total_read / 4)
            ret = -1;
    }
    return ret;
 }
--- a/thirdparty/picohttpparser/src/picohttpparser.h
+++ b/thirdparty/picohttpparser/src/picohttpparser.h
@ -27,6 +27,7 @@
 #ifndef picohttpparser_h
 #define picohttpparser_h
 #include <stdint.h>
 #include <sys/types.h>
 #ifdef _MSC_VER
@ -39,12 +40,12 @@ extern "C" {
 /* contains name and value of a header (name == NULL if this is a continuation
 * line of a multiline header) */
-typedef struct phr_header {
+struct phr_header {
    const char *name;
    size_t name_len;
    const char *value;
    size_t value_len;
-}phr_header;
+};
 /* returns number of bytes consumed if successful, -2 if request is partial,
 * -1 if failed */
@ -64,6 +65,8 @@ struct phr_chunked_decoder {
    char consume_trailer;       /* if trailing headers should be consumed */
    char _hex_count;
    char _state;
    uint64_t _total_read;
    uint64_t _total_overhead;
 };
 /* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
@ -72,8 +75,8 @@ struct phr_chunked_decoder {
 * repeatedly call the function while it returns -2 (incomplete) every time
 * supplying newly arrived data.  If the end of the chunked-encoded data is
 * found, the function returns a non-negative number indicating the number of
- * octets left undecoded at the tail of the supplied buffer.  Returns -1 on
+ * octets left undecoded, which start at the offset returned in `*bufsz`.
- * error.
+ * Returns -1 on error.
 */
 ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);