From 5c53d5f82da808f68917ff556b554578afb76e06 Mon Sep 17 00:00:00 2001 From: Nikos Mavrogiannopoulos Date: Wed, 9 Dec 2020 09:31:07 +0100 Subject: [PATCH] Updated bundled http-parser Signed-off-by: Nikos Mavrogiannopoulos --- src/http-parser/http_parser.c | 100 ++++++++++++++++++++++++++++++---- src/http-parser/http_parser.h | 20 +++++-- 2 files changed, 103 insertions(+), 17 deletions(-) diff --git a/src/http-parser/http_parser.c b/src/http-parser/http_parser.c index 9f8e0519..9be003e7 100644 --- a/src/http-parser/http_parser.c +++ b/src/http-parser/http_parser.c @@ -18,11 +18,10 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ -#include #include "http_parser.h" #include #include -#include +#include #include #include @@ -382,7 +381,10 @@ enum header_states , h_transfer_encoding , h_upgrade + , h_matching_transfer_encoding_token_start , h_matching_transfer_encoding_chunked + , h_matching_transfer_encoding_token + , h_matching_connection_token_start , h_matching_connection_keep_alive , h_matching_connection_close @@ -651,6 +653,8 @@ size_t http_parser_execute (http_parser *parser, const char *status_mark = 0; enum state p_state = (enum state) parser->state; const unsigned int lenient = parser->lenient_http_headers; + const unsigned int allow_chunked_length = parser->allow_chunked_length; + uint32_t nread = parser->nread; /* We're in an error state. Don't bother doing anything. */ @@ -729,6 +733,7 @@ reexecute: if (ch == CR || ch == LF) break; parser->flags = 0; + parser->uses_transfer_encoding = 0; parser->content_length = ULLONG_MAX; if (ch == 'H') { @@ -766,6 +771,7 @@ reexecute: if (ch == CR || ch == LF) break; parser->flags = 0; + parser->uses_transfer_encoding = 0; parser->content_length = ULLONG_MAX; if (ch == 'H') { @@ -923,6 +929,7 @@ reexecute: if (ch == CR || ch == LF) break; parser->flags = 0; + parser->uses_transfer_encoding = 0; parser->content_length = ULLONG_MAX; if (UNLIKELY(!IS_ALPHA(ch))) { @@ -1336,6 +1343,7 @@ reexecute: parser->header_state = h_general; } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { parser->header_state = h_transfer_encoding; + parser->uses_transfer_encoding = 1; } break; @@ -1417,10 +1425,14 @@ reexecute: if ('c' == c) { parser->header_state = h_matching_transfer_encoding_chunked; } else { - parser->header_state = h_general; + parser->header_state = h_matching_transfer_encoding_token; } break; + /* Multi-value `Transfer-Encoding` header */ + case h_matching_transfer_encoding_token_start: + break; + case h_content_length: if (UNLIKELY(!IS_NUM(ch))) { SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); @@ -1564,16 +1576,41 @@ reexecute: goto error; /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_token_start: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + h_state = h_matching_transfer_encoding_chunked; + } else if (STRICT_TOKEN(c)) { + /* TODO(indutny): similar code below does this, but why? + * At the very least it seems to be inconsistent given that + * h_matching_transfer_encoding_token does not check for + * `STRICT_TOKEN` + */ + h_state = h_matching_transfer_encoding_token; + } else if (c == ' ' || c == '\t') { + /* Skip lws */ + } else { + h_state = h_general; + } + break; + case h_matching_transfer_encoding_chunked: parser->index++; if (parser->index > sizeof(CHUNKED)-1 || c != CHUNKED[parser->index]) { - h_state = h_general; + h_state = h_matching_transfer_encoding_token; } else if (parser->index == sizeof(CHUNKED)-2) { h_state = h_transfer_encoding_chunked; } break; + case h_matching_transfer_encoding_token: + if (ch == ',') { + h_state = h_matching_transfer_encoding_token_start; + parser->index = 0; + } + break; + case h_matching_connection_token_start: /* looking for 'Connection: keep-alive' */ if (c == 'k') { @@ -1632,7 +1669,7 @@ reexecute: break; case h_transfer_encoding_chunked: - if (ch != ' ') h_state = h_general; + if (ch != ' ') h_state = h_matching_transfer_encoding_token; break; case h_connection_keep_alive: @@ -1766,12 +1803,22 @@ reexecute: REEXECUTE(); } - /* Cannot use chunked encoding and a content-length header together - per the HTTP specification. */ - if ((parser->flags & F_CHUNKED) && + /* Cannot use transfer-encoding and a content-length header together + per the HTTP specification. (RFC 7230 Section 3.3.3) */ + if ((parser->uses_transfer_encoding == 1) && (parser->flags & F_CONTENTLENGTH)) { - SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); - goto error; + /* Allow it for lenient parsing as long as `Transfer-Encoding` is + * not `chunked` or allow_length_with_encoding is set + */ + if (parser->flags & F_CHUNKED) { + if (!allow_chunked_length) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + } else if (!lenient) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } } UPDATE_STATE(s_headers_done); @@ -1846,8 +1893,31 @@ reexecute: UPDATE_STATE(NEW_MESSAGE()); CALLBACK_NOTIFY(message_complete); } else if (parser->flags & F_CHUNKED) { - /* chunked encoding - ignore Content-Length header */ + /* chunked encoding - ignore Content-Length header, + * prepare for a chunk */ UPDATE_STATE(s_chunk_size_start); + } else if (parser->uses_transfer_encoding == 1) { + if (parser->type == HTTP_REQUEST && !lenient) { + /* RFC 7230 3.3.3 */ + + /* If a Transfer-Encoding header field + * is present in a request and the chunked transfer coding is not + * the final encoding, the message body length cannot be determined + * reliably; the server MUST respond with the 400 (Bad Request) + * status code and then close the connection. + */ + SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING); + RETURN(p - data); /* Error */ + } else { + /* RFC 7230 3.3.3 */ + + /* If a Transfer-Encoding header field is present in a response and + * the chunked transfer coding is not the final encoding, the + * message body length is determined by reading the connection until + * it is closed by the server. + */ + UPDATE_STATE(s_body_identity_eof); + } } else { if (parser->content_length == 0) { /* Content-Length header given but zero: Content-Length: 0\r\n */ @@ -2101,6 +2171,12 @@ http_message_needs_eof (const http_parser *parser) return 0; } + /* RFC 7230 3.3.3, see `s_headers_almost_done` */ + if ((parser->uses_transfer_encoding == 1) && + (parser->flags & F_CHUNKED) == 0) { + return 1; + } + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { return 0; } @@ -2448,7 +2524,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, end = buf + off + len; /* NOTE: The characters are already validated and are in the [0-9] range */ - assert(off + len <= buflen && "Port number overflow"); + assert((size_t) (off + len) <= buflen && "Port number overflow"); v = 0; for (p = buf + off; p < end; p++) { v *= 10; diff --git a/src/http-parser/http_parser.h b/src/http-parser/http_parser.h index 16b5281d..3772b399 100644 --- a/src/http-parser/http_parser.h +++ b/src/http-parser/http_parser.h @@ -27,7 +27,7 @@ extern "C" { /* Also update SONAME in the Makefile whenever you change these. */ #define HTTP_PARSER_VERSION_MAJOR 2 #define HTTP_PARSER_VERSION_MINOR 9 -#define HTTP_PARSER_VERSION_PATCH 2 +#define HTTP_PARSER_VERSION_PATCH 4 #include #if defined(_WIN32) && !defined(__MINGW32__) && \ @@ -41,6 +41,8 @@ typedef __int32 int32_t; typedef unsigned __int32 uint32_t; typedef __int64 int64_t; typedef unsigned __int64 uint64_t; +#elif (defined(__sun) || defined(__sun__)) && defined(__SunOS_5_9) +#include #else #include #endif @@ -275,7 +277,9 @@ enum flags XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ XX(STRICT, "strict mode assertion failed") \ XX(PAUSED, "parser is paused") \ - XX(UNKNOWN, "an unknown error occurred") + XX(UNKNOWN, "an unknown error occurred") \ + XX(INVALID_TRANSFER_ENCODING, \ + "request has invalid transfer-encoding") \ /* Define HPE_* values for each errno value above */ @@ -293,14 +297,20 @@ enum http_errno { struct http_parser { /** PRIVATE **/ unsigned int type : 2; /* enum http_parser_type */ - unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ + unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ unsigned int state : 7; /* enum state from http_parser.c */ unsigned int header_state : 7; /* enum header_state from http_parser.c */ - unsigned int index : 7; /* index into current matcher */ + unsigned int index : 5; /* index into current matcher */ + unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */ + unsigned int allow_chunked_length : 1; /* Allow headers with both + * `Content-Length` and + * `Transfer-Encoding: chunked` set */ unsigned int lenient_http_headers : 1; uint32_t nread; /* # bytes read in various scenarios */ - uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ + uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one) + * if no Content-Length header. + */ /** READ-ONLY **/ unsigned short http_major;