Updated bundled http-parser

Signed-off-by: Nikos Mavrogiannopoulos <n.mavrogiannopoulos@gmail.com>
This commit is contained in:
Nikos Mavrogiannopoulos
2020-12-09 09:31:07 +01:00
parent 3702debb95
commit 5c53d5f82d
2 changed files with 103 additions and 17 deletions

View File

@@ -18,11 +18,10 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <config.h>
#include "http_parser.h"
#include <assert.h>
#include <stddef.h>
#include <c-ctype.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>
@@ -382,7 +381,10 @@ enum header_states
, h_transfer_encoding
, h_upgrade
, h_matching_transfer_encoding_token_start
, h_matching_transfer_encoding_chunked
, h_matching_transfer_encoding_token
, h_matching_connection_token_start
, h_matching_connection_keep_alive
, h_matching_connection_close
@@ -651,6 +653,8 @@ size_t http_parser_execute (http_parser *parser,
const char *status_mark = 0;
enum state p_state = (enum state) parser->state;
const unsigned int lenient = parser->lenient_http_headers;
const unsigned int allow_chunked_length = parser->allow_chunked_length;
uint32_t nread = parser->nread;
/* We're in an error state. Don't bother doing anything. */
@@ -729,6 +733,7 @@ reexecute:
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;
if (ch == 'H') {
@@ -766,6 +771,7 @@ reexecute:
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;
if (ch == 'H') {
@@ -923,6 +929,7 @@ reexecute:
if (ch == CR || ch == LF)
break;
parser->flags = 0;
parser->uses_transfer_encoding = 0;
parser->content_length = ULLONG_MAX;
if (UNLIKELY(!IS_ALPHA(ch))) {
@@ -1336,6 +1343,7 @@ reexecute:
parser->header_state = h_general;
} else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
parser->header_state = h_transfer_encoding;
parser->uses_transfer_encoding = 1;
}
break;
@@ -1417,10 +1425,14 @@ reexecute:
if ('c' == c) {
parser->header_state = h_matching_transfer_encoding_chunked;
} else {
parser->header_state = h_general;
parser->header_state = h_matching_transfer_encoding_token;
}
break;
/* Multi-value `Transfer-Encoding` header */
case h_matching_transfer_encoding_token_start:
break;
case h_content_length:
if (UNLIKELY(!IS_NUM(ch))) {
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
@@ -1564,16 +1576,41 @@ reexecute:
goto error;
/* Transfer-Encoding: chunked */
case h_matching_transfer_encoding_token_start:
/* looking for 'Transfer-Encoding: chunked' */
if ('c' == c) {
h_state = h_matching_transfer_encoding_chunked;
} else if (STRICT_TOKEN(c)) {
/* TODO(indutny): similar code below does this, but why?
* At the very least it seems to be inconsistent given that
* h_matching_transfer_encoding_token does not check for
* `STRICT_TOKEN`
*/
h_state = h_matching_transfer_encoding_token;
} else if (c == ' ' || c == '\t') {
/* Skip lws */
} else {
h_state = h_general;
}
break;
case h_matching_transfer_encoding_chunked:
parser->index++;
if (parser->index > sizeof(CHUNKED)-1
|| c != CHUNKED[parser->index]) {
h_state = h_general;
h_state = h_matching_transfer_encoding_token;
} else if (parser->index == sizeof(CHUNKED)-2) {
h_state = h_transfer_encoding_chunked;
}
break;
case h_matching_transfer_encoding_token:
if (ch == ',') {
h_state = h_matching_transfer_encoding_token_start;
parser->index = 0;
}
break;
case h_matching_connection_token_start:
/* looking for 'Connection: keep-alive' */
if (c == 'k') {
@@ -1632,7 +1669,7 @@ reexecute:
break;
case h_transfer_encoding_chunked:
if (ch != ' ') h_state = h_general;
if (ch != ' ') h_state = h_matching_transfer_encoding_token;
break;
case h_connection_keep_alive:
@@ -1766,12 +1803,22 @@ reexecute:
REEXECUTE();
}
/* Cannot use chunked encoding and a content-length header together
per the HTTP specification. */
if ((parser->flags & F_CHUNKED) &&
/* Cannot use transfer-encoding and a content-length header together
per the HTTP specification. (RFC 7230 Section 3.3.3) */
if ((parser->uses_transfer_encoding == 1) &&
(parser->flags & F_CONTENTLENGTH)) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
/* Allow it in lenient mode when `Transfer-Encoding` is not `chunked`;
* when it is `chunked`, allow it only if `allow_chunked_length`
* is set
*/
if (parser->flags & F_CHUNKED) {
if (!allow_chunked_length) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}
} else if (!lenient) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}
}
UPDATE_STATE(s_headers_done);
@@ -1846,8 +1893,31 @@ reexecute:
UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete);
} else if (parser->flags & F_CHUNKED) {
/* chunked encoding - ignore Content-Length header */
/* chunked encoding - ignore Content-Length header,
* prepare for a chunk */
UPDATE_STATE(s_chunk_size_start);
} else if (parser->uses_transfer_encoding == 1) {
if (parser->type == HTTP_REQUEST && !lenient) {
/* RFC 7230 3.3.3 */
/* If a Transfer-Encoding header field
* is present in a request and the chunked transfer coding is not
* the final encoding, the message body length cannot be determined
* reliably; the server MUST respond with the 400 (Bad Request)
* status code and then close the connection.
*/
SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
RETURN(p - data); /* Error */
} else {
/* RFC 7230 3.3.3 */
/* If a Transfer-Encoding header field is present in a response and
* the chunked transfer coding is not the final encoding, the
* message body length is determined by reading the connection until
* it is closed by the server.
*/
UPDATE_STATE(s_body_identity_eof);
}
} else {
if (parser->content_length == 0) {
/* Content-Length header given but zero: Content-Length: 0\r\n */
@@ -2101,6 +2171,12 @@ http_message_needs_eof (const http_parser *parser)
return 0;
}
/* RFC 7230 3.3.3, see `s_headers_almost_done` */
if ((parser->uses_transfer_encoding == 1) &&
(parser->flags & F_CHUNKED) == 0) {
return 1;
}
if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
return 0;
}
@@ -2448,7 +2524,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
end = buf + off + len;
/* NOTE: The characters are already validated and are in the [0-9] range */
assert(off + len <= buflen && "Port number overflow");
assert((size_t) (off + len) <= buflen && "Port number overflow");
v = 0;
for (p = buf + off; p < end; p++) {
v *= 10;

View File

@@ -27,7 +27,7 @@ extern "C" {
/* Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 9
#define HTTP_PARSER_VERSION_PATCH 2
#define HTTP_PARSER_VERSION_PATCH 4
#include <stddef.h>
#if defined(_WIN32) && !defined(__MINGW32__) && \
@@ -41,6 +41,8 @@ typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#elif (defined(__sun) || defined(__sun__)) && defined(__SunOS_5_9)
#include <sys/inttypes.h>
#else
#include <stdint.h>
#endif
@@ -275,7 +277,9 @@ enum flags
XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
XX(STRICT, "strict mode assertion failed") \
XX(PAUSED, "parser is paused") \
XX(UNKNOWN, "an unknown error occurred")
XX(UNKNOWN, "an unknown error occurred") \
XX(INVALID_TRANSFER_ENCODING, \
"request has invalid transfer-encoding") \
/* Define HPE_* values for each errno value above */
@@ -293,14 +297,20 @@ enum http_errno {
struct http_parser {
/** PRIVATE **/
unsigned int type : 2; /* enum http_parser_type */
unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
unsigned int state : 7; /* enum state from http_parser.c */
unsigned int header_state : 7; /* enum header_state from http_parser.c */
unsigned int index : 7; /* index into current matcher */
unsigned int index : 5; /* index into current matcher */
unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */
unsigned int allow_chunked_length : 1; /* Allow headers with both
* `Content-Length` and
* `Transfer-Encoding: chunked` set */
unsigned int lenient_http_headers : 1;
uint32_t nread; /* # bytes read in various scenarios */
uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */
uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one)
* if no Content-Length header.
*/
/** READ-ONLY **/
unsigned short http_major;