mirror of
https://gitlab.com/openconnect/ocserv.git
synced 2026-02-10 08:46:58 +08:00
html: enhanced HTML decoding with decoding of explicit unicode chars
This commit is contained in:
36
src/html.c
36
src/html.c
@@ -19,12 +19,14 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <stddef.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <syslog.h>
|
#include <syslog.h>
|
||||||
#include <common.h>
|
#include <common.h>
|
||||||
#include <c-strcase.h>
|
#include <c-strcase.h>
|
||||||
#include <c-ctype.h>
|
#include <c-ctype.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
#include "html.h"
|
#include "html.h"
|
||||||
|
|
||||||
@@ -39,6 +41,10 @@ char *unescape_html(void *pool, const char *html, unsigned len, unsigned *out_le
|
|||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
for (i = pos = 0; i < len;) {
|
for (i = pos = 0; i < len;) {
|
||||||
|
if (len-pos < 1) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
if (html[i] == '&') {
|
if (html[i] == '&') {
|
||||||
if (!c_strncasecmp(&html[i], "&lt;", 4)) {
|
if (!c_strncasecmp(&html[i], "&lt;", 4)) {
|
||||||
msg[pos++] = '<';
|
msg[pos++] = '<';
|
||||||
@@ -58,6 +64,33 @@ char *unescape_html(void *pool, const char *html, unsigned len, unsigned *out_le
|
|||||||
} else if (!c_strncasecmp(&html[i], "&apos;", 6)) {
|
} else if (!c_strncasecmp(&html[i], "&apos;", 6)) {
|
||||||
msg[pos++] = '\'';
|
msg[pos++] = '\'';
|
||||||
i += 6;
|
i += 6;
|
||||||
|
} else if (!strncmp(&html[i], "&#x", 3)) {
|
||||||
|
const char *p = &html[i];
|
||||||
|
char *endptr = NULL;
|
||||||
|
long val;
|
||||||
|
|
||||||
|
p+=3;
|
||||||
|
val = strtol(p, &endptr, 16);
|
||||||
|
if (endptr == NULL || *endptr != ';' || val > WCHAR_MAX) {
|
||||||
|
/* skip */
|
||||||
|
msg[pos++] = html[i++];
|
||||||
|
} else {
|
||||||
|
char tmpmb[MB_CUR_MAX];
|
||||||
|
wchar_t ch = val;
|
||||||
|
mbstate_t ps;
|
||||||
|
memset(&ps, 0, sizeof(ps));
|
||||||
|
|
||||||
|
i += (ptrdiff_t)(1+endptr-(&html[i]));
|
||||||
|
val = wcrtomb(tmpmb, ch, &ps);
|
||||||
|
|
||||||
|
if (val == -1)
|
||||||
|
goto fail;
|
||||||
|
if (len-pos > val)
|
||||||
|
memcpy(&msg[pos], tmpmb, val);
|
||||||
|
else
|
||||||
|
goto fail;
|
||||||
|
pos += val;
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
msg[pos++] = html[i++];
|
msg[pos++] = html[i++];
|
||||||
} else
|
} else
|
||||||
@@ -69,6 +102,9 @@ char *unescape_html(void *pool, const char *html, unsigned len, unsigned *out_le
|
|||||||
*out_len = pos;
|
*out_len = pos;
|
||||||
|
|
||||||
return msg;
|
return msg;
|
||||||
|
fail:
|
||||||
|
talloc_free(msg);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *unescape_url(void *pool, const char *url, unsigned len, unsigned *out_len)
|
char *unescape_url(void *pool, const char *url, unsigned len, unsigned *out_len)
|
||||||
|
|||||||
Reference in New Issue
Block a user