Mirror of https://gitlab.com/openconnect/ocserv.git (synced 2026-02-09 16:26:59 +08:00)
Merge branch 'issue345' into 'master'
Stop accepting new TCP connections when the server is at maximum active connection capacity & add support for gracefully stopping the server. Closes #345. See merge request openconnect/ocserv!212
@@ -205,4 +205,52 @@ The ocserv server gathers statistical data about the latency incurred while proc

* Latency information is emitted to the log and can also be queried via occtl. Mean latency for an interval can be computed as latency_median_total/latency_sample_count and mean STDEV can be computed as latency_rms_total/latency_sample_count (see the worked example below).

* Latency information can be used as a metric to measure how ocserv is performing and to gauge effective server load.

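As a worked example of the formulas in the first bullet above (the counter values are made up for illustration; only the counter names come from ocserv's latency statistics):

```
# Hypothetical per-interval counters as reported in the log/occtl output
latency_median_total=480000
latency_rms_total=96000
latency_sample_count=1200

# mean latency = 480000 / 1200 = 400, mean STDEV = 96000 / 1200 = 80
awk -v m="$latency_median_total" -v r="$latency_rms_total" -v n="$latency_sample_count" \
    'BEGIN { printf "mean latency: %.1f, mean STDEV: %.1f\n", m/n, r/n }'
```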
## Load Balancer integration

Ocserv can be deployed behind a layer 3 load balancer to support high availability and scale.

### Example load balancer configuration using keepalived

This is not intended as an exhaustive guide to configuring keepalived, but rather as a high-level overview.

* One or more hosts (directors) running keepalived, with a virtual IP assigned to them, optionally using VRRP to manage VIP failover (not shown here).

* Three or more instances of ocserv running on hosts (the real servers). The virtual IP is assigned to the loopback interface with an ARP filter to prevent the real servers from advertising it (a minimal example is sketched below).

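A minimal sketch of that real-server setup (the VIP value is a placeholder, and the sysctl-based ARP filtering shown here is common direct-routing practice rather than anything specific to ocserv):

```
# On each real server: suppress ARP replies for the VIP, then bind it to loopback
VIP=192.0.2.10   # placeholder; use your actual virtual IP

sysctl -w net.ipv4.conf.all.arp_ignore=1
sysctl -w net.ipv4.conf.all.arp_announce=2
ip addr add "$VIP/32" dev lo
```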
* Define an iptables rule to mark incoming traffic to be load balanced (the mark value must match the fwmark used in the keepalived configuration below):

```
iptables -A PREROUTING -t mangle -d $VIP/32 -j MARK --set-mark 1
```

* Define a keepalived configuration file. Replace the IP addresses with those of the actual ocserv instances. The VRRP configuration is not shown here.

```
virtual_server fwmark 1 {
    delay_loop 5
    lb_algo rr
    lb_kind DR
    persistence_timeout 300
    real_server 10.0.0.1 443 {
        TCP_CHECK {
            connect_port 443
            connect_timeout 5
        }
    }
    real_server 10.0.0.2 443 {
        TCP_CHECK {
            connect_port 443
            connect_timeout 5
        }
    }
    real_server 10.0.0.3 443 {
        TCP_CHECK {
            connect_port 443
            connect_timeout 5
        }
    }
}
```

* Set the ocserv option "server-drain-ms = 10000" (twice the health check interval) to permit graceful shutdown of ocserv instances. This setting adds a delay between the time the server stops accepting new connections (which causes the load balancer to view it as unhealthy) and the time existing clients are disconnected. This prevents clients from attempting to reconnect to a server that is shutting down or has recently shut down (see the sketch below).

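As a sketch of how draining a node for maintenance is expected to play out with the values above (the pid file path is an example and depends on your deployment):

```
# Gracefully stop one ocserv instance behind the load balancer
# (server-drain-ms = 10000, keepalived delay_loop 5)
kill -TERM "$(cat /var/run/ocserv.pid)"
# t=0s    listeners close; new connections are refused
# t<=5s   the TCP_CHECK health probe fails; the node leaves the pool
# t=10s   remaining clients are disconnected and ocserv exits
```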
* Notes on sizing the HA cluster: best practice for high availability is to maintain a minimum of two spare nodes, as this allows one node to undergo maintenance while still tolerating an unplanned failure of a second node. Each node should be sized to handle a rapid reconnect of all clients, which causes a spike in CPU utilization due to TLS key exchange. The rate-limit-ms option can be used to flatten the spike at the expense of some clients retrying their connections.

@@ -723,3 +723,8 @@ ipv4-network = 192.168.2.0

ipv4-netmask = 255.255.255.0

cert-user-oid = 0.9.2342.19200300.100.1.1

# To enable load-balancer connection draining, set server-drain-ms to a value
# higher than your load-balancer health probe interval.
# server-drain-ms = 15000

@@ -869,6 +869,9 @@ static int cfg_ini_handler(void *_ctx, const char *section, const char *name, co

	} else if (strcmp(name, "rate-limit-ms") == 0) {
		if (!WARN_ON_VHOST(vhost->name, "rate-limit-ms", rate_limit_ms))
			READ_NUMERIC(config->rate_limit_ms);
	} else if (strcmp(name, "server-drain-ms") == 0) {
		if (!WARN_ON_VHOST(vhost->name, "server-drain-ms", server_drain_ms))
			READ_NUMERIC(config->server_drain_ms);
	} else if (strcmp(name, "ocsp-response") == 0) {
		READ_STRING(config->ocsp_response);
#ifdef ANYCONNECT_CLIENT_COMPAT

src/main.c
@@ -87,7 +87,7 @@ int worker_argc = 0;

char **worker_argv = NULL;

static void listen_watcher_cb (EV_P_ ev_io *w, int revents);
static void flow_control_cb (EV_P_ ev_timer *w, int revents);
static void resume_accept_cb (EV_P_ ev_timer *w, int revents);

int syslog_open = 0;
sigset_t sig_default_set;

@@ -104,6 +104,7 @@ typedef struct sec_mod_watcher_st {

ev_io ctl_watcher;
sec_mod_watcher_st * sec_mod_watchers = NULL;
ev_timer maintenance_watcher;
ev_timer graceful_shutdown_watcher;
ev_signal maintenance_sig_watcher;
ev_signal term_sig_watcher;
ev_signal int_sig_watcher;

@@ -132,7 +133,7 @@ static void add_listener(void *pool, struct listen_list_st *list,

	ev_init(&tmp->io, listen_watcher_cb);
	ev_io_set(&tmp->io, fd, EV_READ);

	ev_init(&tmp->flow_control, flow_control_cb);
	ev_init(&tmp->resume_accept, resume_accept_cb);

	list_add(&list->head, &(tmp->list));
	list->total++;

@@ -979,9 +980,8 @@ static void kill_children_auth_timeout(main_server_st* s)

	}
}

static void term_sig_watcher_cb(struct ev_loop *loop, ev_signal *w, int revents)
static void terminate_server(main_server_st * s)
{
	main_server_st *s = ev_userdata(loop);
	unsigned total = 10;

	mslog(s, NULL, LOG_INFO, "termination request received; waiting for children to die");

@@ -999,6 +999,40 @@ static void term_sig_watcher_cb(struct ev_loop *loop, ev_signal *w, int revents)

	ev_break (loop, EVBREAK_ALL);
}

static void graceful_shutdown_watcher_cb(EV_P_ ev_timer *w, int revents)
{
	main_server_st *s = ev_userdata(loop);

	terminate_server(s);
}

static void term_sig_watcher_cb(struct ev_loop *loop, ev_signal *w, int revents)
{
	main_server_st *s = ev_userdata(loop);
	struct listener_st *ltmp = NULL, *lpos;
	unsigned int server_drain_ms = GETCONFIG(s)->server_drain_ms;

	if (server_drain_ms == 0) {
		terminate_server(s);
	}
	else
	{
		mslog(s, NULL, LOG_INFO, "termination request received; stopping new connections");
		graceful_shutdown_watcher.repeat = ((ev_tstamp)(server_drain_ms)) / 1000.;
		mslog(s, NULL, LOG_INFO, "termination request received; waiting %d ms", server_drain_ms);
		ev_timer_again(loop, &graceful_shutdown_watcher);

		// Close the listening ports and stop the IO
		list_for_each_safe(&s->listen_list.head, ltmp, lpos, list) {
			ev_io_stop(loop, &ltmp->io);
			close(ltmp->fd);
			list_del(&ltmp->list);
			talloc_free(ltmp);
			s->listen_list.total--;
		}
	}
}

static void reload_sig_watcher_cb(struct ev_loop *loop, ev_signal *w, int revents)
{
	main_server_st *s = ev_userdata(loop);

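Illustrative only (not part of the diff): with server-drain-ms set, the effect of the handler above can be observed from a shell on the server; the port and pid file path are assumptions for the example.

```
ss -ltn 'sport = :443'                  # listener present before the signal
kill -TERM "$(cat /var/run/ocserv.pid)" # triggers term_sig_watcher_cb
ss -ltn 'sport = :443'                  # no listener during the drain window
pgrep -a ocserv                         # processes still serving existing clients
```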
@@ -1034,13 +1068,19 @@ static void cmd_watcher_cb (EV_P_ ev_io *w, int revents)

	}
}

static void flow_control_cb (EV_P_ ev_timer *w, int revents)
static void resume_accept_cb (EV_P_ ev_timer *w, int revents)
{
	struct listener_st *ltmp = (struct listener_st *)((char*)w - offsetof(struct listener_st, flow_control));
	main_server_st *s = ev_userdata(loop);
	struct listener_st *ltmp = (struct listener_st *)((char*)w - offsetof(struct listener_st, resume_accept));
	// Add hysteresis to the pause/resume cycle to damp oscillations
	unsigned int resume_threshold = GETCONFIG(s)->max_clients * 9 / 10;

	// Clear the timer and resume accept
	ev_timer_stop(loop, &ltmp->flow_control);
	ev_io_start(loop, &ltmp->io);
	// Only resume accepting connections if we are under the limit
	if (resume_threshold == 0 || s->stats.active_clients < resume_threshold) {
		// Clear the timer and resume accept
		ev_timer_stop(loop, &ltmp->resume_accept);
		ev_io_start(loop, &ltmp->io);
	}
}

static void listen_watcher_cb (EV_P_ ev_io *w, int revents)

@@ -1217,6 +1257,12 @@ fork_failed:

		forward_udp_to_owner(s, ltmp);
	}

	if (GETCONFIG(s)->max_clients > 0 && s->stats.active_clients >= GETCONFIG(s)->max_clients) {
		ltmp->resume_accept.repeat = ((ev_tstamp)(1));
		ev_io_stop(loop, &ltmp->io);
		ev_timer_again(loop, &ltmp->resume_accept);
	}

	// Rate limiting of incoming connections is implemented as follows:
	// After accepting a client connection:
	// Arm the flow control timer.

@@ -1230,9 +1276,9 @@ fork_failed:

		if (retval || rqueue > wqueue / 2) {
			mslog(s, NULL, LOG_INFO, "delaying accepts for %d ms", GETCONFIG(s)->rate_limit_ms);
			// Arm the timer and pause accept
			ltmp->flow_control.repeat = ((ev_tstamp)(GETCONFIG(s)->rate_limit_ms)) / 1000.;
			ltmp->resume_accept.repeat = ((ev_tstamp)(GETCONFIG(s)->rate_limit_ms)) / 1000.;
			ev_io_stop(loop, &ltmp->io);
			ev_timer_again(loop, &ltmp->flow_control);
			ev_timer_again(loop, &ltmp->resume_accept);
		}
	}
}

@@ -1597,6 +1643,8 @@ int main(int argc, char** argv)

	ev_timer_set(&maintenance_watcher, MAIN_MAINTENANCE_TIME, MAIN_MAINTENANCE_TIME);
	ev_timer_start(loop, &maintenance_watcher);

	ev_init(&graceful_shutdown_watcher, graceful_shutdown_watcher_cb);

#if defined(CAPTURE_LATENCY_SUPPORT)
	ev_init(&latency_watcher, latency_watcher_cb);
	ev_timer_set(&latency_watcher, LATENCY_AGGREGATION_TIME, LATENCY_AGGREGATION_TIME);

@@ -70,7 +70,7 @@ struct listener_st {

	socklen_t addr_len;
	int family;
	int protocol;
	ev_timer flow_control;
	ev_timer resume_accept;
};

struct listen_list_st {

@@ -304,6 +304,7 @@ struct cfg_st {

	 * TCP sessions. */
	unsigned rate_limit_ms; /* if non zero force a connection every rate_limit milliseconds if ocserv-sm is heavily loaded */
	unsigned ping_leases; /* non zero if we need to ping prior to leasing */
	unsigned server_drain_ms; /* how long to wait after we stop accepting new connections before closing old connections */

	size_t rx_per_sec;
	size_t tx_per_sec;

@@ -91,7 +91,7 @@ dist_check_SCRIPTS += test-pass test-pass-cert test-cert test-group-pass \

	test-gssapi test-pass-opt-cert test-cert-opt-pass test-gssapi-opt-pass \
	test-gssapi-opt-cert haproxy-auth test-maintenance resumption \
	test-group-name flowcontrol banner invalid-configs haproxy-proxyproto \
	haproxy-proxyproto-v1
	haproxy-proxyproto-v1 test-drain-server

if HAVE_CWRAP_PAM
dist_check_SCRIPTS += test-pam test-pam-noauth

tests/test-drain-server
Executable file
@@ -0,0 +1,61 @@

#!/bin/sh
#
# Copyright (C) 2013 Nikos Mavrogiannopoulos
#
# This file is part of ocserv.
#
# ocserv is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# ocserv is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GnuTLS; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

SERV="${SERV:-../src/ocserv}"
srcdir=${srcdir:-.}
NO_NEED_ROOT=1
PIDFILE=ocserv-pid.$$.tmp

. `dirname $0`/common.sh

eval "${GETPORT}"

echo "Testing local backend with username-password... "

update_config test1.config
echo server-drain-ms=10000 >> ${CONFIG}

launch_sr_server -d 1 -p ${PIDFILE} -f -c ${CONFIG} & PID=$!
wait_server $PID

echo "Connecting to obtain cookie... "
( echo "test" | LD_PRELOAD=libsocket_wrapper.so $OPENCONNECT -q $ADDRESS:$PORT -u test --servercert=d66b507ae074d03b02eafca40d35f87dd81049d3 --cookieonly ) ||
	fail $PID "Could not receive cookie from server"

if ! test -f ${PIDFILE};then
	fail $PID "Could not find pid file ${PIDFILE}"
fi

KILL_TIME=$(date +%s)
kill -15 $(cat $PIDFILE)

sleep 1

echo "Connecting to obtain cookie... "
( echo "test" | LD_PRELOAD=libsocket_wrapper.so $OPENCONNECT -q $ADDRESS:$PORT -u test --servercert=d66b507ae074d03b02eafca40d35f87dd81049d3 --cookieonly ) &&
	fail $PID "Server is still listening"

wait
END_TIME=$(date +%s)
if [ $((END_TIME - KILL_TIME)) -lt 9 ]; then
	fail $PID "Server died too early - signalled at $KILL_TIME died at $END_TIME"
fi
cleanup

exit 0
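Assuming the usual automake test harness used by the rest of the suite, the new script can presumably be run on its own from the tests/ directory:

```
# Run only the drain test instead of the full suite
make check TESTS=test-drain-server
```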