[bitnami/pgpool] extend health check configuration (#19124)

- Introduce PGPOOL_CONNECT_TIMEOUT variable and set default to 10000ms. User can setup pgpool to fail faster when trying to connect to unreachable PG node. - Introduce PGPOOL_HEALTH_CHECK_PSQL_TIMEOUT variable with default to 15. User can control "show pool_nodes" command timeout in healthcheck function. - Only nodes in pgpool state DOWN which are UP in reality are attached. All the changes does not bring any change thnaks to defaults. It is tooling for PG-HA. Signed-off-by: Pavel Cerny <pavel.cerny@gooddata.com>
2026-03-12 06:48:14 +08:00 · 2023-03-09 17:08:24 +01:00
parent 99940a8c3d
commit c6ec6a2d2f
2 changed files with 19 additions and 6 deletions
--- a/bitnami/pgpool/4/debian-11/rootfs/opt/bitnami/scripts/libpgpool.sh
+++ b/bitnami/pgpool/4/debian-11/rootfs/opt/bitnami/scripts/libpgpool.sh
@@ -75,6 +75,8 @@ export PGPOOL_HEALTH_CHECK_PERIOD="${PGPOOL_HEALTH_CHECK_PERIOD:-30}"
 export PGPOOL_HEALTH_CHECK_TIMEOUT="${PGPOOL_HEALTH_CHECK_TIMEOUT:-10}"
 export PGPOOL_HEALTH_CHECK_MAX_RETRIES="${PGPOOL_HEALTH_CHECK_MAX_RETRIES:-5}"
 export PGPOOL_HEALTH_CHECK_RETRY_DELAY="${PGPOOL_HEALTH_CHECK_RETRY_DELAY:-5}"
+export PGPOOL_CONNECT_TIMEOUT="${PGPOOL_CONNECT_TIMEOUT:-10000}"
+export PGPOOL_HEALTH_CHECK_PSQL_TIMEOUT="${PGPOOL_HEALTH_CHECK_PSQL_TIMEOUT:-15}"
 export PGPOOL_POSTGRES_CUSTOM_USERS="${PGPOOL_POSTGRES_CUSTOM_USERS:-}"
 export PGPOOL_POSTGRES_CUSTOM_PASSWORDS="${PGPOOL_POSTGRES_CUSTOM_PASSWORDS:-}"
 export PGPOOL_AUTO_FAILBACK="${PGPOOL_AUTO_FAILBACK:-no}"
@@ -209,7 +211,7 @@ pgpool_validate() {
            print_validation_error "The values allowed for $yn are: yes or no"
        fi
    done
-    local positive_values=("PGPOOL_NUM_INIT_CHILDREN" "PGPOOL_MAX_POOL" "PGPOOL_CHILD_MAX_CONNECTIONS" "PGPOOL_CHILD_LIFE_TIME" "PGPOOL_CONNECTION_LIFE_TIME" "PGPOOL_CLIENT_IDLE_LIMIT" "PGPOOL_HEALTH_CHECK_PERIOD" "PGPOOL_HEALTH_CHECK_TIMEOUT" "PGPOOL_HEALTH_CHECK_MAX_RETRIES" "PGPOOL_HEALTH_CHECK_RETRY_DELAY" "PGPOOL_RESERVED_CONNECTIONS")
+    local positive_values=("PGPOOL_NUM_INIT_CHILDREN" "PGPOOL_MAX_POOL" "PGPOOL_CHILD_MAX_CONNECTIONS" "PGPOOL_CHILD_LIFE_TIME" "PGPOOL_CONNECTION_LIFE_TIME" "PGPOOL_CLIENT_IDLE_LIMIT" "PGPOOL_HEALTH_CHECK_PERIOD" "PGPOOL_HEALTH_CHECK_TIMEOUT" "PGPOOL_HEALTH_CHECK_MAX_RETRIES" "PGPOOL_HEALTH_CHECK_RETRY_DELAY" "PGPOOL_RESERVED_CONNECTIONS" "PGPOOL_CONNECT_TIMEOUT" "PGPOOL_HEALTH_CHECK_PSQL_TIMEOUT")
    for p in "${positive_values[@]}"; do
        if [[ -n "${!p:-}" ]]; then
            if ! is_positive_int "${!p}"; then
@@ -292,11 +294,18 @@ pgpool_attach_node() {
 pgpool_healthcheck() {
    info "Checking pgpool health..."
    local backends
-    backends="$(PGCONNECT_TIMEOUT=15 PGPASSWORD="$PGPOOL_POSTGRES_PASSWORD" psql -U "$PGPOOL_POSTGRES_USERNAME" \
-        -d postgres -h "$PGPOOL_TMP_DIR" -p "$PGPOOL_PORT_NUMBER" -tA -c "SHOW pool_nodes;")"
-    if [[ "$backends" ]]; then
-        # look up backends that are marked offline
-        for node in $(echo "${backends}" | grep "down" | tr -d ' '); do
+    # Timeout should be in sync with liveness probe timeout and number of nodes which could be down together
+    # Only nodes marked UP in pgpool are tested. Each failed standby backend consumes up to PGPOOL_CONNECT_TIMEOUT
+    # to test. Network split is worst-case scenario.
+    # NOTE: command blocks indefinitely if primary node is marked UP in pgpool but is DOWN in reality and connection
+    # times-out. Example is again network-split.
+    backends="$(PGCONNECT_TIMEOUT=$PGPOOL_HEALTH_CHECK_PSQL_TIMEOUT PGPASSWORD="$PGPOOL_POSTGRES_PASSWORD" \
+        psql -U "$PGPOOL_POSTGRES_USERNAME" -d postgres -h "$PGPOOL_TMP_DIR" -p "$PGPOOL_PORT_NUMBER" \
+        -tA -c "SHOW pool_nodes;")" || backends="command failed"
+    if [[ "$backends" != "command failed" ]]; then
+        # look up backends that are marked offline and being up - attach only status=down and pg_status=up
+        # situation down|down means PG is not yet ready to be attached
+        for node in $(echo "${backends}" | grep "down|up" | tr -d ' '); do
            IFS="|" read -ra node_info <<< "$node"
            local node_id="${node_info[0]}"
            local node_host="${node_info[1]}"
@@ -307,6 +316,7 @@ pgpool_healthcheck() {
            fi
        done
    else
+        # backends command failed
        return 1
    fi
 }
@@ -488,6 +498,7 @@ pgpool_create_config() {
    pgpool_set_property "health_check_password" "$PGPOOL_HEALTH_CHECK_PASSWORD"
    pgpool_set_property "health_check_max_retries" "$PGPOOL_HEALTH_CHECK_MAX_RETRIES"
    pgpool_set_property "health_check_retry_delay" "$PGPOOL_HEALTH_CHECK_RETRY_DELAY"
+    pgpool_set_property "connect_timeout" "$PGPOOL_CONNECT_TIMEOUT"
    # Failover settings
    pgpool_set_property "failover_command" "echo \">>> Failover - that will initialize new primary node search!\""
    pgpool_set_property "failover_on_backend_error" "off"
--- a/bitnami/pgpool/README.md
+++ b/bitnami/pgpool/README.md
@@ -271,6 +271,8 @@ Pgpool configuration:
 * `PGPOOL_HEALTH_CHECK_TIMEOUT`: Specifies the timeout in seconds to give up connecting to the backend PostgreSQL if the TCP connect does not succeed within this time. Defaults to `10`.
 * `PGPOOL_HEALTH_CHECK_MAX_RETRIES`: Specifies the maximum number of retries to do before giving up and initiating failover when health check fails. Defaults to `5`.
 * `PGPOOL_HEALTH_CHECK_RETRY_DELAY`: Specifies the amount of time in seconds to sleep between failed health check retries. Defaults to `5`.
+* `PGPOOL_CONNECT_TIMEOUT`: Specifies the amount of time in milliseconds before giving up connecting to backend using `connect()` system call. Default is `10000`.
+* `PGPOOL_HEALTH_CHECK_PSQL_TIMEOUT`: Specifies the maximum amount of time in seconds function `pgpool_healthcheck()` waits for result of `show pool_nodes` command. It is set to `PGCONNECT_TIMEOUT` of respective `psql` execution. Default is `15`.
 * `PGPOOL_USER_CONF_FILE`: Configuration file to be added to the generated config file. This allow to override configuration set by the initializacion process. No defaults.
 * `PGPOOL_USER_HBA_FILE`: Configuration file to be added to the generated hba file. This allow to override configuration set by the initialization process. No defaults.
 * `PGPOOL_POSTGRES_CUSTOM_USERS`: List of comma or semicolon separeted list of postgres usernames. This will create entries in `pgpool_passwd`. No defaults.