mirror of
https://github.com/bitnami/charts.git
synced 2026-03-10 23:17:19 +08:00
* [bitnami/etcd] Reuse startFromSnapshot volume in the disasterRecovery mode * Fix typo * Improve logic after testing the different use cases * [bitnami/etcd] Update components versions Signed-off-by: Bitnami Containers <containers@bitnami.com> Co-authored-by: Bitnami Containers <containers@bitnami.com>
295 lines
11 KiB
YAML
295 lines
11 KiB
YAML
{{- /* Values shared by every script rendered into this ConfigMap. */}}
{{- $replicaCount := .Values.statefulset.replicaCount | int }}
{{- $clientPort := .Values.service.port | int }}
{{- $peerPort := .Values.service.peerPort | int }}
{{- $etcdAuthOptions := include "etcd.authOptions" . }}
{{- $etcdFullname := include "etcd.fullname" . }}
{{- $releaseNamespace := .Release.Namespace }}
{{- $etcdHeadlessServiceName := printf "%s-headless" $etcdFullname }}
{{- $clusterDomain := .Values.clusterDomain }}
{{- $etcdPeerProtocol := include "etcd.peerProtocol" . }}
{{- $etcdClientProtocol := include "etcd.clientProtocol" . }}
{{- /*
The startup snapshot is read from /init-snapshot, except when disaster recovery
and startFromSnapshot are both enabled and no existing disaster-recovery claim
is configured: in that case it is read from /snapshots.
*/}}
{{- $snapshotDir := "/init-snapshot" }}
{{- if and .Values.disasterRecovery.enabled .Values.startFromSnapshot.enabled (not .Values.disasterRecovery.pvc.existingClaim) }}
{{- $snapshotDir = "/snapshots" }}
{{- end }}
{{- $initSnapshotPath := printf "%s/%s" $snapshotDir .Values.startFromSnapshot.snapshotFilename }}
{{- $snapshotHistoryLimit := .Values.disasterRecovery.cronjob.snapshotHistoryLimit }}
{{- /* One "host:port" client endpoint per StatefulSet ordinal. */}}
{{- $endpoints := list }}
{{- range $ordinal := until $replicaCount }}
{{- $endpoints = append $endpoints (printf "%s-%d.%s.%s.svc.%s:%d" $etcdFullname $ordinal $etcdHeadlessServiceName $releaseNamespace $clusterDomain $clientPort) }}
{{- end }}
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: {{ include "etcd.fullname" . }}-scripts
|
|
labels: {{- include "etcd.labels" . | nindent 4 }}
|
|
data:
|
|
setup.sh: |-
|
|
#!/bin/bash

# Strict mode: stop on errors, on failed pipeline stages and on unset variables.
set -euo pipefail

# Debug section
# fds 3 and 4 keep pointing at the original stdout/stderr, so commands that
# redirect with "1>&3 2>&4" stay visible even after fds 1 and 2 are silenced.
exec 3>&1 4>&2

if [[ "${BITNAMI_DEBUG:-false}" != true ]]; then
    echo "==> Bash debug is off"
    exec 1>/dev/null 2>/dev/null
else
    echo "==> Bash debug is on"
fi

# Constants
HOSTNAME="$(hostname -s)"
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}
export ETCDCTL_ENDPOINTS={{ join "," $endpoints | quote }}
# The root password is carried over to ROOT_PASSWORD and the original variable
# is dropped from the environment.
# NOTE(review): presumably so the etcd process never sees ETCD_ROOT_PASSWORD - confirm.
export ROOT_PASSWORD="${ETCD_ROOT_PASSWORD:-}"
[[ -z "${ETCD_ROOT_PASSWORD:-}" ]] || unset ETCD_ROOT_PASSWORD
|
|
# Functions
|
|
## Store member id for later member replacement
#
# Polls "etcdctl member list" once per second until it succeeds, then writes
# the ID of the member whose entry contains $HOSTNAME as a whole word to
# "$ETCD_DATA_DIR/member_id".
# Globals:  AUTH_OPTIONS, HOSTNAME, ETCD_DATA_DIR (read)
# Outputs:  a confirmation line on fds 3/4
# Returns:  does not return - it ends with "exit 0", so it terminates the
#           shell it runs in (callers in this script start it with "&").
store_member_id() {
    local -r member_id_file="${ETCD_DATA_DIR}/member_id"

    # AUTH_OPTIONS is left unquoted on purpose so that it word-splits into
    # separate etcdctl arguments.
    while ! etcdctl $AUTH_OPTIONS member list; do sleep 1; done
    # The first whitespace-separated field of the matching line is "<id>,";
    # everything from the comma onwards is stripped.
    etcdctl $AUTH_OPTIONS member list \
        | grep -w "$HOSTNAME" \
        | awk '{ sub(/,.*$/, "", $1); print $1 }' > "$member_id_file"
    echo "==> Stored member id: $(cat "$member_id_file")" 1>&3 2>&4
    exit 0
}
|
|
## Configure RBAC
#
# When ROOT_PASSWORD is non-empty and this pod is the first replica, starts a
# temporary etcd in the background, waits until "etcdctl member list" succeeds,
# creates the "root" user (password fed on stdin), enables authentication and
# stops the temporary etcd again. Does nothing in any other case.
# Globals:  ROOT_PASSWORD, HOSTNAME, AUTH_OPTIONS (read)
# Outputs:  a progress line on fds 3/4
configure_rbac() {
    local etcd_pid

    # When there's more than one replica, we can assume the 1st member
    # to be created is "{{ $etcdFullname }}-0" since a statefulset is used
    if [[ -n "${ROOT_PASSWORD:-}" ]] && [[ "$HOSTNAME" == "{{ $etcdFullname }}-0" ]]; then
        echo "==> Configuring RBAC authentication!" 1>&3 2>&4
        etcd &
        etcd_pid=$!
        # AUTH_OPTIONS is left unquoted on purpose so that it word-splits into
        # separate etcdctl arguments.
        while ! etcdctl $AUTH_OPTIONS member list; do sleep 1; done
        # printf instead of echo: a password such as "-n" must reach etcdctl verbatim.
        printf '%s\n' "$ROOT_PASSWORD" | etcdctl $AUTH_OPTIONS user add root --interactive=false
        etcdctl $AUTH_OPTIONS auth enable
        kill "$etcd_pid"
        # Block until the temporary etcd has really exited instead of sleeping
        # for a fixed time; its exit status is irrelevant here, hence "|| true"
        # so that errexit is not tripped.
        wait "$etcd_pid" || true
    fi
}
|
|
## Checks whether there was a disaster or not
#
# Runs "etcdctl endpoint health" against every entry of ETCDCTL_ENDPOINTS that
# differs from $ETCD_ADVERTISE_CLIENT_URLS and counts the healthy ones.
# Returns 0 (disaster) when fewer than (replicas + 1) / 2 endpoints are
# healthy, 1 otherwise.
# With disaster recovery enabled, the marker file /snapshots/.disaster_recovery
# is created when a disaster is detected; while it exists the function keeps
# returning 0, and it is removed once all the other replicas are healthy.
# Globals:  ETCDCTL_ENDPOINTS, ETCD_ADVERTISE_CLIENT_URLS, AUTH_OPTIONS (read)
# Outputs:  progress and rm/touch output on fds 3/4
# NOTE(review): the endpoints are rendered as "host:port" without a scheme; if
# ETCD_ADVERTISE_CLIENT_URLS carries a scheme the inequality below is always
# true and this member probes itself as well - confirm against the pod env.
is_disastrous_failure() {
    local -a endpoints_array=()
    local endpoint
    local active_endpoints=0
    local -r min_endpoints=$((({{ $replicaCount }} + 1)/2))

    # Split on commas only, without pathname expansion of the pieces.
    IFS=',' read -r -a endpoints_array <<< "$ETCDCTL_ENDPOINTS"

    for endpoint in "${endpoints_array[@]}"; do
        # The probe runs in a subshell with ETCDCTL_ENDPOINTS unset so that only
        # the --endpoints flag selects the target.
        if [[ "$endpoint" != "$ETCD_ADVERTISE_CLIENT_URLS" ]] && (unset -v ETCDCTL_ENDPOINTS; etcdctl $AUTH_OPTIONS endpoint health --endpoints="$endpoint"); then
            active_endpoints=$((active_endpoints + 1))
        fi
    done
    {{- if .Values.disasterRecovery.enabled }}
    if [[ -f "/snapshots/.disaster_recovery" ]]; then
        # A recovery is already in progress: still report a disaster, and clear
        # the marker when every other replica is healthy again.
        if [[ $active_endpoints -eq $(({{ $replicaCount }} - 1)) ]]; then
            echo "==> I'm the last to recover from the disaster!" 1>&3 2>&4
            rm "/snapshots/.disaster_recovery" 1>&3 2>&4
        fi
        return 0
    fi
    if [[ $active_endpoints -lt $min_endpoints ]]; then
        touch "/snapshots/.disaster_recovery" 1>&3 2>&4
        return 0
    fi
    return 1
    {{- else }}
    [[ $active_endpoints -lt $min_endpoints ]]
    {{- end }}
}
|
|
|
|
## Check whether the member was successfully removed from the cluster
#
# Decides whether this pod has to join the cluster as a new member.
# Returns 0, after deleting the contents of ETCD_DATA_DIR, when either
#   - member_removal.log (next to the data dir) records a successful
#     "Member <id> removed from cluster <id>" line, or
#   - the data dir lacks "member/snap" or "member_id".
# Returns 1 otherwise and leaves the data untouched.
# The removal log is deleted in both cases.
# Globals:  ETCD_DATA_DIR (read)
# Outputs:  the matching log line on stdout; rm output on fds 3/4
should_add_new_member() {
    local return_value=0
    local removal_log
    removal_log="$(dirname "$ETCD_DATA_DIR")/member_removal.log"

    if grep -E "^Member[[:space:]]+[a-z0-9]+\s+removed\s+from\s+cluster\s+[a-z0-9]+$" "$removal_log" || \
        ! { [[ -d "$ETCD_DATA_DIR/member/snap" ]] && [[ -f "$ETCD_DATA_DIR/member_id" ]]; }; then
        # ":?" aborts instead of expanding to "/*" should the variable be empty.
        rm -rf -- "${ETCD_DATA_DIR:?}"/* 1>&3 2>&4
    else
        return_value=1
    fi
    rm -f -- "$removal_log" 1>&3 2>&4
    return $return_value
}
|
|
|
|
if [[ ! -d "$ETCD_DATA_DIR" ]]; then
|
|
echo "==> Creating data dir..." 1>&3 2>&4
|
|
echo "==> There is no data at all. Initializing a new member of the cluster..." 1>&3 2>&4
|
|
{{- if .Values.startFromSnapshot.enabled }}
|
|
if [[ -f "{{ $initSnapshotPath }}" ]]; then
|
|
echo "==> Initializing member by restoring etcd cluster from snapshot..." 1>&3 2>&4
|
|
etcdctl snapshot restore {{ $initSnapshotPath }} \
|
|
{{- if gt $replicaCount 1 }}
|
|
--name $ETCD_NAME \
|
|
--initial-cluster $ETCD_INITIAL_CLUSTER \
|
|
--initial-cluster-token $ETCD_INITIAL_CLUSTER_TOKEN \
|
|
--initial-advertise-peer-urls $ETCD_INITIAL_ADVERTISE_PEER_URLS \
|
|
{{- end }}
|
|
--data-dir $ETCD_DATA_DIR 1>&3 2>&4
|
|
store_member_id & 1>&3 2>&4
|
|
else
|
|
echo "==> There was no snapshot to perform data recovery!!" 1>&3 2>&4
|
|
exit 1
|
|
fi
|
|
{{- else }}
|
|
store_member_id & 1>&3 2>&4
|
|
configure_rbac
|
|
{{- end }}
|
|
else
|
|
echo "==> Detected data from previous deployments..." 1>&3 2>&4
|
|
if [[ $(stat -c "%a" "$ETCD_DATA_DIR") != *700 ]]; then
|
|
echo "==> Setting data directory permissions to 700 in a recursive way (required in etcd >=3.4.10)" 1>&3 2>&4
|
|
chmod -R 700 $ETCD_DATA_DIR
|
|
else
|
|
echo "==> The data directory is already configured with the proper permissions" 1>&3 2>&4
|
|
fi
|
|
if [[ {{ $replicaCount }} -eq 1 ]]; then
|
|
echo "==> Single node cluster detected!!" 1>&3 2>&4
|
|
elif is_disastrous_failure; then
|
|
echo "==> Cluster not responding!!" 1>&3 2>&4
|
|
{{- if .Values.disasterRecovery.enabled }}
|
|
latest_snapshot_file="$(find /snapshots/ -maxdepth 1 -type f -name 'db-*' | sort | tail -n 1)"
|
|
if [[ "${latest_snapshot_file}" != "" ]]; then
|
|
echo "==> Restoring etcd cluster from snapshot..." 1>&3 2>&4
|
|
|
|
rm -rf $ETCD_DATA_DIR 1>&3 2>&4
|
|
etcdctl snapshot restore "${latest_snapshot_file}" \
|
|
--name $ETCD_NAME \
|
|
--data-dir $ETCD_DATA_DIR \
|
|
--initial-cluster $ETCD_INITIAL_CLUSTER \
|
|
--initial-cluster-token $ETCD_INITIAL_CLUSTER_TOKEN \
|
|
--initial-advertise-peer-urls $ETCD_INITIAL_ADVERTISE_PEER_URLS 1>&3 2>&4
|
|
store_member_id & 1>&3 2>&4
|
|
else
|
|
echo "==> There was no snapshot to perform data recovery!!" 1>&3 2>&4
|
|
exit 1
|
|
fi
|
|
{{- else }}
|
|
echo "==> Disaster recovery is disabled, the cluster will try to recover on it's own..." 1>&3 2>&4
|
|
{{- end }}
|
|
elif should_add_new_member; then
|
|
echo "==> Adding new member to existing cluster..." 1>&3 2>&4
|
|
etcdctl $AUTH_OPTIONS member add "$HOSTNAME" --peer-urls="{{ $etcdPeerProtocol }}://${HOSTNAME}.{{ $etcdHeadlessServiceName }}.{{ .Release.Namespace }}.svc.{{ $clusterDomain }}:{{ $peerPort }}" | grep "^ETCD_" > "$ETCD_DATA_DIR/new_member_envs"
|
|
sed -ie "s/^/export /" "$ETCD_DATA_DIR/new_member_envs"
|
|
echo "==> Loading env vars of existing cluster..." 1>&3 2>&4
|
|
source "$ETCD_DATA_DIR/new_member_envs" 1>&3 2>&4
|
|
store_member_id & 1>&3 2>&4
|
|
else
|
|
echo "==> Updating member in existing cluster..." 1>&3 2>&4
|
|
etcdctl $AUTH_OPTIONS member update "$(cat "$ETCD_DATA_DIR/member_id")" --peer-urls="{{ $etcdPeerProtocol }}://${HOSTNAME}.{{ $etcdHeadlessServiceName }}.{{ .Release.Namespace }}.svc.{{ $clusterDomain }}:{{ $peerPort }}" 1>&3 2>&4
|
|
fi
|
|
fi
|
|
|
|
{{- if .Values.configFileConfigMap }}
|
|
exec etcd --config-file /opt/bitnami/etcd/conf/etcd.conf.yml 1>&3 2>&4
|
|
{{- else }}
|
|
exec etcd 1>&3 2>&4
|
|
{{- end }}
|
|
prestop-hook.sh: |-
|
|
#!/bin/bash

# Removes this member from the etcd cluster and records the complete etcdctl
# output (stdout and stderr) in member_removal.log next to the data directory.

set -euo pipefail

# Debug section
# fds 3 and 4 keep pointing at the original stdout/stderr.
exec 3>&1 4>&2

if [[ "${BITNAMI_DEBUG:-false}" != true ]]; then
    echo "==> Bash debug is off"
    exec 1>/dev/null 2>/dev/null
else
    echo "==> Bash debug is on"
fi

# Constants
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}
export ETCDCTL_ENDPOINTS={{ join "," $endpoints | quote }}

removal_log="$(dirname "$ETCD_DATA_DIR")/member_removal.log"
# AUTH_OPTIONS is left unquoted on purpose so that it word-splits into separate arguments.
etcdctl $AUTH_OPTIONS member remove --debug=true "$(cat "$ETCD_DATA_DIR/member_id")" > "$removal_log" 2>&1
|
|
probes.sh: |-
|
|
#!/bin/bash

# Probe script: the exit status of "etcdctl endpoint health" is the exit status
# of the script.

set -euo pipefail

# Debug section
# fds 3 and 4 keep pointing at the original stdout/stderr.
exec 3>&1 4>&2

if [[ "${BITNAMI_DEBUG:-false}" != true ]]; then
    echo "==> Bash debug is off"
    exec 1>/dev/null 2>/dev/null
else
    echo "==> Bash debug is on"
fi

# Constants
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}

echo "==> [DEBUG] Probing etcd cluster"
echo "==> [DEBUG] Probe command: \"etcdctl $AUTH_OPTIONS endpoint health\""
# AUTH_OPTIONS is left unquoted on purpose so that it word-splits into separate arguments.
etcdctl $AUTH_OPTIONS endpoint health
|
|
{{- if .Values.disasterRecovery.enabled }}
|
|
save-snapshot.sh: |-
|
|
#!/bin/bash

# Saves a snapshot of the keyspace to /snapshots/db-<UTC timestamp> using the
# first healthy endpoint of ETCDCTL_ENDPOINTS, then prunes older "db-*" files.
# Exits 0 after a successful snapshot and 1 when no endpoint is healthy.

set -o errexit
set -o pipefail
set -o nounset

# Debug section
# fds 3 and 4 keep pointing at the original stdout/stderr.
exec 3>&1
exec 4>&2

if [[ "${BITNAMI_SNAPSHOT_DEBUG:-false}" = true ]] || [[ "${BITNAMI_DEBUG:-false}" = true ]]; then
    echo "==> Bash debug is on"
    set -x
else
    echo "==> Bash debug is off"
    exec 1>/dev/null
    exec 2>/dev/null
fi

# Constants
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}
export ETCDCTL_ENDPOINTS={{ join "," $endpoints | quote }}

mkdir -p "/snapshots" 1>&3 2>&4

# Accept both "," and ";" as separators of the endpoint list.
read -r -a endpoints <<< "$(tr ',;' ' ' <<< "$ETCDCTL_ENDPOINTS")"
for endp in "${endpoints[@]}"; do
    echo "Using endpoint $endp" 1>&3 2>&4
    # etcdctl runs in a subshell with ETCDCTL_ENDPOINTS unset so that only the
    # --endpoints flag selects the target and the list stays intact here.
    if (unset -v ETCDCTL_ENDPOINTS; etcdctl $AUTH_OPTIONS endpoint health --endpoints="${endp}"); then
        echo "Snapshotting the keyspace..." 1>&3 2>&4
        current_time="$(date -u "+%Y-%m-%d_%H-%M")"
        (unset -v ETCDCTL_ENDPOINTS; etcdctl $AUTH_OPTIONS snapshot save "/snapshots/db-${current_time}" --endpoints="${endp}") 1>&3 2>&4
        # Newest first, skipping the snapshot just taken; everything past the
        # configured history limit is deleted.
        find /snapshots/ -maxdepth 1 -type f -name 'db-*' \! -name "db-${current_time}" \
            | sort -r \
            | tail -n+$((1 + {{ $snapshotHistoryLimit }})) \
            | xargs rm -f 1>&3 2>&4
        exit 0
    else
        # Name the endpoint that failed, not the whole list.
        echo "Warning - etcd endpoint ${endp} not healthy" 1>&3 2>&4
        echo "Trying another endpoint." 1>&3 2>&4
    fi
done

# exit with error if all endpoints are bad
echo "Error - all etcd endpoints are unhealthy!" 1>&3 2>&4
exit 1
|
|
{{- end }}
|