Files
charts/bitnami/etcd/templates/scripts-configmap.yaml
Carlos Rodríguez Hernández 2e05d2ab62 [bitnami/etcd] Reuse startFromSnapshot volume in the disasterRecovery mode (#4940)
* [bitnami/etcd] Reuse startFromSnapshot volume in the disasterRecovery mode

* Fix typo

* Improve logic after testing the different use cases

* [bitnami/etcd] Update components versions

Signed-off-by: Bitnami Containers <containers@bitnami.com>

Co-authored-by: Bitnami Containers <containers@bitnami.com>
2021-01-11 21:09:13 +01:00

295 lines
11 KiB
YAML

{{- /* Values and derived names shared by every script rendered in this ConfigMap */ -}}
{{- $replicaCount := int .Values.statefulset.replicaCount }}
{{- $clientPort := int .Values.service.port }}
{{- $peerPort := int .Values.service.peerPort }}
{{- $etcdAuthOptions := include "etcd.authOptions" . }}
{{- $etcdFullname := include "etcd.fullname" . }}
{{- $releaseNamespace := .Release.Namespace }}
{{- $etcdHeadlessServiceName := printf "%s-%s" $etcdFullname "headless" }}
{{- $clusterDomain := .Values.clusterDomain }}
{{- $etcdPeerProtocol := include "etcd.peerProtocol" . }}
{{- $etcdClientProtocol := include "etcd.clientProtocol" . }}
{{- /* Snapshot used to initialize the cluster: read from /init-snapshot by default */ -}}
{{- $initSnapshotPath := printf "/init-snapshot/%s" .Values.startFromSnapshot.snapshotFilename }}
{{- /* When disaster recovery and startFromSnapshot are both enabled and no existing disaster-recovery claim is given, the snapshot is read from /snapshots instead */ -}}
{{- if and .Values.disasterRecovery.enabled .Values.startFromSnapshot.enabled (not .Values.disasterRecovery.pvc.existingClaim) }}
{{- $initSnapshotPath = printf "/snapshots/%s" .Values.startFromSnapshot.snapshotFilename }}
{{- end }}
{{- $snapshotHistoryLimit := .Values.disasterRecovery.cronjob.snapshotHistoryLimit }}
{{- /* One client endpoint per replica, formatted as "<fullname>-<index>.<headless service>.<namespace>.svc.<cluster domain>:<client port>" (no scheme prefix) */ -}}
{{- $endpoints := list }}
{{- range $e, $i := until $replicaCount }}
{{- $endpoints = append $endpoints (printf "%s-%d.%s.%s.svc.%s:%d" $etcdFullname $i $etcdHeadlessServiceName $releaseNamespace $clusterDomain $clientPort) }}
{{- end }}
# ConfigMap bundling the helper scripts of the chart, one data key per script
# (setup.sh, prestop-hook.sh, probes.sh and, with disaster recovery enabled,
# save-snapshot.sh)
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "etcd.fullname" . }}-scripts
labels: {{- include "etcd.labels" . | nindent 4 }}
data:
# setup.sh prepares the local etcd member and finishes by exec'ing etcd
setup.sh: |-
#!/bin/bash
# Strict mode: abort on failing commands, failing pipeline stages and unset variables
set -o errexit -o pipefail -o nounset
# Debug section
# fds 3 and 4 keep the original stdout/stderr, so that messages explicitly sent
# there (1>&3 2>&4) remain visible even when regular output is silenced below
exec 3>&1 4>&2
if [[ "${BITNAMI_DEBUG:-false}" != true ]]; then
  echo "==> Bash debug is off"
  exec 1>/dev/null 2>/dev/null
else
  echo "==> Bash debug is on"
fi
# Constants
HOSTNAME="$(hostname -s)"
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}
export ETCDCTL_ENDPOINTS={{ join "," $endpoints | quote }}
# Keep the root password under another name and drop the original variable
# (only when it actually holds a value)
ROOT_PASSWORD="${ETCD_ROOT_PASSWORD:-}"
export ROOT_PASSWORD
[[ -z "${ETCD_ROOT_PASSWORD:-}" ]] || unset ETCD_ROOT_PASSWORD
# Functions
## Store member id for later member replacement
## Globals:  AUTH_OPTIONS, HOSTNAME, ETCD_DATA_DIR (read)
## Outputs:  writes the id to "$ETCD_DATA_DIR/member_id" and reports it on fd 3
## Note:     ends with "exit 0", so it must run in its own process
##           (the callers launch it in the background with "&")
store_member_id() {
  local -r member_id_file="${ETCD_DATA_DIR}/member_id"
  # Poll until the cluster answers a member listing
  until etcdctl $AUTH_OPTIONS member list; do
    sleep 1
  done
  # The id is the first comma-terminated column of the line naming this host
  etcdctl $AUTH_OPTIONS member list \
    | grep -w "$HOSTNAME" \
    | awk '{ print $1 }' \
    | awk -F "," '{ print $1 }' > "$member_id_file"
  # Quoted path: the stored id is still reported when the data dir contains whitespace
  echo "==> Stored member id: $(cat "$member_id_file")" 1>&3 2>&4
  exit 0
}
## Configure RBAC
## Globals:  ROOT_PASSWORD, HOSTNAME, AUTH_OPTIONS (read)
## Behavior: on the first member only, and only when a root password is set,
##           starts a temporary etcd, creates the "root" user with the password
##           fed through stdin, enables authentication and stops that etcd again.
configure_rbac() {
  local etcd_pid
  # When there's more than one replica, we can assume the 1st member
  # to be created is "{{ $etcdFullname }}-0" since a statefulset is used
  if [[ -z "${ROOT_PASSWORD:-}" ]] || [[ "$HOSTNAME" != "{{ $etcdFullname }}-0" ]]; then
    return 0
  fi
  echo "==> Configuring RBAC authentication!" 1>&3 2>&4
  etcd &
  etcd_pid=$!
  until etcdctl $AUTH_OPTIONS member list; do
    sleep 1
  done
  # printf instead of echo: a password such as "-n" or "-e" would be
  # interpreted as an echo option and never reach etcdctl
  printf '%s\n' "$ROOT_PASSWORD" | etcdctl $AUTH_OPTIONS user add root --interactive=false
  etcdctl $AUTH_OPTIONS auth enable
  kill "$etcd_pid"
  # Leave the temporary etcd some time to shut down
  sleep 5
}
## Checks whether there was a disaster or not
## Globals:  ETCDCTL_ENDPOINTS, ETCD_ADVERTISE_CLIENT_URLS, AUTH_OPTIONS (read)
## Returns:  0 when fewer than half (rounded up) of the replicas answer a
##           health check, or, with disaster recovery enabled, while the
##           "/snapshots/.disaster_recovery" marker file exists; 1 otherwise.
is_disastrous_failure() {
  local -a endpoints_array=()
  local endpoint
  local active_endpoints=0
  local -r min_endpoints=$((({{ $replicaCount }} + 1)/2))
  # The endpoint list carries bare "host:port" entries, so the own URL is also
  # compared without its scheme (NOTE(review): assumes ETCD_ADVERTISE_CLIENT_URLS
  # is a single "<scheme>://host:port" value - confirm against the statefulset)
  local -r own_url="$ETCD_ADVERTISE_CLIENT_URLS"
  local -r own_endpoint="${own_url#*://}"

  # Split on commas without word-splitting/globbing side effects
  IFS=',' read -r -a endpoints_array <<< "$ETCDCTL_ENDPOINTS"
  for endpoint in "${endpoints_array[@]}"; do
    # Never count this member itself
    if [[ "$endpoint" == "$own_url" ]] || [[ "$endpoint" == "$own_endpoint" ]]; then
      continue
    fi
    # ETCDCTL_ENDPOINTS is unset in a subshell so that only --endpoints applies
    if (unset -v ETCDCTL_ENDPOINTS; etcdctl $AUTH_OPTIONS endpoint health --endpoints="$endpoint"); then
      active_endpoints=$((active_endpoints + 1))
    fi
  done
{{- if .Values.disasterRecovery.enabled }}
  if [[ -f "/snapshots/.disaster_recovery" ]]; then
    # A recovery is in progress; the marker is cleared by the last member to come back
    if [[ $active_endpoints -eq $(({{ $replicaCount }} - 1)) ]]; then
      echo "==> I'm the last to recover from the disaster!" 1>&3 2>&4
      rm "/snapshots/.disaster_recovery" 1>&3 2>&4
    fi
    return 0
  fi
  if [[ $active_endpoints -lt $min_endpoints ]]; then
    # Leave a marker so the remaining members also go through the recovery path
    touch "/snapshots/.disaster_recovery" 1>&3 2>&4
    return 0
  fi
  return 1
{{- else }}
  [[ $active_endpoints -lt $min_endpoints ]]
{{- end }}
}
## Check whether the member was successfully removed from the cluster
## Globals:  ETCD_DATA_DIR (read)
## Returns:  0 when the member has to be added as a new one: either the removal
##           log (written next to the data dir) confirms the removal, or the
##           data dir lacks "member/snap" or "member_id". In that case the
##           content of the data dir is wiped. Returns 1 otherwise.
##           The removal log is deleted in both cases.
should_add_new_member() {
  local removal_log
  local status=0
  removal_log="$(dirname "$ETCD_DATA_DIR")/member_removal.log"
  if grep -E "^Member[[:space:]]+[a-z0-9]+\s+removed\s+from\s+cluster\s+[a-z0-9]+$" "$removal_log" \
    || [[ ! -d "$ETCD_DATA_DIR/member/snap" ]] \
    || [[ ! -f "$ETCD_DATA_DIR/member_id" ]]; then
    # Quoted and guarded: never expands to "/*" when the variable is empty,
    # and works for paths containing whitespace
    rm -rf -- "${ETCD_DATA_DIR:?}"/* 1>&3 2>&4
  else
    status=1
  fi
  rm -f -- "$removal_log" 1>&3 2>&4
  return "$status"
}
# Main flow: prepare the local member depending on whether a data dir already
# exists, then replace this shell with etcd.
# store_member_id is launched in the background without extra redirections:
# it reports on fds 3/4 by itself.
if [[ ! -d "$ETCD_DATA_DIR" ]]; then
  echo "==> Creating data dir..." 1>&3 2>&4
  echo "==> There is no data at all. Initializing a new member of the cluster..." 1>&3 2>&4
{{- if .Values.startFromSnapshot.enabled }}
  if [[ -f "{{ $initSnapshotPath }}" ]]; then
    echo "==> Initializing member by restoring etcd cluster from snapshot..." 1>&3 2>&4
    etcdctl snapshot restore "{{ $initSnapshotPath }}" \
{{- if gt $replicaCount 1 }}
      --name "$ETCD_NAME" \
      --initial-cluster "$ETCD_INITIAL_CLUSTER" \
      --initial-cluster-token "$ETCD_INITIAL_CLUSTER_TOKEN" \
      --initial-advertise-peer-urls "$ETCD_INITIAL_ADVERTISE_PEER_URLS" \
{{- end }}
      --data-dir "$ETCD_DATA_DIR" 1>&3 2>&4
    store_member_id &
  else
    echo "==> There was no snapshot to perform data recovery!!" 1>&3 2>&4
    exit 1
  fi
{{- else }}
  store_member_id &
  configure_rbac
{{- end }}
else
  echo "==> Detected data from previous deployments..." 1>&3 2>&4
  if [[ $(stat -c "%a" "$ETCD_DATA_DIR") != *700 ]]; then
    echo "==> Setting data directory permissions to 700 in a recursive way (required in etcd >=3.4.10)" 1>&3 2>&4
    chmod -R 700 "$ETCD_DATA_DIR"
  else
    echo "==> The data directory is already configured with the proper permissions" 1>&3 2>&4
  fi
  if [[ {{ $replicaCount }} -eq 1 ]]; then
    echo "==> Single node cluster detected!!" 1>&3 2>&4
  elif is_disastrous_failure; then
    echo "==> Cluster not responding!!" 1>&3 2>&4
{{- if .Values.disasterRecovery.enabled }}
    # Snapshot names embed a sortable timestamp, so the last one is the newest
    latest_snapshot_file="$(find /snapshots/ -maxdepth 1 -type f -name 'db-*' | sort | tail -n 1)"
    if [[ "${latest_snapshot_file}" != "" ]]; then
      echo "==> Restoring etcd cluster from snapshot..." 1>&3 2>&4
      # Guarded: refuses to run with an empty path
      rm -rf -- "${ETCD_DATA_DIR:?}" 1>&3 2>&4
      etcdctl snapshot restore "${latest_snapshot_file}" \
        --name "$ETCD_NAME" \
        --data-dir "$ETCD_DATA_DIR" \
        --initial-cluster "$ETCD_INITIAL_CLUSTER" \
        --initial-cluster-token "$ETCD_INITIAL_CLUSTER_TOKEN" \
        --initial-advertise-peer-urls "$ETCD_INITIAL_ADVERTISE_PEER_URLS" 1>&3 2>&4
      store_member_id &
    else
      echo "==> There was no snapshot to perform data recovery!!" 1>&3 2>&4
      exit 1
    fi
{{- else }}
    echo "==> Disaster recovery is disabled, the cluster will try to recover on it's own..." 1>&3 2>&4
{{- end }}
  elif should_add_new_member; then
    echo "==> Adding new member to existing cluster..." 1>&3 2>&4
    # "member add" prints the ETCD_* settings the new member has to start with
    etcdctl $AUTH_OPTIONS member add "$HOSTNAME" --peer-urls="{{ $etcdPeerProtocol }}://${HOSTNAME}.{{ $etcdHeadlessServiceName }}.{{ .Release.Namespace }}.svc.{{ $clusterDomain }}:{{ $peerPort }}" \
      | grep "^ETCD_" > "$ETCD_DATA_DIR/new_member_envs"
    # "-i -e" rather than "-ie": the latter means "in-place with backup suffix e"
    # and leaves a stray "new_member_envse" file behind
    sed -i -e "s/^/export /" "$ETCD_DATA_DIR/new_member_envs"
    echo "==> Loading env vars of existing cluster..." 1>&3 2>&4
    source "$ETCD_DATA_DIR/new_member_envs" 1>&3 2>&4
    store_member_id &
  else
    echo "==> Updating member in existing cluster..." 1>&3 2>&4
    etcdctl $AUTH_OPTIONS member update "$(cat "$ETCD_DATA_DIR/member_id")" --peer-urls="{{ $etcdPeerProtocol }}://${HOSTNAME}.{{ $etcdHeadlessServiceName }}.{{ .Release.Namespace }}.svc.{{ $clusterDomain }}:{{ $peerPort }}" 1>&3 2>&4
  fi
fi
{{- if .Values.configFileConfigMap }}
exec etcd --config-file /opt/bitnami/etcd/conf/etcd.conf.yml 1>&3 2>&4
{{- else }}
exec etcd 1>&3 2>&4
{{- end }}
prestop-hook.sh: |-
#!/bin/bash
# Strict mode: abort on failing commands, failing pipeline stages and unset variables
set -o errexit -o pipefail -o nounset
# Debug section
# fds 3 and 4 keep the original stdout/stderr before regular output is silenced
exec 3>&1 4>&2
if [[ "${BITNAMI_DEBUG:-false}" != true ]]; then
  echo "==> Bash debug is off"
  exec 1>/dev/null 2>/dev/null
else
  echo "==> Bash debug is on"
fi
# Constants
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}
export ETCDCTL_ENDPOINTS={{ join "," $endpoints | quote }}
# Remove the member whose id is stored in the data dir; the whole output
# (stdout and stderr) is captured in a log file placed next to the data dir
etcdctl $AUTH_OPTIONS member remove --debug=true \
  "$(cat "$ETCD_DATA_DIR/member_id")" \
  > "$(dirname "$ETCD_DATA_DIR")/member_removal.log" 2>&1
probes.sh: |-
#!/bin/bash
# Strict mode: abort on failing commands, failing pipeline stages and unset variables
set -o errexit -o pipefail -o nounset
# Debug section
# fds 3 and 4 keep the original stdout/stderr; nothing below writes to them,
# so the probe is silent unless debug is on
exec 3>&1 4>&2
if [[ "${BITNAMI_DEBUG:-false}" != true ]]; then
  echo "==> Bash debug is off"
  exec 1>/dev/null 2>/dev/null
else
  echo "==> Bash debug is on"
fi
# Constants
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}
echo "==> [DEBUG] Probing etcd cluster"
echo "==> [DEBUG] Probe command: \"etcdctl $AUTH_OPTIONS endpoint health\""
# Last command of the script: its exit status is the result of the probe
etcdctl $AUTH_OPTIONS endpoint health
{{- if .Values.disasterRecovery.enabled }}
save-snapshot.sh: |-
#!/bin/bash
# Saves a snapshot of the keyspace under /snapshots using the first healthy
# endpoint, prunes old snapshots and exits 0; exits 1 if no endpoint is healthy.
set -o errexit
set -o pipefail
set -o nounset
# Debug section
# fds 3 and 4 keep the original stdout/stderr before regular output is silenced
exec 3>&1
exec 4>&2
if [[ "${BITNAMI_SNAPSHOT_DEBUG:-false}" = true ]] || [[ "${BITNAMI_DEBUG:-false}" = true ]]; then
  echo "==> Bash debug is on"
  set -x
else
  echo "==> Bash debug is off"
  exec 1>/dev/null
  exec 2>/dev/null
fi
# Constants
AUTH_OPTIONS={{ $etcdAuthOptions | quote }}
export ETCDCTL_ENDPOINTS={{ join "," $endpoints | quote }}
mkdir -p "/snapshots" 1>&3 2>&4
# Accept both "," and ";" as separators of the endpoint list
read -r -a endpoints <<< "$(tr ',;' ' ' <<< "$ETCDCTL_ENDPOINTS")"
for endp in "${endpoints[@]}"; do
  echo "Using endpoint $endp" 1>&3 2>&4
  # ETCDCTL_ENDPOINTS is unset inside a subshell only, so that --endpoints is
  # the single source of endpoints for this call and the list stays available
  if (unset -v ETCDCTL_ENDPOINTS; etcdctl $AUTH_OPTIONS endpoint health --endpoints="${endp}"); then
    echo "Snapshotting the keyspace..." 1>&3 2>&4
    current_time="$(date -u "+%Y-%m-%d_%H-%M")"
    (unset -v ETCDCTL_ENDPOINTS; etcdctl $AUTH_OPTIONS snapshot save "/snapshots/db-${current_time}" --endpoints="${endp}") 1>&3 2>&4
    # Prune: besides the snapshot just taken, keep only the newest
    # {{ $snapshotHistoryLimit }} older ones (names sort chronologically)
    find /snapshots/ -maxdepth 1 -type f -name 'db-*' \! -name "db-${current_time}" \
      | sort -r \
      | tail -n+$((1 + {{ $snapshotHistoryLimit }})) \
      | xargs rm -f 1>&3 2>&4
    exit 0
  else
    # Report the endpoint that was actually probed, not the whole list
    echo "Warning - etcd endpoint ${endp} not healthy" 1>&3 2>&4
    echo "Trying another endpoint." 1>&3 2>&4
  fi
done
# exit with error if all endpoints are bad
echo "Error - all etcd endpoints are unhealthy!" 1>&3 2>&4
exit 1
{{- end }}