feat: add backup system with cron and daemon

- Create a new namespace for the backup system
- Implement a cron job for scheduled backups
- Add a daemon set to handle backup tasks across nodes
- Introduce necessary service accounts, roles, and role bindings
- Include environment variable handling and configuration via secrets and config maps
- Ensure the workflow for triggering and executing backups is efficient

This commit establishes a new backup system that utilizes both a cron job and a daemon set to automate backups. It organizes the configurations and credentials needed for S3-compatible storage, allowing for seamless backup management across the specified nodes in the Kubernetes cluster.
This commit is contained in:
songtianlun 2025-04-22 14:49:02 +08:00
parent ba98dbb1b8
commit 6d2acdcd1e
9 changed files with 398 additions and 87 deletions

View File

@ -0,0 +1,33 @@
# Namespace named backup-system.
apiVersion: v1
kind: Namespace
metadata:
name: backup-system
---
# CronJob that signals a backup by setting the trigger-backup=true label on
# daemonset/node-backup-daemon, waiting 60 seconds, then removing the label.
# NOTE(review): the image uses the floating "latest" tag — consider pinning a
# version so scheduled runs are reproducible.
apiVersion: batch/v1
kind: CronJob
metadata:
name: node-backup-job
namespace: backup-system
spec:
# Run every day at 2 a.m.
schedule: "0 2 * * *"
concurrencyPolicy: Forbid
jobTemplate:
spec:
ttlSecondsAfterFinished: 86400 # delete finished Jobs after 1 day
template:
spec:
serviceAccountName: backup-service-account
nodeSelector:
kubernetes.io/hostname: "vkvm-us1"
containers:
- name: backup-trigger
image: bitnami/kubectl:latest
command:
- /bin/sh
- -c
- |
kubectl label daemonset/node-backup-daemon trigger-backup=true --overwrite -n backup-system && \
sleep 60 && \
kubectl label daemonset/node-backup-daemon trigger-backup- -n backup-system
restartPolicy: OnFailure

View File

@ -0,0 +1,140 @@
---
# DaemonSet whose pod polls its own DaemonSet object once a minute and runs
# /scripts/backup.sh whenever the trigger-backup label equals "true".
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-backup-daemon
  namespace: backup-system
spec:
  selector:
    matchLabels:
      app: node-backup
  template:
    metadata:
      labels:
        app: node-backup
    spec:
      # The polling loop below reads the DaemonSet through the API server with
      # the pod's mounted token. The "get" permission on daemonsets is granted
      # to backup-service-account, so the pod must run as that account;
      # otherwise the request is denied, jq prints "null", and the backup is
      # never triggered.
      serviceAccountName: backup-service-account
      nodeSelector:
        kubernetes.io/hostname: "vkvm-us1"
      containers:
        - name: backup-container
          # NOTE(review): the startup script calls `apk`, which needs an
          # Alpine-based image — confirm minio/mc:latest still provides it.
          image: minio/mc:latest
          imagePullPolicy: IfNotPresent
          command:
            - /bin/sh
            - -c
            - |
              # Install the tools the loop and the backup script depend on
              apk add --no-cache jq bash findutils tar curl
              # Wait for a backup to be triggered
              while true; do
                if [ "$(curl -s -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \
                  https://kubernetes.default.svc/apis/apps/v1/namespaces/backup-system/daemonsets/node-backup-daemon \
                  --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \
                  -X GET | jq -r '.metadata.labels["trigger-backup"]')" = "true" ]; then
                  echo "备份触发,执行备份脚本..."
                  bash /scripts/backup.sh
                  echo "备份完成"
                fi
                date
                # Check once per minute
                sleep 60
              done
          env:
            # Connection settings come from the minio-credentials Secret.
            - name: MINIO_ENDPOINT
              valueFrom:
                secretKeyRef:
                  name: minio-credentials
                  key: endpoint
            - name: MINIO_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: minio-credentials
                  key: access-key
            - name: MINIO_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: minio-credentials
                  key: secret-key
            - name: MINIO_BUCKET
              valueFrom:
                secretKeyRef:
                  name: minio-credentials
                  key: bucket
            # Optional tuning values come from the backup-config ConfigMap.
            - name: MINIO_SUBPATH
              valueFrom:
                configMapKeyRef:
                  name: backup-config
                  key: subpath
                  optional: true
            - name: BACKUPS_TO_KEEP
              valueFrom:
                configMapKeyRef:
                  name: backup-config
                  key: backups-to-keep
                  optional: true
          volumeMounts:
            - name: host-data
              mountPath: /data
            - name: scripts
              mountPath: /scripts
      volumes:
        # The node's /data directory, mounted at /data in the container.
        - name: host-data
          hostPath:
            path: /data
        # Scripts from the backup-script ConfigMap, mounted with mode 0755.
        - name: scripts
          configMap:
            name: backup-script
            defaultMode: 0755
---
# Opaque Secret holding the object-storage endpoint, access key, secret key
# and bucket name as base64-encoded values.
# NOTE(review): base64 is an encoding, not encryption — these look like live
# credentials committed to version control. Rotate them and supply the Secret
# out of band (sealed/external secret or a manual `kubectl create secret`).
apiVersion: v1
kind: Secret
metadata:
name: minio-credentials
namespace: backup-system
type: Opaque
data:
# Replace these with your own base64-encoded values
endpoint: aHR0cHM6Ly9hcGkubWluaW8uc2t5Ynl0ZS5tZQ== # https://api.minio.skybyte.me
access-key: RVZuWFViR2xld2t0dFF0em9XUWs= # base64 of the access key (plaintext intentionally not repeated)
secret-key: THNxVFRmc0VEVzBFY3Buc09aOUxyTnhwc21zajdIMGxlR2R0WHBwRg== # base64 of the secret key (plaintext intentionally not repeated)
bucket: YmFja3Vwcw== # backups
---
# Non-secret backup settings: the sub-path value and the number of backups
# to keep. Both values are quoted so they stay strings.
apiVersion: v1
kind: ConfigMap
metadata:
name: backup-config
namespace: backup-system
data:
subpath: "backups"
backups-to-keep: "3"
---
# ServiceAccount named backup-service-account in the backup-system namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
name: backup-service-account
namespace: backup-system
---
# Namespaced Role allowing get, patch and update on DaemonSets (API group
# "apps") in backup-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: backup-role
namespace: backup-system
rules:
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get", "patch", "update"]
---
# Grants the permissions of Role backup-role to the ServiceAccount
# backup-service-account in the backup-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: backup-role-binding
namespace: backup-system
subjects:
- kind: ServiceAccount
name: backup-service-account
namespace: backup-system
roleRef:
kind: Role
name: backup-role
apiGroup: rbac.authorization.k8s.io

View File

@ -0,0 +1,43 @@
# Non-secret backup settings: sub-path, number of backups to keep, and the
# HTTPS / signature-version switches. All values are quoted strings.
apiVersion: v1
kind: ConfigMap
metadata:
name: backup-config
namespace: backup-system
data:
subpath: "nodes"
backups-to-keep: "3"
use-https: "True"
signature-v2: "False" # set to "True" if the S3 service requires V2 signatures
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: backup-service-account
namespace: backup-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: backup-role
namespace: backup-system
rules:
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list"]
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["create"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: backup-role-binding
namespace: backup-system
subjects:
- kind: ServiceAccount
name: backup-service-account
namespace: backup-system
roleRef:
kind: Role
name: backup-role
apiGroup: rbac.authorization.k8s.io

View File

@ -0,0 +1,111 @@
# Shell scripts for the backup workflow.
#   backup.sh        - archives each first-level directory of /data/local-csi,
#                      uploads it with s3cmd and prunes old archives.
#   s3cmd-trigger.sh - runs backup.sh when /tmp/backup-triggered exists.
apiVersion: v1
kind: ConfigMap
metadata:
  name: backup-script
  namespace: backup-system
data:
  backup.sh: |
    #!/bin/bash
    set -e

    # Read settings from the environment
    S3_ENDPOINT="${S3_ENDPOINT}"
    S3_ACCESS_KEY="${S3_ACCESS_KEY}"
    S3_SECRET_KEY="${S3_SECRET_KEY}"
    S3_BUCKET="${S3_BUCKET}"
    S3_SUBPATH="${S3_SUBPATH:-backups}"
    BACKUPS_TO_KEEP="${BACKUPS_TO_KEEP:-7}"

    # s3cmd's host_base/host_bucket take a bare host[:port]; the endpoint may
    # be supplied as a full URL, so drop any scheme prefix and trailing slash.
    S3_HOST="${S3_ENDPOINT#*://}"
    S3_HOST="${S3_HOST%/}"

    # Use the node name when provided, otherwise the container hostname
    HOSTNAME=$(hostname)
    HOSTNAME="${NODE_NAME:-$HOSTNAME}"

    # Write the s3cmd configuration (contains credentials, so keep it private)
    cat > ~/.s3cfg << EOF
    [default]
    access_key = ${S3_ACCESS_KEY}
    secret_key = ${S3_SECRET_KEY}
    host_base = ${S3_HOST}
    host_bucket = ${S3_HOST}
    use_https = ${USE_HTTPS:-True}
    signature_v2 = ${SIGNATURE_V2:-False}
    EOF
    chmod 600 ~/.s3cfg

    # Skip the backup when /data/local-csi does not exist
    if [ ! -d "/data/local-csi" ]; then
      echo "目录 /data/local-csi 不存在,退出备份"
      exit 0
    fi

    # Skip the backup when there is not at least one sub-directory
    DIR_COUNT=$(find /data/local-csi -mindepth 1 -maxdepth 1 -type d | wc -l)
    if [ "$DIR_COUNT" -eq 0 ]; then
      echo "目录 /data/local-csi 中没有子目录,退出备份"
      exit 0
    fi

    # Remove leftover staging data if any step fails; the script runs inside a
    # long-lived pod, so leaked archives in /tmp would accumulate.
    TEMP_DIR=""
    BACKUP_PATH=""
    cleanup() {
      if [ -n "$TEMP_DIR" ]; then rm -rf "$TEMP_DIR"; fi
      if [ -n "$BACKUP_PATH" ]; then rm -f "$BACKUP_PATH"; fi
      return 0
    }
    trap cleanup EXIT

    # Process every first-level directory. The loop reads from a process
    # substitution so it runs in this shell and the EXIT trap sees its variables.
    while IFS= read -r dir; do
      DIR_NAME=$(basename "$dir")
      TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
      BACKUP_NAME="backup-${DIR_NAME}-${TIMESTAMP}.tar.gz"
      TEMP_DIR=$(mktemp -d)
      BACKUP_PATH="/tmp/${BACKUP_NAME}"
      echo "正在备份目录: $dir"

      # Copy into a staging directory named after the source, then archive
      # relative to the staging root so archive members are "<DIR_NAME>/..."
      # rather than the random temporary path.
      mkdir "$TEMP_DIR/$DIR_NAME"
      rsync -a "/data/local-csi/$DIR_NAME/" "$TEMP_DIR/$DIR_NAME/"
      tar -czf "$BACKUP_PATH" -C "$TEMP_DIR" "$DIR_NAME"

      # Remove the staging copy
      rm -rf "$TEMP_DIR"
      TEMP_DIR=""

      # Compute the upload location
      BACKUP_TARGET_PATH="s3://${S3_BUCKET}/${S3_SUBPATH}/${HOSTNAME}/${DIR_NAME}/"
      echo "上传备份 $BACKUP_PATH 到 S3 路径: $BACKUP_TARGET_PATH"

      # Upload the archive
      s3cmd put "$BACKUP_PATH" "${BACKUP_TARGET_PATH}${BACKUP_NAME}"

      # Remove the local archive
      rm -f "$BACKUP_PATH"
      BACKUP_PATH=""

      # Prune old backups
      echo "清理旧备份,保留最新的 $BACKUPS_TO_KEEP 个文件"

      # List this directory's archives, oldest first. `|| true` keeps an empty
      # match from aborting the script under `set -e`.
      BACKUP_FILES=$(s3cmd ls "${BACKUP_TARGET_PATH}" | sort -k1,2 | grep "backup-${DIR_NAME}" || true)
      BACKUP_COUNT=$(printf '%s\n' "$BACKUP_FILES" | grep -c "backup-${DIR_NAME}" || true)
      if [ "$BACKUP_COUNT" -gt "$BACKUPS_TO_KEEP" ]; then
        # Number of archives to delete
        DELETE_COUNT=$((BACKUP_COUNT - BACKUPS_TO_KEEP))
        # The oldest archives; column 4 of `s3cmd ls` output is the object URI
        FILES_TO_DELETE=$(printf '%s\n' "$BACKUP_FILES" | head -n "$DELETE_COUNT" | awk '{print $4}')
        # Delete them
        for FILE in $FILES_TO_DELETE; do
          echo "删除旧备份: $FILE"
          s3cmd rm "$FILE"
        done
      fi
    done < <(find /data/local-csi -mindepth 1 -maxdepth 1 -type d)

    echo "备份完成"
  s3cmd-trigger.sh: |
    #!/bin/bash
    set -e
    # Run the backup when the marker file is present.
    # NOTE(review): with `set -e`, a failing backup.sh leaves the marker in
    # place, so the next invocation retries — confirm this is intended.
    if [ -f "/tmp/backup-triggered" ]; then
      echo "发现备份触发标记,执行备份脚本..."
      bash /scripts/backup.sh
      rm /tmp/backup-triggered
      echo "备份完成"
    fi

View File

@ -1,8 +1,3 @@
apiVersion: v1
kind: Namespace
metadata:
name: backup-system
---
apiVersion: batch/v1
kind: CronJob
metadata:
@ -19,7 +14,8 @@ spec:
spec:
serviceAccountName: backup-service-account
nodeSelector:
kubernetes.io/hostname: "vkvm-us1"
#kubernetes.io/hostname: "vkvm-us1"
region: us
containers:
- name: backup-trigger
image: bitnami/kubectl:latest
@ -27,7 +23,10 @@ spec:
- /bin/sh
- -c
- |
kubectl label daemonset/node-backup-daemon trigger-backup=true --overwrite -n backup-system && \
sleep 60 && \
kubectl label daemonset/node-backup-daemon trigger-backup- -n backup-system
# 创建触发文件到所有备份 Pod 中
for pod in $(kubectl get pods -n backup-system -l app=node-backup -o jsonpath='{.items[*].metadata.name}'); do
echo "触发 Pod $pod 执行备份"
kubectl exec $pod -n backup-system -- touch /tmp/backup-triggered
done
echo "所有节点备份已触发"
restartPolicy: OnFailure

View File

@ -1,4 +1,3 @@
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
@ -13,55 +12,64 @@ spec:
labels:
app: node-backup
spec:
nodeSelector:
kubernetes.io/hostname: "vkvm-us1"
#nodeSelector:
# kubernetes.io/hostname: "vkvm-us1"
#affinity:
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# #- key: region
# # operator: NotIn
# # values:
# # - cn
# - key: kubernetes.io/hostname
# operator: In
# values:
# - zgocloud-us1
containers:
- name: backup-container
image: minio/mc:latest
image: alpine:latest
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
- |
# 添加依赖的工具
apk add --no-cache jq bash findutils tar curl
# 安装依赖工具
apk add --no-cache bash s3cmd tar rsync curl
# 等待触发备份
# 启动一个循环,每分钟检查一次触发文件
while true; do
if [ "$(curl -s -H "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \
https://kubernetes.default.svc/apis/apps/v1/namespaces/backup-system/daemonsets/node-backup-daemon \
--cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \
-X GET | jq -r '.metadata.labels["trigger-backup"]')" = "true" ]; then
echo "备份触发,执行备份脚本..."
bash /scripts/backup.sh
echo "备份完成"
fi
bash /scripts/s3cmd-trigger.sh
date
# 每分钟检查一次
sleep 60
done
env:
- name: MINIO_ENDPOINT
- name: NODE_NAME # 使用 Downward API 获取 Pod 运行的节点名
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: S3_ENDPOINT
valueFrom:
secretKeyRef:
name: minio-credentials
name: s3-credentials
key: endpoint
- name: MINIO_ACCESS_KEY
- name: S3_ACCESS_KEY
valueFrom:
secretKeyRef:
name: minio-credentials
name: s3-credentials
key: access-key
- name: MINIO_SECRET_KEY
- name: S3_SECRET_KEY
valueFrom:
secretKeyRef:
name: minio-credentials
name: s3-credentials
key: secret-key
- name: MINIO_BUCKET
- name: S3_BUCKET
valueFrom:
secretKeyRef:
name: minio-credentials
name: s3-credentials
key: bucket
- name: MINIO_SUBPATH
- name: S3_SUBPATH
valueFrom:
configMapKeyRef:
name: backup-config
@ -73,6 +81,18 @@ spec:
name: backup-config
key: backups-to-keep
optional: true
- name: USE_HTTPS
valueFrom:
configMapKeyRef:
name: backup-config
key: use-https
optional: true
- name: SIGNATURE_V2
valueFrom:
configMapKeyRef:
name: backup-config
key: signature-v2
optional: true
volumeMounts:
- name: host-data
mountPath: /data
@ -86,55 +106,3 @@ spec:
configMap:
name: backup-script
defaultMode: 0755
---
apiVersion: v1
kind: Secret
metadata:
name: minio-credentials
namespace: backup-system
type: Opaque
data:
# 这些值需要使用base64编码替换
endpoint: aHR0cHM6Ly9hcGkubWluaW8uc2t5Ynl0ZS5tZQ== # https://api.minio.skybyte.me
access-key: RVZuWFViR2xld2t0dFF0em9XUWs= # EVnXUbGlewkttQtzoWQk
secret-key: THNxVFRmc0VEVzBFY3Buc09aOUxyTnhwc21zajdIMGxlR2R0WHBwRg== # LsqTTfsEDW0EcpnsOZ9LrNxpsmsj7H0leGdtXppF
bucket: YmFja3Vwcw== # backups
---
apiVersion: v1
kind: ConfigMap
metadata:
name: backup-config
namespace: backup-system
data:
subpath: "backups"
backups-to-keep: "3"
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: backup-service-account
namespace: backup-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: backup-role
namespace: backup-system
rules:
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get", "patch", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: backup-role-binding
namespace: backup-system
subjects:
- kind: ServiceAccount
name: backup-service-account
namespace: backup-system
roleRef:
kind: Role
name: backup-role
apiGroup: rbac.authorization.k8s.io

View File

@ -0,0 +1,4 @@
# Namespace named backup-system.
apiVersion: v1
kind: Namespace
metadata:
name: backup-system

View File

@ -0,0 +1,13 @@
# Opaque Secret holding the S3 endpoint, access key, secret key and bucket
# name as base64-encoded values.
# NOTE(review): base64 is an encoding, not encryption — these look like live
# credentials committed to version control. Rotate them and supply the Secret
# out of band (sealed/external secret or a manual `kubectl create secret`).
apiVersion: v1
kind: Secret
metadata:
name: s3-credentials
namespace: backup-system
type: Opaque
data:
# Replace these with your own base64-encoded values
endpoint: aHR0cHM6Ly9hcGkubWluaW8uc2t5Ynl0ZS5tZQ== # https://api.minio.skybyte.me
access-key: RVZuWFViR2xld2t0dFF0em9XUWs= # base64 of the access key (plaintext intentionally not repeated)
secret-key: THNxVFRmc0VEVzBFY3Buc09aOUxyTnhwc21zajdIMGxlR2R0WHBwRg== # base64 of the secret key (plaintext intentionally not repeated)
bucket: YmFja3Vwcw== # backups