feat: add cronjob for node backups
- Create a cronjob to back up node1 data to node8
- Define schedule for daily backups at 3:00 AM
- Include error handling and notifications via Feishu
- Use SSH and rsync for secure and efficient data transfer

This commit introduces a new cronjob that automates the backup of node1 to node8, making data management and recovery easier. The setup includes the necessary security measures and proper logging of backups, ensuring smoother operation and notifications in case of failures.
This commit is contained in:
parent
ae8bb36d48
commit
ba98dbb1b8
96
backups/by-rsync/cronjob-backup-all-data-path-to-zgo.yaml
Normal file
96
backups/by-rsync/cronjob-backup-all-data-path-to-zgo.yaml
Normal file
@ -0,0 +1,96 @@
|
||||
---
# Nightly rsync backup of node1's /data directory to node8, followed by a
# Feishu notification reporting success or failure.
#
# Prerequisites (must exist in the same namespace as this CronJob):
#   - Secret "rsync-ssh-key"  with key "id_rsa" (SSH private key for root@node8)
#   - Secret "feishu-webhook" with key "url"    (Feishu bot webhook URL)
#
# For node2 to node7, copy this manifest and change metadata.name and the
# kubernetes.io/hostname value in nodeSelector.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: backup-node1-to-node8
  # Secret volumes can only reference Secrets in the Pod's own namespace.
  namespace: backups
spec:
  # Every day at 03:00 (controller time zone unless spec.timeZone is set).
  schedule: "0 3 * * *"
  # Never start a new run while the previous rsync (which uses --delete)
  # is still in progress.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          # OnFailure restarts the failed container inside the same Pod, so a
          # failing backup is retried (and re-notified) until the Job's
          # backoffLimit is reached.
          restartPolicy: OnFailure
          nodeSelector:
            # Critical: the Pod must run on node1 so that the hostPath volume
            # below is node1's /data.
            kubernetes.io/hostname: node1
          containers:
            - name: rsync-backup
              # Generic base image; the required tools are installed at
              # start-up. Alternatively use an image that already ships
              # rsync, ssh and curl, e.g. eeacms/rsync (script paths and the
              # user may need adjusting):
              # image: eeacms/rsync
              image: ubuntu:latest
              command: ["/bin/bash", "-c"]
              args:
                - |
                  # ca-certificates is listed explicitly: with
                  # --no-install-recommends it is not pulled in by curl, and
                  # the HTTPS call to Feishu would fail certificate checks.
                  apt-get update && apt-get install -y --no-install-recommends \
                    ca-certificates openssh-client rsync curl jq \
                    || { echo "Failed to install required packages." >&2; exit 1; }

                  echo "Starting backup script for node $MY_NODE_NAME to node8..."
                  START_TIME=$(date +%s)
                  SOURCE_DIR="/host-data"
                  DEST_NODE="node8"
                  DEST_BASE_DIR="/data/backups"
                  DEST_DIR="$DEST_BASE_DIR/$MY_NODE_NAME/data"
                  SSH_KEY_PATH="/root/.ssh/id_rsa"
                  FEISHU_WEBHOOK_URL=$(cat /etc/feishu-webhook/url)

                  # notify TITLE TEXT: post a rich-text message to the Feishu
                  # webhook. A failed notification is only logged; it never
                  # changes the exit status of the backup itself.
                  notify() {
                    local payload
                    payload=$(jq -n --arg title "$1" --arg text "$2" \
                      '{msg_type: "post", content: {post: {zh_cn: {title: $title, content: [[{tag: "text", text: $text}]]}}}}')
                    if curl -fsS -X POST -H "Content-Type: application/json" \
                         -d "$payload" "$FEISHU_WEBHOOK_URL"; then
                      echo "Notification sent."
                    else
                      echo "WARNING: failed to send Feishu notification." >&2
                    fi
                  }

                  echo "Ensuring SSH directory exists and setting permissions..."
                  mkdir -p /root/.ssh && chmod 700 /root/.ssh
                  # The Secret mount is read-only, so copy the key to a
                  # location where its mode can be set for ssh.
                  cp /etc/ssh-key/id_rsa "$SSH_KEY_PATH"
                  chmod 600 "$SSH_KEY_PATH"

                  echo "Running rsync..."
                  # NOTE(review): StrictHostKeyChecking=no disables host-key
                  # verification; consider mounting a known_hosts file.
                  rsync -avz --delete -e "ssh -i $SSH_KEY_PATH -o StrictHostKeyChecking=no" \
                    --rsync-path="mkdir -p $DEST_DIR && rsync" \
                    "$SOURCE_DIR/" "root@$DEST_NODE:$DEST_DIR"
                  RSYNC_EXIT_CODE=$?
                  END_TIME=$(date +%s)
                  DURATION=$((END_TIME - START_TIME))

                  # printf is used so that the message contains real newline
                  # characters; "\n" inside double quotes would stay a literal
                  # backslash-n and show up verbatim in the Feishu message.
                  if [ "$RSYNC_EXIT_CODE" -eq 0 ]; then
                    echo "Rsync completed successfully."
                    SOURCE_SIZE=$(du -sh "$SOURCE_DIR" | cut -f1)
                    echo "Source directory size: $SOURCE_SIZE"
                    echo "Backup duration: $DURATION seconds"
                    MSG_TEXT=$(printf 'Host: %s\nSource: /data\nDestination Node: %s\nDestination Path: %s\nSource Size: %s\nDuration: %ss' \
                      "$MY_NODE_NAME" "$DEST_NODE" "$DEST_DIR" "$SOURCE_SIZE" "$DURATION")
                    echo "Sending notification to Feishu..."
                    notify "✅ [K3s Backup] $MY_NODE_NAME Backup Success" "$MSG_TEXT"
                  else
                    echo "Rsync failed with exit code $RSYNC_EXIT_CODE."
                    MSG_TEXT=$(printf 'Host: %s\nSource: /data\nDestination Node: %s\nDestination Path: %s\nRsync Exit Code: %s\nDuration: %ss' \
                      "$MY_NODE_NAME" "$DEST_NODE" "$DEST_DIR" "$RSYNC_EXIT_CODE" "$DURATION")
                    echo "Sending failure notification to Feishu..."
                    notify "❌ [K3s Backup] $MY_NODE_NAME Backup Failed!" "$MSG_TEXT"
                    exit 1
                  fi
              env:
                # Downward API: name of the node this Pod is running on.
                - name: MY_NODE_NAME
                  valueFrom:
                    fieldRef:
                      fieldPath: spec.nodeName
              volumeMounts:
                # The host's /data is visible in the container as /host-data.
                - name: data-volume
                  mountPath: /host-data
                # SSH private key Secret.
                - name: ssh-key-volume
                  mountPath: /etc/ssh-key
                  readOnly: true
                # Feishu webhook Secret.
                - name: feishu-webhook-volume
                  mountPath: /etc/feishu-webhook
                  readOnly: true
          volumes:
            - name: data-volume
              hostPath:
                # /data on the host; type Directory requires that it already
                # exists and is a directory.
                path: /data
                type: Directory
            - name: ssh-key-volume
              secret:
                secretName: rsync-ssh-key
                # Octal file mode (owner read-only). The script copies the key
                # and sets its own mode, but a tight default is safer.
                defaultMode: 0400
            - name: feishu-webhook-volume
              secret:
                secretName: feishu-webhook
|
9
backups/by-rsync/secret-feishu-webhook.yaml
Normal file
9
backups/by-rsync/secret-feishu-webhook.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
---
# Feishu bot webhook used by the backup CronJob, which reads the key "url"
# from this Secret (mounted at /etc/feishu-webhook/url).
#
# SECURITY: the real webhook URL must not be committed to version control.
# The URL previously stored in this file (base64-encoded and in a plain-text
# comment) should be treated as leaked: regenerate the bot webhook in Feishu,
# then create the Secret out of band, for example:
#   kubectl -n backups create secret generic feishu-webhook \
#     --from-literal=url='https://open.feishu.cn/open-apis/bot/v2/hook/<token>'
apiVersion: v1
kind: Secret
metadata:
  name: feishu-webhook
  namespace: backups
type: Opaque
stringData:
  # Placeholder only; supply the real value at deploy time.
  url: "https://open.feishu.cn/open-apis/bot/v2/hook/REPLACE_WITH_WEBHOOK_TOKEN"
|
9
backups/by-rsync/secret-rsync-ssh-key.yaml
Normal file
9
backups/by-rsync/secret-rsync-ssh-key.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
---
# SSH private key used by the backup CronJob to reach the backup target via
# rsync over SSH. The CronJob reads the key "id_rsa" from this Secret
# (mounted at /etc/ssh-key/id_rsa).
#
# SECURITY: a private key must never be committed to version control. The key
# previously stored in this file should be treated as compromised: remove its
# public half from authorized_keys on the backup target, generate a new key
# pair, and create the Secret out of band, for example:
#   ssh-keygen -t ed25519 -N '' -f ./rsync-key
#   kubectl -n backups create secret generic rsync-ssh-key \
#     --from-file=id_rsa=./rsync-key
#
# The server-populated creationTimestamp field is intentionally not part of
# this manifest.
apiVersion: v1
kind: Secret
metadata:
  name: rsync-ssh-key
  namespace: backups
type: Opaque
stringData:
  # Placeholder only; supply the real key at deploy time.
  id_rsa: "REPLACE_WITH_PRIVATE_KEY"
|
8
backups/by-rsync/ssh-key-pair/rsync-key
Normal file
8
backups/by-rsync/ssh-key-pair/rsync-key
Normal file
@ -0,0 +1,8 @@
|
||||
-----BEGIN OPENSSH PRIVATE KEY-----
|
||||
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
|
||||
QyNTUxOQAAACAfHqZrCsD61lqkgXV6YzU0MdQH4t4B0HPSg9y25pkKFQAAAKhQQ2jDUENo
|
||||
wwAAAAtzc2gtZWQyNTUxOQAAACAfHqZrCsD61lqkgXV6YzU0MdQH4t4B0HPSg9y25pkKFQ
|
||||
AAAEDvUoZJlkUJBiRLTup8UL/uCuOgtOvv5tPQ5PKj+yifmh8epmsKwPrWWqSBdXpjNTQx
|
||||
1Afi3gHQc9KD3LbmmQoVAAAAJXNvbmd0aWFubHVuQHNyZC1zb25ndGlhbmx1bi1hcmNobG
|
||||
ludXg=
|
||||
-----END OPENSSH PRIVATE KEY-----
|
1
backups/by-rsync/ssh-key-pair/rsync-key.pub
Normal file
1
backups/by-rsync/ssh-key-pair/rsync-key.pub
Normal file
@ -0,0 +1 @@
|
||||
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB8epmsKwPrWWqSBdXpjNTQx1Afi3gHQc9KD3LbmmQoV songtianlun@k3s.rsync.backups
|
33
backups/daemonset+cronjob/crontab.yaml
Normal file
33
backups/daemonset+cronjob/crontab.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
---
# Namespace used by the node-backup CronJob below.
apiVersion: v1
kind: Namespace
metadata:
  name: backup-system
---
# CronJob that triggers a backup run by setting the "trigger-backup" label on
# the node-backup-daemon DaemonSet and removing it again 60 seconds later.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: backup-system
  name: node-backup-job
spec:
  concurrencyPolicy: Forbid
  # Runs every day at 02:00.
  schedule: '0 2 * * *'
  jobTemplate:
    spec:
      # Finished Jobs are deleted after one day.
      ttlSecondsAfterFinished: 86400
      template:
        spec:
          restartPolicy: OnFailure
          serviceAccountName: backup-service-account
          nodeSelector:
            kubernetes.io/hostname: vkvm-us1
          containers:
            - image: bitnami/kubectl:latest
              name: backup-trigger
              command:
                - /bin/sh
                - -c
                # Each step only runs if the previous one succeeded.
                - |
                  kubectl label daemonset/node-backup-daemon trigger-backup=true --overwrite -n backup-system && \
                  sleep 60 && \
                  kubectl label daemonset/node-backup-daemon trigger-backup- -n backup-system
|
@ -1,36 +1,3 @@
|
||||
---
# Namespace used by the node-backup CronJob below.
apiVersion: v1
kind: Namespace
metadata:
  name: backup-system
---
# CronJob that triggers a backup run by setting the "trigger-backup" label on
# the node-backup-daemon DaemonSet and removing it again 60 seconds later.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: backup-system
  name: node-backup-job
spec:
  concurrencyPolicy: Forbid
  # Runs every day at 02:00.
  schedule: '0 2 * * *'
  jobTemplate:
    spec:
      # Finished Jobs are deleted after one day.
      ttlSecondsAfterFinished: 86400
      template:
        spec:
          restartPolicy: OnFailure
          serviceAccountName: backup-service-account
          nodeSelector:
            kubernetes.io/hostname: vkvm-us1
          containers:
            - image: bitnami/kubectl:latest
              name: backup-trigger
              command:
                - /bin/sh
                - -c
                # Each step only runs if the previous one succeeded.
                - |
                  kubectl label daemonset/node-backup-daemon trigger-backup=true --overwrite -n backup-system && \
                  sleep 60 && \
                  kubectl label daemonset/node-backup-daemon trigger-backup- -n backup-system
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
@ -69,7 +36,7 @@ spec:
|
||||
bash /scripts/backup.sh
|
||||
echo "备份完成"
|
||||
fi
|
||||
|
||||
date
|
||||
# 每分钟检查一次
|
||||
sleep 60
|
||||
done
|
Loading…
Reference in New Issue
Block a user