From ba98dbb1b825b486420ade6d261ad39979b913d4 Mon Sep 17 00:00:00 2001
From: songtianlun
Date: Tue, 22 Apr 2025 14:04:42 +0800
Subject: [PATCH] feat: add cronjob for node backups

- Create a cronjob to back up node1 data to node8
- Define schedule for daily backups at 3:00 AM
- Include error handling and notifications via Feishu
- Use SSH and rsync for secure and efficient data transfer
- Ship the Secrets as templates; the SSH private key and the Feishu
  webhook URL are provisioned out of band and are not tracked in git

This commit introduces a new cronjob that automates the backup of node1
to node8, enabling easier management and recovery of data. Backups are
logged, and a Feishu notification is sent on both success and failure.
---
 .../cronjob-backup-all-data-path-to-zgo.yaml  | 134 ++++++++++++++++++
 backups/by-rsync/secret-feishu-webhook.yaml   |  12 ++
 backups/by-rsync/secret-rsync-ssh-key.yaml    |  12 ++
 backups/by-rsync/ssh-key-pair/rsync-key.pub   |   1 +
 backups/{ => daemonset+cronjob}/cfg.yml       |   0
 backups/daemonset+cronjob/crontab.yaml        |  33 +++++
 .../daemonset.yaml}                           |  35 +----
 7 files changed, 193 insertions(+), 34 deletions(-)
 create mode 100644 backups/by-rsync/cronjob-backup-all-data-path-to-zgo.yaml
 create mode 100644 backups/by-rsync/secret-feishu-webhook.yaml
 create mode 100644 backups/by-rsync/secret-rsync-ssh-key.yaml
 create mode 100644 backups/by-rsync/ssh-key-pair/rsync-key.pub
 rename backups/{ => daemonset+cronjob}/cfg.yml (100%)
 create mode 100644 backups/daemonset+cronjob/crontab.yaml
 rename backups/{load.yml => daemonset+cronjob/daemonset.yaml} (79%)

diff --git a/backups/by-rsync/cronjob-backup-all-data-path-to-zgo.yaml b/backups/by-rsync/cronjob-backup-all-data-path-to-zgo.yaml
new file mode 100644
--- /dev/null
+++ b/backups/by-rsync/cronjob-backup-all-data-path-to-zgo.yaml
@@ -0,0 +1,134 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: backup-node1-to-node8
+  # Same namespace as the rsync-ssh-key and feishu-webhook Secrets: a Pod can
+  # only mount Secrets that live in its own namespace.
+  namespace: backups
+spec:
+  # Every day at 03:00. Without spec.timeZone the schedule is interpreted in
+  # the kube-controller-manager's local time zone.
+  schedule: "0 3 * * *"
+  # Do not start a new run while the previous backup is still running.
+  concurrencyPolicy: Forbid
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+            - name: rsync-backup
+              # Stock Ubuntu ships none of the required tools, so the script
+              # installs them on every run. An image with rsync, ssh, curl and
+              # jq preinstalled (e.g. eeacms/rsync, which may need different
+              # script paths and user) would avoid that step.
+              image: ubuntu:24.04
+              command: ["/bin/bash", "-c"]
+              args:
+                - |
+                  # Install tooling. ca-certificates is requested explicitly: curl
+                  # needs it to verify the HTTPS webhook, and
+                  # --no-install-recommends would not pull it in.
+                  export DEBIAN_FRONTEND=noninteractive
+                  if ! { apt-get update && apt-get install -y --no-install-recommends \
+                      ca-certificates openssh-client rsync curl jq; }; then
+                    echo "Failed to install backup tooling." >&2
+                    exit 1
+                  fi
+
+                  echo "Starting backup script for node $MY_NODE_NAME to node8..."
+                  START_TIME=$(date +%s)
+                  SOURCE_DIR="/host-data"
+                  DEST_NODE="node8"
+                  DEST_BASE_DIR="/data/backups"
+                  DEST_DIR="$DEST_BASE_DIR/$MY_NODE_NAME/data"
+                  SSH_KEY_PATH="/root/.ssh/id_rsa"
+                  FEISHU_WEBHOOK_URL=$(cat /etc/feishu-webhook/url)
+
+                  # notify TITLE TEXT: post a rich-text message to the Feishu bot.
+                  # jq assembles the payload so both arguments are JSON-escaped.
+                  notify() {
+                    local payload
+                    payload=$(jq -n \
+                      --arg title "$1" \
+                      --arg text "$2" \
+                      '{msg_type: "post", content: {post: {zh_cn: {title: $title, content: [[{tag: "text", text: $text}]]}}}}')
+                    curl -X POST -H "Content-Type: application/json" -d "$payload" "$FEISHU_WEBHOOK_URL"
+                  }
+
+                  echo "Ensuring SSH directory exists and setting permissions..."
+                  mkdir -p /root/.ssh && chmod 700 /root/.ssh
+                  # The Secret is mounted read-only, so use a private copy of the key.
+                  cp /etc/ssh-key/id_rsa "$SSH_KEY_PATH"
+                  chmod 600 "$SSH_KEY_PATH"
+
+                  echo "Running rsync..."
+                  # NOTE(review): StrictHostKeyChecking=no accepts any host key;
+                  # consider mounting a known_hosts entry for node8 instead.
+                  rsync -avz --delete -e "ssh -i $SSH_KEY_PATH -o StrictHostKeyChecking=no" \
+                    --rsync-path="mkdir -p $DEST_DIR && rsync" \
+                    "$SOURCE_DIR/" "root@$DEST_NODE:$DEST_DIR"
+                  RSYNC_EXIT_CODE=$?
+                  END_TIME=$(date +%s)
+                  DURATION=$((END_TIME - START_TIME))
+
+                  if [ "$RSYNC_EXIT_CODE" -eq 0 ]; then
+                    echo "Rsync completed successfully."
+                    SOURCE_SIZE=$(du -sh "$SOURCE_DIR" | cut -f1)
+                    echo "Source directory size: $SOURCE_SIZE"
+                    echo "Backup duration: $DURATION seconds"
+                    MSG_TITLE="✅ [K3s Backup] $MY_NODE_NAME Backup Success"
+                    # printf turns \n into real line breaks; inside double quotes
+                    # bash would send a literal backslash-n to Feishu.
+                    MSG_TEXT=$(printf 'Host: %s\nSource: /data\nDestination Node: %s\nDestination Path: %s\nSource Size: %s\nDuration: %ss' \
+                      "$MY_NODE_NAME" "$DEST_NODE" "$DEST_DIR" "$SOURCE_SIZE" "$DURATION")
+                    echo "Sending notification to Feishu..."
+                    notify "$MSG_TITLE" "$MSG_TEXT"
+                    echo "Notification sent."
+                  else
+                    echo "Rsync failed with exit code $RSYNC_EXIT_CODE."
+                    MSG_TITLE="❌ [K3s Backup] $MY_NODE_NAME Backup Failed!"
+                    MSG_TEXT=$(printf 'Host: %s\nSource: /data\nDestination Node: %s\nDestination Path: %s\nRsync Exit Code: %s\nDuration: %ss' \
+                      "$MY_NODE_NAME" "$DEST_NODE" "$DEST_DIR" "$RSYNC_EXIT_CODE" "$DURATION")
+                    echo "Sending failure notification to Feishu..."
+                    notify "$MSG_TITLE" "$MSG_TEXT"
+                    exit 1
+                  fi
+              env:
+                # Downward API: the name of the node this Pod is scheduled on.
+                - name: MY_NODE_NAME
+                  valueFrom:
+                    fieldRef:
+                      fieldPath: spec.nodeName
+              volumeMounts:
+                # The host's /data directory, exposed read-only as /host-data.
+                - name: data-volume
+                  mountPath: /host-data
+                  readOnly: true
+                # SSH private key Secret.
+                - name: ssh-key-volume
+                  mountPath: /etc/ssh-key
+                  readOnly: true
+                # Feishu webhook Secret.
+                - name: feishu-webhook-volume
+                  mountPath: /etc/feishu-webhook
+                  readOnly: true
+          volumes:
+            - name: data-volume
+              hostPath:
+                path: /data
+                # The Pod fails to start unless /data already exists as a directory.
+                type: Directory
+            - name: ssh-key-volume
+              secret:
+                secretName: rsync-ssh-key
+                # Owner read-only; the script still copies the key and sets 0600.
+                defaultMode: 0400
+            - name: feishu-webhook-volume
+              secret:
+                secretName: feishu-webhook
+          # OnFailure restarts the container inside the same Pod when the script
+          # exits non-zero; the Job's backoffLimit bounds the number of retries.
+          restartPolicy: OnFailure
+          nodeSelector:
+            # Critical: pins the Pod to node1 so that /data is node1's data.
+            kubernetes.io/hostname: node1
+# For node2 through node7, copy this file and change the node name above.
diff --git a/backups/by-rsync/secret-feishu-webhook.yaml b/backups/by-rsync/secret-feishu-webhook.yaml
new file mode 100644
--- /dev/null
+++ b/backups/by-rsync/secret-feishu-webhook.yaml
@@ -0,0 +1,12 @@
+# Template only: do not commit the real webhook URL. Provision it out of
+# band, for example:
+#   kubectl -n backups create secret generic feishu-webhook \
+#     --from-literal=url='https://open.feishu.cn/open-apis/bot/v2/hook/<token>'
+apiVersion: v1
+kind: Secret
+metadata:
+  name: feishu-webhook
+  namespace: backups
+type: Opaque
+stringData:
+  url: "https://open.feishu.cn/open-apis/bot/v2/hook/REPLACE_ME"
diff --git a/backups/by-rsync/secret-rsync-ssh-key.yaml b/backups/by-rsync/secret-rsync-ssh-key.yaml
new file mode 100644
--- /dev/null
+++ b/backups/by-rsync/secret-rsync-ssh-key.yaml
@@ -0,0 +1,12 @@
+# Template only: do not commit the private key. Create the Secret from a
+# key file kept outside the repository, for example:
+#   kubectl -n backups create secret generic rsync-ssh-key \
+#     --from-file=id_rsa=/path/to/rsync-key
+apiVersion: v1
+kind: Secret
+metadata:
+  name: rsync-ssh-key
+  namespace: backups
+type: Opaque
+stringData:
+  id_rsa: "REPLACE_WITH_PRIVATE_KEY"
diff --git a/backups/by-rsync/ssh-key-pair/rsync-key.pub b/backups/by-rsync/ssh-key-pair/rsync-key.pub
new file mode 100644
index 0000000..f54d2e3
--- /dev/null
+++ b/backups/by-rsync/ssh-key-pair/rsync-key.pub
@@ -0,0 +1 @@
+ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIB8epmsKwPrWWqSBdXpjNTQx1Afi3gHQc9KD3LbmmQoV songtianlun@k3s.rsync.backups
diff --git a/backups/cfg.yml b/backups/daemonset+cronjob/cfg.yml
similarity index 100%
rename from backups/cfg.yml
rename to backups/daemonset+cronjob/cfg.yml
diff --git a/backups/daemonset+cronjob/crontab.yaml b/backups/daemonset+cronjob/crontab.yaml
new file mode 100644
--- /dev/null
+++ b/backups/daemonset+cronjob/crontab.yaml
@@ -0,0 +1,33 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: backup-system
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: node-backup-job
+  namespace: backup-system
+spec:
+  # Run every day at 02:00
+  schedule: "0 2 * * *"
+  concurrencyPolicy: Forbid
+  jobTemplate:
+    spec:
+      ttlSecondsAfterFinished: 86400  # delete finished Jobs after one day
+      template:
+        spec:
+          serviceAccountName: backup-service-account
+          nodeSelector:
+            kubernetes.io/hostname: "vkvm-us1"
+          containers:
+            - name: backup-trigger
+              image: bitnami/kubectl:latest
+              command:
+                - /bin/sh
+                - -c
+                - |
+                  kubectl label daemonset/node-backup-daemon trigger-backup=true --overwrite -n backup-system && \
+                  sleep 60 && \
+                  kubectl label daemonset/node-backup-daemon trigger-backup- -n backup-system
+          restartPolicy: OnFailure
diff --git a/backups/load.yml b/backups/daemonset+cronjob/daemonset.yaml
similarity index 79%
rename from backups/load.yml
rename to backups/daemonset+cronjob/daemonset.yaml
index b7790a1..d73b8a9 100644
--- a/backups/load.yml
+++ b/backups/daemonset+cronjob/daemonset.yaml
@@ -1,36 +1,3 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: backup-system
----
-apiVersion: batch/v1
-kind: CronJob
-metadata:
-  name: node-backup-job
-  namespace: backup-system
-spec:
-  # 每天凌晨2点运行
-  schedule: "0 2 * * *"
-  concurrencyPolicy: Forbid
-  jobTemplate:
-    spec:
-      ttlSecondsAfterFinished: 86400 # 1天后删除已完成的任务
-      template:
-        spec:
-          serviceAccountName: backup-service-account
-          nodeSelector:
-            kubernetes.io/hostname: "vkvm-us1"
-          containers:
-            - name: backup-trigger
-              image: bitnami/kubectl:latest
-              command:
-                - /bin/sh
-                - -c
-                - |
-                  kubectl label daemonset/node-backup-daemon trigger-backup=true --overwrite -n backup-system && \
-                  sleep 60 && \
-                  kubectl label daemonset/node-backup-daemon trigger-backup- -n backup-system
-          restartPolicy: OnFailure
 ---
 apiVersion: apps/v1
 kind: DaemonSet
@@ -69,7 +36,7 @@ spec:
                 bash /scripts/backup.sh
                 echo "备份完成"
               fi
-
+              date
               # 每分钟检查一次
               sleep 60
             done