Initial commit: backup + mirror automation for self-hosted Gitea
Includes:
- gitea-backups/bin/backup.sh — per-push bundle + DB snapshot to local + S3
- gitea-backups/bin/install-hooks.sh — idempotent post-receive shim installer
- gitea-backups/bin/retention.sh — count-based retention: keep newest 7 dates
- gitea-mirror/bin/auto-mirror.sh — Gitea -> GitHub push-mirror automation, hardened against Gitea outages
- crontab.txt — reference for the 3 cron entries
- README.md — architecture, layout, bootstrap
This commit is contained in:
Executable
+113
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env bash
# Gitea per-push backup: bundles the pushed repo + snapshots SQLite DB.
# Invoked from each repo's hooks/post-receive.d/zzz-backup shim.
# Failures NEVER block the push — git push has already succeeded by the time we run.

set -u

# Git hooks run with $HOME pointing oddly (the repo dir or similar), so tools
# like rclone can't find ~/.config/rclone/rclone.conf. Force it.
export HOME="/home/ubuntu"

BACKUP_ROOT="/home/ubuntu/gitea-backups"
GITEA_REPOS="/home/ubuntu/gitea/data/gitea-repositories"
GITEA_DB="/home/ubuntu/gitea/data/gitea.db"
# NOTE(review): RETENTION_DAYS is not referenced anywhere in this script —
# retention is handled by retention.sh (daily cron). Kept for reference only.
RETENTION_DAYS=7
LOG="${BACKUP_ROOT}/logs/backup.log"
STATUS="${BACKUP_ROOT}/logs/last-status"

# S3 (offsite). Empty S3_BUCKET disables the upload step entirely.
S3_BUCKET="toqqer-gitea-backup"
S3_REMOTE="s3" # rclone remote name (configured in ~/.config/rclone/rclone.conf)

# The logs/ directory may not exist on a fresh host; create it up front so the
# log() appends and STATUS writes below never fail on a missing directory.
mkdir -p "${BACKUP_ROOT}/logs" 2>/dev/null || true

ts="$(date -u +%Y-%m-%dT%H-%M-%SZ)"
day="$(date -u +%Y-%m-%d)"
# log MESSAGE — append a UTC-timestamped line to $LOG.
log() { printf '%s %s\n' "$(date -u +%FT%TZ)" "$*" >> "$LOG"; }
|
||||
|
||||
# git invokes the hook with $GIT_DIR set to the bare repo path
repo_path="${GIT_DIR:-$(pwd)}"
repo_path="$(cd "$repo_path" && pwd)"

# derive owner/name from the path: .../gitea-repositories/<owner>/<name>.git
# Quote the prefix inside ${..#..} so any glob characters in GITEA_REPOS are
# treated literally rather than as a pattern (ShellCheck SC2295).
rel="${repo_path#"${GITEA_REPOS}"/}"
owner="${rel%%/*}"
name="${rel#*/}"
name="${name%.git}"

# If repo_path is not under GITEA_REPOS the prefix strip is a no-op, leaving
# owner empty (leading slash) or equal to rel — bail out without blocking.
if [[ -z "$owner" || -z "$name" || "$owner" == "$rel" ]]; then
  log "SKIP: could not parse owner/name from $repo_path"
  echo "FAIL ${ts} parse-error ${repo_path}" > "$STATUS"
  exit 0 # never block the push
fi

log "START ${owner}/${name}"
|
||||
|
||||
# ---- 1) repo bundle ---------------------------------------------------------
# One self-contained bundle per push, named by UTC timestamp, under
# repos/<owner>/<name>/. A bundle failure is logged and recorded in STATUS
# but does not stop the DB snapshot below.
bundle_dir="${BACKUP_ROOT}/repos/${owner}/${name}"
bundle_file="${bundle_dir}/${ts}.bundle"
mkdir -p "$bundle_dir"

if ! git -C "$repo_path" bundle create "$bundle_file" --all 2>>"$LOG"; then
  log "FAIL bundle ${owner}/${name}"
  echo "FAIL ${ts} bundle ${owner}/${name}" > "$STATUS"
  # continue to DB backup anyway
else
  bundle_size=$(stat -c %s "$bundle_file" 2>/dev/null || echo 0)
  log "OK bundle ${owner}/${name} -> ${bundle_file} (${bundle_size} bytes)"
fi
|
||||
|
||||
# ---- 2) SQLite hot backup (via python3, avoids sqlite3 CLI dependency) ----
# Uses sqlite3's online backup API for a consistent snapshot even while Gitea
# is writing. Paths are passed as argv instead of being interpolated into the
# Python source, so quotes or odd characters in a path can't break (or inject
# code into) the snippet.
db_file="${BACKUP_ROOT}/db/${ts}-gitea.db"
mkdir -p "${BACKUP_ROOT}/db" # fresh host: db/ may not exist yet; connect() would fail
if python3 -c '
import sqlite3, sys
src = sqlite3.connect(sys.argv[1])
dst = sqlite3.connect(sys.argv[2])
src.backup(dst)
dst.close(); src.close()
' "$GITEA_DB" "$db_file" 2>>"$LOG"; then
  if gzip -f "$db_file" 2>>"$LOG"; then
    db_size=$(stat -c %s "${db_file}.gz" 2>/dev/null || echo 0)
    log "OK db -> ${db_file}.gz (${db_size} bytes)"
  else
    log "FAIL gzip db ${db_file}"
  fi
else
  log "FAIL sqlite .backup -> ${db_file}"
fi
|
||||
|
||||
# ---- 3) S3 offsite upload --------------------------------------------------
# Layout: s3://<bucket>/YYYY-MM-DD/repos/<owner>/<name>/<ts>.bundle
#         s3://<bucket>/YYYY-MM-DD/db/<ts>-gitea.db.gz
# 7-day retention enforced by the bucket's lifecycle policy, NOT here.
s3_status="skipped"
if [[ -n "$S3_BUCKET" ]] && command -v rclone >/dev/null 2>&1; then
  s3_ok=1
  s3_bundle_target="${S3_REMOTE}:${S3_BUCKET}/${day}/repos/${owner}/${name}/${ts}.bundle"
  s3_db_target="${S3_REMOTE}:${S3_BUCKET}/${day}/db/${ts}-gitea.db.gz"

  # Upload each artifact only if the corresponding local file was produced
  # by the steps above; a failed upload flips the overall flag.
  if [[ -f "$bundle_file" ]]; then
    if rclone copyto --no-traverse "$bundle_file" "$s3_bundle_target" 2>>"$LOG"; then
      log "OK s3 bundle -> ${s3_bundle_target}"
    else
      s3_ok=0
      log "FAIL s3 bundle -> ${s3_bundle_target}"
    fi
  fi

  if [[ -f "${db_file}.gz" ]]; then
    if rclone copyto --no-traverse "${db_file}.gz" "$s3_db_target" 2>>"$LOG"; then
      log "OK s3 db -> ${s3_db_target}"
    else
      s3_ok=0
      log "FAIL s3 db -> ${s3_db_target}"
    fi
  fi

  if (( s3_ok == 1 )); then
    s3_status="ok"
  else
    s3_status="fail"
  fi
fi
|
||||
|
||||
# ---- 4) cleanup is handled by retention.sh (daily cron, "keep newest N dates"). ----
# Push-triggered cleanup was removed because age-based cleanup would empty the
# bucket during quiet periods. retention.sh keeps the most-recent N date-folders
# regardless of how old they are.

# Only report overall OK when this run's bundle actually exists. Previously an
# unconditional write here clobbered the "FAIL ... bundle ..." status recorded
# by a failed bundle step, hiding the failure from anything watching last-status.
if [[ -f "$bundle_file" ]]; then
  echo "OK ${ts} ${owner}/${name} s3=${s3_status}" > "$STATUS"
fi
log "END ${owner}/${name} s3=${s3_status}"
exit 0
|
||||
Executable
+35
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
# Install / refresh the per-repo backup shim in every Gitea repo.
# Idempotent: safe to run anytime, including via cron.
# Usage: ./install-hooks.sh [--quiet]

set -euo pipefail

GITEA_REPOS="/home/ubuntu/gitea/data/gitea-repositories"
SHIM_NAME="zzz-backup"
QUIET=0
[[ "${1:-}" == "--quiet" ]] && QUIET=1

shim_content='#!/usr/bin/env bash
exec /home/ubuntu/gitea-backups/bin/backup.sh
'

installed=0
already=0
for repo in "$GITEA_REPOS"/*/*.git; do
  # With no matching repos the glob stays literal; the -d test skips it.
  [[ -d "$repo" ]] || continue
  hook_dir="$repo/hooks/post-receive.d"
  mkdir -p "$hook_dir"
  shim="$hook_dir/$SHIM_NAME"
  if [[ -f "$shim" ]] && diff -q <(printf '%s' "$shim_content") "$shim" >/dev/null 2>&1; then
    # Content is current, but repair a lost executable bit: git silently
    # skips non-executable hooks, so "already-current" must still mean +x.
    [[ -x "$shim" ]] || chmod +x "$shim"
    already=$((already + 1))
  else
    printf '%s' "$shim_content" > "$shim"
    chmod +x "$shim"
    installed=$((installed + 1))
    [[ $QUIET -eq 0 ]] && echo "installed: $repo"
  fi
done

[[ $QUIET -eq 0 ]] && echo "done — installed/updated: $installed, already-current: $already"
exit 0
|
||||
Executable
+79
@@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env bash
# Daily retention sweep — keeps the newest KEEP_DAYS distinct calendar
# dates of backups, on BOTH local disk and S3, regardless of how old
# those dates are. So a long quiet period leaves backups intact instead
# of letting them age out.
#
# Idempotent. Safe to run on a cron.

set -uo pipefail

CONFIG_DIR="/home/ubuntu/gitea-backups"
LOG="${CONFIG_DIR}/logs/retention.log"
KEEP_DAYS=7

S3_REMOTE="s3"
S3_BUCKET="toqqer-gitea-backup"
RCLONE_CONF="/home/ubuntu/.config/rclone/rclone.conf"

QUIET=0
[[ "${1:-}" == "--quiet" ]] && QUIET=1

# Ensure the log directory exists before the first log()/say() call —
# on a fresh host the append below would otherwise fail on every line.
mkdir -p "${CONFIG_DIR}/logs" 2>/dev/null || true

# log MESSAGE — append a UTC-timestamped line to $LOG.
log() { printf '%s %s\n' "$(date -u +%FT%TZ)" "$*" >> "$LOG"; }
# say MESSAGE — echo to stdout (unless --quiet) and always log.
say() { [[ $QUIET -eq 0 ]] && echo "$*"; log "$*"; }
|
||||
|
||||
# ---- S3: keep newest KEEP_DAYS top-level YYYY-MM-DD folders ----
# List the bucket's date folders ascending, then purge everything except
# the trailing KEEP_DAYS entries.
mapfile -t s3_dates < <(
  rclone --config "$RCLONE_CONF" lsd "${S3_REMOTE}:${S3_BUCKET}/" 2>/dev/null \
    | awk '{print $NF}' \
    | grep -E '^[0-9]{4}-[0-9]{2}-[0-9]{2}$' \
    | sort
)
n=${#s3_dates[@]}
if (( n > KEEP_DAYS )); then
  deleted=0
  excess=$(( n - KEEP_DAYS ))
  for (( idx = 0; idx < excess; idx++ )); do
    stale="${s3_dates[idx]}"
    if rclone --config "$RCLONE_CONF" purge "${S3_REMOTE}:${S3_BUCKET}/${stale}" 2>>"$LOG"; then
      say "S3: deleted ${stale}/"
      deleted=$((deleted+1))
    else
      say "S3: FAIL deleting ${stale}/ (see log)"
    fi
  done
  say "S3: kept newest ${KEEP_DAYS} date-folder(s), deleted ${deleted} older"
else
  say "S3: ${n} date-folder(s) present — all kept (limit ${KEEP_DAYS})"
fi
|
||||
|
||||
# ---- LOCAL: same semantics on /home/ubuntu/gitea-backups/{repos,db}/ ----
# All filenames are date-prefixed (YYYY-MM-DDTHH-MM-SSZ...), so we group by
# the leading date and keep files only from the newest KEEP_DAYS dates seen.
# Dates are extracted from the basename only (anchored at the start), so a
# date-like component in a directory name can't skew the grouping.
mapfile -t local_dates < <(
  {
    find "${CONFIG_DIR}/repos" -type f -name '*.bundle' 2>/dev/null
    find "${CONFIG_DIR}/db" -type f -name '*.db.gz' 2>/dev/null
  } | awk -F/ '{print $NF}' | grep -oE '^[0-9]{4}-[0-9]{2}-[0-9]{2}' | sort -u
)
ln=${#local_dates[@]}
if (( ln <= KEEP_DAYS )); then
  say "LOCAL: ${ln} date(s) present — all kept (limit ${KEEP_DAYS})"
else
  keep_from=$(( ln - KEEP_DAYS ))
  keep_set="$(printf '%s\n' "${local_dates[@]:$keep_from}")"
  deleted_files=0
  while IFS= read -r f; do
    [[ -z "$f" ]] && continue
    fd=$(basename "$f" | grep -oE '^[0-9]{4}-[0-9]{2}-[0-9]{2}')
    # A file with no parseable date prefix belongs to no group; skip it
    # rather than deleting it (previously an empty date never matched
    # keep_set, so undated files were silently removed).
    [[ -z "$fd" ]] && continue
    if ! grep -qx "$fd" <<< "$keep_set"; then
      rm -f "$f" && deleted_files=$((deleted_files+1))
    fi
  done < <(
    find "${CONFIG_DIR}/repos" -type f -name '*.bundle' 2>/dev/null
    find "${CONFIG_DIR}/db" -type f -name '*.db.gz' 2>/dev/null
  )
  # Prune now-empty <owner>/<name> directories left behind by the deletes.
  find "${CONFIG_DIR}/repos" -mindepth 2 -type d -empty -delete 2>>"$LOG" || true
  say "LOCAL: kept newest ${KEEP_DAYS} date(s), deleted ${deleted_files} file(s)"
fi

exit 0
|
||||
Reference in New Issue
Block a user