commit a181625c89b01753399359d696b72d0cf2d3e382
Author: prajwal
Date:   Sat May 9 06:02:09 2026 +0000

    Initial commit: backup + mirror automation for self-hosted Gitea

    Includes:
    - gitea-backups/bin/backup.sh (per-push bundle + DB snapshot to local + S3)
    - gitea-backups/bin/install-hooks.sh (idempotent post-receive shim installer)
    - gitea-backups/bin/retention.sh (count-based retention: keep newest 7 dates)
    - gitea-mirror/bin/auto-mirror.sh (Gitea -> GitHub push mirror automation, hardened against Gitea outages)
    - crontab.txt (reference for the 3 cron entries)
    - README.md (architecture, layout, bootstrap)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6f74605
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+# Never commit secrets or runtime state
+*.token
+*.pem
+*.key
+.aws/
+.config/rclone/
+logs/
+db/
+repos/
+*.bundle
+*.db.gz
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4658cc1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,70 @@
+# gitea-ops
+
+Source of truth for the Gitea install at `/home/ubuntu/gitea/` on this server, and the automation around it.
+
+## Layout
+
+```
+gitea-ops/
+├── README.md
+├── crontab.txt                  # cron entries (install with: crontab crontab.txt)
+├── gitea-backups/bin/
+│   ├── backup.sh                # called per push from each repo's post-receive hook
+│   ├── install-hooks.sh         # cron, every minute: ensures hook shim in every repo
+│   └── retention.sh             # cron, daily 03:00 UTC: keeps newest 7 date-folders
+└── gitea-mirror/bin/
+    └── auto-mirror.sh           # cron, every minute: ensures GitHub push-mirror per repo
+```
+
+The deployment target is fixed: scripts run from `/home/ubuntu/gitea-backups/bin/` and `/home/ubuntu/gitea-mirror/bin/`.
+
+## Components
+
+| Script | Trigger | Purpose |
+|---|---|---|
+| `backup.sh` | per `git push` (via post-receive hook) | Bundle repo + snapshot SQLite, upload both to local + S3 |
+| `install-hooks.sh` | cron, 1 min | Drop the `zzz-backup` shim into every repo's `hooks/post-receive.d/` |
+| `retention.sh` | cron, daily 03:00 UTC | Keep newest 7 calendar dates of backups (S3 + local), prune older |
+| `auto-mirror.sh` | cron, 1 min | For every Gitea repo, ensure matching private GitHub repo + push mirror exist |
+
+## Backup destinations
+
+1. **Local** — `/home/ubuntu/gitea-backups/repos/<owner>/<repo>/*.bundle` and `/home/ubuntu/gitea-backups/db/*-gitea.db.gz`
+2. **S3** — `s3://toqqer-gitea-backup/<date>/repos/...` and `.../<date>/db/...` (region `ap-south-1`)
+3. **GitHub** — live mirror at `github.com/prajwalpatil-toqqer/<repo>` for every Gitea repo (real-time via `sync_on_commit`)
+
+## Retention semantics
+
+Keep the **most recent 7 calendar dates** that have backups, regardless of how old they are. Quiet periods don't empty the store — the last 7 active dates always persist.
+
+## Required outside-of-repo state
+
+Not committed (see `.gitignore`):
+
+- `/home/ubuntu/gitea-mirror/gitea.token` — Gitea PAT (scopes: repo, user)
+- `/home/ubuntu/gitea-mirror/github.token` — GitHub classic PAT (scope: repo)
+- `/home/ubuntu/.aws/credentials` — IAM `gitea-backup-bot` (scoped to one bucket)
+- `/home/ubuntu/.config/rclone/rclone.conf` — same key, rclone format
+
+## Service unit
+
+Gitea itself runs under user systemd:
+
+- Unit: `~/.config/systemd/user/gitea.service`
+- Persistence: `loginctl enable-linger ubuntu`
+- Status: `systemctl --user status gitea`
+- Logs: `journalctl --user -u gitea -f`
+
+## Bootstrap a fresh server
+
+```bash
+# 1. Place the secret files (gitea.token, github.token, aws creds, rclone.conf) outside the repo
+# 2. Mirror the directory layout:
+mkdir -p /home/ubuntu/gitea-backups/{bin,db,repos,logs}
+mkdir -p /home/ubuntu/gitea-mirror/{bin,logs}
+cp gitea-backups/bin/* /home/ubuntu/gitea-backups/bin/
+cp gitea-mirror/bin/* /home/ubuntu/gitea-mirror/bin/
+chmod +x /home/ubuntu/gitea-backups/bin/*.sh /home/ubuntu/gitea-mirror/bin/*.sh
+# 3. Install cron
+crontab crontab.txt
+```
diff --git a/crontab.txt b/crontab.txt
new file mode 100644
index 0000000..abdca17
--- /dev/null
+++ b/crontab.txt
@@ -0,0 +1,12 @@
+# Crontab reference for the Gitea automation.
+# Install with: crontab crontab.txt
+# Verify with: crontab -l
+
+# Every minute: ensure backup post-receive hook is installed in every Gitea repo
+* * * * * /home/ubuntu/gitea-backups/bin/install-hooks.sh --quiet >> /home/ubuntu/gitea-backups/logs/install-hooks.log 2>&1
+
+# Every minute: ensure every Gitea repo has a GitHub push-mirror configured
+* * * * * /home/ubuntu/gitea-mirror/bin/auto-mirror.sh --quiet >> /home/ubuntu/gitea-mirror/logs/cron.log 2>&1
+
+# Daily at 03:00 UTC: keep newest 7 calendar dates of backups (S3 + local)
+0 3 * * * /home/ubuntu/gitea-backups/bin/retention.sh --quiet >> /home/ubuntu/gitea-backups/logs/retention.log 2>&1
diff --git a/gitea-backups/bin/backup.sh b/gitea-backups/bin/backup.sh
new file mode 100755
index 0000000..ef67c41
--- /dev/null
+++ b/gitea-backups/bin/backup.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+# Gitea per-push backup: bundles the pushed repo + snapshots SQLite DB.
+# Invoked from each repo's hooks/post-receive.d/zzz-backup shim.
+# Failures NEVER block the push — git push has already succeeded by the time we run.
+#
+# Side effects: appends to $LOG and overwrites $STATUS with a single line:
+#   OK <ts> <owner>/<name> s3=<ok|fail|skipped>
+#   FAIL <ts> <failed-steps> <owner>/<name> s3=<ok|fail|skipped>
+# Always exits 0.
+
+set -u
+
+# Git hooks run with $HOME pointing oddly (the repo dir or similar), so tools
+# like rclone can't find ~/.config/rclone/rclone.conf. Force it.
+export HOME="/home/ubuntu"
+
+BACKUP_ROOT="/home/ubuntu/gitea-backups"
+GITEA_REPOS="/home/ubuntu/gitea/data/gitea-repositories"
+GITEA_DB="/home/ubuntu/gitea/data/gitea.db"
+LOG="${BACKUP_ROOT}/logs/backup.log"
+STATUS="${BACKUP_ROOT}/logs/last-status"
+
+# S3 (offsite). Empty S3_BUCKET disables the upload step entirely.
+S3_BUCKET="toqqer-gitea-backup"
+S3_REMOTE="s3"   # rclone remote name (configured in ~/.config/rclone/rclone.conf)
+
+ts="$(date -u +%Y-%m-%dT%H-%M-%SZ)"
+day="${ts%%T*}"   # derived from ts so the two can never straddle midnight
+failed=""         # comma-separated names of local steps that failed
+
+log() { printf '%s %s\n' "$(date -u +%FT%TZ)" "$*" >> "$LOG"; }
+# Record a failed local step so the final status line cannot claim OK.
+mark_failed() { failed="${failed:+${failed},}$1"; }
+
+# git invokes the hook with $GIT_DIR set to the bare repo path
+repo_path="${GIT_DIR:-$(pwd)}"
+repo_path="$(cd "$repo_path" && pwd)"
+
+# derive owner/name from the path: .../gitea-repositories/<owner>/<name>.git
+rel="${repo_path#"${GITEA_REPOS}"/}"
+owner="${rel%%/*}"
+name="${rel#*/}"
+name="${name%.git}"
+
+if [[ -z "$owner" || -z "$name" || "$owner" == "$rel" ]]; then
+  log "SKIP: could not parse owner/name from $repo_path"
+  echo "FAIL ${ts} parse-error ${repo_path}" > "$STATUS"
+  exit 0   # never block the push
+fi
+
+log "START ${owner}/${name}"
+
+# ---- 1) repo bundle ---------------------------------------------------------
+bundle_dir="${BACKUP_ROOT}/repos/${owner}/${name}"
+mkdir -p "$bundle_dir"
+bundle_file="${bundle_dir}/${ts}.bundle"
+
+if git -C "$repo_path" bundle create "$bundle_file" --all 2>>"$LOG"; then
+  bundle_size=$(stat -c %s "$bundle_file" 2>/dev/null || echo 0)
+  log "OK bundle ${owner}/${name} -> ${bundle_file} (${bundle_size} bytes)"
+else
+  log "FAIL bundle ${owner}/${name}"
+  mark_failed bundle
+  rm -f -- "$bundle_file"   # never upload a partial bundle in step 3
+  # continue to DB backup anyway
+fi
+
+# ---- 2) SQLite hot backup (via python3, avoids sqlite3 CLI dependency) ----
+# Paths are passed as argv instead of being pasted into the Python source.
+# The -r test matters: sqlite3.connect() on a missing path would create an
+# empty database there and then "successfully" back it up.
+db_file="${BACKUP_ROOT}/db/${ts}-gitea.db"
+if [[ -r "$GITEA_DB" ]] && python3 -c '
+import sqlite3, sys
+src = sqlite3.connect(sys.argv[1])
+dst = sqlite3.connect(sys.argv[2])
+src.backup(dst)
+dst.close(); src.close()
+' "$GITEA_DB" "$db_file" 2>>"$LOG"; then
+  if gzip -f "$db_file" 2>>"$LOG"; then
+    db_size=$(stat -c %s "${db_file}.gz" 2>/dev/null || echo 0)
+    log "OK db -> ${db_file}.gz (${db_size} bytes)"
+  else
+    log "FAIL gzip db ${db_file}"
+    mark_failed db-gzip
+  fi
+else
+  log "FAIL sqlite .backup -> ${db_file}"
+  mark_failed db
+  rm -f -- "$db_file"   # drop the partial copy; retention.sh only prunes *.db.gz
+fi
+
+# ---- 3) S3 offsite upload --------------------------------------------------
+# Layout: s3://<bucket>/YYYY-MM-DD/repos/<owner>/<name>/<ts>.bundle
+#         s3://<bucket>/YYYY-MM-DD/db/<ts>-gitea.db.gz
+# Old date-folders are pruned by retention.sh (daily cron), NOT here.
+s3_status="skipped"
+if [[ -n "$S3_BUCKET" ]] && command -v rclone >/dev/null 2>&1; then
+  s3_bundle_target="${S3_REMOTE}:${S3_BUCKET}/${day}/repos/${owner}/${name}/${ts}.bundle"
+  s3_db_target="${S3_REMOTE}:${S3_BUCKET}/${day}/db/${ts}-gitea.db.gz"
+  s3_status="ok"
+  if [[ -f "$bundle_file" ]]; then
+    if rclone copyto --no-traverse "$bundle_file" "$s3_bundle_target" 2>>"$LOG"; then
+      log "OK s3 bundle -> ${s3_bundle_target}"
+    else
+      log "FAIL s3 bundle -> ${s3_bundle_target}"
+      s3_status="fail"
+    fi
+  fi
+  if [[ -f "${db_file}.gz" ]]; then
+    if rclone copyto --no-traverse "${db_file}.gz" "$s3_db_target" 2>>"$LOG"; then
+      log "OK s3 db -> ${s3_db_target}"
+    else
+      log "FAIL s3 db -> ${s3_db_target}"
+      s3_status="fail"
+    fi
+  fi
+fi
+
+# ---- 4) cleanup is handled by retention.sh (daily cron, "keep newest N dates"). ----
+# Push-triggered cleanup was removed because age-based cleanup would empty the
+# bucket during quiet periods. retention.sh keeps the most-recent N date-folders
+# regardless of how old they are.
+
+if [[ -z "$failed" ]]; then
+  echo "OK ${ts} ${owner}/${name} s3=${s3_status}" > "$STATUS"
+else
+  echo "FAIL ${ts} ${failed} ${owner}/${name} s3=${s3_status}" > "$STATUS"
+fi
+log "END ${owner}/${name} s3=${s3_status}"
+exit 0
diff --git a/gitea-backups/bin/install-hooks.sh b/gitea-backups/bin/install-hooks.sh
new file mode 100755
index 0000000..f99fa36
--- /dev/null
+++ b/gitea-backups/bin/install-hooks.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Install / refresh the per-repo backup shim in every Gitea repo.
+# Idempotent: safe to run anytime, including via cron.
+# Usage: ./install-hooks.sh [--quiet]
+#
+# A repo whose shim cannot be written is reported on stderr and skipped, so one
+# broken repo directory never stops the remaining repos from being processed.
+
+set -euo pipefail
+
+GITEA_REPOS="/home/ubuntu/gitea/data/gitea-repositories"
+SHIM_NAME="zzz-backup"
+QUIET=0
+[[ "${1:-}" == "--quiet" ]] && QUIET=1
+
+shim_content='#!/usr/bin/env bash
+exec /home/ubuntu/gitea-backups/bin/backup.sh
+'
+
+installed=0
+already=0
+failed=0
+for repo in "$GITEA_REPOS"/*/*.git; do
+  [[ -d "$repo" ]] || continue
+  hook_dir="$repo/hooks/post-receive.d"
+  shim="$hook_dir/$SHIM_NAME"
+  # "Current" means same bytes AND still executable; a shim that lost its x bit
+  # would otherwise be counted as fine and never repaired.
+  if [[ -f "$shim" && -x "$shim" ]] && diff -q <(printf '%s' "$shim_content") "$shim" >/dev/null 2>&1; then
+    already=$((already + 1))
+    continue
+  fi
+  # Build the shim under a hidden temp name and rename it into place, so a push
+  # arriving mid-install never sees a truncated or non-executable hook.
+  tmp="$hook_dir/.$SHIM_NAME.tmp.$$"
+  if mkdir -p "$hook_dir" \
+    && printf '%s' "$shim_content" > "$tmp" \
+    && chmod +x "$tmp" \
+    && mv -f "$tmp" "$shim"; then
+    installed=$((installed + 1))
+    if [[ $QUIET -eq 0 ]]; then
+      echo "installed: $repo"
+    fi
+  else
+    rm -f -- "$tmp"
+    failed=$((failed + 1))
+    echo "FAIL: could not install shim in $repo" >&2
+  fi
+done
+
+if [[ $QUIET -eq 0 ]]; then
+  echo "done — installed/updated: $installed, already-current: $already, failed: $failed"
+fi
+exit 0
diff --git a/gitea-backups/bin/retention.sh b/gitea-backups/bin/retention.sh
new file mode 100755
index 0000000..dc93d37
--- /dev/null
+++ b/gitea-backups/bin/retention.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+# Daily retention sweep — keeps the newest KEEP_DAYS distinct calendar
+# dates of backups, on BOTH local disk and S3, regardless of how old
+# those dates are. So a long quiet period leaves backups intact instead
+# of letting them age out.
+#
+# Idempotent. Safe to run on a cron.
+# Usage: ./retention.sh [--quiet]
+
+set -uo pipefail
+
+CONFIG_DIR="/home/ubuntu/gitea-backups"
+LOG="${CONFIG_DIR}/logs/retention.log"
+KEEP_DAYS=7
+
+S3_REMOTE="s3"
+S3_BUCKET="toqqer-gitea-backup"
+RCLONE_CONF="/home/ubuntu/.config/rclone/rclone.conf"
+
+QUIET=0
+[[ "${1:-}" == "--quiet" ]] && QUIET=1
+log() { printf '%s %s\n' "$(date -u +%FT%TZ)" "$*" >> "$LOG"; }
+say() { [[ $QUIET -eq 0 ]] && echo "$*"; log "$*"; }
+
+# ---- S3: keep newest KEEP_DAYS top-level YYYY-MM-DD folders ----
+# The listing is captured first so a failed listing (network, credentials) is
+# reported as a failure instead of being mistaken for an empty bucket.
+if ! s3_listing="$(rclone --config "$RCLONE_CONF" lsd "${S3_REMOTE}:${S3_BUCKET}/" 2>>"$LOG")"; then
+  say "S3: FAIL listing ${S3_REMOTE}:${S3_BUCKET}/ (see log) — nothing deleted"
+else
+  mapfile -t s3_dates < <(
+    awk '{print $NF}' <<< "$s3_listing" \
+      | grep -E '^[0-9]{4}-[0-9]{2}-[0-9]{2}$' \
+      | sort
+  )
+  n=${#s3_dates[@]}
+  if (( n <= KEEP_DAYS )); then
+    say "S3: ${n} date-folder(s) present — all kept (limit ${KEEP_DAYS})"
+  else
+    to_delete=$(( n - KEEP_DAYS ))
+    deleted=0
+    # s3_dates is sorted ascending, so the first to_delete entries are the oldest.
+    for ((i = 0; i < to_delete; i++)); do
+      d="${s3_dates[$i]}"
+      if rclone --config "$RCLONE_CONF" purge "${S3_REMOTE}:${S3_BUCKET}/${d}" 2>>"$LOG"; then
+        say "S3: deleted ${d}/"
+        deleted=$((deleted+1))
+      else
+        say "S3: FAIL deleting ${d}/ (see log)"
+      fi
+    done
+    say "S3: kept newest ${KEEP_DAYS} date-folder(s), deleted ${deleted} older"
+  fi
+fi
+
+# ---- LOCAL: same semantics on /home/ubuntu/gitea-backups/{repos,db}/ ----
+# All filenames are date-prefixed (YYYY-MM-DDTHH-MM-SSZ...), so we group by
+# the leading date of the file NAME and keep files only from the newest
+# KEEP_DAYS dates seen. Directory parts are ignored on purpose: an owner or
+# repo called e.g. "report-2024-01-01" must not count as a backup date.
+list_backup_files() {
+  find "${CONFIG_DIR}/repos" -type f -name '*.bundle' 2>/dev/null
+  find "${CONFIG_DIR}/db" -type f -name '*.db.gz' 2>/dev/null
+}
+
+mapfile -t local_dates < <(
+  list_backup_files \
+    | sed 's|.*/||' \
+    | grep -oE '^[0-9]{4}-[0-9]{2}-[0-9]{2}' \
+    | sort -u
+)
+ln=${#local_dates[@]}
+if (( ln <= KEEP_DAYS )); then
+  say "LOCAL: ${ln} date(s) present — all kept (limit ${KEEP_DAYS})"
+else
+  declare -A keep=()
+  for d in "${local_dates[@]:$(( ln - KEEP_DAYS ))}"; do
+    keep["$d"]=1
+  done
+  deleted_files=0
+  while IFS= read -r f; do
+    [[ -z "$f" ]] && continue
+    base="${f##*/}"
+    fd="${base:0:10}"
+    # A file without a leading date is not one of ours to judge — leave it alone.
+    [[ "$fd" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]] || continue
+    if [[ -z "${keep[$fd]:-}" ]]; then
+      rm -f -- "$f" && deleted_files=$((deleted_files+1))
+    fi
+  done < <(list_backup_files)
+  find "${CONFIG_DIR}/repos" -mindepth 2 -type d -empty -delete 2>>"$LOG" || true
+  say "LOCAL: kept newest ${KEEP_DAYS} date(s), deleted ${deleted_files} file(s)"
+fi
+
+exit 0
diff --git a/gitea-mirror/bin/auto-mirror.sh b/gitea-mirror/bin/auto-mirror.sh
new file mode 100755
index 0000000..4e4123f
--- /dev/null
+++ b/gitea-mirror/bin/auto-mirror.sh
@@ -0,0 +1,210 @@
+#!/usr/bin/env bash
+# Auto-configure GitHub push mirrors for every Gitea repo.
+# Idempotent: safe to run anytime, including via cron.
+#   - For each Gitea repo, ensures a same-named private repo exists on GitHub.
+#   - Ensures a push mirror is configured in Gitea pointing to that GitHub repo.
+#   - On first configuration, triggers an initial sync so existing history uploads.
+# Failures log to ${LOG} but never abort the script (other repos still get processed).
+
+set -uo pipefail
+
+CONFIG_DIR="/home/ubuntu/gitea-mirror"
+GITEA_TOKEN_FILE="${CONFIG_DIR}/gitea.token"
+GITHUB_TOKEN_FILE="${CONFIG_DIR}/github.token"
+LOG="${CONFIG_DIR}/logs/auto-mirror.log"
+
+GITEA_BASE="https://127.0.0.1:3030"
+# --insecure because Gitea uses a self-signed cert; this is a loopback call so
+# MITM risk is non-existent. Remove -k once a real cert is in place.
+# Only ever pass this to calls against ${GITEA_BASE}, never to GitHub.
+CURL_OPTS="--insecure"
+GITEA_OWNER="prajwal"   # mirror only repos owned by this Gitea user
+GITHUB_USER="prajwalpatil-toqqer"
+GITHUB_API="https://api.github.com"
+PAGE_SIZE=50    # repos requested per Gitea API page
+MAX_PAGES=200   # hard stop so a misbehaving API can never loop forever
+
+QUIET=0
+[[ "${1:-}" == "--quiet" ]] && QUIET=1
+
+log() { printf '%s %s\n' "$(date -u +%FT%TZ)" "$*" >> "$LOG"; }
+say() { [[ $QUIET -eq 0 ]] && echo "$*"; log "$*"; }
+
+# --- preflight ---------------------------------------------------------------
+[[ -r "$GITEA_TOKEN_FILE" ]] || { log "FAIL: missing $GITEA_TOKEN_FILE"; exit 0; }
+[[ -r "$GITHUB_TOKEN_FILE" ]] || { log "FAIL: missing $GITHUB_TOKEN_FILE"; exit 0; }
+GITEA_TOKEN="$(<"$GITEA_TOKEN_FILE")"
+GITHUB_TOKEN="$(<"$GITHUB_TOKEN_FILE")"
+
+# --- helpers -----------------------------------------------------------------
+# Echo one repo name per line for $GITEA_OWNER, walking every result page.
+# On any failure (Gitea restart, non-200, malformed JSON) the listing stops and
+# a warning is logged, so the cron run degrades to a no-op (or a partial pass)
+# instead of dumping a stack trace.
+gitea_repos() {
+  local code tmp page rc
+  tmp=$(mktemp) || { log "WARN gitea_repos: mktemp failed — skipping this run"; return 0; }
+  for ((page = 1; page <= MAX_PAGES; page++)); do
+    code=$(curl -sS ${CURL_OPTS} -o "$tmp" -w "%{http_code}" \
+      -H "Authorization: token ${GITEA_TOKEN}" \
+      "${GITEA_BASE}/api/v1/users/${GITEA_OWNER}/repos?limit=${PAGE_SIZE}&page=${page}" 2>/dev/null) || code="000"
+    if [[ "$code" != "200" ]]; then
+      log "WARN gitea_repos: HTTP ${code} from Gitea (page ${page}) — listing stops here"
+      break
+    fi
+    # Python exit status: 0 = names printed, 3 = empty page (end of list), 2 = bad JSON.
+    python3 -c '
+import json, sys
+try:
+    with open(sys.argv[1]) as fh:
+        data = json.load(fh)
+except Exception:
+    sys.exit(2)
+if not isinstance(data, list):
+    sys.exit(2)
+if not data:
+    sys.exit(3)
+for r in data:
+    if r.get("owner", {}).get("login") == sys.argv[2]:
+        print(r["name"])
+' "$tmp" "$GITEA_OWNER" 2>/dev/null
+    rc=$?
+    if (( rc != 0 && rc != 3 )); then
+      log "WARN gitea_repos: unusable JSON from Gitea (page ${page}) — listing stops here"
+    fi
+    (( rc == 0 )) || break
+  done
+  rm -f -- "$tmp"
+}
+
+# Succeeds when the GitHub repo ${GITHUB_USER}/$1 already exists (HTTP 200).
+github_repo_exists() {
+  local repo="$1"
+  local code
+  code=$(curl -sS -o /dev/null -w "%{http_code}" \
+    -H "Authorization: token ${GITHUB_TOKEN}" \
+    "${GITHUB_API}/repos/${GITHUB_USER}/${repo}")
+  [[ "$code" == "200" ]]
+}
+
+# Create a private, empty GitHub repo named $1; succeeds on HTTP 201.
+# Deliberately NOT using ${CURL_OPTS}: this call leaves the machine, and sending
+# the GitHub PAT over a connection with certificate checks off invites MITM.
+github_create_repo() {
+  local repo="$1"
+  local code
+  code=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
+    -H "Authorization: token ${GITHUB_TOKEN}" \
+    -H "Content-Type: application/json" \
+    "${GITHUB_API}/user/repos" \
+    -d "{\"name\":\"${repo}\",\"private\":true,\"auto_init\":false,\"description\":\"Mirror of Gitea ${GITEA_OWNER}/${repo}\"}")
+  [[ "$code" == "201" ]]
+}
+
+# Report whether Gitea repo $1 already has a push mirror to its GitHub twin.
+# Returns: 0 = mirror exists / 1 = does NOT exist / 2 = unknown (API failure → caller skips)
+mirror_already_configured() {
+  local repo="$1"
+  local code tmp rc
+  tmp=$(mktemp) || { log "WARN mirror_already_configured(${repo}): mktemp failed"; return 2; }
+  code=$(curl -sS ${CURL_OPTS} -o "$tmp" -w "%{http_code}" \
+    -H "Authorization: token ${GITEA_TOKEN}" \
+    "${GITEA_BASE}/api/v1/repos/${GITEA_OWNER}/${repo}/push_mirrors" 2>/dev/null) || code="000"
+  if [[ "$code" != "200" ]]; then
+    log "WARN mirror_already_configured(${repo}): HTTP ${code} — treating as unknown"
+    rm -f -- "$tmp"
+    return 2
+  fi
+  # Anything unexpected (bad JSON, wrong shape) must map to 2, never to 1: an
+  # uncaught Python traceback exits with status 1, which the caller would read
+  # as "no mirror yet" and answer by configuring a duplicate mirror.
+  python3 -c '
+import json, sys
+try:
+    with open(sys.argv[1]) as fh:
+        mirrors = json.load(fh)
+    target = sys.argv[2]
+    found = False
+    for m in mirrors:
+        addr = m.get("remote_address", "").rstrip("/")
+        if addr.endswith(".git"):
+            addr = addr[:-4]
+        if addr.endswith(target):
+            found = True
+except Exception:
+    sys.exit(2)
+sys.exit(0 if found else 1)
+' "$tmp" "github.com/${GITHUB_USER}/${repo}" 2>/dev/null
+  rc=$?
+  rm -f -- "$tmp"
+  return "$rc"
+}
+
+# Add a push mirror (sync on every commit) from Gitea repo $1 to its GitHub
+# twin; succeeds on HTTP 200/201. The JSON body is built from environment
+# variables so a quote in a value cannot break the Python source, and it is
+# sent on stdin so the body (which holds the GitHub token) is not a curl argument.
+mirror_configure() {
+  local repo="$1"
+  local body code
+  body=$(GH_USER="$GITHUB_USER" GH_REPO="$repo" GH_TOKEN="$GITHUB_TOKEN" python3 -c '
+import json, os
+env = os.environ
+print(json.dumps({
+    "remote_address": "https://github.com/%s/%s.git" % (env["GH_USER"], env["GH_REPO"]),
+    "remote_username": env["GH_USER"],
+    "remote_password": env["GH_TOKEN"],
+    "interval": "0h0m0s",
+    "sync_on_commit": True,
+}))') || return 1
+  code=$(printf '%s' "$body" | curl -sS ${CURL_OPTS} -o /dev/null -w "%{http_code}" -X POST \
+    -H "Authorization: token ${GITEA_TOKEN}" \
+    -H "Content-Type: application/json" \
+    "${GITEA_BASE}/api/v1/repos/${GITEA_OWNER}/${repo}/push_mirrors" \
+    --data-binary @-)
+  [[ "$code" == "200" || "$code" == "201" ]]
+}
+
+# Ask Gitea to sync the push mirrors of repo $1 right now (best effort, never fails).
+mirror_sync_now() {
+  local repo="$1"
+  curl -sS ${CURL_OPTS} -o /dev/null -X POST \
+    -H "Authorization: token ${GITEA_TOKEN}" \
+    "${GITEA_BASE}/api/v1/repos/${GITEA_OWNER}/${repo}/push_mirrors-sync" || true
+}
+
+# --- main loop ---------------------------------------------------------------
+configured=0
+already=0
+errors=0
+skipped=0
+while IFS= read -r repo; do
+  [[ -z "$repo" ]] && continue
+  mirror_already_configured "$repo"
+  case $? in
+    0) already=$((already + 1)); continue ;;   # exists
+    2) skipped=$((skipped + 1)); continue ;;   # API failure → skip safely, retry next minute
+    # 1 → does not exist, fall through to configure
+  esac
+  # New repo (no mirror yet) — ensure GitHub side exists, configure mirror, kick sync
+  if ! github_repo_exists "$repo"; then
+    if github_create_repo "$repo"; then
+      say "  created GitHub repo: ${GITHUB_USER}/${repo}"
+    else
+      say "  FAIL: could not create GitHub repo ${GITHUB_USER}/${repo}"
+      errors=$((errors + 1))
+      continue
+    fi
+  fi
+  if mirror_configure "$repo"; then
+    mirror_sync_now "$repo"
+    configured=$((configured + 1))
+    say "  configured mirror: ${GITEA_OWNER}/${repo} -> ${GITHUB_USER}/${repo} (sync triggered)"
+  else
+    say "  FAIL: could not configure mirror on ${GITEA_OWNER}/${repo}"
+    errors=$((errors + 1))
+  fi
+done < <(gitea_repos)
+
+say "done — newly-configured: ${configured}, already-current: ${already}, skipped: ${skipped}, errors: ${errors}"
+exit 0