fix: 将 k3s-ansible 作为普通目录添加

This commit is contained in:
fei
2026-02-04 23:43:40 +08:00
commit 7f6c8b9b92
40 changed files with 10909 additions and 0 deletions

276
scripts/verify-deployment.sh Executable file
View File

@@ -0,0 +1,276 @@
#!/bin/bash
# Script prologue: strict mode, path discovery, logging helpers and the
# result counters used by the checks further down.

# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Directory containing this script, its parent directory, and the cluster
# variables file located under the parent's config/ directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
CONFIG_FILE="$PROJECT_DIR/config/cluster-vars.yml"

# Logging helpers: define minimal local versions when the shared library is
# missing, otherwise load the library.
if [[ ! -f "$SCRIPT_DIR/lib/common.sh" ]]; then
  log() {
    echo "[INFO] $1"
  }

  log_error() {
    echo "[ERROR] $1" >&2
  }

  log_warn() {
    echo "[WARN] $1"
  }
else
  # shellcheck source=/dev/null
  source "$SCRIPT_DIR/lib/common.sh"
fi

log "=== 验证K3s集群部署 ==="
echo

# Tallies of check results; printed in the final summary.
TOTAL_CHECKS=0
PASSED_CHECKS=0
FAILED_CHECKS=0
WARNING_CHECKS=0
#######################################
# Run one named check and record its outcome in the global counters.
#
# Globals:
#   TOTAL_CHECKS    incremented on every call
#   PASSED_CHECKS   incremented when the command succeeds
#   FAILED_CHECKS   incremented when a critical command fails
#   WARNING_CHECKS  incremented when a non-critical command fails
# Arguments:
#   $1 - human-readable name printed in the status line
#   $2 - command string run through eval; its stdout/stderr are discarded
#   $3 - "true" (default) to count a failure as critical; any other value
#        downgrades a failure to a warning
# Outputs:
#   A single status line on stdout.
# Returns:
#   Always 0. The outcome is reported through the counters; returning
#   non-zero here would terminate the whole script under `set -e`, because
#   callers invoke check as a plain statement, and the summary would never
#   be printed.
#######################################
check() {
  local name="$1"
  local command="$2"
  local is_critical="${3:-true}"

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))
  echo -n "检查: $name ... "

  # eval is used because callers pass pipelines and quoted jsonpath
  # expressions as a single string. Only trusted, script-defined command
  # strings must ever be passed in.
  if eval "$command" &>/dev/null; then
    echo "✓ 通过"
    PASSED_CHECKS=$((PASSED_CHECKS + 1))
  elif [ "$is_critical" = "true" ]; then
    echo "✗ 失败"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
  else
    echo "⚠ 警告"
    WARNING_CHECKS=$((WARNING_CHECKS + 1))
  fi

  return 0
}
#######################################
# Print a titled banner followed by the live output of a command.
#
# This is informational only: it does not touch the pass/fail counters.
#
# Arguments:
#   $1 - title shown inside the banner
#   $2 - command string run through eval; its output is shown as-is
# Outputs:
#   Banner and command output on stdout; a warning via log_warn when the
#   command fails.
# Returns:
#   Always 0. A failing informational command must not terminate the script
#   under `set -e` before the summary is printed.
#######################################
check_detailed() {
  local name="$1"
  local command="$2"

  echo ""
  echo "=========================================="
  echo " $name"
  echo "=========================================="
  # Only trusted, script-defined command strings must be passed to eval.
  if ! eval "$command"; then
    log_warn "命令执行失败: $command"
  fi
  echo ""

  return 0
}
# ---------------------------------------------------------------------------
# Section 1: basic environment — required tooling and the cluster config file.
# ---------------------------------------------------------------------------
echo "=========================================="
echo " 1. 基础环境检查"
echo "=========================================="
echo ""

check "kubectl命令可用" "command -v kubectl"
check "kubectl连接集群" "kubectl cluster-info"

# $CONFIG_FILE is deliberately left unexpanded here: check() evals the string,
# so the variable is expanded inside the quotes at that point and a project
# path containing spaces or glob characters is tested as a single argument.
check "配置文件存在" "test -f \"\$CONFIG_FILE\""

# yq is optional: when it is missing the check is counted as a warning rather
# than a failure (third argument "false" marks the check as non-critical).
check "yq工具可用" "command -v yq" "false"
# ---------------------------------------------------------------------------
# Section 2: K3s cluster state — nodes, kube-system namespace and CoreDNS.
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 2. K3s集群状态"
echo "=========================================="
echo ""

# Succeeds only when at least one node is listed and the STATUS column of
# every node starts with "Ready" (so "Ready,SchedulingDisabled" still counts,
# while "NotReady" or any other status does not). A plain
# `grep -v NotReady | grep Ready` would pass as soon as a single node is
# Ready, which is not what the check name promises.
all_nodes_ready() {
  kubectl get nodes --no-headers \
    | awk '$2 !~ /^Ready/ { not_ready = 1 } END { exit (not_ready || NR == 0) }'
}

check "所有节点Ready" "all_nodes_ready"
check "kube-system命名空间存在" "kubectl get namespace kube-system"
# Empty output (field absent) or a literal 0 both make grep fail the check.
check "CoreDNS运行正常" "kubectl get deployment coredns -n kube-system -o jsonpath='{.status.availableReplicas}' | grep -v '^0$'"

check_detailed "节点状态" "kubectl get nodes -o wide"
check_detailed "系统Pod状态" "kubectl get pods -n kube-system"
# ---------------------------------------------------------------------------
# Section 3: Gitea — namespace, deployment, pod phase, service, access URL.
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 3. Gitea服务检查"
echo "=========================================="
echo ""

if kubectl get namespace gitea &>/dev/null; then
  check "Gitea命名空间存在" "kubectl get namespace gitea"
  check "Gitea部署存在" "kubectl get deployment gitea -n gitea"

  if kubectl get deployment gitea -n gitea &>/dev/null; then
    check "Gitea Pod运行正常" "kubectl get pods -n gitea -l app.kubernetes.io/name=gitea -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "Gitea服务可访问" "kubectl get svc gitea-http -n gitea"
    check_detailed "Gitea服务详情" "kubectl get all -n gitea"

    # Access info is best-effort: every lookup is guarded with `|| true` so a
    # failing kubectl call cannot terminate the script under `set -e`.
    GITEA_NODEPORT=$(kubectl get svc gitea-http -n gitea -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || true)
    # kubectl prints nothing when the port has no nodePort; show N/A instead
    # of a URL that ends in a bare colon.
    GITEA_NODEPORT="${GITEA_NODEPORT:-N/A}"

    # Prefer the first node's ExternalIP and fall back to its InternalIP.
    NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
    if [ -z "$NODE_IP" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
    fi

    echo "Gitea访问信息:"
    echo " URL: http://${NODE_IP:-N/A}:$GITEA_NODEPORT"
    echo ""
  fi
else
  # Gitea is optional: record a warning-level check instead of a failure.
  check "Gitea命名空间存在" "false" "false"
  log_warn "Gitea未部署"
fi
# ---------------------------------------------------------------------------
# Section 4: ArgoCD — namespace, server deployment, core pods, access URL.
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 4. ArgoCD服务检查"
echo "=========================================="
echo ""

if kubectl get namespace argocd &>/dev/null; then
  check "ArgoCD命名空间存在" "kubectl get namespace argocd"
  check "ArgoCD Server部署存在" "kubectl get deployment argocd-server -n argocd"

  if kubectl get deployment argocd-server -n argocd &>/dev/null; then
    check "ArgoCD Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-server -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Application Controller运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-application-controller -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Repo Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-repo-server -o jsonpath='{.items[0].status.phase}' | grep Running"
    check_detailed "ArgoCD服务详情" "kubectl get all -n argocd"

    # Access info is best-effort; lookups are guarded so a failing kubectl
    # call cannot terminate the script under `set -e`.
    ARGOCD_NODEPORT=$(kubectl get svc argocd-server -n argocd -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || true)
    ARGOCD_NODEPORT="${ARGOCD_NODEPORT:-N/A}"

    # NODE_IP may not have been assigned earlier in the script (it is only
    # set while printing Gitea access info). Referencing it unset would abort
    # with "unbound variable" under `set -u`, so resolve it here when needed:
    # first node's ExternalIP, falling back to its InternalIP.
    if [ -z "${NODE_IP:-}" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
      if [ -z "$NODE_IP" ]; then
        NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
      fi
    fi

    echo "ArgoCD访问信息:"
    echo " URL: https://${NODE_IP:-N/A}:$ARGOCD_NODEPORT"
    echo " 用户名: admin"
    echo ""
  fi
else
  # ArgoCD is optional: record a warning-level check instead of a failure.
  check "ArgoCD命名空间存在" "false" "false"
  log_warn "ArgoCD未部署"
fi
# ---------------------------------------------------------------------------
# Section 5: HTTPS certificates — cert-manager namespace, deployment, pod
# phase, plus listings of ClusterIssuers and Certificates when available.
# ---------------------------------------------------------------------------
printf '%s\n' \
  "" \
  "==========================================" \
  " 5. HTTPS证书检查" \
  "==========================================" \
  ""

if ! kubectl get namespace cert-manager &>/dev/null; then
  # Namespace missing: counted as a warning-level check, then logged.
  check "cert-manager命名空间存在" "false" "false"
  log_warn "cert-manager未部署HTTPS功能不可用"
else
  check "cert-manager命名空间存在" "kubectl get namespace cert-manager"
  check "cert-manager部署存在" "kubectl get deployment cert-manager -n cert-manager"

  if kubectl get deployment cert-manager -n cert-manager &>/dev/null; then
    check "cert-manager运行正常" "kubectl get pods -n cert-manager -l app=cert-manager -o jsonpath='{.items[0].status.phase}' | grep Running"

    # The listings are shown only when kubectl can query the resource kind.
    if kubectl get clusterissuer &>/dev/null; then
      check_detailed "ClusterIssuer状态" "kubectl get clusterissuer"
    fi

    if kubectl get certificate -A &>/dev/null; then
      check_detailed "证书状态" "kubectl get certificate -A"
    fi
  fi
fi
# ---------------------------------------------------------------------------
# Section 6: GitOps workflow — are any ArgoCD Application objects present in
# the argocd namespace?
# ---------------------------------------------------------------------------
printf '%s\n' \
  "" \
  "==========================================" \
  " 6. GitOps工作流检查" \
  "==========================================" \
  ""

if ! kubectl get namespace argocd &>/dev/null; then
  # No ArgoCD namespace: nothing is counted, only a warning is logged.
  log_warn "ArgoCD未部署跳过GitOps检查"
elif ! kubectl get application -n argocd &>/dev/null; then
  # The Application resource kind could not be queried.
  check "ArgoCD应用已创建" "false" "false"
  log_warn "ArgoCD CRD可能未就绪"
else
  # Count the listed applications (one per output line without headers).
  APP_COUNT=$(kubectl get application -n argocd --no-headers 2>/dev/null | wc -l)

  if [[ "$APP_COUNT" -gt 0 ]]; then
    check "ArgoCD应用已创建" "test $APP_COUNT -gt 0"
    check_detailed "ArgoCD应用状态" "kubectl get application -n argocd"
  else
    check "ArgoCD应用已创建" "false" "false"
    log_warn "未找到ArgoCD应用"
  fi
fi
# ---------------------------------------------------------------------------
# Section 7: storage — PersistentVolumes and PersistentVolumeClaims.
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 7. 存储检查"
echo "=========================================="
echo ""

# `kubectl get` exits 0 even when it finds nothing (it only prints
# "No resources found" on stderr), so its exit status alone cannot prove
# that anything exists. List without headers and require at least one
# non-empty line on stdout. Plain `grep .` is used rather than `grep -q`,
# which may exit early and make the pipeline fail under pipefail.
check "PersistentVolume存在" "kubectl get pv --no-headers 2>/dev/null | grep ." "false"
check "PersistentVolumeClaim存在" "kubectl get pvc -A --no-headers 2>/dev/null | grep ." "false"

# Show the combined listing whenever the PVC query itself works.
if kubectl get pvc -A &>/dev/null; then
  check_detailed "存储卷状态" "kubectl get pv,pvc -A"
fi
# ---------------------------------------------------------------------------
# Final summary: print the counters, then exit 1 if any critical check failed
# or exit 0 (after printing access hints) otherwise.
# ---------------------------------------------------------------------------
printf '%s\n' \
  "" \
  "==========================================" \
  " 验证总结" \
  "==========================================" \
  "" \
  "总检查项: $TOTAL_CHECKS" \
  "通过: $PASSED_CHECKS" \
  "失败: $FAILED_CHECKS" \
  "警告: $WARNING_CHECKS" \
  ""

# Failure path first: report the count, print troubleshooting hints, exit 1.
if ! [ "$FAILED_CHECKS" -eq 0 ]; then
  log_error "发现 $FAILED_CHECKS 个失败项,请检查并修复"
  printf '%s\n' \
    "" \
    "故障排查建议:" \
    " 1. 查看Pod日志: kubectl logs <pod-name> -n <namespace>" \
    " 2. 查看Pod详情: kubectl describe pod <pod-name> -n <namespace>" \
    " 3. 查看事件: kubectl get events -A --sort-by='.lastTimestamp'" \
    " 4. 重新部署: ./scripts/deploy-all.sh" \
    ""
  exit 1
fi

# Success path: no critical failures were recorded.
log "✓ 所有关键检查通过!"
if [ "$WARNING_CHECKS" -gt 0 ]; then
  log_warn "存在 $WARNING_CHECKS 个警告项,建议检查"
fi

printf '%s\n' \
  "" \
  "==========================================" \
  " 快速访问指南" \
  "==========================================" \
  ""

# URLs are printed only when a node address is known and the corresponding
# port variable is set to something other than the "N/A" placeholder.
if [ -n "${NODE_IP:-}" ]; then
  if [ "${GITEA_NODEPORT:-N/A}" != "N/A" ]; then
    printf '%s\n' \
      "Gitea:" \
      " http://$NODE_IP:$GITEA_NODEPORT" \
      ""
  fi

  if [ "${ARGOCD_NODEPORT:-N/A}" != "N/A" ]; then
    printf '%s\n' \
      "ArgoCD:" \
      " https://$NODE_IP:$ARGOCD_NODEPORT" \
      " 用户名: admin" \
      ""
  fi
fi

printf '%s\n' \
  "常用命令:" \
  " 查看所有Pod: kubectl get pods -A" \
  " 查看节点: kubectl get nodes" \
  " 查看服务: kubectl get svc -A" \
  ""
exit 0