Golang

关注公众号 jb51net

关闭
首页 > 脚本专栏 > Golang > Golang健康检查接口

Golang健康检查接口的实现

作者:穿越時空

本文主要介绍了Golang健康检查接口的实现,包括存活探针和就绪探针的区别,文中通过示例代码介绍的非常详细,对大家的学习或者工作具有一定的参考学习价值,需要的朋友们下面随着小编来一起学习学习吧

Go 语言健康检查接口的设计需要区分存活探针(Liveness)和就绪探针(Readiness),两者在 Kubernetes 中的行为截然不同。以下是完整的设计指南和代码实现。

1. 三种探针的区别

| 探针类型 | 职责 | 失败后果 | 适用场景 |
| --- | --- | --- | --- |
| Liveness(存活) | 进程是否活着 | 重启容器 | 检测死锁、内存泄漏导致的僵死 |
| Readiness(就绪) | 是否准备好接收流量 | 从负载均衡移除 | 初始化未完成、依赖服务不可用 |
| Startup(启动) | 慢启动应用是否已启动 | 重启容器(在启动探针成功之前,其他探针被禁用) | 启动耗时长的应用(JVM/加载大量数据) |

2. Go 健康检查接口实现

基础版本(最小可用)

package main
import (
	"encoding/json"
	"log"
	"net/http"
	"sync/atomic"
	"time"
)
// ready records whether startup initialization has finished: initialize sets
// it to true, and the readiness and startup handlers read it.
var ready atomic.Bool
// main kicks off initialization in the background, registers the three probe
// endpoints on the default mux, and serves them on :8080.
func main() {
	go initialize()

	// Liveness, readiness and (optional) startup probe endpoints.
	http.HandleFunc("/healthz", livenessHandler)
	http.HandleFunc("/readyz", readinessHandler)
	http.HandleFunc("/startupz", startupHandler)

	// http.ListenAndServe uses a server without any timeouts, so a client
	// that never finishes sending its request headers ties up a connection
	// forever. An explicit server bounds that; a nil Handler still means
	// http.DefaultServeMux, so the registrations above are served as before.
	server := &http.Server{
		Addr:              ":8080",
		ReadHeaderTimeout: 5 * time.Second,
	}

	log.Println("Server starting on :8080")
	log.Fatal(server.ListenAndServe())
}
// initialize stands in for slow startup work: it waits five seconds, then
// flips the ready flag and logs that the process is ready.
func initialize() {
	const startupDelay = 5 * time.Second // simulated startup cost

	log.Println("Initializing...")
	<-time.After(startupDelay)
	ready.Store(true)
	log.Println("Ready")
}
// livenessHandler answers the liveness probe. It only proves that the process
// can still serve HTTP; it deliberately checks no external dependencies.
// It always responds 200 with the JSON body {"status":"alive"}.
func livenessHandler(w http.ResponseWriter, r *http.Request) {
	// Headers have to be set before WriteHeader. Without an explicit type the
	// JSON body is not labelled as application/json.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	if err := json.NewEncoder(w).Encode(map[string]string{"status": "alive"}); err != nil {
		// The status line is already on the wire, so logging is all that is left.
		log.Printf("liveness: writing response: %v", err)
	}
}
// readinessHandler answers the readiness probe: 503 with
// {"status":"not ready"} until the ready flag is set, then 200 with
// {"status":"ready"}.
func readinessHandler(w http.ResponseWriter, r *http.Request) {
	code, state := http.StatusOK, "ready"
	if !ready.Load() {
		code, state = http.StatusServiceUnavailable, "not ready"
	}

	// Headers have to be set before WriteHeader. Without an explicit type the
	// JSON body is not labelled as application/json.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(code)
	if err := json.NewEncoder(w).Encode(map[string]string{"status": state}); err != nil {
		// The status line is already on the wire, so logging is all that is left.
		log.Printf("readiness: writing response: %v", err)
	}
}
// startupHandler answers the startup probe for slow-starting applications.
// It writes no body: 503 while the ready flag is unset, 200 afterwards.
func startupHandler(w http.ResponseWriter, r *http.Request) {
	code := http.StatusOK
	if !ready.Load() {
		code = http.StatusServiceUnavailable
	}
	w.WriteHeader(code)
}

生产级版本(带依赖检查)

package health
import (
	"context"
	"database/sql"
	"encoding/json"
	"errors"
	"net/http"
	"sync"
	"time"
)
// Status is the health state reported for a single check or for the whole
// service.
type Status string

// The health states, spelled as they appear in JSON responses.
const (
	StatusHealthy   = Status("healthy")
	StatusUnhealthy = Status("unhealthy")
	StatusDegraded  = Status("degraded")
)
// CheckResult is the outcome of a single dependency check.
//
// Duration stays a time.Duration in Go, but it is written to and read from
// JSON as fractional milliseconds so the value matches the "duration_ms" key.
// Encoding the field directly would put a nanosecond count under that name.
type CheckResult struct {
	Name     string        `json:"name"`
	Status   Status        `json:"status"`
	Message  string        `json:"message,omitempty"`
	Duration time.Duration `json:"duration_ms,omitempty"`
}

// checkResultJSON is the wire representation of CheckResult.
type checkResultJSON struct {
	Name       string  `json:"name"`
	Status     Status  `json:"status"`
	Message    string  `json:"message,omitempty"`
	DurationMS float64 `json:"duration_ms,omitempty"`
}

// MarshalJSON encodes the result with Duration converted to milliseconds.
// A zero duration is omitted, as is an empty message.
func (c CheckResult) MarshalJSON() ([]byte, error) {
	return json.Marshal(checkResultJSON{
		Name:       c.Name,
		Status:     c.Status,
		Message:    c.Message,
		DurationMS: float64(c.Duration) / float64(time.Millisecond),
	})
}

// UnmarshalJSON is the inverse of MarshalJSON: it reads duration_ms as
// milliseconds and stores it back as a time.Duration.
func (c *CheckResult) UnmarshalJSON(data []byte) error {
	var wire checkResultJSON
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}
	*c = CheckResult{
		Name:     wire.Name,
		Status:   wire.Status,
		Message:  wire.Message,
		Duration: time.Duration(wire.DurationMS * float64(time.Millisecond)),
	}
	return nil
}
// HealthResponse is the JSON document returned by the health endpoints.
type HealthResponse struct {
	// Status is the aggregated state of the service.
	Status Status `json:"status"`
	// Timestamp is when the response was produced.
	Timestamp time.Time `json:"timestamp"`
	// Message carries an optional explanation for the overall status.
	// ReadinessHandler sets it, so the struct must declare it for the
	// package to compile.
	Message string `json:"message,omitempty"`
	// Checks holds the individual check results, when any were run.
	Checks []CheckResult `json:"checks,omitempty"`
}
// Checker is the interface a dependency check implements so it can be
// registered with Service.
type Checker interface {
	// Name returns the label that identifies this check.
	Name() string
	// Check runs the check under ctx and returns its result.
	Check(ctx context.Context) CheckResult
}
// Service holds the registered checkers and the readiness flag used by the
// liveness and readiness HTTP handlers.
type Service struct {
	// checkers are run by ReadinessHandler in registration order.
	// NOTE(review): Register appends without taking mu, so registration
	// appears intended to finish before the handlers start serving.
	checkers []Checker
	// timeout bounds one full readiness evaluation.
	timeout  time.Duration
	// mu guards ready.
	mu       sync.RWMutex
	// ready is set through SetReady and is false until then.
	ready    bool
}
// NewService returns a Service with no checkers, not yet ready, whose
// readiness evaluation is bounded by timeout.
func NewService(timeout time.Duration) *Service {
	svc := new(Service)
	svc.timeout = timeout
	return svc
}
// Register appends checkers to the set run by the readiness handler, keeping
// the order in which they were passed.
func (s *Service) Register(checkers ...Checker) {
	if len(checkers) == 0 {
		return
	}
	s.checkers = append(s.checkers, checkers...)
}
// SetReady records, under the write lock, whether the service is ready to
// receive traffic.
func (s *Service) SetReady(ready bool) {
	s.mu.Lock()
	s.ready = ready
	s.mu.Unlock()
}
// IsReady returns the readiness flag, read under the read lock.
func (s *Service) IsReady() bool {
	s.mu.RLock()
	state := s.ready
	s.mu.RUnlock()
	return state
}
// LivenessHandler answers the liveness probe. It only reports that the
// process is alive and always responds 200 with a healthy HealthResponse.
func (s *Service) LivenessHandler(w http.ResponseWriter, r *http.Request) {
	// Liveness must stay simple and must not check external dependencies.
	// Otherwise a database outage would get every Pod restarted and turn one
	// failure into a cascading one.
	response := HealthResponse{
		Status:    StatusHealthy,
		Timestamp: time.Now(),
	}

	// Headers have to be set before WriteHeader. Without an explicit type the
	// JSON body is not labelled as application/json.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// Deliberately ignored: the status line is already sent, so a failed
	// write cannot be turned into a different response.
	_ = json.NewEncoder(w).Encode(response)
}
// ReadinessHandler answers the readiness probe.
//
// While the service has not been marked ready it responds 503 with a single
// "initialization" check explaining why. Otherwise it runs every registered
// checker in order under one shared deadline and aggregates the results: any
// unhealthy check makes the response unhealthy (503); a degraded check with
// no unhealthy one makes it degraded (200); otherwise it is healthy (200).
func (s *Service) ReadinessHandler(w http.ResponseWriter, r *http.Request) {
	// Headers have to be set before WriteHeader. Without an explicit type the
	// JSON body is not labelled as application/json.
	w.Header().Set("Content-Type", "application/json")

	if !s.IsReady() {
		w.WriteHeader(http.StatusServiceUnavailable)
		// The reason is reported as a check entry because HealthResponse is
		// only relied on here for its Status, Timestamp and Checks fields.
		// Encode error deliberately ignored: the status line is already sent.
		_ = json.NewEncoder(w).Encode(HealthResponse{
			Status:    StatusUnhealthy,
			Timestamp: time.Now(),
			Checks: []CheckResult{{
				Name:    "initialization",
				Status:  StatusUnhealthy,
				Message: "service not initialized",
			}},
		})
		return
	}

	// A non-positive timeout would yield an already-expired context and fail
	// every check, so in that case only the request's own context applies.
	ctx := r.Context()
	if s.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, s.timeout)
		defer cancel()
	}

	results := make([]CheckResult, 0, len(s.checkers))
	overallStatus := StatusHealthy
	for _, checker := range s.checkers {
		start := time.Now()
		result := checker.Check(ctx)
		result.Duration = time.Since(start)
		if result.Name == "" {
			// Keep entries identifiable even when a checker omits its name.
			result.Name = checker.Name()
		}
		results = append(results, result)

		switch {
		case result.Status == StatusUnhealthy:
			overallStatus = StatusUnhealthy
		case result.Status == StatusDegraded && overallStatus == StatusHealthy:
			// Degraded never overrides an earlier unhealthy result.
			overallStatus = StatusDegraded
		}
	}

	code := http.StatusOK
	if overallStatus == StatusUnhealthy {
		code = http.StatusServiceUnavailable
	}
	w.WriteHeader(code)
	// Encode error deliberately ignored: the status line is already sent.
	_ = json.NewEncoder(w).Encode(HealthResponse{
		Status:    overallStatus,
		Timestamp: time.Now(),
		Checks:    results,
	})
}
// ========== Concrete checker implementations ==========

// DatabaseChecker checks a database connection through a *sql.DB handle.
type DatabaseChecker struct {
	DB *sql.DB
}

// Name returns the label used for this check, "database".
func (d *DatabaseChecker) Name() string {
	const label = "database"
	return label
}
// Check pings the database under ctx and reports healthy when the ping
// succeeds. A failed ping, or a checker built without a DB handle, is
// reported as unhealthy with the reason in Message.
func (d *DatabaseChecker) Check(ctx context.Context) CheckResult {
	result := CheckResult{Name: d.Name(), Status: StatusHealthy}

	// A checker constructed without a handle would otherwise panic inside
	// the probe handler instead of reporting a failed check.
	if d.DB == nil {
		result.Status = StatusUnhealthy
		result.Message = "database handle is nil"
		return result
	}

	// PingContext is used instead of a real query so the probe stays cheap.
	if err := d.DB.PingContext(ctx); err != nil {
		result.Status = StatusUnhealthy
		result.Message = err.Error()
	}
	return result
}
// RedisChecker checks a Redis connection.
type RedisChecker struct {
	// Placeholder: a redis client would be held here.
}

// Name returns the label used for this check, "redis".
func (r *RedisChecker) Name() string {
	const label = "redis"
	return label
}
// Check is a placeholder that always reports healthy and never touches ctx.
// A real implementation would follow the same shape as DatabaseChecker.Check.
func (r *RedisChecker) Check(ctx context.Context) CheckResult {
	var result CheckResult
	result.Name = r.Name()
	result.Status = StatusHealthy
	return result
}
// ExternalAPIChecker checks an external HTTP API by requesting URL through
// Client.
type ExternalAPIChecker struct {
	URL    string
	Client *http.Client
}

// Name returns the label used for this check, "external-api".
func (e *ExternalAPIChecker) Name() string {
	const label = "external-api"
	return label
}
// Check sends a GET request to e.URL under ctx. A 2xx response is healthy.
// Every failure — an unusable URL, a transport error, or a non-2xx status —
// is reported as degraded rather than unhealthy, because an unavailable
// external service should not take this service out of rotation.
// http.DefaultClient is used when e.Client is nil.
func (e *ExternalAPIChecker) Check(ctx context.Context) CheckResult {
	degraded := func(msg string) CheckResult {
		return CheckResult{Name: e.Name(), Status: StatusDegraded, Message: msg}
	}

	// The request error must be checked: on a malformed URL req is nil and
	// passing it to Do would panic.
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, e.URL, nil)
	if err != nil {
		return degraded(err.Error())
	}

	client := e.Client
	if client == nil {
		// ctx still bounds the request even though the default client has
		// no timeout of its own.
		client = http.DefaultClient
	}

	resp, err := client.Do(req)
	if err != nil {
		return degraded(err.Error())
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 200 && resp.StatusCode < 300 {
		return CheckResult{Name: e.Name(), Status: StatusHealthy}
	}
	return degraded("status: " + resp.Status)
}

3. 优雅关闭集成

package main
import (
	"context"
	"log"
	"net/http"
	"os"
	"os/signal"
	"syscall"
	"time"
)
// main wires the health service into an HTTP server on :8080 and shuts the
// server down gracefully on SIGTERM or SIGINT.
//
// NOTE(review): this snippet uses db and the health package without
// declaring or importing them; the surrounding program must supply both.
func main() {
	healthSvc := health.NewService(3 * time.Second)
	healthSvc.Register(
		&health.DatabaseChecker{DB: db},
		&health.RedisChecker{},
	)

	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", healthSvc.LivenessHandler)
	mux.HandleFunc("/readyz", healthSvc.ReadinessHandler)
	server := &http.Server{
		Addr:    ":8080",
		Handler: mux,
		// Bound how long a client may take to send its request headers so a
		// stalled connection cannot be held open forever.
		ReadHeaderTimeout: 5 * time.Second,
	}

	// Start serving in the background.
	go func() {
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Fatal(err)
		}
	}()

	// The readiness flag starts out false. Without this call /readyz would
	// answer 503 forever and the Pod would never be sent traffic.
	healthSvc.SetReady(true)

	// Wait for a termination signal.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT)
	<-sigChan

	// Graceful shutdown: mark the service not ready first so no new traffic
	// is routed here.
	log.Println("Shutting down gracefully...")
	healthSvc.SetReady(false)

	// Give the load balancer time to notice (sized to the readiness probe
	// period in the Kubernetes configuration).
	time.Sleep(10 * time.Second)

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	if err := server.Shutdown(ctx); err != nil {
		log.Printf("Server forced to shutdown: %v", err)
	}
	log.Println("Server exited")
}

4. Kubernetes 配置

apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-go-app
spec:
  replicas: 3
  # apps/v1 requires a selector, and it must match the Pod template labels.
  selector:
    matchLabels:
      app: my-go-app
  template:
    metadata:
      labels:
        app: my-go-app
    spec:
      # Shutdown budget: preStop sleep (10s) + the app's own drain wait (10s)
      # + its server.Shutdown timeout (30s) is more than the 30s default, so
      # the grace period is raised to avoid a SIGKILL mid-shutdown.
      terminationGracePeriodSeconds: 60
      containers:
        - name: app
          image: my-go-app:latest
          ports:
            - containerPort: 8080
          # Liveness probe: only checks that the process is alive.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            # Wait after container start before the first probe.
            initialDelaySeconds: 10
            # Probe frequency.
            periodSeconds: 10
            # How long the kubelet waits for an answer.
            timeoutSeconds: 3
            # Consecutive failures before the container is restarted.
            failureThreshold: 3
          # Readiness probe: checks that dependencies are ready.
          readinessProbe:
            httpGet:
              path: /readyz
              port: 8080
            initialDelaySeconds: 5
            # Probed more often so traffic is switched quickly.
            periodSeconds: 5
            # Must be longer than the check timeout in the Go code (3s);
            # otherwise the kubelet gives up before the handler can answer.
            timeoutSeconds: 4
            failureThreshold: 3
          # Startup probe: protects slow-starting applications (optional).
          startupProbe:
            httpGet:
              path: /readyz
              port: 8080
            # Up to 30 failures are tolerated.
            failureThreshold: 30
            # Total startup allowance = 30 * 10 = 300s.
            periodSeconds: 10
          lifecycle:
            preStop:
              exec:
                # Leave time for graceful shutdown.
                command: ["/bin/sh", "-c", "sleep 10"]

5. 关键最佳实践 

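✅ 应该做的

- 存活探针保持简单,只反映进程自身是否存活。
- 就绪探针检查关键依赖,并为整次检查设置超时。
- 依赖检查使用轻量操作(如 PingContext),避免复杂查询。
- 非关键的外部服务不可用时标记为降级(degraded),而不是不可用。
- 优雅关闭时先标记为未就绪,等待负载均衡器摘除流量后再关闭服务。

❌ 不应该做的

- 不要在存活探针中检查数据库等外部依赖,否则依赖故障会导致所有 Pod 被重启,引发级联故障。
- 不要在健康检查中执行复杂或耗时的查询。
- 不要让探针的 timeoutSeconds 小于代码中的检查超时,否则 kubelet 会在处理函数返回之前就判定探测失败。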

6. gRPC 健康检查

如果服务使用 gRPC,需要实现标准健康检查协议:

import (
	"context"

	"google.golang.org/grpc/health/grpc_health_v1"
)
// HealthService implements the gRPC health checking service. It holds no
// state.
type HealthService struct{}
// Check handles the unary health RPC. It always reports SERVING and does not
// look at ctx or at the service name in req.
func (h *HealthService) Check(ctx context.Context, req *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) {
	resp := new(grpc_health_v1.HealthCheckResponse)
	resp.Status = grpc_health_v1.HealthCheckResponse_SERVING
	return resp, nil
}
// Watch handles the streaming health RPC. It sends the current status
// (always SERVING, matching Check) once, then keeps the stream open until the
// client cancels or the server shuts down.
//
// Returning nil straight away would close the stream with an OK status
// before any health status had been sent, leaving a watching client with
// nothing to act on.
func (h *HealthService) Watch(req *grpc_health_v1.HealthCheckRequest, stream grpc_health_v1.Health_WatchServer) error {
	current := &grpc_health_v1.HealthCheckResponse{
		Status: grpc_health_v1.HealthCheckResponse_SERVING,
	}
	if err := stream.Send(current); err != nil {
		return err
	}

	// The status never changes in this implementation, so there is nothing
	// further to send; just wait for the stream to end.
	<-stream.Context().Done()
	return stream.Context().Err()
}
// Register the health service on the gRPC server.
// NOTE(review): grpcServer is not declared in this snippet, and this call has
// to sit inside a function such as main — confirm against the real program.
grpc_health_v1.RegisterHealthServer(grpcServer, &HealthService{})

Kubernetes 自 v1.24 起默认启用 gRPC 探针(Beta),并在 v1.27 正式发布(GA):

# gRPC liveness probe against the health service on port 50051.
# NOTE(review): the Go Check handler shown earlier ignores the requested
# service name, so any value of "service" gets SERVING — confirm that is
# intended.
livenessProbe:
  grpc:
    port: 50051
    service: "my-service"

到此这篇关于Golang健康检查接口的实现的文章就介绍到这了,更多相关Golang健康检查接口内容请搜索脚本之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持脚本之家!

您可能感兴趣的文章:
阅读全文