服务器其它

关注公众号 jb51net

关闭
首页 > 网站技巧 > 服务器 > 服务器其它 > Keepalived实现集群高可用

Keepalived实现集群高可用全过程

作者:feng68_

文章描述了三种高可用架构的实现:LVS DR模式、Nginx/HAProxy高可用方案以及数据库高可用性,每种方案都详细描述了环境架构、实践步骤和关键配置,以确保系统的高可用性和可靠性

1 场景1:Keepalived + LVS

1.1.1 环境

直接用之前做的LVS的DR模式了,加个VSNode即可。

LVS(Linux Virtual Server,Linux虚拟服务器)

主机名角色IP地址VIP网关VRRP状态
Router路由器172.25.254.100/192.168.0.100NULL172.25.254.2NULL
VSNode1LVS Director (KA1)192.168.0.50192.168.0.200(keepalived管理VIP浮动)192.168.0.100MASTER (priority 100)
VSNode2LVS Director (KA2)192.168.0.60192.168.0.200(keepalived管理VIP浮动)192.168.0.100BACKUP (priority 80)
RS1Real Server192.168.0.20192.168.0.200(lo)192.168.0.100NULL
RS2Real Server192.168.0.30192.168.0.200(lo)192.168.0.100NULL
Client测试机172.25.254.101NULL可以抵达Router都可NULL
 # Router
 [root@Router ~]# echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
 [root@Router ~]# sysctl -p
 net.ipv4.ip_forward = 1
 [root@Router ~]# ip addr | egrep "eth0$|eth1$"
     inet 172.25.254.100/24 brd 172.25.254.255 scope global noprefixroute eth0
     inet 192.168.0.100/24 brd 192.168.0.255 scope global noprefixroute eth1
 # VS50
 [root@VSNode50 ~]# ip addr | grep "eth0$"
     inet 192.168.0.50/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@VSNode50 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # VS60
 [root@VSNode60 ~]# ip addr | grep "eth0$"
     inet 192.168.0.60/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@VSNode60 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # RS20
 [root@RS20 ~]# ip a | egrep "lo$|eth0"
     inet 127.0.0.1/8 scope host lo
     inet 192.168.0.200/32 scope global lo
 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
     inet 192.168.0.20/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@RS20 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # RS30
 [root@RS30 ~]# ip a | egrep "lo$|eth0"
     inet 127.0.0.1/8 scope host lo
     inet 192.168.0.200/32 scope global lo
 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
     inet 192.168.0.30/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@RS30 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # 防止RS响应VIP的ARP请求,避免VIP冲突(永久生效-->改/etc/sysctl.conf文件)
 [root@RS20 ~]# cat >> /etc/sysctl.conf << 'EOF'
 > net.ipv4.conf.all.arp_ignore = 1
 > net.ipv4.conf.all.arp_announce = 2
 > net.ipv4.conf.lo.arp_ignore = 1
 > net.ipv4.conf.lo.arp_announce = 2
 > EOF
 [root@RS20 ~]# sysctl -p
 net.ipv4.conf.all.arp_ignore = 1
 net.ipv4.conf.all.arp_announce = 2
 net.ipv4.conf.lo.arp_ignore = 1
 net.ipv4.conf.lo.arp_announce = 2
 [root@RS20 ~]# sysctl net.ipv4.conf.all.arp_ignore
 net.ipv4.conf.all.arp_ignore = 1
 [root@RS20 ~]# sysctl net.ipv4.conf.all.arp_announce
 net.ipv4.conf.all.arp_announce = 2
 [root@RS30 ~]# cat >> /etc/sysctl.conf << 'EOF'
 > net.ipv4.conf.all.arp_ignore = 1
 > net.ipv4.conf.all.arp_announce = 2
 > net.ipv4.conf.lo.arp_ignore = 1
 > net.ipv4.conf.lo.arp_announce = 2
 > EOF
 [root@RS30 ~]# sysctl -p
 net.ipv4.conf.all.arp_ignore = 1
 net.ipv4.conf.all.arp_announce = 2
 net.ipv4.conf.lo.arp_ignore = 1
 net.ipv4.conf.lo.arp_announce = 2
 [root@RS30 ~]# sysctl net.ipv4.conf.all.arp_ignore
 net.ipv4.conf.all.arp_ignore = 1
 [root@RS30 ~]# sysctl net.ipv4.conf.all.arp_announce
 net.ipv4.conf.all.arp_announce = 2

ARP参数:

参数含义
arp_ignore = 1:只响应目的IP配置在收到请求的那个接口上的ARP请求,避免lo上的VIP对外响应ARP
arp_announce = 2:发送ARP时始终选用最佳本地地址(出接口地址)作为源,避免把VIP的MAC地址宣告出去
 [root@VSNode50 ~]# dnf install -y keepalived ipvsadm >/dev/null
 [root@VSNode60 ~]# dnf install -y keepalived ipvsadm >/dev/null
 [root@RS20 ~]# dnf install httpd -y >/dev/null
 [root@RS20 ~]# echo RS20 - 192.168.0.20 > /var/www/html/index.html
 [root@RS20 ~]# systemctl enable --now httpd
 [root@RS30 ~]# dnf install httpd -y >/dev/null
 [root@RS30 ~]#  echo RS30 - 192.168.0.30 > /var/www/html/index.html
 [root@RS30 ~]# systemctl enable --now httpd

1.1.2 实践

Router火墙规则

 [root@Router ~]# iptables -t nat -F
 [root@Router ~]# iptables -t nat -L
 Chain PREROUTING (policy ACCEPT)
 target     prot opt source               destination
 ​
 Chain INPUT (policy ACCEPT)
 target     prot opt source               destination
 ​
 Chain OUTPUT (policy ACCEPT)
 target     prot opt source               destination
 ​
 Chain POSTROUTING (policy ACCEPT)
 target     prot opt source               destination
 [root@Router ~]# iptables -t nat -A PREROUTING -d 172.25.254.100 -p tcp --dport 80 -j DNAT --to-destination 192.168.0.200:80
 [root@Router ~]# iptables -t nat -A POSTROUTING -s 192.168.0.0/24 -o eth0 -j SNAT --to-source 172.25.254.100
 [root@Router ~]# iptables -t nat -L -n -v
 Chain PREROUTING (policy ACCEPT 1 packets, 108 bytes)
  pkts bytes target     prot opt in     out     source               destination
     0     0 DNAT       6    --  *      *       0.0.0.0/0            172.25.254.100       tcp dpt:80 to:192.168.0.200:80
 Chain POSTROUTING (policy ACCEPT 1 packets, 76 bytes)
  pkts bytes target     prot opt in     out     source               destination
     0     0 SNAT       0    --  *      eth0    192.168.0.0/24       0.0.0.0/0            to:172.25.254.100
 [root@Router ~]# dnf install -y iptables-services > /dev/null
 [root@Router ~]# iptables-save > /etc/sysconfig/iptables
 [root@Router ~]# systemctl enable --now iptables.service

VS

 # keepalive
 [root@VSNode50 ~]# vim /etc/keepalived/keepalived.conf
   1 ! Configuration File for keepalived
   2
   3 global_defs {
   4    notification_email {
   5      acassen@firewall.loc
   6      failover@firewall.loc
   7      sysadmin@firewall.loc
   8    }
   9    notification_email_from Alexandre.Cassen@firewall.loc
  10    smtp_server 192.168.200.1
  11    smtp_connect_timeout 30
  12    router_id VS50
  13    vrrp_skip_check_adv_addr
  14    #vrrp_strict     # 必须注释掉,否则会添加iptables规则阻断转发
  15    vrrp_garp_interval 0
  16    vrrp_gna_interval 0
  17 }
  18
  19 # VRRP实例:实现Director高可用
  20 vrrp_instance VI_1 {
  21     state MASTER
  22     interface eth0
  23     virtual_router_id 51
  24     priority 100
  25     advert_int 1
  26     authentication {
  27         auth_type PASS
  28         auth_pass 1111
  29     }
  30     virtual_ipaddress {
  31         192.168.0.200/32 dev eth0 label eth0:0
  32     }
  33 }
  34 # LVS虚拟服务器配置(核心)
  35 virtual_server 192.168.0.200 80 {
  36     delay_loop 6                      # 健康检查间隔(秒)
  37     lb_algo rr                        # 负载均衡算法:rr=轮询,wrr=加权轮询
  38     lb_kind DR                        # LVS模式:DR=直接路由(性能最佳)
  39     protocol TCP                      # 协议类型
  40
  41     # 后端真实服务器 RS1
  42     real_server 192.168.0.20 80 {
  43         weight 1                    # 权重(rr算法下无效,wrr时生效)
  44         TCP_CHECK {                 # TCP健康检查
  45             connect_timeout 3       # 连接超时(秒)
  46             nb_get_retry 3          # 重试次数
  47             delay_before_retry 3    # 重试间隔(秒)
  48         }
  49     }
  50
  51     # 后端真实服务器 RS2
  52     real_server 192.168.0.30 80 {
  53         weight 1
  54         TCP_CHECK {
  55             connect_timeout 3
  56             nb_get_retry 3
  57             delay_before_retry 3
  58         }
  59     }
  60 }
 [root@VSNode50 ~]# scp /etc/keepalived/keepalived.conf root@192.168.0.60:/etc/keepalived/keepalived.conf
 Warning: Permanently added '192.168.0.60' (ED25519) to the list of known hosts.
 keepalived.conf                                                                   100% 1578     3.1MB/s   00:00
 [root@VSNode50 ~]# systemctl enable --now keepalived.service
 [root@VSNode60 ~]# vim /etc/keepalived/keepalived.conf
 # VS60
 ​
   1 ! Configuration File for keepalived
   2
   3 global_defs {
 ………………
  12    router_id VS60
  13    vrrp_skip_check_adv_addr
  14    #vrrp_strict
 ………………
  17 }
  19 # VRRP实例:实现Director高可用
  20 vrrp_instance VI_1 {
  21     state BACKUP
 ………………
  24     priority 80
 ………………
  33 }
 # LVS配置与VSNode1完全相同
 [root@VSNode60 ~]# systemctl enable --now keepalived.service
 ​
 # 查看LVS规则
 [root@VSNode60 ~]# ipvsadm -Ln
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port Scheduler Flags
   -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
 TCP  192.168.0.200:80 rr
   -> 192.168.0.20:80              Route   1      0          0
   -> 192.168.0.30:80              Route   1      0          0

测试

 [root@VSNode50 ~]# ip a | grep eth0:0
     inet 192.168.0.200/32 scope global eth0:0
 [root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 [root@VSNode50 ~]# ipvsadm -Ln --stats
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
   -> RemoteAddress:Port
 TCP  192.168.0.200:80                    4       24        0     1592        0
   -> 192.168.0.20:80                     2       12        0      796        0
   -> 192.168.0.30:80                     2       12        0      796        0
 [root@VSNode60 ~]# ipvsadm -Ln --stats
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
   -> RemoteAddress:Port
 TCP  192.168.0.200:80                    0        0        0        0        0
   -> 192.168.0.20:80                     0        0        0        0        0
   -> 192.168.0.30:80                     0        0        0        0        0
 [root@VSNode50 ~]# systemctl stop keepalived.service
 [root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 [root@VSNode60 ~]# ip a | grep eth0:0
     inet 192.168.0.200/32 scope global eth0:0
 [root@VSNode60 ~]# ipvsadm -Ln --stats
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
   -> RemoteAddress:Port
 TCP  192.168.0.200:80                    4       24        0     1592        0
   -> 192.168.0.20:80                     2       12        0      796        0
   -> 192.168.0.30:80                     2       12        0      796        0

2 场景2:Keepalived + Nginx/HAProxy

2.1 Keepalived+Nginx方案

2.1.1 环境

架构设计

环境信息
节点IP地址角色VRRP实例
KA1172.25.254.50Nginx + KeepalivedWEB_VIP: MASTER, DB_VIP: BACKUP
KA2172.25.254.60Nginx + KeepalivedWEB_VIP: BACKUP, DB_VIP: MASTER
RS1172.25.254.20后端Web服务器NULL
RS2172.25.254.30后端Web服务器NULL
VIP1172.25.254.100Web服务入口主: KA1, 备: KA2
VIP2172.25.254.200备用入口(或DB服务)主: KA2, 备: KA1

2.1.2 实践

RS测试页
 [root@RS1 ~]# dnf install nginx -y > /dev/null
 [root@RS1 ~]# echo "RS1 - 172.25.254.20" > /usr/share/nginx/html/index.html
 [root@RS2 ~]# echo "RS2 - 172.25.254.30" > /usr/share/nginx/html/index.html
 [root@RS2 ~]# systemctl enable --now nginx.service
 [2026-02-22 15:29.09]  ~
 [Is XiaFeng Computer.IsXiaFengComputer] ⮞ curl 172.25.254.20
 RS1 - 172.25.254.20
 [2026-02-22 15:29.15]  ~
 [Is XiaFeng Computer.IsXiaFengComputer] ⮞ curl 172.25.254.30
 RS2 - 172.25.254.30
KA实现高可用
KA1
 # KA配置Nginx反向代理
 [root@KA1 ~]# dnf install nginx -y > /dev/null
 [root@KA1 ~]# vim /etc/nginx/conf.d/upstream.conf
   1 upstream backend {
   2     server 172.25.254.20:80 weight=5;
   3     server 172.25.254.30:80 weight=5;
   4     keepalive 32;  # 长连接
   5 }
   6 server {
   7     listen 80;
   8     server_name localhost;
   9     location / {
  10         proxy_pass http://backend;
  11         proxy_set_header Host $host;
  12         proxy_set_header X-Real-IP $remote_addr;
  13         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
  14         proxy_connect_timeout 5s;
  15         proxy_send_timeout 5s;
  16         proxy_read_timeout 5s;
  17     }
  18     # 健康检查端点(用于Keepalived检测)
  19     location /health {
  20         access_log off;
  21         return 200 "healthy\n";
  22         add_header Content-Type text/plain;
  23     }
  24 }
 # 采用双主模式为基础
 # 创建 health 检查页
 [root@KA1 ~]# mkdir /usr/share/nginx/html/health
 [root@KA1 ~]# echo "OK" > /usr/share/nginx/html/health/health.html
 # 健康检查脚本
 [root@KA1 ~]# vim /etc/keepalived/check_nginx.sh
   1 #!/bin/bash
   3 # 检查Nginx进程
   4 if ! pgrep -x "nginx" > /dev/null; then
   5     # 尝试启动Nginx
   6     systemctl start nginx
   7     sleep 2
   8     # 再次检查
   9     if ! pgrep -x "nginx" > /dev/null; then
  10         # Nginx启动失败,返回错误触发切换
  11         exit 1
  12     fi
  13 fi
  15 # 检查Nginx端口是否监听
  16 if ! ss -tlnp | grep -q ":80"; then
  17     exit 1
  18 fi
  20 # 可选:检查HTTP响应
  21 if ! curl -sf http://localhost/health > /dev/null; then
  22      exit 1
  23 fi
  24 exit 0
 [root@KA1 ~]# chmod +x /etc/keepalived/check_nginx.sh
 [root@KA1 ~]# nginx -t && systemctl enable --now nginx
 [root@KA1 ~]# /etc/keepalived/check_nginx.sh && echo "Check OK" || echo "Check FAILED"
 Check OK
 # 通知脚本
 [root@KA1 ~]# systemctl is-active postfix.service
 inactive
 [root@KA1 ~]# systemctl start postfix.service
 [root@KA1 ~]# chmod +x /etc/keepalived/notify_nginx.sh
   1 #!/bin/bash
   2 mail_dest='收件人'
   3 TYPE=$1          # WEB_VIP 或 DB_VIP
   4 STATE=$2         # MASTER, BACKUP, FAULT
   5 LOG_FILE="/var/log/keepalived/notify.log"
   6 mkdir -p $(dirname $LOG_FILE)
   7 echo "$(date '+%Y-%m-%d %H:%M:%S') - Instance: $TYPE, State: $STATE, Host: $(hostname)" >> $LOG_FILE
   8 case $STATE in
   9     MASTER)
  10         # 成为Master时的额外操作
  11         echo "$(date) - Becoming MASTER for $TYPE" >> $LOG_FILE
  12         # 可以在这里添加告警通知
  13         ;;
  14     BACKUP)
  15         echo "$(date) - Becoming BACKUP for $TYPE" >> $LOG_FILE
  16         ;;
  17     FAULT)
  18         echo "$(date) - FAULT state for $TYPE" >> $LOG_FILE
  19         # 发送紧急告警
  20         ;;
  21 esac
 # KA1中keepalived设置
 [root@KA1 ~]# vim /etc/keepalived/keepalived.conf
   3 global_defs {
 ………………
  18    enable_script_security   # 启用脚本执行权限
  19    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
  20 }
  21
  22 # Nginx健康检查脚本
  23 vrrp_script check_nginx {
  24     script "/etc/keepalived/check_nginx.sh"
  25     interval 2          # 每2秒检查一次
  26     weight -20          # 检查失败,优先级降低20
  27     fall 2              # 连续2次失败才判定失败
  28     rise 1              # 1次成功恢复
  29 }
  30
  31 vrrp_instance WEB_VIP {
  32     state MASTER
  33     interface eth0
  34     virtual_router_id 51
  35     priority 100
  36     advert_int 1
  37     authentication {
  38         auth_type PASS
  39         auth_pass 1111
  40     }
  41     virtual_ipaddress {
  42         172.25.254.100/24 dev eth0 label eth0:1
  43     }
  44     # 追踪健康检查脚本
  45     track_script {
  46         check_nginx
  47     }
  48     # 状态切换通知
  49     notify_master "/etc/keepalived/notify_nginx.sh WEB_VIP MASTER"
  50     notify_backup "/etc/keepalived/notify_nginx.sh WEB_VIP BACKUP"
  51     notify_fault "/etc/keepalived/notify_nginx.sh WEB_VIP FAULT"
  52 }
  53
  54 vrrp_instance DB_VIP {
  55     state BACKUP
  56     interface eth0
  57     virtual_router_id 52
  58     priority 80
  59     advert_int 1
  60     authentication {
  61         auth_type PASS
  62         auth_pass 1111
  63     }
  64     virtual_ipaddress {
  65         172.25.254.200/24 dev eth0 label eth0:0
  66     }
  67         track_script {
  68         check_nginx
  69     }
  70     notify_master "/etc/keepalived/notify_nginx.sh DB_VIP MASTER"
  71     notify_backup "/etc/keepalived/notify_nginx.sh DB_VIP BACKUP"
  72     notify_fault "/etc/keepalived/notify_nginx.sh DB_VIP FAULT"
  73 }
 [root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 SECURITY VIOLATION - scripts are being executed but script_security not enabled.
 ​
 [root@KA2 ~]# mkdir /usr/share/nginx/html/health
 ​
 [root@KA1 ~]# scp /etc/nginx/conf.d/upstream.conf root@172.25.254.60:/etc/nginx/conf.d/upstream.conf
 upstream.conf                                                   100%  672     1.5MB/s   00:00
 [root@KA1 ~]# scp /etc/keepalived/check_nginx.sh root@172.25.254.60:/etc/keepalived/
 check_nginx.sh                                                                         100%  469   978.8KB/s   00:00
 [root@KA1 ~]# scp /usr/share/nginx/html/health/health.html root@172.25.254.60:/usr/share/nginx/html/health/
 health.html                                                                            100%    3     5.6KB/s   00:00
 [root@KA1 ~]# scp /etc/keepalived/notify_nginx.sh root@172.25.254.60:/etc/keepalived/
 notify_nginx.sh                                                               100%  654     1.1MB/s   00:00
KA2
 # KA2:
 [root@KA2 ~]# dnf install nginx -y > /dev/null
 [root@KA2 ~]# ll /etc/nginx/conf.d/upstream.conf /etc/keepalived/check_nginx.sh /usr/share/nginx/html/health/health.html /etc/keepalived/notify_nginx.sh
 -rwxr-xr-x 1 root root 469 Feb 22 16:19 /etc/keepalived/check_nginx.sh
 -rwxr-xr-x 1 root root 654 Feb 22 16:27 /etc/keepalived/notify_nginx.sh
 -rw-r--r-- 1 root root 672 Feb 22 15:34 /etc/nginx/conf.d/upstream.conf
 -rw-r--r-- 1 root root   3 Feb 22 16:20 /usr/share/nginx/html/health/health.html
 [root@KA2 ~]# nginx -t && systemctl enable --now nginx
 [root@KA2 ~]# systemctl is-active postfix.service
 inactive
 [root@KA2 ~]# systemctl start postfix.service
 [root@KA2 ~]# /etc/keepalived/check_nginx.sh && echo "Check OK" || echo "Check FAILED"
 Check OK
 ​
 # KA2中keepalived设置
 [root@KA2 ~]# vim /etc/keepalived/keepalived.conf
   3 global_defs {
 ………………    
  17    enable_script_security  # 启用脚本执行权限
  18    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
  19 }
  20 # Nginx健康检查脚本
  21 vrrp_script check_nginx {
  22     script "/etc/keepalived/check_nginx.sh"
  23     interval 2
  24     weight -20
  25     fall 2
  26     rise 1
  27 }
  28
  29 vrrp_instance WEB_VIP {
  30     state BACKUP
  31     interface eth0
  32     virtual_router_id 51
  33     preempt_delay 10        # 抢占延迟10秒(避免网络抖动)
  34     priority 80
  35     advert_int 1
  36     authentication {
  37         auth_type PASS
  38         auth_pass 1111
  39     }
  40     virtual_ipaddress {
  41         172.25.254.100/24 dev eth0 label eth0:1
  42     }
  43     track_script {
  44         check_nginx
  45     }
  46     notify_master "/etc/keepalived/notify_nginx.sh WEB_VIP MASTER"
  47     notify_backup "/etc/keepalived/notify_nginx.sh WEB_VIP BACKUP"
  48     notify_fault "/etc/keepalived/notify_nginx.sh WEB_VIP FAULT"
  49 }
  50
  51 vrrp_instance DB_VIP {
  52     state MASTER
  53     interface eth0
  54     virtual_router_id 52
  55     preempt_delay 10
  56     priority 100
  57     advert_int 1
  58     authentication {
  59         auth_type PASS
  60         auth_pass 1111
  61     }
  62     virtual_ipaddress {
  63         172.25.254.200/24 dev eth0 label eth0:0
  64     }
  65     track_script {
  66         check_nginx
  67     }
  68     notify_master "/etc/keepalived/notify_nginx.sh DB_VIP MASTER"
  69     notify_backup "/etc/keepalived/notify_nginx.sh DB_VIP BACKUP"
  70     notify_fault "/etc/keepalived/notify_nginx.sh DB_VIP FAULT"
  71 }
 [root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 (DB_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
 # 测试
 [root@KA1 ~]# systemctl is-active keepalived.service
 active
 [root@KA1 ~]# systemctl reload keepalived.service
 [root@KA2 ~]# systemctl reload keepalived.service
 [root@KA1 ~]# ip a s | grep eth0:1$
     inet 172.25.254.100/24 scope global secondary eth0:1
 [root@KA2 ~]# ip a s | grep eth0:0$
     inet 172.25.254.200/24 scope global secondary eth0:0

global_defs { enable_script_security # 启用脚本执行权限 script_user root root # 指定专用用户,因为所有部署都是用root用户去部署的 }

现实中都是为所有软件创建对应的用户和用户组,具体操作和注意点看大数据笔记,这里为了方便就只能root用户实践。

2.2 Keepalived+HAProxy

基于Keepalived+Nginx改动

2.2.1 环境

架构设计

环境信息
节点IP地址角色VRRP实例
KA1172.25.254.50HAProxy + KeepalivedWEB_VIP: MASTER, API_VIP: BACKUP
KA2172.25.254.60HAProxy + KeepalivedWEB_VIP: BACKUP, API_VIP: MASTER
RS1172.25.254.20后端Web/API服务器NULL
RS2172.25.254.30后端Web/API服务器NULL

2.2.2 实践

RS
 [root@RS1 ~]# mkdir -p /usr/share/nginx/html/api
 [root@RS1 ~]# echo '{"status":"ok","server":"RS1","ip":"172.25.254.20"}' > /usr/share/nginx/html/api/status.json
 [root@RS1 ~]# vim /etc/nginx/conf.d/default.conf
 [root@RS1 ~]# nginx -t && nginx -s reload
 [root@RS1 ~]# curl http://172.25.254.20
 RS1 - 172.25.254.20
 [root@RS1 ~]# curl http://172.25.254.20/api/status.json
 {"status":"ok","server":"RS1","ip":"172.25.254.20"}
 [root@RS1 ~]# curl http://172.25.254.20/health
 healthy
 [root@RS1 ~]# scp /etc/nginx/conf.d/default.conf root@172.25.254.30:/etc/nginx/conf.d/
 default.conf                                                               100%  502   928.3KB/s   00:00
 ​
 [root@RS2 ~]# mkdir -p /usr/share/nginx/html/api
 [root@RS2 ~]# echo '{"status":"ok","server":"RS2","ip":"172.25.254.30"}' > /usr/share/nginx/html/api/status.json
 [root@RS2 ~]# nginx -t && nginx -s reload
KA实现高可用性
 [root@KA1 ~]# systemctl stop keepalived.service     # 由于上个实验做了nginx健康保护,直接stop的话Nginx会被其他进程守护自动重启,或者说不能及时关闭,先把keepalived停后才可以,也说明有看门狗机制在保护它。
 [root@KA1 ~]# systemctl disable --now nginx
 [root@KA1 ~]# netstat -lntupa | grep nginx
 [root@KA1 ~]# systemctl is-active nginx
 inactive
 [root@KA1 ~]# dnf install haproxy -y > /dev/null
 [root@KA1 ~]# systemctl enable --now haproxy
 ​
 [root@KA2 ~]# systemctl stop keepalived.service
 [root@KA2 ~]# systemctl disable --now nginx
 [root@KA2 ~]# netstat -lntupa | grep nginx
 [root@KA2 ~]# systemctl is-active nginx
 inactive
 [root@KA2 ~]# dnf install haproxy -y > /dev/null
 [root@KA2 ~]# systemctl enable --now haproxy
KA1
 # HAProxy设定:KA1与KA2一致
 [root@KA1 ~]# vim /etc/haproxy/haproxy.cfg
  64 # 统计页面
  65 listen stats
  66     bind *:8080
  67     stats enable
  68     stats uri /stats
  69     stats auth admin:admin123
  70     stats refresh 30s
  71
  72 # Web服务前端(对应VIP1 172.25.254.100)
  73 frontend web_frontend
  74     bind *:80
  75     acl is_api path_beg /api
  76     use_backend api_servers if is_api
  77     default_backend web_servers
  78
  79 # Web服务后端(/health 使用 Nginx return 指令)
  80 backend web_servers
  81     balance roundrobin
  82     option httpchk GET /health
  83     http-check expect status 200
  84     server rs1 172.25.254.20:80 check weight 5 inter 2s rise 2 fall 3
  85     server rs2 172.25.254.30:80 check weight 5 inter 2s rise 2 fall 3
  86
  87 # API服务后端(/api/status.json 物理文件)
  88 backend api_servers
  89     balance roundrobin
  90     option httpchk GET /api/status.json
  91     http-check expect status 200
  92     server rs1 172.25.254.20:80 check weight 5 inter 2s rise 2 fall 3
  93     server rs2 172.25.254.30:80 check weight 5 inter 2s rise 2 fall 3
 # HAProxy 健康检查脚本(KA1和KA2相同)
 [root@KA1 ~]# vim /etc/keepalived/check_haproxy.sh
   1 #!/bin/bash
   2 # 检查HAProxy进程
   3 if ! pgrep -x "haproxy" > /dev/null; then
   4     systemctl start haproxy
   5     sleep 2
   6     if ! pgrep -x "haproxy" > /dev/null; then
   7         exit 1
   8     fi
   9 fi
  10 # 检查HAProxy端口(80和8080统计页面)
  11 if ! ss -tlnp | grep -q ":80"; then
  12     exit 1
  13 fi
  14 exit 0
 [root@KA1 ~]# chmod +x /etc/keepalived/check_haproxy.sh
 [root@KA1 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
 [root@KA1 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
 HAProxy OK
 # 通知告警脚本
 [root@KA1 ~]# cp /etc/keepalived/notify_nginx.sh /etc/keepalived/notify_haproxy.sh
 [root@KA1 ~]# scp /etc/keepalived/notify_haproxy.sh root@172.25.254.60:/etc/keepalived/
 notify_haproxy.sh                                                                                   100%  684   564.8KB/s   00:00
 ​
 [root@KA1 ~]# scp /etc/haproxy/haproxy.cfg root@172.25.254.60:/etc/haproxy/haproxy.cfg
 haproxy.cfg                                                                                100% 4220     4.7MB/s   00:00
 [root@KA1 ~]# scp /etc/keepalived/check_haproxy.sh root@172.25.254.60:/etc/keepalived/
 check_haproxy.sh                                                                           100%  293   465.5KB/s   00:00
 ​
 # KA1 Keepalived配置,基于Keepalived + Nginx双主模式
 [root@KA1 ~]# vim /etc/keepalived/keepalived.conf
  22 # HAProxy健康检查脚本
  23 vrrp_script check_haproxy {
  24     script "/etc/keepalived/check_haproxy.sh"
  25     interval 2          # 每2秒检查一次
  26     weight -20          # 检查失败,优先级降低20
  27     fall 2              # 连续2次失败才判定失败
  28     rise 1              # 1次成功恢复
  29 }
  31 # VIP1: Web服务入口,KA1为主
  32 vrrp_instance WEB_VIP {
 ………………
  45     track_script {
  46         check_haproxy
  47     }
  48     notify_master "/etc/keepalived/notify_haproxy.sh WEB_VIP MASTER"
  49     notify_backup "/etc/keepalived/notify_haproxy.sh WEB_VIP BACKUP"
  50     notify_fault "/etc/keepalived/notify_haproxy.sh WEB_VIP FAULT"
  51 }
  52
  53 # VIP2: API服务入口,KA1为备
  54 vrrp_instance API_VIP {
 ………………
 70     notify_master "/etc/keepalived/notify_haproxy.sh API_VIP MASTER"
 71     notify_backup "/etc/keepalived/notify_haproxy.sh API_VIP BACKUP"
 72     notify_fault "/etc/keepalived/notify_haproxy.sh API_VIP FAULT"
  73 }
 [root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
KA2
 [root@KA2 ~]# systemctl reload haproxy.service
 [root@KA2 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
 HAProxy OK
 ​
 # KA2 Keepalived配置,基于Keepalived + Nginx双主模式
 [root@KA2 ~]# vim /etc/keepalived/keepalived.conf
  31 # VIP1: Web服务入口,KA2为备
  32 vrrp_instance WEB_VIP {
 ………………
  46     track_script {
  47         check_haproxy
  48     }
  49     notify_master "/etc/keepalived/notify_haproxy.sh WEB_VIP MASTER"
  50     notify_backup "/etc/keepalived/notify_haproxy.sh WEB_VIP BACKUP"
  51     notify_fault "/etc/keepalived/notify_haproxy.sh WEB_VIP FAULT"
  52 }
  54
  55 # VIP2: API服务入口,KA2为主
  56 vrrp_instance API_VIP {
 ………………
  70     track_script {
  71         check_haproxy
  72     }
  73     notify_master "/etc/keepalived/notify_haproxy.sh API_VIP MASTER"
  74     notify_backup "/etc/keepalived/notify_haproxy.sh API_VIP BACKUP"
  75     notify_fault "/etc/keepalived/notify_haproxy.sh API_VIP FAULT"
  76 }
 [root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 (API_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
[root@KA1 ~]# systemctl start keepalived.service
[root@KA2 ~]# systemctl start keepalived.service
# 双主状态验证
[root@KA1 ~]# ip a s | grep eth0:1$
    inet 172.25.254.100/24 scope global secondary eth0:1
[root@KA2 ~]# ip a s | grep eth0:0$
    inet 172.25.254.200/24 scope global secondary eth0:0
# 服务访问测试
[root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
RS1 - 172.25.254.20
RS2 - 172.25.254.30
RS1 - 172.25.254.20
RS2 - 172.25.254.30
[root@Client ~]# for i in {1..4};do curl http://172.25.254.200/api/status.json;done
{"status":"ok","server":"RS1","ip":"172.25.254.20"}
{"status":"ok","server":"RS2","ip":"172.25.254.30"}
{"status":"ok","server":"RS1","ip":"172.25.254.20"}
{"status":"ok","server":"RS2","ip":"172.25.254.30"}
[root@Client ~]# curl http://172.25.254.100/health
healthy

3 场景3:数据库高可用

3.1.1 环境

架构设计

核心原则:数据库必须单主

原则说明
单 VIP只有一个入口,确保写操作唯一性
单 Master任何时候只有一个节点接受写操作
自动检测Keepalived 检测 MySQL 状态,故障时 VIP 漂移
手动/半自动切换:主从切换需要谨慎,建议配合 MHA 或手动操作

环境信息

节点IP地址角色说明
KA1172.25.254.50Keepalived BACKUPVIP 故障时接管
KA2172.25.254.60Keepalived MASTER正常时持有 VIP
DB1172.25.254.20(复用RS1)MySQL Master主库,接受读写
DB2172.25.254.30(复用RS2)MySQL Slave从库,只读/热备
VIP172.25.254.200数据库入口指向当前 Master

3.1.2 实践

DB主从复制

DB1
# DB1:
[root@RS1 ~]# systemctl stop nginx.service
[root@RS1 ~]# dnf install mysql-server -y >/dev/null
[root@RS1 ~]# vim /etc/my.cnf
 11 [mysqld]
 12 server-id = 20
 13 # GTID 复制(推荐)
 14 gtid_mode = ON
 15 enforce_gtid_consistency = ON
 16 log-bin = mysql-bin
 17 binlog-format = ROW
 18 expire_logs_days = 7
 19 max_binlog_size = 100M
 20 # 半同步复制(可选但推荐)
 21 plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
 22 rpl-semi-sync-master-enabled = 1
 23 rpl-semi-sync-slave-enabled = 1
 24 rpl-semi-sync-master-timeout = 1000
 25 # 字符集
 26 character-set-server = utf8mb4
 27 # 绑定所有接口
 28 bind-address = 0.0.0.0
 29 # 为 Keepalived 检测预留的用户
 30 skip-name-resolve
[root@RS1 ~]# systemctl enable --now mysqld
[root@RS1 ~]# mysql
# 创建复制用户
mysql> create user 'repl'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant replication slave on *.* to 'repl'@'172.25.254.%';
# 创建健康检查用户(用于 Keepalived 检测)
mysql> create user 'check'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant process,replication client,show databases on *.* to 'check'@'172.25.254.%';
mysql> flush privileges;
mysql> show master status;
+---------------+----------+--------------+------------------+-------------------+
| File          | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+---------------+----------+--------------+------------------+-------------------+
| binlog.000001 |     1237 |              |                  |                   |
+---------------+----------+--------------+------------------+-------------------+
mysql> SHOW VARIABLES LIKE 'gtid%';
+----------------------------------+-----------+
| Variable_name                    | Value     |
+----------------------------------+-----------+
| gtid_executed                    |           |
| gtid_executed_compression_period | 0         |
| gtid_mode                        | ON        |
| gtid_next                        | AUTOMATIC |
| gtid_owned                       |           |
| gtid_purged                      |           |
+----------------------------------+-----------+
# 数据库刚启动,还没有执行任何写入操作,没有产生事务,gtid_executed为空的原因。
mysql> create database test_gtid;
mysql> use test_gtid;
Database changed
mysql> create table t1 (id int primary key, name varchar(50));
mysql> insert into t1 values (1,'test');
mysql> SHOW VARIABLES LIKE 'gtid_executed';
+---------------+------------------------------------------+
| Variable_name | Value                                    |
+---------------+------------------------------------------+
| gtid_executed | b650e3a0-102f-11f1-9134-000c29999103:1-3 |
+---------------+------------------------------------------+
DB2
# DB2:
[root@RS2 ~]# systemctl stop nginx.service
[root@RS2 ~]# dnf install mysql-server -y >/dev/null
[root@RS2 ~]# vim /etc/my.cnf
 10 !includedir /etc/my.cnf.d
 11 [mysqld]
 12 server-id = 30
 13 # GTID
 14 gtid_mode = ON
 15 enforce_gtid_consistency = ON
 16 log-bin = mysql-bin
 17 binlog-format = ROW
 18 expire_logs_days = 7
 19 # 半同步复制,起不来,后面想办法加上去。
 20 # plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
 21 # rpl-semi-sync-master-enabled = 1
 22 # rpl-semi-sync-slave-enabled = 1
 23 # 只读(重要!防止误写入)
 24 read_only = 1
 25 super_read_only = 1
 26 character-set-server = utf8mb4
 27 bind-address = 0.0.0.0
 28 skip-name-resolve
[root@RS2 ~]# systemctl enable --now mysqld
mysql> INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
ERROR 1290 (HY000): The MySQL server is running with the --super-read-only option so it cannot execute this statement
mysql> INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';
ERROR 1290 (HY000): The MySQL server is running with the --super-read-only option so it cannot execute this statement
# 关闭只读
mysql> set global super_read_only = OFF;
mysql> set global read_only = OFF;
# 安装插件
mysql> install plugin rpl_semi_sync_master soname 'semisync_master.so';
mysql> install plugin rpl_semi_sync_slave soname 'semisync_slave.so';
# 启用插件
mysql> set global rpl_semi_sync_master_enabled = ON;
mysql> set global rpl_semi_sync_slave_enabled = ON;
# 创建复制用户
mysql> create user 'repl'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant replication slave on *.* to 'repl'@'172.25.254.%';
# 创建健康检查用户(用于 Keepalived 检测)
mysql> create user 'check'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant process,replication client,show databases on *.* to 'check'@'172.25.254.%';
# 重新开启只读(从库必须保持只读)
mysql> set global read_only = ON;
# 查看插件状态
mysql> show plugins;
………………	# 正常
# 查看变量
mysql> show variables like 'rpl_semi_sync%';
+-------------------------------------------+------------+
| Variable_name                             | Value      |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled              | ON         |
| rpl_semi_sync_master_timeout              | 10000      |
| rpl_semi_sync_master_trace_level          | 32         |
| rpl_semi_sync_master_wait_for_slave_count | 1          |
| rpl_semi_sync_master_wait_no_slave        | ON         |
| rpl_semi_sync_master_wait_point           | AFTER_SYNC |
| rpl_semi_sync_slave_enabled               | ON         |
| rpl_semi_sync_slave_trace_level           | 32         |
+-------------------------------------------+------------+
8 rows in set (0.00 sec)
# 配置主从复制
mysql> change master to
    -> master_host = '172.25.254.20',
    -> master_port = 3306,
    -> master_user = 'repl',
    -> master_password = '123',
    -> master_auto_position = 1,	# GTID 自动定位
    -> master_connect_retry = 10;
mysql> start slave;
mysql> show slave status\G
*************************** 1. row ***************************
…………
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
…………
测试
[root@RS1 ~]# mysql
mysql> create database test_haha;
mysql> use test_haha
mysql> create table t1 (id int primary key,name varchar(50));
mysql> insert into t1 values (1,'from Master DB1');
[root@RS2 ~]# mysql -e "SELECT * FROM test_haha.t1;"
+----+-----------------+
| id | name            |
+----+-----------------+
|  1 | from Master DB1 |
+----+-----------------+

KA实现高可用

# 安装 MySQL 客户端用于检测
[root@KA1 ~]# systemctl stop keepalived.service
[root@KA1 ~]# systemctl stop haproxy.service
[root@KA1 ~]# dnf install mysql -y >/dev/null
[root@KA2 ~]# dnf install mysql -y >/dev/null
[root@KA1 ~]# mysql -h172.25.254.30 -ucheck -p123 -e "SELECT 1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---+
| 1 |
+---+
| 1 |
+---+
[root@KA1 ~]# mysql -h172.25.254.20 -ucheck -p123 -e "SELECT 1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---+
| 1 |
+---+
| 1 |
+---+
KA1
# MySQL 检查脚本
[root@KA1 ~]# vim /etc/keepalived/check_mysql.sh
  1 #!/bin/bash
  2
  3 # MySQL health-check script, run periodically by keepalived (vrrp_script).
  4 # Passes (exit 0) only when the remote MySQL is reachable, is acting as
  5 # master (read_only = OFF) and, if it is also replicating, is not lagging.
  6
  7 MYSQL_HOST="172.25.254.20"      # current master IP
  8 MYSQL_USER="check"
  9 MYSQL_PASS="123"
 10 MYSQL_PORT=3306
 11 LOG_FILE="/var/log/keepalived_mysql_check.log"
 12 # log every probe with a timestamp
 13 echo "$(date '+%Y-%m-%d %H:%M:%S') - Checking MySQL at $MYSQL_HOST" >> "$LOG_FILE"
 14 # check 1: is MySQL reachable at all?
 15 if ! mysql -h"$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" -P"$MYSQL_PORT" -e "SELECT 1;" > /dev/null 2>&1; then
 16     echo "$(date): MySQL $MYSQL_HOST connection FAILED" >> "$LOG_FILE"
 17     exit 1
 18 fi
 19 # check 2: is it the master (read_only = OFF)?
 20 READ_ONLY=$(mysql -h"$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" -P"$MYSQL_PORT" -e "SHOW VARIABLES LIKE 'read_only';" 2>/dev/null | grep read_only | awk '{print $2}')
 21 if [ "$READ_ONLY" = "ON" ]; then
 22     echo "$(date): MySQL $MYSQL_HOST is read_only, not master" >> "$LOG_FILE"
 23     exit 1
 24 fi
 25 # check 3: replication lag. On a pure master SHOW SLAVE STATUS returns no
 26 # rows, so SLAVE_LAG is EMPTY; it is the literal string "NULL" while the
 27 # SQL thread is stopped. Only run the numeric comparison when we actually
 28 # got a number, otherwise "[: : integer expression expected" is raised.
 29 SLAVE_LAG=$(mysql -h"$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" -P"$MYSQL_PORT" -e "SHOW SLAVE STATUS\G" 2>/dev/null | grep Seconds_Behind_Master | awk '{print $2}')
 30 if [[ "$SLAVE_LAG" =~ ^[0-9]+$ ]] && [ "$SLAVE_LAG" -gt 60 ]; then
 31     echo "$(date): Replication lag $SLAVE_LAG seconds" >> "$LOG_FILE"
 32     exit 1
 33 fi
 34 echo "$(date): MySQL $MYSQL_HOST check PASSED" >> "$LOG_FILE"
 35 exit 0
[root@KA1 ~]# chmod +x /etc/keepalived/check_mysql.sh
[root@KA1 ~]# /etc/keepalived/check_mysql.sh && echo "MySQL OK" || echo "MySQL FAILED"
/etc/keepalived/check_mysql.sh: line 26: [: : integer expression expected
MySQL OK
# 注:上面的报错是因为在纯主库上 SHOW SLAVE STATUS 结果为空,Seconds_Behind_Master 取不到值,
# 导致数值比较 [ "" -gt 60 ] 报错;应在比较前先判断变量确实是数字(非空且非 NULL)再进行比较。

# 切换脚本(Master故障时自动切换主从)
[root@KA1 ~]# vim /etc/keepalived/notify_mysql.sh
  1 #!/bin/bash
  2
  3 TYPE=$1         # instance name passed by keepalived, e.g. DB_VIP
  4 STATE=$2        # VRRP state: MASTER, BACKUP or FAULT
  5 CURRENT_HOST=$(hostname)
  6 LOG_FILE="/var/log/keepalived_mysql.log"
  7 mkdir -p $(dirname $LOG_FILE)
  8 echo "$(date '+%Y-%m-%d %H:%M:%S') - $TYPE on $CURRENT_HOST changed to $STATE" >> $LOG_FILE
  9 case $STATE in
 10     MASTER)
 11         echo "$(date): !!! This node is now MASTER for MySQL VIP !!!" >> $LOG_FILE
 12         echo "$(date): VIP 172.25.254.200 is now on $CURRENT_HOST" >> $LOG_FILE
 13         # important: alert the administrator by mail (ignore failure if no MTA)
 14         echo "MySQL VIP failover occurred at $(date)" | \
 15             mail -s "CRITICAL: MySQL HA Failover" xiafeng_68@163.com 2>/dev/null || true
 16         # NOTE: deliberately NO automatic master/slave switchover here!
 17         # Reasons: 1. split-brain risk  2. data-consistency risk
 18         # Recommended: a human confirms, then runs the manual failover script
 19         # auto-promoting DB2 to master (high risk, use with extreme care):
 20         # mysql -h172.25.254.30 -ugxf -p123 -e "STOP SLAVE; RESET SLAVE ALL; SET GLOBAL read_only = 0;"
 21
 22         echo "$(date): ACTION REQUIRED: Please verify and execute manual failover if needed" >> $LOG_FILE
 23         ;;
 24
 25     BACKUP)
 26         echo "$(date): This node is now BACKUP for MySQL VIP" >> $LOG_FILE
 27         ;;
 28
 29     FAULT)
 30         echo "$(date): FAULT state detected on $CURRENT_HOST" >> $LOG_FILE
 31         ;;
 32 esac
[root@KA1 ~]# chmod +x /etc/keepalived/notify_mysql.sh
# 手动主从切换脚本(故障时使用)
[root@KA1 ~]# vim /etc/keepalived/manual_failover.sh
  1 #!/bin/bash
  2
  3 # When DB1 has failed, run this script (after manual confirmation) to
  4 # promote DB2 to master and repoint the keepalived health check at it.
  5 NEW_MASTER="172.25.254.30"      # DB2
  6 OLD_MASTER="172.25.254.20"      # DB1
  7 VIP="172.25.254.200"
  8 echo "=== MySQL 手动故障切换 ==="
  9 echo "时间: $(date)"
 10 echo "新主库: $NEW_MASTER (DB2)"
 11 echo "旧主库: $OLD_MASTER (DB1) - 假设已故障"
 12 # 1. stop replication on DB2 and lift read-only, promoting it to master;
 13 #    abort the whole failover if this step does not succeed
 14 echo "步骤1: 在 DB2 上停止复制..."
 15 mysql -h"$NEW_MASTER" -ugxf -p123 -e "
 16     STOP SLAVE;
 17     RESET SLAVE ALL;
 18     SET GLOBAL read_only = OFF;
 19     SET GLOBAL super_read_only = OFF;
 20     SELECT 'DB2 is now MASTER' as status;
 21 " || { echo "错误: 无法连接或提升 $NEW_MASTER,切换中止"; exit 1; }
 22 # 2. point the keepalived health-check script at the new master
 23 echo "步骤2: 更新 Keepalived 检测脚本..."
 24 sed -i "s/MYSQL_HOST=\"$OLD_MASTER\"/MYSQL_HOST=\"$NEW_MASTER\"/" /etc/keepalived/check_mysql.sh
 25 # 3. when the old master recovers, re-join it as a slave (optional);
 26 #    the password must match the 'repl' user created earlier ('123')
 27 echo "步骤3: 当旧主库恢复后,执行以下命令重新加入:"
 28 echo "  CHANGE MASTER TO MASTER_HOST='$NEW_MASTER', MASTER_USER='repl', MASTER_PASSWORD='123', MASTER_AUTO_POSITION=1;"
 29 echo "  START SLAVE;"
 30 echo "切换完成!VIP $VIP 现在指向新主库 $NEW_MASTER"
[root@KA1 ~]# chmod +x /etc/keepalived/manual_failover.sh
[root@KA1 ~]# scp /etc/keepalived/manual_failover.sh root@172.25.254.60:/etc/keepalived/
manual_failover.sh                                                                          100% 1175   736.0KB/s   00:00
[root@KA1 ~]# scp /etc/keepalived/check_mysql.sh root@172.25.254.60:/etc/keepalived/
check_mysql.sh                                                                                                 100% 1309     1.6MB/s   00:00
[root@KA1 ~]# scp /etc/keepalived/notify_mysql.sh root@172.25.254.60:/etc/keepalived/
notify_mysql.sh                                                                                                           100% 1323     2.6MB/s   00:00

# KA1配置keepalived
  1 ! Configuration File for keepalived
  2
  3 global_defs {
………………
 13    vrrp_skip_check_adv_addr
 14    #vrrp_strict				# 必须关闭,否则阻断MySQL连接
………………
 18    enable_script_security   # 启用脚本执行权限
 19    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
 20 }
 21
 22 # MySql健康检查脚本
 23 vrrp_script check_mysql {
 24     script "/etc/keepalived/check_mysql.sh"
 25     interval 3          # MySQL检测间隔稍长
 26     weight -20          # 检查失败,优先级降低20
 27     fall 2              # 连续2次失败才判定失败
 28     rise 2              # 连续2次成功恢复
 29 }
 30
 31 # 单VIP:数据库入口
 32 vrrp_instance DB_VIP {
 33     state BACKUP        # KA1作为BACKUP
 34     interface eth0
 35     virtual_router_id 52
 36     priority 80         # 低于KA2
 37     advert_int 1
 38     authentication {
 39         auth_type PASS
 40         auth_pass 2222
 41     }
 42     virtual_ipaddress {
 43         172.25.254.200/24 dev eth0 label eth0:0
 44     }
 45     track_script {
 46         check_mysql
 47     }
 48     # 禁止抢占(重要!防止网络抖动导致频繁切换)
 49     nopreempt
 50     notify_master "/etc/keepalived/notify_mysql.sh DB_VIP MASTER"
 51     notify_backup "/etc/keepalived/notify_mysql.sh DB_VIP BACKUP"
 52     notify_fault "/etc/keepalived/notify_mysql.sh DB_VIP FAULT"
 53 }
 [root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 
# KA1配置haproxy
 64 listen mysql
 65     bind 0.0.0.0:3306	# MySQL - 绑定所有接口,如果指定172.25.254.200会起不了服务
 66     mode tcp
 67     option tcp-check
 68     tcp-check connect port 3306
 69     timeout connect 5s
 70     timeout client 30s
 71     timeout server 30s
 72     server db1 172.25.254.20:3306 check inter 2s rise 2 fall 3
 73     server db2 172.25.254.30:3306 check backup inter 2s rise 2 fall 3
 74
 75 # 统计页面
 76 listen stats
 77     bind *:8080
 78     mode http
 79     stats enable
 80     stats uri /stats
 81     stats auth gxf:123
 [root@KA1 ~]# scp /etc/haproxy/haproxy.cfg root@172.25.254.60:/etc/haproxy/haproxy.cfg
haproxy.cfg 
KA2
[root@KA2 ~]# ll /etc/keepalived/notify_mysql.sh /etc/keepalived/check_mysql.sh /etc/keepalived/manual_failover.sh
-rwxr-xr-x 1 root root 1309 Feb 23 06:36 /etc/keepalived/check_mysql.sh
-rwxr-xr-x 1 root root 1175 Feb 23 07:08 /etc/keepalived/manual_failover.sh
-rwxr-xr-x 1 root root 1323 Feb 23 06:50 /etc/keepalived/notify_mysql.sh
[root@KA2 ~]# vim /etc/keepalived/keepalived.conf
  1 ! Configuration File for keepalived
  2
  3 global_defs {
  4    notification_email {
………………
 14    #vrrp_strict
………………
 17    #vrrp_mcast_group4 224.0.0.44
 18    enable_script_security  # 启用脚本执行权限
 19    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
 20 }
 22 vrrp_script check_mysql {
 23     script "/etc/keepalived/check_mysql.sh"
 24     interval 3
 25     weight -30
 26     fall 2
 27     rise 2
 28 }
 30 vrrp_instance DB_VIP {
 31     state MASTER        # KA2作为MASTER
 32     interface eth0
 33     virtual_router_id 52
 34     priority 100        # 高于KA1
 35     advert_int 1
 36     authentication {
 37         auth_type PASS
 38         auth_pass 2222
 39     }
 40     virtual_ipaddress {
 41         172.25.254.200/24 dev eth0 label eth0:0
 42     }
 43     track_script {
 44         check_mysql
 45     }
 47     # 可选:抢占延迟
 48     preempt_delay 10
 50     notify_master "/etc/keepalived/notify_mysql.sh DB_VIP MASTER"
 51     notify_backup "/etc/keepalived/notify_mysql.sh DB_VIP BACKUP"
 52     notify_fault "/etc/keepalived/notify_mysql.sh DB_VIP FAULT"
 53 }
[root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
(DB_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
[root@KA1 ~]# systemctl start keepalived.service
[root@KA2 ~]# systemctl start keepalived.service
[root@KA1 ~]# systemctl start haproxy.service
[root@KA2 ~]# systemctl start haproxy.service
# 查看VIP绑定(应该在KA2上)
[root@KA2 ~]# ip addr show eth0 | grep 172.25.254.200
    inet 172.25.254.200/24 scope global secondary eth0:0
tail -f /var/log/keepalived_mysql.log
[root@KA2 ~]# tail -f /var/log/keepalived_mysql.log
2026-02-23 07:49:14 - DB_VIP on KA2 changed to MASTER
Mon Feb 23 07:49:14 AM CST 2026: !!! This node is now MASTER for MySQL VIP !!!
Mon Feb 23 07:49:14 AM CST 2026: VIP 172.25.254.200 is now on KA2
Mon Feb 23 07:49:14 AM CST 2026: ACTION REQUIRED: Please verify and execute manual failover if needed
# 客户端连接测试
[root@Client ~]# mysql -h172.25.254.200 -ugxf -p123 -e "SHOW VARIABLES LIKE 'server_id';"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| server_id     | 20    |
+---------------+-------+
# 写入测试
[root@Client ~]# mysql -h172.25.254.200 -ugxf -p123 -e "INSERT INTO test_haha.t1 VALUES (2, 'Via VIP');"
mysql: [Warning] Using a password on the command line interface can be insecure.
# 检测在DB2上验证同步
[root@Client ~]# mysql -h172.25.254.30 -ugxf -p123 -e "SELECT * FROM test_haha.t1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+----+-----------------+
| id | name            |
+----+-----------------+
|  1 | from Master DB1 |
|  2 | Via VIP         |
+----+-----------------+

# 故障测试如图
# 在 DB1 上停止 MySQL
# 观察 KA 日志
tail -f /var/log/keepalived_mysql_check.log
# 手动切换测试
/etc/keepalived/manual_failover.sh
# 验证新主库
mysql -h172.25.254.200 -ugxf -p123 -e "SHOW VARIABLES LIKE 'server_id';"

3.3.3 关键配置对比(数据库与Web)

特性	Web(Nginx/HAProxy)	数据库(MySQL)
模式	双主(两个VIP)	单主(一个VIP)
状态	MASTER + BACKUP 互换	严格区分 MASTER/BACKUP
抢占	默认开启	建议 nopreempt
检测间隔	2秒	3秒(更保守)
权重降幅	-20	-30(更敏感)
自动切换	是	否(建议手动)
脑裂风险	低	高(必须避免)

总结

以上为个人经验,希望能给大家一个参考,也希望大家多多支持脚本之家。

您可能感兴趣的文章:
阅读全文