Keepalived实现集群高可用全过程
作者:feng68_
文章描述了三种高可用架构的实现:LVS DR模式、Nginx/HAProxy高可用方案以及数据库高可用性,每种方案都详细描述了环境架构、实践步骤和关键配置,以确保系统的高可用性和可靠性
1 场景1:Keepalived + LVS
1.1.1 环境
直接用之前做的LVS的DR模式了,加个VSNode即可。
| 主机名 | 角色 | IP地址 | VIP | 网关 | VRRP状态 |
|---|---|---|---|---|---|
| Router | 路由器 | 172.25.254.100/192.168.0.100 | NULL | 172.25.254.2 | NULL |
| VSNode1 | LVS Director (KA1) | 192.168.0.50 | 192.168.0.200(keepalived管理VIP浮动) | 192.168.0.100 | MASTER (priority 100) |
| VSNode2 | LVS Director (KA2) | 192.168.0.60 | 192.168.0.200(keepalived管理VIP浮动) | 192.168.0.100 | BACKUP (priority 80) |
| RS1 | Real Server | 192.168.0.20 | 192.168.0.200(lo) | 192.168.0.100 | NULL |
| RS2 | Real Server | 192.168.0.30 | 192.168.0.200(lo) | 192.168.0.100 | NULL |
| Client | 测试机 | 172.25.254.101 | NULL | 可以抵达Router都可 | NULL |
# Router
[root@Router ~]# echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
[root@Router ~]# sysctl -p
net.ipv4.ip_forward = 1
[root@Router ~]# ip addr | egrep "eth0$|eth1$"
inet 172.25.254.100/24 brd 172.25.254.255 scope global noprefixroute eth0
inet 192.168.0.100/24 brd 192.168.0.255 scope global noprefixroute eth1
# VS50
[root@VSNode50 ~]# ip addr | grep "eth0$"
inet 192.168.0.50/24 brd 192.168.0.255 scope global noprefixroute eth0
[root@VSNode50 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.100 0.0.0.0 UG 100 0 0 eth0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 eth0
# VS60
[root@VSNode60 ~]# ip addr | grep "eth0$"
inet 192.168.0.60/24 brd 192.168.0.255 scope global noprefixroute eth0
[root@VSNode60 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.100 0.0.0.0 UG 100 0 0 eth0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 eth0
# RS20
[root@RS20 ~]# ip a | egrep "lo$|eth0"
inet 127.0.0.1/8 scope host lo
inet 192.168.0.200/32 scope global lo
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
inet 192.168.0.20/24 brd 192.168.0.255 scope global noprefixroute eth0
[root@RS20 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.100 0.0.0.0 UG 100 0 0 eth0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 eth0
# RS30
[root@RS30 ~]# ip a | egrep "lo$|eth0"
inet 127.0.0.1/8 scope host lo
inet 192.168.0.200/32 scope global lo
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
inet 192.168.0.30/24 brd 192.168.0.255 scope global noprefixroute eth0
[root@RS30 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.100 0.0.0.0 UG 100 0 0 eth0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 eth0
# 防止RS响应VIP的ARP请求,避免VIP冲突(永久生效-->改/etc/sysctl.conf文件)
[root@RS20 ~]# cat >> /etc/sysctl.conf << 'EOF'
> net.ipv4.conf.all.arp_ignore = 1
> net.ipv4.conf.all.arp_announce = 2
> net.ipv4.conf.lo.arp_ignore = 1
> net.ipv4.conf.lo.arp_announce = 2
> EOF
[root@RS20 ~]# sysctl -p
net.ipv4.conf.all.arp_ignore = 1
net.ipv4.conf.all.arp_announce = 2
net.ipv4.conf.lo.arp_ignore = 1
net.ipv4.conf.lo.arp_announce = 2
[root@RS20 ~]# sysctl net.ipv4.conf.all.arp_ignore
net.ipv4.conf.all.arp_ignore = 1
[root@RS20 ~]# sysctl net.ipv4.conf.all.arp_announce
net.ipv4.conf.all.arp_announce = 2
[root@RS30 ~]# cat >> /etc/sysctl.conf << 'EOF'
> net.ipv4.conf.all.arp_ignore = 1
> net.ipv4.conf.all.arp_announce = 2
> net.ipv4.conf.lo.arp_ignore = 1
> net.ipv4.conf.lo.arp_announce = 2
> EOF
[root@RS30 ~]# sysctl -p
net.ipv4.conf.all.arp_ignore = 1
net.ipv4.conf.all.arp_announce = 2
net.ipv4.conf.lo.arp_ignore = 1
net.ipv4.conf.lo.arp_announce = 2
[root@RS30 ~]# sysctl net.ipv4.conf.all.arp_ignore
net.ipv4.conf.all.arp_ignore = 1
[root@RS30 ~]# sysctl net.ipv4.conf.all.arp_announce
net.ipv4.conf.all.arp_announce = 2
ARP参数:
| 参数 | 含义 | 作用 |
|---|---|---|
| arp_ignore = 1 | 只响应目的IP是本地接口的ARP请求 | 避免lo上的VIP响应ARP |
| arp_announce = 2 | 始终使用最佳本地地址发送ARP | 避免宣告VIP的MAC地址 |
[root@VSNode50 ~]# dnf install -y keepalived ipvsadm >/dev/null
[root@VSNode60 ~]# dnf install -y keepalived ipvsadm >/dev/null
[root@RS20 ~]# dnf install httpd -y >/dev/null
[root@RS20 ~]# echo RS20 - 192.168.0.20 > /var/www/html/index.html
[root@RS20 ~]# systemctl enable --now httpd
[root@RS30 ~]# dnf install httpd -y >/dev/null
[root@RS30 ~]# echo RS30 - 192.168.0.30 > /var/www/html/index.html
[root@RS30 ~]# systemctl enable --now httpd
1.1.2 实践
Router火墙规则
[root@Router ~]# iptables -t nat -F
[root@Router ~]# iptables -t nat -L
Chain PREROUTING (policy ACCEPT)
target prot opt source destination
Chain INPUT (policy ACCEPT)
target prot opt source destination
Chain OUTPUT (policy ACCEPT)
target prot opt source destination
Chain POSTROUTING (policy ACCEPT)
target prot opt source destination
[root@Router ~]# iptables -t nat -A PREROUTING -d 172.25.254.100 -p tcp --dport 80 -j DNAT --to-destination 192.168.0.200:80
[root@Router ~]# iptables -t nat -A POSTROUTING -s 192.168.0.0/24 -o eth0 -j SNAT --to-source 172.25.254.100
[root@Router ~]# iptables -t nat -L -n -v
Chain PREROUTING (policy ACCEPT 1 packets, 108 bytes)
pkts bytes target prot opt in out source destination
0 0 DNAT 6 -- * * 0.0.0.0/0 172.25.254.100 tcp dpt:80 to:192.168.0.200:80
Chain POSTROUTING (policy ACCEPT 1 packets, 76 bytes)
pkts bytes target prot opt in out source destination
0 0 SNAT 0 -- * eth0 192.168.0.0/24 0.0.0.0/0 to:172.25.254.100
[root@Router ~]# dnf install -y iptables-services > /dev/null
[root@Router ~]# iptables-save > /etc/sysconfig/iptables
[root@Router ~]# systemctl enable --now iptables.service
VS
# keepalive
[root@VSNode50 ~]# vim /etc/keepalived/keepalived.conf
1 ! Configuration File for keepalived
2
3 global_defs {
4 notification_email {
5 acassen@firewall.loc
6 failover@firewall.loc
7 sysadmin@firewall.loc
8 }
9 notification_email_from Alexandre.Cassen@firewall.loc
10 smtp_server 192.168.200.1
11 smtp_connect_timeout 30
12 router_id VS50
13 vrrp_skip_check_adv_addr
14 #vrrp_strict # 必须注释掉,否则会添加iptables规则阻断转发
15 vrrp_garp_interval 0
16 vrrp_gna_interval 0
17 }
18
19 # VRRP实例:实现Director高可用
20 vrrp_instance VI_1 {
21 state MASTER
22 interface eth0
23 virtual_router_id 51
24 priority 100
25 advert_int 1
26 authentication {
27 auth_type PASS
28 auth_pass 1111
29 }
30 virtual_ipaddress {
31 192.168.0.200/32 dev eth0 label eth0:0
32 }
33 }
34 # LVS虚拟服务器配置(核心)
35 virtual_server 192.168.0.200 80 {
36 delay_loop 6 # 健康检查间隔(秒)
37 lb_algo rr # 负载均衡算法:rr=轮询,wrr=加权轮询
38 lb_kind DR # LVS模式:DR=直接路由(性能最佳)
39 protocol TCP # 协议类型
40
41 # 后端真实服务器 RS1
42 real_server 192.168.0.20 80 {
43 weight 1 # 权重(rr算法下无效,wrr时生效)
44 TCP_CHECK { # TCP健康检查
45 connect_timeout 3 # 连接超时(秒)
46 nb_get_retry 3 # 重试次数
47 delay_before_retry 3 # 重试间隔(秒)
48 }
49 }
50
51 # 后端真实服务器 RS2
52 real_server 192.168.0.30 80 {
53 weight 1
54 TCP_CHECK {
55 connect_timeout 3
56 nb_get_retry 3
57 delay_before_retry 3
58 }
59 }
60 }
[root@VSNode50 ~]# scp /etc/keepalived/keepalived.conf root@192.168.0.60:/etc/keepalived/keepalived.conf
Warning: Permanently added '192.168.0.60' (ED25519) to the list of known hosts.
keepalived.conf 100% 1578 3.1MB/s 00:00
[root@VSNode50 ~]# systemctl enable --now keepalived.service
[root@VSNode60 ~]# vim /etc/keepalived/keepalived.conf
# VS60
1 ! Configuration File for keepalived
2
3 global_defs {
………………
12 router_id VS60
13 vrrp_skip_check_adv_addr
14 #vrrp_strict
………………
17 }
19 # VRRP实例:实现Director高可用
20 vrrp_instance VI_1 {
21 state BACKUP
………………
24 priority 80
………………
33 }
# LVS配置与VSNode1完全相同
[root@VSNode60 ~]# systemctl enable --now keepalived.service
# 查看LVS规则
[root@VSNode60 ~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.0.200:80 rr
-> 192.168.0.20:80 Route 1 0 0
-> 192.168.0.30:80 Route 1 0 0
测试
[root@VSNode50 ~]# ip a | grep eth0:0
inet 192.168.0.200/32 scope global eth0:0
[root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
RS30 - 192.168.0.30
RS20 - 192.168.0.20
RS30 - 192.168.0.30
RS20 - 192.168.0.20
[root@VSNode50 ~]# ipvsadm -Ln --stats
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Conns InPkts OutPkts InBytes OutBytes
-> RemoteAddress:Port
TCP 192.168.0.200:80 4 24 0 1592 0
-> 192.168.0.20:80 2 12 0 796 0
-> 192.168.0.30:80 2 12 0 796 0
[root@VSNode60 ~]# ipvsadm -Ln --stats
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Conns InPkts OutPkts InBytes OutBytes
-> RemoteAddress:Port
TCP 192.168.0.200:80 0 0 0 0 0
-> 192.168.0.20:80 0 0 0 0 0
-> 192.168.0.30:80 0 0 0 0 0
[root@VSNode50 ~]# systemctl stop keepalived.service
[root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
RS30 - 192.168.0.30
RS20 - 192.168.0.20
RS30 - 192.168.0.30
RS20 - 192.168.0.20
[root@VSNode60 ~]# ip a | grep eth0:0
inet 192.168.0.200/32 scope global eth0:0
[root@VSNode60 ~]# ipvsadm -Ln --stats
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Conns InPkts OutPkts InBytes OutBytes
-> RemoteAddress:Port
TCP 192.168.0.200:80 4 24 0 1592 0
-> 192.168.0.20:80 2 12 0 796 0
-> 192.168.0.30:80 2 12 0 796 0
2 场景2:Keepalived + Nginx/HAProxy
2.1 Keepalived+Nginx方案
2.1.1 环境
架构设计

环境信息
| 节点 | IP地址 | 角色 | VRRP实例 |
|---|---|---|---|
| KA1 | 172.25.254.50 | Nginx + Keepalived | WEB_VIP: MASTER, DB_VIP: BACKUP |
| KA2 | 172.25.254.60 | Nginx + Keepalived | WEB_VIP: BACKUP, DB_VIP: MASTER |
| RS1 | 172.25.254.20 | 后端Web服务器 | NULL |
| RS2 | 172.25.254.30 | 后端Web服务器 | NULL |
| VIP1 | 172.25.254.100 | Web服务入口 | 主: KA1, 备: KA2 |
| VIP2 | 172.25.254.200 | 备用入口(或DB服务) | 主: KA2, 备: KA1 |
2.1.2 实践
RS测试页
[root@RS1 ~]# dnf install nginx -y > /dev/null
[root@RS1 ~]# echo "RS1 - 172.25.254.20" > /usr/share/nginx/html/index.html
[root@RS2 ~]# echo "RS2 - 172.25.254.30" > /usr/share/nginx/html/index.html
[root@RS2 ~]# systemctl enable --now nginx.service
[2026-02-22 15:29.09] ~ [Is XiaFeng Computer.IsXiaFengComputer]
⮞ curl 172.25.254.20
RS1 - 172.25.254.20
[2026-02-22 15:29.15] ~ [Is XiaFeng Computer.IsXiaFengComputer]
⮞ curl 172.25.254.30
RS2 - 172.25.254.30
KA实现高可用
KA1
# KA配置Nginx反向代理
[root@KA1 ~]# dnf install nginx -y > /dev/null
[root@KA1 ~]# vim /etc/nginx/conf.d/upstream.conf
1 upstream backend {
2 server 172.25.254.20:80 weight=5;
3 server 172.25.254.30:80 weight=5;
4 keepalive 32; # 长连接
5 }
6 server {
7 listen 80;
8 server_name localhost;
9 location / {
10 proxy_pass http://backend;
11 proxy_set_header Host $host;
12 proxy_set_header X-Real-IP $remote_addr;
13 proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
14 proxy_connect_timeout 5s;
15 proxy_send_timeout 5s;
16 proxy_read_timeout 5s;
17 }
18 # 健康检查端点(用于Keepalived检测)
19 location /health {
20 access_log off;
21 return 200 "healthy\n";
22 add_header Content-Type text/plain;
23 }
24 }
# 采用双主模式为基础
# 创建 health 检查页
[root@KA1 ~]# mkdir /usr/share/nginx/html/health
[root@KA1 ~]# echo "OK" > /usr/share/nginx/html/health/health.html
# 健康检查脚本
[root@KA1 ~]# vim /etc/keepalived/check_nginx.sh
1 #!/bin/bash
3 # 检查Nginx进程
4 if ! pgrep -x "nginx" > /dev/null; then
5 # 尝试启动Nginx
6 systemctl start nginx
7 sleep 2
8 # 再次检查
9 if ! pgrep -x "nginx" > /dev/null; then
10 # Nginx启动失败,返回错误触发切换
11 exit 1
12 fi
13 fi
15 # 检查Nginx端口是否监听
16 if ! ss -tlnp | grep -q ":80"; then
17 exit 1
18 fi
20 # 可选:检查HTTP响应
21 if ! curl -sf http://localhost/health > /dev/null; then
22 exit 1
23 fi
24 exit 0
[root@KA1 ~]# chmod +x /etc/keepalived/check_nginx.sh
[root@KA1 ~]# nginx -t && systemctl enable --now nginx
[root@KA1 ~]# /etc/keepalived/check_nginx.sh && echo "Check OK" || echo "Check FAILED"
Check OK
# 通知脚本
[root@KA1 ~]# systemctl is-active postfix.service
inactive
[root@KA1 ~]# systemctl start postfix.service
[root@KA1 ~]# chmod +x /etc/keepalived/notify_nginx.sh
1 #!/bin/bash
2 mail_dest='收件人'
3 TYPE=$1 # WEB_VIP 或 DB_VIP
4 STATE=$2 # MASTER, BACKUP, FAULT
5 LOG_FILE="/var/log/keepalived/notify.log"
6 mkdir -p $(dirname $LOG_FILE)
7 echo "$(date '+%Y-%m-%d %H:%M:%S') - Instance: $TYPE, State: $STATE, Host: $(hostname)" >> $LOG_FILE
8 case $STATE in
9 MASTER)
10 # 成为Master时的额外操作
11 echo "$(date) - Becoming MASTER for $TYPE" >> $LOG_FILE
12 # 可以在这里添加告警通知
13 ;;
14 BACKUP)
15 echo "$(date) - Becoming BACKUP for $TYPE" >> $LOG_FILE
16 ;;
17 FAULT)
18 echo "$(date) - FAULT state for $TYPE" >> $LOG_FILE
19 # 发送紧急告警
20 ;;
21 esac
# KA1中keepalived设置
[root@KA1 ~]# vim /etc/keepalived/keepalived.conf
3 global_defs {
………………
18 enable_script_security # 启用脚本执行权限
19 script_user root root # 指定专用用户,因为所有部署都是用root用户去部署的
20 }
21
22 # Nginx健康检查脚本
23 vrrp_script check_nginx {
24 script "/etc/keepalived/check_nginx.sh"
25 interval 2 # 每2秒检查一次
26 weight -20 # 检查失败,优先级降低20
27 fall 2 # 连续2次失败才判定失败
28 rise 1 # 1次成功恢复
29 }
30
31 vrrp_instance WEB_VIP {
32 state MASTER
33 interface eth0
34 virtual_router_id 51
35 priority 100
36 advert_int 1
37 authentication {
38 auth_type PASS
39 auth_pass 1111
40 }
41 virtual_ipaddress {
42 172.25.254.100/24 dev eth0 label eth0:1
43 }
44 # 追踪健康检查脚本
45 track_script {
46 check_nginx
47 }
48 # 状态切换通知
49 notify_master "/etc/keepalived/notify_nginx.sh WEB_VIP MASTER"
50 notify_backup "/etc/keepalived/notify_nginx.sh WEB_VIP BACKUP"
51 notify_fault "/etc/keepalived/notify_nginx.sh WEB_VIP FAULT"
52 }
53
54 vrrp_instance DB_VIP {
55 state BACKUP
56 interface eth0
57 virtual_router_id 52
58 priority 80
59 advert_int 1
60 authentication {
61 auth_type PASS
62 auth_pass 1111
63 }
64 virtual_ipaddress {
65 172.25.254.200/24 dev eth0 label eth0:0
66 }
67 track_script {
68 check_nginx
69 }
70 notify_master "/etc/keepalived/notify_nginx.sh DB_VIP MASTER"
71 notify_backup "/etc/keepalived/notify_nginx.sh DB_VIP BACKUP"
72 notify_fault "/etc/keepalived/notify_nginx.sh DB_VIP FAULT"
73 }
[root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
SECURITY VIOLATION - scripts are being executed but script_security not enabled.
[root@KA2 ~]# mkdir /usr/share/nginx/html/health
[root@KA1 ~]# scp /etc/nginx/conf.d/upstream.conf root@172.25.254.60:/etc/nginx/conf.d/upstream.conf
upstream.conf 100% 672 1.5MB/s 00:00
[root@KA1 ~]# scp /etc/keepalived/check_nginx.sh root@172.25.254.60:/etc/keepalived/
check_nginx.sh 100% 469 978.8KB/s 00:00
[root@KA1 ~]# scp /usr/share/nginx/html/health/health.html root@172.25.254.60:/usr/share/nginx/html/health/
health.html 100% 3 5.6KB/s 00:00
[root@KA1 ~]# scp /etc/keepalived/notify_nginx.sh root@172.25.254.60:/etc/keepalived/
notify_nginx.sh 100% 654 1.1MB/s 00:00
KA2
# KA2:
[root@KA2 ~]# dnf install nginx -y > /dev/null
[root@KA2 ~]# ll /etc/nginx/conf.d/upstream.conf /etc/keepalived/check_nginx.sh /usr/share/nginx/html/health/health.html /etc/keepalived/notify_nginx.sh
-rwxr-xr-x 1 root root 469 Feb 22 16:19 /etc/keepalived/check_nginx.sh
-rwxr-xr-x 1 root root 654 Feb 22 16:27 /etc/keepalived/notify_nginx.sh
-rw-r--r-- 1 root root 672 Feb 22 15:34 /etc/nginx/conf.d/upstream.conf
-rw-r--r-- 1 root root 3 Feb 22 16:20 /usr/share/nginx/html/health/health.html
[root@KA2 ~]# nginx -t && systemctl enable --now nginx
[root@KA2 ~]# systemctl is-active postfix.service
inactive
[root@KA2 ~]# systemctl start postfix.service
[root@KA2 ~]# /etc/keepalived/check_nginx.sh && echo "Check OK" || echo "Check FAILED"
Check OK
# KA2中keepalived设置
[root@KA2 ~]# vim /etc/keepalived/keepalived.conf
3 global_defs {
………………
17 enable_script_security # 启用脚本执行权限
18 script_user root root # 指定专用用户,因为所有部署都是用root用户去部署的
19 }
20 # Nginx健康检查脚本
21 vrrp_script check_nginx {
22 script "/etc/keepalived/check_nginx.sh"
23 interval 2
24 weight -20
25 fall 2
26 rise 1
27 }
28
29 vrrp_instance WEB_VIP {
30 state BACKUP
31 interface eth0
32 virtual_router_id 51
33 preempt_delay 10 # 抢占延迟10秒(避免网络抖动)
34 priority 80
35 advert_int 1
36 authentication {
37 auth_type PASS
38 auth_pass 1111
39 }
40 virtual_ipaddress {
41 172.25.254.100/24 dev eth0 label eth0:1
42 }
43 track_script {
44 check_nginx
45 }
46 notify_master "/etc/keepalived/notify_nginx.sh WEB_VIP MASTER"
47 notify_backup "/etc/keepalived/notify_nginx.sh WEB_VIP BACKUP"
48 notify_fault "/etc/keepalived/notify_nginx.sh WEB_VIP FAULT"
49 }
50
51 vrrp_instance DB_VIP {
52 state MASTER
53 interface eth0
54 virtual_router_id 52
55 preempt_delay 10
56 priority 100
57 advert_int 1
58 authentication {
59 auth_type PASS
60 auth_pass 1111
61 }
62 virtual_ipaddress {
63 172.25.254.200/24 dev eth0 label eth0:0
64 }
65 track_script {
66 check_nginx
67 }
68 notify_master "/etc/keepalived/notify_nginx.sh DB_VIP MASTER"
69 notify_backup "/etc/keepalived/notify_nginx.sh DB_VIP BACKUP"
70 notify_fault "/etc/keepalived/notify_nginx.sh DB_VIP FAULT"
71 }
[root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
(DB_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
# 测试
[root@KA1 ~]# systemctl is-active keepalived.service
active
[root@KA1 ~]# systemctl reload keepalived.service
[root@KA2 ~]# systemctl reload keepalived.service
[root@KA1 ~]# ip a s | grep eth0:1$
inet 172.25.254.100/24 scope global secondary eth0:1
[root@KA2 ~]# ip a s | grep eth0:0$
inet 172.25.254.200/24 scope global secondary eth0:0
global_defs {
    enable_script_security # 启用脚本执行权限
    script_user root root # 指定专用用户,因为所有部署都是用root用户去部署的
}
现实中都是为所有软件创建对应的用户和用户组,具体操作和注意点看大数据笔记,这里为了方便就只能root用户实践。

2.2 keepalive+HAProxy
基于Keepalived+Nginx改动
2.2.1 环境
架构设计

环境信息
| 节点 | IP地址 | 角色 | VRRP实例 |
|---|---|---|---|
| KA1 | 172.25.254.50 | HAProxy + Keepalived | WEB_VIP: MASTER, API_VIP: BACKUP |
| KA2 | 172.25.254.60 | HAProxy + Keepalived | WEB_VIP: BACKUP, API_VIP: MASTER |
| RS1 | 172.25.254.20 | 后端Web/API服务器 | NULL |
| RS2 | 172.25.254.30 | 后端Web/API服务器 | NULL |
2.2.2 实践
RS
[root@RS1 ~]# mkdir -p /usr/share/nginx/html/api
[root@RS1 ~]# echo '{"status":"ok","server":"RS1","ip":"172.25.254.20"}' > /usr/share/nginx/html/api/status.json
[root@RS1 ~]# vim /etc/nginx/conf.d/default.conf
[root@RS1 ~]# nginx -t && nginx -s reload
[root@RS1 ~]# curl http://172.25.254.20
RS1 - 172.25.254.20
[root@RS1 ~]# curl http://172.25.254.20/api/status.json
{"status":"ok","server":"RS1","ip":"172.25.254.20"}
[root@RS1 ~]# curl http://172.25.254.20/health
healthy
[root@RS1 ~]# scp /etc/nginx/conf.d/default.conf root@172.25.254.30:/etc/nginx/conf.d/
default.conf 100% 502 928.3KB/s 00:00
[root@RS2 ~]# mkdir -p /usr/share/nginx/html/api
[root@RS2 ~]# echo '{"status":"ok","server":"RS2","ip":"172.25.254.30"}' > /usr/share/nginx/html/api/status.json
[root@RS2 ~]# nginx -t && nginx -s reload
KA实现高可用性
[root@KA1 ~]# systemctl stop keepalived.service # 由于上个实验做了nginx健康保护,直接stop的话Nginx会被其他进程守护自动重启,或者说不能及时关闭,先把keepalived停后才可以,也说明有看门狗机制在保护它。
[root@KA1 ~]# systemctl disable --now nginx
[root@KA1 ~]# netstat -lntupa | grep nginx
[root@KA1 ~]# systemctl is-active nginx
inactive
[root@KA1 ~]# dnf install haproxy -y > /dev/null
[root@KA1 ~]# systemctl enable --now haproxy
[root@KA2 ~]# systemctl stop keepalived.service
[root@KA2 ~]# systemctl disable --now nginx
[root@KA2 ~]# netstat -lntupa | grep nginx
[root@KA2 ~]# systemctl is-active nginx
inactive
[root@KA2 ~]# dnf install haproxy -y > /dev/null
[root@KA2 ~]# systemctl enable --now haproxy
KA1
# HAProxy设定:KA1与KA2一致
[root@KA1 ~]# vim /etc/haproxy/haproxy.cfg
64 # 统计页面
65 listen stats
66 bind *:8080
67 stats enable
68 stats uri /stats
69 stats auth admin:admin123
70 stats refresh 30s
71
72 # Web服务前端(对应VIP1 172.25.254.100)
73 frontend web_frontend
74 bind *:80
75 acl is_api path_beg /api
76 use_backend api_servers if is_api
77 default_backend web_servers
78
79 # Web服务后端(/health 使用 Nginx return 指令)
80 backend web_servers
81 balance roundrobin
82 option httpchk GET /health
83 http-check expect status 200
84 server rs1 172.25.254.20:80 check weight 5 inter 2s rise 2 fall 3
85 server rs2 172.25.254.30:80 check weight 5 inter 2s rise 2 fall 3
86
87 # API服务后端(/api/status.json 物理文件)
88 backend api_servers
89 balance roundrobin
90 option httpchk GET /api/status.json
91 http-check expect status 200
92 server rs1 172.25.254.20:80 check weight 5 inter 2s rise 2 fall 3
93 server rs2 172.25.254.30:80 check weight 5 inter 2s rise 2 fall 3
# HAProxy 健康检查脚本(KA1和KA2相同)
[root@KA1 ~]# vim /etc/keepalived/check_haproxy.sh
1 #!/bin/bash
2 # 检查HAProxy进程
3 if ! pgrep -x "haproxy" > /dev/null; then
4 systemctl start haproxy
5 sleep 2
6 if ! pgrep -x "haproxy" > /dev/null; then
7 exit 1
8 fi
9 fi
10 # 检查HAProxy端口(80和8080统计页面)
11 if ! ss -tlnp | grep -q ":80"; then
12 exit 1
13 fi
14 exit 0
[root@KA1 ~]# chmod +x /etc/keepalived/check_haproxy.sh
[root@KA1 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
[root@KA1 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
HAProxy OK
# 通知告警脚本
[root@KA1 ~]# cp /etc/keepalived/notify_nginx.sh /etc/keepalived/notify_haproxy.sh
[root@KA1 ~]# scp /etc/keepalived/notify_haproxy.sh root@172.25.254.60:/etc/keepalived/
notify_haproxy.sh 100% 684 564.8KB/s 00:00
[root@KA1 ~]# scp /etc/haproxy/haproxy.cfg root@172.25.254.60:/etc/haproxy/haproxy.cfg
haproxy.cfg 100% 4220 4.7MB/s 00:00
[root@KA1 ~]# scp /etc/keepalived/check_haproxy.sh root@172.25.254.60:/etc/keepalived/
check_haproxy.sh 100% 293 465.5KB/s 00:00
# KA1 Keepalived配置,基于Keepalived + Nginx双主模式
[root@KA1 ~]# vim /etc/keepalived/keepalived.conf
22 # HAProxy健康检查脚本
23 vrrp_script check_haproxy {
24 script "/etc/keepalived/check_haproxy.sh"
25 interval 2 # 每2秒检查一次
26 weight -20 # 检查失败,优先级降低20
27 fall 2 # 连续2次失败才判定失败
28 rise 1 # 1次成功恢复
29 }
31 # VIP1: Web服务入口,KA1为主
32 vrrp_instance WEB_VIP {
………………
45 track_script {
46 check_haproxy
47 }
48 notify_master "/etc/keepalived/notify_haproxy.sh WEB_VIP MASTER"
49 notify_backup "/etc/keepalived/notify_haproxy.sh WEB_VIP BACKUP"
50 notify_fault "/etc/keepalived/notify_haproxy.sh WEB_VIP FAULT"
51 }
52
53 # VIP2: API服务入口,KA1为备
54 vrrp_instance API_VIP {
………………
70 notify_master "/etc/keepalived/notify_haproxy.sh API_VIP MASTER"
71 notify_backup "/etc/keepalived/notify_haproxy.sh API_VIP BACKUP"
72 notify_fault "/etc/keepalived/notify_haproxy.sh API_VIP FAULT"
73 }
[root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
KA2
[root@KA2 ~]# systemctl reload haproxy.service
[root@KA2 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
HAProxy OK
# KA2 Keepalived配置,基于Keepalived + Nginx双主模式
[root@KA2 ~]# vim /etc/keepalived/keepalived.conf
31 # VIP1: Web服务入口,KA2为备
32 vrrp_instance WEB_VIP {
………………
46 track_script {
47 check_haproxy
48 }
49 notify_master "/etc/keepalived/notify_haproxy.sh WEB_VIP MASTER"
50 notify_backup "/etc/keepalived/notify_haproxy.sh WEB_VIP BACKUP"
51 notify_fault "/etc/keepalived/notify_haproxy.sh WEB_VIP FAULT"
52 }
54
55 # VIP2: API服务入口,KA2为主
56 vrrp_instance API_VIP {
………………
70 track_script {
71 check_haproxy
72 }
73 notify_master "/etc/keepalived/notify_haproxy.sh API_VIP MASTER"
74 notify_backup "/etc/keepalived/notify_haproxy.sh API_VIP BACKUP"
75 notify_fault "/etc/keepalived/notify_haproxy.sh API_VIP FAULT"
76 }
[root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
(API_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
[root@KA1 ~]# systemctl start keepalived.service
[root@KA2 ~]# systemctl start keepalived.service
# 双主状态验证
[root@KA1 ~]# ip a s | grep eth0:1$
inet 172.25.254.100/24 scope global secondary eth0:1
[root@KA2 ~]# ip a s | grep eth0:0$
inet 172.25.254.200/24 scope global secondary eth0:0
# 服务访问测试
[root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
RS1 - 172.25.254.20
RS2 - 172.25.254.30
RS1 - 172.25.254.20
RS2 - 172.25.254.30
[root@Client ~]# for i in {1..4};do curl http://172.25.254.200/api/status.json;done
{"status":"ok","server":"RS1","ip":"172.25.254.20"}
{"status":"ok","server":"RS2","ip":"172.25.254.30"}
{"status":"ok","server":"RS1","ip":"172.25.254.20"}
{"status":"ok","server":"RS2","ip":"172.25.254.30"}
[root@Client ~]# curl http://172.25.254.100/health
healthy
3 场景3:数据库高可用
3.1.1 环境
架构设计

核心原则:数据库必须单主
| 原则 | 说明 |
|---|---|
| 单 VIP | 只有一个入口,确保写操作唯一性 |
| 单 Master | 任何时候只有一个节点接受写操作 |
| 自动检测 | Keepalived 检测 MySQL 状态,故障时 VIP 漂移 |
| 手动/半自动切换 | 主从切换需要谨慎,建议配合 MHA 或手动 |
环境信息
| 节点 | IP地址 | 角色 | 说明 |
|---|---|---|---|
| KA1 | 172.25.254.50 | Keepalived BACKUP | VIP 故障时接管 |
| KA2 | 172.25.254.60 | Keepalived MASTER | 正常时持有 VIP |
| DB1 | 172.25.254.40 | MySQL Master | 主库,接受读写 |
| DB2 | 172.25.254.41 | MySQL Slave | 从库,只读/热备 |
| VIP | 172.25.254.200 | 数据库入口 | 指向当前 Master |
3.1.2 实践
DB主从复制
DB1
# DB1:
[root@RS1 ~]# systemctl stop nginx.service
[root@RS1 ~]# dnf install mysql-server -y >/dev/null
[root@RS1 ~]# vim /etc/my.cnf
11 [mysqld]
12 server-id = 20
13 # GTID 复制(推荐)
14 gtid_mode = ON
15 enforce_gtid_consistency = ON
16 log-bin = mysql-bin
17 binlog-format = ROW
18 expire_logs_days = 7
19 max_binlog_size = 100M
20 # 半同步复制(可选但推荐)
21 plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
22 rpl-semi-sync-master-enabled = 1
23 rpl-semi-sync-slave-enabled = 1
24 rpl-semi-sync-master-timeout = 1000
25 # 字符集
26 character-set-server = utf8mb4
27 # 绑定所有接口
28 bind-address = 0.0.0.0
29 # 为 Keepalived 检测预留的用户
30 skip-name-resolve
[root@RS1 ~]# systemctl enable --now mysqld
[root@RS1 ~]# mysql
# 创建复制用户
mysql> create user 'repl'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant replication slave on *.* to 'repl'@'172.25.254.%';
# 创建健康检查用户(用于 Keepalived 检测)
mysql> create user 'check'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant process,replication client,show databases on *.* to 'check'@'172.25.254.%';
mysql> flush privileges;
mysql> show master status;
+---------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+---------------+----------+--------------+------------------+-------------------+
| binlog.000001 | 1237 | | | |
+---------------+----------+--------------+------------------+-------------------+
mysql> SHOW VARIABLES LIKE 'gtid%';
+----------------------------------+-----------+
| Variable_name | Value |
+----------------------------------+-----------+
| gtid_executed | |
| gtid_executed_compression_period | 0 |
| gtid_mode | ON |
| gtid_next | AUTOMATIC |
| gtid_owned | |
| gtid_purged | |
+----------------------------------+-----------+
# 数据库刚启动,还没有执行任何写入操作,没有产生事务,gtid_executed为空的原因。
mysql> create database test_gtid;
mysql> use test_gtid;
Database changed
mysql> create table t1 (id int primary key, name varchar(50));
mysql> insert into t1 values (1,'test');
mysql> SHOW VARIABLES LIKE 'gtid_executed';
+---------------+------------------------------------------+
| Variable_name | Value |
+---------------+------------------------------------------+
| gtid_executed | b650e3a0-102f-11f1-9134-000c29999103:1-3 |
+---------------+------------------------------------------+
DB2
# DB2:
[root@RS2 ~]# systemctl stop nginx.service
[root@RS2 ~]# dnf install mysql-server -y >/dev/null
[root@RS2 ~]# vim /etc/my.cnf
10 !includedir /etc/my.cnf.d
11 [mysqld]
12 server-id = 30
13 # GTID
14 gtid_mode = ON
15 enforce_gtid_consistency = ON
16 log-bin = mysql-bin
17 binlog-format = ROW
18 expire_logs_days = 7
19 # 半同步复制,起不来,后面想办法加上去。
20 # plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
21 # rpl-semi-sync-master-enabled = 1
22 # rpl-semi-sync-slave-enabled = 1
23 # 只读(重要!防止误写入)
24 read_only = 1
25 super_read_only = 1
26 character-set-server = utf8mb4
27 bind-address = 0.0.0.0
28 skip-name-resolve
[root@RS2 ~]# systemctl enable --now mysqld
mysql> INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
ERROR 1290 (HY000): The MySQL server is running with the --super-read-only option so it cannot execute this statement
mysql> INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';
ERROR 1290 (HY000): The MySQL server is running with the --super-read-only option so it cannot execute this statement
# 关闭只读
mysql> set global super_read_only = OFF;
mysql> set global read_only = OFF;
# 安装插件
mysql> install plugin rpl_semi_sync_master soname 'semisync_master.so';
mysql> install plugin rpl_semi_sync_slave soname 'semisync_slave.so';
# 启用插件
mysql> set global rpl_semi_sync_master_enabled = ON;
mysql> set global rpl_semi_sync_slave_enabled = ON;
# 创建复制用户
mysql> create user 'repl'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant replication slave on *.* to 'repl'@'172.25.254.%';
# 创建健康检查用户(用于 Keepalived 检测)
mysql> create user 'check'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant process,replication client,show databases on *.* to 'check'@'172.25.254.%';
# 重新开启只读(从库必须保持只读)
mysql> set global read_only = ON;
# 查看插件状态
mysql> show plugins;
……………… # 正常
# 查看变量
mysql> show variables like 'rpl_semi_sync%';
+-------------------------------------------+------------+
| Variable_name | Value |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled | ON |
| rpl_semi_sync_master_timeout | 10000 |
| rpl_semi_sync_master_trace_level | 32 |
| rpl_semi_sync_master_wait_for_slave_count | 1 |
| rpl_semi_sync_master_wait_no_slave | ON |
| rpl_semi_sync_master_wait_point | AFTER_SYNC |
| rpl_semi_sync_slave_enabled | ON |
| rpl_semi_sync_slave_trace_level | 32 |
+-------------------------------------------+------------+
8 rows in set (0.00 sec)
# 配置主从复制
mysql> change master to
-> master_host = '172.25.254.20',
-> master_port = 3306,
-> master_user = 'repl',
-> master_password = '123',
-> master_auto_position = 1, # GTID 自动定位
-> master_connect_retry = 10;
mysql> start slave;
mysql> show slave status\G
*************************** 1. row ***************************
…………
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
…………
测试
[root@RS1 ~]# mysql
mysql> create database test_haha;
mysql> use test_haha
mysql> create table t1 (id int primary key,name varchar(50));
mysql> insert into t1 values (1,'from Master DB1');
[root@RS2 ~]# mysql -e "SELECT * FROM test_haha.t1;"
+----+-----------------+
| id | name |
+----+-----------------+
| 1 | from Master DB1 |
+----+-----------------+
KA实现高可用
# 安装 MySQL 客户端用于检测
[root@KA1 ~]# systemctl stop keepalived.service
[root@KA1 ~]# systemctl stop haproxy.service
[root@KA1 ~]# dnf install mysql -y >/dev/null
[root@KA2 ~]# dnf install mysql -y >/dev/null
[root@KA1 ~]# mysql -h172.25.254.30 -ucheck -p123 -e "SELECT 1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---+
| 1 |
+---+
| 1 |
+---+
[root@KA1 ~]# mysql -h172.25.254.20 -ucheck -p123 -e "SELECT 1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---+
| 1 |
+---+
| 1 |
+---+
KA1
# MySQL 检查脚本
[root@KA1 ~]# vim /etc/keepalived/check_mysql.sh
1 #!/bin/bash
2
3 # MySQL 检测脚本
4 # 检测远程 MySQL 是否可连接且为主库(read_only=OFF)
5
6 MYSQL_HOST="172.25.254.20" # 当前主库 IP
7 MYSQL_USER="check"
8 MYSQL_PASS="123"
9 MYSQL_PORT=3306
10 LOG_FILE="/var/log/keepalived_mysql_check.log"
11 # 记录检测时间
12 echo "$(date '+%Y-%m-%d %H:%M:%S') - Checking MySQL at $MYSQL_HOST" >> $LOG_FILE
13 # 检测1:MySQL 是否可连接
14 if ! mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS -P$MYSQL_PORT -e "SELECT 1;" > /dev/null 2>&1; then
15 echo "$(date): MySQL $MYSQL_HOST connection FAILED" >> $LOG_FILE
16 exit 1
17 fi
18 # 检测2:是否为 Master(read_only = OFF)
19 READ_ONLY=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS -P$MYSQL_PORT -e "SHOW VARIABLES LIKE 'read_only';" 2>/dev/null | grep read_only | awk '{print $2}')
20 if [ "$READ_ONLY" = "ON" ]; then
21 echo "$(date): MySQL $MYSQL_HOST is read_only, not master" >> $LOG_FILE
22 exit 1
23 fi
24 # 检测3:复制延迟检测
25 SLAVE_LAG=$(mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASS -P$MYSQL_PORT -e "SHOW SLAVE STATUS\G" 2>/dev/null | grep Seconds_Behind_Master | awk '{print $2}')
26 if [ "$SLAVE_LAG" != "NULL" ] && [ "$SLAVE_LAG" -gt 60 ]; then
27 echo "$(date): Replication lag $SLAVE_LAG seconds" >> $LOG_FILE
28 exit 1
29 fi
30 echo "$(date): MySQL $MYSQL_HOST check PASSED" >> $LOG_FILE
31 exit 0
[root@KA1 ~]# chmod +x /etc/keepalived/check_mysql.sh
[root@KA1 ~]# /etc/keepalived/check_mysql.sh && echo "MySQL OK" || echo "MySQL FAILED"
/etc/keepalived/check_mysql.sh: line 26: [: : integer expression expected
# 报错原因:在主库上 SHOW SLAVE STATUS 无输出,SLAVE_LAG 为空串,导致第26行的 -gt 整数比较失败;
# 修复方法:将第26行改为 if [ -n "$SLAVE_LAG" ] && [ "$SLAVE_LAG" != "NULL" ] && [ "$SLAVE_LAG" -gt 60 ]; then
MySQL OK
# 切换脚本(Master故障时自动切换主从)
[root@KA1 ~]# vim /etc/keepalived/notify_mysql.sh
#!/bin/bash
#
# keepalived notify script for the DB_VIP instance.
# Invoked by keepalived as: notify_mysql.sh <TYPE> <STATE>
#   $1 TYPE  - instance name (DB_VIP)
#   $2 STATE - MASTER, BACKUP or FAULT
# Logs every state transition; on MASTER it alerts the administrator but
# deliberately does NOT auto-promote the standby database.

TYPE=$1     # DB_VIP
STATE=$2    # MASTER, BACKUP, FAULT
CURRENT_HOST=$(hostname)
LOG_FILE="/var/log/keepalived_mysql.log"

# Ensure the log directory exists (quoted so paths with spaces work).
mkdir -p "$(dirname "$LOG_FILE")"

echo "$(date '+%Y-%m-%d %H:%M:%S') - $TYPE on $CURRENT_HOST changed to $STATE" >> "$LOG_FILE"

case "$STATE" in
    MASTER)
        echo "$(date): !!! This node is now MASTER for MySQL VIP !!!" >> "$LOG_FILE"
        echo "$(date): VIP 172.25.254.200 is now on $CURRENT_HOST" >> "$LOG_FILE"
        # Important: alert the administrator about the failover.
        echo "MySQL VIP failover occurred at $(date)" | \
            mail -s "CRITICAL: MySQL HA Failover" xiafeng_68@163.com 2>/dev/null || true
        # NOTE: no automatic master/slave switchover is performed here!
        # Reasons: 1. split-brain risk  2. data-consistency risk
        # Recommendation: have a human confirm, then run the manual failover script.
        # Auto-promote DB2 to master (high risk, use with caution):
        # mysql -h172.25.254.30 -ugxf -p123 -e "STOP SLAVE; RESET SLAVE ALL; SET GLOBAL read_only = 0;"

        echo "$(date): ACTION REQUIRED: Please verify and execute manual failover if needed" >> "$LOG_FILE"
        ;;

    BACKUP)
        echo "$(date): This node is now BACKUP for MySQL VIP" >> "$LOG_FILE"
        ;;

    FAULT)
        echo "$(date): FAULT state detected on $CURRENT_HOST" >> "$LOG_FILE"
        ;;
esac
[root@KA1 ~]# chmod +x /etc/keepalived/notify_mysql.sh
# 手动主从切换脚本(故障时使用)
[root@KA1 ~]# vim /etc/keepalived/manual_failover.sh
#!/bin/bash
#
# Manual MySQL failover script (run only after human confirmation).
# When DB1 has failed, this promotes DB2 to master and repoints the
# keepalived health-check script at the new master.

NEW_MASTER="172.25.254.30"   # DB2
OLD_MASTER="172.25.254.20"   # DB1
VIP="172.25.254.200"

echo "=== MySQL 手动故障切换 ==="
echo "时间: $(date)"
echo "新主库: $NEW_MASTER (DB2)"
echo "旧主库: $OLD_MASTER (DB1) - 假设已故障"

# 1. Stop replication on DB2 and promote it to writable master.
echo "步骤1: 在 DB2 上停止复制..."
mysql -h"$NEW_MASTER" -ugxf -p123 -e "
STOP SLAVE;
RESET SLAVE ALL;
SET GLOBAL read_only = OFF;
SET GLOBAL super_read_only = OFF;
SELECT 'DB2 is now MASTER' as status;
"

# 2. Point the keepalived check script at the new master.
#    Escape the dots in the IP so sed matches them literally
#    (an unescaped "." would match any character).
OLD_MASTER_RE=${OLD_MASTER//./\\.}
echo "步骤2: 更新 Keepalived 检测脚本..."
sed -i "s/MYSQL_HOST=\"$OLD_MASTER_RE\"/MYSQL_HOST=\"$NEW_MASTER\"/" /etc/keepalived/check_mysql.sh

# 3. When the old master recovers, rejoin it as a slave (optional).
echo "步骤3: 当旧主库恢复后,执行以下命令重新加入:"
echo "  CHANGE MASTER TO MASTER_HOST='$NEW_MASTER', MASTER_USER='repl', MASTER_PASSWORD='Repl123', MASTER_AUTO_POSITION=1;"
echo "  START SLAVE;"

echo "切换完成!VIP $VIP 现在指向新主库 $NEW_MASTER"
[root@KA1 ~]# chmod +x /etc/keepalived/manual_failover.sh
[root@KA1 ~]# scp /etc/keepalived/manual_failover.sh root@172.25.254.60:/etc/keepalived/
manual_failover.sh 100% 1175 736.0KB/s 00:00
[root@KA1 ~]# scp /etc/keepalived/check_mysql.sh root@172.25.254.60:/etc/keepalived/
check_mysql.sh 100% 1309 1.6MB/s 00:00
[root@KA1 ~]# scp /etc/keepalived/notify_mysql.sh root@172.25.254.60:/etc/keepalived/
notify_mysql.sh 100% 1323 2.6MB/s 00:00
# KA1配置keepalived
1 ! Configuration File for keepalived
2
3 global_defs {
………………
13 vrrp_skip_check_adv_addr
14 #vrrp_strict # 必须关闭,否则阻断MySQL连接
………………
18 enable_script_security # 启用脚本执行权限
19 script_user root root # 指定专用用户,因为所有部署都是用root用户去部署的
20 }
21
22 # MySql健康检查脚本
23 vrrp_script check_mysql {
24 script "/etc/keepalived/check_mysql.sh"
25 interval 3 # MySQL检测间隔稍长
26 weight -20 # 检查失败,优先级降低20
27 fall 2 # 连续2次失败才判定失败
28 rise 2 # 连续2次成功恢复
29 }
30
31 # 单VIP:数据库入口
32 vrrp_instance DB_VIP {
33 state BACKUP # KA1作为BACKUP
34 interface eth0
35 virtual_router_id 52
36 priority 80 # 低于KA2
37 advert_int 1
38 authentication {
39 auth_type PASS
40 auth_pass 2222
41 }
42 virtual_ipaddress {
43 172.25.254.200/24 dev eth0 label eth0:0
44 }
45 track_script {
46 check_mysql
47 }
48 # 禁止抢占(重要!防止网络抖动导致频繁切换)
49 nopreempt
50 notify_master "/etc/keepalived/notify_mysql.sh DB_VIP MASTER"
51 notify_backup "/etc/keepalived/notify_mysql.sh DB_VIP BACKUP"
52 notify_fault "/etc/keepalived/notify_mysql.sh DB_VIP FAULT"
53 }
[root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
# KA1配置haproxy
64 listen mysql
65 bind 0.0.0.0:3306 # MySQL - 绑定所有接口,如果指定172.25.254.200会起不了服务
66 mode tcp
67 option tcp-check
68 tcp-check connect port 3306
69 timeout connect 5s
70 timeout client 30s
71 timeout server 30s
72 server db1 172.25.254.20:3306 check inter 2s rise 2 fall 3
73 server db2 172.25.254.30:3306 check backup inter 2s rise 2 fall 3
74
75 # 统计页面
76 listen stats
77 bind *:8080
78 mode http
79 stats enable
80 stats uri /stats
81 stats auth gxf:123
[root@KA1 ~]# scp /etc/haproxy/haproxy.cfg root@172.25.254.60:/etc/haproxy/haproxy.cfg
haproxy.cfg
KA2
[root@KA2 ~]# ll /etc/keepalived/notify_mysql.sh /etc/keepalived/check_mysql.sh /etc/keepalived/manual_failover.sh
-rwxr-xr-x 1 root root 1309 Feb 23 06:36 /etc/keepalived/check_mysql.sh
-rwxr-xr-x 1 root root 1175 Feb 23 07:08 /etc/keepalived/manual_failover.sh
-rwxr-xr-x 1 root root 1323 Feb 23 06:50 /etc/keepalived/notify_mysql.sh
[root@KA2 ~]# vim /etc/keepalived/keepalived.conf
1 ! Configuration File for keepalived
2
3 global_defs {
4 notification_email {
………………
14 #vrrp_strict
………………
17 #vrrp_mcast_group4 224.0.0.44
18 enable_script_security # 启用脚本执行权限
19 script_user root root # 指定专用用户,因为所有部署都是用root用户去部署的
20 }
22 vrrp_script check_mysql {
23 script "/etc/keepalived/check_mysql.sh"
24 interval 3
25 weight -30
26 fall 2
27 rise 2
28 }
30 vrrp_instance DB_VIP {
31 state MASTER # KA2作为MASTER
32 interface eth0
33 virtual_router_id 52
34 priority 100 # 高于KA1
35 advert_int 1
36 authentication {
37 auth_type PASS
38 auth_pass 2222
39 }
40 virtual_ipaddress {
41 172.25.254.200/24 dev eth0 label eth0:0
42 }
43 track_script {
44 check_mysql
45 }
47 # 可选:抢占延迟
48 preempt_delay 10
50 notify_master "/etc/keepalived/notify_mysql.sh DB_VIP MASTER"
51 notify_backup "/etc/keepalived/notify_mysql.sh DB_VIP BACKUP"
52 notify_fault "/etc/keepalived/notify_mysql.sh DB_VIP FAULT"
53 }
[root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
(DB_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
[root@KA1 ~]# systemctl start keepalived.service
[root@KA2 ~]# systemctl start keepalived.service
[root@KA1 ~]# systemctl start haproxy.service
[root@KA2 ~]# systemctl start haproxy.service
# 查看VIP绑定(应该在KA2上)
[root@KA2 ~]# ip addr show eth0 | grep 172.25.254.200
inet 172.25.254.200/24 scope global secondary eth0:0
tail -f /var/log/keepalived_mysql.log
[root@KA2 ~]# tail -f /var/log/keepalived_mysql.log
2026-02-23 07:49:14 - DB_VIP on KA2 changed to MASTER
Mon Feb 23 07:49:14 AM CST 2026: !!! This node is now MASTER for MySQL VIP !!!
Mon Feb 23 07:49:14 AM CST 2026: VIP 172.25.254.200 is now on KA2
Mon Feb 23 07:49:14 AM CST 2026: ACTION REQUIRED: Please verify and execute manual failover if needed
# 客户端连接测试
[root@Client ~]# mysql -h172.25.254.200 -ugxf -p123 -e "SHOW VARIABLES LIKE 'server_id';"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| server_id | 20 |
+---------------+-------+
# 写入测试
[root@Client ~]# mysql -h172.25.254.200 -ugxf -p123 -e "INSERT INTO test_haha.t1 VALUES (2, 'Via VIP');"
mysql: [Warning] Using a password on the command line interface can be insecure.
# 检测在DB2上验证同步
[root@Client ~]# mysql -h172.25.254.30 -ugxf -p123 -e "SELECT * FROM test_haha.t1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+----+-----------------+
| id | name |
+----+-----------------+
| 1 | from Master DB1 |
| 2 | Via VIP |
+----+-----------------+
# 故障测试如图
# 在 DB1 上停止 MySQL
# 观察 KA 日志
tail -f /var/log/keepalived_mysql_check.log
# 手动切换测试
/etc/keepalived/manual_failover.sh
# 验证新主库
mysql -h172.25.254.200 -ugxf -p123 -e "SHOW VARIABLES LIKE 'server_id';"
3.3.3 关键配置对比(数据库与Web)
| 特性 | Web(Nginx/HAProxy) | 数据库(MySQL) |
|---|---|---|
| 模式 | 双主(两个VIP) | 单主(一个VIP) |
| 状态 | MASTER + BACKUP 互换 | 严格区分 MASTER/BACKUP |
| 抢占 | 默认开启 | 建议 nopreempt |
| 检测间隔 | 2秒 | 3秒(更保守) |
| 权重降幅 | -20 | -30(更敏感) |
| 自动切换 | 是 | 否(建议手动) |
| 脑裂风险 | 低 | 高(必须避免) |
总结
以上为个人经验,希望能给大家一个参考,也希望大家多多支持脚本之家。
