服务器其它

关注公众号 jb51net

关闭
首页 > 网站技巧 > 服务器 > 服务器其它 > Keepalived实现集群高可用

Keepalived实现集群高可用全过程

作者:feng68_

文章描述了三种高可用架构的实现:LVS DR模式、Nginx/HAProxy高可用方案以及数据库高可用性,每种方案都详细描述了环境架构、实践步骤和关键配置,以确保系统的高可用性和可靠性

1 场景1:Keepalived + LVS

1.1.1 环境

直接用之前做的LVS的DR模式了,加个VSNode即可。

LVS(Linux Virtual Server,Linux虚拟服务器)

主机名角色IP地址VIP网关VRRP状态
Router路由器172.25.254.100/192.168.0.100NULL172.25.254.2NULL
VSNode1LVS Director (KA1)192.168.0.50192.168.0.200(keepalived管理VIP浮动)192.168.0.100MASTER (priority 100)
VSNode2LVS Director (KA2)192.168.0.60192.168.0.200(keepalived管理VIP浮动)192.168.0.100BACKUP (priority 80)
RS1Real Server192.168.0.20192.168.0.200(lo)192.168.0.100NULL
RS2Real Server192.168.0.30192.168.0.200(lo)192.168.0.100NULL
Client测试机172.25.254.101NULL可以抵达Router都可NULL
 # Router
 [root@Router ~]# echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
 [root@Router ~]# sysctl -p
 net.ipv4.ip_forward = 1
 [root@Router ~]# ip addr | egrep "eth0$|eth1$"
     inet 172.25.254.100/24 brd 172.25.254.255 scope global noprefixroute eth0
     inet 192.168.0.100/24 brd 192.168.0.255 scope global noprefixroute eth1
 # VS50
 [root@VSNode50 ~]# ip addr | grep "eth0$"
     inet 192.168.0.50/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@VSNode50 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # VS60
 [root@VSNode60 ~]# ip addr | grep "eth0$"
     inet 192.168.0.60/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@VSNode60 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # RS20
 [root@RS20 ~]# ip a | egrep "lo$|eth0"
     inet 127.0.0.1/8 scope host lo
     inet 192.168.0.200/32 scope global lo
 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
     inet 192.168.0.20/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@RS20 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # RS30
 [root@RS30 ~]# ip a | egrep "lo$|eth0"
     inet 127.0.0.1/8 scope host lo
     inet 192.168.0.200/32 scope global lo
 2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
     inet 192.168.0.30/24 brd 192.168.0.255 scope global noprefixroute eth0
 [root@RS30 ~]# route -n
 Kernel IP routing table
 Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
 0.0.0.0         192.168.0.100   0.0.0.0         UG    100    0        0 eth0
 192.168.0.0     0.0.0.0         255.255.255.0   U     100    0        0 eth0
 # 防止RS响应VIP的ARP请求,避免VIP冲突(永久生效-->改/etc/sysctl.conf文件)
 [root@RS20 ~]# cat >> /etc/sysctl.conf << 'EOF'
 > net.ipv4.conf.all.arp_ignore = 1
 > net.ipv4.conf.all.arp_announce = 2
 > net.ipv4.conf.lo.arp_ignore = 1
 > net.ipv4.conf.lo.arp_announce = 2
 > EOF
 [root@RS20 ~]# sysctl -p
 net.ipv4.conf.all.arp_ignore = 1
 net.ipv4.conf.all.arp_announce = 2
 net.ipv4.conf.lo.arp_ignore = 1
 net.ipv4.conf.lo.arp_announce = 2
 [root@RS20 ~]# sysctl net.ipv4.conf.all.arp_ignore
 net.ipv4.conf.all.arp_ignore = 1
 [root@RS20 ~]# sysctl net.ipv4.conf.all.arp_announce
 net.ipv4.conf.all.arp_announce = 2
 [root@RS30 ~]# cat >> /etc/sysctl.conf << 'EOF'
 > net.ipv4.conf.all.arp_ignore = 1
 > net.ipv4.conf.all.arp_announce = 2
 > net.ipv4.conf.lo.arp_ignore = 1
 > net.ipv4.conf.lo.arp_announce = 2
 > EOF
 [root@RS30 ~]# sysctl -p
 net.ipv4.conf.all.arp_ignore = 1
 net.ipv4.conf.all.arp_announce = 2
 net.ipv4.conf.lo.arp_ignore = 1
 net.ipv4.conf.lo.arp_announce = 2
 [root@RS30 ~]# sysctl net.ipv4.conf.all.arp_ignore
 net.ipv4.conf.all.arp_ignore = 1
 [root@RS30 ~]# sysctl net.ipv4.conf.all.arp_announce
 net.ipv4.conf.all.arp_announce = 2

ARP参数:

参数含义
arp_ignore = 1:只响应目的IP配置在收到请求的那个接口上的ARP请求,避免lo上的VIP对外响应ARP
arp_announce = 2:发送ARP时始终选用最佳本地地址(出接口地址)作为源,避免把VIP的MAC地址宣告出去
 [root@VSNode50 ~]# dnf install -y keepalived ipvsadm >/dev/null
 [root@VSNode60 ~]# dnf install -y keepalived ipvsadm >/dev/null
 [root@RS20 ~]# dnf install httpd -y >/dev/null
 [root@RS20 ~]# echo RS20 - 192.168.0.20 > /var/www/html/index.html
 [root@RS20 ~]# systemctl enable --now httpd
 [root@RS30 ~]# dnf install httpd -y >/dev/null
 [root@RS30 ~]#  echo RS30 - 192.168.0.30 > /var/www/html/index.html
 [root@RS30 ~]# systemctl enable --now httpd

1.1.2 实践

Router火墙规则

 [root@Router ~]# iptables -t nat -F
 [root@Router ~]# iptables -t nat -L
 Chain PREROUTING (policy ACCEPT)
 target     prot opt source               destination
 ​
 Chain INPUT (policy ACCEPT)
 target     prot opt source               destination
 ​
 Chain OUTPUT (policy ACCEPT)
 target     prot opt source               destination
 ​
 Chain POSTROUTING (policy ACCEPT)
 target     prot opt source               destination
 [root@Router ~]# iptables -t nat -A PREROUTING -d 172.25.254.100 -p tcp --dport 80 -j DNAT --to-destination 192.168.0.200:80
 [root@Router ~]# iptables -t nat -A POSTROUTING -s 192.168.0.0/24 -o eth0 -j SNAT --to-source 172.25.254.100
 [root@Router ~]# iptables -t nat -L -n -v
 Chain PREROUTING (policy ACCEPT 1 packets, 108 bytes)
  pkts bytes target     prot opt in     out     source               destination
     0     0 DNAT       6    --  *      *       0.0.0.0/0            172.25.254.100       tcp dpt:80 to:192.168.0.200:80
 Chain POSTROUTING (policy ACCEPT 1 packets, 76 bytes)
  pkts bytes target     prot opt in     out     source               destination
     0     0 SNAT       0    --  *      eth0    192.168.0.0/24       0.0.0.0/0            to:172.25.254.100
 [root@Router ~]# dnf install -y iptables-services > /dev/null
 [root@Router ~]# iptables-save > /etc/sysconfig/iptables
 [root@Router ~]# systemctl enable --now iptables.service

VS

 # keepalive
 [root@VSNode50 ~]# vim /etc/keepalived/keepalived.conf
   1 ! Configuration File for keepalived
   2
   3 global_defs {
   4    notification_email {
   5      acassen@firewall.loc
   6      failover@firewall.loc
   7      sysadmin@firewall.loc
   8    }
   9    notification_email_from Alexandre.Cassen@firewall.loc
  10    smtp_server 192.168.200.1
  11    smtp_connect_timeout 30
  12    router_id VS50
  13    vrrp_skip_check_adv_addr
  14    #vrrp_strict     # 必须注释掉,否则会添加iptables规则阻断转发
  15    vrrp_garp_interval 0
  16    vrrp_gna_interval 0
  17 }
  18
  19 # VRRP实例:实现Director高可用
  20 vrrp_instance VI_1 {
  21     state MASTER
  22     interface eth0
  23     virtual_router_id 51
  24     priority 100
  25     advert_int 1
  26     authentication {
  27         auth_type PASS
  28         auth_pass 1111
  29     }
  30     virtual_ipaddress {
  31         192.168.0.200/32 dev eth0 label eth0:0
  32     }
  33 }
  34 # LVS虚拟服务器配置(核心)
  35 virtual_server 192.168.0.200 80 {
  36     delay_loop 6                      # 健康检查间隔(秒)
  37     lb_algo rr                        # 负载均衡算法:rr=轮询,wrr=加权轮询
  38     lb_kind DR                        # LVS模式:DR=直接路由(性能最佳)
  39     protocol TCP                      # 协议类型
  40
  41     # 后端真实服务器 RS1
  42     real_server 192.168.0.20 80 {
  43         weight 1                    # 权重(rr算法下无效,wrr时生效)
  44         TCP_CHECK {                 # TCP健康检查
  45             connect_timeout 3       # 连接超时(秒)
  46             nb_get_retry 3          # 重试次数
  47             delay_before_retry 3    # 重试间隔(秒)
  48         }
  49     }
  50
  51     # 后端真实服务器 RS2
  52     real_server 192.168.0.30 80 {
  53         weight 1
  54         TCP_CHECK {
  55             connect_timeout 3
  56             nb_get_retry 3
  57             delay_before_retry 3
  58         }
  59     }
  60 }
 [root@VSNode50 ~]# scp /etc/keepalived/keepalived.conf root@192.168.0.60:/etc/keepalived/keepalived.conf
 Warning: Permanently added '192.168.0.60' (ED25519) to the list of known hosts.
 keepalived.conf                                                                   100% 1578     3.1MB/s   00:00
 [root@VSNode50 ~]# systemctl enable --now keepalived.service
 [root@VSNode60 ~]# vim /etc/keepalived/keepalived.conf
 # VS60
 ​
   1 ! Configuration File for keepalived
   2
   3 global_defs {
 ………………
  12    router_id VS60
  13    vrrp_skip_check_adv_addr
  14    #vrrp_strict
 ………………
  17 }
  19 # VRRP实例:实现Director高可用
  20 vrrp_instance VI_1 {
  21     state BACKUP
 ………………
  24     priority 80
 ………………
  33 }
 # LVS配置与VSNode1完全相同
 [root@VSNode60 ~]# systemctl enable --now keepalived.service
 ​
 # 查看LVS规则
 [root@VSNode60 ~]# ipvsadm -Ln
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port Scheduler Flags
   -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
 TCP  192.168.0.200:80 rr
   -> 192.168.0.20:80              Route   1      0          0
   -> 192.168.0.30:80              Route   1      0          0

测试

 [root@VSNode50 ~]# ip a | grep eth0:0
     inet 192.168.0.200/32 scope global eth0:0
 [root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 [root@VSNode50 ~]# ipvsadm -Ln --stats
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
   -> RemoteAddress:Port
 TCP  192.168.0.200:80                    4       24        0     1592        0
   -> 192.168.0.20:80                     2       12        0      796        0
   -> 192.168.0.30:80                     2       12        0      796        0
 [root@VSNode60 ~]# ipvsadm -Ln --stats
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
   -> RemoteAddress:Port
 TCP  192.168.0.200:80                    0        0        0        0        0
   -> 192.168.0.20:80                     0        0        0        0        0
   -> 192.168.0.30:80                     0        0        0        0        0
 [root@VSNode50 ~]# systemctl stop keepalived.service
 [root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 RS30 - 192.168.0.30
 RS20 - 192.168.0.20
 [root@VSNode60 ~]# ip a | grep eth0:0
     inet 192.168.0.200/32 scope global eth0:0
 [root@VSNode60 ~]# ipvsadm -Ln --stats
 IP Virtual Server version 1.2.1 (size=4096)
 Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
   -> RemoteAddress:Port
 TCP  192.168.0.200:80                    4       24        0     1592        0
   -> 192.168.0.20:80                     2       12        0      796        0
   -> 192.168.0.30:80                     2       12        0      796        0

2 场景2:Keepalived + Nginx/HAProxy

2.1 Keepalived+Nginx方案

2.1.1 环境

架构设计

环境信息
节点IP地址角色VRRP实例
KA1172.25.254.50Nginx + KeepalivedWEB_VIP: MASTER, DB_VIP: BACKUP
KA2172.25.254.60Nginx + KeepalivedWEB_VIP: BACKUP, DB_VIP: MASTER
RS1172.25.254.20后端Web服务器NULL
RS2172.25.254.30后端Web服务器NULL
VIP1172.25.254.100Web服务入口主: KA1, 备: KA2
VIP2172.25.254.200备用入口(或DB服务)主: KA2, 备: KA1

2.1.2 实践

RS测试页
 [root@RS1 ~]# dnf install nginx -y > /dev/null
 [root@RS1 ~]# echo "RS1 - 172.25.254.20" > /usr/share/nginx/html/index.html
 [root@RS2 ~]# echo "RS2 - 172.25.254.30" > /usr/share/nginx/html/index.html
 [root@RS2 ~]# systemctl enable --now nginx.service
 [2026-02-22 15:29.09]  ~
 [Is XiaFeng Computer.IsXiaFengComputer] ⮞ curl 172.25.254.20
 RS1 - 172.25.254.20
 [2026-02-22 15:29.15]  ~
 [Is XiaFeng Computer.IsXiaFengComputer] ⮞ curl 172.25.254.30
 RS2 - 172.25.254.30
KA实现高可用
KA1
 # KA配置Nginx反向代理
 [root@KA1 ~]# dnf install nginx -y > /dev/null
 [root@KA1 ~]# vim /etc/nginx/conf.d/upstream.conf
   1 upstream backend {
   2     server 172.25.254.20:80 weight=5;
   3     server 172.25.254.30:80 weight=5;
   4     keepalive 32;  # 长连接
   5 }
   6 server {
   7     listen 80;
   8     server_name localhost;
   9     location / {
  10         proxy_pass http://backend;
  11         proxy_set_header Host $host;
  12         proxy_set_header X-Real-IP $remote_addr;
  13         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
  14         proxy_connect_timeout 5s;
  15         proxy_send_timeout 5s;
  16         proxy_read_timeout 5s;
  17     }
  18     # 健康检查端点(用于Keepalived检测)
  19     location /health {
  20         access_log off;
  21         return 200 "healthy\n";
  22         add_header Content-Type text/plain;
  23     }
  24 }
 # 采用双主模式为基础
 # 创建 health 检查页
 [root@KA1 ~]# mkdir /usr/share/nginx/html/health
 [root@KA1 ~]# echo "OK" > /usr/share/nginx/html/health/health.html
 # 健康检查脚本
 [root@KA1 ~]# vim /etc/keepalived/check_nginx.sh
   1 #!/bin/bash
   3 # 检查Nginx进程
   4 if ! pgrep -x "nginx" > /dev/null; then
   5     # 尝试启动Nginx
   6     systemctl start nginx
   7     sleep 2
   8     # 再次检查
   9     if ! pgrep -x "nginx" > /dev/null; then
  10         # Nginx启动失败,返回错误触发切换
  11         exit 1
  12     fi
  13 fi
  15 # 检查Nginx端口是否监听
  16 if ! ss -tlnp | grep -q ":80"; then
  17     exit 1
  18 fi
  20 # 可选:检查HTTP响应
  21 if ! curl -sf http://localhost/health > /dev/null; then
  22      exit 1
  23 fi
  24 exit 0
 [root@KA1 ~]# chmod +x /etc/keepalived/check_nginx.sh
 [root@KA1 ~]# nginx -t && systemctl enable --now nginx
 [root@KA1 ~]# /etc/keepalived/check_nginx.sh && echo "Check OK" || echo "Check FAILED"
 Check OK
 # 通知脚本
 [root@KA1 ~]# systemctl is-active postfix.service
 inactive
 [root@KA1 ~]# systemctl start postfix.service
 [root@KA1 ~]# chmod +x /etc/keepalived/notify_nginx.sh
   1 #!/bin/bash
   2 mail_dest='收件人'
   3 TYPE=$1          # WEB_VIP 或 DB_VIP
   4 STATE=$2         # MASTER, BACKUP, FAULT
   5 LOG_FILE="/var/log/keepalived/notify.log"
   6 mkdir -p $(dirname $LOG_FILE)
   7 echo "$(date '+%Y-%m-%d %H:%M:%S') - Instance: $TYPE, State: $STATE, Host: $(hostname)" >> $LOG_FILE
   8 case $STATE in
   9     MASTER)
  10         # 成为Master时的额外操作
  11         echo "$(date) - Becoming MASTER for $TYPE" >> $LOG_FILE
  12         # 可以在这里添加告警通知
  13         ;;
  14     BACKUP)
  15         echo "$(date) - Becoming BACKUP for $TYPE" >> $LOG_FILE
  16         ;;
  17     FAULT)
  18         echo "$(date) - FAULT state for $TYPE" >> $LOG_FILE
  19         # 发送紧急告警
  20         ;;
  21 esac
 # KA1中keepalived设置
 [root@KA1 ~]# vim /etc/keepalived/keepalived.conf
   3 global_defs {
 ………………
  18    enable_script_security   # 启用脚本执行权限
  19    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
  20 }
  21
  22 # Nginx健康检查脚本
  23 vrrp_script check_nginx {
  24     script "/etc/keepalived/check_nginx.sh"
  25     interval 2          # 每2秒检查一次
  26     weight -20          # 检查失败,优先级降低20
  27     fall 2              # 连续2次失败才判定失败
  28     rise 1              # 1次成功恢复
  29 }
  30
  31 vrrp_instance WEB_VIP {
  32     state MASTER
  33     interface eth0
  34     virtual_router_id 51
  35     priority 100
  36     advert_int 1
  37     authentication {
  38         auth_type PASS
  39         auth_pass 1111
  40     }
  41     virtual_ipaddress {
  42         172.25.254.100/24 dev eth0 label eth0:1
  43     }
  44     # 追踪健康检查脚本
  45     track_script {
  46         check_nginx
  47     }
  48     # 状态切换通知
  49     notify_master "/etc/keepalived/notify_nginx.sh WEB_VIP MASTER"
  50     notify_backup "/etc/keepalived/notify_nginx.sh WEB_VIP BACKUP"
  51     notify_fault "/etc/keepalived/notify_nginx.sh WEB_VIP FAULT"
  52 }
  53
  54 vrrp_instance DB_VIP {
  55     state BACKUP
  56     interface eth0
  57     virtual_router_id 52
  58     priority 80
  59     advert_int 1
  60     authentication {
  61         auth_type PASS
  62         auth_pass 1111
  63     }
  64     virtual_ipaddress {
  65         172.25.254.200/24 dev eth0 label eth0:0
  66     }
  67         track_script {
  68         check_nginx
  69     }
  70     notify_master "/etc/keepalived/notify_nginx.sh DB_VIP MASTER"
  71     notify_backup "/etc/keepalived/notify_nginx.sh DB_VIP BACKUP"
  72     notify_fault "/etc/keepalived/notify_nginx.sh DB_VIP FAULT"
  73 }
 [root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 SECURITY VIOLATION - scripts are being executed but script_security not enabled.
 ​
 [root@KA2 ~]# mkdir /usr/share/nginx/html/health
 ​
 [root@KA1 ~]# scp /etc/nginx/conf.d/upstream.conf root@172.25.254.60:/etc/nginx/conf.d/upstream.conf
 upstream.conf                                                   100%  672     1.5MB/s   00:00
 [root@KA1 ~]# scp /etc/keepalived/check_nginx.sh root@172.25.254.60:/etc/keepalived/
 check_nginx.sh                                                                         100%  469   978.8KB/s   00:00
 [root@KA1 ~]# scp /usr/share/nginx/html/health/health.html root@172.25.254.60:/usr/share/nginx/html/health/
 health.html                                                                            100%    3     5.6KB/s   00:00
 [root@KA1 ~]# scp /etc/keepalived/notify_nginx.sh root@172.25.254.60:/etc/keepalived/
 notify_nginx.sh                                                               100%  654     1.1MB/s   00:00
KA2
 # KA2:
 [root@KA2 ~]# dnf install nginx -y > /dev/null
 [root@KA2 ~]# ll /etc/nginx/conf.d/upstream.conf /etc/keepalived/check_nginx.sh /usr/share/nginx/html/health/health.html /etc/keepalived/notify_nginx.sh
 -rwxr-xr-x 1 root root 469 Feb 22 16:19 /etc/keepalived/check_nginx.sh
 -rwxr-xr-x 1 root root 654 Feb 22 16:27 /etc/keepalived/notify_nginx.sh
 -rw-r--r-- 1 root root 672 Feb 22 15:34 /etc/nginx/conf.d/upstream.conf
 -rw-r--r-- 1 root root   3 Feb 22 16:20 /usr/share/nginx/html/health/health.html
 [root@KA2 ~]# nginx -t && systemctl enable --now nginx
 [root@KA2 ~]# systemctl is-active postfix.service
 inactive
 [root@KA2 ~]# systemctl start postfix.service
 [root@KA2 ~]# /etc/keepalived/check_nginx.sh && echo "Check OK" || echo "Check FAILED"
 Check OK
 ​
 # KA2中keepalived设置
 [root@KA2 ~]# vim /etc/keepalived/keepalived.conf
   3 global_defs {
 ………………    
  17    enable_script_security  # 启用脚本执行权限
  18    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
  19 }
  20 # Nginx健康检查脚本
  21 vrrp_script check_nginx {
  22     script "/etc/keepalived/check_nginx.sh"
  23     interval 2
  24     weight -20
  25     fall 2
  26     rise 1
  27 }
  28
  29 vrrp_instance WEB_VIP {
  30     state BACKUP
  31     interface eth0
  32     virtual_router_id 51
  33     preempt_delay 10        # 抢占延迟10秒(避免网络抖动)
  34     priority 80
  35     advert_int 1
  36     authentication {
  37         auth_type PASS
  38         auth_pass 1111
  39     }
  40     virtual_ipaddress {
  41         172.25.254.100/24 dev eth0 label eth0:1
  42     }
  43     track_script {
  44         check_nginx
  45     }
  46     notify_master "/etc/keepalived/notify_nginx.sh WEB_VIP MASTER"
  47     notify_backup "/etc/keepalived/notify_nginx.sh WEB_VIP BACKUP"
  48     notify_fault "/etc/keepalived/notify_nginx.sh WEB_VIP FAULT"
  49 }
  50
  51 vrrp_instance DB_VIP {
  52     state MASTER
  53     interface eth0
  54     virtual_router_id 52
  55     preempt_delay 10
  56     priority 100
  57     advert_int 1
  58     authentication {
  59         auth_type PASS
  60         auth_pass 1111
  61     }
  62     virtual_ipaddress {
  63         172.25.254.200/24 dev eth0 label eth0:0
  64     }
  65     track_script {
  66         check_nginx
  67     }
  68     notify_master "/etc/keepalived/notify_nginx.sh DB_VIP MASTER"
  69     notify_backup "/etc/keepalived/notify_nginx.sh DB_VIP BACKUP"
  70     notify_fault "/etc/keepalived/notify_nginx.sh DB_VIP FAULT"
  71 }
 [root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 (DB_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
 # 测试
 [root@KA1 ~]# systemctl is-active keepalived.service
 active
 [root@KA1 ~]# systemctl reload keepalived.service
 [root@KA2 ~]# systemctl reload keepalived.service
 [root@KA1 ~]# ip a s | grep eth0:1$
     inet 172.25.254.100/24 scope global secondary eth0:1
 [root@KA2 ~]# ip a s | grep eth0:0$
     inet 172.25.254.200/24 scope global secondary eth0:0

global_defs { enable_script_security # 启用脚本执行权限 script_user root root # 指定专用用户,因为所有部署都是用root用户去部署的 }

现实中都是为所有软件创建对应的用户和用户组,具体操作和注意点看大数据笔记,这里为了方便就只能root用户实践。

2.2 Keepalived+HAProxy

基于Keepalived+Nginx改动

2.2.1 环境

架构设计

环境信息
节点IP地址角色VRRP实例
KA1172.25.254.50HAProxy + KeepalivedWEB_VIP: MASTER, API_VIP: BACKUP
KA2172.25.254.60HAProxy + KeepalivedWEB_VIP: BACKUP, API_VIP: MASTER
RS1172.25.254.20后端Web/API服务器NULL
RS2172.25.254.30后端Web/API服务器NULL

2.2.2 实践

RS
 [root@RS1 ~]# mkdir -p /usr/share/nginx/html/api
 [root@RS1 ~]# echo '{"status":"ok","server":"RS1","ip":"172.25.254.20"}' > /usr/share/nginx/html/api/status.json
 [root@RS1 ~]# vim /etc/nginx/conf.d/default.conf
 [root@RS1 ~]# nginx -t && nginx -s reload
 [root@RS1 ~]# curl http://172.25.254.20
 RS1 - 172.25.254.20
 [root@RS1 ~]# curl http://172.25.254.20/api/status.json
 {"status":"ok","server":"RS1","ip":"172.25.254.20"}
 [root@RS1 ~]# curl http://172.25.254.20/health
 healthy
 [root@RS1 ~]# scp /etc/nginx/conf.d/default.conf root@172.25.254.30:/etc/nginx/conf.d/
 default.conf                                                               100%  502   928.3KB/s   00:00
 ​
 [root@RS2 ~]# mkdir -p /usr/share/nginx/html/api
 [root@RS2 ~]# echo '{"status":"ok","server":"RS2","ip":"172.25.254.30"}' > /usr/share/nginx/html/api/status.json
 [root@RS2 ~]# nginx -t && nginx -s reload
KA实现高可用性
 [root@KA1 ~]# systemctl stop keepalived.service     # 由于上个实验做了nginx健康保护,直接stop的话Nginx会被其他进程守护自动重启,或者说不能及时关闭,先把keepalived停后才可以,也说明有看门狗机制在保护它。
 [root@KA1 ~]# systemctl disable --now nginx
 [root@KA1 ~]# netstat -lntupa | grep nginx
 [root@KA1 ~]# systemctl is-active nginx
 inactive
 [root@KA1 ~]# dnf install haproxy -y > /dev/null
 [root@KA1 ~]# systemctl enable --now haproxy
 ​
 [root@KA2 ~]# systemctl stop keepalived.service
 [root@KA2 ~]# systemctl disable --now nginx
 [root@KA2 ~]# netstat -lntupa | grep nginx
 [root@KA2 ~]# systemctl is-active nginx
 inactive
 [root@KA2 ~]# dnf install haproxy -y > /dev/null
 [root@KA2 ~]# systemctl enable --now haproxy
KA1
 # HAProxy设定:KA1与KA2一致
 [root@KA1 ~]# vim /etc/haproxy/haproxy.cfg
  64 # 统计页面
  65 listen stats
  66     bind *:8080
  67     stats enable
  68     stats uri /stats
  69     stats auth admin:admin123
  70     stats refresh 30s
  71
  72 # Web服务前端(对应VIP1 172.25.254.100)
  73 frontend web_frontend
  74     bind *:80
  75     acl is_api path_beg /api
  76     use_backend api_servers if is_api
  77     default_backend web_servers
  78
  79 # Web服务后端(/health 使用 Nginx return 指令)
  80 backend web_servers
  81     balance roundrobin
  82     option httpchk GET /health
  83     http-check expect status 200
  84     server rs1 172.25.254.20:80 check weight 5 inter 2s rise 2 fall 3
  85     server rs2 172.25.254.30:80 check weight 5 inter 2s rise 2 fall 3
  86
  87 # API服务后端(/api/status.json 物理文件)
  88 backend api_servers
  89     balance roundrobin
  90     option httpchk GET /api/status.json
  91     http-check expect status 200
  92     server rs1 172.25.254.20:80 check weight 5 inter 2s rise 2 fall 3
  93     server rs2 172.25.254.30:80 check weight 5 inter 2s rise 2 fall 3
 # HAProxy 健康检查脚本(KA1和KA2相同)
 [root@KA1 ~]# vim /etc/keepalived/check_haproxy.sh
   1 #!/bin/bash
   2 # 检查HAProxy进程
   3 if ! pgrep -x "haproxy" > /dev/null; then
   4     systemctl start haproxy
   5     sleep 2
   6     if ! pgrep -x "haproxy" > /dev/null; then
   7         exit 1
   8     fi
   9 fi
  10 # 检查HAProxy端口(80和8080统计页面)
  11 if ! ss -tlnp | grep -q ":80"; then
  12     exit 1
  13 fi
  14 exit 0
 [root@KA1 ~]# chmod +x /etc/keepalived/check_haproxy.sh
 [root@KA1 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
 [root@KA1 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
 HAProxy OK
 # 通知告警脚本
 [root@KA1 ~]# cp /etc/keepalived/notify_nginx.sh /etc/keepalived/notify_haproxy.sh
 [root@KA1 ~]# scp /etc/keepalived/notify_haproxy.sh root@172.25.254.60:/etc/keepalived/
 notify_haproxy.sh                                                                                   100%  684   564.8KB/s   00:00
 ​
 [root@KA1 ~]# scp /etc/haproxy/haproxy.cfg root@172.25.254.60:/etc/haproxy/haproxy.cfg
 haproxy.cfg                                                                                100% 4220     4.7MB/s   00:00
 [root@KA1 ~]# scp /etc/keepalived/check_haproxy.sh root@172.25.254.60:/etc/keepalived/
 check_haproxy.sh                                                                           100%  293   465.5KB/s   00:00
 ​
 # KA1 Keepalived配置,基于Keepalived + Nginx双主模式
 [root@KA1 ~]# vim /etc/keepalived/keepalived.conf
  22 # HAProxy健康检查脚本
  23 vrrp_script check_haproxy {
  24     script "/etc/keepalived/check_haproxy.sh"
  25     interval 2          # 每2秒检查一次
  26     weight -20          # 检查失败,优先级降低20
  27     fall 2              # 连续2次失败才判定失败
  28     rise 1              # 1次成功恢复
  29 }
  31 # VIP1: Web服务入口,KA1为主
  32 vrrp_instance WEB_VIP {
 ………………
  45     track_script {
  46         check_haproxy
  47     }
  48     notify_master "/etc/keepalived/notify_haproxy.sh WEB_VIP MASTER"
  49     notify_backup "/etc/keepalived/notify_haproxy.sh WEB_VIP BACKUP"
  50     notify_fault "/etc/keepalived/notify_haproxy.sh WEB_VIP FAULT"
  51 }
  52
  53 # VIP2: API服务入口,KA1为备
  54 vrrp_instance API_VIP {
 ………………
 70     notify_master "/etc/keepalived/notify_haproxy.sh API_VIP MASTER"
 71     notify_backup "/etc/keepalived/notify_haproxy.sh API_VIP BACKUP"
 72     notify_fault "/etc/keepalived/notify_haproxy.sh API_VIP FAULT"
  73 }
 [root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
KA2
 [root@KA2 ~]# systemctl reload haproxy.service
 [root@KA2 ~]# /etc/keepalived/check_haproxy.sh && echo "HAProxy OK" || echo "HAProxy FAILED"
 HAProxy OK
 ​
 # KA2 Keepalived配置,基于Keepalived + Nginx双主模式
 [root@KA2 ~]# vim /etc/keepalived/keepalived.conf
  31 # VIP1: Web服务入口,KA2为备
  32 vrrp_instance WEB_VIP {
 ………………
  46     track_script {
  47         check_haproxy
  48     }
  49     notify_master "/etc/keepalived/notify_haproxy.sh WEB_VIP MASTER"
  50     notify_backup "/etc/keepalived/notify_haproxy.sh WEB_VIP BACKUP"
  51     notify_fault "/etc/keepalived/notify_haproxy.sh WEB_VIP FAULT"
  52 }
  54
  55 # VIP2: API服务入口,KA2为主
  56 vrrp_instance API_VIP {
 ………………
  70     track_script {
  71         check_haproxy
  72     }
  73     notify_master "/etc/keepalived/notify_haproxy.sh API_VIP MASTER"
  74     notify_backup "/etc/keepalived/notify_haproxy.sh API_VIP BACKUP"
  75     notify_fault "/etc/keepalived/notify_haproxy.sh API_VIP FAULT"
  76 }
 [root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 (API_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
[root@KA1 ~]# systemctl start keepalived.service
[root@KA2 ~]# systemctl start keepalived.service
# 双主状态验证
[root@KA1 ~]# ip a s | grep eth0:1$
    inet 172.25.254.100/24 scope global secondary eth0:1
[root@KA2 ~]# ip a s | grep eth0:0$
    inet 172.25.254.200/24 scope global secondary eth0:0
# 服务访问测试
[root@Client ~]# for i in {1..4};do curl 172.25.254.100;done
RS1 - 172.25.254.20
RS2 - 172.25.254.30
RS1 - 172.25.254.20
RS2 - 172.25.254.30
[root@Client ~]# for i in {1..4};do curl http://172.25.254.200/api/status.json;done
{"status":"ok","server":"RS1","ip":"172.25.254.20"}
{"status":"ok","server":"RS2","ip":"172.25.254.30"}
{"status":"ok","server":"RS1","ip":"172.25.254.20"}
{"status":"ok","server":"RS2","ip":"172.25.254.30"}
[root@Client ~]# curl http://172.25.254.100/health
healthy

3 场景3:数据库高可用

3.1.1 环境

架构设计

核心原则:数据库必须单主

原则说明
单 VIP只有一个入口,确保写操作唯一性
单 Master任何时候只有一个节点接受写操作
自动检测Keepalived 检测 MySQL 状态,故障时 VIP 漂移
手动/半自动切换:主从切换需要谨慎,建议配合 MHA 或手动操作

环境信息

节点IP地址角色说明
KA1172.25.254.50Keepalived BACKUPVIP 故障时接管
KA2172.25.254.60Keepalived MASTER正常时持有 VIP
DB1172.25.254.20(复用RS1)MySQL Master主库,接受读写
DB2172.25.254.30(复用RS2)MySQL Slave从库,只读/热备
VIP172.25.254.200数据库入口指向当前 Master

3.1.2 实践

DB主从复制

DB1
# DB1:
[root@RS1 ~]# systemctl stop nginx.service
[root@RS1 ~]# dnf install mysql-server -y >/dev/null
[root@RS1 ~]# vim /etc/my.cnf
 11 [mysqld]
 12 server-id = 20
 13 # GTID 复制(推荐)
 14 gtid_mode = ON
 15 enforce_gtid_consistency = ON
 16 log-bin = mysql-bin
 17 binlog-format = ROW
 18 expire_logs_days = 7
 19 max_binlog_size = 100M
 20 # 半同步复制(可选但推荐)
 21 plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
 22 rpl-semi-sync-master-enabled = 1
 23 rpl-semi-sync-slave-enabled = 1
 24 rpl-semi-sync-master-timeout = 1000
 25 # 字符集
 26 character-set-server = utf8mb4
 27 # 绑定所有接口
 28 bind-address = 0.0.0.0
 29 # 为 Keepalived 检测预留的用户
 30 skip-name-resolve
[root@RS1 ~]# systemctl enable --now mysqld
[root@RS1 ~]# mysql
# 创建复制用户
mysql> create user 'repl'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant replication slave on *.* to 'repl'@'172.25.254.%';
# 创建健康检查用户(用于 Keepalived 检测)
mysql> create user 'check'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant process,replication client,show databases on *.* to 'check'@'172.25.254.%';
mysql> flush privileges;
mysql> show master status;
+---------------+----------+--------------+------------------+-------------------+
| File          | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+---------------+----------+--------------+------------------+-------------------+
| binlog.000001 |     1237 |              |                  |                   |
+---------------+----------+--------------+------------------+-------------------+
mysql> SHOW VARIABLES LIKE 'gtid%';
+----------------------------------+-----------+
| Variable_name                    | Value     |
+----------------------------------+-----------+
| gtid_executed                    |           |
| gtid_executed_compression_period | 0         |
| gtid_mode                        | ON        |
| gtid_next                        | AUTOMATIC |
| gtid_owned                       |           |
| gtid_purged                      |           |
+----------------------------------+-----------+
# 数据库刚启动,还没有执行任何写入操作,没有产生事务,gtid_executed为空的原因。
mysql> create database test_gtid;
mysql> use test_gtid;
Database changed
mysql> create table t1 (id int primary key, name varchar(50));
mysql> insert into t1 values (1,'test');
mysql> SHOW VARIABLES LIKE 'gtid_executed';
+---------------+------------------------------------------+
| Variable_name | Value                                    |
+---------------+------------------------------------------+
| gtid_executed | b650e3a0-102f-11f1-9134-000c29999103:1-3 |
+---------------+------------------------------------------+
DB2
# DB2:
[root@RS2 ~]# systemctl stop nginx.service
[root@RS2 ~]# dnf install mysql-server -y >/dev/null
[root@RS2 ~]# vim /etc/my.cnf
 10 !includedir /etc/my.cnf.d
 11 [mysqld]
 12 server-id = 30
 13 # GTID
 14 gtid_mode = ON
 15 enforce_gtid_consistency = ON
 16 log-bin = mysql-bin
 17 binlog-format = ROW
 18 expire_logs_days = 7
 19 # 半同步复制,起不来,后面想办法加上去。
 20 # plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
 21 # rpl-semi-sync-master-enabled = 1
 22 # rpl-semi-sync-slave-enabled = 1
 23 # 只读(重要!防止误写入)
 24 read_only = 1
 25 super_read_only = 1
 26 character-set-server = utf8mb4
 27 bind-address = 0.0.0.0
 28 skip-name-resolve
[root@RS2 ~]# systemctl enable --now mysqld
mysql> INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
ERROR 1290 (HY000): The MySQL server is running with the --super-read-only option so it cannot execute this statement
mysql> INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';
ERROR 1290 (HY000): The MySQL server is running with the --super-read-only option so it cannot execute this statement
# 关闭只读
mysql> set global super_read_only = OFF;
mysql> set global read_only = OFF;
# 安装插件
mysql> install plugin rpl_semi_sync_master soname 'semisync_master.so';
mysql> install plugin rpl_semi_sync_slave soname 'semisync_slave.so';
# 启用插件
mysql> set global rpl_semi_sync_master_enabled = ON;
mysql> set global rpl_semi_sync_slave_enabled = ON;
# 创建复制用户
mysql> create user 'repl'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant replication slave on *.* to 'repl'@'172.25.254.%';
# 创建健康检查用户(用于 Keepalived 检测)
mysql> create user 'check'@'172.25.254.%' identified with mysql_native_password by '123';
mysql> grant process,replication client,show databases on *.* to 'check'@'172.25.254.%';
# 重新开启只读(从库必须保持只读)
mysql> set global read_only = ON;
# 查看插件状态
mysql> show plugins;
………………	# 正常
# 查看变量
mysql> show variables like 'rpl_semi_sync%';
+-------------------------------------------+------------+
| Variable_name                             | Value      |
+-------------------------------------------+------------+
| rpl_semi_sync_master_enabled              | ON         |
| rpl_semi_sync_master_timeout              | 10000      |
| rpl_semi_sync_master_trace_level          | 32         |
| rpl_semi_sync_master_wait_for_slave_count | 1          |
| rpl_semi_sync_master_wait_no_slave        | ON         |
| rpl_semi_sync_master_wait_point           | AFTER_SYNC |
| rpl_semi_sync_slave_enabled               | ON         |
| rpl_semi_sync_slave_trace_level           | 32         |
+-------------------------------------------+------------+
8 rows in set (0.00 sec)
# 配置主从复制
mysql> change master to
    -> master_host = '172.25.254.20',
    -> master_port = 3306,
    -> master_user = 'repl',
    -> master_password = '123',
    -> master_auto_position = 1,	# GTID 自动定位
    -> master_connect_retry = 10;
mysql> start slave;
mysql> show slave status\G
*************************** 1. row ***************************
…………
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
…………
测试
[root@RS1 ~]# mysql
mysql> create database test_haha;
mysql> use test_haha
mysql> create table t1 (id int primary key,name varchar(50));
mysql> insert into t1 values (1,'from Master DB1');
[root@RS2 ~]# mysql -e "SELECT * FROM test_haha.t1;"
+----+-----------------+
| id | name            |
+----+-----------------+
|  1 | from Master DB1 |
+----+-----------------+

KA实现高可用

# 安装 MySQL 客户端用于检测
[root@KA1 ~]# systemctl stop keepalived.service
[root@KA1 ~]# systemctl stop haproxy.service
[root@KA1 ~]# dnf install mysql -y >/dev/null
[root@KA2 ~]# dnf install mysql -y >/dev/null
[root@KA1 ~]# mysql -h172.25.254.30 -ucheck -p123 -e "SELECT 1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---+
| 1 |
+---+
| 1 |
+---+
[root@KA1 ~]# mysql -h172.25.254.20 -ucheck -p123 -e "SELECT 1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---+
| 1 |
+---+
| 1 |
+---+
KA1
# MySQL 检查脚本
[root@KA1 ~]# vim /etc/keepalived/check_mysql.sh
  1 #!/bin/bash
  2
  3 # MySQL health-check script, run periodically by keepalived (vrrp_script).
  4 # Passes (exit 0) only when the remote MySQL is reachable, is acting as
  5 # master (read_only = OFF) and, if it is also replicating, is not lagging.
  6
  7 MYSQL_HOST="172.25.254.20"      # current master IP
  8 MYSQL_USER="check"
  9 MYSQL_PASS="123"
 10 MYSQL_PORT=3306
 11 LOG_FILE="/var/log/keepalived_mysql_check.log"
 12 # log every probe with a timestamp
 13 echo "$(date '+%Y-%m-%d %H:%M:%S') - Checking MySQL at $MYSQL_HOST" >> "$LOG_FILE"
 14 # check 1: is MySQL reachable at all?
 15 if ! mysql -h"$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" -P"$MYSQL_PORT" -e "SELECT 1;" > /dev/null 2>&1; then
 16     echo "$(date): MySQL $MYSQL_HOST connection FAILED" >> "$LOG_FILE"
 17     exit 1
 18 fi
 19 # check 2: is it the master (read_only = OFF)?
 20 READ_ONLY=$(mysql -h"$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" -P"$MYSQL_PORT" -e "SHOW VARIABLES LIKE 'read_only';" 2>/dev/null | grep read_only | awk '{print $2}')
 21 if [ "$READ_ONLY" = "ON" ]; then
 22     echo "$(date): MySQL $MYSQL_HOST is read_only, not master" >> "$LOG_FILE"
 23     exit 1
 24 fi
 25 # check 3: replication lag. On a pure master SHOW SLAVE STATUS returns no
 26 # rows, so SLAVE_LAG is EMPTY; it is the literal string "NULL" while the
 27 # SQL thread is stopped. Only run the numeric comparison when we actually
 28 # got a number, otherwise "[: : integer expression expected" is raised.
 29 SLAVE_LAG=$(mysql -h"$MYSQL_HOST" -u"$MYSQL_USER" -p"$MYSQL_PASS" -P"$MYSQL_PORT" -e "SHOW SLAVE STATUS\G" 2>/dev/null | grep Seconds_Behind_Master | awk '{print $2}')
 30 if [[ "$SLAVE_LAG" =~ ^[0-9]+$ ]] && [ "$SLAVE_LAG" -gt 60 ]; then
 31     echo "$(date): Replication lag $SLAVE_LAG seconds" >> "$LOG_FILE"
 32     exit 1
 33 fi
 34 echo "$(date): MySQL $MYSQL_HOST check PASSED" >> "$LOG_FILE"
 35 exit 0
[root@KA1 ~]# chmod +x /etc/keepalived/check_mysql.sh
[root@KA1 ~]# /etc/keepalived/check_mysql.sh && echo "MySQL OK" || echo "MySQL FAILED"
/etc/keepalived/check_mysql.sh: line 26: [: : integer expression expected
MySQL OK
# 注:上面的报错是因为在纯主库上 SHOW SLAVE STATUS 结果为空,Seconds_Behind_Master 取不到值,
# 导致数值比较 [ "" -gt 60 ] 报错;应在比较前先判断变量确实是数字(非空且非 NULL)再进行比较。

# 切换脚本(Master故障时自动切换主从)
[root@KA1 ~]# vim /etc/keepalived/notify_mysql.sh
  1 #!/bin/bash
  2
  3 TYPE=$1         # instance name passed by keepalived, e.g. DB_VIP
  4 STATE=$2        # VRRP state: MASTER, BACKUP or FAULT
  5 CURRENT_HOST=$(hostname)
  6 LOG_FILE="/var/log/keepalived_mysql.log"
  7 mkdir -p $(dirname $LOG_FILE)
  8 echo "$(date '+%Y-%m-%d %H:%M:%S') - $TYPE on $CURRENT_HOST changed to $STATE" >> $LOG_FILE
  9 case $STATE in
 10     MASTER)
 11         echo "$(date): !!! This node is now MASTER for MySQL VIP !!!" >> $LOG_FILE
 12         echo "$(date): VIP 172.25.254.200 is now on $CURRENT_HOST" >> $LOG_FILE
 13         # important: alert the administrator by mail (ignore failure if no MTA)
 14         echo "MySQL VIP failover occurred at $(date)" | \
 15             mail -s "CRITICAL: MySQL HA Failover" xiafeng_68@163.com 2>/dev/null || true
 16         # NOTE: deliberately NO automatic master/slave switchover here!
 17         # Reasons: 1. split-brain risk  2. data-consistency risk
 18         # Recommended: a human confirms, then runs the manual failover script
 19         # auto-promoting DB2 to master (high risk, use with extreme care):
 20         # mysql -h172.25.254.30 -ugxf -p123 -e "STOP SLAVE; RESET SLAVE ALL; SET GLOBAL read_only = 0;"
 21
 22         echo "$(date): ACTION REQUIRED: Please verify and execute manual failover if needed" >> $LOG_FILE
 23         ;;
 24
 25     BACKUP)
 26         echo "$(date): This node is now BACKUP for MySQL VIP" >> $LOG_FILE
 27         ;;
 28
 29     FAULT)
 30         echo "$(date): FAULT state detected on $CURRENT_HOST" >> $LOG_FILE
 31         ;;
 32 esac
[root@KA1 ~]# chmod +x /etc/keepalived/notify_mysql.sh
# 手动主从切换脚本(故障时使用)
[root@KA1 ~]# vim /etc/keepalived/manual_failover.sh
  1 #!/bin/bash
  2
  3 # When DB1 has failed, run this script (after manual confirmation) to
  4 # promote DB2 to master and repoint the keepalived health check at it.
  5 NEW_MASTER="172.25.254.30"      # DB2
  6 OLD_MASTER="172.25.254.20"      # DB1
  7 VIP="172.25.254.200"
  8 echo "=== MySQL 手动故障切换 ==="
  9 echo "时间: $(date)"
 10 echo "新主库: $NEW_MASTER (DB2)"
 11 echo "旧主库: $OLD_MASTER (DB1) - 假设已故障"
 12 # 1. stop replication on DB2 and lift read-only, promoting it to master;
 13 #    abort the whole failover if this step does not succeed
 14 echo "步骤1: 在 DB2 上停止复制..."
 15 mysql -h"$NEW_MASTER" -ugxf -p123 -e "
 16     STOP SLAVE;
 17     RESET SLAVE ALL;
 18     SET GLOBAL read_only = OFF;
 19     SET GLOBAL super_read_only = OFF;
 20     SELECT 'DB2 is now MASTER' as status;
 21 " || { echo "错误: 无法连接或提升 $NEW_MASTER,切换中止"; exit 1; }
 22 # 2. point the keepalived health-check script at the new master
 23 echo "步骤2: 更新 Keepalived 检测脚本..."
 24 sed -i "s/MYSQL_HOST=\"$OLD_MASTER\"/MYSQL_HOST=\"$NEW_MASTER\"/" /etc/keepalived/check_mysql.sh
 25 # 3. when the old master recovers, re-join it as a slave (optional);
 26 #    the password must match the 'repl' user created earlier ('123')
 27 echo "步骤3: 当旧主库恢复后,执行以下命令重新加入:"
 28 echo "  CHANGE MASTER TO MASTER_HOST='$NEW_MASTER', MASTER_USER='repl', MASTER_PASSWORD='123', MASTER_AUTO_POSITION=1;"
 29 echo "  START SLAVE;"
 30 echo "切换完成!VIP $VIP 现在指向新主库 $NEW_MASTER"
[root@KA1 ~]# chmod +x /etc/keepalived/manual_failover.sh
[root@KA1 ~]# scp /etc/keepalived/manual_failover.sh root@172.25.254.60:/etc/keepalived/
manual_failover.sh                                                                          100% 1175   736.0KB/s   00:00
[root@KA1 ~]# scp /etc/keepalived/check_mysql.sh root@172.25.254.60:/etc/keepalived/
check_mysql.sh                                                                                                 100% 1309     1.6MB/s   00:00
[root@KA1 ~]# scp /etc/keepalived/notify_mysql.sh root@172.25.254.60:/etc/keepalived/
notify_mysql.sh                                                                                                           100% 1323     2.6MB/s   00:00

# KA1配置keepalived
  1 ! Configuration File for keepalived
  2
  3 global_defs {
………………
 13    vrrp_skip_check_adv_addr
 14    #vrrp_strict				# 必须关闭,否则阻断MySQL连接
………………
 18    enable_script_security   # 启用脚本执行权限
 19    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
 20 }
 21
 22 # MySql健康检查脚本
 23 vrrp_script check_mysql {
 24     script "/etc/keepalived/check_mysql.sh"
 25     interval 3          # MySQL检测间隔稍长
 26     weight -20          # 检查失败,优先级降低20
 27     fall 2              # 连续2次失败才判定失败
 28     rise 2              # 连续2次成功恢复
 29 }
 30
 31 # 单VIP:数据库入口
 32 vrrp_instance DB_VIP {
 33     state BACKUP        # KA1作为BACKUP
 34     interface eth0
 35     virtual_router_id 52
 36     priority 80         # 低于KA2
 37     advert_int 1
 38     authentication {
 39         auth_type PASS
 40         auth_pass 2222
 41     }
 42     virtual_ipaddress {
 43         172.25.254.200/24 dev eth0 label eth0:0
 44     }
 45     track_script {
 46         check_mysql
 47     }
 48     # 禁止抢占(重要!防止网络抖动导致频繁切换)
 49     nopreempt
 50     notify_master "/etc/keepalived/notify_mysql.sh DB_VIP MASTER"
 51     notify_backup "/etc/keepalived/notify_mysql.sh DB_VIP BACKUP"
 52     notify_fault "/etc/keepalived/notify_mysql.sh DB_VIP FAULT"
 53 }
 [root@KA1 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
 
# KA1配置haproxy
 64 listen mysql
 65     bind 0.0.0.0:3306	# MySQL - 绑定所有接口,如果指定172.25.254.200会起不了服务
 66     mode tcp
 67     option tcp-check
 68     tcp-check connect port 3306
 69     timeout connect 5s
 70     timeout client 30s
 71     timeout server 30s
 72     server db1 172.25.254.20:3306 check inter 2s rise 2 fall 3
 73     server db2 172.25.254.30:3306 check backup inter 2s rise 2 fall 3
 74
 75 # 统计页面
 76 listen stats
 77     bind *:8080
 78     mode http
 79     stats enable
 80     stats uri /stats
 81     stats auth gxf:123
 [root@KA1 ~]# scp /etc/haproxy/haproxy.cfg root@172.25.254.60:/etc/haproxy/haproxy.cfg
haproxy.cfg 
KA2
[root@KA2 ~]# ll /etc/keepalived/notify_mysql.sh /etc/keepalived/check_mysql.sh /etc/keepalived/manual_failover.sh
-rwxr-xr-x 1 root root 1309 Feb 23 06:36 /etc/keepalived/check_mysql.sh
-rwxr-xr-x 1 root root 1175 Feb 23 07:08 /etc/keepalived/manual_failover.sh
-rwxr-xr-x 1 root root 1323 Feb 23 06:50 /etc/keepalived/notify_mysql.sh
[root@KA2 ~]# vim /etc/keepalived/keepalived.conf
  1 ! Configuration File for keepalived
  2
  3 global_defs {
  4    notification_email {
………………
 14    #vrrp_strict
………………
 17    #vrrp_mcast_group4 224.0.0.44
 18    enable_script_security  # 启用脚本执行权限
 19    script_user root root    # 指定专用用户,因为所有部署都是用root用户去部署的
 20 }
 22 vrrp_script check_mysql {
 23     script "/etc/keepalived/check_mysql.sh"
 24     interval 3
 25     weight -30
 26     fall 2
 27     rise 2
 28 }
 30 vrrp_instance DB_VIP {
 31     state MASTER        # KA2作为MASTER
 32     interface eth0
 33     virtual_router_id 52
 34     priority 100        # 高于KA1
 35     advert_int 1
 36     authentication {
 37         auth_type PASS
 38         auth_pass 2222
 39     }
 40     virtual_ipaddress {
 41         172.25.254.200/24 dev eth0 label eth0:0
 42     }
 43     track_script {
 44         check_mysql
 45     }
 47     # 可选:抢占延迟
 48     preempt_delay 10
 50     notify_master "/etc/keepalived/notify_mysql.sh DB_VIP MASTER"
 51     notify_backup "/etc/keepalived/notify_mysql.sh DB_VIP BACKUP"
 52     notify_fault "/etc/keepalived/notify_mysql.sh DB_VIP FAULT"
 53 }
[root@KA2 ~]# keepalived -t -f /etc/keepalived/keepalived.conf
(DB_VIP) Warning - preempt delay will not work with initial state MASTER - clearing
测试
[root@KA1 ~]# systemctl start keepalived.service
[root@KA2 ~]# systemctl start keepalived.service
[root@KA1 ~]# systemctl start haproxy.service
[root@KA2 ~]# systemctl start haproxy.service
# 查看VIP绑定(应该在KA2上)
[root@KA2 ~]# ip addr show eth0 | grep 172.25.254.200
    inet 172.25.254.200/24 scope global secondary eth0:0
tail -f /var/log/keepalived_mysql.log
[root@KA2 ~]# tail -f /var/log/keepalived_mysql.log
2026-02-23 07:49:14 - DB_VIP on KA2 changed to MASTER
Mon Feb 23 07:49:14 AM CST 2026: !!! This node is now MASTER for MySQL VIP !!!
Mon Feb 23 07:49:14 AM CST 2026: VIP 172.25.254.200 is now on KA2
Mon Feb 23 07:49:14 AM CST 2026: ACTION REQUIRED: Please verify and execute manual failover if needed
# 客户端连接测试
[root@Client ~]# mysql -h172.25.254.200 -ugxf -p123 -e "SHOW VARIABLES LIKE 'server_id';"
mysql: [Warning] Using a password on the command line interface can be insecure.
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| server_id     | 20    |
+---------------+-------+
# 写入测试
[root@Client ~]# mysql -h172.25.254.200 -ugxf -p123 -e "INSERT INTO test_haha.t1 VALUES (2, 'Via VIP');"
mysql: [Warning] Using a password on the command line interface can be insecure.
# 检测在DB2上验证同步
[root@Client ~]# mysql -h172.25.254.30 -ugxf -p123 -e "SELECT * FROM test_haha.t1;"
mysql: [Warning] Using a password on the command line interface can be insecure.
+----+-----------------+
| id | name            |
+----+-----------------+
|  1 | from Master DB1 |
|  2 | Via VIP         |
+----+-----------------+

# 故障测试如图
# 在 DB1 上停止 MySQL
# 观察 KA 日志
tail -f /var/log/keepalived_mysql_check.log
# 手动切换测试
/etc/keepalived/manual_failover.sh
# 验证新主库
mysql -h172.25.254.200 -ugxf -p123 -e "SHOW VARIABLES LIKE 'server_id';"

3.3.3 关键配置对比(数据库与Web)

特性	Web(Nginx/HAProxy)	数据库(MySQL)
模式	双主(两个VIP)	单主(一个VIP)
状态	MASTER + BACKUP 互换	严格区分 MASTER/BACKUP
抢占	默认开启	建议 nopreempt
检测间隔	2秒	3秒(更保守)
权重降幅	-20	-30(更敏感)
自动切换	是	否(建议手动)
脑裂风险	低	高(必须避免)

总结

以上为个人经验,希望能给大家一个参考,也希望大家多多支持脚本之家。

您可能感兴趣的文章:
阅读全文