有没有做皮艺的网站/整合营销是什么
文章目录
- 一、基础环境配置
- 1.1 关闭防火墙和selinux
- 1.1.1 关闭防火墙
- 1.1.2 关闭selinux
- 1.2 配置IP域名映射
- 1.3 设置swap分区大小
- 1.4 操作系统限制
- 1.5 关闭numa
- 1.6 配置互信
- 二、实例安装
- 2.1 创建MySQL用户组和用户
- 2.2 创建相关目录
- 2.3 解压安装包并设置权限
- 2.4 软链接程序目录并配置环境变量
- 2.5 修改配置文件
- 2.6 初始化数据库
- 2.7 启动数据库服务
- 2.8 搭建复制
- 三、搭建MHA
- 3.1 Ansible 配置(可选)
- 3.2 MHA node安装
- 3.3 MHA manager节点安装
- 3.4 MHA manager 配置
- 3.5 检查ssh
- 3.6 检查主从复制
- 3.7 MHA运行状态
- 3.8 其他命令
- 四、keepalived配置
- 4.1 安装ipvsadm、keepalived
- 4.2 配置keepalived
- 五、测试
- 5.1 手动切换
- 5.2 自动切换
- 六、MHA日常操作
- 七、附录
- 7.1 相关命令说明
- 7.2 master_ip_failover脚本代码
- 7.3 master_ip_online_change脚本代码
- 7.4 send_report脚本代码
一、基础环境配置
1.1 关闭防火墙和selinux
1.1.1 关闭防火墙
systemctl stop firewalld.service #停止防火墙
systemctl disable firewalld.service #禁止开机启动
1.1.2 关闭selinux
setenforce 0
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
cat /etc/sysconfig/selinux
/usr/sbin/sestatus -v
#这个地方最好重启下
1.2 配置IP域名映射
vim /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.0.97 zj1
192.168.1.156 zj2
192.168.0.48 zj3
1.3 设置swap分区大小
echo "vm.swappiness = 0" >>/etc/sysctl.conf
sysctl -p
#通过sysctl -a 查看
PS :swappiness值在0-100之间,0尽力使用物理内存,100尽力使用swap分区。
1.4 操作系统限制
cat /etc/security/limits.conf
...
* soft nproc 65536
* hard nproc 65536
* soft nofile 65536
* hard nofile 65536
...
1.5 关闭numa
sed -i 's/GRUB_CMDLINE_LINUX.*/GRUB_CMDLINE_LINUX="crashkernel=auto rd.lvm.lv=centos\/root rd.lvm.lv=centos\/swap rhgb quiet numa=off"/g' /etc/default/grub
grub2-mkconfig -o /etc/grub2.cfg
cat /etc/grub2.cfg
reboot
cat /proc/cmdline
dmesg | grep -i numa
1.6 配置互信
#在各节点执行
ssh-keygen -t rsa
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.0.97
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.1.156
ssh-copy-id -i /root/.ssh/id_rsa.pub root@192.168.0.48
二、实例安装
2.1 创建MySQL用户组和用户
groupadd mysql
useradd -g mysql mysql -s /sbin/nologin
id mysql
2.2 创建相关目录
mkdir -pv /usr/local/mysql/
mkdir -pv /data/mysql/{data,logs}
2.3 解压安装包并设置权限
tar xf mysql-5.7.28-el7-x86_64.tar -C /usr/local/mysql/
cd /usr/local/mysql/
tar xf mysql-5.7.28-el7-x86_64.tar.gz
tar xf mysql-test-5.7.28-el7-x86_64.tar.gz
chown mysql:mysql /data/mysql -R
2.4 软链接程序目录并配置环境变量
ln -s /usr/local/mysql/mysql-5.7.28-el7-x86_64 /usr/local/mysql/mysql
export PATH=/usr/local/mysql/mysql/bin:$PATH
echo 'PATH=/usr/local/mysql/mysql/bin:$PATH' >> /etc/profile
source /etc/profile
2.5 修改配置文件
[client]
port = 3306
socket = /data/mysql/data/mysql.sock[mysql]
prompt="\u@mysql1 \R:\m:\s [\d]> "
no-auto-rehash[mysqld]
user = mysql
port = 3306
#软件目录
basedir = /usr/local/mysql/mysql
#数据目录
datadir = /data/mysql/data
#套接字文件
socket = /data/mysql/data/mysql.sock
#PID文件
pid-file = mysql1.pid
#字符集
character-set-server = utf8mb4
#是否跳过域名解析
skip_name_resolve = 1#若MySQL数据库主要运行在境外,请务必根据实际情况调整本参数
default_time_zone = "+8:00"
#最大打开文件数
open_files_limit= 65535
#MySQL(处理大量请求时,由于满负荷)暂时停止回复新请求前能够堆放的请求数
back_log = 1024
#最大连接数限制
max_connections = 256
#单个用户最大连接数限制
max_user_connections = 64
#最大连接错误数,防止密码暴力破解
max_connect_errors = 1000000
table_open_cache = 1024
table_definition_cache = 1024
table_open_cache_instances = 64
thread_stack = 512K
external-locking = FALSE
max_allowed_packet = 32M
sort_buffer_size = 4M
join_buffer_size = 4M
thread_cache_size = 384
interactive_timeout = 600
wait_timeout = 600
tmp_table_size = 32M
max_heap_table_size = 32M
slow_query_log = 1
log_timestamps = SYSTEM
slow_query_log_file = /data/mysql/logs/slow.log
log-error = /data/mysql/logs/error.log
long_query_time = 0.1
log_queries_not_using_indexes =1
log_throttle_queries_not_using_indexes = 60
min_examined_row_limit = 100
log_slow_admin_statements = 1
log_slow_slave_statements = 1
server-id = 33061
log-bin = /data/mysql/logs/binlog
sync_binlog = 1
binlog_cache_size = 4M
max_binlog_cache_size = 2G
max_binlog_size = 1G
auto_increment_offset=1
auto_increment_increment=1
#注意:MySQL 8.0开始,binlog_expire_logs_seconds选项也存在的话,会忽略expire_logs_days选项
expire_logs_days = 7
master_info_repository = TABLE
relay_log_info_repository = TABLE
gtid_mode = on
enforce_gtid_consistency = 1
log_slave_updates
slave-rows-search-algorithms = 'INDEX_SCAN,HASH_SCAN'
binlog_format = row
binlog_checksum = 1
relay_log_recovery = 1
relay-log-purge = 1
key_buffer_size = 32M
read_buffer_size = 8M
read_rnd_buffer_size = 4M
bulk_insert_buffer_size = 64M
myisam_sort_buffer_size = 128M
myisam_max_sort_file_size = 10G
myisam_repair_threads = 1
lock_wait_timeout = 3600
explicit_defaults_for_timestamp = 1
innodb_thread_concurrency = 0
innodb_sync_spin_loops = 100
innodb_spin_wait_delay = 30
transaction_isolation = READ-COMMITTED
#innodb_additional_mem_pool_size = 16M
innodb_buffer_pool_size = 1434M
innodb_buffer_pool_instances = 4
innodb_buffer_pool_load_at_startup = 1
innodb_buffer_pool_dump_at_shutdown = 1
innodb_data_file_path = ibdata1:1G:autoextend
innodb_flush_log_at_trx_commit = 1
innodb_log_buffer_size = 32M
innodb_log_file_size = 2G
innodb_log_files_in_group = 2
innodb_max_undo_log_size = 4G
innodb_undo_directory = /data/mysql/data
innodb_undo_tablespaces = 2# 根据您的服务器IOPS能力适当调整
# 一般配普通SSD盘的话,可以调整到 10000 - 20000
# 配置高端PCIe SSD卡的话,则可以调整的更高,比如 50000 - 80000
innodb_io_capacity = 4000
innodb_io_capacity_max = 8000
innodb_flush_sync = 0
innodb_flush_neighbors = 0
innodb_write_io_threads = 8
innodb_read_io_threads = 8
innodb_purge_threads = 4
innodb_page_cleaners = 4
innodb_open_files = 65535
innodb_max_dirty_pages_pct = 50
innodb_flush_method = O_DIRECT
innodb_lru_scan_depth = 4000
innodb_checksum_algorithm = crc32
innodb_lock_wait_timeout = 10
innodb_rollback_on_timeout = 1
innodb_print_all_deadlocks = 1
innodb_file_per_table = 1
innodb_online_alter_log_max_size = 4G
innodb_stats_on_metadata = 0
innodb_undo_log_truncate = 1#注意:MySQL 8.0.16开始删除该选项
#internal_tmp_disk_storage_engine = InnoDB# some var for MySQL 8
#log_error_verbosity = 3
#innodb_print_ddl_logs = 1
#binlog_expire_logs_seconds = 604800
#innodb_dedicated_server = 0innodb_status_file = 1
#注意: 开启 innodb_status_output & innodb_status_output_locks 后, 可能会导致log-error文件增长较快
innodb_status_output = 0
innodb_status_output_locks = 0#performance_schema
performance_schema = 1
performance_schema_instrument = '%memory%=on'
performance_schema_instrument = '%lock%=on'#innodb monitor
innodb_monitor_enable="module_innodb"
innodb_monitor_enable="module_server"
innodb_monitor_enable="module_dml"
innodb_monitor_enable="module_ddl"
innodb_monitor_enable="module_trx"
innodb_monitor_enable="module_os"
innodb_monitor_enable="module_purge"
innodb_monitor_enable="module_log"
innodb_monitor_enable="module_lock"
innodb_monitor_enable="module_buffer"
innodb_monitor_enable="module_index"
innodb_monitor_enable="module_ibuf_system"
innodb_monitor_enable="module_buffer_page"
innodb_monitor_enable="module_adaptive_hash"
#组提交&并行复制
slave_parallel_type=LOGICAL_CLOCK
slave_parallel_workers=2
binlog_group_commit_sync_delay=1000000
binlog_group_commit_sync_no_delay_count=10
#半同步
plugin-load = "rpl_semi_sync_master=semisync_master.so;rpl_semi_sync_slave=semisync_slave.so"
rpl-semi-sync-master-enabled = 1
rpl-semi-sync-slave-enabled = 1[mysqldump]
quick
max_allowed_packet = 32M
2.6 初始化数据库
mysqld --defaults-file=/etc/my.cnf --initialize-insecure &
--initialize:该选项初始化时会在错误日志中写一个随机root密码,初始化完成之后在错误日志中搜索password,紧跟其后的一串字符串就是这个随机密码,在初始化完成并启动mysqld之后,初次登录需要使用这个随机密码才能够登录。
--initialize-insecure:该选项初始化时不会产生随机密码,而是像5.7版本之前一样初始化完成之后第一次登录数据库使用空的root密码
2.7 启动数据库服务
mysqld_safe --defaults-file=/etc/my.cnf &
#扫描配置文件的顺序,也可以人为指定其他位置
/etc/my.cnf>/etc/mysql/my.cnf>$mysql_home/my.cnf>~/.my.cnf
#查看是否启动成功,若失败,则去错误日志查看原因
ps -ef | grep mysql
netstat -anultp | grep 端口号
2.8 搭建复制
#master创建复制用户
create user repl@'192.168.1.%' identified by 'repl';
grant replication slave,replication client on *.* to repl@'192.168.1.%';
#安装半同步插件
INSTALL PLUGIN rpl_semi_sync_master SONAME 'semisync_master.so';
INSTALL PLUGIN rpl_semi_sync_slave SONAME 'semisync_slave.so';
SELECT PLUGIN_NAME, PLUGIN_STATUS FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME LIKE '%semi%';
#建立主从复制
change master to
master_host='192.168.0.97',
master_port=3306,
master_user='repl',
master_password='repl',
master_auto_position=1;
start slave;
#查看同步状态; 查看半同步的主备状态
show slave status;
show status like 'Rpl_semi_sync_%_status';
三、搭建MHA
3.1 Ansible 配置(可选)
# manager节点安装 ansible
yum install -y ansible# vim /etc/ansible/hosts
[mysql-mha-node]
192.168.0.97
192.168.1.156
192.168.0.48# vim /etc/ansible/ansible.cfg
[defaults]
sudo_user=root
remote_port=22
remote_user=root
module_name=command
host_key_checking=False
command_warnings = False
inventory=/etc/ansible/hosts
roles_path=/etc/ansible/roles
log_path=/var/log/ansible.log
private_key_file=/root/.ssh/id_rsamanager节点执行:各节点hosts绑定 ip 和 服务器名称
ansible mysql-mha-node -m shell -a "echo -e '
192.168.0.97 zj1
192.168.1.156 zj2
192.168.0.48 zj3' >> /etc/hosts"
3.2 MHA node安装
yum install -y perl-DBD-MySQL perl-ExtUtils-MakeMaker perl-CPAN
yum install -y https://github.com/yoshinorim/mha4mysql-node/releases/download/v0.58/mha4mysql-node-0.58-0.el7.centos.noarch.rpm
3.3 MHA manager节点安装
#安装centos7的epel源
yum -y install epel-release
yum install -y https://github.com/yoshinorim/mha4mysql-manager/releases/download/v0.58/mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
3.4 MHA manager 配置
mkdir -pv /var/log/mha/app1/
[server default]
remote_workdir=/var/log/mha
manager_workdir=/var/log/mha
manager_log=/var/log/mha/app1/manager.log
master_binlog_dir=/data/mysql/logs/ #设置master保存binlog的位置,以便MHA可以找到master的日志
user=root
password=970125
repl_user=repl
repl_password=repl
ssh_user=root
ping_interval=2 #设置监控主库,发送ping包的时间间隔,默认是3秒,尝试三次没有回应的时候自动进行failover
#可从manager二进制包解压找到(mha4mysql-manager-0.58/samples/scripts)
report_script= /usr/bin/send_report //设置发生切换后发送的报警的脚本
#shutdown_script= /usr/bin/power_manager
master_ip_failover_script= /usr/bin/master_ip_failover #设置自动failover时候的切换脚本
master_ip_online_change_script=/usr/bin/master_ip_online_change #设置手动切换时候的切换脚本[server1]
port=3306
hostname=192.168.0.97
candidate_master=1 #设置为候选master,如果设置该参数以后,发生主从切换以后将会将此从库提升为主库,即使这个主库不是集群中事件最新的slave
check_repl_delay=0 #默认情况下如果一个slave落后master 100M的relay logs的话,MHA将不会选择该slave作为一个新的master,因为对于这个slave的恢复需要花费很长时间,通过设置check_repl_delay=0,MHA触发切换在选择一个新的master的时候将会忽略复制延时,这个参数对于设置了candidate_master=1的主机非常有用,因为这个候选主在切换的过程中一定是新的master[server2]
port=3306
hostname=192.168.1.156
candidate_master=1
check_repl_delay=0[server3]
port=3306
hostname=192.168.0.48
candidate_master=1
check_repl_delay=0
3.5 检查ssh
/usr/bin/masterha_check_ssh --conf=/etc/masterha.cnf
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-o7A0D8GD-1614831853969)(C:\Users\薛世杰\AppData\Roaming\Typora\typora-user-images\image-20191231143908596.png)]
3.6 检查主从复制
/usr/bin/masterha_check_repl --conf=/etc/masterha.cnf
注意:配置文件里面的账号要有响应的权限,并且可以连接。
3.7 MHA运行状态
masterha_check_status --conf=/etc/masterha.cnf
[root@zj3 bin]# /usr/bin/masterha_check_status --conf=/etc/masterha.cnf
masterha is stopped(2:NOT_RUNNING).
3.8 其他命令
# MHA master 监控(与 masterha_manager 一样)
masterha_master_monitor --conf=/etc/masterha.cnf#启动 MHA
nohup masterha_manager --conf=/etc/masterha.cnf &> /var/log/mha/app1/manager.log &[root@zj3 bin]# /usr/bin/masterha_check_status --conf=/etc/masterha.cnf
masterha (pid:73434) is running(0:PING_OK), master:192.168.0.97
#关闭 MHA
masterha_stop --conf=/etc/masterha.cnf#添加或删除配置的server信息
masterha_conf_host
四、keepalived配置
4.1 安装ipvsadm、keepalived
yum -y install keepalived
4.2 配置keepalived
修改配置文件
vi/etc/keepalived/keepalived.conf
master和备选master配置文件内容相同
#ConfigurationFile for keepalived
global_defs {
notification_email { ######定义接受邮件的邮箱zijie@dtstack.com}notification_email_from 673846401@qq.com ######定义发送邮件的邮箱smtp_server mail.qq.comsmtp_connect_timeout 10
}
vrrp_instance vrrptest { ######定义vrrptest实例state BACKUP ######服务器状态interface ens32 ######使用的接口virtual_router_id 51 ######虚拟路由的标志,一组lvs的虚拟路由标识必须相同,这样才能切换priority 150 ######服务启动优先级,值越大,优先级越高,BACKUP 不能大于MASTERadvert_int 1 ######服务器之间的存活检查时间
authentication {auth_type PASS ######认证类型auth_pass 970125 ######认证密码,一组lvs 服务器的认证密码必须一致
}
virtual_ipaddress { ######虚拟IP地址192.168.1.1
}
}
这里master服务器的state不配置成MASTER,且配置的优先级一样,是期望在master宕机后再恢复时,不主动将MASTER状态抢过来,避免MySQL服务的波动。
这里没有配置vrrp_script,在后面会让mha实现vip的自动漂移。
五、测试
5.1 手动切换
#关闭 MHA
masterha_stop --conf=/etc/masterha.cnf# 在线手动切换
masterha_master_switch --conf=/etc/masterha.cnf \
--master_state=alive \
--new_master_host=192.168.1.156 \
--new_master_port=3306 \
--orig_master_is_new_slave \
--running_updates_limit=10000Tue Dec 31 14:57:00 2019 - [info] MHA::MasterRotate version 0.58.
Tue Dec 31 14:57:00 2019 - [info] Starting online master switch..
Tue Dec 31 14:57:00 2019 - [info]
Tue Dec 31 14:57:00 2019 - [info] * Phase 1: Configuration Check Phase..
Tue Dec 31 14:57:00 2019 - [info]
Tue Dec 31 14:57:00 2019 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Dec 31 14:57:00 2019 - [info] Reading application default configuration from /etc/masterha.cnf..
Tue Dec 31 14:57:00 2019 - [info] Reading server configuration from /etc/masterha.cnf..
Tue Dec 31 14:57:01 2019 - [info] GTID failover mode = 1
Tue Dec 31 14:57:01 2019 - [info] Current Alive Master: 192.168.0.97(192.168.0.97:3306)
Tue Dec 31 14:57:01 2019 - [info] Alive Slaves:
Tue Dec 31 14:57:01 2019 - [info] 192.168.1.156(192.168.1.156:3306) Version=5.7.28-log (oldest major version between slaves) log-bin:enabled
Tue Dec 31 14:57:01 2019 - [info] GTID ON
Tue Dec 31 14:57:01 2019 - [info] Replicating from 192.168.0.97(192.168.0.97:3306)
Tue Dec 31 14:57:01 2019 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 31 14:57:01 2019 - [info] 192.168.0.48(192.168.0.48:3306) Version=5.7.28-log (oldest major version between slaves) log-bin:enabled
Tue Dec 31 14:57:01 2019 - [info] GTID ON
Tue Dec 31 14:57:01 2019 - [info] Replicating from 192.168.0.97(192.168.0.97:3306)
Tue Dec 31 14:57:01 2019 - [info] Primary candidate for the new Master (candidate_master is set)It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.0.97(192.168.0.97:3306)? (YES/no): YES
Tue Dec 31 14:57:27 2019 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Tue Dec 31 14:57:27 2019 - [info] ok.
Tue Dec 31 14:57:27 2019 - [info] Checking MHA is not monitoring or doing failover..
Tue Dec 31 14:57:27 2019 - [info] Checking replication health on 192.168.1.156..
Tue Dec 31 14:57:27 2019 - [info] ok.
Tue Dec 31 14:57:27 2019 - [info] Checking replication health on 192.168.0.48..
Tue Dec 31 14:57:27 2019 - [info] ok.
Tue Dec 31 14:57:27 2019 - [info] 192.168.1.156 can be new master.
Tue Dec 31 14:57:27 2019 - [info]
From:
192.168.0.97(192.168.0.97:3306) (current master)+--192.168.1.156(192.168.1.156:3306)+--192.168.0.48(192.168.0.48:3306)To:
192.168.1.156(192.168.1.156:3306) (new master)+--192.168.0.48(192.168.0.48:3306)+--192.168.0.97(192.168.0.97:3306)Starting master switch from 192.168.0.97(192.168.0.97:3306) to 192.168.1.156(192.168.1.156:3306)? (yes/NO): yes
Tue Dec 31 14:57:30 2019 - [info] Checking whether 192.168.1.156(192.168.1.156:3306) is ok for the new master..
Tue Dec 31 14:57:30 2019 - [info] ok.
Tue Dec 31 14:57:30 2019 - [info] 192.168.0.97(192.168.0.97:3306): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
Tue Dec 31 14:57:30 2019 - [info] 192.168.0.97(192.168.0.97:3306): Resetting slave pointing to the dummy host.
Tue Dec 31 14:57:30 2019 - [info] ** Phase 1: Configuration Check Phase completed.
Tue Dec 31 14:57:30 2019 - [info]
Tue Dec 31 14:57:30 2019 - [info] * Phase 2: Rejecting updates Phase..
Tue Dec 31 14:57:30 2019 - [info]
master_ip_online_change_script is not defined. If you do not disable writes on the current master manually, applications keep writing on the current master. Is it ok to proceed? (yes/NO): yes
Tue Dec 31 14:57:32 2019 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Tue Dec 31 14:57:32 2019 - [info] Executing FLUSH TABLES WITH READ LOCK..
Tue Dec 31 14:57:32 2019 - [info] ok.
Tue Dec 31 14:57:32 2019 - [info] Orig master binlog:pos is binlog.000003:876.
Tue Dec 31 14:57:32 2019 - [info] Waiting to execute all relay logs on 192.168.1.156(192.168.1.156:3306)..
Tue Dec 31 14:57:32 2019 - [info] master_pos_wait(binlog.000003:876) completed on 192.168.1.156(192.168.1.156:3306). Executed 0 events.
Tue Dec 31 14:57:32 2019 - [info] done.
Tue Dec 31 14:57:32 2019 - [info] Getting new master's binlog name and position..
Tue Dec 31 14:57:32 2019 - [info] binlog.000005:876
Tue Dec 31 14:57:32 2019 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.1.156', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';
Tue Dec 31 14:57:32 2019 - [info]
Tue Dec 31 14:57:32 2019 - [info] * Switching slaves in parallel..
Tue Dec 31 14:57:32 2019 - [info]
Tue Dec 31 14:57:32 2019 - [info] -- Slave switch on host 192.168.0.48(192.168.0.48:3306) started, pid: 74104
Tue Dec 31 14:57:32 2019 - [info]
Tue Dec 31 14:57:33 2019 - [info] Log messages from 192.168.0.48 ...
Tue Dec 31 14:57:33 2019 - [info]
Tue Dec 31 14:57:32 2019 - [info] Waiting to execute all relay logs on 192.168.0.48(192.168.0.48:3306)..
Tue Dec 31 14:57:32 2019 - [info] master_pos_wait(binlog.000003:876) completed on 192.168.0.48(192.168.0.48:3306). Executed 0 events.
Tue Dec 31 14:57:32 2019 - [info] done.
Tue Dec 31 14:57:32 2019 - [info] Resetting slave 192.168.0.48(192.168.0.48:3306) and starting replication from the new master 192.168.1.156(192.168.1.156:3306)..
Tue Dec 31 14:57:32 2019 - [info] Executed CHANGE MASTER.
Tue Dec 31 14:57:32 2019 - [info] Slave started.
Tue Dec 31 14:57:33 2019 - [info] End of log messages from 192.168.0.48 ...
Tue Dec 31 14:57:33 2019 - [info]
Tue Dec 31 14:57:33 2019 - [info] -- Slave switch on host 192.168.0.48(192.168.0.48:3306) succeeded.
Tue Dec 31 14:57:33 2019 - [info] Unlocking all tables on the orig master:
Tue Dec 31 14:57:33 2019 - [info] Executing UNLOCK TABLES..
Tue Dec 31 14:57:33 2019 - [info] ok.
Tue Dec 31 14:57:33 2019 - [info] Starting orig master as a new slave..
Tue Dec 31 14:57:33 2019 - [info] Resetting slave 192.168.0.97(192.168.0.97:3306) and starting replication from the new master 192.168.1.156(192.168.1.156:3306)..
Tue Dec 31 14:57:33 2019 - [info] Executed CHANGE MASTER.
Tue Dec 31 14:57:33 2019 - [info] Slave started.
Tue Dec 31 14:57:33 2019 - [info] All new slave servers switched successfully.
Tue Dec 31 14:57:33 2019 - [info]
Tue Dec 31 14:57:33 2019 - [info] * Phase 5: New master cleanup phase..
Tue Dec 31 14:57:33 2019 - [info]
Tue Dec 31 14:57:33 2019 - [info] 192.168.1.156: Resetting slave info succeeded.
Tue Dec 31 14:57:33 2019 - [info] Switching master to 192.168.1.156(192.168.1.156:3306) completed successfully.
5.2 自动切换
master dead后,MHA当时已经开启,候选Master库(Slave)会自动failover为Master.
六、MHA日常操作
MHA Manager 端日常主要操作步骤
① 检查是否有下列文件,有则删除。发生主从切换后,MHAmanager服务会自动停掉,且在manager_workdir目录下面生成文件app1.failover.complete,若要启动MHA,必须先确保无此文件)# ll /var/log/mha/app1/app1.failover.complete# ll /var/log/mha/app1/app1.failover.error
② 检查MHA当前置:# masterha_check_repl --conf=/etc/masterha.cnf
③ 启动MHA:# nohup masterha_manager --conf=/etc/masterha.cnf > /var/log/mha/app1/mha_manager.log 2>&1 &当有slave 节点宕掉时,默认是启动不了的,加上 --ignore_fail_on_start 即使有节点宕掉也能启动MHA,如下:
#--last_failover_minute=(minutes):当最近的一个failover 切换发生在last_failover_minute(默认为8小时) 之内,MHA manager 将不会在切换。因为它会认为有些问题没有得到解决。如果设置了 --ignore_last_failover 参数,参数(--last_failover_minute) 将会失效--ignore_last_failover:如果最近failover 失败,MHA 将不会再次开始failover机制,因为这个问题可能再次发生。常规步骤:手动清理failover 错误文件,此文件一般在manager_workdir/app_name.failover.error文件,然后在启动failover机制。如果设置此参数,MHA 将会继续failover 不管上次的failover状态--wait_on_failover_error=(seconds):在failover的过程,当发出错误了,masterha_manager 等待 wait_no_failover_error 的时间后,退出。如果设置为了0,直接退出。这个好处,是当后台运行master monitor 和 failover scripts的时候,masterha_manager 可以在 wait_no_failover_error 时间到达之前重启监控--remove_dead_master_conf:如果设置此参数,当成功failover后,MHA manager将会自动删除配置文件中关于dead master的配置选项。
# nohup masterha_manager --conf=/etc/masterha.cnf --ignore_fail_on_start >/var/log/mha/app1/mha_manager.log 2>&1 &
④ 检查状态:# masterha_check_status --conf=/etc/masterha.cnf
⑤ 检查日志:#tail -f /var/log/mha/app1/manager.log
⑥ 主从切换后续工作主库切换后,把原主库修复成新从库,然后重新执行以上5步。原主库数据文件完整的情况下,可通过以下方式找出最后执行的CHANGE MASTER命令:# grep "CHANGE MASTER TO MASTER" /var/log/mha/app1/manager.log | tail -1CHANGE MASTER TO MASTER_HOST='10.1.5.9',MASTER_PORT=3306, MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=120,MASTER_USER='repl_user', MASTER_PASSWORD='xxx';--最后启动新从库# start slave;# show slave status\G
purge_relay_logs -h
corntab -e0 5 * * * /usr/bin/purge_relay_logs --user=root --password=970125 --port=3306 --disable_relay_log_purge >> /var/log/purge_relay.log 2>&1
七、附录
7.1 相关命令说明
Manager命令说明
masterha_check_ssh 检查MHA的SSH配置状况
masterha_check_repl 检查MySQL复制状况
masterha_manger 启动MHA
masterha_check_status 检测当前MHA运行状态
masterha_master_monitor 检测master是否宕机
masterha_master_switch 控制故障转移(自动或者手动)
masterha_conf_host 添加或删除配置的server信息
Node命令说明
save_binary_logs 保存和复制master的二进制日志
apply_diff_relay_logs 识别差异的中继日志事件并将其差异的事件应用于其他的slave
filter_mysqlbinlog 去除不必要的ROLLBACK事件(MHA已不再使用这个工具)
purge_relay_logs 清除中继日志(不会阻塞SQL线程)
7.2 master_ip_failover脚本代码
要想把keepalived服务引入MHA,我们只需要修改切换是触发的脚本文件master_ip_failover即可,在该脚本中添加在master发生宕机时对keepalived的处理。
在管理节点编辑脚本/usr/bin/master_ip_failover,修改后如下:
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';use Getopt::Long;my ($command, $ssh_user, $orig_master_host, $orig_master_ip,$orig_master_port, $new_master_host, $new_master_ip, $new_master_port
);my $vip = '192.168.1.1';
my $ssh_start_vip ="/etc/init.d/keepalived start";
my $ssh_stop_vip ="/etc/init.d/keepalived stop";GetOptions('command=s' =>\$command,'ssh_user=s' =>\$ssh_user,'orig_master_host=s' => \$orig_master_host,'orig_master_ip=s' =>\$orig_master_ip,'orig_master_port=i' => \$orig_master_port,'new_master_host=s' =>\$new_master_host,'new_master_ip=s' =>\$new_master_ip,'new_master_port=i' =>\$new_master_port,
);exit &main();sub main {print "\n\nIN SCRIPTTEST====$ssh_stop_vip==$ssh_start_vip===\n\n";if ( $command eq "stop" || $command eq "stopssh" ) {my $exit_code = 1;eval {print "Disabling the VIP on old master: $orig_master_host \n";&stop_vip();$exit_code = 0;};if ($@) {warn "Got Error: $@\n";exit $exit_code;}exit $exit_code;}elsif ( $command eq "start" ) {my $exit_code = 10;eval {print "Enabling the VIP - $vip on the new master - $new_master_host\n";&start_vip();$exit_code = 0;};if ($@) {warn $@;exit $exit_code;}exit $exit_code;}elsif ( $command eq "status" ) {print "Checking the Status of the script.. OK \n";exit 0;}else {&usage();exit 1;}
}
sub start_vip() {`ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}
# A simple system call that disable the VIPon the old_master
sub stop_vip() {`ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}sub usage {print"Usage: master_ip_failover --command=start|stop|stopssh|status--orig_master_host=host --orig_master_ip=ip --orig_master_port=port--new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}
授予管理权限
chmod +x /usr/bin/master_ip_failover
** 注意:my $vip = ‘10.192.203.108’;这一行中的vip请根据情况改成自己的虚拟IP。
7.3 master_ip_online_change脚本代码
在管理节点编辑脚本/usr/bin/master_ip_online_change#!/usr/bin/env perl# Copyright (C) 2011 DeNA Co.,Ltd.
#
# This program is free software; you can redistribute it and/or modify
# itunder the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA## Note: This is a sample script and is notcomplete. Modify the script based on your environment.use strict;
use warnings FATAL => 'all';use Getopt::Long;
use MHA::DBHelper;
use MHA::NodeUtil;
use Time::HiRes qw( sleep gettimeofdaytv_interval );
use Data::Dumper;my $_tstart;
my $_running_interval = 0.1;
my ($command, $orig_master_is_new_slave, $orig_master_host,$orig_master_ip, $orig_master_port, $orig_master_user,$orig_master_password, $orig_master_ssh_user, $new_master_host,$new_master_ip, $new_master_port, $new_master_user,$new_master_password, $new_master_ssh_user
);
my $vip = '192.168.1.1/32';
my $key = '1';
my $ssh_start_vip = "/etc/init.d/keepalived start";
my $ssh_stop_vip = "/etc/init.d/keepalived stop";
my $orig_master_ssh_port = 22;
my $new_master_ssh_port = 22;
GetOptions('command=s' =>\$command,'orig_master_is_new_slave' => \$orig_master_is_new_slave,'orig_master_host=s' =>\$orig_master_host,'orig_master_ip=s' =>\$orig_master_ip,'orig_master_port=i' =>\$orig_master_port,'orig_master_user=s' =>\$orig_master_user,'orig_master_password=s' =>\$orig_master_password,'orig_master_ssh_user=s' =>\$orig_master_ssh_user,'new_master_host=s' =>\$new_master_host,'new_master_ip=s' =>\$new_master_ip,'new_master_port=i' =>\$new_master_port,'new_master_user=s' =>\$new_master_user,'new_master_password=s' =>\$new_master_password,'new_master_ssh_user=s' =>\$new_master_ssh_user,'orig_master_ssh_port=i' =>\$orig_master_ssh_port,'new_master_ssh_port=i' =>\$new_master_ssh_port,
);exit &main();sub current_time_us {my( $sec, $microsec ) = gettimeofday();my$curdate = localtime($sec);return $curdate . " " . sprintf( "%06d", $microsec);
}sub sleep_until {my$elapsed = tv_interval($_tstart);if( $_running_interval > $elapsed ) {sleep( $_running_interval - $elapsed );}
}sub get_threads_util {my$dbh = shift;my$my_connection_id = shift;my$running_time_threshold = shift;my$type = shift;$running_time_threshold = 0 unless ($running_time_threshold);$type = 0 unless($type);my@threads;my$sth = $dbh->prepare("SHOW PROCESSLIST");$sth->execute();while ( my $ref = $sth->fetchrow_hashref() ) {my $id = $ref->{Id};my $user = $ref->{User};my $host = $ref->{Host};my $command =$ref->{Command};my $state = $ref->{State};my $query_time = $ref->{Time};my $info = $ref->{Info};$info =~ s/^\s*(.*?)\s*$/$1/ if defined($info);next if ( $my_connection_id == $id );next if ( defined($query_time) && $query_time <$running_time_threshold );next if ( defined($command) && $command eq "Binlog Dump" );next if ( defined($user) && $user eq "system user" );nextif ( defined($command)&& $command eq "Sleep"&& defined($query_time)&& $query_time >= 1 );if ( $type >= 1 ) {next if ( defined($command) && $command eq "Sleep" );next if ( defined($command) && $command eq "Connect");}if ( $type >= 2 ) {next if ( defined($info) && $info =~ m/^select/i );next if ( defined($info) && $info =~ m/^show/i );}push @threads, $ref;}return @threads;
}sub main {if( $command eq "stop" ) {## Gracefully killing connections on the current master#1. Set read_only= 1 on the new master#2. DROP USER so that no app user can establish new connections#3. Set read_only= 1 on the current master#4. Kill current queries#* Any database access failure will result in script die.my $exit_code = 1;eval {## Setting read_only=1 on the new master (to avoid accident)my $new_master_handler = new MHA::DBHelper();# args: hostname, port, user, password, raise_error(die_on_error)_or_not$new_master_handler->connect( $new_master_ip, $new_master_port,$new_master_user, $new_master_password, 1 );print current_time_us() . " Set read_only on the new master..";$new_master_handler->enable_read_only();if ( $new_master_handler->is_read_only() ) {print "ok.\n";}else {die "Failed!\n";}$new_master_handler->disconnect();# Connecting to the orig master, die if any database error happensmy $orig_master_handler = new MHA::DBHelper();$orig_master_handler->connect( $orig_master_ip, $orig_master_port,$orig_master_user, $orig_master_password, 1 );## Drop application user so that nobody can connect. Disablingper-session binlog beforehand$orig_master_handler->disable_log_bin_local();print current_time_us() . " Drpping appuser on the orig master..\n";#FIXME_xxx_drop_app_user($orig_master_handler);## Waiting for N * 100 milliseconds so that current connections can exitmy $time_until_read_only = 15;$_tstart = [gettimeofday];my @threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );while ( $time_until_read_only > 0 && $#threads >= 0 ) {if ( $time_until_read_only % 5 == 0 ) {printf
"%s Waiting all running %d threads aredisconnected.. (max %d milliseconds)\n",current_time_us(), $#threads + 1, $time_until_read_only * 100;if ( $#threads < 5 ) {print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump ."\n"foreach (@threads);}}sleep_until();$_tstart = [gettimeofday];$time_until_read_only--;@threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );}## Setting read_only=1 on the current master so that nobody(exceptSUPER) can writeprint current_time_us() . " Set read_only=1 on the orig master..";$orig_master_handler->enable_read_only();if ( $orig_master_handler->is_read_only() ) {print "ok.\n";}else {die "Failed!\n";}## Waiting for M * 100 milliseconds so that current update queries cancompletemy $time_until_kill_threads = 5;@threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );while ( $time_until_kill_threads > 0 && $#threads >= 0 ) {if ( $time_until_kill_threads % 5 == 0 ) {printf
"%s Waiting all running %d queries aredisconnected.. (max %d milliseconds)\n",current_time_us(), $#threads + 1, $time_until_kill_threads * 100;if ( $#threads < 5 ) {print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump ."\n"foreach (@threads);}}sleep_until();$_tstart = [gettimeofday];$time_until_kill_threads--;@threads = get_threads_util( $orig_master_handler->{dbh},$orig_master_handler->{connection_id} );}## Terminating all threadsprint current_time_us() . " Killing all applicationthreads..\n";$orig_master_handler->kill_threads(@threads) if ( $#threads >= 0);print current_time_us() . " done.\n";$orig_master_handler->enable_log_bin_local();$orig_master_handler->disconnect();## After finishing the script, MHA executes FLUSH TABLES WITH READ LOCKeval {`ssh -p$orig_master_ssh_port $orig_master_ssh_user\@$orig_master_host\" $ssh_stop_vip \"`;};if ($@) {warn $@;}$exit_code = 0;};if ($@) {warn "Got Error: $@\n";exit $exit_code;}exit $exit_code;}elsif ( $command eq "start" ) {## Activating master ip on the new master#1. Create app user with write privileges#2. Moving backup script if needed#3. Register new master's ip to the catalog database# We don't return error even thoughactivating updatable accounts/ip failed so that we don't interrupt slaves'recovery.
# If exit code is 0 or 10, MHA does notabortmy $exit_code = 10;eval {my $new_master_handler = new MHA::DBHelper();# args: hostname, port, user, password, raise_error_or_not$new_master_handler->connect( $new_master_ip, $new_master_port,$new_master_user, $new_master_password, 1 );## Set read_only=0 on the new master$new_master_handler->disable_log_bin_local();print current_time_us() . " Setread_only=0 on the new master.\n";$new_master_handler->disable_read_only();## Creating an app user on the new masterprint current_time_us() . " Creating app user on the newmaster..\n";#FIXME_xxx_create_app_user($new_master_handler);$new_master_handler->enable_log_bin_local();$new_master_handler->disconnect();## Update master ip on the catalog database, etc`ssh -p$new_master_ssh_port $new_master_ssh_user\@$new_master_host\" $ssh_start_vip \"`;$exit_code = 0;};if ($@) {warn "Got Error: $@\n";exit $exit_code;}exit $exit_code;}elsif ( $command eq "status" ) {#do nothingexit 0;}else {&usage();exit 1;}
}sub usage {print
"Usage: master_ip_online_change--command=start|stop|status --orig_master_host=host --orig_master_ip=ip--orig_master_port=port --new_master_host=host --new_master_ip=ip--new_master_port=port\n";die;
}
授予管理权限:
chmod +x /usr/bin/master_ip_online_change
注意:需要把my $vip = ‘10.192.203.108/32’;改成自己的vip即可。
7.4 send_report脚本代码
#!/usr/bin/perl# Copyright (C) 2011 DeNA Co.,Ltd.
#
# This program is free software; you can redistribute it and/or modify
# itunder the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA## Note: This is a sample script and is notcomplete. Modify the script based on your environment.use strict;
use warnings FATAL => 'all';
use Mail::Sender;
use Getopt::Long;#new_master_host and new_slave_hosts areset only when recovering master succeeded
my ( $dead_master_host, $new_master_host, $new_slave_hosts,$subject, $body );
my $smtp='smtp.163.com';
my $mail_from='xxxxxxx@163.com';
my $mail_user='xxxxxxx@163.com';
my $mail_pass='Password';
my$mail_to=['949538827@qq.com','15521xxxx@139.com'];
GetOptions('orig_master_host=s' => \$dead_master_host,'new_master_host=s' =>\$new_master_host,'new_slave_hosts=s' =>\$new_slave_hosts,'subject=s' =>\$subject,'body=s' => \$body,
);mailToContacts($smtp,$mail_from,$mail_user,$mail_pass,$mail_to,$subject,$body);sub mailToContacts {my ( $smtp, $mail_from, $user, $passwd, $mail_to, $subject, $msg ) = @_;open my $DEBUG, "> /tmp/monitormail.log"or die "Can't open the debug file:$!\n";my $sender = new Mail::Sender {ctype => 'text/plain;charset=utf-8',encoding => 'utf-8',smtp => $smtp,from => $mail_from,auth => 'LOGIN',TLS_allowed => '0',authid => $user,authpwd => $passwd,to => $mail_to,subject => $subject,debug => $DEBUG};$sender->MailMsg({ msg => $msg,debug => $DEBUG})or print $Mail::Sender::Error;return 1;
}# Do whatever you want hereexit 0;注意:需要修改下以下几行内容:
my $smtp='smtp.163.com';
my $mail_from='xxxxxxx@163.com';
my $mail_user='xxxxxxx@163.com';
my $mail_pass='Password';
my$mail_to=['673846401@qq.com'];
赋予执行权限
chmod +x /usr/bin/send_report
**注意:需要确保管理节点服务器可以正常发送邮件。可以先用sendEmail命令试下。