k8s集群剔除一个master节点重新加入集群
场景
生产环境剔除master1,重新加入集群
kubeadm方式部署的集群
etcd静态pod启动
192.168.64.52 为master1节点ip
方法
1. 获取 join 命令
# 获取 join 命令
kubeadm token create --print-join-command
kubeadm join 192.168.64.45:16443 --token 69muu9.mvs42y5uxvarwjqp --discovery-token-ca-cert-hash sha256:7d0804220e2b5c836cb792df94134fc57b12f340a4a0bd55677ef5d7219099b9
kubeadm token list
2. 备份etcd数据
# 备份etcd数据
## 查看etcd成员列表,找到master1的id
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt member list
## 剔除已删除的master1
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt member remove b8434eb7a3474524
## 使用etcdctl备份集群etcd上的etcd数据
mkdir /opt/etcd-bak
cd /opt/etcd-bak
etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.52:2379 \
snapshot save snapshot-20240531.db
## 安装etcdctl(若主机上没有etcdctl,需在上面的备份步骤之前先执行;下面的容器ID 73912616f41d 仅为示例,请替换为 docker ps 查到的实际etcd容器ID)
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert /etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert /etc/kubernetes/pki/etcd/ca.crt member list
docker cp 73912616f41d:/usr/local/bin/etcdctl /usr/local/bin
chmod 755 /usr/local/bin/etcdctl
## 恢复etcd数据(需要时操作)
ETCDCTL_API=3 etcdctl snapshot restore --data-dir /var/lib/etcd/ /opt/etcd-bak/snapshot-20240531.db
3. 剔除master1节点
## 备份/etc/kubernetes/
cp -r /etc/kubernetes /opt/k8s-etc_kubernetes
## 驱逐pod
kubectl drain master1 --delete-emptydir-data --force --ignore-daemonsets
kubectl delete node master1
4. 修改api-server的nginx配置,注释master1节点
vim /etc/nginx/nginx.conf
...
upstream k8s-apiserver {
#server 192.168.64.52:6443;
server 192.168.64.xx:6443;
server 192.168.64.xx:6443;
}
...
nginx -t
nginx -s reload
5. 重置master1
## 修改/etc/kubernetes/manifests api-server的连接地址为master1的ip+6443端口
grep -rl 16443 /etc/kubernetes/manifests
cd /etc/kubernetes/manifests
cp -r /etc/kubernetes/manifests /opt/k8s-etc_kubernetes_manifests
## 修改/root/.kube/config api-server的连接地址为master1的ip+6443端口
cp /root/.kube/config /root/.kube/config.bak
vim /root/.kube/config
server: https://192.168.64.52:6443
## 重置master1主机
kubeadm reset
rm -rf $HOME/.kube/config
6. 清理etcd数据
## 清理/var/lib/etcd目录为空
rm -rf /var/lib/etcd
7. 拷贝证书到master1
# 如果reset未成功需要执行下面证书备份拷贝,再从53上拉下来。如果成功,直接从53上拉证书即可
cd /etc/kubernetes/pki/
cp -r /etc/kubernetes/pki /etc/kubernetes/pki.bak
## 创建etcd证书目录
mkdir -p /etc/kubernetes/pki/etcd
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/ca.* /etc/kubernetes/pki/
ca.crt
ca.key
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/sa.* /etc/kubernetes/pki/
sa.key
sa.pub
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/front-proxy-ca.* /etc/kubernetes/pki/
front-proxy-ca.crt
front-proxy-ca.key
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/etcd/ca.* /etc/kubernetes/pki/etcd/
ca.crt
ca.key
8. master1加入集群
# 加入之前先确认镜像是否存在
crictl images
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/etcd:3.5.7-0 registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-apiserver:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-controller-manager:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-proxy:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-scheduler:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/pause:3.9 registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.9
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/coredns:v1.10.1 registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.10.1
# 加入集群
kubeadm join 192.168.64.45:16443 --token 69muu9.mvs42y5uxvarwjqp \
--discovery-token-ca-cert-hash sha256:7d0804220e2b5c836cb792df94134fc57b12f340a4a0bd55677ef5d7219099b9 \
--control-plane --node-name=master1
9. etcd健康检查
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt member list
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.52:2379,https://192.168.64.53:2379,https://192.168.64.54:2379 \
endpoint health --cluster
10. k8s集群检查
kubectl get no
kubectl get po -A
# 设置role为master
kubectl label node master1 node-role.kubernetes.io/master=master
# 设置role为管理节点
kubectl label node master2 node-role.kubernetes.io/control-plane=true
11. 修改api-server的nginx配置,放开master1节点
vim /etc/nginx/nginx.conf
...
upstream k8s-apiserver {
server 192.168.64.52:6443;
server 192.168.64.xx:6443;
server 192.168.64.xx:6443;
}
...
nginx -t
nginx -s reload
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 悩姜!



