Scenario

Remove master1 from a production cluster and rejoin it

Cluster deployed with kubeadm
etcd runs as static pods
192.168.64.52 is the master1 node IP

Procedure

1. Get the join command

# Get the join command
kubeadm token create --print-join-command

kubeadm join 192.168.64.45:16443 --token 69muu9.mvs42y5uxvarwjqp --discovery-token-ca-cert-hash sha256:7d0804220e2b5c836cb792df94134fc57b12f340a4a0bd55677ef5d7219099b9

# List existing tokens to confirm the new one
kubeadm token list
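By default a token created this way expires after 24 hours; if the rejoin will happen later, a longer lifetime can be requested when creating it. A hedged example (the 48h value is purely illustrative):

# Create a token with an explicit TTL (a value of 0 means the token never expires)
kubeadm token create --ttl 48h --print-join-command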

2. Back up the etcd data

# Back up the etcd data
## List the etcd members and find master1's member ID
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt member list
## Remove the master1 member from etcd (use the ID found above)
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt member remove b8434eb7a3474524


## Use etcdctl to back up the etcd data held by a surviving cluster member
mkdir /opt/etcd-bak
cd /opt/etcd-bak

etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.52:2379 \
snapshot save snapshot-20240531.db


## Install etcdctl on the host (copy the binary out of an etcd container)
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert /etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert /etc/kubernetes/pki/etcd/ca.crt member list

docker cp 73912616f41d:/usr/local/bin/etcdctl /usr/local/bin
chmod +x /usr/local/bin/etcdctl

## Restore the etcd data (only when needed)
ETCDCTL_API=3 etcdctl snapshot restore --data-dir /var/lib/etcd/ /opt/etcd-bak/snapshot-20240531.db
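Before restoring from the snapshot, or simply to validate the backup taken above, its hash, revision, key count and size can be inspected; a minimal check (etcd 3.5 prints a deprecation notice pointing to etcdutl, but the command still works):

etcdctl snapshot status /opt/etcd-bak/snapshot-20240531.db -w table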

3. Remove the master1 node

## Back up /etc/kubernetes/
cp -r /etc/kubernetes /opt/k8s-etc_kubernetes

## Drain the pods off master1 (--delete-emptydir-data is the current name of the deprecated --delete-local-data flag)
kubectl drain master1 --delete-emptydir-data --force --ignore-daemonsets
kubectl delete node master1
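Before touching the load balancer it is worth confirming that master1 is really gone from the node list and that nothing still reports it as its node; a quick check:

# master1 should no longer appear in the node list
kubectl get nodes
# no pod should still list master1 as its node
kubectl get pods -A -o wide | grep master1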

4. Edit the nginx config for the api-server and comment out master1

vim /etc/nginx/nginx.conf
...
upstream k8s-apiserver {
    #server 192.168.64.52:6443;
    server 192.168.64.xx:6443;
    server 192.168.64.xx:6443;
}
...

nginx -t
nginx -s reload
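After the reload, the remaining control-plane nodes should still serve the API through the load balancer; a quick probe against the VIP used in the join command (-k skips TLS verification for this check only):

curl -k https://192.168.64.45:16443/healthz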

5. Reset master1

## In /etc/kubernetes/manifests, change the api-server connection address to master1's IP and port 6443
cp -r /etc/kubernetes/manifests /opt/k8s-etc_kubernetes_manifests
cd /etc/kubernetes/manifests
grep -rl 16443 .

## In /root/.kube/config, change the api-server connection address to master1's IP and port 6443
cp /root/.kube/config /root/.kube/config.bak
vim /root/.kube/config
server: https://192.168.64.52:6443
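As an alternative to editing each file by hand, the replacement can be scripted; a minimal sketch, assuming the files found by the grep above are exactly the ones still referencing the 16443 VIP (verify the list before running):

# Rewrite the old VIP:port to master1's local apiserver address in every matching file
for f in $(grep -rl '192.168.64.45:16443' /etc/kubernetes/ /root/.kube/config); do
    sed -i 's#192.168.64.45:16443#192.168.64.52:6443#g' "$f"
done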

## Reset the master1 host
kubeadm reset
rm -rf $HOME/.kube/config

6. Clean up the etcd data

## Remove the old etcd data so /var/lib/etcd is empty
rm -rf /var/lib/etcd

7. Copy the certificates to master1

# If the reset did not complete, back up the existing certificates below first and then pull fresh ones from .53; if it did, just pull the certificates from .53
cd /etc/kubernetes/pki/
cp -r /etc/kubernetes/pki /etc/kubernetes/pki.bak

## Create the etcd certificate directory
mkdir /etc/kubernetes/pki/etcd

## Pull the shared cluster certificates from the .53 master node
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/ca.* /etc/kubernetes/pki/              # ca.crt, ca.key
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/sa.* /etc/kubernetes/pki/              # sa.key, sa.pub
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/front-proxy-ca.* /etc/kubernetes/pki/  # front-proxy-ca.crt, front-proxy-ca.key
scp -P 8822 192.168.64.53:/etc/kubernetes/pki/etcd/ca.* /etc/kubernetes/pki/etcd/    # etcd/ca.crt, etcd/ca.key
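Copying the pki files by hand works, but kubeadm can also distribute them: an existing control-plane node can upload the certificates as an encrypted Secret and print a certificate key, which is then passed to the join command. A hedged sketch (the first command runs on a healthy control-plane node such as .53; token, hash and key are placeholders):

# On a healthy control-plane node: upload the certs and print the decryption key (the uploaded Secret expires after about 2 hours)
kubeadm init phase upload-certs --upload-certs

# On master1: join with --certificate-key instead of copying the pki files manually
kubeadm join 192.168.64.45:16443 --token <token> \
    --discovery-token-ca-cert-hash sha256:<hash> \
    --control-plane --certificate-key <key-printed-above> --node-name=master1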

8. Rejoin master1 to the cluster

# Before joining, confirm the required images exist locally; retag them from the local registry to the names kubeadm expects
crictl image ls
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/etcd:3.5.7-0 registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-apiserver:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-controller-manager:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-proxy:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/kube-scheduler:v1.27.2 registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.27.2
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/pause:3.9 registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.9
ctr -n k8s.io i tag 192.168.64.61:5000/k8s/coredns:v1.10.1 registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:v1.10.1

# Join the cluster as a control-plane node
kubeadm join 192.168.64.45:16443 --token 69muu9.mvs42y5uxvarwjqp \
--discovery-token-ca-cert-hash sha256:7d0804220e2b5c836cb792df94134fc57b12f340a4a0bd55677ef5d7219099b9 \
--control-plane --node-name=master1
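Once the join finishes, the control-plane static pods should come up on master1; a quick runtime-side check before moving on to the etcd and cluster checks:

# kube-apiserver, kube-controller-manager, kube-scheduler and etcd containers should all be running
crictl ps | grep -E 'kube-apiserver|kube-controller-manager|kube-scheduler|etcd'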

9. etcd health check

docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes  \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt member list

docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.52:2379,https://192.168.64.53:2379,https://192.168.64.54:2379 \
endpoint health --cluster
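Beyond the health probe, the member status view shows which node is the leader and whether revisions are in sync; the same containerized etcdctl can be reused for this, for example:

docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.52:2379,https://192.168.64.53:2379,https://192.168.64.54:2379 \
endpoint status -w table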

10. Kubernetes cluster check

kubectl get no
kubectl get po -A

# Set the role label to master
kubectl label node master1 node-role.kubernetes.io/master=master

# Set the role label to control-plane
kubectl label node master2 node-role.kubernetes.io/control-plane=true
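The ROLES column in kubectl get nodes is derived from these node-role labels, so a quick way to confirm they took effect:

# master1 should now show a role instead of <none>
kubectl get nodes
kubectl get node master1 --show-labels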

11. Edit the nginx config for the api-server again and re-enable master1

vim /etc/nginx/nginx.conf
...
upstream k8s-apiserver {
    server 192.168.64.52:6443;
    server 192.168.64.xx:6443;
    server 192.168.64.xx:6443;
}
...

nginx -t
nginx -s reload
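As a final check that traffic reaches master1 again, both directly and through the load balancer (-k skips TLS verification for this quick probe):

curl -k https://192.168.64.52:6443/healthz
curl -k https://192.168.64.45:16443/healthz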