ETCD集群清理历史版本

# 查看 etcd 集群成员
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt member list

# 查看 etcd 集群状态
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
endpoint health --cluster

清理历史版本

清理当前版本之前的数据(这是清理etcd集群)
# 查看存储统计信息
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
endpoint status -w table

# 获取当前版本信息
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
endpoint status --write-out="json" | egrep -o '"revision":[0-9]*' | egrep -o '[0-9].*'
# 返回:302908777

# 当前版本之前的版本全部压缩
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
compact 302908777

# 将上一步中压缩的数据全部清理,只保留当前版本最新key值
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
defrag --cluster

------以下是其它一些操作
# 按前缀删除
docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
del /tmp/ --prefix

# 分页列出key
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
get --prefix "" | less

# 列出所有键(--keys-only 仅输出键名,不包含值和创建信息)
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
get "" --prefix --keys-only

# 获取键的详细信息
# docker run --rm -it --net host -v /etc/kubernetes:/etc/kubernetes \
registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.5.7-0 etcdctl --cert \
/etc/kubernetes/pki/etcd/peer.crt --key /etc/kubernetes/pki/etcd/peer.key --cacert \
/etc/kubernetes/pki/etcd/ca.crt \
--endpoints=https://192.168.64.35:2379,https://192.168.64.37:2379,https://192.168.64.38:2379 \
get "" --prefix --write-out=fields

kuboard清理etcd数据

# 进入kuboard容器内部(这是清理kuboard容器里的etcd数据)
docker exec -it kuboard-admin bash
cd /data/etcd-data/member/snap
ls -l   # 显示 db 文件已超过 2.1G

# 查看ETCD集群告警情况
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" alarm list
返回:memberID:6460912315094810421 alarm:NOSPACE

# 查看存储统计信息
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" endpoint status -w table

# 获取当前版本信息
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" endpoint status --write-out="json" | egrep -o '"revision":[0-9]*' | egrep -o '[0-9].*'
返回:6076030

# 当前版本之前的版本全部压缩
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" compact 6076030

# 将上一步中压缩的数据全部清理,只保留当前版本最新key值
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" defrag
{"level":"warn","ts":"2024-12-26T12:47:05.580+0800","caller":"clientv3/retry_interceptor.go:62","msg":"retrying of unary invoker failed","target":"passthrough:///http://127.0.0.1:2379","attempt":0,"error":"rpc error: code = DeadlineExceeded desc = context deadline exceeded"}
Failed to defragment etcd member[http://127.0.0.1:2379] (context deadline exceeded)
错误原因:etcdctl的默认命令超时为5秒,但碎片整理花费的时间比这更长
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" --command-timeout=3000s defrag

# 查看etcd告警
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" alarm list

# 取消告警信息
ETCDCTL_API=3 etcdctl --endpoints="http://127.0.0.1:2379" alarm disarm