Deploying EFK on k8s

Versions:
filebeat: 8.5.0
elasticsearch: 7.16.2
kibana: 7.16.2

1. efk-nfs

1.1. efk-nfs-class.yaml

# cat efk-nfs-class.yaml 
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: efk-nfs-storage
provisioner: k8s-sigs.io/nfs-subdir-external-provisioner # or choose another name; must match the deployment's env PROVISIONER_NAME
parameters:
  archiveOnDelete: "false"
reclaimPolicy: Retain
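
For reference, any PVC that names this class is provisioned dynamically on the NFS share. A minimal sketch (the claim name and size here are hypothetical; the StatefulSet below uses volumeClaimTemplates instead of a standalone claim):

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-claim            # hypothetical name, for illustration only
  namespace: elasticsearch
spec:
  accessModes: [ "ReadWriteOnce" ]
  storageClassName: efk-nfs-storage
  resources:
    requests:
      storage: 1Gi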

1.2. efk-nfs-rbac.yaml

# cat efk-nfs-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: efk-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: elasticsearch
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: efk-nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-efk-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: efk-nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: elasticsearch
roleRef:
  kind: ClusterRole
  name: efk-nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-efk-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: elasticsearch
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-efk-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: elasticsearch
subjects:
  - kind: ServiceAccount
    name: efk-nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: elasticsearch
roleRef:
  kind: Role
  name: leader-locking-efk-nfs-client-provisioner
  apiGroup: rbac.authorization.k8s.io

1.3. efk-nfs-deployment.yaml

# cat efk-nfs-deployment.yaml 
apiVersion: apps/v1
kind: Deployment
metadata:
  name: efk-nfs-client-provisioner
  labels:
    app: efk-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: elasticsearch
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: efk-nfs-client-provisioner
  template:
    metadata:
      labels:
        app: efk-nfs-client-provisioner
    spec:
      serviceAccountName: efk-nfs-client-provisioner
      containers:
        - name: efk-nfs-client-provisioner
          image: 192.168.xxx.61:5000/k8s/nfs-subdir-external-provisioner:v4.0.1 
          volumeMounts:
            - name: efk-nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: k8s-sigs.io/nfs-subdir-external-provisioner
            - name: NFS_SERVER
              value: nas1.local.cloud
            - name: NFS_PATH
              value: /sf/logs-001-t6c48k/es-logs
      volumes:
        - name: efk-nfs-client-root
          nfs:
            server: nas1.local.cloud   # must match NFS_SERVER above
            path: /sf/logs-001-t6c48k/es-logs
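
Before moving on to the Elasticsearch resources, the three manifests can be applied and the provisioner verified with standard kubectl commands, along these lines:

# kubectl apply -f efk-nfs-rbac.yaml -f efk-nfs-class.yaml -f efk-nfs-deployment.yaml
# kubectl -n elasticsearch get pods -l app=efk-nfs-client-provisioner
# kubectl get storageclass efk-nfs-storage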

2. efk-sts

2.1. elasticsearch-config.yaml

# cat elasticsearch-config.yaml 
apiVersion: v1
data:
  elasticsearch.yml: |
    cluster.name: k8s-logs      
    node.master: true
    node.data: true
    node.ingest: true      
    #discovery.zen.minimum_master_nodes: 1
    discovery.seed_hosts: "es-0.elasticsearch,es-1.elasticsearch,es-2.elasticsearch"
    cluster.initial_master_nodes: "es-0,es-1,es-2"
    xpack.security.enabled: false
    xpack.security.transport.ssl.enabled: false
    path.repo: "/usr/share/elasticsearch/es-back"

    #thread_pool.search.queue_size: 50
    #thread_pool.search.size: 20
    #thread_pool.search.min_queue_size: 10
    #thread_pool.search.max_queue_size: 100
 
kind: ConfigMap
metadata:
  name: elasticsearch-config
  namespace: elasticsearch

2.2. es-statefulset.yaml

# cat es-statefulset.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: es
  namespace: elasticsearch 
spec:
  serviceName: elasticsearch
  replicas: 3
  selector:
    matchLabels:
      app: elasticsearch
  template:
    metadata:
      labels: 
        app: elasticsearch
    spec:
      nodeSelector:
      #  #es: log
        efkmonitoring: efkmonitoring
      #tolerations:
      #- key: "efkmonitoring"
      #  operator: "Equal"
      #  value: "efkmonitoring"
      #  effect: "NoSchedule"
      initContainers:
      #- name: fix-permissions
      #  image: 192.168.xxx.61:5000/es/busybox:latest
      #  command: ["sh", "-c", "chown -R 1000:1000 /usr/share/elasticsearch/data"]
      #  securityContext:
      #    privileged: true
      #  volumeMounts:
      #  - name: es-data
      #    mountPath: /usr/share/elasticsearch/data
      - name: increase-vm-max-map
        image: 192.168.xxx.61:5000/es/busybox:latest
        imagePullPolicy: IfNotPresent
        command: ["sysctl", "-w", "vm.max_map_count=262144"]
        securityContext:
          privileged: true
      - name: increase-fd-ulimit
        image: 192.168.xxx.61:5000/es/busybox:latest
        imagePullPolicy: IfNotPresent
        command: ["sh", "-c", "ulimit -n 65536"]
        securityContext:
          privileged: true
      containers:
      - name: elasticsearch
        #image: 192.168.xxx.61:5000/es/elasticsearch:7.17.10
        image: 192.168.xxx.61:5000/es/elasticsearch:7.16.2
        imagePullPolicy: IfNotPresent
        ports:
        - name: rest
          containerPort: 9200
        - name: inter
          containerPort: 9300
        resources:
          limits:
              memory: 18Gi
              cpu: '6000m'
          requests:
              memory: 14Gi
              cpu: '1000m'
        volumeMounts:
        - mountPath: /usr/share/elasticsearch/config/elasticsearch.yml
          name: elasticsearch-config
          subPath: elasticsearch.yml
        - name: es-data
          mountPath: /usr/share/elasticsearch/data
        - name: es-data-plugins
          mountPath: /usr/share/elasticsearch/plugins  
        - name: es-back
          mountPath: /usr/share/elasticsearch/es-back
        env:
        - name: cluster.name
          value: k8s-logs
        - name: node.name
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        #- name: cluster.initial_master_nodes
        #  value: "es-0,es-1,es-2"
        #- name: discovery.zen.minimum_master_nodes
        #  value: "1"
        #- name: discovery.seed_hosts
        #  value: "es-0.elasticsearch,es-1.elasticsearch,es-2.elasticsearch"
        - name: ES_JAVA_OPTS
          #value: "-Xms4G -Xmx4G"
          value: "-Xms14G -Xmx14G"
        - name: network.host
          value: "0.0.0.0"
        - name: ingest.geoip.downloader.enabled
          value: "false"
      volumes:
        - name: es-back
          nfs:
            server: nas1.local.cloud
            path: /sf/logs-001-t6c48k/es-logs/es-back
        - configMap:
            defaultMode: 420
            items:
              - key: elasticsearch.yml
                path: elasticsearch.yml
            name: elasticsearch-config
          name: elasticsearch-config 
  volumeClaimTemplates:
  - metadata:
      name: es-data
      labels:
        app: elasticsearch
    spec:
      accessModes: [ "ReadWriteOnce" ]
      storageClassName: "efk-nfs-storage"
      resources:
        requests:
          storage: 3072Gi
  - metadata:
      name: es-data-plugins
      labels:
        app: elasticsearch
    spec:
      accessModes: [ "ReadWriteOnce" ]
      storageClassName: "efk-nfs-storage"
      resources:
        requests:
          storage: 1Gi
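
After applying the StatefulSet, the pods and the dynamically provisioned PVCs (labeled app: elasticsearch via the claim templates) can be checked with, for example:

# kubectl -n elasticsearch get pods -l app=elasticsearch
# kubectl -n elasticsearch get pvc -l app=elasticsearch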
 
# If a pod cannot reach the es nodes, hosts entries can be added inside the pod
# add hosts entries
vi /etc/hosts
# production
10.244.13.32    es-0.elasticsearch.svc.cluster.local    es-0.efk    es-0
10.244.14.29    es-1.elasticsearch.svc.cluster.local    es-1.efk    es-1
10.244.15.30    es-2.elasticsearch.svc.cluster.local    es-2.efk    es-2

2.3. es-service.yaml

# cat es-service.yaml 
kind: Service
apiVersion: v1
metadata:
  name: elasticsearch
  namespace: elasticsearch
  labels:
    app: elasticsearch
spec:
  selector:
    app: elasticsearch
  #clusterIP: None
  type: NodePort
  ports:
    - port: 9200
      name: rest
      nodePort: 30092
    - port: 9300
      name: inter-node
#---
#apiVersion: v1
#kind: Service
#metadata:
#  name: es-headless
#  namespace: elasticsearch
#  labels:
#    k8s-app: elasticsearch
#spec:
#  type: ClusterIP
#  clusterIP: None
#  ports:
#  - name: rest
#    port: 9200
#    protocol: TCP
#  - port: 9300
#    name: inter-node
#  selector:
#    k8s-app: elasticsearch
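
Once the pods are ready, cluster formation can be checked through the NodePort (replace <node-ip> with any node's address):

# curl http://<node-ip>:30092/_cat/nodes?v
# curl http://<node-ip>:30092/_cluster/health?pretty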

3. filebeat

3.1. filebeat-config.yaml

# This config only collects logs from the specified namespaces, and creates one index per service
# cat filebeat-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: filebeat-config
  namespace: elasticsearch
  labels:
    k8s-app: filebeat
data:
  filebeat.yml: |-
    filebeat.inputs:
    - type: container
      fields:
        kubernetes.namespace: ["project-a-pro", "project-b-pro"]
        #type: "all"
      paths:
        - /var/log/containers/*.log
          #- /var/lib/docker/containers/*/*.log
      # do not collect log lines matching these patterns
      exclude_lines: ['^DEBUG']
      # merge multiline entries into one event
      multiline.pattern: '^[0-9]{4}-[0-9]{2}-[0-9]{2}'
      multiline.negate: true
      multiline.match: after
      multiline.timeout: 30
      scan_frequency: 1s
      # do not collect host.* fields
      publisher_pipeline.disable_host: true
      processors:
        - add_kubernetes_metadata:
            # add Kubernetes metadata fields
            default_indexers.enabled: true
            default_matchers.enabled: true
            host: ${NODE_NAME}
            matchers:
              - logs_path:
                  logs_path: "/var/log/containers/"
        - drop_fields:
          # redundant fields to drop
            fields: ["host", "tags", "ecs", "log", "prospector", "agent", "input", "beat", "offset"]

    #- type: container
    #  fields:
    #    kubernetes.namespace: "project-b-pro"
    #    #type: "all"
    #  paths:
    #    - /var/log/containers/*.log
    #      #- /var/lib/docker/containers/*/*.log
    #  # do not collect log lines matching these patterns
    #  exclude_lines: ['^DEBUG']
    #  # merge multiline entries into one event
    #  multiline.pattern: '^[0-9]{4}-[0-9]{2}-[0-9]{2}'
    #  multiline.negate: true
    #  multiline.match: after
    #  multiline.timeout: 30
    #  scan_frequency: 1s
    #  # do not collect host.* fields
    #  publisher_pipeline.disable_host: true
    #  processors:
    #    - add_kubernetes_metadata:
    #        # add Kubernetes metadata fields
    #        default_indexers.enabled: true
    #        default_matchers.enabled: true
    #        host: ${NODE_NAME}
    #        matchers:
    #          - logs_path:
    #              logs_path: "/var/log/containers/"
    #    - drop_fields:
    #      # redundant fields to drop
    #        fields: ["host", "tags", "ecs", "log", "prospector", "agent", "input", "beat", "offset"]
    
    output.elasticsearch:
      hosts: ['${ELASTICSEARCH_HOST:elasticsearch}:${ELASTICSEARCH_PORT:9200}']
      allow_older_versions: true
      indices:
        - index: "k8s-prod-%{[kubernetes][namespace]}-%{[kubernetes][labels][app]}-%{+yyyy.MM.dd}"
          when.contains:
            kubernetes.namespace: "project-a-pro"

        - index: "k8s-prod-%{[kubernetes][namespace]}-%{[kubernetes][labels][app]}-%{+yyyy.MM.dd}"
          when.contains:
            kubernetes.namespace: "project-b-pro"
            #fields.type: "all"
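
After Filebeat starts shipping logs, the per-service indices defined above should appear; a quick check in Kibana Dev Tools:

GET _cat/indices/k8s-prod-*?v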

3.2. filebeat-ds.yaml

# cat filebeat-ds.yaml 
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: filebeat
subjects:
- kind: ServiceAccount
  name: filebeat
  namespace: elasticsearch
roleRef:
  kind: ClusterRole
  name: filebeat
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: filebeat
  labels:
    k8s-app: filebeat
rules:
- apiGroups: [""] # "" indicates the core API group
  resources:
  - namespaces
  - nodes
  - pods
  verbs:
  - get
  - watch
  - list
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: filebeat
  namespace: elasticsearch
  labels:
    k8s-app: filebeat
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    k8s-app: filebeat
  name: filebeat
  namespace: elasticsearch
spec:
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      k8s-app: filebeat
  template:
    metadata:
      creationTimestamp: null
      labels:
        k8s-app: filebeat
    spec:
      tolerations:
      - effect: NoSchedule
        operator: Exists
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
      containers:
      - args:
        - -c
        - /etc/filebeat.yml
        - -e
        env:
        - name: ELASTICSEARCH_HOST
          value: elasticsearch
        - name: ELASTICSEARCH_PORT
          value: "9200"
        - name: ELASTICSEARCH_USERNAME
        - name: ELASTICSEARCH_PASSWORD
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        #image: docker.elastic.co/beats/filebeat:8.5.0
        image: 192.168.xxx.61:5000/es/filebeat:8.5.0
        imagePullPolicy: IfNotPresent
        name: filebeat
        resources:
          limits:
            cpu: 600m
            memory: 1024Mi
          requests:
            cpu: 300m
            memory: 800Mi
        securityContext:
          runAsUser: 0
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /etc/filebeat.yml
          name: config
          readOnly: true
          subPath: filebeat.yml
        - mountPath: /usr/share/filebeat/data
          name: data
        - mountPath: /var/log/containers
          name: varlibdockercontainers
          readOnly: true
        - mountPath: /var/log
          name: varlog
          readOnly: true
      dnsPolicy: ClusterFirstWithHostNet
      hostNetwork: true
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: filebeat
      serviceAccountName: filebeat
      terminationGracePeriodSeconds: 30
      volumes:
      - configMap:
          defaultMode: 416
          name: filebeat-config
        name: config
      - hostPath:
          path: /var/log/containers
          type: ""
        name: varlibdockercontainers
      - hostPath:
          path: /var/log
          type: ""
        name: varlog
      - hostPath:
          path: /var/lib/filebeat-data
          type: DirectoryOrCreate
        name: data
  updateStrategy:
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate
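
A typical rollout check for the DaemonSet, using standard kubectl commands:

# kubectl -n elasticsearch get ds filebeat
# kubectl -n elasticsearch logs -l k8s-app=filebeat --tail=20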

4. kibana

4.1. kibana.yaml

# cat kibana.yaml 
apiVersion: v1
kind: Service
metadata:
  name: kibana
  namespace: elasticsearch
  labels:
    app: kibana
spec:
  ports:
  - port: 5601
    nodePort: 30001
  type: NodePort
  selector:
    app: kibana
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kibana
  namespace: elasticsearch
  labels:
    app: kibana
spec:
  selector:
    matchLabels:
      app: kibana
  template:
    metadata:
      labels:
        app: kibana
    spec:
      nodeSelector:
        es: log
      containers:
      - name: kibana
        image: 192.168.xxx.61:5000/es/kibana:7.16.2
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 1500m
            memory: 4Gi
          requests:
            cpu: 800m
            memory: 2Gi
        env:
        - name: ELASTICSEARCH_HOSTS
          value: http://elasticsearch:9200
        - name: I18N_LOCALE
          value: "zh-CN"
        - name: SERVER_PUBLICBASEURL
          value: "http://192.168.xxx.xx:30001"
        ports:
        - containerPort: 5601
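
Kibana is then reachable on any node at port 30001 (matching SERVER_PUBLICBASEURL above); assuming the NodePort is open, a quick liveness check:

# curl -I http://<node-ip>:30001/api/status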

5. Circuit breaker configuration

# The fielddata circuit breaker estimates the heap memory needed to load field data into the field data cache; if loading it would exceed the configured limit, the breaker trips and the operation returns an error.
indices.breaker.fielddata.limit: fielddata breaker limit; defaults to 40% of the JVM heap.
PUT /_cluster/settings
{
  "persistent": {
    "indices.breaker.fielddata.limit": "60%"
  }
} 
 
# The request circuit breaker prevents per-request data structures (memory used to compute aggregations within a request) from exceeding a given size.
indices.breaker.request.limit: (dynamic) request breaker limit; defaults to 60% of the JVM heap.
PUT /_cluster/settings
{
  "persistent": {
    "indices.breaker.request.limit": "40%"
  }
} 

# The parent circuit breaker is controlled by the following settings:
indices.breaker.total.use_real_memory: (static) whether to use real memory; if true, the parent breaker accounts for the actual memory in use; if false, it only considers the memory reserved by the child breakers.
indices.breaker.total.limit: (dynamic) overall parent breaker limit; defaults to 70% of the JVM heap when indices.breaker.total.use_real_memory is false, and 95% when it is true.
PUT /_cluster/settings
{
  "persistent": {
    "indices.breaker.total.limit": "70%"
  }
}
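
The current usage and trip count of each breaker can be inspected before and after changing the limits:

GET _nodes/stats/breaker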

6. es high memory / high CPU usage

# Reference: https://blog.51cto.com/u_16099268/10114149

# find nodes with high memory / CPU usage
GET _cat/nodes?v=true&s=cpu:desc
GET _nodes/stats?filter_path=nodes.*.jvm.mem.pools.old
GET _nodes/

# clear the fielddata cache
POST _cache/clear?fielddata=true

# get hot threads; cancel long-running requests
GET /_nodes/hot_threads
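
A long-running request found through the task API can then be cancelled with the task cancellation endpoint (the task id below is a placeholder):

GET _tasks?detailed=true&actions=*search*
POST _tasks/<task_id>/_cancel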

7. Adding plugins to es

The plugins directory is shared over NFS:
NFS server: 192.168.129.33
NFS export: /home/es-nfs/plugins
mount point in the container: /usr/share/elasticsearch/plugins

volumeMounts:
  - name: es-nfs-plugins            # must match the volume name below
    mountPath: "/usr/share/elasticsearch/plugins"   # mount point inside the container
volumes:
  - name: es-nfs-plugins            # must match the volumeMount name above
    nfs:                            # NFS storage
      server: 192.168.129.33        # NFS server IP or hostname
      path: "/home/es-nfs/plugins"  # directory exported by the NFS server

8. Common operations

GET /_cat/health?v
GET /_cat/nodes?v
GET /_cluster/health?pretty

# list all indices
GET /_cat/indices?v
http://192.168.129.36:30092/_cat/indices?format=json&pretty

# create an index
PUT /customer?pretty
GET /_cat/indices?v

# delete an index 
DELETE /customer?pretty
GET /_cat/indices?v

# plugins
GET _cat/plugins

# show threads with high CPU usage
/_nodes/hot_threads
# show tasks es is currently executing
curl -XGET "http://*:9200/_tasks?pretty"
# check shard state and allocation
/_cluster/state

/_nodes?filter_path=**.mlockall

# show index allocation across the cluster and remaining disk space
http://192.168.129.45:30092/_cat/allocation?v

# shard information
curl -XGET http://192.168.129.36:30092/_cat/shards/k8s-prod*?v
index: index name
shard: shard number
prirep: shard type; p = pri = primary shard, r = rep = replica shard
state: shard state; STARTED is a healthy shard, INITIALIZING means the shard is not yet usable
docs: document count
store: storage size
ip: es node ip
node: es node name

# list unassigned shards
GET /_cat/shards?h=index,shard,prirep,state,unassigned.reason

# manually allocate an unassigned shard (on es 7.x the old allocate command no longer exists; use allocate_replica, or allocate_stale_primary / allocate_empty_primary with accept_data_loss for a primary)
POST /_cluster/reroute
{
  "commands": [
    {
      "allocate_stale_primary": {
        "index": "your_index",
        "shard": shard_number,
        "node": "your_node",
        "accept_data_loss": true
      }
    }
  ]
}

# explain why shards are unassigned
GET /_cluster/allocation/explain

# health status
GET /_cluster/health?pretty    # a large unassigned_shards value means many shards have not been allocated

# find the indices that have UNASSIGNED shards
GET /_cat/shards

# index is red and the logs mention write.lock
find /usr/share/elasticsearch/data/ -name write.lock -exec rm -rf {} \;  # es must be restarted afterwards

# fetch hot_threads to find out which threads are consuming CPU
http://192.168.129.45:30092/_nodes/hot_threads

# show tasks elasticsearch is currently running
http://192.168.129.45:30092/_cat/tasks?v&detailed=true

8.1. Kibana Dev Tools examples

GET _search
{
  "query": {
    "match_all": {}
  }
}

GET /_cluster/settings

GET /_cat/health?v

GET /_cat/nodes?v

GET _cluster/stats

GET _nodes/stats/breaker

PUT /_cluster/settings
{
  "persistent": {
    "cluster": {
      "max_shards_per_node":10000
    }
  }
}

GET k8s-bright-mc-test-bright-membercenter-party-service-2024.05.21/_settings

PUT /k8s-bright-mc-test-bright-membercenter-party-service-2024.05.21/_settings
{
  "index": {
    "highlight.max_analyzed_offset": 500000
  }
}

PUT /_cluster/settings
{
  "persistent": {
    "indices.breaker.total.limit": "95%"
  }
}

GET /_settings


DELETE /k8s-prod-info-2023.11.19


GET _cat/shards/bright*?v
GET _cat/shards/k8s-bright-*?v

9. Problems encountered

9.1. es Data too large

# Symptom: {"type":"circuit_breaking_exception","reason":"[parent] Data too large, data for [<http_request>] would be [6299707000/5.8gb], which is larger than the limit of [6012954214/5.5gb], real usage: [6299703296/5.8gb], new bytes reserved: [3704/3.6kb], usages [request=0/0b, fielddata=0/0b, in_flight_requests=198394/193.7kb, model_inference=0/0b, eql_sequence=0/0b, accounting=382067180/364.3mb]","bytes_wanted":6299707000,"bytes_limit":6012954214,"durability":"PERMANENT"}],"type":"circuit_breaking_exception","reason":"[parent] Data too large, data for [<http_request>] would be [6299707000/5.8gb], which is larger than the limit of [6012954214/5.5gb], real usage: [6299703296/5.8gb], new bytes reserved: [3704/3.6kb], usages [request=0/0b, fielddata=0/0b, in_flight_requests=198394/193.7kb, model_inference=0/0b, eql_sequence=0/0b, accounting=382067180/364.3mb]","bytes_wanted":6299707000,"bytes_limit":6012954214,"durability":"PERMANENT"},"status":429}

# Troubleshooting process
1. What causes this error?
To avoid out-of-memory problems, Elasticsearch sets up several circuit breakers; once memory usage reaches a breaker's configured threshold, the breaker trips and stops serving requests. The "Elasticsearch exception [type=circuit_breaking_exception" in the message shows that a breaker tripped.

2. Which breaker triggered the error?
The main breakers are the parent circuit breaker, the fielddata circuit breaker and the request circuit breaker.

Taking production as an example: the JVM heap is known to be configured at 8g, and the limit reported in the error above is 5.5g; 5.5g / 8g is roughly 70%, which points at the parent breaker.

Checking the configuration with GET _cluster/settings showed indices.breaker.total.limit set to 70% (not the default; if it were left at the default it might not be shown at all), consistent with that inference.

3. What consumed so much memory that the breaker tripped?
Having established that the parent breaker hit its threshold, the next step was to find out what was using the memory. GET _nodes/stats/breaker shows the usage of each breaker: only the parent entry was large and slowly growing, while the memory used by the other entries was tiny.

Next, a heap dump was taken with jmap and analyzed with the MAT memory analyzer, which clearly showed that most of the heap was held by the es write threads.

# Solution:
With the cause of the high heap usage largely understood, the next step was to verify in a staging environment whether the heap itself needed to be enlarged. Testing showed that once used heap grows to about 6gb (with an 8g max heap) a Full GC is triggered and the memory is released. From this it follows that the breaker will not trip as long as the parent breaker threshold is above 6g, so the fix is simply to raise
indices.breaker.total.limit from 70% to 95% (which is in fact the default), with no need to enlarge the heap.
PUT /_cluster/settings
{
  "persistent": {
    "indices.breaker.total.limit": "95%"
  }
}

9.2. es shard limit reached

# Symptom: filebeat logs: Validation Failed: 1: this action would add [2] shards, but this cluster currently has [3000]/[3000] maximum normal shards open;
# This error is caused by the cluster running out of shard quota.

# Fix: 1. raise the shard limit (e.g. to 10000)  2. delete old indices
# permanent
PUT /_cluster/settings
{
  "persistent": {
    "cluster": {
      "max_shards_per_node":10000
    }
  }
}
# temporary (transient)
PUT /_cluster/settings
{
  "transient": {
    "cluster": {
      "max_shards_per_node":10000
    }
  }
}
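
For the second option, an old index can simply be deleted to free its shards, e.g. (the same index shown in the Dev Tools examples above):

DELETE /k8s-prod-info-2023.11.19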

# view the configured shard limit
http://192.168.xxx.xx:30092/_cluster/settings?pretty

# how to recover indices lost because of the shard shortage
# not yet investigated

9.3. Recovering an index from red status

Method 1:
# 1. find the indices whose status is red
curl -X GET "http://192.168.xxx.xx:30092/_cat/indices?v="

# 2.
An index in red status cannot serve requests; it means some primary shard has not been assigned to a machine.

Find the UNASSIGNED shards; _cat/shards shows how the shards are allocated:
curl -X GET "http://192.168.xxx.xx:30092/_cat/shards"
index                            shard prirep state        docs   store   ip             node         
index                      1    p     STARTED     764505 338.6mb 172.xxx.xxx.174 Calypso      
index                      1    r     STARTED     764505 338.6mb 172.xxx.xxx.89  Savage Steel
index                      2    p     STARTED     763750 336.6mb 172.xxx.xxx.174 Calypso      
index                      2    r     STARTED     763750 336.6mb 172.xxx.xxx.88  Temugin      
index                      3    p     STARTED     764537 340.2mb 172.xxx.xxx.89  Savage Steel
index                      3    r     STARTED     764537 340.2mb 172.xxx.xxx.88  Temugin      
index                      4    p     STARTED     765476 339.3mb 172.xxx.xxx.89  Savage Steel
index                      4    r     STARTED     765476 339.3mb 172.xxx.xxx.88  Temugin      
index                      0    p     UNASSIGNED                                             
index                      0    r     UNASSIGNED
The index has primary shard 0 and replica shard 0 in UNASSIGNED state, i.e. not placed on any machine; because the primary is unassigned, the index status is red.
The ip column shows three machines in total, ending in 174, 89 and 88. There are 10 shards in total, matching the elasticsearch settings index.number_of_shards: 5 and index.number_of_replicas: 1. Ten shards can be spread 3/3/4 across the three machines; 88 and 89 already hold several shards each, so the remaining primary can be assigned to 174.
 
# 3. Find the machine ids. The id of machine 174 is needed later to reallocate the primary shard; it is Leivp0laTYSqvMVm49SulQ
curl -X GET "http://172.xxx.xxx.174:9288/_nodes/process?v="
{
  "cluster_name": "es2.3.2-titan-cl",
  "nodes": {
    "Leivp0laTYSqvMVm49SulQ": {
      "name": "Calypso",
      "transport_address": "172.xxx.xxx.174:9388",
      "host": "172.xxx.xxx.174",
      "ip": "172.xxx.xxx.174",
      "version": "2.3.2",
      "build": "b9e4a6a",
      "http_address": "172.xxx.xxx.174:9288",
      "process": {
        "refresh_interval_in_millis": 1000,
        "id": 32130,
        "mlockall": false
      }
    },
    "EafIS3ByRrm4g-14KmY_wg": {
      "name": "Savage Steel",
      "transport_address": "172.xxx.xxx.89:9388",
      "host": "172.xxx.xxx.89",
      "ip": "172.xxx.xxx.89",
      "version": "2.3.2",
      "build": "b9e4a6a",
      "http_address": "172.xxx.xxx.89:9288",
      "process": {
        "refresh_interval_in_millis": 1000,
        "id": 7560,
        "mlockall": false
      }
    },
    "tojQ9EiXS0m6ZP16N7Ug3A": {
      "name": "Temugin",
      "transport_address": "172.xxx.xxx.88:9388",
      "host": "172.xxx.xxx.88",
      "ip": "172.xxx.xxx.88",
      "version": "2.3.2",
      "build": "b9e4a6a",
      "http_address": "172.xxx.xxx.88:9288",
      "process": {
        "refresh_interval_in_millis": 1000,
        "id": 47701,
        "mlockall": false
      }
    }
  }
}

# 4. Alternatively, for simplicity the primary shard can be placed directly on the master machine, but concentrating shards on a few nodes hurts performance and raises the chance of data loss when a machine goes down, so it is better to reallocate according to the current shard distribution.
curl -X GET "http://172.xxx.xxx.174:9288/_cat/master?v="
id                     host          ip            node         
EafIS3ByRrm4g-14KmY_wg 172.xxx.xxx.89 172.xxx.xxx.89 Savage Steel

# 5. Allocate the UNASSIGNED shard to a machine
Only a shard in UNASSIGNED state can be reallocated; trying to reallocate a shard that is not UNASSIGNED, for example shard 1, produces an error:
curl -X POST -d '{
    "commands" : [ {
      "allocate" : {
          "index" : "index",
          "shard" : 1,
          "node" : "EafIS3ByRrm4g-14KmY_wg",
          "allow_primary" : true
      }
    }]
}' "http://172.xxx.xxx.174:9288/_cluster/reroute"

{
  "error": {
    "root_cause": [
      {
        "type": "remote_transport_exception",
        "reason": "[Savage Steel][172.xxx.xxx.89:9388][cluster:admin/reroute]"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "[allocate] failed to find [index][1] on the list of unassigned shards"
  },
  "status": 400
}

# 6. Reallocate shard 0 of the index to a machine. Be careful with the allow_primary parameter of _cluster/reroute: it can cause data loss.
curl -X POST -d '{
    "commands" : [ {
      "allocate" : {
          "index" : "index",
          "shard" : 0,
          "node" : "Leivp0laTYSqvMVm49SulQ",
          "allow_primary" : true
      }
    }]
}' "http://172.xxx.xxx.174:9288/_cluster/reroute"

{
  "acknowledged": true,
  .........
  "index": {
    "shards": {
      "0": [
        {
          "state": "INITIALIZING",
          "primary": true,
          "node": "Leivp0laTYSqvMVm49SulQ",
          "relocating_node": null,
          "shard": 0,
          "index": "index",
          "version": 1,
          "allocation_id": {
            "id": "wk5q0CryQpmworGFalfWQQ"
          },
          "unassigned_info": {
            "reason": "INDEX_CREATED",
            "at": "2017-03-23T12:27:33.405Z",
            "details": "force allocation from previous reason INDEX_REOPENED, null"
          }
        },
        {
          "state": "UNASSIGNED",
          "primary": false,
          "node": null,
          "relocating_node": null,
          "shard": 0,
          "index": "index",
          "version": 1,
          "unassigned_info": {
            "reason": "INDEX_REOPENED",
            "at": "2017-03-23T11:56:25.568Z"
          }
        }
      ]
      }
    }
    .............
}

# 7. Only the key parts of the output are shown above; the primary shard is in INITIALIZING state. Now check the index status:
curl -X GET "http://172.xxx.xxx.174:9288/_cat/indices?v="

green  open   index                          5   1    3058268        97588      2.6gb          1.3gb
The index status is back to green and it can be used normally again.

Method 2:
Add an empty machine to the existing cluster; as the new machine joins, shards are reallocated and the status recovers. Once everything in the cluster is green again, the newly added machine can be shut down.