Skip to main content

PromQL 表达式参考

本文档中的 PromQL 表达式可用于配置告警。

关于查询 Prometheus 时间序列数据库的更多信息，请参阅 Prometheus 官方文档。

集群指标#

集群 CPU 利用率#

| 目录 | 表达式 |
| --- | --- |
| 详细信息 | `1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))` |
| 摘要 | `1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])))` |

集群平均负载#

| 目录 | 表达式 |
| --- | --- |
| **详细信息** | |
| load1 | `sum(node_load1) by (instance) / count(node_cpu_seconds_total{mode="system"}) by (instance)` |
| load5 | `sum(node_load5) by (instance) / count(node_cpu_seconds_total{mode="system"}) by (instance)` |
| load15 | `sum(node_load15) by (instance) / count(node_cpu_seconds_total{mode="system"}) by (instance)` |
| **摘要** | |
| load1 | `sum(node_load1) by (instance) / count(node_cpu_seconds_total{mode="system"})` |
| load5 | `sum(node_load5) by (instance) / count(node_cpu_seconds_total{mode="system"})` |
| load15 | `sum(node_load15) by (instance) / count(node_cpu_seconds_total{mode="system"})` |

集群内存利用率#

| 目录 | 表达式 |
| --- | --- |
| 详细信息 | `1 - sum(node_memory_MemAvailable_bytes) by (instance) / sum(node_memory_MemTotal_bytes) by (instance)` |
| 摘要 | `1 - sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes)` |

集群磁盘利用率#

目录表达式
详细信息(sum(node_filesystem_size_bytes{device!="rootfs"}) by (instance) - sum(node_filesystem_free_bytes{device!="rootfs"}) by (instance)) / sum(node_filesystem_size_bytes{device!="rootfs"}) by (instance)
摘要(sum(node_filesystem_size_bytes{device!="rootfs"}) - sum(node_filesystem_free_bytes{device!="rootfs"})) / sum(node_filesystem_size_bytes{device!="rootfs"})

集群磁盘 I/O#

| 目录 | 表达式 |
| --- | --- |
| **详细信息** | |
| read | `sum(rate(node_disk_read_bytes_total[5m])) by (instance)` |
| written | `sum(rate(node_disk_written_bytes_total[5m])) by (instance)` |
| **摘要** | |
| read | `sum(rate(node_disk_read_bytes_total[5m]))` |
| written | `sum(rate(node_disk_written_bytes_total[5m]))` |

集群网络数据包#

请参阅英文官网

集群网络 I/O#

请参阅英文官网

节点指标#

节点 CPU 利用率#

目录表达式
详细信息avg(irate(node_cpu_seconds_total{mode!="idle", instance=~"$instance"}[5m])) by (mode)
摘要1 - (avg(irate(node_cpu_seconds_total{mode="idle", instance=~"$instance"}[5m])))

节点平均负载#

目录表达式
详细信息
load1sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
load5sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
load15sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
摘要
load1sum(node_load1{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
load5sum(node_load5{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})
load15sum(node_load15{instance=~"$instance"}) / count(node_cpu_seconds_total{mode="system",instance=~"$instance"})

节点内存利用率#

目录表达式
详细信息1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"}) / sum(node_memory_MemTotal_bytes{instance=~"$instance"})
摘要1 - sum(node_memory_MemAvailable_bytes{instance=~"$instance"}) / sum(node_memory_MemTotal_bytes{instance=~"$instance"})

节点磁盘利用率#

目录表达式
详细信息(sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) by (device) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"}) by (device)) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) by (device)
摘要(sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"}) - sum(node_filesystem_free_bytes{device!="rootfs",instance=~"$instance"})) / sum(node_filesystem_size_bytes{device!="rootfs",instance=~"$instance"})

节点磁盘 I/O#

目录表达式
详细信息
readsum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m]))
writtensum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m]))
摘要
readsum(rate(node_disk_read_bytes_total{instance=~"$instance"}[5m]))
writtensum(rate(node_disk_written_bytes_total{instance=~"$instance"}[5m]))

节点网络数据包#

请参阅英文官网

节点网络 I/O#

请参阅英文官网

Etcd 指标#

Etcd Leader#

max(etcd_server_has_leader)

Leader 改变次数#

max(etcd_server_leader_changes_seen_total)

失败 Proposals 次数#

sum(etcd_server_proposals_failed_total)

GRPC 客户端流量#

| 目录 | 表达式 |
| --- | --- |
| **详细信息** | |
| in | `sum(rate(etcd_network_client_grpc_received_bytes_total[5m])) by (instance)` |
| out | `sum(rate(etcd_network_client_grpc_sent_bytes_total[5m])) by (instance)` |
| **摘要** | |
| in | `sum(rate(etcd_network_client_grpc_received_bytes_total[5m]))` |
| out | `sum(rate(etcd_network_client_grpc_sent_bytes_total[5m]))` |

对等流量#

目录表达式
详细信息
insum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)
outsum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)
摘要
insum(rate(etcd_network_peer_received_bytes_total[5m]))
outsum(rate(etcd_network_peer_sent_bytes_total[5m]))

DB 大小#

目录表达式
详细信息sum(etcd_debugging_mvcc_db_total_size_in_bytes) by (instance)
摘要sum(etcd_debugging_mvcc_db_total_size_in_bytes)

活跃 Streams#

目录表达式
详细信息
lease-watchsum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) by (instance) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) by (instance)
watchsum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) by (instance) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) by (instance)
摘要
lease-watchsum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
watchsum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})

Raft 提案#

| 目录 | 表达式 |
| --- | --- |
| **详细信息** | |
| applied | `sum(increase(etcd_server_proposals_applied_total[5m])) by (instance)` |
| committed | `sum(increase(etcd_server_proposals_committed_total[5m])) by (instance)` |
| pending | `sum(increase(etcd_server_proposals_pending[5m])) by (instance)` |
| failed | `sum(increase(etcd_server_proposals_failed_total[5m])) by (instance)` |
| **摘要** | |
| applied | `sum(increase(etcd_server_proposals_applied_total[5m]))` |
| committed | `sum(increase(etcd_server_proposals_committed_total[5m]))` |
| pending | `sum(increase(etcd_server_proposals_pending[5m]))` |
| failed | `sum(increase(etcd_server_proposals_failed_total[5m]))` |

RPC 速率#

目录表达式
详细信息
totalsum(rate(grpc_server_started_total{grpc_type="unary"}[5m])) by (instance)
failsum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m])) by (instance)
摘要
totalsum(rate(grpc_server_started_total{grpc_type="unary"}[5m]))
failsum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m]))

磁盘操作#

目录表达式
详细信息
commit-called-by-backendsum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m])) by (instance)
fsync-called-by-walsum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m])) by (instance)
摘要
commit-called-by-backendsum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m]))
fsync-called-by-walsum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m]))

磁盘同步持续时间#

| 目录 | 表达式 |
| --- | --- |
| **详细信息** | |
| wal | `histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le))` |
| db | `histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le))` |
| **摘要** | |
| wal | `sum(histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le)))` |
| db | `sum(histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le)))` |

Kubernetes 组件指标#

API Server 请求延迟#

目录表达式
详细信息avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) by (instance, verb) /1e+06
摘要avg(apiserver_request_latencies_sum / apiserver_request_latencies_count) by (instance) /1e+06

API Server 请求率#

目录表达式
详细信息sum(rate(apiserver_request_count[5m])) by (instance, code)
摘要sum(rate(apiserver_request_count[5m])) by (instance)

调度失败的 Pod#

目录表达式
详细信息sum(kube_pod_status_scheduled{condition="false"})
摘要sum(kube_pod_status_scheduled{condition="false"})

控制器管理器队列深度#

| 目录 | 表达式 |
| --- | --- |
| **详细信息** | |
| volumes | `sum(volumes_depth) by (instance)` |
| deployment | `sum(deployment_depth) by (instance)` |
| replicaset | `sum(replicaset_depth) by (instance)` |
| service | `sum(service_depth) by (instance)` |
| serviceaccount | `sum(serviceaccount_depth) by (instance)` |
| endpoint | `sum(endpoint_depth) by (instance)` |
| daemonset | `sum(daemonset_depth) by (instance)` |
| statefulset | `sum(statefulset_depth) by (instance)` |
| replicationmanager | `sum(replicationmanager_depth) by (instance)` |
| **摘要** | |
| volumes | `sum(volumes_depth)` |
| deployment | `sum(deployment_depth)` |
| replicaset | `sum(replicaset_depth)` |
| service | `sum(service_depth)` |
| serviceaccount | `sum(serviceaccount_depth)` |
| endpoint | `sum(endpoint_depth)` |
| daemonset | `sum(daemonset_depth)` |
| statefulset | `sum(statefulset_depth)` |
| replicationmanager | `sum(replicationmanager_depth)` |

调度器 E2E 调度延迟#

目录表达式
详细信息histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) by (le, instance)) / 1e+06
摘要sum(histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket) by (le, instance)) / 1e+06)

调度器抢占尝试次数#

目录表达式
详细信息sum(rate(scheduler_total_preemption_attempts[5m])) by (instance)
摘要sum(rate(scheduler_total_preemption_attempts[5m]))

Ingress Controller 连接数#

目录表达式
详细信息
readingsum(nginx_ingress_controller_nginx_process_connections{state="reading"}) by (instance)
waitingsum(nginx_ingress_controller_nginx_process_connections{state="waiting"}) by (instance)
writingsum(nginx_ingress_controller_nginx_process_connections{state="writing"}) by (instance)
acceptedsum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="accepted"}[5m]))) by (instance)
activesum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="active"}[5m]))) by (instance)
handledsum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="handled"}[5m]))) by (instance)
摘要
readingsum(nginx_ingress_controller_nginx_process_connections{state="reading"})
waitingsum(nginx_ingress_controller_nginx_process_connections{state="waiting"})
writingsum(nginx_ingress_controller_nginx_process_connections{state="writing"})
acceptedsum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="accepted"}[5m])))
activesum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="active"}[5m])))
handledsum(ceil(increase(nginx_ingress_controller_nginx_process_connections_total{state="handled"}[5m])))

Ingress Controller 请求处理时间#

目录表达式
详细信息topk(10, histogram_quantile(0.95,sum by (le, host, path)(rate(nginx_ingress_controller_request_duration_seconds_bucket{host!="_"}[5m]))))
摘要topk(10, histogram_quantile(0.95,sum by (le, host)(rate(nginx_ingress_controller_request_duration_seconds_bucket{host!="_"}[5m]))))

Rancher Logging 指标#

Fluentd 缓冲区队列速率#

目录表达式
详细信息sum(rate(fluentd_output_status_buffer_queue_length[5m])) by (instance)
摘要sum(rate(fluentd_output_status_buffer_queue_length[5m]))

Fluentd 输入率#

目录表达式
详细信息sum(rate(fluentd_input_status_num_records_total[5m])) by (instance)
摘要sum(rate(fluentd_input_status_num_records_total[5m]))

Fluentd 输出错误率#

目录表达式
详细信息sum(rate(fluentd_output_status_num_errors[5m])) by (type)
摘要sum(rate(fluentd_output_status_num_errors[5m]))

Fluentd 输出率#

目录表达式
详细信息sum(rate(fluentd_output_status_num_records_total[5m])) by (instance)
摘要sum(rate(fluentd_output_status_num_records_total[5m]))

Workload 指标#

Workload CPU 利用率#

目录表达式
详细信息
cfs throttled secondssum(rate(container_cpu_cfs_throttled_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
user secondssum(rate(container_cpu_user_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
system secondssum(rate(container_cpu_system_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
usage secondssum(rate(container_cpu_usage_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
摘要
cfs throttled secondssum(rate(container_cpu_cfs_throttled_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
user secondssum(rate(container_cpu_user_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
system secondssum(rate(container_cpu_system_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
usage secondssum(rate(container_cpu_usage_seconds_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))

Workload 内存利用率#

目录表达式
详细信息sum(container_memory_working_set_bytes{namespace="$namespace",pod_name=~"$podName", container_name!=""}) by (pod_name)
摘要sum(container_memory_working_set_bytes{namespace="$namespace",pod_name=~"$podName", container_name!=""})

Workload 网络数据包#

目录表达式
详细信息
receive-packetssum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
receive-droppedsum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
receive-errorssum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
transmit-packetssum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
transmit-droppedsum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
transmit-errorssum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
摘要
receive-packetssum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
receive-droppedsum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
receive-errorssum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
transmit-packetssum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
transmit-droppedsum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
transmit-errorssum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))

Workload 网络 I/O#

目录表达式
详细信息
receivesum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
transmitsum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
摘要
receivesum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
transmitsum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))

Workload 磁盘 I/O#

目录表达式
详细信息
readsum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
writesum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m])) by (pod_name)
摘要
readsum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))
writesum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name=~"$podName",container_name!=""}[5m]))

Pod 指标#

Pod CPU 利用率#

目录表达式
详细信息
cfs throttled secondssum(rate(container_cpu_cfs_throttled_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)
usage secondssum(rate(container_cpu_usage_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)
system secondssum(rate(container_cpu_system_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)
user secondssum(rate(container_cpu_user_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m])) by (container_name)
摘要
cfs throttled secondssum(rate(container_cpu_cfs_throttled_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))
usage secondssum(rate(container_cpu_usage_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))
system secondssum(rate(container_cpu_system_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))
user secondssum(rate(container_cpu_user_seconds_total{container_name!="POD",namespace="$namespace",pod_name="$podName", container_name!=""}[5m]))

Pod 内存利用率#

目录表达式
详细信息sum(container_memory_working_set_bytes{container_name!="POD",namespace="$namespace",pod_name="$podName",container_name!=""}) by (container_name)
摘要sum(container_memory_working_set_bytes{container_name!="POD",namespace="$namespace",pod_name="$podName",container_name!=""})

Pod 网络数据包#

目录表达式
详细信息
receive-packetssum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
receive-droppedsum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
receive-errorssum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmit-packetssum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmit-droppedsum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmit-errorssum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
摘要
receive-packetssum(rate(container_network_receive_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
receive-droppedsum(rate(container_network_receive_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
receive-errorssum(rate(container_network_receive_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmit-packetssum(rate(container_network_transmit_packets_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmit-droppedsum(rate(container_network_transmit_packets_dropped_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmit-errorssum(rate(container_network_transmit_errors_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))

Pod 网络 I/O#

目录表达式
详细信息
receivesum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmitsum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
摘要
receivesum(rate(container_network_receive_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
transmitsum(rate(container_network_transmit_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))

Pod 磁盘 I/O#

目录表达式
详细信息
readsum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m])) by (container_name)
writesum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m])) by (container_name)
摘要
readsum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))
writesum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name="$podName",container_name!=""}[5m]))

Container 指标#

Container CPU 利用率#

目录表达式
cfs throttled secondssum(rate(container_cpu_cfs_throttled_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))
usage secondssum(rate(container_cpu_usage_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))
system secondssum(rate(container_cpu_system_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))
user secondssum(rate(container_cpu_user_seconds_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))

Container 内存利用率#

sum(container_memory_working_set_bytes{namespace="$namespace",pod_name="$podName",container_name="$containerName"})

Container 磁盘 I/O#

目录表达式
readsum(rate(container_fs_reads_bytes_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))
writesum(rate(container_fs_writes_bytes_total{namespace="$namespace",pod_name="$podName",container_name="$containerName"}[5m]))
Last updated on by kingsd041