# Prometheus alerting rules (YAML)
---
# Alerting rules covering scrape targets, Prometheus, and Alertmanager.
# Each rule fires once its `expr` has held continuously for the `for:` duration.
groups:
  - name: Prometheus
    rules:
      # Fires when a target's `up` series has been 0 for one minute.
      - alert: ExporterDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          # Keep the duration in this text in sync with `for:` above.
          description: "{{ $labels.instance }} has been down for more than 1 minute."

      # Fires when prometheus_config_last_reload_successful has been anything
      # other than 1 for five minutes.
      - alert: PrometheusConfigurationReload
        expr: prometheus_config_last_reload_successful != 1
        for: 5m
        labels:
          # NOTE(review): ExporterDown uses `critical` while this rule uses
          # `error`; confirm the Alertmanager routes match both values.
          severity: error
        annotations:
          summary: "Prometheus configuration reload (instance {{ $labels.instance }})"
          description: "Prometheus configuration reload error\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

      # Fires when alertmanager_config_last_reload_successful has been anything
      # other than 1 for five minutes.
      - alert: AlertmanagerConfigurationReload
        expr: alertmanager_config_last_reload_successful != 1
        for: 5m
        labels:
          severity: error
        annotations:
          summary: "AlertManager configuration reload (instance {{ $labels.instance }})"
          description: "AlertManager configuration reload error\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"