---
# Prometheus alerting rules: one rule group containing three alerts.
# Annotation values are Go-templated by Prometheus ({{ $labels }}, {{ $value }}),
# so they are kept as double-quoted strings to preserve the `\n` escapes.
groups:
  - name: Prometheus
    rules:
      # Fires when a series of the `up` metric has been 0 for 1 minute.
      - alert: ExporterDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          # Duration in the text must stay in sync with `for:` above.
          description: "{{ $labels.instance }} has been down for more than 1 minute."

      # Fires when prometheus_config_last_reload_successful has been
      # anything other than 1 for 5 minutes.
      - alert: PrometheusConfigurationReload
        expr: prometheus_config_last_reload_successful != 1
        for: 5m
        labels:
          severity: error
        annotations:
          summary: "Prometheus configuration reload (instance {{ $labels.instance }})"
          description: "Prometheus configuration reload error\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"

      # Fires when alertmanager_config_last_reload_successful has been
      # anything other than 1 for 5 minutes.
      - alert: AlertmanagerConfigurationReload
        expr: alertmanager_config_last_reload_successful != 1
        for: 5m
        labels:
          severity: error
        annotations:
          summary: "AlertManager configuration reload (instance {{ $labels.instance }})"
          description: "AlertManager configuration reload error\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"