Add alert rules for `systemd` services.

This commit is contained in:
Kazuhiro MUSASHI 2023-02-05 13:40:53 +09:00
parent 9aca3252b6
commit e509c531ae
2 changed files with 13 additions and 1 deletions

View File

@ -20,7 +20,7 @@ encrypted_remote_file '/etc/prometheus.d/alertmanager.yml' do
end end
# Deploy alert setting file: # Deploy alert setting file:
%w(node_exporter prometheus filestat).each do |conf| %w(node_exporter prometheus filestat services snmp).each do |conf|
remote_file "/etc/prometheus.d/alerts/#{conf}.yml" do remote_file "/etc/prometheus.d/alerts/#{conf}.yml" do
owner 'root' owner 'root'
group 'root' group 'root'

View File

@ -0,0 +1,12 @@
# Prometheus alerting rules for systemd-managed services.
groups:
  - name: services
    rules:
      # Fires once the "active" state series for digdag.service has been
      # anything other than 1 for five consecutive minutes.
      # NOTE(review): node_systemd_unit_state is presumably supplied by
      # node_exporter's systemd collector -- confirm that collector is enabled
      # on the scrape targets, otherwise this rule never has data to evaluate.
      - alert: Digdag
        expr: node_systemd_unit_state{name="digdag.service", state="active"} != 1
        for: 5m
        labels:
          severity: error
        annotations:
          summary: "Digdag is not running: {{ $labels.instance }}."
          description: "Digdag is not running: {{ $labels.instance }}."