File: //lib/netdata/conf.d/health.d/vernemq.conf
# Socket errors
# Alert on the sum of socket errors over the last minute, evaluated every minute.
# Hysteresis: WARNING is raised above 5 and, once raised, is held until the sum is back to 0.
# Notifications are delayed 2m on raise and 5m on clear (x1.5 per repeat, capped at 2h).
template: vernemq_socket_errors
on: vernemq.node_socket_errors
class: Errors
type: Messaging
component: VerneMQ
lookup: sum -1m unaligned
units: errors
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} socket errors
info: Node ${label:node} socket errors in the last minute
to: sysadmin
# Queues dropped/expired/unhandled PUBLISH messages
# Alert on the 1-minute average (absolute values) of the `dropped` dimension
# of the undelivered-messages chart.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_queue_message_drop
on: vernemq.node_queue_undelivered_messages
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of dropped
units: dropped messages
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} dropped messages
info: Node ${label:node} dropped messages due to full queues in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of the `expired` dimension
# of the undelivered-messages chart.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_queue_message_expired
on: vernemq.node_queue_undelivered_messages
class: Latency
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of expired
units: expired messages
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} expired messages
info: Node ${label:node} expired before delivery messages in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of the `unhandled` dimension
# of the undelivered-messages chart.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_queue_message_unhandled
on: vernemq.node_queue_undelivered_messages
class: Latency
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of unhandled
units: unhandled messages
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} unhandled messages
info: Node ${label:node} unhandled messages in the last minute
to: sysadmin
# Erlang VM
# Alert on the 10-minute average scheduler utilization, evaluated every minute.
# WARNING:  raised above 85%, then held until the average falls to 75% or below.
# CRITICAL: raised above 95%, then held until the average falls to 85% or below.
# Only the clearing notification is delayed (15m, x1.5 per repeat, capped at 1h).
template: vernemq_average_scheduler_utilization
on: vernemq.node_average_scheduler_utilization
class: Utilization
type: Messaging
component: VerneMQ
lookup: average -10m unaligned
units: %
every: 1m
warn: $this > (($status >= $WARNING) ? (75) : (85))
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
summary: Node ${label:node} scheduler utilization
info: Node ${label:node} scheduler utilization over the last 10 minutes
to: sysadmin
# Cluster communication and netsplits
# Alert on the sum of dropped cluster traffic over the last minute.
# No hysteresis: WARNING whenever the sum is above 0.
# Notifications are delayed 5m on both raise and clear (x1.5 per repeat, capped at 1h).
template: vernemq_cluster_dropped
on: vernemq.node_cluster_dropped
class: Errors
type: Messaging
component: VerneMQ
lookup: sum -1m unaligned
units: KiB
every: 1m
warn: $this > 0
delay: up 5m down 5m multiplier 1.5 max 1h
summary: Node ${label:node} dropped cluster traffic
info: Node ${label:node} traffic dropped during communication with the cluster nodes in the last minute
to: sysadmin
# Alert on the sum (absolute values) of the `detected` dimension over the last minute.
# Evaluated every 10s, unlike the other alerts in this file, which run every 1m.
# No hysteresis: WARNING whenever the sum is above 0; only the clearing notification is delayed.
template: vernemq_netsplits
on: vernemq.node_netsplits
class: Workload
type: Messaging
component: VerneMQ
lookup: sum -1m unaligned absolute of detected
units: netsplits
every: 10s
warn: $this > 0
delay: down 5m multiplier 1.5 max 2h
summary: Node ${label:node} detected netsplits
info: Node ${label:node} detected netsplits (split brain) in the last minute
to: sysadmin
# Unsuccessful CONNACK
# Alert on the 1-minute average (absolute values) of sent CONNACK packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_connack_sent_reason_unsuccessful
on: vernemq.node_mqtt_connack_sent_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} unsuccessful sent CONNACK
info: Node ${label:node} unsuccessful sent v5 CONNACK packets in the last minute
to: sysadmin
# Not normal DISCONNECT
# Alert on the 1-minute average (absolute values) of received DISCONNECT packets, taken over
# every reason-code dimension except `normal_disconnect` (pattern `!normal_disconnect,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_disconnect_received_reason_not_normal
on: vernemq.node_mqtt_disconnect_received_by_reason_code
class: Workload
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !normal_disconnect,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} received not normal DISCONNECT
info: Node ${label:node} received not normal v5 DISCONNECT packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of sent DISCONNECT packets, taken over
# every reason-code dimension except `normal_disconnect` (pattern `!normal_disconnect,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_disconnect_sent_reason_not_normal
on: vernemq.node_mqtt_disconnect_sent_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !normal_disconnect,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} sent not normal DISCONNECT
info: Node ${label:node} sent not normal v5 DISCONNECT packets in the last minute
to: sysadmin
# SUBSCRIBE errors and unauthorized attempts
# Alert on the 1-minute average (absolute values) of failed SUBSCRIBE operations.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_subscribe_error
on: vernemq.node_mqtt_subscribe_error
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: failed ops
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} failed SUBSCRIBE
info: Node ${label:node} mqtt v${label:mqtt_version} failed SUBSCRIBE operations in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of unauthorized SUBSCRIBE attempts.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_subscribe_auth_error
on: vernemq.node_mqtt_subscribe_auth_error
class: Workload
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: attempts
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} unauthorized SUBSCRIBE
info: Node ${label:node} mqtt v${label:mqtt_version} unauthorized SUBSCRIBE attempts in the last minute
to: sysadmin
# UNSUBSCRIBE errors
# Alert on the 1-minute average (absolute values) of failed UNSUBSCRIBE operations.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_unsubscribe_error
on: vernemq.node_mqtt_unsubscribe_error
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: failed ops
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} failed UNSUBSCRIBE
info: Node ${label:node} mqtt v${label:mqtt_version} failed UNSUBSCRIBE operations in the last minute
to: sysadmin
# PUBLISH errors and unauthorized attempts
# Alert on the 1-minute average (absolute values) of failed PUBLISH operations.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_publish_errors
on: vernemq.node_mqtt_publish_errors
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: failed ops
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} failed PUBLISH
info: Node ${label:node} mqtt v${label:mqtt_version} failed PUBLISH operations in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of unauthorized PUBLISH attempts.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_publish_auth_errors
on: vernemq.node_mqtt_publish_auth_errors
class: Workload
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: attempts
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} unauthorized PUBLISH
info: Node ${label:node} mqtt v${label:mqtt_version} unauthorized PUBLISH attempts in the last minute
to: sysadmin
# Unsuccessful and unexpected PUBACK
# Alert on the 1-minute average (absolute values) of received PUBACK packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_puback_received_reason_unsuccessful
on: vernemq.node_mqtt_puback_received_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 received unsuccessful PUBACK
info: Node ${label:node} mqtt v5 received unsuccessful PUBACK packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of sent PUBACK packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_puback_sent_reason_unsuccessful
on: vernemq.node_mqtt_puback_sent_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 unsuccessful sent PUBACK
info: Node ${label:node} mqtt v5 unsuccessful sent PUBACK packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of unexpected PUBACK messages.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_puback_unexpected
on: vernemq.node_mqtt_puback_invalid_error
class: Workload
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: messages
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBACK
info: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBACK messages in the last minute
to: sysadmin
# Unsuccessful and unexpected PUBREC
# Alert on the 1-minute average (absolute values) of received PUBREC packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_pubrec_received_reason_unsuccessful
on: vernemq.node_mqtt_pubrec_received_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 received unsuccessful PUBREC
info: Node ${label:node} mqtt v5 received unsuccessful PUBREC packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of sent PUBREC packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
on: vernemq.node_mqtt_pubrec_sent_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 unsuccessful sent PUBREC
info: Node ${label:node} mqtt v5 unsuccessful sent PUBREC packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of invalid PUBREC packets received.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_pubrec_invalid_error
on: vernemq.node_mqtt_pubrec_invalid_error
class: Workload
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: messages
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} received invalid PUBREC
info: Node ${label:node} mqtt v${label:mqtt_version} received invalid PUBREC packets in the last minute
to: sysadmin
# Unsuccessful PUBREL
# Alert on the 1-minute average (absolute values) of received PUBREL packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_pubrel_received_reason_unsuccessful
on: vernemq.node_mqtt_pubrel_received_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 received unsuccessful PUBREL
info: Node ${label:node} mqtt v5 received unsuccessful PUBREL packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of sent PUBREL packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
on: vernemq.node_mqtt_pubrel_sent_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 unsuccessful sent PUBREL
info: Node ${label:node} mqtt v5 unsuccessful sent PUBREL packets in the last minute
to: sysadmin
# Unsuccessful and unexpected PUBCOMP
# Alert on the 1-minute average (absolute values) of received PUBCOMP packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
on: vernemq.node_mqtt_pubcomp_received_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 received unsuccessful PUBCOMP
info: Node ${label:node} mqtt v5 received unsuccessful PUBCOMP packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of sent PUBCOMP packets, taken over
# every reason-code dimension except `success` (pattern `!success,*`).
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
on: vernemq.node_mqtt_pubcomp_sent_by_reason_code
class: Errors
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute of !success,*
units: packets
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v5 unsuccessful sent PUBCOMP
info: Node ${label:node} mqtt v5 unsuccessful sent PUBCOMP packets in the last minute
to: sysadmin
# Alert on the 1-minute average (absolute values) of unexpected PUBCOMP packets received.
# Hysteresis: WARNING is raised above 5 and held until the value is back to 0.
# The alert text is filled in from the chart's `node` and `mqtt_version` labels.
template: vernemq_mqtt_pubcomp_unexpected
on: vernemq.node_mqtt_pubcomp_invalid_error
class: Workload
type: Messaging
component: VerneMQ
lookup: average -1m unaligned absolute
units: messages
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (5))
delay: up 2m down 5m multiplier 1.5 max 2h
summary: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBCOMP
info: Node ${label:node} mqtt v${label:mqtt_version} received unexpected PUBCOMP packets in the last minute
to: sysadmin