# File: //lib/netdata/conf.d/health.d/clickhouse.conf
# you can disable an alarm notification by setting the 'to' line to: silent
# Restart detection for ClickHouse.
# Applies to charts with context clickhouse.uptime and is evaluated every
# 10 seconds. The alert value is taken directly from $uptime (in seconds).
# Recipient is 'silent': the alert is raised but no notification is sent.
template: clickhouse_restarted
on: clickhouse.uptime
class: Error
type: Database
component: ClickHouse
calc: $uptime
units: seconds
every: 10s
# WARNING while uptime is above 1 second and below 180 seconds, i.e. only for
# a short window after the server has come up.
warn: $this > 1 AND $this < 180
summary: ClickHouse restart detected
info: ClickHouse has recently been restarted
to: silent
# Preempted queries.
# Applies to charts with context clickhouse.queries_preempted and is evaluated
# every 10 seconds. Notifications go to the 'dba' recipient.
template: clickhouse_queries_preempted
on: clickhouse.queries_preempted
class: Workload
type: Database
component: ClickHouse
# Alert value: the maximum seen over the last minute.
lookup: max -1m unaligned
units: preempted_queries
every: 10s
# WARNING as soon as any preempted query was seen in the lookup window.
warn: $this > 0
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse preempted queries detected
info: ClickHouse has queries that are stopped and waiting due to priority setting
to: dba
# Long-running query detection.
# Applies to charts with context clickhouse.longest_running_query_time and is
# evaluated every 10 seconds. Notifications go to the 'dba' recipient.
template: clickhouse_long_running_query
on: clickhouse.longest_running_query_time
class: Latency
type: Database
component: ClickHouse
# Alert value: the maximum seen over the last minute, in seconds.
lookup: max -1m unaligned
units: seconds
every: 10s
# Hysteresis: while the alert is not in WARNING the threshold is 600 seconds;
# once it is in WARNING (or above) the threshold drops to 300 seconds, so the
# alert only clears after the value falls to 300 or below.
warn: $this > (($status >= $WARNING) ? (300) : (600))
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse long-running query detected
info: ClickHouse has a long-running query exceeding the threshold
to: dba
# Rejected INSERT queries.
# Applies to charts with context clickhouse.rejected_inserts and is evaluated
# every 10 seconds. Notifications go to the 'dba' recipient.
template: clickhouse_rejected_inserts
on: clickhouse.rejected_inserts
class: Workload
type: Database
component: ClickHouse
# Alert value: the sum over the last minute.
lookup: sum -1m unaligned
units: rejected_inserts
every: 10s
# WARNING as soon as any rejected INSERT was counted in the lookup window.
warn: $this > 0
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse rejected INSERT queries detected
info: ClickHouse has INSERT queries that are rejected due to high number of active data parts for partition in a MergeTree
to: dba
# Delayed (throttled) INSERT queries.
# Applies to charts with context clickhouse.delayed_inserts and is evaluated
# every 10 seconds. Recipient is 'silent': the alert is raised but no
# notification is sent.
template: clickhouse_delayed_inserts
on: clickhouse.delayed_inserts
class: Workload
type: Database
component: ClickHouse
# Alert value: the sum over the last minute.
lookup: sum -1m unaligned
units: delayed_inserts
every: 10s
# WARNING as soon as any delayed INSERT was counted in the lookup window.
warn: $this > 0
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse delayed INSERT queries detected
info: ClickHouse has INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree
to: silent
# Replication lag.
# Applies to charts with context clickhouse.replicas_max_absolute_delay and is
# evaluated every 10 seconds. Notifications go to the 'dba' recipient.
template: clickhouse_replication_lag
on: clickhouse.replicas_max_absolute_delay
class: Workload
type: Database
component: ClickHouse
# Alert value: the average over the last minute, in seconds.
lookup: avg -1m unaligned
units: seconds
every: 10s
# Hysteresis: the alert is raised above 300 seconds (the 5 minutes named in
# 'info'); once in WARNING the threshold drops to 250 seconds, so the alert
# only clears after the value falls to 250 or below.
warn: $this > (($status >= $WARNING) ? (250) : (300))
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse high replication lag detected
info: ClickHouse is experiencing replication lag greater than 5 minutes
to: dba
# Replicated tables in read-only state.
# Applies to charts with context clickhouse.replicated_readonly_tables and is
# evaluated every 10 seconds. Notifications go to the 'dba' recipient.
template: clickhouse_replicated_readonly_tables
on: clickhouse.replicated_readonly_tables
class: Error
type: Database
component: ClickHouse
# Alert value: the maximum seen over the last minute.
lookup: max -1m unaligned
units: readonly_tables
every: 10s
# WARNING as soon as at least one read-only table was seen in the lookup window.
warn: $this > 0
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse replicated tables in readonly state detected
info: ClickHouse replicated tables are read-only due to startup without ZooKeeper configured or ZooKeeper session loss.
to: dba
# High number of parts per partition.
# Applies to charts with context clickhouse.max_part_count_for_partition and is
# evaluated every 10 seconds. Notifications go to the 'dba' recipient.
template: clickhouse_max_part_count_for_partition
on: clickhouse.max_part_count_for_partition
class: Workload
type: Database
component: ClickHouse
# Alert value: the average over the last minute.
lookup: avg -1m unaligned
units: parts
every: 10s
# Hysteresis: the alert is raised above 300 parts; once in WARNING the
# threshold drops to 200, so the alert only clears after the value falls to
# 200 or below.
# NOTE(review): presumably 300 mirrors a ClickHouse MergeTree insert-rejection
# limit -- confirm against the server's configured settings.
warn: $this > (($status >= $WARNING) ? (200) : (300))
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse high parts/partition detected
info: ClickHouse high number of parts per partition
to: dba
# Failed distributed connections (all retries exhausted).
# Applies to charts with context
# clickhouse.distributed_connections_fail_exhausted_retries and is evaluated
# every 10 seconds. Notifications go to the 'dba' recipient.
template: clickhouse_distributed_connections_failures
on: clickhouse.distributed_connections_fail_exhausted_retries
class: Error
type: Database
component: ClickHouse
# Alert value: the sum over the last minute.
lookup: sum -1m unaligned
units: failures
every: 10s
# WARNING as soon as any such failure was counted in the lookup window.
warn: $this > 0
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse distributed connections failures detected
info: ClickHouse has failed distributed connections after exhausting all retry attempts
to: dba
# Backlog of files pending insertion into Distributed tables.
# Applies to charts with context clickhouse.distributed_files_to_insert and is
# evaluated every 10 seconds. Recipient is 'silent': the alert is raised but no
# notification is sent.
template: clickhouse_distributed_files_to_insert
on: clickhouse.distributed_files_to_insert
class: Workload
type: Database
component: ClickHouse
# Alert value: the maximum seen over the last minute.
lookup: max -1m unaligned
units: files
every: 10s
# Hysteresis: the alert is raised above 80 files; once in WARNING the
# threshold drops to 40, so the alert only clears after the value falls to
# 40 or below.
warn: $this > (($status >= $WARNING) ? (40) : (80))
# Recovery notifications are held back: 5 minutes to start with, scaled by 1.5
# and capped at 1 hour (see Netdata's health reference for the exact rules).
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse high files to insert detected
info: ClickHouse high number of pending files to process for asynchronous insertion into Distributed tables
to: silent