HEX
Server: LiteSpeed
System: Linux kapuas.iixcp.rumahweb.net 5.14.0-427.42.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Fri Nov 1 14:58:02 EDT 2024 x86_64
User: mirz4654 (1666)
PHP: 8.1.33
Disabled: system,exec,escapeshellarg,escapeshellcmd,passthru,proc_close,proc_get_status,proc_nice,proc_open,proc_terminate,shell_exec,popen,pclose,dl,pfsockopen,leak,apache_child_terminate,posix_kill,posix_mkfifo,posix_setsid,posix_setuid,posix_setpgid,ini_alter,show_source,define_syslog_variables,symlink,syslog,openlog,openlog,closelog,ocinumcols,listen,chgrp,apache_note,apache_setenv,debugger_on,debugger_off,ftp_exec,dll,ftp,myshellexec,socket_bind,mail,posix_getwpuid
Upload Files
File: //lib/netdata/conf.d/health.d/consul.conf
# you can disable an alarm notification by setting the 'to' line to: silent

 # Consul Enterprise license expiry alert.
 # Evaluates $license_expiration (units: seconds) once every 60 minutes.
 # WARNING when the value is below 14 days (14*24*60*60 seconds),
 # CRITICAL when it is below 7 days (7*24*60*60 seconds).
 template: consul_license_expiration_time
       on: consul.license_expiration_time
    class: Errors
     type: ServiceMesh
component: Consul
     calc: $license_expiration
    every: 60m
    units: seconds
     warn: $this < 14*24*60*60
     crit: $this < 7*24*60*60
  summary: Consul license expiration on ${label:node_name}
     info: Consul Enterprise license expiration time on node ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin

 # Datacenter-level autopilot health alert.
 # Reads the $unhealthy value every 10 seconds and raises WARNING when it
 # equals 1. There is no critical level for this alert.
 # The 'delay' line postpones the notification sent when the alert status
 # goes down (5m base, multiplied by 1.5, capped at 1h).
 template: consul_autopilot_health_status
       on: consul.autopilot_health_status
    class: Errors
     type: ServiceMesh
component: Consul
     calc: $unhealthy
    every: 10s
    units: status
     warn: $this == 1
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul datacenter ${label:datacenter} health
     info: Datacenter ${label:datacenter} cluster is unhealthy as reported by server ${label:node_name}
       to: sysadmin

 # Per-server autopilot health alert.
 # Reads the $unhealthy value every 10 seconds and raises WARNING when it
 # equals 1. There is no critical level for this alert.
 # The 'delay' line postpones the notification sent when the alert status
 # goes down (5m base, multiplied by 1.5, capped at 1h).
 template: consul_autopilot_server_health_status
       on: consul.autopilot_server_health_status
    class: Errors
     type: ServiceMesh
component: Consul
     calc: $unhealthy
    every: 10s
    units: status
     warn: $this == 1
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul server ${label:node_name} health
     info: Server ${label:node_name} from datacenter ${label:datacenter} is unhealthy
       to: sysadmin

 # Raft leader last-contact time alert (units: milliseconds).
 # Averages the quantile_0.5 dimension over the last minute, every 10 seconds.
 # Both levels use hysteresis driven by the alert's current status:
 #   WARNING  raises above 200 and, once WARNING or higher, holds until <= 150.
 #   CRITICAL raises above 500 and, once CRITICAL, holds until <= 200.
 template: consul_raft_leader_last_contact_time
       on: consul.raft_leader_last_contact_time
    class: Errors
     type: ServiceMesh
component: Consul
   lookup: average -1m unaligned of quantile_0.5
    every: 10s
    units: milliseconds
     warn: $this > (($status >= $WARNING)  ? (150) : (200))
     crit: $this > (($status == $CRITICAL) ? (200) : (500))
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul leader server ${label:node_name} last contact time
     info: Median time elapsed since leader server ${label:node_name} datacenter ${label:datacenter} was last able to contact the follower nodes
       to: sysadmin

 # Raft leadership change alert.
 # Sums the chart's values over the last minute, every 10 seconds, and raises
 # WARNING as soon as the sum is greater than 0 (any transition in the window).
 # There is no critical level for this alert.
 template: consul_raft_leadership_transitions
       on: consul.raft_leadership_transitions_rate
    class: Errors
     type: ServiceMesh
component: Consul
   lookup: sum -1m unaligned
    every: 10s
    units: transitions
     warn: $this > 0
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul server ${label:node_name} leadership transitions
     info: There has been a leadership change and server ${label:node_name} datacenter ${label:datacenter} has become the leader
       to: sysadmin

 # Main Raft thread saturation alert (units: percentage).
 # Averages the quantile_0.9 dimension over the last minute, every 10 seconds.
 # WARNING raises above 50 and, once the status is WARNING or higher, holds
 # until the value is back at or below 40. There is no critical level.
 template: consul_raft_thread_main_saturation
       on: consul.raft_thread_main_saturation_perc
    class: Utilization
     type: ServiceMesh
component: Consul
   lookup: average -1m unaligned of quantile_0.9
    every: 10s
    units: percentage
     warn: $this > (($status >= $WARNING)  ? (40) : (50))
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul server ${label:node_name} main Raft saturation
     info: Average saturation of the main Raft goroutine on server ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin

 # FSM Raft thread saturation alert (units: percentage).
 # Averages the quantile_0.9 dimension over the last minute, every 10 seconds.
 # WARNING raises above 50 and, once the status is WARNING or higher, holds
 # until the value is back at or below 40. There is no critical level.
 # The value is a saturation percentage (chart name ends in _perc), so the
 # displayed unit is 'percentage', not a time unit.
 template: consul_raft_thread_fsm_saturation
       on: consul.raft_thread_fsm_saturation_perc
    class: Utilization
     type: ServiceMesh
component: Consul
   lookup: average -1m unaligned of quantile_0.9
    every: 10s
    units: percentage
     warn: $this > (($status >= $WARNING)  ? (40) : (50))
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul server ${label:node_name} FSM Raft saturation
     info: Average saturation of the FSM Raft goroutine on server ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin

 # Rate-limited client RPC requests alert.
 # Sums the chart's values over the last minute, every 10 seconds.
 # WARNING raises when the sum exceeds 5 and, once the status is WARNING or
 # higher, holds until the sum drops back to 0. There is no critical level.
 template: consul_client_rpc_requests_exceeded
       on: consul.client_rpc_requests_exceeded_rate
    class: Errors
     type: ServiceMesh
component: Consul
   lookup: sum -1m unaligned
    every: 10s
    units: requests
     warn: $this > (($status >= $WARNING)  ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul server ${label:node_name} RPC requests rate
     info: Number of rate-limited RPC requests made by server ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin

 # Failed client RPC requests alert.
 # Sums the chart's values over the last minute, every 10 seconds.
 # WARNING raises when the sum exceeds 5 and, once the status is WARNING or
 # higher, holds until the sum drops back to 0. There is no critical level.
 template: consul_client_rpc_requests_failed
       on: consul.client_rpc_requests_failed_rate
    class: Errors
     type: ServiceMesh
component: Consul
   lookup: sum -1m unaligned
    every: 10s
    units: requests
     warn: $this > (($status >= $WARNING)  ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul server ${label:node_name} failed RPC requests
     info: number of failed RPC requests made by server ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin

 # Node health check alert.
 # Every 10 seconds adds the $warning and $critical values and raises WARNING
 # when the result is a number (not nan) and is non-zero.
 # There is no critical level for this alert.
 template: consul_node_health_check_status
       on: consul.node_health_check_status
    class: Errors
     type: ServiceMesh
component: Consul
     calc: $warning + $critical
    every: 10s
    units: status
     warn: $this != nan AND $this != 0
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul node health check ${label:check_name} on ${label:node_name}
     info: Node health check ${label:check_name} has failed on server ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin

 # Service health check alert.
 # Every 10 seconds adds the $warning and $critical values and raises WARNING
 # when the result equals exactly 1.
 # NOTE(review): this presumes the two values are 0/1 flags that are never
 # set together, so the sum cannot exceed 1 - confirm against the collector.
 # There is no critical level for this alert.
 template: consul_service_health_check_status
       on: consul.service_health_check_status
    class: Errors
     type: ServiceMesh
component: Consul
     calc: $warning + $critical
    every: 10s
    units: status
     warn: $this == 1
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul service health check ${label:check_name} service ${label:service_name} node ${label:node_name}
     info: Service health check ${label:check_name} for service ${label:service_name} has failed on server ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin

 # Garbage collection pause time alert (units: seconds).
 # Sums the chart's values over the last minute, every 10 seconds.
 # Both levels use hysteresis driven by the alert's current status:
 #   WARNING  raises above 2 and, once WARNING or higher, holds until <= 1.
 #   CRITICAL raises above 5 and, once CRITICAL, holds until <= 2.
 # The crit condition must test for $CRITICAL specifically: testing for
 # ">= $WARNING" would lower the critical threshold to 2 as soon as the alert
 # is in WARNING, escalating it well before the intended 5 second limit.
 template: consul_gc_pause_time
       on: consul.gc_pause_time
    class: Errors
     type: ServiceMesh
component: Consul
   lookup: sum -1m unaligned
    every: 10s
    units: seconds
     warn: $this > (($status >= $WARNING)  ? (1) : (2))
     crit: $this > (($status == $CRITICAL) ? (2) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Consul server ${label:node_name} garbage collection pauses
     info: Time spent in stop-the-world garbage collection pauses on server ${label:node_name} datacenter ${label:datacenter}
       to: sysadmin