#===============================================================================
# BROKER
#===============================================================================
# Description: The broker is responsible for:
# - Exporting centralized logs of all Shinken daemon processes
# - Exporting status data
# - Exporting performance data
# - Exposing Shinken APIs:
#   - Status data
#   - Performance data
#   - Command interface
#===============================================================================
define broker {
    # Shinken Enterprise. Lines added by import core. Do not remove it, it's used by Shinken Enterprise to update your objects if you re-import them.
    _SE_UUID        core-broker-060340145ade11e5b703080027f08538
    _SE_UUID_HASH   8e00136f9e61061e07ca0f4a63509b68
    # End of Shinken Enterprise part

    #======== Daemon name and address =========
    # Daemon name. Must be unique
    broker_name     broker-master

    # IP/fqdn of this daemon (note: you MUST change it by the real ip/fqdn of this server)
    address         localhost

    # Port (HTTP/HTTPS) exposed by this daemon
    port            7772

    # 0 = use HTTP, 1 = use HTTPS
    use_ssl         0

    #======== Master or spare selection =========
    # 1 = is a spare, 0 = is not a spare
    spare           0

    # spare_daemon: name of the daemon that will take this daemon job if it dies
    # IMPORTANT:
    #   * a spare_daemon can only be the spare of 1 (and only one) master daemon
    #   * a spare_daemon cannot have a spare_daemon
    #   * the spare must have modules with the same module_type as the master
    #     - depending of the value of the broker__manage_spare__spare_must_have_the_same_list_of_module_type parameter
    # Example: spare_daemon broker-spare
    #spare_daemon

    # 1 = (default) the spare defined with spare_daemon must have the same module_type as this master
    # 0 = the spare module_type are not checked
    # broker__manage_spare__spare_must_have_the_same_list_of_module_type 1

    #======== Daemon connection timeout and down state limit =========
    # timeout: how many seconds to consider a node don't answer
    timeout             3

    # data_timeout: how many second to consider a configuration transfer to be failed
    #               because the network bandwidth is too small.
    data_timeout        120

    # max_check_attempts: how many fail check to consider this daemon as DEAD
    max_check_attempts  3

    # Check this daemon every X seconds
    check_interval      60

    #======== Modules to enable for this daemon =========
    # Available:
    #  - WebUI                         : Visualisation interface
    #  - Graphite-Perfdata             : Save all metrics into a graphite database
    #  - sla                           : Save sla into a database
    #  - Livestatus                    : TCP API to query element state, used by nagios external tools like NagVis or Thruk
    #  - broker-module-livedata        : REST API to query all monitored element data (host, cluster or check)
    #  - event-manager-writer          : Save events for events manager (do not forget to activate the module in your webui to see data)
    #  - Simple-log                    : Save all logs into a common file, Use this module only if you need to have all the check results in one file.
    #  - broker--module-report-builder : External module used by webui--module-report-handler to generate reports
    modules         WebUI, Graphite-Perfdata, sla, event-manager-writer

    #======== Realm and architecture settings =========
    # Realm to set this daemon into
    realm           All

    # 1 = take data from the daemon realm and its sub realms
    # 0 = take data only from the daemon realm
    manage_sub_realms   1

    # In NATted environments, you declare each satellite ip[:port] as seen by
    # *this* broker (if port not set, the port declared by satellite itself
    # is used)
    #satellitemap   scheduler-1=1.2.3.4:7768, scheduler-2=1.2.3.5:7771

    # Exchange between brokers <- schedulers can be limited by packet size (in kB)
    # Note: as compression is automatic, this is a higher limit, and in real case the
    # packets will be lower than this value
    # broks_packet_size 1024

    #======== Memory protection =========
    # Are the daemon module process and worker process are waiting for enough
    # memory to be available before being launch. Default: 1 (enabled)
    broker__manage_brok__enable_sub_processes_memory_usage_protection 1

    # The sub process memory usage protection can have a system reserved memory
    # that won't be used by theses sub process when launched
    # By default: 0 (no reserved memory)
    # Example: 10 (means 10% of the total memory is reserved for the system)
    broker__manage_brok__sub_process_memory_usage_system_reserved_memory 0

    # If a sub process cannot be started because of the protection, how many seconds
    # it will be retry and wait that the system memory is freed until it fail to start
    # By default: 5 (seconds)
    broker__manage_brok__sub_processes_memory_usage_protection_max_retry_time 5

    #======== Brok pusher worker =========
    # The broker spawn broks pusher sub process to push to external modules (like WebUI)
    # the broker will look at this worker execution time, and will kill if it timeout
    # The broker will compute the average execution time of previous workers to
    # decide about how many time this worker will take based on:
    # number of broks to send / past average send speed (broks/s)
    # If this time is reach, it means that the pusher process is killed
    # For small amount of broks to send, it should lead to ridiculously small allowed execution time
    # and the fac to spawn the sub process can be higher than this value, so we are using a minimal
    # execution timeout
    # Default: 5 (second)
    broker__manage_brok__sub_process_broks_pusher_min_execution_timeout 5

    # In order to manage the fact that the server can slow down during this send, you can setup a
    # ratio that will be used to increase the allowed timeout by multiply it
    # Default: 5
    broker__manage_brok__sub_process_broks_pusher_security_ratio 5

    # At the broker start without stats, this valid will be used for the timeout
    # Default: 240 (seconds)
    broker__manage_brok__sub_process_broks_pusher_max_execution_timeout 240

    # If a sub process reach a timeout, it will be killed and relaunched. After max retry,
    # the attached module will be restarted
    # Default: 3
    broker__manage_brok__sub_process_broks_pusher_max_retry 3

    # broker__manage_brok__sub_process_broks_pusher_queue_batch_size:
    #   * defines the maximum number of broks the "queue brok pusher"
    #     process will handle per send to external module (like WebUI).
    #   * Remaining broks will be handled in next send.
    #   * IMPORTANT: increase this value can lead to error on the socket
    # Default: 100000 (broks/batch)
    # broker__manage_brok__sub_process_broks_pusher_queue_batch_size 100000

    # Broks whose serialization time exceeds this threshold will generate a warning
    # Default: 100 (milliseconds)
    # broker__oversized_data_warning_threshold_serialization_time 100

    # Broks whose serialization time exceeds this threshold will generate an error
    # Default: 500 (milliseconds)
    # broker__oversized_data_error_threshold_serialization_time 500

    #======== VMWare / ESXi ==========
    # 1 (default) = if vmware get the ESXi CPU stats value, 0 = do not get value
    vmware__statistics_compute_enable 1

    #======== Enable or not this daemon =========
    # 1 = is enabled, 0 = is disabled
    enabled         1
}