#===============================================================================
# BROKER
#===============================================================================
# Description: The broker is responsible for:
# - Exporting centralized logs of all Shinken daemon processes
# - Exporting status data
# - Exporting performance data
# - Exposing Shinken APIs:
#     - Status data
#     - Performance data
#     - Command interface
#===============================================================================
define broker {

    # Shinken Enterprise. Lines added by import core. Do not remove it, it's used by Shinken Enterprise to update your objects if you re-import them.
    _SE_UUID                  core-broker-060340145ade11e5b703080027f08538
    _SE_UUID_HASH             8e00136f9e61061e07ca0f4a63509b68
    # End of Shinken Enterprise part

    #======== Daemon name and address =========
    # Daemon name. Must be unique
    broker_name               broker-master

    # IP/fqdn of this daemon (note: you MUST change it by the real ip/fqdn of this server)
    address                   localhost

    # Port (HTTP/HTTPS) exposed by this daemon
    port                      7772

    # 0 = use HTTP, 1 = use HTTPS
    use_ssl                   0

    #======== Master or spare selection =========
    # 1 = is a spare, 0 = is not a spare
    spare                     0

    # spare_daemon: name of the daemon that will take this daemon job if it dies
    # IMPORTANT:
    #   * a spare_daemon can only be the spare of 1 (and only one) master daemon
    #   * a spare_daemon cannot have a spare_daemon
    #   * the spare must have modules with the same module_type as the master
    #     - depending of the value of the broker__manage_spare__spare_must_have_the_same_list_of_module_type parameter
    # Example: spare_daemon   broker-spare
    spare_daemon

    # 1 = (default) the spare defined with spare_daemon must have the same module_type as this master
    # 0 = the spare module_type are not checked
    # broker__manage_spare__spare_must_have_the_same_list_of_module_type 1

    #======== Daemon connection timeout and down state limit =========
    # timeout: how many seconds to consider a node don't answer
    timeout                   3

    # data_timeout: how many second to consider a configuration transfer to be failed
    # because the network bandwidth is too small.
    data_timeout              120

    # max_check_attempts: how many fail check to consider this daemon as DEAD
    max_check_attempts        3

    # Check this daemon every X seconds
    check_interval            60

    #======== Modules to enable for this daemon =========
    # Available:
    # - WebUI                  : Visualisation interface
    # - Graphite-Perfdata      : Save all metrics into a graphite database
    # - sla                    : Save sla into a database
    # - Livestatus             : TCP API to query element state, used by nagios external tools like NagVis or Thruk
    # - broker-module-livedata : REST API to query all monitored element data (host, cluster or check)
    # - event-manager-writer   : Save events for events manager (do not forget to activate the module in your webui to see data)
    # - Simple-log             : Save all logs into a common file, Use this module only if you need to have all the check results in one file.
    modules                   WebUI, Graphite-Perfdata, sla, event-manager-writer

    #======== Realm and architecture settings =========
    # Realm to set this daemon into
    realm                     All

    # 1 = take data from the daemon realm and its sub realms
    # 0 = take data only from the daemon realm
    manage_sub_realms         1

    # In NATted environments, you declare each satellite ip[:port] as seen by
    # *this* daemon (if port not set, the port declared by satellite itself
    # is used)
    #satellitemap             scheduler-1=1.2.3.4:7768, scheduler-2=1.2.3.5:7771

    # Exchange between brokers <- schedulers can be limited by packet size (in kB)
    # Note: as compression is automatic, this is a higher limit, and in real case the
    #       packets will be lower than this value
    # broks_packet_size       1024

    #======== Memory protection =========
    # Are the daemon module process and worker process are waiting for enough
    # memory to be available before being launch. Default: 1 (enabled)
    broker__manage_brok__enable_sub_processes_memory_usage_protection           1

    # The sub process memory usage protection can have a system reserved memory
    # that won't be used by theses sub process when launched
    # By default: 0 (no reserved memory)
    # Example: 10  (means 10% of the total memory is reserved for the system)
    broker__manage_brok__sub_process_memory_usage_system_reserved_memory        0

    # If a sub process cannot be started because of the protection, how many seconds
    # it will be retry and wait that the system memory is freed until it fail to start
    # By default: 5 (seconds)
    broker__manage_brok__sub_processes_memory_usage_protection_max_retry_time   5

    #======== Brok pusher worker =========
    # The broker spawn broks pusher sub process to push to external modules (like WebUI)
    # the broker will look at this worker execution time, and will kill if it timeout
    # The broker will compute the average execution time of previous workers to
    # decide about how many time this worker will take based on:
    #   number of broks to send / past average send speed (broks/s)
    # If this time is reach, it means that the pusher process is killed

    # For small amount of broks to send, it should lead to ridiculously small allowed execution time
    # and the fact that spawning the sub process can be higher than this value, so we are using a minimal
    # execution timeout
    # Default: 5 (second)
    broker__manage_brok__sub_process_broks_pusher_min_execution_timeout         5

    # In order to manage the fact that the server can slow down during this send, you can setup a
    # ratio that will be used to increase the allowed timeout by multiply it
    # Default: 5
    broker__manage_brok__sub_process_broks_pusher_security_ratio                5

    # At the broker start without stats, this value will be used for the timeout
    # Default: 240 (seconds)
    broker__manage_brok__sub_process_broks_pusher_max_execution_timeout         240

    # If a sub process reach a timeout, it will be killed and relaunched. After max retry,
    # the attached module will be restarted
    # Default: 3
    broker__manage_brok__sub_process_broks_pusher_max_retry                     3

    # broker__manage_brok__sub_process_broks_pusher_queue_batch_size:
    #   * defines the maximum number of broks the "queue brok pusher"
    #     process will handle per send to external module ( like WebUI ) .
    #   * Remaining broks will be handled in next send.
    #   * IMPORTANT: increase this value can lead to error on the socket
    # Default: 100000 (broks/batch)
    # broker__manage_brok__sub_process_broks_pusher_queue_batch_size            100000

    # Broks whose serialization time exceeds this threshold will generate a warning
    # Default: 100 (milliseconds)
    # broker__manage_brok__oversized_data_warning_threshold__serialization_time 100

    # Broks whose serialization time exceeds this threshold will generate an error
    # Default: 500 (milliseconds)
    # broker__manage_brok__oversized_data_error_threshold__serialization_time   500

    #======== VMWare / ESXi ==========
    # 1 (default) = if vmware get the ESXi CPU stats value, 0 = do not get value
    vmware__statistics_compute_enable   1

    #======== Enable or not this daemon =========
    # 1 = is enabled, 0 = is disabled
    enabled                   1

}