Akka.Cluster Configuration
Below is the default HOCON configuration for the base Akka.Cluster
package.
######################################
# Akka Cluster Reference Config File #
######################################
# This is the reference config file that contains all the default settings.
# Make your edits/overrides in your application.conf.
akka {
cluster {
# Initial contact points of the cluster.
# The nodes to join automatically at startup.
# Comma separated full URIs defined by a string on the form of
# "akka.tcp://system@hostname:port"
# Leave as empty if the node is supposed to be joined manually.
seed-nodes = []
# How long to wait for one of the seed nodes to reply to initial join request.
# When this is the first seed node and there is no positive reply from the other
# seed nodes within this timeout it will join itself to bootstrap the cluster.
# When this is not the first seed node the join attempts will be performed with
# this interval.
seed-node-timeout = 5s
# If a join request fails it will be retried after this period.
# Disable join retry by specifying "off".
retry-unsuccessful-join-after = 10s
# Should the 'leader' in the cluster be allowed to automatically mark
# unreachable nodes as DOWN after a configured time of unreachability?
# Using auto-down implies that two separate clusters will automatically be
# formed in case of network partition.
# Disable with "off" or specify a duration to enable auto-down.
# If a downing-provider-class is configured this setting is ignored.
auto-down-unreachable-after = off
# The joining of given seed nodes will by default be retried indefinitely until
# a successful join. That process can be aborted if unsuccessful by defining this
# timeout. When aborted it will run CoordinatedShutdown, which by default will
# terminate the ActorSystem. CoordinatedShutdown can also be configured to exit
# the JVM. It is useful to define this timeout if the seed-nodes are assembled
# dynamically and a restart with new seed-nodes should be tried after unsuccessful
# attempts.
shutdown-after-unsuccessful-join-seed-nodes = off
# Time margin after which shards or singletons that belonged to a downed/removed
# partition are created in surviving partition. The purpose of this margin is that
# in case of a network partition the persistent actors in the non-surviving partitions
# must be stopped before corresponding persistent actors are started somewhere else.
# This is useful if you implement downing strategies that handle network partitions,
# e.g. by keeping the larger side of the partition and shutting down the smaller side.
# It will not add any extra safety for auto-down-unreachable-after, since that is not
# handling network partitions.
# Disable with "off" or specify a duration to enable.
down-removal-margin = off
# Pluggable support for downing of nodes in the cluster.
# If this setting is left empty behaviour will depend on 'auto-down-unreachable' in the following ways:
# * if it is 'off' the `NoDowning` provider is used and no automatic downing will be performed
# * if it is set to a duration the `AutoDowning` provider is with the configured downing duration
#
# If specified the value must be the fully qualified class name of an implementation of
# `Akka.Cluster.IDowningProvider` having two argument constructor:
# - argument 1: accepting an `ActorSystem`
# - argument 2: accepting an `Akka.Cluster.Cluster`
downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider, Akka.Cluster"
# If this is set to "off", the leader will not move 'Joining' members to 'Up' during a network
# split. This feature allows the leader to accept 'Joining' members to be 'WeaklyUp'
# so they become part of the cluster even during a network split. The leader will
# move `Joining` members to 'WeaklyUp' after this configured duration without convergence.
# The leader will move 'WeaklyUp' members to 'Up' status once convergence has been reached.
allow-weakly-up-members = 7s
# The roles of this member. List of strings, e.g. roles = ["A", "B"].
# The roles are part of the membership information and can be used by
# routers or other services to distribute work to certain member types,
# e.g. front-end and back-end nodes.
roles = []
# Application version of the deployment. Used by rolling update features
# to distinguish between old and new nodes. The typical convention is to use
# 3 digit version numbers `major.minor.patch`, but 1 or two digits are also
# supported.
#
# If no `.` is used it is interpreted as a single digit version number or as
# plain alphanumeric if it couldn't be parsed as a number.
#
# It may also have a qualifier at the end for 2 or 3 digit version numbers such
# as "1.2-RC1".
# For 1 digit with qualifier, 1-RC1, it is interpreted as plain alphanumeric.
#
# It has support for https://github.com/dwijnand/sbt-dynver format with `+` or
# `-` separator. The number of commits from the tag is handled as a numeric part.
# For example `1.0.0+3-73475dce26` is less than `1.0.10+10-ed316bd024` (3 < 10).
#
# DEFAULT: by default the app-version will default to the entry assembly's version,
# i.e. the assembly of the executable running `Program.cs`
#
# Values can be "assembly-version" or a version string as defined above, i.e.
# app-version = "1.0.0"
# app-version = "1.1-beta1"
# app-version = "1"
# app-version = "1.1"
app-version = assembly-version
# Run the coordinated shutdown from phase 'cluster-shutdown' when the cluster
# is shutdown for other reasons than when leaving, e.g. when downing. This
# will terminate the ActorSystem when the cluster extension is shutdown.
run-coordinated-shutdown-when-down = on
role {
# Minimum required number of members of a certain role before the leader
# changes member status of 'Joining' members to 'Up'. Typically used together
# with 'Cluster.registerOnMemberUp' to defer some action, such as starting
# actors, until the cluster has reached a certain size.
# E.g. to require 2 nodes with role 'frontend' and 3 nodes with role 'backend':
# frontend.min-nr-of-members = 2
# backend.min-nr-of-members = 3
#<role-name>.min-nr-of-members = 1
}
# Minimum required number of members before the leader changes member status
# of 'Joining' members to 'Up'. Typically used together with
# 'Cluster.registerOnMemberUp' to defer some action, such as starting actors,
# until the cluster has reached a certain size.
min-nr-of-members = 1
# Enable/disable info level logging of cluster events
log-info = on
# Enable/disable verbose info-level logging of cluster events
# for temporary troubleshooting. Defaults to 'off'.
log-info-verbose = off
# how long should the node wait before starting the periodic tasks
# maintenance tasks?
periodic-tasks-initial-delay = 1s
# how often should the node send out gossip information?
gossip-interval = 1s
# discard incoming gossip messages if not handled within this duration
gossip-time-to-live = 2s
# how often should the leader perform maintenance tasks?
leader-actions-interval = 1s
# how often should the node move nodes, marked as unreachable by the failure
# detector, out of the membership ring?
unreachable-nodes-reaper-interval = 1s
# How often the current internal stats should be published.
# A value of 0s can be used to always publish the stats, when it happens.
# Disable with "off".
publish-stats-interval = off
# The id of the dispatcher to use for cluster actors.
# If not specified, the internal dispatcher is used.
# If specified you need to define the settings of the actual dispatcher.
use-dispatcher = ""
# Gossip to random node with newer or older state information, if any with
# this probability. Otherwise Gossip to any random live node.
# Probability value is between 0.0 and 1.0. 0.0 means never, 1.0 means always.
gossip-different-view-probability = 0.8
# Reduced the above probability when the number of nodes in the cluster
# greater than this value.
reduce-gossip-different-view-probability = 400
# Enable/disable legacy pre-1.4.19 heartbeat and heartbeat response wire format serialization support
# Set this flag to true if you're doing a rolling update from Akka.NET version older than 1.4.19.
use-legacy-heartbeat-message = false
# Settings for the Phi accrual failure detector (http://ddg.jaist.ac.jp/pub/HDY+04.pdf
# [Hayashibara et al]) used by the cluster subsystem to detect unreachable
# members.
failure-detector {
# FQCN of the failure detector implementation.
# It must implement akka.remote.FailureDetector and have
# a public constructor with a com.typesafe.config.Config and
# akka.actor.EventStream parameter.
implementation-class = "Akka.Remote.PhiAccrualFailureDetector, Akka.Remote"
# How often keep-alive heartbeat messages should be sent to each connection.
heartbeat-interval = 1 s
# Defines the failure detector threshold.
# A low threshold is prone to generate many wrong suspicions but ensures
# a quick detection in the event of a real crash. Conversely, a high
# threshold generates fewer mistakes but needs more time to detect
# actual crashes.
threshold = 8.0
# Number of the samples of inter-heartbeat arrival times to adaptively
# calculate the failure timeout for connections.
max-sample-size = 1000
# Minimum standard deviation to use for the normal distribution in
# AccrualFailureDetector. Too low standard deviation might result in
# too much sensitivity for sudden, but normal, deviations in heartbeat
# inter arrival times.
min-std-deviation = 100 ms
# Number of potentially lost/delayed heartbeats that will be
# accepted before considering it to be an anomaly.
# This margin is important to be able to survive sudden, occasional,
# pauses in heartbeat arrivals, due to for example garbage collect or
# network drop.
acceptable-heartbeat-pause = 3 s
# Number of member nodes that each member will send heartbeat messages to,
# i.e. each node will be monitored by this number of other nodes.
monitored-by-nr-of-members = 9
# After the heartbeat request has been sent the first failure detection
# will start after this period, even though no heartbeat mesage has
# been received.
expected-response-after = 1 s
}
# If the tick-duration of the default scheduler is longer than the
# tick-duration configured here a dedicated scheduler will be used for
# periodic tasks of the cluster, otherwise the default scheduler is used.
# See akka.scheduler settings for more details.
scheduler {
tick-duration = 33ms
ticks-per-wheel = 512
}
debug {
# log heartbeat events (very verbose, useful mostly when debugging heartbeating issues)
verbose-heartbeat-logging = off
# log gossip merge events (very verbose, useful when debugging convergence issues)
verbose-receive-gossip-logging = off
}
}
# Default configuration for routers
actor.deployment.default {
# MetricsSelector to use
# - available: "mix", "heap", "cpu", "load"
# - or: Fully qualified class name of the MetricsSelector class.
# The class must extend akka.cluster.routing.MetricsSelector
# and have a public constructor with com.typesafe.config.Config
# parameter.
# - default is "mix"
metrics-selector = mix
}
actor.deployment.default.cluster {
# enable cluster aware router that deploys to nodes in the cluster
enabled = off
# Maximum number of routees that will be deployed on each cluster
# member node.
# Note that max-total-nr-of-instances defines total number of routees, but
# number of routees per node will not be exceeded, i.e. if you
# define max-total-nr-of-instances = 50 and max-nr-of-instances-per-node = 2
# it will deploy 2 routees per new member in the cluster, up to
# 25 members.
max-nr-of-instances-per-node = 1
# Maximum number of routees that will be deployed, in total
# on all nodes. See also description of max-nr-of-instances-per-node.
# For backwards compatibility reasons, nr-of-instances
# has the same purpose as max-total-nr-of-instances for cluster
# aware routers and nr-of-instances (if defined by user) takes
# precedence over max-total-nr-of-instances.
max-total-nr-of-instances = 10000
# Defines if routees are allowed to be located on the same node as
# the head router actor, or only on remote nodes.
# Useful for master-worker scenario where all routees are remote.
allow-local-routees = on
# Use members with specified role, or all members if undefined or empty.
use-role = ""
}
# Protobuf serializer for cluster messages
actor {
serializers {
akka-cluster = "Akka.Cluster.Serialization.ClusterMessageSerializer, Akka.Cluster"
reliable-delivery = "Akka.Cluster.Serialization.ReliableDeliverySerializer, Akka.Cluster"
}
serialization-bindings {
"Akka.Cluster.IClusterMessage, Akka.Cluster" = akka-cluster
"Akka.Cluster.Routing.ClusterRouterPool, Akka.Cluster" = akka-cluster
"Akka.Delivery.Internal.IDeliverySerializable, Akka" = reliable-delivery
}
serialization-identifiers {
"Akka.Cluster.Serialization.ClusterMessageSerializer, Akka.Cluster" = 5
"Akka.Cluster.Serialization.ReliableDeliverySerializer, Akka.Cluster" = 36
}
}
}
# split-brain-resolver
# To enable the split brain resolver you first need to enable the provider in your application.conf:
# for old split brain resolver:
# akka.cluster.downing-provider-class = "Akka.Cluster.SplitBrainResolver"
# for new split brain resolver:
# akka.cluster.downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider"
akka.cluster.split-brain-resolver {
# Select one of the available strategies (see descriptions below):
# static-quorum, keep-majority, keep-oldest, down-all, lease-majority, (keep-referee)
# keep-referee - supported only with the old split brain resolver
active-strategy = keep-majority
# Decision is taken by the strategy when there has been no membership or
# reachability changes for this duration, i.e. the cluster state is stable.
stable-after = 20s
# When reachability observations by the failure detector are changed the SBR decisions
# are deferred until there are no changes within the 'stable-after' duration.
# If this continues for too long it might be an indication of an unstable system/network
# and it could result in delayed or conflicting decisions on separate sides of a network
# partition.
# As a precaution for that scenario all nodes are downed if no decision is made within
# `stable-after + down-all-when-unstable` from the first unreachability event.
# The measurement is reset if all unreachable have been healed, downed or removed, or
# if there are no changes within `stable-after * 2`.
# The value can be on, off, or a duration.
# By default it is 'on' and then it is derived to be 3/4 of stable-after, but not less than
# 4 seconds.
# supported only with the new split brain resolver
down-all-when-unstable = on
}
# Down the unreachable nodes if the number of remaining nodes are greater than or equal to
# the given 'quorum-size'. Otherwise down the reachable nodes, i.e. it will shut down that
# side of the partition. In other words, the 'size' defines the minimum number of nodes
# that the cluster must have to be operational. If there are unreachable nodes when starting
# up the cluster, before reaching this limit, the cluster may shutdown itself immediately.
# This is not an issue if you start all nodes at approximately the same time.
#
# Note that you must not add more members to the cluster than 'quorum-size * 2 - 1', because
# then both sides may down each other and thereby form two separate clusters. For example,
# quorum-size configured to 3 in a 6 node cluster may result in a split where each side
# consists of 3 nodes each, i.e. each side thinks it has enough nodes to continue by
# itself. A warning is logged if this recommendation is violated.
akka.cluster.split-brain-resolver.static-quorum {
# minimum number of nodes that the cluster must have
quorum-size = undefined
# if the 'role' is defined the decision is based only on members with that 'role'
role = ""
}
# Down the unreachable nodes if the current node is in the majority part based the last known
# membership information. Otherwise down the reachable nodes, i.e. the own part. If the
# the parts are of equal size the part containing the node with the lowest address is kept.
# Note that if there are more than two partitions and none is in majority each part
# will shutdown itself, terminating the whole cluster.
akka.cluster.split-brain-resolver.keep-majority {
# if the 'role' is defined the decision is based only on members with that 'role'
role = ""
}
# Down the part that does not contain the oldest member (current singleton).
#
# There is one exception to this rule if 'down-if-alone' is defined to 'on'.
# Then, if the oldest node has partitioned from all other nodes the oldest
# will down itself and keep all other nodes running. The strategy will not
# down the single oldest node when it is the only remaining node in the cluster.
#
# Note that if the oldest node crashes the others will remove it from the cluster
# when 'down-if-alone' is 'on', otherwise they will down themselves if the
# oldest node crashes, i.e. shutdown the whole cluster together with the oldest node.
akka.cluster.split-brain-resolver.keep-oldest {
# Enable downing of the oldest node when it is partitioned from all other nodes
down-if-alone = on
# if the 'role' is defined the decision is based only on members with that 'role',
# i.e. using the oldest member (singleton) within the nodes with that role
role = ""
}
# Keep the part that can acquire the lease, and down the other part.
# Best effort is to keep the side that has most nodes, i.e. the majority side.
# This is achieved by adding a delay before trying to acquire the lease on the
# minority side.
# supported only with the new split brain resolver
akka.cluster.split-brain-resolver.lease-majority {
lease-implementation = ""
# The recommended format for the lease name is "<service-name>-akka-sbr".
# When lease-name is not defined, the name will be set to "<actor-system-name>-akka-sbr"
lease-name = ""
# This delay is used on the minority side before trying to acquire the lease,
# as an best effort to try to keep the majority side.
acquire-lease-delay-for-minority = 2s
# Release the lease after this duration.
release-after = 40s
# If the 'role' is defined the majority/minority is based only on members with that 'role'.
role = ""
}
# supported only with the old split brain resolver
akka.cluster.split-brain-resolver.keep-referee {
# referee address on the form of "akka.tcp://system@hostname:port"
address = ""
down-all-if-less-than-nodes = 1
}