    Akka.Cluster Configuration

    Below is the default HOCON configuration for the base Akka.Cluster package.

    ######################################
    # Akka Cluster Reference Config File #
    ######################################
    
    # This is the reference config file that contains all the default settings.
    # Make your edits/overrides in your application.conf.
    
    akka {
    
      cluster {
        # Initial contact points of the cluster.
        # The nodes to join automatically at startup.
        # Comma separated full URIs defined by a string in the form of
        # "akka.tcp://system@hostname:port"
        # Leave empty if the node is supposed to be joined manually.
        seed-nodes = []
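
        # A minimal sketch of overriding this in application.conf; the system
        # name and addresses here are hypothetical:
        #   akka.cluster.seed-nodes = [
        #     "akka.tcp://MySystem@10.0.0.1:4053",
        #     "akka.tcp://MySystem@10.0.0.2:4053"]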
    
        # How long to wait for one of the seed nodes to reply to initial join request.
        # When this is the first seed node and there is no positive reply from the other
        # seed nodes within this timeout it will join itself to bootstrap the cluster.
        # When this is not the first seed node the join attempts will be performed with
        # this interval.
        seed-node-timeout = 5s
    
        # If a join request fails it will be retried after this period.
        # Disable join retry by specifying "off".
        retry-unsuccessful-join-after = 10s
        
        # Should the 'leader' in the cluster be allowed to automatically mark
        # unreachable nodes as DOWN after a configured time of unreachability?
        # Using auto-down implies that two separate clusters will automatically be
        # formed in case of network partition.
        # Disable with "off" or specify a duration to enable auto-down.
        # If a downing-provider-class is configured this setting is ignored.
        auto-down-unreachable-after = off
    
        # The joining of given seed nodes will by default be retried indefinitely until
        # a successful join. That process can be aborted if unsuccessful by defining this
        # timeout. When aborted it will run CoordinatedShutdown, which by default will
        # terminate the ActorSystem. CoordinatedShutdown can also be configured to exit
        # the JVM. It is useful to define this timeout if the seed-nodes are assembled
        # dynamically and a restart with new seed-nodes should be tried after unsuccessful
        # attempts.
        shutdown-after-unsuccessful-join-seed-nodes = off
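
        # For example, to abort and run CoordinatedShutdown after one minute of
        # failed join attempts (illustrative value):
        #   shutdown-after-unsuccessful-join-seed-nodes = 60s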
    
        # Time margin after which shards or singletons that belonged to a downed/removed
        # partition are created in the surviving partition. The purpose of this margin is that
        # in case of a network partition the persistent actors in the non-surviving partitions
        # must be stopped before corresponding persistent actors are started somewhere else.
        # This is useful if you implement downing strategies that handle network partitions,
        # e.g. by keeping the larger side of the partition and shutting down the smaller side.
        # It will not add any extra safety for auto-down-unreachable-after, since that is not
        # handling network partitions.
        # Disable with "off" or specify a duration to enable.
        down-removal-margin = off
    
        # Pluggable support for downing of nodes in the cluster.
        # If this setting is left empty the behavior will depend on 'auto-down-unreachable-after' in the following ways:
        # * if it is 'off' the `NoDowning` provider is used and no automatic downing will be performed
        # * if it is set to a duration the `AutoDowning` provider is used with the configured downing duration
        #
        # If specified the value must be the fully qualified class name of an implementation of
        # `Akka.Cluster.IDowningProvider` with a public two-argument constructor:
        #   - argument 1: accepting an `ActorSystem`
        #   - argument 2: accepting an `Akka.Cluster.Cluster`
        downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider, Akka.Cluster"
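
        # A custom provider can be plugged in instead; the class name below is
        # hypothetical and must satisfy the constructor contract described above:
        #   akka.cluster.downing-provider-class = "MyApp.Cluster.MyDowningProvider, MyApp"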
    
        # If this is set to "off", the leader will not move 'Joining' members to 'Up' during a network
        # split. This feature allows 'Joining' members to be promoted to 'WeaklyUp',
        # so they become part of the cluster even during a network split. The leader will
        # move `Joining` members to 'WeaklyUp' after this configured duration without convergence.
        # The leader will move 'WeaklyUp' members to 'Up' status once convergence has been reached.
        allow-weakly-up-members = 7s
    
        # The roles of this member. List of strings, e.g. roles = ["A", "B"].
        # The roles are part of the membership information and can be used by
        # routers or other services to distribute work to certain member types,
        # e.g. front-end and back-end nodes.
        roles = []
    
        # Application version of the deployment. Used by rolling update features
        # to distinguish between old and new nodes. The typical convention is to use
        # 3 digit version numbers `major.minor.patch`, but one or two digits are also
        # supported.
        #
        # If no `.` is used it is interpreted as a single digit version number or as
        # plain alphanumeric if it couldn't be parsed as a number.
        #
        # It may also have a qualifier at the end for 2 or 3 digit version numbers such
        # as "1.2-RC1".
        # For 1 digit with qualifier, 1-RC1, it is interpreted as plain alphanumeric.
        #
        # It has support for https://github.com/dwijnand/sbt-dynver format with `+` or
        # `-` separator. The number of commits from the tag is handled as a numeric part.
        # For example `1.0.0+3-73475dce26` is less than `1.0.10+10-ed316bd024` (3 < 10).
        #
        # DEFAULT: by default the app-version will default to the entry assembly's version,
        # i.e. the assembly of the executable running `Program.cs`
        #
        # Values can be "assembly-version" or a version string as defined above, i.e.
        # app-version = "1.0.0"
        # app-version = "1.1-beta1"
        # app-version = "1"
        # app-version = "1.1"
        app-version = assembly-version
    
        # Run the coordinated shutdown from phase 'cluster-shutdown' when the cluster
        # is shut down for reasons other than leaving, e.g. when downing. This
        # will terminate the ActorSystem when the cluster extension is shut down.
        run-coordinated-shutdown-when-down = on
    
        role {
          # Minimum required number of members of a certain role before the leader
          # changes member status of 'Joining' members to 'Up'. Typically used together
          # with 'Cluster.registerOnMemberUp' to defer some action, such as starting
          # actors, until the cluster has reached a certain size.
          # E.g. to require 2 nodes with role 'frontend' and 3 nodes with role 'backend':
          #   frontend.min-nr-of-members = 2
          #   backend.min-nr-of-members = 3
          #<role-name>.min-nr-of-members = 1
        }
    
        # Minimum required number of members before the leader changes member status
        # of 'Joining' members to 'Up'. Typically used together with
        # 'Cluster.registerOnMemberUp' to defer some action, such as starting actors,
        # until the cluster has reached a certain size.
        min-nr-of-members = 1
    
        # Enable/disable info level logging of cluster events
        log-info = on
    
        # Enable/disable verbose info-level logging of cluster events
        # for temporary troubleshooting. Defaults to 'off'.
        log-info-verbose = off
    
        # how long should the node wait before starting the periodic
        # maintenance tasks?
        periodic-tasks-initial-delay = 1s
    
        # how often should the node send out gossip information?
        gossip-interval = 1s
    
        # discard incoming gossip messages if not handled within this duration
        gossip-time-to-live = 2s
    
        # how often should the leader perform maintenance tasks?
        leader-actions-interval = 1s
    
        # how often should the node move nodes, marked as unreachable by the failure
        # detector, out of the membership ring?
        unreachable-nodes-reaper-interval = 1s
    
        # How often the current internal stats should be published.
        # A value of 0s can be used to always publish the stats, when it happens.
        # Disable with "off".
        publish-stats-interval = off
    
        # The id of the dispatcher to use for cluster actors.
        # If not specified, the internal dispatcher is used.
        # If specified you need to define the settings of the actual dispatcher.
        use-dispatcher = ""
    
        # Gossip to a random node with newer or older state information, if any, with
        # this probability. Otherwise gossip to any random live node.
        # Probability value is between 0.0 and 1.0. 0.0 means never, 1.0 means always.
        gossip-different-view-probability = 0.8
    
        # Reduce the above probability when the number of nodes in the cluster is
        # greater than this value.
        reduce-gossip-different-view-probability = 400
    
        # Enable/disable legacy pre-1.4.19 heartbeat and heartbeat response wire format serialization support
        # Set this flag to true if you're doing a rolling update from an Akka.NET version older than 1.4.19.
        use-legacy-heartbeat-message = false
    
        # Settings for the Phi accrual failure detector (http://ddg.jaist.ac.jp/pub/HDY+04.pdf
        # [Hayashibara et al]) used by the cluster subsystem to detect unreachable
        # members.
        failure-detector {
    
          # FQCN of the failure detector implementation.
          # It must implement Akka.Remote.FailureDetector and have
          # a public constructor with a Config and
          # an EventStream parameter.
          implementation-class = "Akka.Remote.PhiAccrualFailureDetector, Akka.Remote"
    
          # How often keep-alive heartbeat messages should be sent to each connection.
          heartbeat-interval = 1 s
    
          # Defines the failure detector threshold.
          # A low threshold is prone to generate many wrong suspicions but ensures
          # a quick detection in the event of a real crash. Conversely, a high
          # threshold generates fewer mistakes but needs more time to detect
          # actual crashes.
          threshold = 8.0
    
          # Number of the samples of inter-heartbeat arrival times to adaptively
          # calculate the failure timeout for connections.
          max-sample-size = 1000
    
          # Minimum standard deviation to use for the normal distribution in
          # AccrualFailureDetector. Too low standard deviation might result in
          # too much sensitivity for sudden, but normal, deviations in heartbeat
          # inter arrival times.
          min-std-deviation = 100 ms
    
          # Number of potentially lost/delayed heartbeats that will be
          # accepted before considering it to be an anomaly.
          # This margin is important to be able to survive sudden, occasional,
          # pauses in heartbeat arrivals, due to for example garbage collect or
          # network drop.
          acceptable-heartbeat-pause = 3 s
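
          # In environments with long GC pauses or jittery networks, detection is
          # often relaxed by raising these two settings; the values below are
          # illustrative, not recommendations:
          #   akka.cluster.failure-detector.threshold = 12.0
          #   akka.cluster.failure-detector.acceptable-heartbeat-pause = 6s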
    
          # Number of member nodes that each member will send heartbeat messages to,
          # i.e. each node will be monitored by this number of other nodes.
          monitored-by-nr-of-members = 9
    
          # After the heartbeat request has been sent the first failure detection
          # will start after this period, even though no heartbeat message has
          # been received.
          expected-response-after = 1 s
    
        }
    
        # If the tick-duration of the default scheduler is longer than the
        # tick-duration configured here a dedicated scheduler will be used for
        # periodic tasks of the cluster, otherwise the default scheduler is used.
        # See akka.scheduler settings for more details.
        scheduler {
          tick-duration = 33ms
          ticks-per-wheel = 512
        }
    
        debug {
          # log heartbeat events (very verbose, useful mostly when debugging heartbeating issues)
          verbose-heartbeat-logging = off
    
          # log gossip merge events (very verbose, useful when debugging convergence issues)
          verbose-receive-gossip-logging = off
        }
      }
    
      # Default configuration for routers
      actor.deployment.default {
        # MetricsSelector to use
        # - available: "mix", "heap", "cpu", "load"
        # - or: fully qualified class name of a MetricsSelector implementation.
        #       The class must extend the MetricsSelector base class
        #       and have a public constructor with a Config parameter.
        # - default is "mix"
        metrics-selector = mix
      }
      actor.deployment.default.cluster {
        # enable cluster aware router that deploys to nodes in the cluster
        enabled = off
    
        # Maximum number of routees that will be deployed on each cluster
        # member node.
        # Note that max-total-nr-of-instances defines the total number of routees, but
        # the number of routees per node will not be exceeded, i.e. if you
        # define max-total-nr-of-instances = 50 and max-nr-of-instances-per-node = 2
        # it will deploy 2 routees per new member in the cluster, up to
        # 25 members.
        max-nr-of-instances-per-node = 1
    
        # Maximum number of routees that will be deployed, in total
        # on all nodes. See also description of max-nr-of-instances-per-node.
        # For backwards compatibility reasons, nr-of-instances
        # has the same purpose as max-total-nr-of-instances for cluster
        # aware routers and nr-of-instances (if defined by user) takes
        # precedence over max-total-nr-of-instances.
        max-total-nr-of-instances = 10000
    
        # Defines if routees are allowed to be located on the same node as
        # the head router actor, or only on remote nodes.
        # Useful for master-worker scenario where all routees are remote.
        allow-local-routees = on
    
        # Use members with specified role, or all members if undefined or empty.
        use-role = ""
    
      }
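
      # A minimal sketch of a cluster-aware pool router deployment; the router
      # path and role name are hypothetical:
      #   akka.actor.deployment {
      #     /workerRouter {
      #       router = round-robin-pool
      #       cluster {
      #         enabled = on
      #         max-nr-of-instances-per-node = 2
      #         allow-local-routees = off
      #         use-role = "backend"
      #       }
      #     }
      #   }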
    
      # Protobuf serializer for cluster messages
      actor {
        serializers {
          akka-cluster = "Akka.Cluster.Serialization.ClusterMessageSerializer, Akka.Cluster"
          reliable-delivery = "Akka.Cluster.Serialization.ReliableDeliverySerializer, Akka.Cluster"
        }
    
        serialization-bindings {
          "Akka.Cluster.IClusterMessage, Akka.Cluster" = akka-cluster
          "Akka.Cluster.Routing.ClusterRouterPool, Akka.Cluster" = akka-cluster
          "Akka.Delivery.Internal.IDeliverySerializable, Akka" = reliable-delivery
        }
    
        serialization-identifiers {
          "Akka.Cluster.Serialization.ClusterMessageSerializer, Akka.Cluster" = 5
          "Akka.Cluster.Serialization.ReliableDeliverySerializer, Akka.Cluster" = 36
        }
      }
    }
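
    Most deployments override only a handful of these settings. Below is a minimal
    application.conf sketch; the provider and transport lines come from Akka.Remote,
    and the system name, address, and role are hypothetical:

        akka {
          actor.provider = cluster
          remote.dot-netty.tcp {
            hostname = "10.0.0.1"
            port = 4053
          }
          cluster {
            seed-nodes = ["akka.tcp://MySystem@10.0.0.1:4053"]
            roles = ["backend"]
            min-nr-of-members = 2
          }
        }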
    
    
    # split-brain-resolver
    
    # To enable the split brain resolver you first need to enable the provider in your application.conf:
    # for old split brain resolver:
    # akka.cluster.downing-provider-class = "Akka.Cluster.SplitBrainResolver"
    # for new split brain resolver:
    # akka.cluster.downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider"
    
    akka.cluster.split-brain-resolver {
      # Select one of the available strategies (see descriptions below):
      # static-quorum, keep-majority, keep-oldest, down-all, lease-majority, (keep-referee)
      # keep-referee - supported only with the old split brain resolver
      active-strategy = keep-majority
    
      # Decision is taken by the strategy when there has been no membership or
      # reachability changes for this duration, i.e. the cluster state is stable.
      stable-after = 20s
    
      # When reachability observations by the failure detector are changed the SBR decisions
      # are deferred until there are no changes within the 'stable-after' duration.
      # If this continues for too long it might be an indication of an unstable system/network
      # and it could result in delayed or conflicting decisions on separate sides of a network
      # partition.
      # As a precaution for that scenario all nodes are downed if no decision is made within
      # `stable-after + down-all-when-unstable` from the first unreachability event.
      # The measurement is reset if all unreachable have been healed, downed or removed, or
      # if there are no changes within `stable-after * 2`.
      # The value can be on, off, or a duration.
      # By default it is 'on' and then it is derived to be 3/4 of stable-after, but not less than
      # 4 seconds.
      # supported only with the new split brain resolver
      down-all-when-unstable = on
    }
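
    # For example, to select a different strategy and a longer stability window
    # (illustrative values):
    #   akka.cluster.split-brain-resolver {
    #     active-strategy = keep-oldest
    #     stable-after = 30s
    #   }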
    
    # Down the unreachable nodes if the number of remaining nodes is greater than or equal to
    # the given 'quorum-size'. Otherwise down the reachable nodes, i.e. it will shut down that
    # side of the partition. In other words, 'quorum-size' defines the minimum number of nodes
    # that the cluster must have to be operational. If there are unreachable nodes when starting
    # up the cluster, before reaching this limit, the cluster may shut itself down immediately.
    # This is not an issue if you start all nodes at approximately the same time.
    #
    # Note that you must not add more members to the cluster than 'quorum-size * 2 - 1', because
    # then both sides may down each other and thereby form two separate clusters. For example,
    # quorum-size configured to 3 in a 6 node cluster may result in a split where each side
    # consists of 3 nodes each, i.e. each side thinks it has enough nodes to continue by
    # itself. A warning is logged if this recommendation is violated.
    akka.cluster.split-brain-resolver.static-quorum {
      # minimum number of nodes that the cluster must have
      quorum-size = undefined
    
      # if the 'role' is defined the decision is based only on members with that 'role'
      role = ""
    }
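
    # For example, quorum-size = 3 suits a cluster planned for five nodes, since the
    # 'quorum-size * 2 - 1' rule gives 3 * 2 - 1 = 5 members at most:
    #   akka.cluster.split-brain-resolver.static-quorum.quorum-size = 3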
    
    # Down the unreachable nodes if the current node is in the majority part based on the last
    # known membership information. Otherwise down the reachable nodes, i.e. the own part. If
    # the parts are of equal size the part containing the node with the lowest address is kept.
    # Note that if there are more than two partitions and none is in majority each part
    # will shut itself down, terminating the whole cluster.
    akka.cluster.split-brain-resolver.keep-majority {
      # if the 'role' is defined the decision is based only on members with that 'role'
      role = ""
    }
    
    # Down the part that does not contain the oldest member (current singleton).
    #
    # There is one exception to this rule if 'down-if-alone' is defined to 'on'.
    # Then, if the oldest node has partitioned from all other nodes the oldest
    # will down itself and keep all other nodes running. The strategy will not
    # down the single oldest node when it is the only remaining node in the cluster.
    #
    # Note that if the oldest node crashes the others will remove it from the cluster
    # when 'down-if-alone' is 'on', otherwise they will down themselves if the
    # oldest node crashes, i.e. shut down the whole cluster together with the oldest node.
    akka.cluster.split-brain-resolver.keep-oldest {
      # Enable downing of the oldest node when it is partitioned from all other nodes
      down-if-alone = on
    
      # if the 'role' is defined the decision is based only on members with that 'role',
      # i.e. using the oldest member (singleton) within the nodes with that role
      role = ""
    }
    
    # Keep the part that can acquire the lease, and down the other part.
    # Best effort is to keep the side that has the most nodes, i.e. the majority side.
    # This is achieved by adding a delay before trying to acquire the lease on the
    # minority side.
    # supported only with the new split brain resolver
    akka.cluster.split-brain-resolver.lease-majority {
      lease-implementation = ""
    
      # The recommended format for the lease name is "<service-name>-akka-sbr".
      # When lease-name is not defined, the name will be set to "<actor-system-name>-akka-sbr"
      lease-name = ""
    
      # This delay is used on the minority side before trying to acquire the lease,
      # as a best effort to try to keep the majority side.
      acquire-lease-delay-for-minority = 2s
    
      # Release the lease after this duration.
      release-after = 40s
    
      # If the 'role' is defined the majority/minority is based only on members with that 'role'.
      role = ""
    }
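
    # A sketch of selecting this strategy; the lease-implementation path assumes a
    # separately installed lease module (here a hypothetical Kubernetes lease):
    #   akka.cluster.split-brain-resolver {
    #     active-strategy = lease-majority
    #     lease-majority.lease-implementation = "akka.coordination.lease.kubernetes"
    #   }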
    
    # supported only with the old split brain resolver
    akka.cluster.split-brain-resolver.keep-referee {
      # referee address on the form of "akka.tcp://system@hostname:port"
      address = ""
      down-all-if-less-than-nodes = 1
    }
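
    Putting it together, configuring the new split brain resolver explicitly in
    application.conf looks like this (the values match the defaults shown above):

        akka.cluster {
          downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider, Akka.Cluster"
          split-brain-resolver {
            active-strategy = keep-majority
            stable-after = 20s
          }
        }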
    