The defaults.yaml file shipped with Storm 2.0.0 is shown below (source: https://github.com/apache/storm/blob/v2.0.0/conf/defaults.yaml):

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


########### These all have default values as shown
########### Additional configuration goes into storm.yaml

java.library.path: "/usr/local/lib:/opt/local/lib:/usr/lib:/usr/lib64"

### storm.* configs are general configurations
# the local dir is where jars are kept
storm.local.dir: "storm-local"
storm.log4j2.conf.dir: "log4j2"
storm.zookeeper.servers:
- "localhost"
storm.zookeeper.port: 2181
storm.zookeeper.root: "/storm"
storm.zookeeper.session.timeout: 20000
storm.zookeeper.connection.timeout: 15000
storm.zookeeper.retry.times: 5
storm.zookeeper.retry.interval: 1000
storm.zookeeper.retry.intervalceiling.millis: 30000
storm.zookeeper.auth.user: null
storm.zookeeper.auth.password: null
storm.exhibitor.port: 8080
storm.exhibitor.poll.uripath: "/exhibitor/v1/cluster/list"
storm.cluster.mode: "distributed" # can be distributed or local
storm.local.mode.zmq: false
storm.thrift.transport: "org.apache.storm.security.auth.SimpleTransportPlugin"
storm.thrift.socket.timeout.ms: 600000
storm.principal.tolocal: "org.apache.storm.security.auth.DefaultPrincipalToLocal"
storm.group.mapping.service: "org.apache.storm.security.auth.ShellBasedGroupsMapping"
storm.group.mapping.service.params: null
storm.messaging.transport: "org.apache.storm.messaging.netty.Context"
storm.nimbus.retry.times: 5
storm.nimbus.retry.interval.millis: 2000
storm.nimbus.retry.intervalceiling.millis: 60000
storm.nimbus.zookeeper.acls.check: true
storm.nimbus.zookeeper.acls.fixup: true

storm.auth.simple-white-list.users: []
storm.cluster.state.store: "org.apache.storm.cluster.ZKStateStorageFactory"
storm.meta.serialization.delegate: "org.apache.storm.serialization.GzipThriftSerializationDelegate"
storm.codedistributor.class: "org.apache.storm.codedistributor.LocalFileSystemCodeDistributor"
storm.workers.artifacts.dir: "workers-artifacts"
storm.health.check.dir: "healthchecks"
storm.health.check.timeout.ms: 5000
storm.disable.symlinks: false

### nimbus.* configs are for the master
nimbus.seeds : ["localhost"]
nimbus.thrift.port: 6627
nimbus.thrift.threads: 64
nimbus.thrift.max_buffer_size: 1048576
nimbus.childopts: "-Xmx1024m"
nimbus.task.timeout.secs: 30
nimbus.supervisor.timeout.secs: 60
nimbus.monitor.freq.secs: 10
nimbus.cleanup.inbox.freq.secs: 600
nimbus.inbox.jar.expiration.secs: 3600
nimbus.code.sync.freq.secs: 120
nimbus.task.launch.secs: 120
nimbus.file.copy.expiration.secs: 600
nimbus.topology.validator: "org.apache.storm.nimbus.DefaultTopologyValidator"
topology.min.replication.count: 1
topology.max.replication.wait.time.sec: 60
nimbus.credential.renewers.freq.secs: 600
nimbus.queue.size: 100000
scheduler.display.resource: false
nimbus.local.assignments.backend.class: "org.apache.storm.assignments.InMemoryAssignmentBackend"
nimbus.assignments.service.threads: 10
nimbus.assignments.service.thread.queue.size: 100
nimbus.worker.heartbeats.recovery.strategy.class: "org.apache.storm.nimbus.TimeOutWorkerHeartbeatsRecoveryStrategy"
nimbus.topology.blobstore.deletion.delay.ms: 300000

### ui.* configs are for the master
ui.host: 0.0.0.0
ui.port: 8080
ui.childopts: "-Xmx768m"
ui.actions.enabled: true
ui.filter: null
ui.filter.params: null
ui.users: null
ui.header.buffer.bytes: 4096
ui.http.creds.plugin: org.apache.storm.security.auth.DefaultHttpCredentialsPlugin
ui.pagination: 20
ui.disable.http.binding: true
ui.disable.spout.lag.monitoring: true

logviewer.port: 8000
logviewer.childopts: "-Xmx128m"
logviewer.cleanup.age.mins: 10080
logviewer.appender.name: "A1"
logviewer.max.sum.worker.logs.size.mb: 4096
logviewer.max.per.worker.logs.size.mb: 2048
logviewer.disable.http.binding: true
logviewer.filter: null
logviewer.filter.params: null

logs.users: null

drpc.port: 3772
drpc.worker.threads: 64
drpc.max_buffer_size: 1048576
drpc.queue.size: 128
drpc.invocations.port: 3773
drpc.invocations.threads: 64
drpc.request.timeout.secs: 600
drpc.childopts: "-Xmx768m"
drpc.http.port: 3774
drpc.https.port: -1
drpc.https.keystore.password: ""
drpc.https.keystore.type: "JKS"
drpc.http.creds.plugin: org.apache.storm.security.auth.DefaultHttpCredentialsPlugin
drpc.authorizer.acl.filename: "drpc-auth-acl.yaml"
drpc.authorizer.acl.strict: false
drpc.disable.http.binding: true

transactional.zookeeper.root: "/transactional"
transactional.zookeeper.servers: null
transactional.zookeeper.port: null

## blobstore configs
supervisor.blobstore.class: "org.apache.storm.blobstore.NimbusBlobStore"
supervisor.blobstore.download.thread.count: 5
supervisor.blobstore.download.max_retries: 3
supervisor.localizer.cache.target.size.mb: 10240
supervisor.localizer.cleanup.interval.ms: 30000

nimbus.blobstore.class: "org.apache.storm.blobstore.LocalFsBlobStore"
nimbus.blobstore.expiration.secs: 600

storm.blobstore.inputstream.buffer.size.bytes: 65536
client.blobstore.class: "org.apache.storm.blobstore.NimbusBlobStore"
storm.blobstore.replication.factor: 3
# For secure mode we would want to change this config to true
storm.blobstore.acl.validation.enabled: false

### supervisor.* configs are for node supervisors
# Define the amount of workers that can be run on this machine. Each worker is assigned a port to use for communication
supervisor.slots.ports:
    - 6700
    - 6701
    - 6702
    - 6703
supervisor.childopts: "-Xmx256m"
supervisor.run.worker.as.user: false
#how long supervisor will wait to ensure that a worker process is started
supervisor.worker.start.timeout.secs: 120
#how long between heartbeats until supervisor considers that worker dead and tries to restart it
supervisor.worker.timeout.secs: 30
#How many seconds to allow for graceful worker shutdown when killing workers before resorting to force kill
supervisor.worker.shutdown.sleep.secs: 3
#how frequently the supervisor checks on the status of the processes it's monitoring and restarts if necessary
supervisor.monitor.frequency.secs: 3
#how frequently the supervisor heartbeats to the cluster state (for nimbus)
supervisor.heartbeat.frequency.secs: 5
#max timeout for a node worker heartbeats when master gains leadership
supervisor.worker.heartbeats.max.timeout.secs: 600
supervisor.enable: true
supervisor.supervisors: []
supervisor.supervisors.commands: []
supervisor.memory.capacity.mb: 4096.0
#By convention 1 cpu core should be about 100, but this can be adjusted if needed
# using 100 makes it simple to set the desired value to the capacity measurement
# for single threaded bolts
supervisor.cpu.capacity: 400.0

#Supervisor thrift config
supervisor.thrift.port: 6628
supervisor.queue.size: 128
supervisor.thrift.threads: 16
supervisor.thrift.max_buffer_size: 1048576
supervisor.thrift.socket.timeout.ms: 5000

### worker.* configs are for task workers
worker.heap.memory.mb: 768
worker.childopts: "-Xmx%HEAP-MEM%m -XX:+PrintGCDetails -Xloggc:artifacts/gc.log -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=1M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=artifacts/heapdump"
worker.gc.childopts: ""

# Unlocking commercial features requires a special license from Oracle.
# See http://www.oracle.com/technetwork/java/javase/terms/products/index.html
# For this reason, profiler features are disabled by default.
worker.profiler.enabled: false
worker.profiler.childopts: "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder"
worker.profiler.command: "flight.bash"
worker.heartbeat.frequency.secs: 1

# check whether dynamic log levels can be reset from DEBUG to INFO in workers
worker.log.level.reset.poll.secs: 30

# control how many worker receiver threads we need per worker
topology.worker.receiver.thread.count: 1

# Executor metrics reporting interval.
# Cause the ui only show built in metrics, we should keep sync with the built in metrics interval,
# also the metrics consumer's collecting interval.
# See topology.builtin.metrics.bucket.size.secs and storm.cluster.metrics.consumer.publish.interval.secs.
executor.metrics.frequency.secs: 60

task.heartbeat.frequency.secs: 3
task.refresh.poll.secs: 10
task.credentials.poll.secs: 30

# Used by workers to communicate
storm.messaging.netty.server_worker_threads: 1
storm.messaging.netty.client_worker_threads: 1
storm.messaging.netty.buffer_size: 5242880 #5MB buffer

# The netty write buffer high watermark in bytes.
# If the number of bytes queued in the netty's write buffer exceeds this value, the netty client will block
# until the value falls below the low water mark.
storm.messaging.netty.buffer.high.watermark: 16777216 # 16 MB
# The netty write buffer low watermark in bytes.
# Once the number of bytes queued in the write buffer exceeded the high water mark and then
# dropped down below this value, any blocked clients will unblock and start processing further messages.
storm.messaging.netty.buffer.low.watermark: 8388608 # 8 MB
# Since nimbus.task.launch.secs and supervisor.worker.start.timeout.secs are 120, other workers should also wait at least that long before giving up on connecting to the other worker. The reconnection period need also be bigger than storm.zookeeper.session.timeout(default is 20s), so that we can abort the reconnection when the target worker is dead.
storm.messaging.netty.max_wait_ms: 1000
storm.messaging.netty.min_wait_ms: 100

# If the Netty messaging layer is busy(netty internal buffer not writable), the Netty client will try to batch message as more as possible up to the size of storm.messaging.netty.transfer.batch.size bytes, otherwise it will try to flush message as soon as possible to reduce latency.
storm.messaging.netty.transfer.batch.size: 262144
# Sets the backlog value to specify when the channel binds to a local address
storm.messaging.netty.socket.backlog: 500

# By default, the Netty SASL authentication is set to false. Users can override and set it true for a specific topology.
storm.messaging.netty.authentication: false

# Default plugin to use for automatic network topology discovery
storm.network.topography.plugin: org.apache.storm.networktopography.DefaultRackDNSToSwitchMapping

# default number of seconds group mapping service will cache user group
storm.group.mapping.service.cache.duration.secs: 120

### topology.* configs are for specific executing storms
topology.enable.message.timeouts: true
topology.debug: false
topology.workers: 1
topology.acker.executors: null
topology.eventlogger.executors: 0
topology.tasks: null
# maximum amount of time a message has to complete before it's considered failed
topology.message.timeout.secs: 30
topology.multilang.serializer: "org.apache.storm.multilang.JsonSerializer"
topology.shellbolt.max.pending: 100
topology.skip.missing.kryo.registrations: false
topology.max.task.parallelism: null
topology.max.spout.pending: null # ideally should be larger than topology.producer.batch.size. (esp. if topology.batch.flush.interval.millis=0)
topology.state.synchronization.timeout.secs: 60
topology.stats.sample.rate: 0.05
topology.builtin.metrics.bucket.size.secs: 60
topology.fall.back.on.java.serialization: true
topology.worker.childopts: null
topology.worker.logwriter.childopts: "-Xmx64m"
topology.tick.tuple.freq.secs: null
topology.worker.shared.thread.pool.size: 4

# Spout Wait Strategy - employed when there is no data to produce
topology.spout.wait.strategy: "org.apache.storm.policy.WaitStrategyProgressive"
topology.spout.wait.park.microsec : 100 # park time for org.apache.storm.policy.WaitStrategyPark. Busy spins if set to 0.

topology.spout.wait.progressive.level1.count: 0 # number of iterations to spend in level 1 [no sleep] of WaitStrategyProgressive, before progressing to level 2
topology.spout.wait.progressive.level2.count: 0 # number of iterations to spend in level 2 [parkNanos(1)] of WaitStrategyProgressive, before progressing to level 3
topology.spout.wait.progressive.level3.sleep.millis: 1 # sleep duration for idling iterations in level 3 of WaitStrategyProgressive

# Bolt Wait Strategy - employed when there is no data in its receive buffer to process
topology.bolt.wait.strategy : "org.apache.storm.policy.WaitStrategyProgressive"

topology.bolt.wait.park.microsec : 100 # park time for org.apache.storm.policy.WaitStrategyPark. Busy spins if set to 0.

topology.bolt.wait.progressive.level1.count: 1 # number of iterations to spend in level 1 [no sleep] of WaitStrategyProgressive, before progressing to level 2
topology.bolt.wait.progressive.level2.count: 1000 # number of iterations to spend in level 2 [parkNanos(1)] of WaitStrategyProgressive, before progressing to level 3
topology.bolt.wait.progressive.level3.sleep.millis: 1 # sleep duration for idling iterations in level 3 of WaitStrategyProgressive

# BackPressure Wait Strategy - for any producer (spout/bolt/transfer thread) when the downstream Q is full
topology.backpressure.wait.strategy: "org.apache.storm.policy.WaitStrategyProgressive"

topology.backpressure.wait.park.microsec: 100 # park time for org.apache.storm.policy.WaitStrategyPark. Busy spins if set to 0.

topology.backpressure.wait.progressive.level1.count: 1 # number of iterations to spend in level 1 [no sleep] of WaitStrategyProgressive, before progressing to level 2
topology.backpressure.wait.progressive.level2.count: 1000 # number of iterations to spend in level 2 [parkNanos(1)] of WaitStrategyProgressive, before progressing to level 3
topology.backpressure.wait.progressive.level3.sleep.millis: 1 # sleep duration for idling iterations in level 3 of WaitStrategyProgressive


topology.backpressure.check.millis: 50 # how often to check if backpressure has relieved on executors under BP, for informing other workers to resume sending msgs to them. Must be > 0
topology.executor.overflow.limit: 0 # max items in overflowQ of any bolt/spout. When exceeded, worker will drop incoming messages (from the workers) destined to that overflowing spout/bolt. Set to 0 to disable overflow limiting. Enabling this may degrade perf slightly.

topology.error.throttle.interval.secs: 10
topology.max.error.report.per.interval: 5
topology.kryo.factory: "org.apache.storm.serialization.DefaultKryoFactory"
topology.tuple.serializer: "org.apache.storm.serialization.types.ListDelegateSerializer"
topology.trident.batch.emit.interval.millis: 500
topology.testing.always.try.serialize: false
topology.classpath: null
topology.environment: null

topology.transfer.buffer.size: 1000 # size of recv queue for transfer worker thread
topology.transfer.batch.size: 1 # can be no larger than half of `topology.transfer.buffer.size`

topology.executor.receive.buffer.size: 32768 # size of recv queue for spouts & bolts. Will be internally rounded up to next power of 2 (if not already a power of 2)
topology.producer.batch.size: 1 # can be no larger than half of `topology.executor.receive.buffer.size`

topology.batch.flush.interval.millis: 1 # Flush tuples are disabled if this is set to 0 or if (topology.producer.batch.size=1 and topology.transfer.batch.size=1).
topology.spout.recvq.skips: 3 # Check recvQ once every N invocations of Spout's nextTuple() [when ACKs disabled]

topology.disable.loadaware.messaging: false
topology.state.checkpoint.interval.ms: 1000
topology.localityaware.higher.bound: 0.8
topology.localityaware.lower.bound: 0.2
topology.serialized.message.size.metrics: false

# Configs for Resource Aware Scheduler
# topology priority describing the importance of the topology in decreasing importance starting from 0 (i.e. 0 is the highest priority and the priority importance decreases as the priority number increases).
# Recommended range of 0-29 but no hard limit set.
topology.priority: 29
topology.component.resources.onheap.memory.mb: 128.0
topology.component.resources.offheap.memory.mb: 0.0
topology.component.cpu.pcore.percent: 10.0
topology.worker.max.heap.size.mb: 768.0
topology.scheduler.strategy: "org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy"
resource.aware.scheduler.priority.strategy: "org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy"

blacklist.scheduler.tolerance.time.secs: 300
blacklist.scheduler.tolerance.count: 3
blacklist.scheduler.resume.time.secs: 1800
blacklist.scheduler.reporter: "org.apache.storm.scheduler.blacklist.reporters.LogReporter"
blacklist.scheduler.strategy: "org.apache.storm.scheduler.blacklist.strategies.DefaultBlacklistStrategy"

dev.zookeeper.path: "/tmp/dev-storm-zookeeper"

pacemaker.servers: []
pacemaker.port: 6699
pacemaker.base.threads: 10
pacemaker.max.threads: 50
pacemaker.client.max.threads: 2
pacemaker.thread.timeout: 10
pacemaker.childopts: "-Xmx1024m"
pacemaker.auth.method: "NONE"
pacemaker.kerberos.users: []
pacemaker.thrift.message.size.max: 10485760

#default storm daemon metrics reporter plugins
storm.daemon.metrics.reporter.plugins:
- "org.apache.storm.daemon.metrics.reporters.JmxPreparableReporter"

storm.metricstore.class: "org.apache.storm.metricstore.rocksdb.RocksDbStore"
storm.metricprocessor.class: "org.apache.storm.metricstore.NimbusMetricProcessor"
storm.metricstore.rocksdb.location: "storm_rocks"
storm.metricstore.rocksdb.create_if_missing: true
storm.metricstore.rocksdb.metadata_string_cache_capacity: 4000
storm.metricstore.rocksdb.retention_hours: 240

# configuration of cluster metrics consumer
storm.cluster.metrics.consumer.publish.interval.secs: 60

storm.resource.isolation.plugin: "org.apache.storm.container.cgroup.CgroupManager"
# Also determines whether the unit tests for cgroup runs.
# If storm.resource.isolation.plugin.enable is set to false the unit tests for cgroups will not run
storm.resource.isolation.plugin.enable: false
storm.cgroup.memory.enforcement.enable: false
storm.cgroup.inherit.cpuset.configs: false

# Configs for CGroup support
storm.cgroup.hierarchy.dir: "/cgroup/storm_resources"
storm.cgroup.resources:
- "cpu"
- "memory"
storm.cgroup.hierarchy.name: "storm"
storm.supervisor.cgroup.rootdir: "storm"
storm.cgroup.cgexec.cmd: "/bin/cgexec"
storm.cgroup.memory.limit.tolerance.margin.mb: 0.0
storm.supervisor.memory.limit.tolerance.margin.mb: 128.0
storm.supervisor.hard.memory.limit.multiplier: 2.0
storm.supervisor.hard.memory.limit.overage.mb: 2024
storm.supervisor.low.memory.threshold.mb: 1024
storm.supervisor.medium.memory.threshold.mb: 1536
storm.supervisor.medium.memory.grace.period.ms: 30000

storm.worker.min.cpu.pcore.percent: 0.0

storm.topology.classpath.beginning.enabled: false
worker.metrics:
"CGroupMemory": "org.apache.storm.metric.cgroup.CGroupMemoryUsage"
"CGroupMemoryLimit": "org.apache.storm.metric.cgroup.CGroupMemoryLimit"
"CGroupCpu": "org.apache.storm.metric.cgroup.CGroupCpu"
"CGroupCpuGuarantee": "org.apache.storm.metric.cgroup.CGroupCpuGuarantee"

# The number of buckets for running statistics
num.stat.buckets: 20

# Metrics v2 configuration (optional)
#storm.metrics.reporters:
#  # Graphite Reporter
#  - class: "org.apache.storm.metrics2.reporters.GraphiteStormReporter"
#    daemons:
#        - "supervisor"
#        - "nimbus"
#        - "worker"
#    report.period: 60
#    report.period.units: "SECONDS"
#    graphite.host: "localhost"
#    graphite.port: 2003
#
#  # Console Reporter
#  - class: "org.apache.storm.metrics2.reporters.ConsoleStormReporter"
#    daemons:
#        - "worker"
#    report.period: 10
#    report.period.units: "SECONDS"
#    filter:
#        class: "org.apache.storm.metrics2.filters.RegexFilter"
#        expression: ".*my_component.*emitted.*"

Basic Storm configuration

  • storm.local.dir: the local directory where the nimbus and supervisor processes keep state (conf and jars); it must be created on every Storm node with the proper read/write permissions;
  • storm.log4j2.conf.dir: directory holding the log4j2 configuration;
  • storm.zookeeper.servers: Storm depends heavily on ZooKeeper for state storage; to keep ZooKeeper highly available, list several ZK addresses;
  • storm.zookeeper.port: defaults to 2181;
  • storm.zookeeper.root: the root path Storm uses in ZooKeeper; if several Storm clusters share one ZK ensemble, simply give each cluster its own root;
  • storm.zookeeper.session.timeout: defaults to 20000 (20s); the ZK session timeout for nimbus and the supervisors. If session-timeout errors show up often in the logs, consider raising this value or tuning the GC settings; it cannot be raised arbitrarily, because ZK enforces its own maximum session time (20 * tickTime by default);
  • storm.zookeeper.connection.timeout: connection timeout;
  • storm.zookeeper.retry.times: defaults to 5; number of retries for ZK operations;
  • storm.zookeeper.retry.interval: defaults to 1000, i.e. a 1s retry interval;
  • storm.zookeeper.retry.intervalceiling.millis: 30000 (30 seconds); the upper bound on the retry interval;
  • storm.messaging.transport: "org.apache.storm.messaging.netty.Context"; the transport used for messages between tasks, Netty by default;
  • storm.cluster.mode: "distributed"; the Storm cluster mode;
  • storm.id: the id of a running topology, made up of the topology name plus a unique random number;
  • storm.local.mode.zmq: whether local mode uses ZeroMQ for messaging; if false, the Java messaging system is used. Defaults to false;
    Note: Storm depends heavily on ZooKeeper, which plays a central role in the distributed setup. A storm.yaml sketch for this group follows the list.
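Overrides for any of these go into storm.yaml on each node. A minimal sketch, assuming hypothetical hosts zk1.example.com–zk3.example.com and placeholder paths of your choosing:

# storm.yaml — basic/ZooKeeper overrides (host names and paths are placeholders)
storm.zookeeper.servers:
    - "zk1.example.com"
    - "zk2.example.com"
    - "zk3.example.com"
storm.zookeeper.port: 2181
storm.zookeeper.root: "/storm"            # give each cluster its own root if they share one ZK ensemble
storm.zookeeper.session.timeout: 20000    # raise if session-timeout errors appear in the logs
storm.local.dir: "/data/storm-local"      # must exist and be writable on every node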

Nimbus-related settings

  • storm.nimbus.retry.times: 5; number of retries for nimbus operations
  • storm.nimbus.retry.interval.millis: 2000 (2s); retry interval
  • storm.nimbus.retry.intervalceiling.millis: 60000; maximum retry interval, 1 minute
  • nimbus.seeds: ["localhost"]; the list of nimbus hosts used for leader discovery; it removes nimbus as a single point of failure and replaces the old nimbus.host setting
  • nimbus.thrift.port: 6627; the thrift port nimbus listens on; clients upload jars and submit topologies through it
  • nimbus.thrift.threads: 64; number of nimbus thrift threads
  • nimbus.thrift.max_buffer_size: 1048576 (1 MB)
  • nimbus.childopts: "-Xmx1024m"; JVM options for the nimbus process
  • nimbus.task.timeout.secs: 30; how long a task may go without heartbeating before nimbus considers it dead and reassigns it
  • nimbus.supervisor.timeout.secs: 60; after one minute without heartbeats nimbus considers the supervisor dead and stops assigning new work to it
  • nimbus.monitor.freq.secs: 10; how often nimbus checks supervisor heartbeats and reassigns work if necessary. Note that when a machine goes down, nimbus takes action right away
  • nimbus.reassign: whether nimbus reassigns tasks when they fail; defaults to true, and changing it is not recommended
  • nimbus.cleanup.inbox.freq.secs: 600; how often the thread that cleans up the inbox files runs
  • nimbus.inbox.jar.expiration.secs: 3600 (one hour); jar expiration time
  • nimbus.code.sync.freq.secs: 120; how often (every 2 minutes) nimbus syncs the code of topologies it is missing locally
  • nimbus.task.launch.secs: 120; timeout applied to a task's first launch
  • nimbus.file.copy.expiration.secs: 600; timeout for file uploads and downloads
  • nimbus.topology.validator: "org.apache.storm.nimbus.DefaultTopologyValidator"; topology validator that decides whether a topology may run
  • topology.min.replication.count: 1; the leader nimbus performs topology actions only once the topology code has been replicated to at least this many nimbus seeds
  • topology.max.replication.wait.time.sec: 60; the maximum time to wait for the replication count to reach topology.min.replication.count; once it elapses, the topology action proceeds regardless of how many replicas exist (a storm.yaml sketch follows this list)
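A hedged storm.yaml sketch for a small highly-available nimbus setup; the host names are placeholders and the remaining values restate the defaults listed above:

# storm.yaml — nimbus overrides (nimbus1/nimbus2 are placeholder host names)
nimbus.seeds: ["nimbus1.example.com", "nimbus2.example.com"]
nimbus.thrift.port: 6627
nimbus.childopts: "-Xmx1024m"          # raise the nimbus heap for larger clusters
nimbus.task.timeout.secs: 30
nimbus.supervisor.timeout.secs: 60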

Supervisor-related settings

  • supervisor.slots.ports: the number of workers that can run on this machine; each worker is bound to one port. A common rule of thumb is one worker per CPU core, similar to the slot settings of a Hadoop NodeManager
  • supervisor.childopts: "-Xmx256m"; JVM options for the supervisor process
  • supervisor.worker.start.timeout.secs: 120; how long the supervisor waits for a worker to start
  • supervisor.worker.timeout.secs: 30; how long a worker may go without heartbeating before the supervisor considers it dead and restarts it
  • supervisor.worker.shutdown.sleep.secs: 3 (3 seconds); how long the supervisor allows for a graceful worker shutdown before force-killing it
  • supervisor.monitor.frequency.secs: 3; the supervisor checks worker heartbeats every 3s and restarts workers if necessary
  • supervisor.heartbeat.frequency.secs: 5; how often the supervisor heartbeats to nimbus via the cluster state
  • supervisor.enable: true; whether the supervisor should launch the workers assigned to it (a storm.yaml sketch follows this list)
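A sketch of the supervisor side of storm.yaml, assuming a node that should run four workers; the port numbers simply restate the defaults above:

# storm.yaml — supervisor overrides (one port per worker slot)
supervisor.slots.ports:
    - 6700
    - 6701
    - 6702
    - 6703
supervisor.childopts: "-Xmx256m"
supervisor.worker.start.timeout.secs: 120
supervisor.worker.timeout.secs: 30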

Worker settings

  • worker.childopts: JVM options for the worker processes; the 2.0.0 default shown above fills in the heap size via the %HEAP-MEM% placeholder
  • worker.gc.childopts: ""; worker GC options, which can be overridden by topology.worker.gc.childopts
  • worker.heartbeat.frequency.secs: 1; how often a worker heartbeats to its supervisor
  • topology.worker.receiver.thread.count: 1; number of receiver threads per worker
  • task.heartbeat.frequency.secs: 3 (3s); how often tasks report heartbeats to nimbus
  • task.refresh.poll.secs: 10; how often a task refreshes its connections to other tasks (when a task is reassigned, the tasks that send to it must re-establish their connections; a sketch follows this list)
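A hedged sketch of worker-level overrides in storm.yaml. The heap value and GC flags below are illustrative; since the default worker.childopts above derives the heap from the %HEAP-MEM% placeholder, the heap is usually raised through worker.heap.memory.mb rather than a hard-coded -Xmx:

# storm.yaml — worker overrides (heap size and GC flags are illustrative)
worker.heap.memory.mb: 1024
worker.gc.childopts: "-XX:+UseG1GC"        # cluster-wide GC flags; can be overridden per topology
worker.heartbeat.frequency.secs: 1
topology.worker.receiver.thread.count: 1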

Messaging-related settings

  • storm.messaging.netty.server_worker_threads: 1; number of threads the Netty server uses to receive messages
  • storm.messaging.netty.client_worker_threads: 1; number of threads the Netty client uses to send messages
  • storm.messaging.netty.buffer_size: 5242880 (5 MB); Netty buffer size
  • storm.messaging.netty.max_retries: 300; number of reconnection attempts
  • storm.messaging.netty.max_wait_ms: 1000 (1s); maximum reconnection wait. The overall reconnection window should be longer than nimbus.task.launch.secs and supervisor.worker.start.timeout.secs (120s by default), and also longer than the ZK session timeout, so that a reconnection attempt can be abandoned once the target worker is known to be dead
  • storm.messaging.netty.min_wait_ms: 100
  • storm.messaging.netty.transfer.batch.size: 262144; when the Netty layer is busy (its internal buffer is not writable), the client batches messages up to this many bytes; otherwise it flushes them as soon as possible to reduce latency (a storm.yaml sketch follows this list)
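A sketch of Netty transport tuning in storm.yaml; these values restate the defaults above and are best changed only after measuring:

# storm.yaml — inter-worker messaging (Netty) overrides
storm.messaging.transport: "org.apache.storm.messaging.netty.Context"
storm.messaging.netty.server_worker_threads: 1
storm.messaging.netty.client_worker_threads: 1
storm.messaging.netty.buffer_size: 5242880        # 5 MB
storm.messaging.netty.min_wait_ms: 100            # reconnect backoff floor
storm.messaging.netty.max_wait_ms: 1000           # reconnect backoff ceiling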

Topology-related settings (configuration for a specific topology)

  • topology.enable.message.timeouts: true; enables message timeouts so that tuples are guaranteed to be fully processed or replayed;
  • topology.acker.executors: null; number of acker executors;
  • topology.message.timeout.secs: 30; how long a tuple may take to be fully processed before it is considered failed;
  • topology.max.spout.pending: null; when a spout emits a tuple it is placed on a pending list; this setting caps how many spout tuples may be in flight in the topology at once, and can be estimated from the message timeout and the per-second throughput;
  • topology.workers: 1; set via Config#setNumWorkers; a worker runs a subset of the topology and hosts several executors, which may be bolts or spouts;
  • topology.tasks: null; set via setNumTasks; number of tasks;
  • topology.max.task.parallelism: null; maximum parallelism for the topology (a sketch follows this list);
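These can be set cluster-wide in storm.yaml or per topology (for example through the Config object at submit time). A hedged sketch with values chosen purely for illustration:

# topology-level overrides (values are illustrative)
topology.workers: 4
topology.acker.executors: 4
topology.max.spout.pending: 1000       # cap on in-flight spout tuples when acking is enabled
topology.message.timeout.secs: 30      # a tuple tree older than this is failed and replayed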

Worker JVM/GC settings (per topology)

  • topology.worker.childopts: null;
  • topology.worker.logwriter.childopts: "-Xmx64m";
  • topology.worker.shared.thread.pool.size: 4; size of the thread pool shared by the tasks within a worker (a sketch follows this list);
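A sketch of per-topology worker JVM overrides; the flags below are placeholders, and topology.worker.childopts is appended to the cluster-wide worker.childopts rather than replacing it:

# per-topology worker JVM overrides (flags are illustrative)
topology.worker.childopts: "-XX:+HeapDumpOnOutOfMemoryError"
topology.worker.gc.childopts: "-XX:+UseG1GC -XX:MaxGCPauseMillis=100"
topology.worker.logwriter.childopts: "-Xmx64m"
topology.worker.shared.thread.pool.size: 4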

Intra-worker messaging parameters (Disruptor-based in releases before 2.0):

  • topology.executor.receive.buffer.size: size of each executor's receive queue (batched); 1024 in older releases, 32768 in the 2.0.0 defaults above;
  • topology.executor.send.buffer.size: 1024; size of each executor's send queue (individual messages) in pre-2.0 releases; no longer present in the 2.0.0 defaults;
  • topology.transfer.buffer.size: size of the worker transfer queue (batched); 1024 in older releases, 1000 in the 2.0.0 defaults above;
  • topology.disruptor.wait.strategy: com.lmax.disruptor.BlockingWaitStrategy; the pre-2.0 latency/throughput trade-off, superseded by the topology.*.wait.strategy settings above;
  • topology.disruptor.wait.timeout.millis: 1000; a latency/CPU trade-off: a longer wait lowers CPU usage, while a shorter wait keeps latency low at the cost of higher CPU load (a sketch of the 2.0 equivalents follows this list);
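In 2.0.0 the corresponding knobs are the receive/transfer queue and batching settings shown in the defaults above. A sketch of the 2.0-style overrides, with values restating those defaults:

# Storm 2.0 intra-worker queue/batching settings
topology.executor.receive.buffer.size: 32768   # rounded up to a power of 2 internally
topology.transfer.buffer.size: 1000
topology.producer.batch.size: 1                # no larger than half the executor receive buffer
topology.transfer.batch.size: 1                # no larger than half the transfer buffer
topology.batch.flush.interval.millis: 1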

Other settings:

  • topology.debug: false; debug mode disabled;
  • topology.tick.tuple.freq.secs: null; used by topologies that need periodic, tick-tuple-driven processing;
  • topology.spout.wait.strategy: the wait strategy applied when a spout has no data to emit or has hit max spout pending; "org.apache.storm.policy.WaitStrategyProgressive" in the 2.0.0 defaults above (older releases used backtype.storm.spout.SleepSpoutWaitStrategy);
  • topology.sleep.spout.wait.strategy.time.ms: 1; sleep time used by SleepSpoutWaitStrategy (a sketch follows this list);
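A final hedged sketch pulling together the debugging and spout wait-strategy settings above; the tick interval is illustrative, the rest restates the 2.0.0 defaults:

# misc per-topology settings (tick interval is illustrative)
topology.debug: false
topology.tick.tuple.freq.secs: 60                 # emit a tick tuple to each bolt every 60s
topology.spout.wait.strategy: "org.apache.storm.policy.WaitStrategyProgressive"
topology.spout.wait.progressive.level3.sleep.millis: 1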