- 1. Preface
- 2. Data Structures
- 2.1. JobVertex
- 2.2. JobEdge
- 2.3. Data Example
- 3. Code Walkthrough
- 3.1. Entry Point
- 3.2. StreamingJobGraphGenerator Fields
- 3.3. StreamingJobGraphGenerator#createJobGraph
- 3.4. StreamingJobGraphGenerator#setChaining
1. Preface
The conversion of a StreamGraph into a JobGraph also happens on the Client and mainly does four things (see the sketch after this list):
⚫ Convert each StreamNode into a JobVertex.
⚫ Merge StreamNodes that can be chained into a single JobVertex.
⚫ Convert each StreamEdge into a JobEdge.
⚫ Create an IntermediateDataSet between the JobVertex and the JobEdge to connect them.
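Before looking at the data structures, here is a minimal sketch of how this conversion can be triggered locally (assuming the public Flink 1.12 accessors StreamExecutionEnvironment#getStreamGraph and StreamGraph#getJobGraph; the tiny pipeline is only a placeholder):
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.graph.StreamGraph;

public class StreamToJobGraphSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Placeholder pipeline: source -> map -> print.
        env.fromElements("to", "be", "or", "not", "to", "be")
                .map(new MapFunction<String, String>() {
                    @Override
                    public String map(String value) {
                        return value.toUpperCase();
                    }
                })
                .print();
        // Build the StreamGraph, then translate it into a JobGraph on the client.
        StreamGraph streamGraph = env.getStreamGraph();
        JobGraph jobGraph = streamGraph.getJobGraph(); // delegates to StreamingJobGraphGenerator
        // Operators that were chained together show up as a single JobVertex.
        for (JobVertex vertex : jobGraph.getVertices()) {
            System.out.println(vertex.getName() + ", parallelism=" + vertex.getParallelism());
        }
    }
}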
2. Data Structures
2.1. JobVertex
A JobVertex is a node in the JobGraph. It represents an operation such as a source, map, or sink.
Its main fields are:
- name : the vertex name
- parallelism : the parallelism
- invokableClassName : the name of the invokable class, e.g.:
org.apache.flink.streaming.runtime.tasks.OneInputStreamTask
org.apache.flink.streaming.runtime.tasks.SourceStreamTask
- operatorIDs : the IDs of all operators contained in this vertex
- inputs : the ArrayList of incoming JobEdges
- results : the ArrayList of produced IntermediateDataSets
private static final long serialVersionUID = 1L;
private static final String DEFAULT_NAME = "(unnamed vertex)";
// --------------------------------------------------------------------------------------------
// Members that define the structure / topology of the graph
// --------------------------------------------------------------------------------------------
/**
* The ID of the vertex.
* */
private final JobVertexID id;
/**
 * The name of the vertex. This will be shown in runtime logs and will be in the runtime
 * environment.
 */
private String name;
/**
* Optional, the name of the operator, such as 'Flat Map' or 'Join', to be included in the JSON
* plan.
*/
private String operatorName;
/** Number of subtasks to split this task into at runtime. */
private int parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
/** The class of the invokable. */
private String invokableClassName;
/**
 * The IDs of all operators contained in this vertex.
 *
 * <p>The ID pairs are stored depth-first post-order; for the forking chain below the IDs would
 * be stored as [D, E, B, C, A].
 *
 * <pre>
 *     A - B - D
 *          \    \
 *           C    E
 * </pre>
 *
 * <p>This is the same order that operators are stored in the {@code StreamTask}.
 */
private final List<OperatorIDPair> operatorIDs;
/** List of edges with incoming data. One per Reader. */
private final ArrayList<JobEdge> inputs = new ArrayList<>();
/** List of produced data sets, one per writer. */
private final ArrayList<IntermediateDataSet> results = new ArrayList<>();
/** The list of factories for operator coordinators. */
private final ArrayList<SerializedValue<OperatorCoordinator.Provider>> operatorCoordinators = new ArrayList<>();
/** Maximum number of subtasks to split this task into at runtime. */
private int maxParallelism = -1;
/** The minimum resource of the vertex. */
private ResourceSpec minResources = ResourceSpec.DEFAULT;
/** The preferred resource of the vertex. */
private ResourceSpec preferredResources = ResourceSpec.DEFAULT;
/** Custom configuration passed to the assigned task at runtime. */
private Configuration configuration;
/** Indicates whether this job vertex is stoppable or not. */
private boolean isStoppable = false;
/**
* Optionally, a source of input splits.
* */
private InputSplitSource<?> inputSplitSource;
/**
* Optionally, a sharing group that allows subtasks from different job vertices to run
* concurrently in one slot.
*/
@Nullable private SlotSharingGroup slotSharingGroup;
/** The group inside which the vertex subtasks share slots. */
@Nullable private CoLocationGroup coLocationGroup;
/**
* Optional, the description of the operator, like 'Hash Join', or 'Sorted Group Reduce', to be
* included in the JSON plan.
*/
private String operatorDescription;
/** Optional, pretty name of the operator, to be displayed in the JSON plan. */
private String operatorPrettyName;
/**
* Optional, the JSON for the optimizer properties of the operator result, to be included in the
* JSON plan.
*/
private String resultOptimizerProperties;
/** The input dependency constraint to schedule this vertex. */
private InputDependencyConstraint inputDependencyConstraint = InputDependencyConstraint.ANY;
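To make these fields concrete, here is a small standalone sketch of building a JobVertex by hand (for illustration only, this is not how the generator is normally used; the JobVertex setters shown are the ones available in Flink 1.12, while the name and parallelism are made up):
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.streaming.runtime.tasks.OneInputStreamTask;

public class JobVertexSketch {
    public static void main(String[] args) {
        JobVertex vertex = new JobVertex("Flat Map");        // name
        vertex.setParallelism(4);                            // parallelism
        vertex.setInvokableClass(OneInputStreamTask.class);  // fills invokableClassName
        System.out.println(vertex.getName() + " -> " + vertex.getInvokableClassName());
    }
}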
2.2. JobEdge
A JobEdge represents an edge (communication channel) in the JobGraph.
The edge always goes from an intermediate result partition to a JobVertex.
It is parameterized with its {@link DistributionPattern}.
/** The vertex connected to this edge. */
private final JobVertex target;
/**
 * The distribution pattern that should be used for this job edge (ALL_TO_ALL or POINTWISE).
 */
private final DistributionPattern distributionPattern;
/** The channel rescaler that should be used for this job edge on the downstream side. */
private SubtaskStateMapper downstreamSubtaskStateMapper = SubtaskStateMapper.ROUND_ROBIN;
/** The channel rescaler that should be used for this job edge on the upstream side. */
private SubtaskStateMapper upstreamSubtaskStateMapper = SubtaskStateMapper.ROUND_ROBIN;
/** The data set at the source of the edge; may be null if the edge is not yet connected. */
private IntermediateDataSet source;
/** The id of the source intermediate data set. */
private IntermediateDataSetID sourceId;
/**
 * Optional name for the data shipping strategy (forward, partition hash, rebalance, ...), to be
 * displayed in the JSON plan.
 */
private String shipStrategyName;
/**
 * Optional name for the pre-processing operation (sort, combining sort, ...), to be displayed
 * in the JSON plan.
 */
private String preProcessingOperationName;
/** Optional description of the caching inside an operator, to be displayed in the JSON plan. */
private String operatorLevelCachingDescription;
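The wiring between JobVertex, IntermediateDataSet and JobEdge can also be shown with a small standalone sketch (for illustration only; connectNewDataSetAsInput is the Flink 1.12 API that StreamingJobGraphGenerator#connect uses internally, and the vertex names are made up):
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;
import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.JobEdge;
import org.apache.flink.runtime.jobgraph.JobVertex;

public class JobEdgeSketch {
    public static void main(String[] args) {
        JobVertex source = new JobVertex("Source: Socket Stream");
        JobVertex flatMap = new JobVertex("Flat Map");
        // Creates an IntermediateDataSet on the upstream vertex and a JobEdge
        // from that data set to the downstream vertex.
        JobEdge edge = flatMap.connectNewDataSetAsInput(
                source, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED_BOUNDED);
        System.out.println(
                edge.getSource().getProducer().getName() + " --> " + edge.getTarget().getName());
    }
}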
2.3. Data Example
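The debugger dump below was taken while submitting the Socket Window WordCount example (see userJars and jobName further down). As orientation, here is a rough reconstruction of that pipeline, written with Tuple2 instead of the example's POJO and with an assumed environment parallelism of 4, so the dump is easier to map back to DataStream code:
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class SocketWindowWordCountSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4); // assumed; matches the parallelism=4 seen on Flat Map / Window below

        DataStream<Tuple2<String, Integer>> counts =
                env.socketTextStream("localhost", 9999, "\n")      // Source: Socket Stream (parallelism 1)
                        .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                            @Override
                            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
                                for (String word : line.split("\\s")) {
                                    out.collect(Tuple2.of(word, 1)); // Flat Map
                                }
                            }
                        })
                        .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                            @Override
                            public String getKey(Tuple2<String, Integer> value) {
                                return value.f0;                    // HASH shipping strategy towards the window
                            }
                        })
                        .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                        .reduce((a, b) -> Tuple2.of(a.f0, a.f1 + b.f1)); // Window(TumblingProcessingTimeWindows(5000), ...)

        counts.print().setParallelism(1);                            // Sink: Print to Std. Out
        env.execute("Socket Window WordCount");
    }
}
The JobGraph generated for this pipeline looks as follows in the debugger: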
jobGraph = {JobGraph@3639} "JobGraph(jobId: 736da6495b13c60226e229f62562a157)"
taskVertices = {LinkedHashMap@3671} size = 4
0 = {JobVertexID@4579} "6d2677a0ecc3fd8df0b72ec675edf8f4" -> {JobVertex@4591} "Sink: Print to Std. Out (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
key = {JobVertexID@4579} "6d2677a0ecc3fd8df0b72ec675edf8f4"
value = {JobVertex@4591} "Sink: Print to Std. Out (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
id = {JobVertexID@4579} "6d2677a0ecc3fd8df0b72ec675edf8f4"
operatorIDs = {Collections$UnmodifiableRandomAccessList@4602} size = 1
results = {ArrayList@4603} size = 0
inputs = {ArrayList@4604} size = 1
0 = {JobEdge@4613} "07892af4088575caafee17e740d7a0d8 --> Sink: Print to Std. Out (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) [ALL_TO_ALL]"
target = {JobVertex@4591} "Sink: Print to Std. Out (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
distributionPattern = {DistributionPattern@4615} "ALL_TO_ALL"
downstreamSubtaskStateMapper = {SubtaskStateMapper$6@4616} "ROUND_ROBIN"
upstreamSubtaskStateMapper = {SubtaskStateMapper$1@4617} "ARBITRARY"
source = {IntermediateDataSet@4618} "Intermediate Data Set (07892af4088575caafee17e740d7a0d8)"
id = {IntermediateDataSetID@4619} "07892af4088575caafee17e740d7a0d8"
producer = {JobVertex@4252} "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
consumers = {ArrayList@4676} size = 1
resultType = {ResultPartitionType@4302} "PIPELINED_BOUNDED"
sourceId = {IntermediateDataSetID@4619} "07892af4088575caafee17e740d7a0d8"
shipStrategyName = "REBALANCE"
preProcessingOperationName = null
operatorLevelCachingDescription = null
operatorCoordinators = {ArrayList@4605} size = 0
parallelism = 1
maxParallelism = -1
minResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
preferredResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
configuration = {Configuration@4020} "{checkpointing=false, vertexID=5, inputs=[B@48cd9a2c, chainEnd=true, nonChainedOutputs=[B@4f67e3df, numberOfOutputs=0, operatorName=Sink: Print to Std. Out, execution.checkpointing.alignment-timeout=0, timechar=2, inStreamEdges=[B@7be7e15, managedMemFraction.STATE_BACKEND=0.0, statekeyser=[B@72d0f2b4, serializedUDF=[B@118102ee, graphContainingLoops=false, execution.checkpointing.unaligned=false, typeSerializer_out=[B@771d1ffb, sorted-inputs=false, chainedTaskConfig_=[B@3abfe845, chainIndex=1, chainedOutputs=[B@56681eaf, edgesInOrder=[B@7dff6d05, isChainedSubtask=true, checkpointMode=1, operatorID=[B@45d64d27, numberOfNetworkInputs=1}"
invokableClassName = "org.apache.flink.streaming.runtime.tasks.OneInputStreamTask"
isStoppable = false
inputSplitSource = null
name = "Sink: Print to Std. Out"
slotSharingGroup = {SlotSharingGroup@4599} "SlotSharingGroup [0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4, bc764cd8ddf7a0cff126f51c16239658]"
coLocationGroup = null
operatorName = null
operatorDescription = null
operatorPrettyName = null
resultOptimizerProperties = null
inputDependencyConstraint = {InputDependencyConstraint@3586} "ANY"
1 = {JobVertexID@4266} "ea632d67b7d595e5b851708ae9ad79d6" -> {JobVertex@4252} "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
key = {JobVertexID@4266} "ea632d67b7d595e5b851708ae9ad79d6"
value = {JobVertex@4252} "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
id = {JobVertexID@4266} "ea632d67b7d595e5b851708ae9ad79d6"
operatorIDs = {Collections$UnmodifiableRandomAccessList@4267} size = 1
results = {ArrayList@4268} size = 1
0 = {IntermediateDataSet@4618} "Intermediate Data Set (07892af4088575caafee17e740d7a0d8)"
id = {IntermediateDataSetID@4619} "07892af4088575caafee17e740d7a0d8"
producer = {JobVertex@4252} "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
id = {JobVertexID@4266} "ea632d67b7d595e5b851708ae9ad79d6"
operatorIDs = {Collections$UnmodifiableRandomAccessList@4267} size = 1
results = {ArrayList@4268} size = 1
inputs = {ArrayList@4269} size = 1
operatorCoordinators = {ArrayList@4270} size = 0
parallelism = 4
maxParallelism = -1
minResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
preferredResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
configuration = {Configuration@4271} "{checkpointing=false, vertexID=4, inputs=[B@571a9686, chainEnd=true, nonChainedOutputs=[B@2f651f93, numberOfOutputs=1, operatorName=Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction), execution.checkpointing.alignment-timeout=0, timechar=2, inStreamEdges=[B@38af1bf6, managedMemFraction.STATE_BACKEND=1.0, statekeyser=[B@4e93dcb9, serializedUDF=[B@5cb042da, statePartitioner0=[B@59c33386, graphContainingLoops=false, execution.checkpointing.unaligned=false, typeSerializer_out=[B@719d35e8, sorted-inputs=false, chainedTaskConfig_=[B@129bd55d, chainIndex=1, chainedOutputs=[B@6c575325, edgesInOrder=[B@747d1932, isChainedSubtask=true, checkpointMode=1, operatorID=[B@736309a9, numberOfNetworkInputs=1}"
invokableClassName = "org.apache.flink.streaming.runtime.tasks.OneInputStreamTask"
isStoppable = false
inputSplitSource = null
name = "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction)"
slotSharingGroup = {SlotSharingGroup@4599} "SlotSharingGroup [0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4, bc764cd8ddf7a0cff126f51c16239658]"
coLocationGroup = null
operatorName = null
operatorDescription = null
operatorPrettyName = null
resultOptimizerProperties = null
inputDependencyConstraint = {InputDependencyConstraint@3586} "ANY"
consumers = {ArrayList@4676} size = 1
0 = {JobEdge@4613} "07892af4088575caafee17e740d7a0d8 --> Sink: Print to Std. Out (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) [ALL_TO_ALL]"
target = {JobVertex@4591} "Sink: Print to Std. Out (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
distributionPattern = {DistributionPattern@4615} "ALL_TO_ALL"
downstreamSubtaskStateMapper = {SubtaskStateMapper$6@4616} "ROUND_ROBIN"
upstreamSubtaskStateMapper = {SubtaskStateMapper$1@4617} "ARBITRARY"
source = {IntermediateDataSet@4618} "Intermediate Data Set (07892af4088575caafee17e740d7a0d8)"
sourceId = {IntermediateDataSetID@4619} "07892af4088575caafee17e740d7a0d8"
shipStrategyName = "REBALANCE"
preProcessingOperationName = null
operatorLevelCachingDescription = null
resultType = {ResultPartitionType@4302} "PIPELINED_BOUNDED"
inputs = {ArrayList@4269} size = 1
0 = {JobEdge@4339} "ad9def4de4efd432736babc2e781c9ea --> Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) [ALL_TO_ALL]"
target = {JobVertex@4252} "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
distributionPattern = {DistributionPattern@4615} "ALL_TO_ALL"
downstreamSubtaskStateMapper = {SubtaskStateMapper$5@4685} "RANGE"
upstreamSubtaskStateMapper = {SubtaskStateMapper$1@4617} "ARBITRARY"
source = {IntermediateDataSet@4686} "Intermediate Data Set (ad9def4de4efd432736babc2e781c9ea)"
id = {IntermediateDataSetID@4687} "ad9def4de4efd432736babc2e781c9ea"
producer = {JobVertex@4251} "Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
consumers = {ArrayList@4695} size = 1
resultType = {ResultPartitionType@4302} "PIPELINED_BOUNDED"
sourceId = {IntermediateDataSetID@4687} "ad9def4de4efd432736babc2e781c9ea"
shipStrategyName = "HASH"
preProcessingOperationName = null
operatorLevelCachingDescription = null
operatorCoordinators = {ArrayList@4270} size = 0
parallelism = 4
maxParallelism = -1
minResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
preferredResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
configuration = {Configuration@4271} "{checkpointing=false, vertexID=4, inputs=[B@571a9686, chainEnd=true, nonChainedOutputs=[B@2f651f93, numberOfOutputs=1, operatorName=Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction), execution.checkpointing.alignment-timeout=0, timechar=2, inStreamEdges=[B@38af1bf6, managedMemFraction.STATE_BACKEND=1.0, statekeyser=[B@4e93dcb9, serializedUDF=[B@5cb042da, statePartitioner0=[B@59c33386, graphContainingLoops=false, execution.checkpointing.unaligned=false, typeSerializer_out=[B@719d35e8, sorted-inputs=false, chainedTaskConfig_=[B@129bd55d, chainIndex=1, chainedOutputs=[B@6c575325, edgesInOrder=[B@747d1932, isChainedSubtask=true, checkpointMode=1, operatorID=[B@736309a9, numberOfNetworkInputs=1}"
invokableClassName = "org.apache.flink.streaming.runtime.tasks.OneInputStreamTask"
isStoppable = false
inputSplitSource = null
name = "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction)"
slotSharingGroup = {SlotSharingGroup@4599} "SlotSharingGroup [0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4, bc764cd8ddf7a0cff126f51c16239658]"
coLocationGroup = null
operatorName = null
operatorDescription = null
operatorPrettyName = null
resultOptimizerProperties = null
inputDependencyConstraint = {InputDependencyConstraint@3586} "ANY"
2 = {JobVertexID@4256} "0a448493b4782967b150582570326227" -> {JobVertex@4251} "Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
key = {JobVertexID@4256} "0a448493b4782967b150582570326227"
value = {JobVertex@4251} "Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
id = {JobVertexID@4256} "0a448493b4782967b150582570326227"
operatorIDs = {Collections$UnmodifiableRandomAccessList@4257} size = 1
results = {ArrayList@4258} size = 1
0 = Intermediate Data Set (ad9def4de4efd432736babc2e781c9ea)
id = {IntermediateDataSetID@4687} "ad9def4de4efd432736babc2e781c9ea"
producer = {JobVertex@4251} "Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
id = {JobVertexID@4256} "0a448493b4782967b150582570326227"
operatorIDs = {Collections$UnmodifiableRandomAccessList@4257} size = 1
results = {ArrayList@4258} size = 1
inputs = {ArrayList@4259} size = 1
operatorCoordinators = {ArrayList@4260} size = 0
parallelism = 4
maxParallelism = -1
minResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
preferredResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
configuration = {Configuration@4261} "{checkpointing=false, vertexID=2, inputs=[B@10ef5fa0, chainEnd=true, nonChainedOutputs=[B@10acd6, numberOfOutputs=1, operatorName=Flat Map, execution.checkpointing.alignment-timeout=0, timechar=2, inStreamEdges=[B@706eab5d, managedMemFraction.STATE_BACKEND=0.0, statekeyser=[B@b25b095, serializedUDF=[B@5b275174, graphContainingLoops=false, execution.checkpointing.unaligned=false, typeSerializer_out=[B@244e619a, sorted-inputs=false, chainedTaskConfig_=[B@72725ee1, chainIndex=1, chainedOutputs=[B@61dde151, edgesInOrder=[B@40e60ece, isChainedSubtask=true, checkpointMode=1, operatorID=[B@3f9270ed, numberOfNetworkInputs=1}"
invokableClassName = "org.apache.flink.streaming.runtime.tasks.OneInputStreamTask"
isStoppable = false
inputSplitSource = null
name = "Flat Map"
slotSharingGroup = {SlotSharingGroup@4599} "SlotSharingGroup [0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4, bc764cd8ddf7a0cff126f51c16239658]"
coLocationGroup = null
operatorName = null
operatorDescription = null
operatorPrettyName = null
resultOptimizerProperties = null
inputDependencyConstraint = {InputDependencyConstraint@3586} "ANY"
consumers = {ArrayList@4695} size = 1
0 = {JobEdge@4339} "ad9def4de4efd432736babc2e781c9ea --> Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) [ALL_TO_ALL]"
target = {JobVertex@4252} "Window(TumblingProcessingTimeWindows(5000), ProcessingTimeTrigger, ReduceFunction$1, PassThroughWindowFunction) (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
distributionPattern = {DistributionPattern@4615} "ALL_TO_ALL"
downstreamSubtaskStateMapper = {SubtaskStateMapper$5@4685} "RANGE"
upstreamSubtaskStateMapper = {SubtaskStateMapper$1@4617} "ARBITRARY"
source = {IntermediateDataSet@4686} "Intermediate Data Set (ad9def4de4efd432736babc2e781c9ea)"
sourceId = {IntermediateDataSetID@4687} "ad9def4de4efd432736babc2e781c9ea"
shipStrategyName = "HASH"
preProcessingOperationName = null
operatorLevelCachingDescription = null
resultType = {ResultPartitionType@4302} "PIPELINED_BOUNDED"
inputs = {ArrayList@4259} size = 1
0 = {JobEdge@4450} "98275f9c07f6fdd46f188c5c38ac71ff --> Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) [ALL_TO_ALL]"
target = {JobVertex@4251} "Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
distributionPattern = {DistributionPattern@4615} "ALL_TO_ALL"
downstreamSubtaskStateMapper = {SubtaskStateMapper$6@4616} "ROUND_ROBIN"
upstreamSubtaskStateMapper = {SubtaskStateMapper$1@4617} "ARBITRARY"
source = {IntermediateDataSet@4655} "Intermediate Data Set (98275f9c07f6fdd46f188c5c38ac71ff)"
sourceId = {IntermediateDataSetID@4657} "98275f9c07f6fdd46f188c5c38ac71ff"
shipStrategyName = "REBALANCE"
preProcessingOperationName = null
operatorLevelCachingDescription = null
operatorCoordinators = {ArrayList@4260} size = 0
parallelism = 4
maxParallelism = -1
minResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
preferredResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
configuration = {Configuration@4261} "{checkpointing=false, vertexID=2, inputs=[B@10ef5fa0, chainEnd=true, nonChainedOutputs=[B@10acd6, numberOfOutputs=1, operatorName=Flat Map, execution.checkpointing.alignment-timeout=0, timechar=2, inStreamEdges=[B@706eab5d, managedMemFraction.STATE_BACKEND=0.0, statekeyser=[B@b25b095, serializedUDF=[B@5b275174, graphContainingLoops=false, execution.checkpointing.unaligned=false, typeSerializer_out=[B@244e619a, sorted-inputs=false, chainedTaskConfig_=[B@72725ee1, chainIndex=1, chainedOutputs=[B@61dde151, edgesInOrder=[B@40e60ece, isChainedSubtask=true, checkpointMode=1, operatorID=[B@3f9270ed, numberOfNetworkInputs=1}"
invokableClassName = "org.apache.flink.streaming.runtime.tasks.OneInputStreamTask"
isStoppable = false
inputSplitSource = null
name = "Flat Map"
slotSharingGroup = {SlotSharingGroup@4599} "SlotSharingGroup [0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4, bc764cd8ddf7a0cff126f51c16239658]"
coLocationGroup = null
operatorName = null
operatorDescription = null
operatorPrettyName = null
resultOptimizerProperties = null
inputDependencyConstraint = {InputDependencyConstraint@3586} "ANY"
3 = {JobVertexID@4576} "bc764cd8ddf7a0cff126f51c16239658" -> {JobVertex@4408} "Source: Socket Stream (org.apache.flink.streaming.runtime.tasks.SourceStreamTask)"
key = {JobVertexID@4576} "bc764cd8ddf7a0cff126f51c16239658"
value = {JobVertex@4408} "Source: Socket Stream (org.apache.flink.streaming.runtime.tasks.SourceStreamTask)"
id = {JobVertexID@4576} "bc764cd8ddf7a0cff126f51c16239658"
operatorIDs = {Collections$UnmodifiableRandomAccessList@4646} size = 1
results = {ArrayList@4647} size = 1
0 = {IntermediateDataSet@4655} "Intermediate Data Set (98275f9c07f6fdd46f188c5c38ac71ff)"
producer = {JobVertex@4408} "Source: Socket Stream (org.apache.flink.streaming.runtime.tasks.SourceStreamTask)"
id = {JobVertexID@4576} "bc764cd8ddf7a0cff126f51c16239658"
operatorIDs = {Collections$UnmodifiableRandomAccessList@4646} size = 1
results = {ArrayList@4647} size = 1
inputs = {ArrayList@4648} size = 0
operatorCoordinators = {ArrayList@4649} size = 0
parallelism = 1
maxParallelism = -1
minResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
preferredResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
configuration = {Configuration@4650} "{checkpointing=false, serializedUDF=[B@7a0f244f, graphContainingLoops=false, vertexID=1, execution.checkpointing.unaligned=false, inputs=[B@3672276e, typeSerializer_out=[B@4248b963, sorted-inputs=false, chainEnd=true, nonChainedOutputs=[B@7f08caf, numberOfOutputs=1, operatorName=Source: Socket Stream, chainedTaskConfig_=[B@4defd42, chainIndex=1, execution.checkpointing.alignment-timeout=0, timechar=2, chainedOutputs=[B@2330e3e0, edgesInOrder=[B@24b4d544, managedMemFraction.STATE_BACKEND=0.0, isChainedSubtask=true, checkpointMode=1, operatorID=[B@27a2a089, statekeyser=[B@54657dd2}"
invokableClassName = "org.apache.flink.streaming.runtime.tasks.SourceStreamTask"
isStoppable = false
inputSplitSource = null
name = "Source: Socket Stream"
slotSharingGroup = {SlotSharingGroup@4599} "SlotSharingGroup [0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4, bc764cd8ddf7a0cff126f51c16239658]"
coLocationGroup = null
operatorName = null
operatorDescription = null
operatorPrettyName = null
resultOptimizerProperties = null
inputDependencyConstraint = {InputDependencyConstraint@3586} "ANY"
id = {IntermediateDataSetID@4657} "98275f9c07f6fdd46f188c5c38ac71ff"
consumers = {ArrayList@4658} size = 1
0 = {JobEdge@4450} "98275f9c07f6fdd46f188c5c38ac71ff --> Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask) [ALL_TO_ALL]"
target = {JobVertex@4251} "Flat Map (org.apache.flink.streaming.runtime.tasks.OneInputStreamTask)"
distributionPattern = {DistributionPattern@4615} "ALL_TO_ALL"
downstreamSubtaskStateMapper = {SubtaskStateMapper$6@4616} "ROUND_ROBIN"
upstreamSubtaskStateMapper = {SubtaskStateMapper$1@4617} "ARBITRARY"
source = {IntermediateDataSet@4655} "Intermediate Data Set (98275f9c07f6fdd46f188c5c38ac71ff)"
sourceId = {IntermediateDataSetID@4657} "98275f9c07f6fdd46f188c5c38ac71ff"
shipStrategyName = "REBALANCE"
preProcessingOperationName = null
operatorLevelCachingDescription = null
resultType = {ResultPartitionType@4302} "PIPELINED_BOUNDED"
inputs = {ArrayList@4648} size = 0
operatorCoordinators = {ArrayList@4649} size = 0
parallelism = 1
maxParallelism = -1
minResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
preferredResources = {ResourceSpec@3710} "ResourceSpec{UNKNOWN}"
configuration = {Configuration@4650} "{checkpointing=false, serializedUDF=[B@7a0f244f, graphContainingLoops=false, vertexID=1, execution.checkpointing.unaligned=false, inputs=[B@3672276e, typeSerializer_out=[B@4248b963, sorted-inputs=false, chainEnd=true, nonChainedOutputs=[B@7f08caf, numberOfOutputs=1, operatorName=Source: Socket Stream, chainedTaskConfig_=[B@4defd42, chainIndex=1, execution.checkpointing.alignment-timeout=0, timechar=2, chainedOutputs=[B@2330e3e0, edgesInOrder=[B@24b4d544, managedMemFraction.STATE_BACKEND=0.0, isChainedSubtask=true, checkpointMode=1, operatorID=[B@27a2a089, statekeyser=[B@54657dd2}"
invokableClassName = "org.apache.flink.streaming.runtime.tasks.SourceStreamTask"
isStoppable = false
inputSplitSource = null
name = "Source: Socket Stream"
slotSharingGroup = {SlotSharingGroup@4599} "SlotSharingGroup [0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4, bc764cd8ddf7a0cff126f51c16239658]"
coLocationGroup = null
operatorName = null
operatorDescription = null
operatorPrettyName = null
resultOptimizerProperties = null
inputDependencyConstraint = {InputDependencyConstraint@3586} "ANY"
jobConfiguration = {Configuration@3672} "{}"
jobID = {JobID@3673} "736da6495b13c60226e229f62562a157"
jobName = "Socket Window WordCount"
scheduleMode = {ScheduleMode@3485} "EAGER"
approximateLocalRecovery = false
serializedExecutionConfig = {SerializedValue@4551} "SerializedValue"
snapshotSettings = {JobCheckpointingSettings@4552} "SnapshotSettings: config=JobCheckpointingConfiguration{checkpointInterval=9223372036854775807, checkpointTimeout=600000, minPauseBetweenCheckpoints=0, maxConcurrentCheckpoints=1, checkpointRetentionPolicy=NEVER_RETAIN_AFTER_TERMINATION, isExactlyOnce=false, isUnalignedCheckpoint=false, isPreferCheckpointForRecovery=false, tolerableCheckpointFailureNumber=0}, trigger=[bc764cd8ddf7a0cff126f51c16239658], ack=[bc764cd8ddf7a0cff126f51c16239658, 0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4], commit=[bc764cd8ddf7a0cff126f51c16239658, 0a448493b4782967b150582570326227, ea632d67b7d595e5b851708ae9ad79d6, 6d2677a0ecc3fd8df0b72ec675edf8f4]"
verticesToTrigger = {ArrayList@4570} size = 1
bc764cd8ddf7a0cff126f51c16239658
verticesToAcknowledge = {ArrayList@4571} size = 4
0 = {JobVertexID@4576} "bc764cd8ddf7a0cff126f51c16239658"
1 = {JobVertexID@4256} "0a448493b4782967b150582570326227"
2 = {JobVertexID@4266} "ea632d67b7d595e5b851708ae9ad79d6"
3 = {JobVertexID@4579} "6d2677a0ecc3fd8df0b72ec675edf8f4"
verticesToConfirm = {ArrayList@4572} size = 4
0 = {JobVertexID@4576} "bc764cd8ddf7a0cff126f51c16239658"
1 = {JobVertexID@4256} "0a448493b4782967b150582570326227"
2 = {JobVertexID@4266} "ea632d67b7d595e5b851708ae9ad79d6"
3 = {JobVertexID@4579} "6d2677a0ecc3fd8df0b72ec675edf8f4"
checkpointCoordinatorConfiguration = {CheckpointCoordinatorConfiguration@4573} "JobCheckpointingConfiguration{checkpointInterval=9223372036854775807, checkpointTimeout=600000, minPauseBetweenCheckpoints=0, maxConcurrentCheckpoints=1, checkpointRetentionPolicy=NEVER_RETAIN_AFTER_TERMINATION, isExactlyOnce=false, isUnalignedCheckpoint=false, isPreferCheckpointForRecovery=false, tolerableCheckpointFailureNumber=0}"
checkpointInterval = 9223372036854775807
checkpointTimeout = 600000
minPauseBetweenCheckpoints = 0
maxConcurrentCheckpoints = 1
tolerableCheckpointFailureNumber = 0
checkpointRetentionPolicy = {CheckpointRetentionPolicy@4582} "NEVER_RETAIN_AFTER_TERMINATION"
isExactlyOnce = false
isPreferCheckpointForRecovery = false
isUnalignedCheckpointsEnabled = false
alignmentTimeout = 0
defaultStateBackend = null
masterHooks = null
savepointRestoreSettings = {SavepointRestoreSettings@3484} "SavepointRestoreSettings.none()"
userJars = {ArrayList@3675} size = 1
file:/opt/tools/flink-1.12.0/examples/streaming/SocketWindowWordCount.jar
userArtifacts = {HashMap@3676} size = 0
userJarBlobKeys = {ArrayList@3677} size = 0
classpaths = {ArrayList@4564} size = 0
3. Code Walkthrough
3.1. Entry Point
Taking yarn-per-job as an example, the entry class is AbstractJobClusterExecutor#execute.
@Override
public CompletableFuture<JobClient> execute(
@Nonnull final Pipeline pipeline,
@Nonnull final Configuration configuration,
@Nonnull final ClassLoader userCodeClassloader)
throws Exception {
// Translate the stream graph into a job graph
// JobGraph(jobId: 536af83b56ddfc2ef4ffda8b43a21e15)
final JobGraph jobGraph = PipelineExecutorUtils.getJobGraph(pipeline, configuration);
// remaining code omitted ............
}
The call chain is fairly deep; in the end a StreamingJobGraphGenerator is constructed and its createJobGraph method is called to produce the JobGraph:
AbstractJobClusterExecutor#execute
--> PipelineExecutorUtils#getJobGraph
--> FlinkPipelineTranslationUtil#getJobGraph
--> FlinkPipelineTranslator#translateToJobGraph
--> StreamGraph#getJobGraph
--> StreamingJobGraphGenerator#createJobGraph
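The last hop of that chain is short; simplified from the Flink 1.12 sources, StreamGraph#getJobGraph just hands the graph to the generator:
// org.apache.flink.streaming.api.graph.StreamGraph (simplified excerpt)
public JobGraph getJobGraph(@Nullable JobID jobID) {
    return StreamingJobGraphGenerator.createJobGraph(this, jobID);
}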
3.2. StreamingJobGraphGenerator Fields
The member variables of StreamingJobGraphGenerator all exist to assist in producing the final JobGraph.
// Default network buffer timeout: 100L
private static final long DEFAULT_NETWORK_BUFFER_TIMEOUT = 100L;
// Undefined network buffer timeout: -1
public static final long UNDEFINED_NETWORK_BUFFER_TIMEOUT = -1L;
// ------------------------------------------------------------------------
// Build a JobGraph from the given StreamGraph
public static JobGraph createJobGraph(StreamGraph streamGraph) {
return createJobGraph(streamGraph, null);
}
// Build a JobGraph from the given StreamGraph
public static JobGraph createJobGraph(StreamGraph streamGraph, @Nullable JobID jobID) {
// Construct a StreamingJobGraphGenerator and let it build the JobGraph;
// the StreamingJobGraphGenerator constructor already creates an empty JobGraph
return new StreamingJobGraphGenerator(streamGraph, jobID).createJobGraph();
}
// ------------------------------------------------------------------------
// the StreamGraph being translated
private final StreamGraph streamGraph;
// the JobVertices of the job, id -> JobVertex
private final Map<Integer, JobVertex> jobVertices;
// the JobGraph being built
private final JobGraph jobGraph;
// ids of the JobVertices that have already been built
private final Collection<Integer> builtVertices;
// physical edges (excluding edges inside a chain), in creation order
private final List<StreamEdge> physicalEdgesInOrder;
// chain information, used at deployment time to build the OperatorChain,
// startNodeId -> (currentNodeId -> StreamConfig)
private final Map<Integer, Map<Integer, StreamConfig>> chainedConfigs;
// configuration of every node, id -> StreamConfig
private final Map<Integer, StreamConfig> vertexConfigs;
// display name of every node, id -> chainedName
private final Map<Integer, String> chainedNames;
// minimum resources of each chain
private final Map<Integer, ResourceSpec> chainedMinResources;
// preferred resources of each chain
private final Map<Integer, ResourceSpec> chainedPreferredResources;
private final Map<Integer, InputOutputFormatContainer> chainedInputOutputFormats;
private final StreamGraphHasher defaultStreamGraphHasher;
private final List<StreamGraphHasher> legacyStreamGraphHashers;
3.3. StreamingJobGraphGenerator#createJobGraph
private JobGraph createJobGraph() {
preValidate();
// make sure that all vertices start immediately
// In streaming mode the schedule mode starts all vertices immediately: ScheduleMode.EAGER
jobGraph.setScheduleMode(streamGraph.getScheduleMode());
// false
jobGraph.enableApproximateLocalRecovery(
streamGraph.getCheckpointConfig().isApproximateLocalRecoveryEnabled());
// Generate deterministic hashes for the nodes in order to identify them across
// submission iff they didn't change.
// Traverse the StreamGraph breadth-first and generate a hash id for every StreamNode;
// as long as the submitted topology does not change, the generated hashes stay the same.
Map<Integer, byte[]> hashes =
defaultStreamGraphHasher.traverseStreamGraphAndGenerateHashes(streamGraph);
// Generate legacy version hashes for backwards compatibility
List<Map<Integer, byte[]>> legacyHashes = new ArrayList<>(legacyStreamGraphHashers.size());
for (StreamGraphHasher hasher : legacyStreamGraphHashers) {
legacyHashes.add(hasher.traverseStreamGraphAndGenerateHashes(streamGraph));
}
// [key step] Generate the JobVertices, JobEdges, etc., chaining as many nodes together as possible
setChaining(hashes, legacyHashes);
// Serialize the incoming edges of every generated JobVertex into that vertex's StreamConfig as well (the outgoing edges were already written during setChaining)
setPhysicalEdges();
// Assign each JobVertex to its SlotSharingGroup based on the group name, and set a CoLocationGroup for the head and tail of each iteration
setSlotSharingAndCoLocation();
setManagedMemoryFraction(
Collections.unmodifiableMap(jobVertices),
Collections.unmodifiableMap(vertexConfigs),
Collections.unmodifiableMap(chainedConfigs),
id -> streamGraph.getStreamNode(id).getManagedMemoryOperatorScopeUseCaseWeights(),
id -> streamGraph.getStreamNode(id).getManagedMemorySlotScopeUseCases());
configureCheckpointing();
jobGraph.setSavepointRestoreSettings(streamGraph.getSavepointRestoreSettings());
JobGraphUtils.addUserArtifactEntries(streamGraph.getUserArtifacts(), jobGraph);
// set the ExecutionConfig last when it has been finalized
try {
jobGraph.setExecutionConfig(streamGraph.getExecutionConfig());
} catch (IOException e) {
throw new IllegalConfigurationException(
"Could not serialize the ExecutionConfig."
+ "This indicates that non-serializable types (like custom serializers) were registered");
}
return jobGraph;
}
3.4. StreamingJobGraphGenerator#setChaining
A unique hash id is generated for every node. If a node does not change across submissions (including its parallelism and its upstream/downstream relationships), this id stays the same; it is mainly used for failure recovery.
StreamNode.id cannot be used for this purpose, because it comes from a static counter starting at 1, so the same job can end up with different ids. The two jobs in the snippets below are identical, yet the source ids differ:
// Example 1: A.id=1 B.id=2
DataStream<String> A = ...
DataStream<String> B = ...
A.union(B).print();
// Example 2: A.id=2 B.id=1
DataStream<String> B = ...
DataStream<String> A = ...
A.union(B).print();
Every JobVertex corresponds to a serializable StreamConfig that is shipped to the JobManager and the TaskManagers.
When a Task is finally started on a TaskManager, the required configuration, including the user-code StreamOperator, is deserialized from it; a sketch of this round trip follows.
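A minimal sketch of that round trip, assuming the StreamConfig accessors used later in this article (setOperatorName, setTransitiveChainedTaskConfigs, getTransitiveChainedTaskConfigs); the node id 3 and the operator names are made up:
import java.util.HashMap;
import java.util.Map;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.graph.StreamConfig;

public class ChainedConfigSketch {
    public static void main(String[] args) {
        // Config of the head-of-chain node, backing the JobVertex.
        StreamConfig headConfig = new StreamConfig(new Configuration());
        headConfig.setOperatorName("Flat Map");
        // Config of a chained sub-node, stored inside the head's CHAINED_TASK_CONFIG entry.
        StreamConfig chainedConfig = new StreamConfig(new Configuration());
        chainedConfig.setOperatorName("Filter");
        Map<Integer, StreamConfig> chained = new HashMap<>();
        chained.put(3, chainedConfig); // key: StreamNode id of the chained operator (made up)
        headConfig.setTransitiveChainedTaskConfigs(chained);
        // On the TaskManager side the chained configs are deserialized again:
        Map<Integer, StreamConfig> restored =
                headConfig.getTransitiveChainedTaskConfigs(ChainedConfigSketch.class.getClassLoader());
        System.out.println(restored.keySet());
    }
}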
setChaining calls createChain for each source; createChain then recursively visits the downstream nodes and thereby builds the node chains.
createChain inspects the outgoing edges of the current node and, based on the operator-chaining conditions (sketched just before the createChain listing below), splits them into chainable and nonChainable edges, recursing into both groups.
Afterwards the configuration held in the StreamNode is serialized into a StreamConfig. If the current node is not a sub-node of a chain, a JobVertex is created and connected via JobEdges.
If it is a sub-node of a chain, its StreamConfig is added to that chain's config collection. Within one node chain, only the headOfChain node produces a JobVertex; all the other nodes are written in serialized form into their StreamConfigs and stored in the headOfChain's CHAINED_TASK_CONFIG entry.
Only at deployment time are they read back and the corresponding chained operators instantiated.
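For reference, a rough paraphrase of the chaining conditions checked by isChainable/isChainableInput in Flink 1.12 (simplified; the real code additionally handles chained sources and consults the operators' ChainingStrategy via areOperatorsChainable):
import org.apache.flink.streaming.api.graph.StreamEdge;
import org.apache.flink.streaming.api.graph.StreamGraph;
import org.apache.flink.streaming.api.graph.StreamNode;
import org.apache.flink.streaming.api.transformations.ShuffleMode;
import org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner;

public class ChainingConditionSketch {
    // Not the verbatim Flink source; a condensed restatement of the checks.
    static boolean isChainableSketch(StreamEdge edge, StreamGraph streamGraph) {
        StreamNode upstream = streamGraph.getSourceVertex(edge);
        StreamNode downstream = streamGraph.getTargetVertex(edge);
        return downstream.getInEdges().size() == 1                        // downstream has exactly one input
                && upstream.isSameSlotSharingGroup(downstream)            // same slot sharing group
                && edge.getPartitioner() instanceof ForwardPartitioner    // data is forwarded, not redistributed
                && edge.getShuffleMode() != ShuffleMode.BATCH             // not a blocking exchange
                && upstream.getParallelism() == downstream.getParallelism()
                && streamGraph.isChainingEnabled();
        // plus: the two operators' chaining strategies must allow chaining (areOperatorsChainable).
    }
}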
// Build the node chains and return the physical outgoing edges of the current node.
// When startNodeId != currentNodeId, currentNode is a sub-node inside a chain.
private List<StreamEdge> createChain(
final Integer currentNodeId,
final int chainIndex,
final OperatorChainInfo chainInfo,
final Map<Integer, OperatorChainInfo> chainEntryPoints) {
Integer startNodeId = chainInfo.getStartNodeId();
if (!builtVertices.contains(startNodeId)) {
// Outgoing edges used to generate the final JobEdges; note that edges inside a chain are not included.
List<StreamEdge> transitiveOutEdges = new ArrayList<StreamEdge>();
// Chainable outgoing edges (when two nodes are merged into one, the edge between them can be dropped).
List<StreamEdge> chainableOutputs = new ArrayList<StreamEdge>();
// Non-chainable outgoing edges.
List<StreamEdge> nonChainableOutputs = new ArrayList<StreamEdge>();
// get the current StreamNode
StreamNode currentNode = streamGraph.getStreamNode(currentNodeId);
// Split the outgoing edges of the current node into chainable and nonChainable ones.
for (StreamEdge outEdge : currentNode.getOutEdges()) {
// Check whether the edge can be chained (the downstream node has exactly one input edge && the chaining conditions hold).
if (isChainable(outEdge, streamGraph)) {
// chainable edge
chainableOutputs.add(outEdge);
} else {
// non-chainable edge
nonChainableOutputs.add(outEdge);
}
}
// Recurse into the chainable edges.
for (StreamEdge chainable : chainableOutputs) {
transitiveOutEdges.addAll(
// recursion ...
createChain(
chainable.getTargetId(),
chainIndex + 1,
chainInfo,
chainEntryPoints));
}
// Recurse into the non-chainable edges; each target starts a new chain.
for (StreamEdge nonChainable : nonChainableOutputs) {
transitiveOutEdges.add(nonChainable);
createChain(
nonChainable.getTargetId(),
1, // operators start at position 1 because 0 is for chained source inputs
chainEntryPoints.computeIfAbsent(
nonChainable.getTargetId(),
(k) -> chainInfo.newChain(nonChainable.getTargetId())),
chainEntryPoints);
}
// Generate the display name of the current node, e.g. "Keyed Aggregation -> Sink: Unnamed"
chainedNames.put(
currentNodeId,
createChainedName(
currentNodeId,
chainableOutputs,
Optional.ofNullable(chainEntryPoints.get(currentNodeId))));
chainedMinResources.put(
currentNodeId, createChainedMinResources(currentNodeId, chainableOutputs));
chainedPreferredResources.put(
currentNodeId,
createChainedPreferredResources(currentNodeId, chainableOutputs));
OperatorID currentOperatorId =
chainInfo.addNodeToChain(currentNodeId, chainedNames.get(currentNodeId));
if (currentNode.getInputFormat() != null) {
getOrCreateFormatContainer(startNodeId)
.addInputFormat(currentOperatorId, currentNode.getInputFormat());
}
if (currentNode.getOutputFormat() != null) {
getOrCreateFormatContainer(startNodeId)
.addOutputFormat(currentOperatorId, currentNode.getOutputFormat());
}
// If the current node is the start of the chain, create the JobVertex and return its StreamConfig; otherwise create an empty StreamConfig first.
StreamConfig config =
currentNodeId.equals(startNodeId)
? createJobVertex(startNodeId, chainInfo)
: new StreamConfig(new Configuration());
// Fill the StreamConfig of the JobVertex, essentially by serializing the StreamNode's configuration into the StreamConfig.
setVertexConfig(
currentNodeId,
config,
chainableOutputs,
nonChainableOutputs,
chainInfo.getChainedSources());
if (currentNodeId.equals(startNodeId)) {
// The start node of a chain is marked as chain start (a node that is not part of any chain is also marked as chain start).
config.setChainStart();
config.setChainIndex(chainIndex);
config.setOperatorName(streamGraph.getStreamNode(currentNodeId).getOperatorName());
/* Connect the current node (headOfChain) with all of its physical outgoing edges. */
for (StreamEdge edge : transitiveOutEdges) {
// [important]
/* Build a JobEdge from the StreamEdge and create the IntermediateDataSet that connects the JobVertex and the JobEdge. */
connect(startNodeId, edge);
}
/* Write the physical outgoing edges into the config; they are needed at deployment time. */
config.setOutEdgesInOrder(transitiveOutEdges);
/* Write the StreamConfigs of all sub-nodes of the chain into the headOfChain node's CHAINED_TASK_CONFIG entry. */
config.setTransitiveChainedTaskConfigs(chainedConfigs.get(startNodeId));
} else {
// A sub-node inside the chain, i.e. not the start node.
chainedConfigs.computeIfAbsent(
startNodeId, k -> new HashMap<Integer, StreamConfig>());
config.setChainIndex(chainIndex);
StreamNode node = streamGraph.getStreamNode(currentNodeId);
config.setOperatorName(node.getOperatorName());
/* Add the StreamConfig of the current node to the chain's config map. */
chainedConfigs.get(startNodeId).put(currentNodeId, config);
}
config.setOperatorID(currentOperatorId);
if (chainableOutputs.isEmpty()) {
config.setChainEnd();
}
/* Return the outgoing edges that leave the chain. */
return transitiveOutEdges;
} else {
return new ArrayList<>();
}
}