AbstractJobClusterExecutor.java

@Override
public CompletableFuture<JobClient> execute(@Nonnull final Pipeline pipeline, @Nonnull final Configuration configuration, @Nonnull final ClassLoader userCodeClassloader) throws Exception {
  /*TODO Convert the StreamGraph into a JobGraph*/
  final JobGraph jobGraph = PipelineExecutorUtils.getJobGraph(pipeline, configuration);

  /*TODO Cluster descriptor: creates and starts the YarnClient; carries YARN and Flink configuration and environment info*/
  try (final ClusterDescriptor<ClusterID> clusterDescriptor = clusterClientFactory.createClusterDescriptor(configuration)) {
    final ExecutionConfigAccessor configAccessor = ExecutionConfigAccessor.fromConfiguration(configuration);

    /*TODO Cluster-specific resource settings: JobManager memory, TaskManager memory, slots per TaskManager*/
    final ClusterSpecification clusterSpecification = clusterClientFactory.getClusterSpecification(configuration);

    final ClusterClientProvider<ClusterID> clusterClientProvider = clusterDescriptor
        .deployJobCluster(clusterSpecification, jobGraph, configAccessor.getDetachedMode());
    LOG.info("Job has been submitted with JobID " + jobGraph.getJobID());

    return CompletableFuture.completedFuture(
        new ClusterClientJobClientAdapter<>(clusterClientProvider, jobGraph.getJobID(), userCodeClassloader));
  }
}
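For orientation: this executor runs when a user program's execute() call is made with a per-job deployment target. A minimal driver sketch (the job itself is illustrative, not taken from the source being walked through):

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.fromElements("hello", "world")
        .map(String::toUpperCase)
        .print();

    // env.execute() builds the StreamGraph and hands it to the configured
    // PipelineExecutor -- for a per-job YARN target, a subclass of AbstractJobClusterExecutor
    env.execute("word-count");
  }
}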

Converting the StreamGraph to a JobGraph

**(1) Finding the createJobGraph method**

1) PipelineExecutorUtils

public static JobGraph getJobGraph(@Nonnull final Pipeline pipeline, @Nonnull final Configuration configuration) throws MalformedURLException {
    checkNotNull(pipeline);
    checkNotNull(configuration);

    final ExecutionConfigAccessor executionConfigAccessor = ExecutionConfigAccessor.fromConfiguration(configuration);
    // continue below
    final JobGraph jobGraph = FlinkPipelineTranslationUtil
        .getJobGraph(pipeline, configuration, executionConfigAccessor.getParallelism());

    configuration
        .getOptional(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID)
        .ifPresent(strJobID -> jobGraph.setJobID(JobID.fromHexString(strJobID)));

    jobGraph.addJars(executionConfigAccessor.getJars());
    jobGraph.setClasspaths(executionConfigAccessor.getClasspaths());
    jobGraph.setSavepointRestoreSettings(executionConfigAccessor.getSavepointRestoreSettings());

    return jobGraph;
  }
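The PIPELINE_FIXED_JOB_ID branch above lets a caller pin the JobID before submission. A minimal sketch of the parsing it relies on (the hex string is an arbitrary example):

import org.apache.flink.api.common.JobID;

public class FixedJobIdSketch {
  public static void main(String[] args) {
    // JobID.fromHexString expects the 32-hex-character form that
    // PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID carries
    JobID fixed = JobID.fromHexString("00000000000000000000000000000001");
    System.out.println(fixed);
  }
}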

2) FlinkPipelineTranslationUtil

public static JobGraph getJobGraph(
      Pipeline pipeline,
      Configuration optimizerConfiguration,
      int defaultParallelism) {

    FlinkPipelineTranslator pipelineTranslator = getPipelineTranslator(pipeline);
    // continue below
    return pipelineTranslator.translateToJobGraph(pipeline,
        optimizerConfiguration,
        defaultParallelism);
  }
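Which translator getPipelineTranslator returns depends on the concrete Pipeline: a DataStream program produces a StreamGraph, so the StreamGraphTranslator below is selected (a legacy DataSet Plan would go through PlanTranslator instead).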
3) StreamGraphTranslator implements FlinkPipelineTranslator

@Override
public JobGraph translateToJobGraph(
    Pipeline pipeline,
    Configuration optimizerConfiguration,
    int defaultParallelism) {
  …
  StreamGraph streamGraph = (StreamGraph) pipeline;
  // the key call
  return streamGraph.getJobGraph(null);
}
4) StreamGraph

public JobGraph getJobGraph(@Nullable JobID jobID) {
  return StreamingJobGraphGenerator.createJobGraph(this, jobID);
}
5) StreamingJobGraphGenerator

public static JobGraph createJobGraph(StreamGraph streamGraph, @Nullable JobID jobID) {
  return new StreamingJobGraphGenerator(streamGraph, jobID).createJobGraph();
}

private JobGraph createJobGraph() {
  preValidate();

  // make sure that all vertices start immediately
  /*TODO In streaming mode, the schedule mode starts all vertices together: Eager*/
  jobGraph.setScheduleMode(streamGraph.getScheduleMode());
  jobGraph.enableApproximateLocalRecovery(streamGraph.getCheckpointConfig().isApproximateLocalRecoveryEnabled());

  // Generate deterministic hashes for the nodes in order to identify them across
  // submission iff they didn't change.
  /*TODO Breadth-first traversal of the StreamGraph that generates a hash id for each StreamNode;
    as long as the submitted topology is unchanged, the generated hashes stay the same*/
  Map<Integer, byte[]> hashes = defaultStreamGraphHasher.traverseStreamGraphAndGenerateHashes(streamGraph);

  // Generate legacy version hashes for backwards compatibility
  List<Map<Integer, byte[]>> legacyHashes = new ArrayList<>(legacyStreamGraphHashers.size());
  for (StreamGraphHasher hasher : legacyStreamGraphHashers) {
    legacyHashes.add(hasher.traverseStreamGraphAndGenerateHashes(streamGraph));
  }

  /*TODO The most important call: creates the JobVertex and JobEdge instances and chains as many nodes together as possible*/
  setChaining(hashes, legacyHashes);

  /*TODO Serialize each JobVertex's incoming edges into its StreamConfig (the outgoing edges were already written during setChaining)*/
  setPhysicalEdges();

  /*TODO Assign each JobVertex to a SlotSharingGroup based on its group name, and set up CoLocationGroups for iteration heads and tails*/
  setSlotSharingAndCoLocation();

  setManagedMemoryFraction(
      Collections.unmodifiableMap(jobVertices),
      Collections.unmodifiableMap(vertexConfigs),
      Collections.unmodifiableMap(chainedConfigs),
      id -> streamGraph.getStreamNode(id).getManagedMemoryOperatorScopeUseCaseWeights(),
      id -> streamGraph.getStreamNode(id).getManagedMemorySlotScopeUseCases());

  configureCheckpointing();

  jobGraph.setSavepointRestoreSettings(streamGraph.getSavepointRestoreSettings());

  JobGraphUtils.addUserArtifactEntries(streamGraph.getUserArtifacts(), jobGraph);

  // set the ExecutionConfig last when it has been finalized
  try {
    /*TODO Serialize the StreamGraph's ExecutionConfig into the JobGraph's configuration*/
    jobGraph.setExecutionConfig(streamGraph.getExecutionConfig());
  } catch (IOException e) {
    throw new IllegalConfigurationException("Could not serialize the ExecutionConfig." +
        "This indicates that non-serializable types (like custom serializers) were registered");
  }

  return jobGraph;
}
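These deterministic hashes are what lets operator state be matched up across resubmissions. Rather than relying on the topology staying identical, the hash of an operator can be pinned explicitly with uid(); a minimal sketch:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class StableUidSketch {
  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.fromElements(1, 2, 3)
        // uid() feeds the node hash, so this operator keeps the same
        // OperatorID even if the surrounding topology changes
        .map(x -> x * 2).uid("double-map")
        .print();

    env.execute("stable-uid");
  }
}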
**(1) Creating JobVertex and JobEdge instances and chaining nodes together where possible**

1) StreamingJobGraphGenerator

Chaining starts at operator position 1, because position 0 is reserved for chained source inputs.
/**
 * Sets up task chains from the source {@link StreamNode} instances.
 *
 * <p>This will recursively create all {@link JobVertex} instances.
 */
private void setChaining(Map<Integer, byte[]> hashes, List<Map<Integer, byte[]>> legacyHashes) {
  // we separate out the sources that run as inputs to another operator (chained inputs)
  // from the sources that needs to run as the main (head) operator.
  final Map<Integer, OperatorChainInfo> chainEntryPoints = buildChainedInputsAndGetHeadInputs(hashes, legacyHashes);
  // iterate over a copy of the values, because this map gets concurrently modified
  final Collection<OperatorChainInfo> initialEntryPoints = new ArrayList<>(chainEntryPoints.values());

  /*TODO Build the node chains starting from the sources*/
  for (OperatorChainInfo info : initialEntryPoints) {
    // build the chain and collect the current node's physical out-edges;
    // when startNodeId != currentNodeId, the current node is a non-head member of the chain
    createChain(
        info.getStartNodeId(),
        1, // operators start at position 1 because 0 is for chained source inputs
        info,
        chainEntryPoints);
  }
}

private List<StreamEdge> createChain(
    final Integer currentNodeId,
    final int chainIndex,
    final OperatorChainInfo chainInfo,
    final Map<Integer, OperatorChainInfo> chainEntryPoints) {

  Integer startNodeId = chainInfo.getStartNodeId();
  if (!builtVertices.contains(startNodeId)) {
    /*TODO Transitional out-edge collection used to build the final JobEdges; note that it excludes edges inside the chain*/
    List<StreamEdge> transitiveOutEdges = new ArrayList<>();

    List<StreamEdge> chainableOutputs = new ArrayList<>();
    List<StreamEdge> nonChainableOutputs = new ArrayList<>();

    StreamNode currentNode = streamGraph.getStreamNode(currentNodeId);

    /*TODO Split the current node's out-edges into chainable and non-chainable*/
    for (StreamEdge outEdge : currentNode.getOutEdges()) {
      if (isChainable(outEdge, streamGraph)) {
        chainableOutputs.add(outEdge);
      } else {
        nonChainableOutputs.add(outEdge);
      }
    }

    /*TODO Recursively call createChain along the chainable edges*/
    for (StreamEdge chainable : chainableOutputs) {
      transitiveOutEdges.addAll(
          createChain(chainable.getTargetId(), chainIndex + 1, chainInfo, chainEntryPoints));
    }

    /*TODO Non-chainable edges start a new chain at their target*/
    for (StreamEdge nonChainable : nonChainableOutputs) {
      transitiveOutEdges.add(nonChainable);
      createChain(
          nonChainable.getTargetId(),
          1, // operators start at position 1 because 0 is for chained source inputs
          chainEntryPoints.computeIfAbsent(
            nonChainable.getTargetId(),
            (k) -> chainInfo.newChain(nonChainable.getTargetId())),
          chainEntryPoints);
    }

    /*TODO Build the display name of the current node, e.g. "Keyed Aggregation -> Sink: Unnamed"*/
    chainedNames.put(currentNodeId, createChainedName(currentNodeId, chainableOutputs, Optional.ofNullable(chainEntryPoints.get(currentNodeId))));
    chainedMinResources.put(currentNodeId, createChainedMinResources(currentNodeId, chainableOutputs));
    chainedPreferredResources.put(currentNodeId, createChainedPreferredResources(currentNodeId, chainableOutputs));

    OperatorID currentOperatorId = chainInfo.addNodeToChain(currentNodeId, chainedNames.get(currentNodeId));

    if (currentNode.getInputFormat() != null) {
      getOrCreateFormatContainer(startNodeId).addInputFormat(currentOperatorId, currentNode.getInputFormat());
    }

    if (currentNode.getOutputFormat() != null) {
      getOrCreateFormatContainer(startNodeId).addOutputFormat(currentOperatorId, currentNode.getOutputFormat());
    }

    /*TODO If the current node is the chain's head, create the JobVertex and return its StreamConfig; otherwise create an empty StreamConfig*/
    StreamConfig config = currentNodeId.equals(startNodeId)
        ? createJobVertex(startNodeId, chainInfo)
        : new StreamConfig(new Configuration());

    /*TODO Populate the StreamConfig, essentially serializing the StreamNode's settings into it*/
    setVertexConfig(currentNodeId, config, chainableOutputs, nonChainableOutputs, chainInfo.getChainedSources());

    if (currentNodeId.equals(startNodeId)) {
      /*TODO Head of a chain: mark it as chain start (a node that belongs to no chain is also marked as chain start)*/
      config.setChainStart();
      config.setChainIndex(chainIndex);
      config.setOperatorName(streamGraph.getStreamNode(currentNodeId).getOperatorName());

      /*TODO Connect the head of the chain to all transitive out-edges*/
      for (StreamEdge edge : transitiveOutEdges) {
        /*TODO Build a JobEdge from the StreamEdge and create the IntermediateDataSet that links the JobVertex and the JobEdge*/
        connect(startNodeId, edge);
      }

      /*TODO Write the physical out-edges into the config; used at deployment time*/
      config.setOutEdgesInOrder(transitiveOutEdges);
      /*TODO Write the StreamConfigs of all chained sub-nodes into the head node's CHAINED_TASK_CONFIG entry*/
      config.setTransitiveChainedTaskConfigs(chainedConfigs.get(startNodeId));

    } else {
      /*TODO Non-head member of a chain*/
      chainedConfigs.computeIfAbsent(startNodeId, k -> new HashMap<Integer, StreamConfig>());

      config.setChainIndex(chainIndex);
      StreamNode node = streamGraph.getStreamNode(currentNodeId);
      config.setOperatorName(node.getOperatorName());
      /*TODO Add the current node's StreamConfig to the chain's config map*/
      chainedConfigs.get(startNodeId).put(currentNodeId, config);
    }

    config.setOperatorID(currentOperatorId);

    if (chainableOutputs.isEmpty()) {
      config.setChainEnd();
    }
    /*TODO Return the out-edges that leave the chain*/
    return transitiveOutEdges;
  } else {
    return new ArrayList<>();
  }
}
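Whether an out-edge ends up in chainableOutputs is decided by isChainable() (among other conditions: a forward partitioner, matching parallelism, and chaining being enabled). These conditions can be influenced from the user API; a minimal sketch of the common knobs:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ChainingKnobsSketch {
  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // env.disableOperatorChaining();  // switch chaining off for the whole job

    env.fromElements("a", "b")
        .map(String::toUpperCase)
        .startNewChain()               // this map becomes the head of a new chain
        .filter(s -> !s.isEmpty())
        .disableChaining()             // this filter is never chained with neighbours
        .print();

    env.execute("chaining-knobs");
  }
}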
Creating and starting the YarnClient

1) StandaloneClientFactory implements ClusterClientFactory

For a standalone deployment the descriptor simply wraps the configuration; no external client is started:

public StandaloneClusterDescriptor createClusterDescriptor(Configuration configuration) {
  checkNotNull(configuration);
  return new StandaloneClusterDescriptor(configuration);
}

2) YarnClusterClientFactory

Creates and starts the YarnClient, carrying YARN and Flink configuration and environment info:
private YarnClusterDescriptor getClusterDescriptor(Configuration configuration) {
  /*TODO Create the YarnClient*/
  final YarnClient yarnClient = YarnClient.createYarnClient();
  final YarnConfiguration yarnConfiguration = new YarnConfiguration();

  /*TODO Initialize and start the YarnClient*/
  yarnClient.init(yarnConfiguration);
  yarnClient.start();

  return new YarnClusterDescriptor(
      configuration,
      yarnConfiguration,
      yarnClient,
      YarnClientYarnClusterInformationRetriever.create(yarnClient),
      false);
}
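Which of these factories is chosen is keyed off the execution.target configuration value (set, for example, by flink run -t yarn-per-job ...); the client's service loader asks each ClusterClientFactory whether it is compatible with that target.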
Cluster resource configuration
**(1) Memory configuration**

JobManager memory = jobmanager.memory.process.size

TaskManager memory = taskmanager.memory.process.size

Slots per TaskManager = taskmanager.numberOfTaskSlots
public ClusterSpecification getClusterSpecification(Configuration configuration) {
  checkNotNull(configuration);

  // JobManager memory: jobmanager.memory.process.size
  final int jobManagerMemoryMB = JobManagerProcessUtils.processSpecFromConfigWithNewOptionToInterpretLegacyHeap(
      configuration,
      JobManagerOptions.TOTAL_PROCESS_MEMORY)
    .getTotalProcessMemorySize()
    .getMebiBytes();

  // TaskManager memory: taskmanager.memory.process.size
  final int taskManagerMemoryMB = TaskExecutorProcessUtils
    .processSpecFromConfig(TaskExecutorProcessUtils.getConfigurationMapLegacyTaskManagerHeapSizeToConfigOption(
        configuration, TaskManagerOptions.TOTAL_PROCESS_MEMORY))
    .getTotalProcessMemorySize()
    .getMebiBytes();

  // Number of slots: taskmanager.numberOfTaskSlots
  int slotsPerTaskManager = configuration.getInteger(TaskManagerOptions.NUM_TASK_SLOTS);

  return new ClusterSpecification.ClusterSpecificationBuilder()
    .setMasterMemoryMB(jobManagerMemoryMB)
    .setTaskManagerMemoryMB(taskManagerMemoryMB)
    .setSlotsPerTaskManager(slotsPerTaskManager)
    .createClusterSpecification();
}
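For reference, the three options read above can be set in flink-conf.yaml or programmatically. A minimal sketch using the Configuration API (the sizes are arbitrary examples):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.MemorySize;
import org.apache.flink.configuration.TaskManagerOptions;

public class ClusterSpecConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set(JobManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.parse("1600m"));  // jobmanager.memory.process.size
    conf.set(TaskManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.parse("1728m")); // taskmanager.memory.process.size
    conf.set(TaskManagerOptions.NUM_TASK_SLOTS, 2);                               // taskmanager.numberOfTaskSlots
    System.out.println(conf);
  }
}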
Cluster deployment
YarnClusterDescriptor
public ClusterClientProvider<ApplicationId> deployJobCluster(
    ClusterSpecification clusterSpecification,
    JobGraph jobGraph,
    boolean detached) throws ClusterDeploymentException {
  try {
    // 1)
    return deployInternal(
        clusterSpecification,
        "Flink per-job cluster",
        // 2)
        getYarnJobClusterEntrypoint(),
        jobGraph,
        detached);
  } catch (Exception e) {
    throw new ClusterDeploymentException("Could not deploy Yarn job cluster.", e);
  }
}
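The detached flag passed in here originates from ExecutionConfigAccessor.getDetachedMode() in the executor above, i.e. the inverse of the execution.attached setting (the CLI's -d flag); when detached, the client returns right after submission instead of waiting for the job result.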
**(1) The deployInternal method**
/**
 * This method will block until the ApplicationMaster/JobManager have been deployed on YARN.
 *
 * @param clusterSpecification Initial cluster specification for the Flink cluster to be deployed
 * @param applicationName name of the Yarn application to start
 * @param yarnClusterEntrypoint Class name of the Yarn cluster entry point.
 * @param jobGraph A job graph which is deployed with the Flink cluster, {@code null} if none
 * @param detached True if the cluster should be started in detached mode
 */
private ClusterClientProvider<ApplicationId> deployInternal(
    ClusterSpecification clusterSpecification,
    String applicationName,
    String yarnClusterEntrypoint,
    @Nullable JobGraph jobGraph,
    boolean detached) throws Exception {

  final UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
  if (HadoopUtils.isKerberosSecurityEnabled(currentUser)) {
    boolean useTicketCache = flinkConfiguration.getBoolean(SecurityOptions.KERBEROS_LOGIN_USETICKETCACHE);

    if (!HadoopUtils.areKerberosCredentialsValid(currentUser, useTicketCache)) {
      throw new RuntimeException("Hadoop security with Kerberos is enabled but the login user " +
          "does not have Kerberos credentials or delegation tokens!");
    }
  }

  /*TODO Pre-deployment checks: jar path, conf path, YARN max vcores…*/
  isReadyForDeployment(clusterSpecification);

  // ------------------ Check if the specified queue exists --------------------
  /*TODO Check that the requested YARN queue exists*/
  checkYarnQueues(yarnClient);

  // ------------------ Check if the YARN ClusterClient has the requested resources --------------
  /*TODO Check that YARN has enough resources*/
  // Create application via yarnClient
  final YarnClientApplication yarnApplication = yarnClient.createApplication();
  final GetNewApplicationResponse appResponse = yarnApplication.getNewApplicationResponse();

  Resource maxRes = appResponse.getMaximumResourceCapability();

  final ClusterResourceDescription freeClusterMem;
  try {
    freeClusterMem = getCurrentFreeClusterResources(yarnClient);
  } catch (YarnException | IOException e) {
    failSessionDuringDeployment(yarnClient, yarnApplication);
    throw new YarnDeploymentException("Could not retrieve information about free cluster resources.", e);
  }

  final int yarnMinAllocationMB = yarnConfiguration.getInt(
      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
      YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
  if (yarnMinAllocationMB <= 0) {
    throw new YarnDeploymentException("The minimum allocation memory "
        + "(" + yarnMinAllocationMB + " MB) configured via '" + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
        + "' should be greater than 0.");
  }

  final ClusterSpecification validClusterSpecification;
  try {
    validClusterSpecification = validateClusterResources(
        clusterSpecification,
        yarnMinAllocationMB,
        maxRes,
        freeClusterMem);
  } catch (YarnDeploymentException yde) {
    failSessionDuringDeployment(yarnClient, yarnApplication);
    throw yde;
  }

  LOG.info("Cluster specification: {}", validClusterSpecification);

  final ClusterEntrypoint.ExecutionMode executionMode = detached ?
      ClusterEntrypoint.ExecutionMode.DETACHED
      : ClusterEntrypoint.ExecutionMode.NORMAL;

  flinkConfiguration.setString(ClusterEntrypoint.EXECUTION_MODE, executionMode.toString());

  /*TODO Start the ApplicationMaster*/
  ApplicationReport report = startAppMaster(
      flinkConfiguration,
      applicationName,
      yarnClusterEntrypoint,
      jobGraph,
      yarnClient,
      yarnApplication,
      validClusterSpecification);

  // print the application id for user to cancel themselves.
  if (detached) {
    final ApplicationId yarnApplicationId = report.getApplicationId();
    logDetachedClusterInformation(yarnApplicationId, LOG);
  }

  setClusterEntrypointInfoToConfig(report);

  return () -> {
    try {
      return new RestClusterClient<>(flinkConfiguration, report.getApplicationId());
    } catch (Exception e) {
      throw new RuntimeException("Error while creating RestClusterClient.", e);
    }
  };
}
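The lambda returned at the end is the ClusterClientProvider that the executor wraps in a ClusterClientJobClientAdapter. A sketch of direct usage, assuming a provider obtained as above (printWebUi is a hypothetical helper):

import org.apache.flink.client.program.ClusterClient;
import org.apache.flink.client.program.ClusterClientProvider;
import org.apache.hadoop.yarn.api.records.ApplicationId;

public class ProviderUsageSketch {
  static void printWebUi(ClusterClientProvider<ApplicationId> provider) throws Exception {
    try (ClusterClient<ApplicationId> client = provider.getClusterClient()) {
      // on YARN this is the RestClusterClient created above, talking to the AM's REST endpoint
      System.out.println("Web UI: " + client.getWebInterfaceURL());
    }
  }
}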
1) Pre-deployment checks: jar path, conf path, YARN max vcores
private void isReadyForDeployment(ClusterSpecification clusterSpecification) throws Exception {
  if (this.flinkJarPath == null) {
    throw new YarnDeploymentException("The Flink jar path is null");
  }
  if (this.flinkConfiguration == null) {
    throw new YarnDeploymentException("Flink configuration object has not been set");
  }

  // Check if we don't exceed YARN's maximum virtual cores.
  final int numYarnMaxVcores = yarnClusterInformationRetriever.getMaxVcores();

  int configuredAmVcores = flinkConfiguration.getInteger(YarnConfigOptions.APP_MASTER_VCORES);
  if (configuredAmVcores > numYarnMaxVcores) {
    throw new IllegalConfigurationException(
        String.format("The number of requested virtual cores for application master %d" +
            " exceeds the maximum number of virtual cores %d available in the Yarn Cluster.",
            configuredAmVcores, numYarnMaxVcores));
  }

  int configuredVcores = flinkConfiguration.getInteger(YarnConfigOptions.VCORES, clusterSpecification.getSlotsPerTaskManager());
  // don't configure more than the maximum configured number of vcores
  if (configuredVcores > numYarnMaxVcores) {
    throw new IllegalConfigurationException(
        String.format("The number of requested virtual cores per node %d" +
            " exceeds the maximum number of virtual cores %d available in the Yarn Cluster." +
            " Please note that the number of virtual cores is set to the number of task slots by default" +
            " unless configured in the Flink config with '%s.'",
            configuredVcores, numYarnMaxVcores, YarnConfigOptions.VCORES.key()));
  }

  // check if required Hadoop environment variables are set. If not, warn user
  if (System.getenv("HADOOP_CONF_DIR") == null &&
      System.getenv("YARN_CONF_DIR") == null) {
    LOG.warn("Neither the HADOOP_CONF_DIR nor the YARN_CONF_DIR environment variable is set. " +
        "The Flink YARN Client needs one of these to be set to properly load the Hadoop " +
        "configuration for accessing YARN.");
  }
}
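The two vcore checks map to yarn.appmaster.vcores and yarn.containers.vcores. A minimal sketch of setting them explicitly (the values are arbitrary examples):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.yarn.configuration.YarnConfigOptions;

public class VcoresConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set(YarnConfigOptions.APP_MASTER_VCORES, 1); // yarn.appmaster.vcores
    conf.set(YarnConfigOptions.VCORES, 2);            // yarn.containers.vcores, defaults to slots per TM
    System.out.println(conf);
  }
}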
2) Checking whether YARN has enough free resources
private ClusterResourceDescription getCurrentFreeClusterResources(YarnClient yarnClient) throws YarnException, IOException {
  List<NodeReport> nodes = yarnClient.getNodeReports(NodeState.RUNNING);

  int totalFreeMemory = 0;
  int containerLimit = 0;
  int[] nodeManagersFree = new int[nodes.size()];

  for (int i = 0; i < nodes.size(); i++) {
    NodeReport rep = nodes.get(i);
    int free = rep.getCapability().getMemory() - (rep.getUsed() != null ? rep.getUsed().getMemory() : 0);
    nodeManagersFree[i] = free;
    totalFreeMemory += free;
    if (free > containerLimit) {
      containerLimit = free;
    }
  }
  return new ClusterResourceDescription(totalFreeMemory, containerLimit, nodeManagersFree);
}
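As a worked example: with three RUNNING NodeManagers each reporting an 8192 MB capability and 2048 / 4096 / 6144 MB in use, the free amounts are 6144, 4096 and 2048 MB, so totalFreeMemory = 12288 MB and containerLimit (the largest allocation any single node could host) = 6144 MB.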
**Finding the minimum resource allocation**

RM_SCHEDULER_MINIMUM_ALLOCATION_MB = yarn.scheduler.minimum-allocation-mb

DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB = 1024
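The practical effect is that YARN serves container requests in units of this minimum: with the 1024 MB default, a request for, say, a 1000 MB TaskManager container comes back as a 1024 MB container, and validateClusterResources uses yarnMinAllocationMB when it checks and normalizes the requested ClusterSpecification.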