参考:YARN 监控 —— 监控任务运行状态(包括 Spark、MR 等所有在 YARN 中运行的任务)
//获取任务的applicationId public static String getAppId(String jobName) throws IOException { Configuration conf = new Configuration(); System.setProperty("java.security.krb5.conf", "/etc/krb5.conf"); conf.set("hadoop.security.authentication", "Kerberos"); UserGroupInformation.setConfiguration(conf); UserGroupInformation.loginUserFromKeytab("hdfs@XXXX", "/home/xxxx/hdfs.keytab"); YarnClient client = YarnClient.createYarnClient(); client.init(conf); client.start(); EnumSet<YarnApplicationState> appStates = EnumSet.noneOf(YarnApplicationState.class); if (appStates.isEmpty()) { appStates.add(YarnApplicationState.RUNNING); appStates.add(YarnApplicationState.ACCEPTED); appStates.add(YarnApplicationState.SUBMITTED); } List<ApplicationReport> appsReport = null; try { // 返回EnumSet<YarnApplicationState>中个人任务状态的所有任务 appsReport = client.getApplications(appStates); } catch (YarnException | IOException e) { e.printStackTrace(); } assert appsReport != null; for (ApplicationReport appReport : appsReport) { System.out.println(appReport); // 获取任务名 String jn = appReport.getName(); String applicationType = appReport.getApplicationType(); if (jn.equals(jobName)) { // && "Apache Flink".equals(applicationType)) { try { client.close(); } catch (IOException e) { e.printStackTrace(); } return appReport.getApplicationId().toString(); } } try { client.close(); } catch (IOException e) { e.printStackTrace(); } return null; } // 根据任务的applicationId去获取任务的状态 public static YarnApplicationState getState(String appId) throws IOException { Configuration conf = new Configuration(); System.setProperty("java.security.krb5.conf", "/etc/krb5.conf"); conf.set("hadoop.security.authentication", "Kerberos"); UserGroupInformation.setConfiguration(conf); UserGroupInformation.loginUserFromKeytab("hdfs@XXXXX", "/home/xxxx/hdfs.keytab"); YarnClient client = YarnClient.createYarnClient(); client.init(conf); client.start(); ApplicationId applicationId = ConverterUtils.toApplicationId(appId); YarnApplicationState 
yarnApplicationState = null; try { ApplicationReport applicationReport = client.getApplicationReport(applicationId); yarnApplicationState = applicationReport.getYarnApplicationState(); } catch (YarnException | IOException e) { e.printStackTrace(); } try { client.close(); } catch (IOException e) { e.printStackTrace(); } return yarnApplicationState; } public static void main(String[] args) throws IOException, InterruptedException { String state = getAppId("job_xxxxx"); System.out.println(state); // System.out.println(state == YarnApplicationState.RUNNING); }
输出
applicationId { id: 279 cluster_timestamp: 1620359479641 } user: "dl" queue: "root.xxxx" name: "xxx-flink" host: "xxxxx" rpc_port: 18000 client_to_am_token { identifier: "xxxx@XXXX" password: ";xxxxx" kind: "YARN_CLIENT_TOKEN" service: "" } yarn_application_state: RUNNING trackingUrl: "http://xxxxx:8088/proxy/application_xxxxx/" diagnostics: "" startTime: 1620391776339 finishTime: 0 final_application_status: APP_UNDEFINED app_resource_Usage { num_used_containers: 4 num_reserved_containers: 0 used_resources { memory: 8192 virtual_cores: 7 } reserved_resources { memory: 0 virtual_cores: 0 } needed_resources { memory: 8192 virtual_cores: 7 } memory_seconds: 12703546778 vcore_seconds: 10855065 } originalTrackingUrl: "http://xxxx:18000" currentApplicationAttemptId { application_id { id: 279 cluster_timestamp: 1620359479641 } attemptId: 1 } progress: 1.0 applicationType: "XXXX Flink" log_aggregation_status: LOG_NOT_START