OpenTSDB源码详解之TSDMain类

1.TSDMain类

Main class of the TSD, the Time Series Daemon.
TSD的主类,时间序列的守护进程。

2.源代码

package net.opentsdb.tools;

import java.io.IOException;
import java.lang.reflect.Constructor;

import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.HashMap;
import java.util.Map;

import org.jboss.netty.bootstrap.ServerBootstrap;
import org.jboss.netty.channel.socket.ServerSocketChannelFactory;
import org.jboss.netty.channel.socket.nio.NioServerBossPool;
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
import org.jboss.netty.channel.socket.nio.NioWorkerPool;
import org.jboss.netty.channel.socket.oio.OioServerSocketChannelFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.opentsdb.tools.BuildData;
import net.opentsdb.core.TSDB;
import net.opentsdb.core.Const;
import net.opentsdb.tsd.PipelineFactory;
import net.opentsdb.tsd.RpcManager;
import net.opentsdb.utils.Config;
import net.opentsdb.utils.FileSystem;
import net.opentsdb.utils.Pair;
import net.opentsdb.utils.PluginLoader;
import net.opentsdb.utils.Threads;

/**
 * Main class of the TSD, the Time Series Daemon.
 */
final class TSDMain {

  /**
   * Writes an error message followed by the command-line synopsis to stderr
   * and then terminates the JVM.
   *
   * @param argp option parser whose option listing is appended to the
   * synopsis; the listing is skipped when this is null
   * @param errmsg message printed before the synopsis
   * @param retval exit status handed to {@link System#exit(int)}
   */
  static void usage(final ArgP argp, final String errmsg, final int retval) {
    final String synopsis = "Usage: tsd --port=PORT"
      + " --staticroot=PATH --cachedir=PATH\n"
      + "Starts the TSD, the Time Series Daemon";

    System.err.println(errmsg);
    System.err.println(synopsis);
    if (argp != null) {
      final String option_listing = argp.usage();
      System.err.print(option_listing);
    }
    System.exit(retval);
  }

  /**
   * Maps a name to a (class, {@code StartupPlugin} constructor) pair.
   * The original comment described it as "a map of configured filters for
   * use in querying"; no method of this class, as shown here, reads or
   * writes it.
   */
  private static Map<String, Pair<Class<?>, Constructor<? extends StartupPlugin>>>
          startupPlugin_filter_map = new HashMap<String,
          Pair<Class<?>, Constructor<? extends StartupPlugin>>>();

  /**
   * Default quoted in the help text of the {@code --flush-interval} option,
   * whose argument is labelled MSEC (milliseconds). In this class it is only
   * used to build that help text.
   */
  private static final short DEFAULT_FLUSH_INTERVAL = 1000;
  
  /**
   * The TSDB instance created by {@code main}; the shutdown hook reads it in
   * order to shut it down. Stays null until {@code main} has created it.
   */
  private static TSDB tsdb = null;
  
  /**
   * Entry point of the TSD. Parses the command line, validates the required
   * settings, builds the Netty channel factory, creates the {@link TSDB},
   * registers the shutdown hook and binds the server socket.
   * <p>
   * Configuration problems are reported through
   * {@link #usage(ArgP, String, int)}, which terminates the JVM: status 1 for
   * missing or malformed settings, status 3 for directory or startup plugin
   * validation failures.
   *
   * @param args command-line arguments
   * @throws IOException declared by the signature; NOTE(review): presumably
   * raised while loading the configuration, confirm against CliOptions
   * @throws RuntimeException if the startup plugin fails to load or anything
   * fails while the TSDB and the server are being brought up
   */
  public static void main(String[] args) throws IOException {
    // Logger named after TSDMain.
    Logger log = LoggerFactory.getLogger(TSDMain.class);
    log.info("Starting.");

    // Both BuildData calls simply return a descriptive string to log.
    log.info(BuildData.revisionString());
    log.info(BuildData.buildString());

    try {
      System.in.close();  // Release a FD we don't need.
    } catch (Exception e) {
      log.warn("Failed to close stdin", e);
    }

    // ArgP is a dead simple command-line argument parser.
    final ArgP argp = new ArgP();
    CliOptions.addCommon(argp);
    argp.addOption("--port", "NUM", "TCP port to listen on.");
    argp.addOption("--bind", "ADDR", "Address to bind to (default: 0.0.0.0).");
    argp.addOption("--staticroot", "PATH",
                   "Web root from which to serve static files (/s URLs).");
    argp.addOption("--cachedir", "PATH",
                   "Directory under which to cache result of requests.");
    argp.addOption("--worker-threads", "NUM",
                   "Number for async io workers (default: cpu * 2).");
    argp.addOption("--async-io", "true|false",
                   "Use async NIO (default true) or traditional blocking io");
    argp.addOption("--read-only", "true|false",
                   "Set tsd.mode to ro (default false)");
    argp.addOption("--disable-ui", "true|false",
                   "Set tsd.core.enable_ui to false (default true)");
    argp.addOption("--disable-api", "true|false",
                   "Set tsd.core.enable_api to false (default true)");
    // The original help text was missing its closing parenthesis.
    argp.addOption("--backlog", "NUM",
                   "Size of connection attempt queue (default: 3072 or kernel"
                   + " somaxconn).");
    argp.addOption("--max-connections", "NUM",
                   "Maximum number of connections to accept");
    argp.addOption("--flush-interval", "MSEC",
                   "Maximum time for which a new data point can be buffered"
                   + " (default: " + DEFAULT_FLUSH_INTERVAL + ").");
    argp.addOption("--statswport", "Force all stats to include the port");
    CliOptions.addAutoMetricFlag(argp);
    args = CliOptions.parse(argp, args);
    // The remaining arguments are not used below, so the reference is dropped.
    args = null;

    // Build the Config object; this is where the properties file comes in.
    Config config = CliOptions.getConfig(argp);

    // Check for the required parameters. Explicit null/empty checks replace
    // the earlier pattern of catching NullPointerException.
    final String static_root = config.getString("tsd.http.staticroot");
    if (static_root == null || static_root.isEmpty()) {
      usage(argp, "Missing static root directory", 1);
    }
    final String cache_dir = config.getString("tsd.http.cachedir");
    if (cache_dir == null || cache_dir.isEmpty()) {
      usage(argp, "Missing cache directory", 1);
    }
    try {
      if (!config.hasProperty("tsd.network.port")) {
        usage(argp, "Missing network port", 1);
      }
      // Called only to verify that the value parses as an int.
      config.getInt("tsd.network.port");
    } catch (NumberFormatException nfe) {
      usage(argp, "Invalid network port setting", 1);
    }

    // Validate the cache and staticroot directories: the cache directory must
    // be writeable and is created if needed, the static root is only checked.
    try {
      FileSystem.checkDirectory(cache_dir,
          Const.MUST_BE_WRITEABLE, Const.CREATE_IF_NEEDED);
      FileSystem.checkDirectory(static_root,
          !Const.MUST_BE_WRITEABLE, Const.DONT_CREATE);
    } catch (IllegalArgumentException e) {
      usage(argp, e.getMessage(), 3);
    }

    // The ChannelFactory that creates the ServerSocketChannel.
    final ServerSocketChannelFactory factory;
    int connections_limit = 0;
    try {
      // Per the Config defaults quoted in the original notes,
      // tsd.core.connections.limit defaults to "0".
      connections_limit = config.getInt("tsd.core.connections.limit");
    } catch (NumberFormatException nfe) {
      usage(argp, "Invalid connections limit", 1);
    }

    // Per the Config defaults quoted in the original notes,
    // tsd.network.async_io defaults to "true".
    if (config.getBoolean("tsd.network.async_io")) {
      int workers = Runtime.getRuntime().availableProcessors() * 2;

      // An explicit tsd.network.worker_threads setting overrides cpu * 2.
      if (config.hasProperty("tsd.network.worker_threads")) {
        try {
          workers = config.getInt("tsd.network.worker_threads");
        } catch (NumberFormatException nfe) {
          usage(argp, "Invalid worker thread count", 1);
        }
      }

      // The boss pool (one thread) and the worker pool share one cached
      // thread pool executor.
      final Executor executor = Executors.newCachedThreadPool();
      final NioServerBossPool boss_pool =
          new NioServerBossPool(executor, 1, new Threads.BossThreadNamer());
      final NioWorkerPool worker_pool = new NioWorkerPool(executor,
          workers, new Threads.WorkerThreadNamer());
      factory = new NioServerSocketChannelFactory(boss_pool, worker_pool);
    } else {
      factory = new OioServerSocketChannelFactory(
          Executors.newCachedThreadPool(), Executors.newCachedThreadPool(),
          new Threads.PrependThreadNamer());
    }

    StartupPlugin startup = null;
    try {
      startup = loadStartupPlugins(config);
    } catch (IllegalArgumentException e) {
      usage(argp, e.getMessage(), 3);
    } catch (Exception e) {
      throw new RuntimeException("Initialization failed", e);
    }

    try {
      tsdb = new TSDB(config);
      if (startup != null) {
        tsdb.setStartupPlugin(startup);
      }
      tsdb.initializePlugins(true);
      if (config.getBoolean("tsd.storage.hbase.prefetch_meta")) {
        tsdb.preFetchHBaseMeta();
      }

      // Make sure we don't even start if we can't find our tables.
      tsdb.checkNecessaryTablesExist().joinUninterruptibly();

      registerShutdownHook();

      // ServerBootstrap is a helper class which creates a new server-side
      // Channel and accepts incoming connections.
      final ServerBootstrap server = new ServerBootstrap(factory);

      // This manager is capable of lazy init, but we force an init here to
      // fail fast. RpcManager manages the lifecycle of HttpRpcs, TelnetRpcs,
      // RpcPlugins and HttpRpcPlugins.
      final RpcManager manager = RpcManager.instance(tsdb);

      server.setPipelineFactory(
          new PipelineFactory(tsdb, manager, connections_limit));
      if (config.hasProperty("tsd.network.backlog")) {
        server.setOption("backlog", config.getInt("tsd.network.backlog"));
      }
      server.setOption("child.tcpNoDelay",
          config.getBoolean("tsd.network.tcp_no_delay"));
      server.setOption("child.keepAlive",
          config.getBoolean("tsd.network.keep_alive"));
      server.setOption("reuseAddress",
          config.getBoolean("tsd.network.reuse_address"));

      // null is interpreted as the wildcard address.
      InetAddress bindAddress = null;
      if (config.hasProperty("tsd.network.bind")) {
        bindAddress = InetAddress.getByName(config.getString("tsd.network.bind"));
      }

      // we validated the network port config earlier
      final InetSocketAddress addr = new InetSocketAddress(bindAddress,
          config.getInt("tsd.network.port"));
      server.bind(addr);
      if (startup != null) {
        startup.setReady(tsdb);
      }
      log.info("Ready to serve on " + addr);
    } catch (Throwable e) {
      // Tear down whatever was started before reporting the failure.
      factory.releaseExternalResources();
      try {
        if (tsdb != null)
          tsdb.shutdown().joinUninterruptibly();
      } catch (Exception e2) {
        log.error("Failed to shutdown HBase client", e2);
      }
      throw new RuntimeException("Initialization failed", e);
    }
    // The server is now running in separate threads, we can exit main.
    // Marker line added for this walkthrough; it is printed once the
    // startup sequence above has completed without throwing.
     System.out.println("main end");
  }


  /**
   * Loads and initializes the startup plugin when {@code tsd.startup.enable}
   * is set in the configuration.
   *
   * @param config configuration supplying {@code tsd.startup.enable},
   * {@code tsd.core.plugin_path} and {@code tsd.startup.plugin}
   * @return the initialized plugin, or null when startup plugins are disabled
   * @throws IllegalArgumentException if the class named by
   * {@code tsd.startup.plugin} could not be located
   * @throws RuntimeException if the plugin's {@code initialize} call fails
   */
  private static StartupPlugin loadStartupPlugins(Config config) {
    final Logger log = LoggerFactory.getLogger(TSDMain.class);

    // Nothing to load unless the feature is switched on.
    if (!config.getBoolean("tsd.startup.enable")) {
      return null;
    }

    log.debug("Startup Plugin is Enabled");
    final String search_path = config.getString("tsd.core.plugin_path");
    final String class_name = config.getString("tsd.startup.plugin");

    // A failure to load the plugin path is logged but does not abort; the
    // lookup below still runs.
    log.debug("Plugin Path: " + search_path);
    try {
      TSDB.loadPluginPath(search_path);
    } catch (Exception e) {
      log.error("Error loading plugins from plugin path: " + search_path, e);
    }

    log.debug("Attempt to Load: " + class_name);
    final StartupPlugin loaded =
        PluginLoader.loadSpecificPlugin(class_name, StartupPlugin.class);
    if (loaded == null) {
      throw new IllegalArgumentException("Unable to locate startup plugin: " +
              class_name);
    }

    try {
      loaded.initialize(config);
    } catch (Exception e) {
      throw new RuntimeException("Failed to initialize startup plugin", e);
    }

    log.info("Successfully initialized startup plugin [" +
            loaded.getClass().getCanonicalName() + "] version: "
            + loaded.version());
    return loaded;
  }


  /**
   * Registers a JVM shutdown hook, a thread named "TSDBShutdown", that shuts
   * down the RPC manager (only if it was initialized) and then the TSDB.
   * <p>
   * The two shutdowns are isolated from each other so that an exception
   * raised while stopping the RPC manager no longer prevents the TSDB from
   * being shut down.
   */
  private static void registerShutdownHook() {
    final class TSDBShutdown extends Thread {
      public TSDBShutdown() {
        super("TSDBShutdown");
      }

      @Override
      public void run() {
        try {
          if (RpcManager.isInitialized()) {
            // Check that its actually been initialized.  We don't want to
            // create a new instance only to shutdown!
            RpcManager.instance(tsdb).shutdown().join();
          }
        } catch (Exception e) {
          LoggerFactory.getLogger(TSDBShutdown.class)
            .error("Uncaught exception during shutdown", e);
        }
        // Runs even if the RPC manager shutdown above failed.
        try {
          if (tsdb != null) {
            tsdb.shutdown().join();
          }
        } catch (Exception e) {
          LoggerFactory.getLogger(TSDBShutdown.class)
            .error("Uncaught exception during shutdown", e);
        }
      }
    }
    Runtime.getRuntime().addShutdownHook(new TSDBShutdown());
  }
}

3.分析

先看一下TSDMain类的uml类图,如下:
OpenTSDB源码详解之TSDMain类_openTSDB实战
字段有startupPlugin_filter_map,DEFAULT_FLUSH_INTERVAL,tsdb。
方法有usage(ArgP,String,int);main(String []);loadStartupPlugins(Config);registerShutdownHook()
现在依次介绍:
字段:

  • startupPlugin_filter_map:名称到(类,StartupPlugin构造器)的映射;源码注释称其为"查询时使用的已配置过滤器映射",但在本文给出的TSDMain代码中并没有被使用
  • DEFAULT_FLUSH_INTERVAL :默认的刷新间隔,为 1000;从 --flush-interval 选项的参数名 MSEC 可以看出,单位是毫秒(ms)
  • TSDB 的一个对象tsdb

方法:

  • main,主方法。用于启动守护进程
  • loadStartupPlugins:用于加载插件【但是这些插件的作用是什么?我暂时不了解】
  • registerShutdownHook():向JVM注册一个名为TSDBShutdown的关闭钩子(shutdown hook)线程,在进程退出时先关闭已初始化的RpcManager,再关闭tsdb

4.运行结果

尝试运行TSDMain类中的main方法,可能会一直卡在某个地方,我就不大理解了,我的环境都已经配置通了,为什么什么都没有显示出来呢?我在浏览器输入localhost:4399时,报无法连接的错误。
后来再次研究main方法中的代码,存在如下的代码:

// Make sure we don't even start if we can't find our tables.
tsdb.checkNecessaryTablesExist().joinUninterruptibly();      

这个意思就是确定OpenTSDB所需要的表存在才会继续运行,否则会一直卡住。再查看checkNecessaryTablesExist()方法,如下:

/**
   * Checks that the HBase tables this TSD is configured to use exist: always
   * the data and UID tables, plus the tree table when tree processing is
   * enabled and the meta table when realtime TS tracking, realtime UID
   * tracking or TSUID incrementing is enabled.
   * <p>
   * Per the Config defaults quoted in the original notes, the data, UID and
   * meta tables default to "tsdb", "tsdb-uid" and "tsdb-meta".
   *
   * @return A Deferred grouping the individual table checks, to wait on
   * @throws TableNotFoundException as stated by the original documentation;
   * NOTE(review): nothing in the visible body throws it directly, confirm
   * how it is surfaced
   * @since 2.0
   */
  public Deferred<ArrayList<Object>> checkNecessaryTablesExist() {
    final ArrayList<Deferred<Object>> pending =
      new ArrayList<Deferred<Object>>(2);

    // The data and UID tables are always required.
    final String data_table = config.getString("tsd.storage.hbase.data_table");
    pending.add(client.ensureTableExists(data_table));

    final String uid_table = config.getString("tsd.storage.hbase.uid_table");
    pending.add(client.ensureTableExists(uid_table));

    // The tree table is only needed when tree building is switched on.
    if (config.enable_tree_processing()) {
      final String tree_table =
          config.getString("tsd.storage.hbase.tree_table");
      pending.add(client.ensureTableExists(tree_table));
    }

    // Any one of the meta tracking switches makes the meta table necessary.
    final boolean meta_in_use = config.enable_realtime_ts()
        || config.enable_realtime_uid()
        || config.enable_tsuid_incrementing();
    if (meta_in_use) {
      final String meta_table =
          config.getString("tsd.storage.hbase.meta_table");
      pending.add(client.ensureTableExists(meta_table));
    }

    return Deferred.group(pending);
  }

恍然大悟,是进程没有找到这个tsdb、tsdb-uid等表。然后接着查看集群进程,发现HMaster已经死亡。接着重启HBase进程,再次运行即可。然后得到最后的输出界面如下:
OpenTSDB源码详解之TSDMain类_openTSDB实战_02
main end表示整个main方法都已经运行结束了。打开网页localhost:4399,可以看到如下的页面【只不过是图像莫名丢失啦:(】
OpenTSDB源码详解之TSDMain类_openTSDB文档翻译_03
下面就来解决这个图片消失的问题。
在浏览器中触发这个数据查询时,会在后台触发java代码,并且在intellij的标准输出中输出。可以看到此时的输出结果为:

20:44:28.852 [OpenTSDB I/O Boss #1] INFO  net.opentsdb.tsd.ConnectionManager - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] OPEN
20:44:28.854 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.ConnectionManager - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] BOUND: /192.168.211.2:4399
20:44:28.854 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.ConnectionManager - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] CONNECTED: /192.168.211.2:6273
20:44:28.858 [OpenTSDB I/O Worker #1] DEBUG o.j.n.h.c.http.HttpContentCompressor - -Dio.netty.noJdkZlibEncoder: false
20:44:28.872 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] HTTP / done in 8ms
20:44:28.933 [OpenTSDB I/O Worker #1] WARN  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] File not found: \root\openTSDB_temp\queryui.nocache.js (系统找不到指定的文件。)
20:44:28.934 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] HTTP /s/queryui.nocache.js done in 1ms
20:44:28.936 [OpenTSDB I/O Worker #1] WARN  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] File not found: \root\openTSDB_temp\opentsdb_header.jpg (系统找不到指定的文件。)
20:44:28.937 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] HTTP /s/opentsdb_header.jpg done in 0ms
20:44:28.943 [OpenTSDB I/O Boss #1] INFO  net.opentsdb.tsd.ConnectionManager - [id: 0xadcd8ee4, /192.168.211.2:6274 => /192.168.211.2:4399] OPEN
20:44:28.944 [OpenTSDB I/O Worker #2] INFO  net.opentsdb.tsd.ConnectionManager - [id: 0xadcd8ee4, /192.168.211.2:6274 => /192.168.211.2:4399] BOUND: /192.168.211.2:4399
20:44:28.944 [OpenTSDB I/O Worker #2] INFO  net.opentsdb.tsd.ConnectionManager - [id: 0xadcd8ee4, /192.168.211.2:6274 => /192.168.211.2:4399] CONNECTED: /192.168.211.2:6274
20:44:30.019 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] HTTP / done in 0ms
20:44:30.073 [OpenTSDB I/O Worker #1] WARN  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] File not found: \root\openTSDB_temp\queryui.nocache.js (系统找不到指定的文件。)
20:44:30.074 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] HTTP /s/queryui.nocache.js done in 1ms
20:44:30.076 [OpenTSDB I/O Worker #2] WARN  net.opentsdb.tsd.HttpQuery - [id: 0xadcd8ee4, /192.168.211.2:6274 => /192.168.211.2:4399] File not found: \root\openTSDB_temp\opentsdb_header.jpg (系统找不到指定的文件。)
20:44:30.077 [OpenTSDB I/O Worker #2] INFO  net.opentsdb.tsd.HttpQuery - [id: 0xadcd8ee4, /192.168.211.2:6274 => /192.168.211.2:4399] HTTP /s/opentsdb_header.jpg done in 0ms
20:44:32.184 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] HTTP / done in 1ms
20:44:32.239 [OpenTSDB I/O Worker #1] WARN  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] File not found: \root\openTSDB_temp\queryui.nocache.js (系统找不到指定的文件。)
20:44:32.240 [OpenTSDB I/O Worker #1] INFO  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] HTTP /s/queryui.nocache.js done in 1ms
20:44:32.241 [OpenTSDB I/O Worker #2] WARN  net.opentsdb.tsd.HttpQuery - [id: 0xadcd8ee4, /192.168.211.2:6274 => /192.168.211.2:4399] File not found: \root\openTSDB_temp\opentsdb_header.jpg (系统找不到指定的文件。)
20:44:32.242 [OpenTSDB I/O Worker #2] INFO  net.opentsdb.tsd.HttpQuery - [id: 0xadcd8ee4, /192.168.211.2:6274 => /192.168.211.2:4399] HTTP /s/opentsdb_header.jpg done in 0ms
20:44:32.270 [OpenTSDB I/O Worker #1] WARN  net.opentsdb.tsd.HttpQuery - [id: 0x99b05f33, /192.168.211.2:6273 => /192.168.211.2:4399] File not found: \root\openTSDB_temp\opentsdb_header.jpg (系统找不到指定的文件。)

可以很清楚地看到"系统找不到指定的文件"的错误,也就是因为没有拿到这些文件导致出错。主要的文件有:

  • opentsdb_header.jpg
  • queryui.nocache.js
    查看opentsdb.properties文件,其中的两行代码如下:
tsd.http.cachedir = G:\\testdb
tsd.http.staticroot = G:\\testdb

这两行配置分别指定了请求结果的缓存目录(cachedir)和存放OpenTSDB前端页面静态文件的目录(staticroot),但是在G:\testdb目录中却是没有文件的。我将之前Centos 7中安装的opentsdb的文件拿了过来,最后就得到了相应的界面,如下所示:
OpenTSDB源码详解之TSDMain类_openTSDB文档翻译_04
这次出现了opentsdb_header的图像,但是主要的躯干还没有显示出来,原因一样,也是因为文件丢失。从以前的安装包里copy一份即可。再次刷新一下即可:
OpenTSDB源码详解之TSDMain类_openTSDB_05
最后得到的图形界面如上,到这里通过源码编译opentsdb的第一步算是成功了。接下来会逐渐深入。

整个源码分析过程中遇到的困难我思考了很久才得以解决,这里将其稍微记录一下:

  • 1.在opentsdb.properties文件中,可以看到如下的代码:
# ----------- HTTP -----------
# The location of static files for the HTTP GUI interface.
# *** REQUIRED ***
#i don't know why can't set staticroot to /usr/local/opentsdb-2.3.0/build/staticroot,must to /root/openTSDB_temp
#tsd.http.staticroot = /usr/local/opentsdb-2.3.0/build/staticroot
#tsd.http.staticroot = /root/openTSDB_temp
tsd.http.staticroot = G:\\testdb

tsd.http.staticroot的路径我变换了很多次,从/usr/local/opentsdb-2.3.0/build/staticroot => /root/openTSDB_temp => G:\testdb。才恍然大悟,运行在windows上的程序,你是无法把其文件路径指向其它机器(如虚拟机上)的文件夹。这里必须使用windows本地的路径才可以。

  • 2.为什么我们在浏览器中输入了metric,date…之后,就可以触发整个数据的查询呢?原因是,这是由前端语言javascript和ajax控制的,如果前端的页面有变化,则会触发后台的java程序工作。