

这个问题虽然看起来很小,却并不那么容易回答。大家如果有更好的方法欢迎赐教,先来一个天真的估算方法:假设要求一个系统的TPS(Transactions Per Second或者Tasks Per Second)至少为20,然后假设每个Transaction由一个线程完成,继续假设平均每个线程处理一个Transaction的时间为4s。那么问题转化为:线程池需要多大,才能在1s内处理完20个Transaction?每个线程的处理能力为1/4=0.25 TPS,要达到20 TPS就需要20/0.25=80个线程。





  • 如果是CPU密集型应用,则线程池大小设置为N+1(N为CPU核数)
  • 如果是IO密集型应用,则线程池大小设置为2N+1


接下来在这个文档:服务器性能IO优化 中发现一个估算公式:


最佳线程数目 = ((线程等待时间 + 线程CPU时间) / 线程CPU时间) * CPU数目



最佳线程数目 = (线程等待时间与线程CPU时间之比 + 1) * CPU数目





  • 尽量提高短板操作的并行化比率,比如多线程下载技术
  • 增强短板能力,比如用NIO替代IO



加速比 = 优化前系统耗时 / 优化后系统耗时



Speedup <= 1 / (F + (1 - F) / N),其中F为必须串行执行的部分所占比例,N为处理器数量(Amdahl定律)





  • 多线程带来线程上下文切换开销,单线程就没有这种开销



最后来一个“Dark Magic”估算方法(因为我暂时还没有搞懂它的原理),使用下面的类:


​package​​ ​​pool_size_calculate;​




​import​​ ​​java.math.BigDecimal;​


​import​​ ​​java.math.RoundingMode;​


​import​​ ​​java.util.Timer;​


​import​​ ​​java.util.TimerTask;​


​import​​ ​​java.util.concurrent.BlockingQueue;​






​* A class that calculates the optimal thread pool boundaries. It takes the​


​* desired target utilization and the desired work queue memory consumption as​


* input and returns thread count and work queue capacity.




​* @author Niklas Schlimm​






​public​​ ​​abstract​​ ​​class​​ ​​PoolSizeCalculator {​






​* The sample queue size to calculate the size of a single {@link Runnable}​


​* element.​




​private​​ ​​final​​ ​​int​​ ​​SAMPLE_QUEUE_SIZE = ​​​​1000​​​​;​






​* Accuracy of test run. It must finish within 20ms of the testTime​


​* otherwise we retry the test. This could be configurable.​




​private​​ ​​final​​ ​​int​​ ​​EPSYLON = ​​​​20​​​​;​






​* Control variable for the CPU time investigation.​




​private​​ ​​volatile​​ ​​boolean​​ ​​expired;​






​* Time (millis) of the test run in the CPU time calculation.​




​private​​ ​​final​​ ​​long​​ ​​testtime = ​​​​3000​​​​;​






​* Calculates the boundaries of a thread pool for a given {@link Runnable}.​




​* @param targetUtilization​


​*            the desired utilization of the CPUs (0 <= targetUtilization <=   *            1)     * @param targetQueueSizeBytes   *            the desired maximum work queue size of the thread pool (bytes)     */​​     ​​protected​​ ​​void​​ ​​calculateBoundaries(BigDecimal targetUtilization,            BigDecimal targetQueueSizeBytes) {      calculateOptimalCapacity(targetQueueSizeBytes);         Runnable task = creatTask();        start(task);        start(task); ​​​​// warm up phase       long cputime = getCurrentThreadCPUTime();       start(task); // test intervall      cputime = getCurrentThreadCPUTime() - cputime;      long waittime = (testtime * 1000000) - cputime;         calculateOptimalThreadCount(cputime, waittime, targetUtilization);  }   private void calculateOptimalCapacity(BigDecimal targetQueueSizeBytes) {        long mem = calculateMemoryUsage();      BigDecimal queueCapacity = targetQueueSizeBytes.divide(new BigDecimal(              mem), RoundingMode.HALF_UP);        System.out.println("Target queue memory usage (bytes): "                + targetQueueSizeBytes);        System.out.println("createTask() produced "                 + creatTask().getClass().getName() + " which took " + mem               + " bytes in a queue");         System.out.println("Formula: " + targetQueueSizeBytes + " / " + mem);       System.out.println("* Recommended queue capacity (bytes): "                 + queueCapacity);   }   /**      * Brian Goetz' optimal thread count formula, see 'Java Concurrency in   * Practice' (chapter 8.2)   *       * @param cpu    *            cpu time consumed by considered task   * @param wait   *            wait time of considered task   * @param targetUtilization      *            target utilization of the system   */     private void calculateOptimalThreadCount(long cpu, long wait,           BigDecimal targetUtilization) {         BigDecimal waitTime = new BigDecimal(wait);         BigDecimal computeTime = new BigDecimal(cpu);       
BigDecimal numberOfCPU = new BigDecimal(Runtime.getRuntime()                .availableProcessors());        BigDecimal optimalthreadcount = numberOfCPU.multiply(targetUtilization)                 .multiply(                      new BigDecimal(1).add(waitTime.divide(computeTime,                              RoundingMode.HALF_UP)));        System.out.println("Number of CPU: " + numberOfCPU);        System.out.println("Target utilization: " + targetUtilization);         System.out.println("Elapsed time (nanos): " + (testtime * 1000000));        System.out.println("Compute time (nanos): " + cpu);         System.out.println("Wait time (nanos): " + wait);       System.out.println("Formula: " + numberOfCPU + " * "                + targetUtilization + " * (1 + " + waitTime + " / "                 + computeTime + ")");       System.out.println("* Optimal thread count: " + optimalthreadcount);    }   /**      * Runs the {@link Runnable} over a period defined in {@link #testtime}.     * Based on Heinz Kabbutz' ideas     * (http://www.javaspecialists.eu/archive/Issue124.html).    *       * @param task   *            the runnable under investigation   */     public void start(Runnable task) {      long start = 0;         int runs = 0;       do {            if (++runs > 5) {​


​throw​​ ​​new​​ ​​IllegalStateException(​​​​"Test not accurate"​​​​);​




​expired = ​​​​false​​​​;​


​start = System.currentTimeMillis();​


​Timer timer = ​​​​new​​ ​​Timer();​


​timer.schedule(​​​​new​​ ​​TimerTask() {​


​public​​ ​​void​​ ​​run() {​


​expired = ​​​​true​​​​;​




​}, testtime);​


​while​​ ​​(!expired) {​






​start = System.currentTimeMillis() - start;​




​} ​​​​while​​ ​​(Math.abs(start - testtime) > EPSYLON);​








​private​​ ​​void​​ ​​collectGarbage(​​​​int​​ ​​times) {​


​for​​ ​​(​​​​int​​ ​​i = ​​​​0​​​​; i < times; i++) {​




​try​​ ​​{​




​} ​​​​catch​​ ​​(InterruptedException e) {​
















​* Calculates the memory usage of a single element in a work queue. Based on​


* Heinz Kabutz' ideas


​* (http://www.javaspecialists.eu/archive/Issue029.html).​




​* @return memory usage of a single {@link Runnable} element in the thread​


​*         pools work queue​




​public​​ ​​long​​ ​​calculateMemoryUsage() {​


​BlockingQueue queue = createWorkQueue();​


​for​​ ​​(​​​​int​​ ​​i = ​​​​0​​​​; i < SAMPLE_QUEUE_SIZE; i++) {​






​long​​ ​​mem0 = Runtime.getRuntime().totalMemory()​


​- Runtime.getRuntime().freeMemory();​


​long​​ ​​mem1 = Runtime.getRuntime().totalMemory()​


​- Runtime.getRuntime().freeMemory();​


​queue = ​​​​null​​​​;​




​mem0 = Runtime.getRuntime().totalMemory()​


​- Runtime.getRuntime().freeMemory();​


​queue = createWorkQueue();​


​for​​ ​​(​​​​int​​ ​​i = ​​​​0​​​​; i < SAMPLE_QUEUE_SIZE; i++) {​








​mem1 = Runtime.getRuntime().totalMemory()​


​- Runtime.getRuntime().freeMemory();​


​return​​ ​​(mem1 - mem0) / SAMPLE_QUEUE_SIZE;​








​* Create your runnable task here.​




​* @return an instance of your runnable task under investigation​




​protected​​ ​​abstract​​ ​​Runnable creatTask();​






​* Return an instance of the queue used in the thread pool.​




​* @return queue instance​




​protected​​ ​​abstract​​ ​​BlockingQueue createWorkQueue();​






​* Calculate current cpu time. Various frameworks may be used here,​


​* depending on the operating system in use. (e.g.​


​* http://www.hyperic.com/products/sigar). The more accurate the CPU time​


​* measurement, the more accurate the results for thread count boundaries.​




​* @return current cpu time of current thread​




​protected​​ ​​abstract​​ ​​long​​ ​​getCurrentThreadCPUTime();​







​package​​ ​​pool_size_calculate;​




​import​​ ​​java.io.BufferedReader;​


​import​​ ​​java.io.IOException;​


​import​​ ​​java.io.InputStreamReader;​


​import​​ ​​java.lang.management.ManagementFactory;​


​import​​ ​​java.math.BigDecimal;​


​import​​ ​​java.net.HttpURLConnection;​


​import​​ ​​java.net.URL;​


​import​​ ​​java.util.concurrent.BlockingQueue;​


​import​​ ​​java.util.concurrent.LinkedBlockingQueue;​




​public​​ ​​class​​ ​​SimplePoolSizeCaculatorImpl ​​​​extends​​ ​​PoolSizeCalculator {​






​protected​​ ​​Runnable creatTask() {​


​return​​ ​​new​​ ​​AsyncIOTask();​








​protected​​ ​​BlockingQueue createWorkQueue() {​


​return​​ ​​new​​ ​​LinkedBlockingQueue(​​​​1000​​​​);​








​protected​​ ​​long​​ ​​getCurrentThreadCPUTime() {​


​return​​ ​​ManagementFactory.getThreadMXBean().getCurrentThreadCpuTime();​






​public​​ ​​static​​ ​​void​​ ​​main(String[] args) {​


​PoolSizeCalculator poolSizeCalculator = ​​​​new​​ ​​SimplePoolSizeCaculatorImpl();​


​poolSizeCalculator.calculateBoundaries(​​​​new​​ ​​BigDecimal(​​​​1.0​​​​), ​​​​new​​ ​​BigDecimal(​​​​100000​​​​));​












​* 自定义的异步IO任务​


​* @author Will​






​class​​ ​​AsyncIOTask ​​​​implements​​ ​​Runnable {​






​public​​ ​​void​​ ​​run() {​


​HttpURLConnection connection = ​​​​null​​​​;​


​BufferedReader reader = ​​​​null​​​​;​


​try​​ ​​{​


​String getURL = ​​​​"http://baidu.com"​​​​;​


​URL getUrl = ​​​​new​​ ​​URL(getURL);​




​connection = (HttpURLConnection) getUrl.openConnection();​




​reader = ​​​​new​​ ​​BufferedReader(​​​​new​​ ​​InputStreamReader(​






​String line;​


​while​​ ​​((line = reader.readLine()) != ​​​​null​​​​) {​


​// empty loop​








​catch​​ ​​(IOException e) {​




​} ​​​​finally​​ ​​{​


​if​​​​(reader != ​​​​null​​​​) {​


​try​​ ​​{​






​catch​​​​(Exception e) {​





















​Target queue memory usage (bytes): 100000​


​createTask() produced pool_size_calculate.AsyncIOTask which took 40 bytes in a queue​


​Formula: 100000 / 40​


​* Recommended queue capacity (bytes): 2500​


​Number of CPU: 4​


​Target utilization: 1​


​Elapsed time (nanos): 3000000000​


​Compute time (nanos): 47181000​


​Wait time (nanos): 2952819000​


​Formula: 4 * 1 * (1 + 2952819000 / 47181000)​


​* Optimal thread count: 256​



​ThreadPoolExecutor pool =​


​new​​ ​​ThreadPoolExecutor(​​​​256​​​​, ​​​​256​​​​, 0L, TimeUnit.MILLISECONDS, ​​​​new​​ ​​LinkedBlockingQueue(​​​​2500​​​​));​