前言

在工作中经常遇到字符串拼接的问题。如果只是简单地拼接一次,直接使用String和“+”就可以实现;如果需要大量拼接,则需要使用StringBuilder或者StringBuffer。不管是String还是StringBuilder,向后追加的效率都比较高,但如果向前拼接追加,效率则相当低下。原因是向前追加内容时,底层字符数组中已有的元素都要向后移动,开销很大。下面我们就来探索如何实现一个高效的向前追加拼接方法。

解决方法

在StringBuilder的基础上进行扩展,使其支持高效的向前追加。但是由于StringBuilder是使用final修饰的,无法通过继承来扩展,所以这里重新实现一个StringBuilder。代码如下:

package com.cz.redis;

import java.util.Arrays;

/**
 * A string builder that supports cheap insertion at the front as well as at
 * the back.
 *
 * <p>How it works:
 * <ol>
 *   <li>All characters live in a single backing array. Appending grows the
 *       array at its end, prepending grows it at its start.</li>
 *   <li>Two cursors delimit the used region: {@code preCursor} is the index of
 *       the first stored character and {@code fixCursor} is the index just
 *       past the last stored character.</li>
 * </ol>
 *
 * <p>The class performs no synchronization of its own.
 *
 * <p>Created 2023-03-22.
 *
 * @author Cheng Zhi
 */
public class JefStringBuilder {

    /**
     * Backing storage. The content occupies
     * {@code value[preCursor .. fixCursor)}.
     */
    char[] value;

    /**
     * Number of characters currently stored; always equal to
     * {@code fixCursor - preCursor}.
     */
    int count;

    /**
     * Index in {@link #value} of the first stored character. Starts at 0.
     */
    int preCursor;

    /**
     * Index in {@link #value} just past the last stored character. Starts at 0.
     */
    int fixCursor = 0;

    /**
     * Capacity of the backing array created by the no-argument constructor.
     */
    int defaultCapacity = 16;

    /**
     * Length of the prefix region. Declared for package-level compatibility;
     * this class never reads or writes it.
     */
    int preArraySize;

    /**
     * The maximum size of array to allocate (unless necessary).
     * Some VMs reserve some header words in an array.
     * Attempts to allocate larger arrays may result in
     * OutOfMemoryError: Requested array size exceeds VM limit
     */
    private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;

    /**
     * Creates an empty builder backed by an array of {@code defaultCapacity}
     * characters.
     */
    public JefStringBuilder() {
        value = new char[defaultCapacity];
    }

    /**
     * Creates an empty builder with the given initial capacity.
     *
     * @param capacity length of the initial backing array
     * @throws NegativeArraySizeException if {@code capacity} is negative
     */
    public JefStringBuilder(int capacity) {
        value = new char[capacity];
    }

    /**
     * Appends a string at the end.
     *
     * @param str the text to append; {@code null} appends the four characters
     *            {@code "null"}
     * @return this builder
     */
    public JefStringBuilder append(String str) {
        if (str == null) {
            return appendNull();
        }
        int len = str.length();
        ensureCapacityInternal(len);
        str.getChars(0, len, value, fixCursor);
        advanceTail(len);
        return this;
    }

    /**
     * Appends the content of a {@link StringBuffer} at the end.
     *
     * @param sb the buffer to append; {@code null} appends {@code "null"}
     * @return this builder
     */
    public JefStringBuilder append(StringBuffer sb) {
        if (sb == null) {
            return appendNull();
        }
        int len = sb.length();
        ensureCapacityInternal(len);
        sb.getChars(0, len, value, fixCursor);
        advanceTail(len);
        return this;
    }

    /**
     * Appends the content of another builder at the end. Passing this builder
     * itself is supported and doubles its content.
     *
     * @param asb the builder to append; {@code null} appends {@code "null"}
     * @return this builder
     */
    public JefStringBuilder append(JefStringBuilder asb) {
        if (asb == null) {
            return appendNull();
        }
        // Capture the length first: when asb == this, growing must not change
        // how many characters get copied.
        int len = asb.length();
        ensureCapacityInternal(len);
        asb.getChars(0, len, value, fixCursor);
        advanceTail(len);
        return this;
    }

    /**
     * Copies characters of this builder into {@code dst}. Indices are logical
     * positions within the content (0 is the first character), independent of
     * where the content sits inside the backing array.
     *
     * @param srcBegin logical index of the first character to copy
     * @param srcEnd   logical index just past the last character to copy
     * @param dst      destination array
     * @param dstBegin index in {@code dst} where copying starts
     * @throws StringIndexOutOfBoundsException if {@code srcBegin} is negative,
     *         {@code srcEnd} is negative or greater than {@link #length()},
     *         or {@code srcBegin > srcEnd}
     */
    public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) {
        if (srcBegin < 0) {
            throw new StringIndexOutOfBoundsException(srcBegin);
        }
        if ((srcEnd < 0) || (srcEnd > count)) {
            throw new StringIndexOutOfBoundsException(srcEnd);
        }
        if (srcBegin > srcEnd) {
            throw new StringIndexOutOfBoundsException("srcBegin > srcEnd");
        }
        // The content starts at preCursor, not at index 0 of the array.
        System.arraycopy(value, preCursor + srcBegin, dst, dstBegin, srcEnd - srcBegin);
    }

    /**
     * Appends a character sequence at the end.
     *
     * @param s the sequence to append; {@code null} appends {@code "null"}
     * @return this builder
     */
    public JefStringBuilder append(CharSequence s) {
        if (s == null) {
            return appendNull();
        }
        if (s instanceof String) {
            return this.append((String) s);
        }
        if (s instanceof JefStringBuilder) {
            return this.append((JefStringBuilder) s);
        }
        return this.append(s, 0, s.length());
    }

    /**
     * Appends the sub-range {@code [start, end)} of a character sequence at
     * the end.
     *
     * @param s     the source sequence; {@code null} is treated as the
     *              sequence {@code "null"}
     * @param start index of the first character to append
     * @param end   index just past the last character to append
     * @return this builder
     * @throws IndexOutOfBoundsException if {@code start} is negative,
     *         {@code start > end}, or {@code end > s.length()}
     */
    public JefStringBuilder append(CharSequence s, int start, int end) {
        if (s == null) {
            s = "null";
        }
        if ((start < 0) || (start > end) || (end > s.length())) {
            throw new IndexOutOfBoundsException(
                    "start " + start + ", end " + end + ", s.length() "
                            + s.length());
        }
        int len = end - start;
        ensureCapacityInternal(len);
        // Write at the tail cursor; index `count` is only correct while nothing
        // has been prepended.
        for (int i = start, j = fixCursor; i < end; i++, j++) {
            value[j] = s.charAt(i);
        }
        advanceTail(len);
        return this;
    }

    /**
     * Appends {@code String.valueOf(obj)} at the end.
     *
     * @param obj the object whose string form is appended
     * @return this builder
     */
    public JefStringBuilder append(Object obj) {
        return append(String.valueOf(obj));
    }

    /**
     * Inserts a string in front of the current content.
     *
     * @param str the text to prepend; {@code null} leaves the builder unchanged
     * @return this builder
     */
    public JefStringBuilder preAppand(String str) {
        if (str == null) {
            return this;
        }
        int len = str.length();
        preEnsureCapacityInternal(len);
        preCursor -= len;
        str.getChars(0, len, value, preCursor);
        count += len;
        return this;
    }

    /**
     * Makes sure at least {@code len} free slots exist in front of the
     * content, growing the array at its start when necessary.
     *
     * @param len number of characters about to be prepended
     * @throws OutOfMemoryError if the required array length overflows
     */
    private void preEnsureCapacityInternal(int len) {
        if (len <= preCursor) {
            return;
        }
        int minCapacity = count + len;
        if (minCapacity < 0) { // int overflow
            throw new OutOfMemoryError();
        }
        // Number of slots added in front of the existing array.
        int extra = newCapacity(minCapacity);
        int newLength = value.length + extra;
        if (newLength < 0 || newLength > MAX_ARRAY_SIZE) {
            // Geometric growth does not fit; fall back to the smallest growth
            // that still makes room for the new characters.
            extra = len - preCursor;
            newLength = value.length + extra;
            if (newLength < 0) {
                throw new OutOfMemoryError();
            }
        }
        char[] grown = new char[newLength];
        // Only the live region needs to move; it is shifted right by `extra`.
        System.arraycopy(value, preCursor, grown, preCursor + extra, count);
        preCursor += extra;
        fixCursor += extra;
        value = grown;
    }

    /**
     * Appends the four characters {@code "null"} at the end.
     *
     * @return this builder
     */
    private JefStringBuilder appendNull() {
        return append("null");
    }

    /**
     * Makes sure at least {@code len} free slots exist behind the content,
     * growing the array at its end when necessary.
     *
     * @param len number of characters about to be appended
     * @throws OutOfMemoryError if the required array length overflows
     */
    private void ensureCapacityInternal(int len) {
        if (len <= value.length - fixCursor) {
            return;
        }
        // The write ends at fixCursor + len, which exceeds count + len once
        // free slots exist in front of the content.
        int required = fixCursor + len;
        if (required < 0) { // int overflow
            throw new OutOfMemoryError();
        }
        value = Arrays.copyOf(value, newCapacity(required));
    }

    /**
     * Moves the tail cursor and the character count forward after
     * {@code len} characters have been written at the end.
     */
    private void advanceTail(int len) {
        fixCursor += len;
        count += len;
    }

    /**
     * Computes a new capacity: twice the current array length plus two, or
     * {@code minCapacity} if that is larger.
     *
     * @param minCapacity the smallest acceptable capacity
     * @return the capacity to allocate
     * @throws OutOfMemoryError if {@code minCapacity} has overflowed
     */
    private int newCapacity(int minCapacity) {
        // overflow-conscious code
        int newCapacity = (value.length << 1) + 2;
        if (newCapacity - minCapacity < 0) {
            newCapacity = minCapacity;
        }
        return (newCapacity <= 0 || MAX_ARRAY_SIZE - newCapacity < 0)
                ? hugeCapacity(minCapacity)
                : newCapacity;
    }

    /**
     * Picks a capacity when doubling would exceed {@code MAX_ARRAY_SIZE}.
     *
     * @param minCapacity the smallest acceptable capacity
     * @return {@code minCapacity} if it is above {@code MAX_ARRAY_SIZE},
     *         otherwise {@code MAX_ARRAY_SIZE}
     * @throws OutOfMemoryError if {@code minCapacity} has overflowed
     */
    private int hugeCapacity(int minCapacity) {
        if (Integer.MAX_VALUE - minCapacity < 0) { // overflow
            throw new OutOfMemoryError();
        }
        return (minCapacity > MAX_ARRAY_SIZE)
                ? minCapacity : MAX_ARRAY_SIZE;
    }

    /**
     * Returns the content as a new {@link String}.
     */
    @Override
    public String toString() {
        // Create a copy, don't share the array
        return new String(value, preCursor, count);
    }

    /**
     * Returns the number of characters currently stored.
     */
    public int length() {
        return count;
    }

}

单元测试案例

package com.cz;

import com.cz.redis.JefStringBuilder;
import org.junit.jupiter.api.Test;

import java.util.Stack;

/**
 * Time and memory comparison of several ways to concatenate strings, both
 * appending at the end and prepending at the front.
 *
 * <p>Every test prints the report returned by {@code TimeLag.cost()}; none of
 * them asserts a value. The figures quoted in the method comments are the
 * ones recorded by the original author.
 *
 * <p>Created 2023-03-24.
 *
 * @author Cheng Zhi
 */
public class TestJefStringBuilder {

    /** Number of concatenations performed by every benchmark. */
    private static final int ROUNDS = 100000;

    /**
     * Creates a {@code TimeLag} and switches its memory statistics on.
     */
    private static TimeLag startMeasuring() {
        TimeLag stopwatch = new TimeLag();
        stopwatch.openMemoStat();
        return stopwatch;
    }

    /**
     * Appends to a {@code String} in a loop.
     *
     * <p>Recorded result: memory cost -174393888 byte (-170306 KB),
     * time cost 9499 milliseconds (9 seconds).
     *
     * <p>Author's note on the negative memory figure: when memory runs short
     * during the run, the garbage collector reclaims unreferenced objects.
     */
    @Test
    public void testStringConcatAfter() {
        TimeLag stopwatch = startMeasuring();
        String text = "a";
        for (int round = 0; round < ROUNDS; round++) {
            text += round;
        }
        System.out.println(stopwatch.cost());
    }

    /**
     * Prepends to a {@code String} in a loop.
     *
     * <p>Recorded result: memory cost -291341328 byte (-284513 KB),
     * time cost 10671 milliseconds (10 seconds).
     *
     * <p>Author's note on the negative memory figure: when memory runs short
     * during the run, the garbage collector reclaims unreferenced objects.
     */
    @Test
    public void testStringConcatBefore() {
        TimeLag stopwatch = startMeasuring();
        String text = "a";
        for (int round = 0; round < ROUNDS; round++) {
            text = round + text;
        }
        System.out.println(stopwatch.cost());
    }

    /**
     * Appends to a {@code StringBuilder} in a loop.
     *
     * <p>Recorded result: memory cost 6457160 byte (6305 KB),
     * time cost 21 milliseconds (0 seconds).
     */
    @Test
    public void testStringBuiderConcatAfter() {
        TimeLag stopwatch = startMeasuring();
        StringBuilder builder = new StringBuilder().append("a");
        for (int round = 0; round < ROUNDS; round++) {
            builder.append(round);
        }
        System.out.println(stopwatch.cost());
    }

    /**
     * Prepends to a {@code StringBuilder} in a loop. {@code StringBuilder}
     * has no dedicated prepend method, so {@code insert(0, ...)} stands in
     * for it.
     *
     * <p>Recorded result: memory cost 11221808 byte (10958 KB),
     * time cost 1108 milliseconds (1 seconds).
     */
    @Test
    public void testStringBuiderConcatBefore() {
        TimeLag stopwatch = startMeasuring();
        StringBuilder builder = new StringBuilder().append("a");
        for (int round = 0; round < ROUNDS; round++) {
            builder.insert(0, round);
        }
        System.out.println(stopwatch.cost());
    }

    /**
     * Emulates prepending with a stack: every piece is pushed, then the
     * pieces are popped into a {@code StringBuilder} in reverse order.
     *
     * <p>Recorded result: memory cost 20428544 byte (19949 KB),
     * time cost 52 milliseconds (0 seconds).
     */
    @Test
    public void testStringBuilderWith() {
        TimeLag stopwatch = startMeasuring();
        Stack<String> pieces = new Stack<String>();
        pieces.push("a");
        for (int round = 0; round < ROUNDS; round++) {
            pieces.push(round + "");
        }
        StringBuilder joined = new StringBuilder();
        // Drain the stack completely, last pushed piece first.
        while (!pieces.isEmpty()) {
            joined.append(pieces.pop());
        }
        System.out.println(stopwatch.cost());
    }

    /**
     * Appends to a {@code JefStringBuilder} in a loop.
     *
     * <p>Recorded result: memory cost 14229360 byte (13895 KB),
     * time cost 41 milliseconds (0 seconds).
     */
    @Test
    public void testJefStringBuiderConcatAfter() {
        TimeLag stopwatch = startMeasuring();
        JefStringBuilder builder = new JefStringBuilder().append("a");
        for (int round = 0; round < ROUNDS; round++) {
            builder.append(round);
        }
        System.out.println(stopwatch.cost());
    }

    /**
     * Prepends to a {@code JefStringBuilder} in a loop.
     *
     * <p>Recorded result: memory cost 20120968 byte (19649 KB),
     * time cost 31 milliseconds (0 seconds).
     */
    @Test
    public void testJefStringBuiderConcatBefore() {
        TimeLag stopwatch = startMeasuring();
        JefStringBuilder builder = new JefStringBuilder().append("a");
        for (int round = 0; round < ROUNDS; round++) {
            builder.preAppand(round + "");
        }
        System.out.println(stopwatch.cost());
    }
}

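综上可以发现,在向前拼接的场景下,自己实现的JefStringBuilder(31毫秒)明显优于StringBuilder.insert(1108毫秒)和String直接拼接(约10秒);在向后追加的场景下,它与StringBuilder处于同一数量级(41毫秒对21毫秒)。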

另附上效率统计工具:

package com.cz;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Date;

/**
 * Measuring helper that reports the time elapsed since the instance was
 * created and, once {@link #openMemoStat()} has been called, the change in
 * used heap memory since that call.
 *
 * <p>Created 2021-04-29.
 *
 * @author Cheng Zhi
 */
public class TimeLag {

    Logger logger = LoggerFactory.getLogger(TimeLag.class);

    private static final long NANOS_PER_MILLI = 1_000_000L;

    /** {@link System#nanoTime()} reading taken in the constructor. */
    private final long startNanos;
    private final Runtime runtime = Runtime.getRuntime();
    /** Used heap (total minus free) recorded by {@link #openMemoStat()}. */
    private long startUsedMemory;
    private boolean isOpenMemoryStatistics = false;

    /**
     * Starts the clock.
     */
    public TimeLag() {
        // nanoTime is monotonic, so the elapsed time cannot be distorted by
        // wall-clock adjustments.
        startNanos = System.nanoTime();
    }

    /**
     * Enables the memory line in {@link #cost()} and records the baseline.
     * A garbage collection is requested first so that the baseline contains
     * as little garbage as possible.
     */
    public void openMemoStat() {
        isOpenMemoryStatistics = true;
        runtime.gc();
        startUsedMemory = usedMemory();
    }

    /**
     * Builds the cost report.
     *
     * @return one line with the elapsed milliseconds and whole seconds,
     *         preceded (when memory statistics are enabled) by a line with
     *         the change in used heap in bytes and KB. The memory figure can
     *         still be negative if the collector reclaimed more than was
     *         allocated since the baseline.
     */
    public String cost() {
        long elapsedMillis = (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
        StringBuilder report = new StringBuilder();
        if (isOpenMemoryStatistics) {
            // Compare like with like: used heap now versus used heap at the
            // baseline. Mixing totalMemory() with freeMemory() from different
            // moments does not yield a delta.
            long usedDelta = usedMemory() - startUsedMemory;
            report.append("cost ").append(usedDelta)
                    .append(" byte (").append(usedDelta / 1024).append(" KB).")
                    .append('\n');
        }
        report.append("cost ").append(elapsedMillis)
                .append(" milliseconds (").append(elapsedMillis / 1000).append(" seconds).");
        return report.toString();
    }

    /**
     * Returns the heap currently in use: total memory minus free memory.
     */
    private long usedMemory() {
        return runtime.totalMemory() - runtime.freeMemory();
    }

    /**
     * Demo: times a nested counting loop and prints the last sum and the
     * cost report.
     */
    public static void main(String[] args) {
        TimeLag lag = new TimeLag();
        lag.openMemoStat();
        int s = 0;
        for (int i = 0; i < 100000; i++) {
            for (int j = 100000; j > i; j--) {
                s = j + i;
            }
        }
        System.out.println(s);
        System.out.println(lag.cost());
    }
}