前言
在工作中经常遇到字符串拼接的问题,一般如果只是简单的拼接一次直接使用String和“+”就可以实现,如果大量的拼接则需要StringBuilder或者StringBuffer。但是不管是String还是StringBuilder他们在拼接的时候向后追加的效率都是很高的,但是如果向前拼接追加,那效率则相当低下。原因就是字符串底层在向前追加内容的时候,都会使数组中的其他元素向后移动,这样效率就会很低。下面我们就来探索如果实现一个高效的向前追加拼接方法。
解决方法
在StringBuilder的基础上进行扩展使用其支持高效的向前追加,但是由于StringBuilder是使用final修饰的,所以这里重新实现StringBuilder。代码如下:
package com.cz.redis;
import java.util.Arrays;
/**
* @program: PostGirl
* @description: 扩展JStringBuilder
* 提供向前追加的功能
* 实现原理:
* 1、初始化数组,如果向前插入,则向前扩容,如果向后插入则向后扩容
* 2、扩容后提供前缀指针和后缀指针来指向当前存储的位置
* @author: Cheng Zhi
* @create: 2023-03-22 14:01
**/
public class JefStringBuilder {
/**
* The value is used for character storage.
*/
char[] value;
/**
* The count is the number of characters used.
*/
int count;
/**
* 前缀数组的指针,初始化为0
*/
int preCursor;
/**
* 后缀数组的指针,初始化为0
*/
int fixCursor = 0;
/**
* 初始化数组长度
*/
int defaultCapacity = 16;
/**
* 前缀数组长度
*/
int preArraySize;
/**
* The maximum size of array to allocate (unless necessary).
* Some VMs reserve some header words in an array.
* Attempts to allocate larger arrays may result in
* OutOfMemoryError: Requested array size exceeds VM limit
*/
private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
public JefStringBuilder() {
value = new char[defaultCapacity];
}
public JefStringBuilder(int capacity) {
value = new char[capacity];
}
/**
* 向后追加字符串
* @param str
* @return
*/
public JefStringBuilder append(String str) {
if (str == null)
return appendNull();
int len = str.length();
ensureCapacityInternal(count + len, len);
str.getChars(0, len, value, fixCursor);
fixCursor = fixCursor + len;
count += len;
return this;
}
public JefStringBuilder append(StringBuffer sb) {
if (sb == null)
return appendNull();
int len = sb.length();
ensureCapacityInternal(count + len, len);
sb.getChars(0, len, value, fixCursor);
fixCursor = fixCursor + len;
count += len;
return this;
}
/**
* @since 1.8
*/
JefStringBuilder append(JefStringBuilder asb) {
if (asb == null)
return appendNull();
int len = asb.length();
ensureCapacityInternal(count + len, len);
asb.getChars(0, len, value, count);
count += len;
return this;
}
public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) {
if (srcBegin < 0)
throw new StringIndexOutOfBoundsException(srcBegin);
if ((srcEnd < 0) || (srcEnd > count))
throw new StringIndexOutOfBoundsException(srcEnd);
if (srcBegin > srcEnd)
throw new StringIndexOutOfBoundsException("srcBegin > srcEnd");
System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
}
// Documentation in subclasses because of synchro difference
public JefStringBuilder append(CharSequence s) {
if (s == null)
return appendNull();
if (s instanceof String)
return this.append((String)s);
if (s instanceof JefStringBuilder)
return this.append((JefStringBuilder)s);
return this.append(s, 0, s.length());
}
public JefStringBuilder append(CharSequence s, int start, int end) {
if (s == null)
s = "null";
if ((start < 0) || (start > end) || (end > s.length()))
throw new IndexOutOfBoundsException(
"start " + start + ", end " + end + ", s.length() "
+ s.length());
int len = end - start;
ensureCapacityInternal(count + len, len);
for (int i = start, j = count; i < end; i++, j++)
value[j] = s.charAt(i);
count += len;
return this;
}
public JefStringBuilder append(Object obj) {
return append(String.valueOf(obj));
}
/**
* 向前追加
* @param str
* @return
*/
public JefStringBuilder preAppand(String str) {
if (str == null) {
return this;
}
int len = str.length();
int newLength = count + len;
preEnsureCapacityInternal(newLength, len);
int length = value.length;
preCursor = preCursor - len;
str.getChars(0, len, value, preCursor);
count += len;
return this;
}
/**
* 向前扩容
* @param minimumCapacity 扩容的长度
* @param len 当前字符串的长度,用来判断是否需要扩容
*/
private void preEnsureCapacityInternal(int minimumCapacity,int len) {
// 如果当前指针的位置减去将要存入数组的字符串长度,如果不够存,意味着要扩容
if ((preCursor - len) < 0) {
int persSize = newCapacity(minimumCapacity);
// 数组向前扩容
char[] copy = new char[persSize + value.length];
//char[] copy = new char[persSize + count];
System.arraycopy(value, preCursor, copy, preCursor + persSize, value.length - preCursor);
int oldPreCursor = preCursor;
preCursor = oldPreCursor + copy.length - value.length ;
fixCursor = fixCursor + preCursor - oldPreCursor;
value = copy;
copy = null;
}
}
private JefStringBuilder appendNull() {
int c = count;
ensureCapacityInternal(c + 4, 4);
final char[] value = this.value;
value[c++] = 'n';
value[c++] = 'u';
value[c++] = 'l';
value[c++] = 'l';
count = c;
return this;
}
/**
* 向后扩容
* @param minimumCapacity 扩容的长度
* @param len 当前字符串的长度,用来判断是否需要扩容
*/
private void ensureCapacityInternal(int minimumCapacity, int len) {
if ((fixCursor + len-value.length) > 0) {
value = Arrays.copyOf(value, newCapacity(minimumCapacity));
}
}
/**
* 计算新容量
* @param minCapacity
* @return
*/
private int newCapacity(int minCapacity) {
// overflow-conscious code
int newCapacity = (value.length << 1) + 2;
if (newCapacity - minCapacity < 0) {
newCapacity = minCapacity;
}
return (newCapacity <= 0 || MAX_ARRAY_SIZE - newCapacity < 0)
? hugeCapacity(minCapacity)
: newCapacity;
}
private int hugeCapacity(int minCapacity) {
if (Integer.MAX_VALUE - minCapacity < 0) { // overflow
throw new OutOfMemoryError();
}
return (minCapacity > MAX_ARRAY_SIZE)
? minCapacity : MAX_ARRAY_SIZE;
}
public String toString() {
// Create a copy, don't share the array
return new String(value, preCursor, count);
}
public int length() {
return count;
}
}
单元测试案例
package com.cz;
import com.cz.redis.JefStringBuilder;
import org.junit.jupiter.api.Test;
import java.util.Stack;
/**
* @program: Reids
* @description: 测试字符串拼接
* @author: Cheng Zhi
* @create: 2023-03-24 09:41
**/
public class TestJefStringBuilder {
/**
* @author Chengzhi
* @date 2023-03-24
* @测试目的:测试String类型向后循环追加的耗时以及内存占用情况
* @测试结果: 内存消耗:cost -174393888 byte (-170306 KB).
* 时间消耗:cost 9499 milliseconds (9 seconds).
* @特殊说明:消耗内存出现负数的原因:在运行过程中,内存不够用时,Java的垃圾回收器(gc)会回收垃圾内存(没有引用的对象)
*/
@Test
public void testStringConcatAfter(){
TimeLag timeLag = new TimeLag();
timeLag.openMemoStat();
String str = "a";
for (int i = 0; i< 100000; i++) {
str = str + i;
}
System.out.println(timeLag.cost());
}
/**
* @author Chengzhi
* @date 2023-03-24
* @测试目的: 测试String类型向前循环追加的耗时以及内存占用情况
* @测试结果: 内存消耗:cost -291341328 byte (-284513 KB).
* 时间消耗:cost 10671 milliseconds (10 seconds).
* @特殊说明:消耗内存出现负数的原因:在运行过程中,内存不够用时,Java的垃圾回收器(gc)会回收垃圾内存(没有引用的对象)
*/
@Test
public void testStringConcatBefore(){
TimeLag timeLag = new TimeLag();
timeLag.openMemoStat();
String str = "a";
for (int i = 0; i< 100000; i++) {
str = i + str;
}
System.out.println(timeLag.cost());
}
/**
* @author Chengzhi
* @date 2023-03-24
* @测试目的:测试StringBuilder向后循环追加的耗时以及内存占用情况
* @预期结果: 内存消耗:cost 6457160 byte (6305 KB).
* 时间消耗:cost 21 milliseconds (0 seconds).
*/
@Test
public void testStringBuiderConcatAfter(){
TimeLag timeLag = new TimeLag();
timeLag.openMemoStat();
StringBuilder str = new StringBuilder();
str.append("a");
for (int i = 0; i< 100000; i++) {
str.append(i);
}
System.out.println(timeLag.cost());
}
/**
* @author Chengzhi
* @date 2023-03-24
* @测试目的: 测试StringBuilder向前循环追加的耗时以及内存占用情况,由于stringbuilder为提供直接向前拼接的方法,所以只能用插入的方式变相实现。
* @预期结果: 内存消耗:cost 11221808 byte (10958 KB).
* 时间消耗:cost 1108 milliseconds (1 seconds).
*/
@Test
public void testStringBuiderConcatBefore(){
TimeLag timeLag = new TimeLag();
timeLag.openMemoStat();
StringBuilder str = new StringBuilder();
str.append("a");
for (int i = 0; i< 100000; i++) {
str.insert(0,i);
}
System.out.println(timeLag.cost());
}
/**
* @author Chengzhi
* @date 2023-03-24
* @测试目的: 通过栈和StringBuilder变相实现字符串向前拼接,测试内存以及耗时情况。
* @预期结果: 内存消耗:cost 20428544 byte (19949 KB).
* 时间消耗:cost 52 milliseconds (0 seconds).
*/
@Test
public void testStringBuilderWith(){
TimeLag timeLag = new TimeLag();
timeLag.openMemoStat();
Stack<String> stack = new Stack<String>();
stack.push("a");
for (int i=0;i<100000; i++) {
stack.push(i+"");
}
int stackSize = stack.size();
StringBuilder stringBuilder = new StringBuilder();
for (int i=0; i<stackSize; i++) {
stringBuilder.append(stack.pop());
}
System.out.println(timeLag.cost());
}
/**
* @author Chengzhi
* @date 2023-03-24
* @测试目的:测试JefStringBuilder向后循环追加的耗时以及内存占用情况
* @预期结果: 内存消耗:cost 14229360 byte (13895 KB).
* 时间消耗:cost 41 milliseconds (0 seconds).
*/
@Test
public void testJefStringBuiderConcatAfter(){
TimeLag timeLag = new TimeLag();
timeLag.openMemoStat();
JefStringBuilder str = new JefStringBuilder();
str.append("a");
for (int i = 0; i< 100000; i++) {
str.append(i);
}
System.out.println(timeLag.cost());
}
/**
* @author Chengzhi
* @date 2023-03-24
* @测试目的:测试JefStringBuilder向前循环追加的耗时以及内存占用情况
* @预期结果: 内存消耗:cost 20120968 byte (19649 KB).
* 时间消耗:cost 31 milliseconds (0 seconds).
*/
@Test
public void testJefStringBuiderConcatBefore(){
TimeLag timeLag = new TimeLag();
timeLag.openMemoStat();
JefStringBuilder str = new JefStringBuilder();
str.append("a");
for (int i = 0; i< 100000; i++) {
str.preAppand(i+"");
}
System.out.println(timeLag.cost());
}
}
综上可以发现,自己实现的字符串拼接在效率上还是比较占优势的。
另附上效率统计工具:
package com.cz;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
/**
* 工具类,打印耗时
* @program: PostGirl-panent
* @description: TimeLag
* @author: Cheng Zhi
* @create: 2021-04-29 11:18
**/
public class TimeLag {
Logger logger = LoggerFactory.getLogger(TimeLag.class);
private Date startDate;
private Date endDate;
private long startMemeory;
private long endMemorty;
private Runtime runtime;
private boolean isOpenMemoryStatistics = false;
public TimeLag() {
startDate = new Date();
}
public void openMemoStat() {
isOpenMemoryStatistics = true;
// 先执行gc
runtime = Runtime.getRuntime();
runtime.gc();
startMemeory = runtime.totalMemory();
}
/**
* 返回耗时
* @return
*/
public String cost() {
endDate = new Date();
long cont = endDate.getTime() - startDate.getTime();
StringBuffer stringBuffer = new StringBuffer();
if (isOpenMemoryStatistics) {
endMemorty = runtime.freeMemory();
long contMemory = startMemeory - endMemorty;
stringBuffer.append("cost ").append(contMemory).append(" byte (").append(contMemory / 1024).append(" KB).").append('\n');
}
String s = stringBuffer.append("cost ").append(cont).append(" milliseconds (").append(cont / 1000).append(" seconds).").toString();
return s;
}
public static void main(String[] args) {
TimeLag lag = new TimeLag();
lag.openMemoStat();
int s = 0;
for ( int i=0; i<100000; i++) {
for (int j = 100000; j>i; j--) {
s = j+i;
}
}
System.out.println(s);
System.out.println(lag.cost());
}
}