Chunked transfer and merging of large files in Java
- While developing docker save/load endpoints I recently had to deal with very large image files (2 GB to 30 GB) being transferred server to server. Pushing a file that size straight through HttpClient risks out-of-memory errors or timeouts, so both services agreed to split the file into chunks, transfer the chunks, and merge them back into the original file on the receiving side.
File splitting code (example code)
- Note: splitting a large file takes time, so a synchronous call from the other service may time out. It is better to make the endpoint asynchronous: return a key immediately, write the split progress to a cache (Redis here), and let the caller poll the chunk status under that key. Once splitting finishes, store the chunk file name list in the cache so the chunks can be downloaded afterwards; the caller stops polling as soon as it sees the list. A sketch of such a status endpoint is shown right after the /cutFile endpoint below.
- Endpoint
/**
 * @param fileName name of the file to split, e.g. nginx.tar
 * @return key under which the split status is stored in Redis
 */
@GetMapping("/cutFile")
@ResponseBody
public String cutFile(String fileName) {
    String key = System.currentTimeMillis() + "-" + fileName + "-key";
    // mark the split as started; the caller polls this key
    stringRedisTemplate.boundValueOps(key).set("start");
    stringRedisTemplate.expire(key, 10, TimeUnit.MINUTES);
    // split asynchronously so this endpoint can return immediately
    CompletableFuture.runAsync(() -> {
        List<String> fileNames = fileManageService.cutFile(fileName);
        if (CollectionUtils.isEmpty(fileNames)) {
            stringRedisTemplate.boundValueOps(key).set("failed");
            stringRedisTemplate.expire(key, 1, TimeUnit.MINUTES);
        } else {
            // store the chunk file name list so the caller can download the chunks
            stringRedisTemplate.boundValueOps(key).set(JSONObject.toJSONString(fileNames));
            stringRedisTemplate.expire(key, 2, TimeUnit.MINUTES);
        }
    });
    // return the key so the caller can start polling
    return key;
}
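- The original post does not show the status endpoint the caller polls; a minimal sketch, assuming the caller queries this service rather than Redis directly (the /cutFileStatus mapping is a hypothetical name, not from the original):
/**
 * Hypothetical status endpoint: the caller polls it with the key returned by /cutFile.
 * It returns "start" while splitting, "failed" on error, or the JSON list of chunk file names.
 */
@GetMapping("/cutFileStatus")
@ResponseBody
public String cutFileStatus(String key) {
    String status = stringRedisTemplate.boundValueOps(key).get();
    // an expired or unknown key yields null; the caller can treat that as "not found"
    return status == null ? "not found" : status;
}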
- Service layer
/**
 * Split a file into fixed-size chunks.
 *
 * @param fileName name of the file to split
 * @return names of the generated chunk files
 */
List<String> cutFile(String fileName);
- Service implementation
@Value("${save_addr}")
private String saveAddr;

@Override
public List<String> cutFile(String fileName) {
    // path of the file to split on the host
    String filePath = saveAddr + fileName;
    // chunk size in bytes (50 MB)
    Long byteSize = 52428800L;
    return new CutFileUtil().cutFileBySize(filePath, byteSize, saveAddr);
}
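- For scale: at 50 MB per chunk, a 2 GB image splits into about 41 chunk files and a 30 GB image into about 615.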
- File-splitting utility class
/**
 * Utility for splitting a file into fixed-size chunks.
 *
 * @author heyanbo
 * @createTime 2020/6/7 23:31
 */
public class CutFileUtil {

    /**
     * @param filePath path of the source file on the host, e.g. /home/gyt/nginx.tar
     * @param byteSize chunk size in bytes
     * @param saveAddr directory the chunks are written to, e.g. /home/gyt/
     * @return names of the generated chunk files
     */
    public List<String> cutFileBySize(String filePath, Long byteSize, String saveAddr) {
        List<String> fileNames = new ArrayList<>();
        File file = new File(filePath);
        // total number of chunks
        int count = (int) Math.ceil(file.length() / (double) byteSize);
        int countLen = String.valueOf(count).length();
        ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(
                2, 4, 1, TimeUnit.SECONDS, new ArrayBlockingQueue<>(count * 2));
        // timestamp shared by all chunks of this split
        String timeStamp = String.valueOf(System.currentTimeMillis());
        for (int i = 0; i < count; i++) {
            // chunk file name: <timestamp>-<zero-padded index>-<original name>
            String fileName = timeStamp + "-" + leftPad(String.valueOf(i + 1), countLen, '0') + "-" + file.getName();
            threadPoolExecutor.execute(new SplitRunnable(byteSize.intValue(), fileName, file, i * byteSize, saveAddr));
            fileNames.add(fileName);
        }
        threadPoolExecutor.shutdown();
        // wait until every chunk has been written before returning the name list
        while (true) {
            if (threadPoolExecutor.isTerminated()) {
                return fileNames;
            }
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    public static String leftPad(String str, int length, char ch) {
        if (str.length() >= length) {
            return str;
        }
        char[] chs = new char[length];
        Arrays.fill(chs, ch);
        char[] src = str.toCharArray();
        System.arraycopy(src, 0, chs, length - src.length, src.length);
        return new String(chs);
    }

    private class SplitRunnable implements Runnable {
        int byteSize;
        String fileName;
        File originFile;
        Long startPos;
        String currentWorkDir;

        public SplitRunnable(int byteSize, String fileName, File originFile, Long startPos, String currentWorkDir) {
            this.byteSize = byteSize;
            this.fileName = fileName;
            this.originFile = originFile;
            this.startPos = startPos;
            this.currentWorkDir = currentWorkDir;
        }

        public void run() {
            RandomAccessFile randomAccessFile = null;
            OutputStream outputStream = null;
            try {
                randomAccessFile = new RandomAccessFile(originFile, "r");
                byte[] b = new byte[byteSize];
                // move the pointer to the start of this chunk
                randomAccessFile.seek(startPos);
                // read() may return fewer bytes than requested, so loop until the chunk is full or EOF
                int total = 0;
                int read;
                while (total < byteSize
                        && (read = randomAccessFile.read(b, total, byteSize - total)) != -1) {
                    total += read;
                }
                outputStream = new FileOutputStream(currentWorkDir + fileName);
                outputStream.write(b, 0, total);
                outputStream.flush();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (outputStream != null) {
                    try {
                        outputStream.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                if (randomAccessFile != null) {
                    try {
                        randomAccessFile.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }
}
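- Illustrative usage of the utility on its own, reusing the example path and 50 MB chunk size from above:
// split nginx.tar into 50 MB chunks written next to the source file
List<String> chunkNames = new CutFileUtil().cutFileBySize("/home/gyt/nginx.tar", 52428800L, "/home/gyt/");
// chunk names follow the pattern <timestamp>-<zero-padded index>-nginx.tar
chunkNames.forEach(System.out::println);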
- Calling the endpoint to split the redis image file
- The chunk file name list stored in the cache
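- The post does not include the endpoint the caller uses to download each chunk. A minimal sketch, assuming the chunks are streamed straight from the same ${save_addr} directory injected into the controller (the /downloadChunk mapping and its parameters are hypothetical):
// assumes the same ${save_addr} value is also injected into the controller
@Value("${save_addr}")
private String saveAddr;

/**
 * Hypothetical chunk-download endpoint: streams a single chunk file to the caller.
 * The caller downloads each name from the cached list, then asks the other side to merge.
 */
@GetMapping("/downloadChunk")
public void downloadChunk(@RequestParam String chunkFileName, HttpServletResponse response) throws IOException {
    File chunk = new File(saveAddr + chunkFileName);
    response.setContentType("application/octet-stream");
    response.setContentLengthLong(chunk.length());
    // copy the chunk in small buffers so a 50 MB chunk never sits fully in memory
    try (InputStream in = new BufferedInputStream(new FileInputStream(chunk));
         OutputStream out = response.getOutputStream()) {
        byte[] buffer = new byte[8192];
        int len;
        while ((len = in.read(buffer)) != -1) {
            out.write(buffer, 0, len);
        }
    }
}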
Merging the files (example code)
- This example merges the chunk files with a BufferedOutputStream; a RandomAccessFile works just as well (a sketch of that variant follows the merge implementation below). After merging, verify that the redis image can still be loaded with docker load.
- Endpoint
/**
 * @param cutFileName any one chunk file name, e.g. 1591604609899-1-redis.tar
 * @param chunks      total number of chunks
 * @return result message
 */
@GetMapping("/mergeFile")
@ResponseBody
public String mergeFile(@RequestParam String cutFileName,
                        @RequestParam int chunks) throws IOException {
    return fileManageService.mergeFile(cutFileName, chunks);
}
- Implementation
@Override
public String mergeFile(String cutFileName, int chunks) throws IOException {
    // chunk names look like <timestamp>-<index>-<original name>, e.g. 1591604609899-1-redis.tar
    int indexOf = cutFileName.indexOf("-");
    String timeStamp = cutFileName.substring(0, indexOf);
    // remainder: <index>-<original name>
    String substring = cutFileName.substring(indexOf + 1);
    int indexOf1 = substring.indexOf("-");
    // original file name including extension
    String fileName = substring.substring(indexOf1 + 1);
    File file = new File(saveAddr + fileName);
    if (file.exists()) {
        file.delete();
        LOGGER.info("overwriting existing file");
    }
    // chunk indices are zero-padded by the splitter, so pad here as well
    int countLen = String.valueOf(chunks).length();
    try (BufferedOutputStream destOutputStream =
                 new BufferedOutputStream(new FileOutputStream(saveAddr + fileName))) {
        for (int i = 1; i <= chunks; i++) {
            String chunkName = timeStamp + "-" + CutFileUtil.leftPad(String.valueOf(i), countLen, '0') + "-" + fileName;
            File sourceFile = new File(saveAddr + chunkName);
            LOGGER.info("merging chunk: " + chunkName);
            byte[] fileBuffer = new byte[1024]; // read/write buffer
            int readBytesLength;                // bytes read per call
            try (BufferedInputStream sourceInputStream =
                         new BufferedInputStream(new FileInputStream(sourceFile))) {
                // append this chunk's bytes to the target file
                while ((readBytesLength = sourceInputStream.read(fileBuffer)) != -1) {
                    destOutputStream.write(fileBuffer, 0, readBytesLength);
                }
            }
            LOGGER.info("chunk merged: " + chunkName);
            // delete the chunk once it has been merged
            if (sourceFile.delete()) {
                LOGGER.info(chunkName + " deleted");
            }
        }
        destOutputStream.flush();
    }
    return fileName + " merged successfully";
}
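- As mentioned above, the same merge can be done with RandomAccessFile instead of BufferedOutputStream. A minimal sketch of that variant, reusing timeStamp, countLen, chunks, fileName and saveAddr from the method above; note it loads each whole chunk into memory via Files.readAllBytes and assumes the target file does not exist yet:
// RandomAccessFile variant of the merge loop above
try (RandomAccessFile dest = new RandomAccessFile(saveAddr + fileName, "rw")) {
    long offset = 0;
    for (int i = 1; i <= chunks; i++) {
        String chunkName = timeStamp + "-" + CutFileUtil.leftPad(String.valueOf(i), countLen, '0') + "-" + fileName;
        byte[] chunkBytes = Files.readAllBytes(Paths.get(saveAddr + chunkName));
        // write this chunk at its offset in the target file
        dest.seek(offset);
        dest.write(chunkBytes);
        offset += chunkBytes.length;
    }
}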
- The merged redis.tar
- The byte size of the redis image saved as a tar
- Upload the merged file to the server and run docker load to verify that redis.tar can still be loaded.
- The load succeeds and the image works as expected.