目录
问题现象:
问题分析:
1.编码问题
2.IO流
解决方法:
zip压缩包解压工具(ZipFIleUtil):
字符编码转换工具类(CharsetConvertUtil):
问题现象:
今天在项目中需要用到zip解压技术:解压压缩包,并获取其中的文件进行操作。其中涉及中文编码和IO流两个问题,需要特别注意。
问题分析:
1.编码问题
zip压缩包中的文件名(如中文文件名)如果按错误的字符编码解码,就会出现乱码现象;可以在打开压缩包时通过 new ZipFile(srcFile, Charset.forName("字符编码规则")) 显式指定字符编码来解决。
2.IO流
zip解压缩文件的时候,涉及到文件io流(输入输出流)的问题,因此需要注意流的关闭问题。
解决方法:
分享一下我的工具类,开箱即用:
zip压缩包解压工具(ZipFIleUtil):
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Enumeration;
import java.util.LinkedHashMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
/**
* zip压缩包解压工具
* 提供以下方法:
* 1. getZipInnerFileList(File srcFile):获取zip压缩包内部文件的名称和文件类型
* 2. unZip(File srcFile, String destDirPath):解压zip包
*
* @author Stephen
* @version 2021.03.30
*/
public class ZipFIleUtil {
/**
* 获取zip压缩包内部文件的名称和文件类型
*
* @param srcFile
* @return
* @throws RuntimeException
*/
public static LinkedHashMap<String, String> getZipInnerFileList(File srcFile) throws RuntimeException {
// 判断源文件是否存在
if ( !srcFile.exists() ) {
throw new RuntimeException(srcFile.getPath() + "所指文件不存在");
}
// 开始解压
ZipFile zipFile = null;
try {
LinkedHashMap<String, String> unZipFileList = new LinkedHashMap<String, String>();
zipFile = new ZipFile(srcFile, Charset.forName(CharsetConvertUtil.getEncode(srcFile.getAbsolutePath(), true)));
System.out.println("zip压缩包 【" + srcFile.getName() + "】 中包含以下文件:");
Enumeration<?> entries = zipFile.entries();
String lastDirectory = null;
while (entries.hasMoreElements()) {
ZipEntry entry = (ZipEntry)entries.nextElement();
String filePath = entry.getName();
if ( filePath.contains("/") ) {
String directory = filePath.substring(0, filePath.lastIndexOf("/"));
if ( !directory.equals(lastDirectory) ) {
lastDirectory = directory;
unZipFileList.put(directory, "文件夹");
}
}
String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
String fileType = filePath.substring(filePath.lastIndexOf(".") + 1);
System.out.println("文件类型:" + fileType);
System.out.println("文件名:" + fileName);
unZipFileList.put(fileName, fileType);
}
zipFile.close();
if ( unZipFileList.size() > 0 ) {
return unZipFileList;
} else {return null;}
} catch (Exception e) {
throw new RuntimeException("获取zip压缩包内部文件信息失败!", e);
}
}
/**
* 解压zip包
*
* @param srcFile
* @param destDirPath
* @throws RuntimeException
*/
public static void unZip(File srcFile, String destDirPath) throws RuntimeException {
long start = System.currentTimeMillis();
// 判断源文件是否存在
if ( !srcFile.exists() ) {
throw new RuntimeException(srcFile.getPath() + "所指文件不存在");
}
// 开始解压
ZipFile zipFile = null;
try {
zipFile = new ZipFile(srcFile, Charset.forName(CharsetConvertUtil.getEncode(srcFile.getAbsolutePath(), true)));
Enumeration<?> entries = zipFile.entries();
while (entries.hasMoreElements()) {
ZipEntry entry = (ZipEntry)entries.nextElement();
System.out.println("解压" + entry.getName());
// 如果是文件夹,就创建个文件夹
if ( entry.isDirectory() ) {
String dirPath = destDirPath + "/" + entry.getName();
File dir = new File(dirPath);
dir.mkdirs();
} else {
// 如果是文件,就先创建一个文件,然后用io流把内容copy过去
File targetFile = new File(destDirPath + "/" + entry.getName());
// 保证这个文件的父文件夹必须要存在
if ( !targetFile.getParentFile().exists() ) {
targetFile.getParentFile().mkdirs();
}
targetFile.createNewFile();
// 将压缩文件内容写入到这个文件中
InputStream is = zipFile.getInputStream(entry);
FileOutputStream fos = new FileOutputStream(targetFile);
int len;
byte[] buf = new byte[1024];
while ((len = is.read(buf)) != -1) {
fos.write(buf, 0, len);
}
// 关流顺序,先打开的后关闭
fos.close();
is.close();
}
}
long end = System.currentTimeMillis();
System.out.println("解压完成,耗时:" + (end - start) + " ms");
zipFile.close();
} catch (Exception e) {
throw new RuntimeException("unzip error from ZipUtils", e);
}
}
}
字符编码转换工具类(CharsetConvertUtil):
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.BitSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Character-encoding helper: guesses the encoding of a file or stream and converts a
 * file from one encoding to another.
 * <p>
 * Detection is heuristic. A leading byte-order mark is checked first; otherwise the
 * content is reported as UTF-8 when every multi-byte sequence is structurally valid
 * and as GBK in all other cases.
 */
public class CharsetConvertUtil {
    private static final Logger logger = LoggerFactory.getLogger(CharsetConvertUtil.class);
    /** Number of bits inspected per byte. */
    private static final int BYTE_SIZE = 8;
    /** Number of leading bytes inspected for a byte-order mark. */
    private static final int BOM_LENGTH = 3;
    /** Longest UTF-8 sequence accepted, in bytes (RFC 3629 limit). */
    private static final int MAX_UTF8_SEQUENCE_LENGTH = 4;
    public static final String CODE_UTF8 = "UTF-8";
    public static final String CODE_UTF8_BOM = "UTF-8_BOM";
    public static final String CODE_GBK = "GBK";

    /**
     * Guesses the encoding of the file at the given path.
     *
     * @param fullFileName path of the file to inspect
     * @param ignoreBom    when {@code true}, UTF-8 with a BOM is reported as {@link #CODE_UTF8}
     *                     instead of {@link #CODE_UTF8_BOM}
     * @return the detected encoding name, as described on {@link #getEncode(BufferedInputStream, boolean)}
     * @throws Exception if the file cannot be opened or read
     */
    public static String getEncode(String fullFileName, boolean ignoreBom) throws Exception {
        logger.debug("fullFileName ; {}", fullFileName);
        // The stream is opened here, so it must also be closed here.
        try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullFileName))) {
            return getEncode(bis, ignoreBom);
        }
    }

    /**
     * Guesses the encoding of the content of a buffered stream. The stream is expected to
     * be positioned at the start of the content; it is consumed but not closed.
     * <p>
     * Returns {@code "UTF-16"} when the content starts with bytes FF FE, {@code "Unicode"}
     * when it starts with FE FF, {@link #CODE_UTF8} or {@link #CODE_UTF8_BOM} when it
     * starts with EF BB BF, and otherwise {@link #CODE_UTF8} or {@link #CODE_GBK}
     * depending on whether the content is structurally valid UTF-8. Empty content is
     * reported as {@link #CODE_UTF8}.
     *
     * @param bis       the stream to inspect
     * @param ignoreBom whether to ignore the UTF-8 BOM when naming the encoding
     * @return the detected encoding name
     * @throws Exception if reading the stream fails
     */
    public static String getEncode(BufferedInputStream bis, boolean ignoreBom) throws Exception {
        // The read limit must cover the bytes read before isUTF8() calls reset().
        bis.mark(BOM_LENGTH);
        byte[] head = new byte[BOM_LENGTH];
        // Read byte by byte so a short bulk read cannot leave the header partially filled.
        for (int i = 0; i < BOM_LENGTH; i++) {
            int value = bis.read();
            if (value == -1) {
                break; // content shorter than a BOM: remaining header bytes stay 0
            }
            head[i] = (byte) value;
        }
        String encodeType;
        if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            encodeType = "UTF-16";
        } else if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            encodeType = "Unicode";
        } else if (head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF) {
            // UTF-8 with BOM
            encodeType = ignoreBom ? CODE_UTF8 : CODE_UTF8_BOM;
        } else if (isUTF8(bis)) {
            encodeType = CODE_UTF8;
        } else {
            encodeType = CODE_GBK;
        }
        logger.info("result encode type : {}", encodeType);
        return encodeType;
    }

    /**
     * Checks whether the content is BOM-less UTF-8. This only aims to tell BOM-less UTF-8
     * apart from GBK: it validates the lead/continuation byte structure and does not
     * reject overlong encodings or surrogate code points.
     *
     * @param bis the stream, with a mark set at the start of the content
     * @return {@code true} if every multi-byte sequence is structurally valid UTF-8
     */
    private static boolean isUTF8(BufferedInputStream bis) throws Exception {
        // Rewind to the mark so the bytes consumed by the BOM check are examined too.
        bis.reset();
        int code;
        // Test for end of stream before interpreting the value as a byte.
        while ((code = bis.read()) != -1) {
            BitSet bitSet = convert2BitSet(code);
            // High bit set means a multi-byte sequence; single-byte (ASCII) needs no check.
            if (bitSet.get(0) && !checkMultiByte(bis, bitSet)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Validates the rest of a multi-byte sequence whose first byte has already been read.
     *
     * @param bis    the stream positioned just after the lead byte
     * @param bitSet the bits of the lead byte, most significant bit at index 0
     * @return {@code true} if the lead byte announces 2 to 4 bytes and each following
     *         byte is a continuation byte
     */
    private static boolean checkMultiByte(BufferedInputStream bis, BitSet bitSet) throws Exception {
        int count = getCountOfSequential(bitSet);
        // One leading 1 is a stray continuation byte; more than four is not valid UTF-8.
        if (count < 2 || count > MAX_UTF8_SEQUENCE_LENGTH) {
            return false;
        }
        // The lead byte is already consumed, so count - 1 continuation bytes remain.
        for (int i = 1; i < count; i++) {
            int next = bis.read();
            if (next == -1 || !checkUtf8Byte((byte) next)) {
                return false; // truncated sequence or not a continuation byte
            }
        }
        return true;
    }

    /**
     * Checks whether a byte is a UTF-8 continuation byte (bit pattern {@code 10xxxxxx}).
     *
     * @param b the byte to test
     * @return {@code true} if the two highest bits are 1 and 0
     */
    private static boolean checkUtf8Byte(byte b) throws Exception {
        BitSet bitSet = convert2BitSet(b);
        return bitSet.get(0) && !bitSet.get(1);
    }

    /**
     * Counts the consecutive set bits at the start of the bit set.
     *
     * @param bitSet the bits of one byte, most significant bit at index 0
     * @return the number of leading set bits, from 0 to 8
     */
    private static int getCountOfSequential(BitSet bitSet) {
        int count = 0;
        for (int i = 0; i < BYTE_SIZE; i++) {
            if (bitSet.get(i)) {
                count++;
            } else {
                break;
            }
        }
        return count;
    }

    /**
     * Converts the low 8 bits of an integer into a bit set.
     *
     * @param code the value whose low 8 bits are used
     * @return a bit set in which index 0 holds the most significant of those 8 bits
     */
    private static BitSet convert2BitSet(int code) {
        BitSet bitSet = new BitSet(BYTE_SIZE);
        for (int i = 0; i < BYTE_SIZE; i++) {
            int shifted = code >> (BYTE_SIZE - i - 1);
            if ((0x1 & shifted) == 1) {
                bitSet.set(i);
            }
        }
        return bitSet;
    }

    /**
     * Rewrites a text file from one encoding into another. Missing parent directories of
     * the target file are created. Every line of the output is terminated with the
     * platform line separator.
     * <p>
     * The source is read completely before the target is opened, so the source and
     * target may be the same file.
     *
     * @param oldFullFileName path of the file to read
     * @param oldCharsetName  charset name used to decode the source file
     * @param newFullFileName path of the file to write
     * @param newCharsetName  charset name used to encode the target file
     * @throws Exception if a charset name is unsupported or any I/O error occurs
     */
    public static void convert(String oldFullFileName, String oldCharsetName, String newFullFileName, String newCharsetName) throws Exception {
        logger.info("the old file name is : {}, The oldCharsetName is : {}", oldFullFileName, oldCharsetName);
        logger.info("the new file name is : {}, The newCharsetName is : {}", newFullFileName, newCharsetName);
        String lineSeparator = System.getProperty("line.separator");
        StringBuilder content = new StringBuilder();
        // The file stream is its own resource so it is closed even if the charset name is rejected.
        try (FileInputStream in = new FileInputStream(oldFullFileName);
             BufferedReader bin = new BufferedReader(new InputStreamReader(in, oldCharsetName))) {
            String line;
            while ((line = bin.readLine()) != null) {
                content.append(line).append(lineSeparator);
            }
        }
        newFullFileName = newFullFileName.replace("\\", "/");
        // getParentFile() is null for a bare file name, which needs no directory creation.
        File dir = new File(newFullFileName).getParentFile();
        if (dir != null && !dir.exists()) {
            dir.mkdirs();
        }
        // Closing the writer flushes the encoder; without it buffered output can be lost.
        try (FileOutputStream fos = new FileOutputStream(newFullFileName);
             Writer out = new OutputStreamWriter(fos, newCharsetName)) {
            out.write(content.toString());
        }
    }
}