目录

问题现象:

问题分析:

1.编码问题

2.IO流

解决方法:

zip压缩包解压工具(ZipFIleUtil):

字符编码转换工具类(CharsetConvertUtil):


问题现象:

今天在项目中需要用到zip解压技术,用于解压压缩包并获取其中的文件进行操作,其中涉及中文编码和IO流两个问题,需要注意。


问题分析:

1.编码问题

解压zip文件时,条目名称(例如中文文件名)可能出现编码问题,不处理就会出现乱码;可以在创建 ZipFile 时通过 new ZipFile(srcFile, Charset.forName("字符编码名称")) 指定字符编码来解决。

2.IO流

zip解压缩文件的时候,涉及到文件io流(输入输出流)的问题,因此需要注意流的关闭问题。


解决方法:

分享一下我的工具类,开箱即用:

zip压缩包解压工具(ZipFIleUtil):

import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Enumeration;
import java.util.LinkedHashMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
 * Utility for inspecting and extracting zip archives.
 * <p>
 * Provides:
 * <ol>
 * <li>{@link #getZipInnerFileList(File)}: lists the names and types of the entries in an archive</li>
 * <li>{@link #unZip(File, String)}: extracts an archive into a directory</li>
 * </ol>
 * The charset used to decode entry names is obtained from
 * {@code CharsetConvertUtil.getEncode(path, true)}.
 * NOTE(review): that call inspects the bytes of the archive file itself rather than the
 * entry names — confirm this heuristic is what is intended.
 *
 * @author Stephen
 * @version 2021.03.30
 */
public class ZipFIleUtil {

	/**
	 * Lists the entries of a zip archive, printing each file's type and name to standard output.
	 * <p>
	 * The returned map preserves insertion order. A directory is keyed by its path inside the
	 * archive (without the trailing slash) and mapped to {@code "文件夹"}; a file is keyed by its
	 * simple name (without the directory part) and mapped to its extension, or to an empty string
	 * when the name has no extension. Because files are keyed by simple name only, files with the
	 * same name in different directories overwrite each other.
	 *
	 * @param srcFile the zip archive to inspect
	 * @return the entry map, or {@code null} when the archive contains no entries
	 * @throws RuntimeException if {@code srcFile} does not exist, or if the archive cannot be read
	 *                          (the original exception is attached as the cause)
	 */
	public static LinkedHashMap<String, String> getZipInnerFileList(File srcFile) throws RuntimeException {
		// Fail fast when the archive is missing.
		if ( !srcFile.exists() ) {
			throw new RuntimeException(srcFile.getPath() + "所指文件不存在");
		}
		try {
			LinkedHashMap<String, String> unZipFileList = new LinkedHashMap<String, String>();
			Charset charset = Charset.forName(CharsetConvertUtil.getEncode(srcFile.getAbsolutePath(), true));
			// try-with-resources guarantees the archive is closed even when listing fails.
			try (ZipFile zipFile = new ZipFile(srcFile, charset)) {
				System.out.println("zip压缩包 【" + srcFile.getName() + "】 中包含以下文件:");
				Enumeration<? extends ZipEntry> entries = zipFile.entries();
				String lastDirectory = null;
				while (entries.hasMoreElements()) {
					ZipEntry entry = entries.nextElement();
					String filePath = entry.getName();
					int lastSlash = filePath.lastIndexOf('/');
					if ( lastSlash >= 0 ) {
						// Record the containing directory once per run of consecutive entries.
						String directory = filePath.substring(0, lastSlash);
						if ( !directory.equals(lastDirectory) ) {
							lastDirectory = directory;
							unZipFileList.put(directory, "文件夹");
						}
					}
					// A directory entry ("dir/") has no file name; it was already recorded above.
					if ( entry.isDirectory() ) {
						continue;
					}
					String fileName = filePath.substring(lastSlash + 1);
					// Derive the extension from the simple name so dots in directory names
					// and extension-less files do not yield a path as the "type".
					int lastDot = fileName.lastIndexOf('.');
					String fileType = lastDot >= 0 ? fileName.substring(lastDot + 1) : "";
					System.out.println("文件类型:" + fileType);
					System.out.println("文件名:" + fileName);
					unZipFileList.put(fileName, fileType);
				}
			}
			// Existing contract: an empty archive is reported as null, not as an empty map.
			return unZipFileList.isEmpty() ? null : unZipFileList;
		} catch (Exception e) {
			throw new RuntimeException("获取zip压缩包内部文件信息失败!", e);
		}
	}

	/**
	 * Extracts every entry of a zip archive into {@code destDirPath}, creating directories as
	 * needed, and prints progress and the elapsed time to standard output.
	 * <p>
	 * Entries whose resolved path would fall outside {@code destDirPath} (for example names
	 * containing {@code ../}) are rejected and abort the extraction.
	 *
	 * @param srcFile     the zip archive to extract
	 * @param destDirPath the directory to extract into
	 * @throws RuntimeException if {@code srcFile} does not exist, if an entry would be written
	 *                          outside {@code destDirPath}, or if any I/O error occurs (the
	 *                          original exception is attached as the cause)
	 */
	public static void unZip(File srcFile, String destDirPath) throws RuntimeException {
		long start = System.currentTimeMillis();
		// Fail fast when the archive is missing.
		if ( !srcFile.exists() ) {
			throw new RuntimeException(srcFile.getPath() + "所指文件不存在");
		}

		try {
			Charset charset = Charset.forName(CharsetConvertUtil.getEncode(srcFile.getAbsolutePath(), true));
			// Canonical form of the target directory, used to detect entries that escape it.
			String destRoot = new File(destDirPath).getCanonicalPath();
			String destPrefix = destRoot.endsWith(File.separator) ? destRoot : destRoot + File.separator;
			try (ZipFile zipFile = new ZipFile(srcFile, charset)) {
				Enumeration<? extends ZipEntry> entries = zipFile.entries();
				while (entries.hasMoreElements()) {
					ZipEntry entry = entries.nextElement();
					System.out.println("解压" + entry.getName());
					File targetFile = new File(destDirPath + "/" + entry.getName());
					// Guard against "Zip Slip": the entry name comes from the archive and is untrusted.
					String targetPath = targetFile.getCanonicalPath();
					if ( !targetPath.equals(destRoot) && !targetPath.startsWith(destPrefix) ) {
						throw new RuntimeException("zip entry escapes target directory: " + entry.getName());
					}
					if ( entry.isDirectory() ) {
						targetFile.mkdirs();
						continue;
					}
					// Make sure the parent directory exists before opening the output file.
					File parent = targetFile.getParentFile();
					if ( parent != null && !parent.exists() ) {
						parent.mkdirs();
					}
					// Both streams are closed automatically, even if the copy fails midway.
					try (InputStream is = zipFile.getInputStream(entry);
						 FileOutputStream fos = new FileOutputStream(targetFile)) {
						byte[] buf = new byte[8192];
						int len;
						while ((len = is.read(buf)) != -1) {
							fos.write(buf, 0, len);
						}
					}
				}
			}
			long end = System.currentTimeMillis();
			System.out.println("解压完成,耗时:" + (end - start) + " ms");
		} catch (Exception e) {
			throw new RuntimeException("unzip error from ZipUtils", e);
		}
	}
}

字符编码转换工具类(CharsetConvertUtil):

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.BitSet;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Character-encoding helper: guesses the encoding of a file or stream from its byte-order
 * mark and content, and converts a text file from one charset to another.
 */
public class CharsetConvertUtil {
	private static final Logger logger = LoggerFactory.getLogger(CharsetConvertUtil.class);

	public static final String CODE_UTF8 = "UTF-8";
	/** Label returned for UTF-8 content that starts with a byte-order mark when the BOM is not ignored. */
	public static final String CODE_UTF8_BOM = "UTF-8_BOM";
	public static final String CODE_GBK = "GBK";

	/**
	 * Guesses the encoding of the file at the given path.
	 * The file is opened for the duration of the call and always closed afterwards.
	 *
	 * @param fullFileName path of the file to inspect
	 * @param ignoreBom    when {@code true}, UTF-8 with a BOM is reported as {@link #CODE_UTF8}
	 *                     instead of {@link #CODE_UTF8_BOM}
	 * @return the detected encoding label, see {@link #getEncode(BufferedInputStream, boolean)}
	 * @throws Exception if the file cannot be opened or read
	 */
	public static String getEncode(String fullFileName, boolean ignoreBom) throws Exception {
		logger.debug("fullFileName ; {}", fullFileName);
		try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullFileName))) {
			return getEncode(bis, ignoreBom);
		}
	}

	/**
	 * Guesses the encoding of the content of a buffered stream, starting at its current position.
	 * <p>
	 * Detection order: a leading {@code FF FE} yields {@code "UTF-16"}, a leading {@code FE FF}
	 * yields {@code "Unicode"}, a leading {@code EF BB BF} yields {@link #CODE_UTF8} or
	 * {@link #CODE_UTF8_BOM} depending on {@code ignoreBom}; otherwise the remaining content is
	 * scanned and reported as {@link #CODE_UTF8} when it is structurally valid UTF-8 and as
	 * {@link #CODE_GBK} when it is not.
	 * <p>
	 * The stream is not closed. When no byte-order mark is found the stream is read until the
	 * first invalid byte sequence or the end of the stream.
	 *
	 * @param bis       the stream to inspect
	 * @param ignoreBom whether UTF-8 with a BOM is reported as plain {@link #CODE_UTF8}
	 * @return the detected encoding label
	 * @throws Exception if reading from the stream fails
	 */
	public static String getEncode(BufferedInputStream bis, boolean ignoreBom) throws Exception {
		byte[] head = new byte[3];
		// The mark must stay valid across the header read so the UTF-8 scan can rewind.
		bis.mark(head.length);
		int headLen = bis.read(head);

		String encodeType;
		if ( headLen >= 2 && head[0] == (byte) 0xFF && head[1] == (byte) 0xFE ) {
			encodeType = "UTF-16";
		} else if ( headLen >= 2 && head[0] == (byte) 0xFE && head[1] == (byte) 0xFF ) {
			encodeType = "Unicode";
		} else if ( headLen >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF ) {
			// UTF-8 byte-order mark.
			encodeType = ignoreBom ? CODE_UTF8 : CODE_UTF8_BOM;
		} else if ( isUTF8(bis) ) {
			encodeType = CODE_UTF8;
		} else {
			encodeType = CODE_GBK;
		}
		logger.info("result encode type : {}", encodeType);
		return encodeType;
	}

	/**
	 * Checks whether the content from the last mark to the end of the stream is structurally
	 * valid UTF-8. Only intended to tell BOM-less UTF-8 apart from GBK: it validates lead bytes
	 * and continuation bytes, not overlong forms or surrogate code points.
	 * An empty stream is considered valid.
	 *
	 * @param bis the stream to scan; it is rewound to its mark first
	 * @return {@code true} if every byte sequence is a well-formed UTF-8 sequence
	 * @throws Exception if resetting or reading the stream fails
	 */
	private static boolean isUTF8(BufferedInputStream bis) throws Exception {
		bis.reset();

		int lead;
		while ((lead = bis.read()) != -1) {
			int trailing = trailingByteCount(lead);
			if ( trailing < 0 ) {
				return false;
			}
			for ( int i = 0; i < trailing; i++ ) {
				int next = bis.read();
				// End of stream inside a sequence, or a byte that is not 10xxxxxx, is invalid.
				if ( next == -1 || (next & 0xC0) != 0x80 ) {
					return false;
				}
			}
		}
		return true;
	}

	/**
	 * Returns how many continuation bytes must follow the given UTF-8 lead byte.
	 *
	 * @param lead the lead byte as an unsigned value (0-255)
	 * @return 0 for ASCII, 1-3 for multi-byte lead bytes, or -1 if the byte cannot start a
	 *         UTF-8 sequence (a continuation byte, 0xC0, 0xC1, or anything above 0xF4)
	 */
	private static int trailingByteCount(int lead) {
		if ( lead < 0x80 ) {
			return 0;
		}
		if ( lead >= 0xC2 && lead <= 0xDF ) {
			return 1;
		}
		if ( lead >= 0xE0 && lead <= 0xEF ) {
			return 2;
		}
		if ( lead >= 0xF0 && lead <= 0xF4 ) {
			return 3;
		}
		return -1;
	}

	/**
	 * Re-encodes a text file: reads {@code oldFullFileName} using {@code oldCharsetName} and
	 * writes the content to {@code newFullFileName} using {@code newCharsetName}.
	 * <p>
	 * The source is read completely before the target is opened. Every line is written followed
	 * by the platform line separator, so line endings are normalized and the output always ends
	 * with a line separator. Backslashes in the target path are replaced with forward slashes,
	 * and missing parent directories of the target are created.
	 *
	 * @param oldFullFileName path of the file to read
	 * @param oldCharsetName  charset name used to decode the source file
	 * @param newFullFileName path of the file to write
	 * @param newCharsetName  charset name used to encode the target file
	 * @throws Exception if a charset name is not supported or an I/O error occurs
	 */
	public static void convert(String oldFullFileName, String oldCharsetName, String newFullFileName, String newCharsetName) throws Exception {
		logger.info("the old file name is : {}, The oldCharsetName is : {}", oldFullFileName, oldCharsetName);
		logger.info("the new file name is : {}, The newCharsetName is : {}", newFullFileName, newCharsetName);

		String lineSeparator = System.getProperty("line.separator");
		StringBuilder content = new StringBuilder();
		// Each stream is its own resource so it is closed even if wrapping it fails.
		try (FileInputStream in = new FileInputStream(oldFullFileName);
			 BufferedReader bin = new BufferedReader(new InputStreamReader(in, oldCharsetName))) {
			String line;
			while ((line = bin.readLine()) != null) {
				content.append(line).append(lineSeparator);
			}
		}

		String targetPath = newFullFileName.replace("\\", "/");
		// getParentFile() is null for a bare file name, in which case there is nothing to create.
		File dir = new File(targetPath).getParentFile();
		if ( dir != null && !dir.exists() ) {
			dir.mkdirs();
		}
		// Closing the writer flushes its buffer; without it the output may never reach the file.
		try (FileOutputStream fos = new FileOutputStream(targetPath);
			 Writer out = new OutputStreamWriter(fos, newCharsetName)) {
			out.write(content.toString());
		}
	}
}