运用了zip4j的方法,需要导入jar或者pom依赖,我这边用的是1.3.2版本
解压文件,读写文件,压缩文件都可自行拆分出来单独使用,我这边是整体使用的一个工具类
最后一段注释的代码,只是我用了另一种解压方式代替,功能依然正常;
这里是第一版的代码,如果想使用拆分后的功能,可以看我另外的一篇博客
废话不多说,源码:
package com.example.jiexi.util;
import net.lingala.zip4j.core.ZipFile;
import net.lingala.zip4j.exception.ZipException;
import javax.swing.filechooser.FileSystemView;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;public class JiexiZIPUtil {
static File desktopDir = FileSystemView.getFileSystemView() .getHomeDirectory();
//当前用户桌面路径(暂且文件放在桌面,后期跳转直接更改basic_url即可)
static String desktopPath = desktopDir.getAbsolutePath();
public static final int BUFFER_SIZE = 2 * 1024;
/***
* 修改更新内容方法
* @throws IOException
* @throws ZipException
*/
public static void modifyFile() throws IOException, ZipException {
FileSystemView fileSystemView = FileSystemView.getFileSystemView();
String absolutePath = fileSystemView.getHomeDirectory().getAbsolutePath();
//文件存放基础路径
String basic_url=absolutePath+File.separator;
//ZIP压缩包存放路径
String project_zip = basic_url+"project_zip";
//临时文件路径
String temp="";
//域名路径
String domainNamePath="";
//域名称
String domainName="";
//修改后压缩包ZIP存放的路径
String new_zip = basic_url+"new_zip";
File f = new File(project_zip);
//目录下文件的个数
File[] files = f.listFiles();
for (File fname : files) {
if (!fname.isDirectory()) {
String newPath = project_zip + File.separator + fname.getName();
boolean zipMsg = getZIPMsg(newPath);
if (zipMsg) {
temp=basic_url+File.separator+fname.getName().split("\\.")[0];
File file = new File(temp);
if (!file.isDirectory() || !file.exists()) {
file.mkdirs();
}
//1解压
decompression(newPath, temp);
//获取域名
domainNamePath=temp+File.separator+fname.getName().split("\\.")[0]+File.separator+"spiders";
System.out.println("domainnamepath=="+domainNamePath);
File fdomain=new File(domainNamePath);
File[] fdomains = fdomain.listFiles();
for(File value:fdomains){
if(!"__init__".equals(value.getName().split("\\.")[0])){
domainName=value.getName().split("\\.")[0];
break;
}
}
//2修改
String settingsPath = temp + File.separator + fname.getName().split("\\.")[0] + File.separator;
//JiexiZIPUtil.writerFile();
SettingPipelinesUpdateFile(settingsPath + "settings.py", settingsPath + "pipelines.py",settingsPath+"items.py",domainName,domainNamePath);
//3压缩到指定地方
createZip(temp,new_zip+File.separator+fname.getName());
//4删除文件
deleteFile(file);
} else {
System.out.println("目标ZIP不是我要的文件格式");
continue;
}
}
}
}
/***
* 解压文件(用了zip4j这个包)
* @zipPath zip压缩包文件的路径:XX/DD/dd.zip
* @aimPath 解压后的文件存放目录
* @throws ZipException
*/
public static void decompression(String zipPath, String aimPath) throws ZipException {
ZipFile zfile = new ZipFile(zipPath);
//防止中文文件名称乱码
zfile.setFileNameCharset("UTF-8");
if (!zfile.isValidZipFile()) {
throw new ZipException("压缩文件不存在,请检查路径");
}
File file = new File(aimPath);
//创建文件夹
if (file.isDirectory() && !file.exists()) {
//创建文件夹,mkdirs()不依赖父目录,而mkdir()依赖父目录
file.mkdirs();
}
//解压到aimPath路径中
zfile.extractAll(aimPath);
}
/***
* 删除zip
*/
public void deleteZIP(String zipLocation){
//压缩文件存放的位置
File fi=new File(zipLocation);
if (fi.isDirectory()) {
File[] files = fi.listFiles();
for (File f : files) {
// zip文件 判断 是否存在
if (f.getName().endsWith(".zip")) {
if(f.delete()) {
System.out.println("zip文件成功被删除");
}else{
System.out.println("zip文件删除失败");
}
}
}
}
}
/***
* 读取文件内容,并更改部分值
* @param path
* @param start
* @param end
* @param content
* @throws IOException
*/
public static void readUpdateFile(String path,int start,int end,String content) throws IOException {
Path path1 = Paths.get(path);
byte[] bytes = Files.readAllBytes(path1);
String s=new String(bytes,"UTF-8");
StringBuilder sb=new StringBuilder(s);
sb.replace(start,end,content);
//覆盖重写文件
writerFile(path,sb.toString(),false);
}
/***
* 更新修改setting、item、pipelines文件的内容
* @param settingsPath
* @param pipelinesPath
* @param itemsPath
* @throws IOException
*/
public static void SettingPipelinesUpdateFile(String settingsPath,String pipelinesPath,String itemsPath,String domainName,String domainNamePath) throws IOException {
String domainPath=domainNamePath+File.separator+domainName+".py";
System.out.println(settingsPath);
System.out.println(pipelinesPath);
System.out.println(itemsPath);
System.out.println(domainPath);
//s为settings更新内容
String s=new String(Files.readAllBytes(Paths.get(settingsPath)),"UTF-8");
String s1 = s.replaceAll("ROBOTSTXT_OBEY = True", "ROBOTSTXT_OBEY = False");
//stringbulider对stringbuffer速度快很多,但如果要求线程安全必须使用stringbuffer
StringBuilder sb=new StringBuilder(s1);
//i为item更新内容
String i=new String(Files.readAllBytes(Paths.get(itemsPath)),"UTF-8");
String i1=i.replaceAll("pass","");
StringBuilder isb=new StringBuilder(i1);
//p为pipelines更新内容
String p=new String(Files.readAllBytes(Paths.get(pipelinesPath)),"UTF-8");
String pipelines=p.replace("# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html","\n" +
"from elasticsearch import Elasticsearch\n" +
"import elasticsearch.helpers\n" +
"import redis\n" +
"from kafka import KafkaProducer\n" +
"import json\n" +
"from redis import Redis\n" +
"from .settings import *");
StringBuilder psb=new StringBuilder(pipelines);
//spiderContent为爬虫程序更新内容
String spiderContent=new String(Files.readAllBytes(Paths.get(domainPath)),"UTF-8");
String spiders=spiderContent.replace("from __future__ import absolute_import","from ..settings import ELASTICSEARCH_INDEX, ELASTICSEARCH_TYPE\n" +
"from __future__ import absolute_import");
StringBuilder spiderSb=new StringBuilder(spiders);
//3
File file = new File(settingsPath);
String parent = file.getParent();
File file1=new File(parent);
String fileName=file1.getName();
String upperCase =fileName.substring(0,1).toUpperCase()+fileName.substring(1);
System.out.println("uppercase="+upperCase);
String str="\n\t"+"'"+fileName+".pipelines."+upperCase+"Pipeline': 300,";
System.out.println("str==="+str);
String str1="\n\t"+"'"+fileName+".pipelines.ElasticsearchPipeline': 400,";
String str2="\n\t"+"'"+fileName+".pipelines.RedisPipeline': 500,";
String str3="ITEM_PIPELINES = {";
String str4="\n}";
//String settingContent=str3+str+"\n"+"\t"+str1+"\n"+"\t"+str2+"\n"+str4;
String settingContent=str3+str+str1+str2+str4;
String settingConfigureContent="# elasticsearch 链接配置\n" +
"ELASTICSEARCH_PORT = 9200\n" +
"ELASTICSEARCH_HOST = '192.168.1.53'\n" +
"ELASTICSEARCH_INDEX = 'sentiment1'\n" +
"# redis 链接配置\n" +
"REDIS_HOST = \"192.168.1.51\"\n" +
"REDIS_PORT = 6379\n" +
"# mongodb 链接配置\n" +
"MONGODB_URL = 'mongodb://192.168.1.51:27017/'\n" +
"ELASTICSEARCH_INDEX = 'test'\n"+
"ELASTICSEARCH_Type = '"+domainName+"'";
String pipelinesContent="class KafkaPipeline(object):\n" +
" def open_spider(self, spider):\n" +
" self.producer = KafkaProducer(bootstrap_servers=['sentiment01:9092', 'sentiment03:9092'], value_serializer=lambda m: json.dumps(m).encode('ascii'))\n" +
"\n" +
" def process_item(self, item, spider):\n" +
" item['index'] = ELASTICSEARCH_INDEX\n" +
" self.producer.send('sentiment', dict(item))\n" +
" return item\n" +
"class MongoPipeline(object):\n" +
" def open_spider(self, spider):\n" +
" pass\n" +
"\n" +
" def process_item(self, item, spider):\n" +
" return item\n" +
"\n" +
" def close_spider(self, spider):\n" +
" pass\n" +
"\n" +
"\n" +
"class ElasticsearchPipeline(object):\n" +
" def open_spider(self, spider):\n" +
" self.es = Elasticsearch(([{\"host\": ELASTICSEARCH_HOST, \"port\": str(ELASTICSEARCH_PORT)}]))\n" +
"\n" +
" def process_item(self, item, spider):\n" +
" actions = [\n" +
" {\n" +
" '_op_type': 'index',\n" +
" '_index': ELASTICSEARCH_INDEX,\n" +
" '_type': ELASTICSEARCH_TYPE,\n" +
" '_source': dict(item)\n" +
" }\n" +
" ]\n" +
" elasticsearch.helpers.bulk(self.es, actions) # 添加操作'''\n" +
" return item\n" +
"\n" +
" def close_spider(self, spider):\n" +
" pass\n" +
"\n" +
"\n" +
"class RedisPipeline(object):\n" +
" def open_spider(self, spider):\n" +
" spider.duplicate = Duplicate(spider.name)\n" +
" spider.duplicate.find_all_url(index=ELASTICSEARCH_INDEX, doc_type=ELASTICSEARCH_TYPE, source='url')\n" +
"\n" +
" def process_item(self, item, spider):\n" +
" return item\n" +
"\n" +
" def close_spider(self, spider):\n" +
" print('爬虫关闭')\n" +
" r = redis.Redis(host=REDIS_HOST, port=str(REDIS_PORT), db=0)\n" +
" r.delete(spider.name)\n" +
"\n" +
"\n" + String itemContent="\n\ttitle = scrapy.Field()\n" +
" content = scrapy.Field()\n" +
" publishtime = scrapy.Field()\n" +
" author = scrapy.Field()\n" +
" fromwhere = scrapy.Field()\n" +
" url = scrapy.Field()";
//False添加重写settings文件
writerFile(settingsPath,sb.toString(),false);
//False添加重写items文件
writerFile(itemsPath,isb.toString(),false);
//False添加重写pipelines文件
writerFile(pipelinesPath,psb.toString(),false);
//False添加重写spider爬虫文件domainName,domainNamePath
writerFile(domainPath,spiderSb.toString(),false);
//追加settings文件
writerFile(settingsPath,settingContent,true);
writerFile(settingsPath,settingConfigureContent,true);
//追加pipelines文件
writerFile(pipelinesPath,pipelinesContent,true);
//追加item内容
writerFile(itemsPath,itemContent,true);
}
/***
* 在文件的末尾追加内容content
* @param path 文件的路径
* @param content 要追加的内容
* @throws IOException
*/
public static void writerFile(String path, String content,boolean flag) throws IOException {
try {
// 打开一个写文件器,构造函数中的第二个参数true表示以追加形式写文件(不是覆盖)
FileWriter writer = new FileWriter(path, flag);
writer.write(content);
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
/***
* 解析zip压缩包是否符合我们需要的目录结构
* @param zipPath
* @return
* @throws IOException
*/
public static boolean getZIPMsg(String zipPath) throws IOException {
boolean flag=false;
//获取文件输入流
FileInputStream input = new FileInputStream(zipPath);
//获取ZIP输入流(一定要指定字符集Charset.forName("GBK")否则会报java.lang.IllegalArgumentException: MALFORMED)
ZipInputStream zipInputStream = new ZipInputStream(new BufferedInputStream(input), Charset.forName("GBK"));
//定义ZipEntry置为null,避免由于重复调用zipInputStream.getNextEntry造成的不必要的问题
ZipEntry ze = null;
//循环遍历
while ((ze = zipInputStream.getNextEntry()) != null) {
String name = ze.getName();
System.out.println("name1="+name);
if(name.split("/").length==2) {
name = name.split("/")[1];
System.out.println("name===="+name);
if("pipelines.py".equals(name)){
flag=true;
break;
}
}
}
//一定记得关闭流
zipInputStream.closeEntry();
input.close();
return flag;
} /***
* 删除临时文件夹
* @param file
*/
public static void deleteFile(File file) {
//判断文件是否存在
if (file.exists()) {
//判断是否是文件
if (file.isFile()) {
//删除文件
file.delete();
//否则如果它是一个目录
} else if (file.isDirectory()) {
//声明目录下所有的文件 files[];
File[] files = file.listFiles();
//遍历目录下所有的文件
for (int i = 0; i < files.length; i++) {
//把每个文件用这个方法进行迭代
deleteFile(files[i]);
}
//删除文件夹
file.delete();
System.out.println("删除");
}
} else {
System.out.println("所删除的文件不存在");
}
} /**
* * 压缩创建ZIP文件
* * @param sourcePath 文件或文件夹路径
* * @param zipPath 生成的zip文件存在路径(包括文件名)
*/
public static void createZip(String sourcePath, String zipPath) {
FileOutputStream fos = null;
ZipOutputStream zos = null;
try {
fos = new FileOutputStream(zipPath);
zos = new ZipOutputStream(fos);
Charset.forName("GBK");
writeZip(new File(sourcePath), "", zos);
} catch (FileNotFoundException e) {
} finally {
try {
if (zos != null) {
zos.close();
}
} catch (IOException e) {
}
}
}
private static void writeZip(File file, String parentPath, ZipOutputStream zos) {
if (file.exists()) {
//处理文件夹
if (file.isDirectory()) {
parentPath += file.getName() + File.separator;
File[] files = file.listFiles();
if (files.length != 0) {
for (File f : files) {
writeZip(f, parentPath, zos);
}
} else { //空目录则创建当前目录
try {
zos.putNextEntry(new ZipEntry(parentPath));
} catch (IOException e) {
e.printStackTrace();
}
}
} else {
FileInputStream fis = null;
try {
fis = new FileInputStream(file);
ZipEntry ze = new ZipEntry(parentPath + file.getName());
zos.putNextEntry(ze);
byte[] content = new byte[1024];
int len;
while ((len = fis.read(content)) != -1) {
zos.write(content, 0, len);
zos.flush();
} } catch (FileNotFoundException e) {
} catch (IOException e) {
} finally {
try {
if (fis != null) {
fis.close();
}
} catch (IOException e) { }
}
}
}
}
/**
* @param sourceFile 源文件
* @param zos zip输出流
* @param name 压缩后的名称
* @param KeepDirStructure 是否保留原来的目录结构,true:保留目录结构;
* false:所有文件跑到压缩包根目录下(注意:不保留目录结构可能会出现同名文件,会压缩失败)
* @throws Exception
*/
// public static void compress(File sourceFile, ZipOutputStream zos, String name,
// boolean KeepDirStructure) throws Exception{
// byte[] buf = new byte[BUFFER_SIZE];
// if(sourceFile.isFile()){
// // 向zip输出流中添加一个zip实体,构造器中name为zip实体的文件的名字
// zos.putNextEntry(new ZipEntry(name));
// // copy文件到zip输出流中
// int len;
// FileInputStream in = new FileInputStream(sourceFile);
// while ((len = in.read(buf)) != -1){
// zos.write(buf, 0, len);
// }
// zos.closeEntry();
// in.close();
// } else {
// File[] listFiles = sourceFile.listFiles();
// if(listFiles == null || listFiles.length == 0){
// // 需要保留原来的文件结构时,需要对空文件夹进行处理
// if(KeepDirStructure){
// // 空文件夹的处理
// zos.putNextEntry(new ZipEntry(name + "/"));
// // 没有文件,不需要文件的copy
// zos.closeEntry();
// }
// }else {
// for (File file : listFiles) {
// // 判断是否需要保留原来的文件结构
// if (KeepDirStructure) {
// // 注意:file.getName()前面需要带上父文件夹的名字加一斜杠,
// // 不然最后压缩包中就不能保留原来的文件结构,即:所有文件都跑到压缩包根目录下了
// compress(file, zos, name + "/" + file.getName(),KeepDirStructure);
// } else {
// compress(file, zos, file.getName(),KeepDirStructure);
// }
// }
// }
// }
// }
}