用Java实现二进制文件的分割与合并,大小文本文件的多样化分割与合并,文件排序算法:按自然语言排序
https://pan.baidu.com/s/1Lhzb6dwZOJ1Yy5Lu0ELQTg&pwd=9qze
#1. 代码Main.java
import android.support.annotation.NonNull;
import java.util.Arrays;
import java.util.List;
import bin.mt.plugin.api.translation.BaseTranslationEngine;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.FileReader;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import android.content.SharedPreferences;
import java.io.*;
public class Main extends BaseTranslationEngine {
/**
 * Creates the engine with the "skip already-translated entries" behaviour switched off.
 */
public Main() {
    super(new ConfigurationBuilder().setForceNotToSkipTranslated(true).build());
}
/**
 * Splits one file into numbered pieces written to {@code <target>/<name>/<name><flag-head><n><flag-tail><ext>}.
 *
 * <p>The split strategy is taken from the {@code operate} preference:
 * {@code "file"} (or {@code "fit"} with {@code format == 0}) splits raw bytes into pieces of the
 * requested size; {@code "json"} (or {@code "fit"} with {@code format == 3}) writes every element of
 * a top-level JSON array to its own piece; anything else treats the source as UTF-8 text and splits
 * it either by approximate size (preference {@code spliter} empty) or around matches of the
 * {@code spliter} regular expression (mode chosen by preference {@code match_split}).
 * Text pieces are passed through the find/replace rule stored in {@code Remover_regular}.
 *
 * <p>Failures are printed, not thrown.
 *
 * @param SrcFilePath        full path of the file to split; null or empty aborts
 * @param SingleGoalFileSize piece size expressed in {@code SingleFileUnit}
 * @param SingleFileUnit     {@code "B"}, {@code "KB"}, {@code "GB"}; any other value (or null) means MB
 * @param GoalFileDirectory  parent folder for the piece folder; null or empty means the source's folder.
 *                           A source without extension needs an explicit folder, because a folder
 *                           with the same name as the file cannot be created next to it.
 * @param format             source kind hint used by the {@code "fit"} strategy (0 = binary, 3 = JSON)
 */
public void Split(String SrcFilePath, long SingleGoalFileSize, String SingleFileUnit, String GoalFileDirectory,
        int format) {
    // The null test has to come first; the original dereferenced the path before checking it.
    if (SrcFilePath == null || SrcFilePath.equals("")) {
        System.out.println("分割失败!");
        return;
    }
    SharedPreferences preferences = getContext().getPreferences();
    String op = preferences.getString("operate", "");
    String name_flag = preferences.getString("name_flag", "-🆎标识");
    // nf[0] is written before the piece number and nf[1] after it; the last 🆎 separates the two.
    String[] nf = new String[] { "", "" };
    int ab = name_flag.lastIndexOf("🆎");
    if (ab == -1) {
        if (name_flag.equals("")) {
            nf[0] = "-";
            nf[1] = "标识";
        } else {
            nf[0] = name_flag;
        }
    } else {
        nf[0] = name_flag.substring(0, ab);
        nf[1] = name_flag.substring(ab).replace("🆎", "");
    }

    // Piece size in bytes and the read-buffer size for the binary strategy.
    final int megabyte = 1024 * 1024;
    String unitName = SingleFileUnit == null ? "" : SingleFileUnit;
    long SingleFileSize;
    int bufferSize = megabyte;
    switch (unitName) {
    case "B":
        SingleFileSize = SingleGoalFileSize;
        break;
    case "KB":
        SingleFileSize = 1024L * SingleGoalFileSize;
        break;
    case "GB":
        SingleFileSize = 1024L * megabyte * SingleGoalFileSize;
        bufferSize = 10 * megabyte;
        break;
    default:
        SingleFileSize = (long) megabyte * SingleGoalFileSize;
    }
    if (SingleFileSize <= 0) {
        // A non-positive size would divide by zero below.
        System.out.println("分割失败!");
        return;
    }
    if (SingleFileSize < bufferSize) {
        bufferSize = (int) SingleFileSize;
    }

    long SrcFileSize = new File(SrcFilePath).length();
    int GoalFileNum = (int) (SrcFileSize / SingleFileSize);
    if (SrcFileSize % SingleFileSize != 0) {
        GoalFileNum++;
    }

    int x1 = SrcFilePath.lastIndexOf("/");
    int x2 = SrcFilePath.lastIndexOf(".");
    String SrcFileName;
    String fc = "";
    if (x2 <= x1) {
        // No extension (also covers a bare name with neither '/' nor '.').
        SrcFileName = SrcFilePath.substring(x1 + 1);
    } else {
        fc = SrcFilePath.substring(x2);
        SrcFileName = SrcFilePath.substring(x1 + 1, x2);
    }
    if (GoalFileDirectory == null || GoalFileDirectory.equals("")) {
        GoalFileDirectory = x1 < 0 ? "." : SrcFilePath.substring(0, x1);
    }
    File goalDirectory = new File(GoalFileDirectory + File.separator + SrcFileName + File.separator);
    if (!goalDirectory.exists()) {
        goalDirectory.mkdirs();
    }
    String piecePrefix = GoalFileDirectory + File.separator + SrcFileName + File.separator + SrcFileName + nf[0];
    String pieceSuffix = nf[1] + fc;

    try {
        if (op.equals("file") || (op.equals("fit") && format == 0)) {
            splitBinary(SrcFilePath, piecePrefix, pieceSuffix, GoalFileNum, SingleFileSize, bufferSize);
        } else if (op.equals("json") || (op.equals("fit") && format == 3)) {
            splitJsonArray(SrcFilePath, piecePrefix, pieceSuffix);
        } else {
            String[] rp = parseReplaceRule(preferences.getString("Remover_regular", ""));
            String spliter = preferences.getString("spliter", "");
            if (spliter.equals("")) {
                splitTextBySize(SrcFilePath, piecePrefix, pieceSuffix, GoalFileNum, SingleFileSize, rp);
            } else {
                int fmatch = Integer.valueOf(preferences.getString("match_split", "0"));
                splitTextByPattern(SrcFilePath, piecePrefix, pieceSuffix, spliter, fmatch, rp);
            }
        }
    } catch (Exception e) {
        // Best effort, as before: report and return.
        e.printStackTrace();
    }
}

/** Parses "find🆎replace" into {find, replace}; stored \n and \t in the replacement become real characters. */
private static String[] parseReplaceRule(String remove) {
    String[] rp = new String[] { "", "" };
    int mark = remove.lastIndexOf("🆎");
    if (mark == -1) {
        rp[0] = remove;
    } else if (mark == 0) {
        rp[0] = remove.replace("🆎", "");
    } else {
        rp[0] = remove.substring(0, mark);
        rp[1] = remove.substring(mark).replace("🆎", "");
        rp[1] = rp[1].replaceAll("\\\\n", "\n").replaceAll("\\\\t", "\t");
    }
    return rp;
}

/** Applies the find/replace rule to one chunk of text, with ^ and $ matching at line boundaries. */
private static String applyReplacement(String text, String[] rp) {
    return text.replaceAll("(?m)" + rp[0], rp[1]);
}

/** Opens a piece for writing (truncating it) in UTF-8, the same charset the source is read with. */
private static BufferedWriter openPieceWriter(String path) throws IOException {
    return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path, false), "UTF-8"));
}

/** Opens the source as UTF-8 text. */
private static BufferedReader openUtf8Reader(String path) throws IOException {
    return new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
}

/** Rejects a match that consumed nothing at position 0: the text would never shrink and the loop never end. */
private static void requireProgress(Matcher match) {
    if (match.end() == 0) {
        throw new IllegalArgumentException("分割正则表达式匹配到空字符串,无法继续分割");
    }
}

/** Copies the source into pieceCount files of exactly pieceSize bytes each (the last one may be shorter). */
private static void splitBinary(String sourcePath, String piecePrefix, String pieceSuffix, int pieceCount,
        long pieceSize, int bufferSize) throws IOException {
    byte[] buffer = new byte[bufferSize];
    try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(sourcePath))) {
        for (int i = 0; i < pieceCount; i++) {
            try (BufferedOutputStream out = new BufferedOutputStream(
                    new FileOutputStream(piecePrefix + i + pieceSuffix))) {
                long written = 0;
                while (written < pieceSize) {
                    // Never read past the piece boundary, otherwise pieces grow beyond the requested
                    // size and the pre-computed piece count leaves empty files at the end.
                    int want = (int) Math.min((long) buffer.length, pieceSize - written);
                    int len = in.read(buffer, 0, want);
                    if (len == -1) {
                        break;
                    }
                    out.write(buffer, 0, len);
                    written += len;
                }
            }
        }
    }
}

/** Writes every object of the source's top-level JSON array to its own piece. */
private static void splitJsonArray(String sourcePath, String piecePrefix, String pieceSuffix) throws Exception {
    String eol = System.getProperty("line.separator");
    StringBuilder text = new StringBuilder();
    try (BufferedReader reader = openUtf8Reader(sourcePath)) {
        String line;
        while ((line = reader.readLine()) != null) {
            text.append(line).append(eol);
        }
    }
    JSONArray jsonArray = new JSONArray(text.toString());
    for (int i = 0; i < jsonArray.length(); i++) {
        JSONObject jsonObject = jsonArray.getJSONObject(i);
        try (BufferedWriter out = openPieceWriter(piecePrefix + i + pieceSuffix)) {
            out.write(jsonObject.toString());
        }
    }
}

/**
 * Splits text on line boundaries into pieces of roughly pieceSize; the size is counted in
 * characters (line length plus one per line break), so byte sizes are only approximate.
 */
private static void splitTextBySize(String sourcePath, String piecePrefix, String pieceSuffix, int pieceCount,
        long pieceSize, String[] rp) throws IOException {
    String eol = System.getProperty("line.separator");
    StringBuilder textbatch = new StringBuilder();
    String tem;
    String line = null; // one-line look-ahead, carried over from piece to piece
    try (BufferedReader bufferedReader = openUtf8Reader(sourcePath)) {
        for (int i = 0; i < pieceCount; i++) {
            BufferedWriter bufw = null; // opened lazily so that no empty piece is created
            try {
                long count = 0;
                do {
                    tem = line;
                    line = bufferedReader.readLine();
                    if (tem == null) {
                        continue;
                    }
                    textbatch.append(tem);
                    if (line == null) {
                        // Last line of the source: no trailing line break.
                        count += tem.length();
                    } else {
                        textbatch.append(eol);
                        count += tem.length() + 1;
                    }
                    if (textbatch.length() > 0) {
                        if (bufw == null) {
                            bufw = openPieceWriter(piecePrefix + i + pieceSuffix);
                        }
                        if (count >= pieceSize) {
                            bufw.write(applyReplacement(textbatch.toString(), rp));
                            textbatch.setLength(0);
                            break;
                        }
                    }
                } while (line != null);
                if (textbatch.length() > 0 && bufw != null) {
                    bufw.write(applyReplacement(textbatch.toString(), rp));
                    textbatch.setLength(0);
                }
            } finally {
                if (bufw != null) {
                    bufw.close();
                }
            }
        }
    }
}

/**
 * Splits text around matches of the spliter expression.
 * fmatch 0: whole text, cut before each match; fmatch 1 (or negative): whole text, cut after each match;
 * fmatch 2: line by line, cut before each match; any other value: line by line, cut after each match.
 */
private static void splitTextByPattern(String sourcePath, String piecePrefix, String pieceSuffix, String spliter,
        int fmatch, String[] rp) throws IOException {
    String eol = System.getProperty("line.separator");
    Pattern prp = Pattern.compile(spliter, Pattern.MULTILINE);
    Matcher mrp;
    StringBuilder textbatch = new StringBuilder();
    String tem;
    String line = null;
    BufferedWriter bufw = null; // currently open piece, null when the next piece still has to be created
    int ab = 0; // number of the next piece to create
    try (BufferedReader bufferedReader = openUtf8Reader(sourcePath)) {
        if (fmatch < 2) {
            // Load the whole text; the look-ahead avoids adding a line break after the last line.
            do {
                tem = line;
                line = bufferedReader.readLine();
                if (tem != null) {
                    textbatch.append(tem);
                    if (line != null) {
                        textbatch.append(eol);
                    }
                }
            } while (line != null);
            mrp = prp.matcher(textbatch);
            if (fmatch == 0) {
                // tmatch holds the piece being assembled: the previous match plus the text after it.
                StringBuilder tmatch = new StringBuilder();
                while (mrp.find()) {
                    requireProgress(mrp);
                    if (mrp.start() == 0) {
                        if (bufw == null) {
                            bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                            ab++;
                            if (ab == 1) {
                                // The text itself starts with a match: it opens the first piece.
                                bufw.write(applyReplacement(mrp.group(), rp));
                                textbatch.delete(0, mrp.end());
                                mrp.reset(textbatch);
                                continue;
                            }
                        }
                    } else {
                        tmatch.append(textbatch.substring(0, mrp.start()));
                        if (bufw == null) {
                            bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                            ab++;
                        }
                    }
                    bufw.write(applyReplacement(tmatch.toString(), rp));
                    bufw.close();
                    bufw = null;
                    tmatch = new StringBuilder();
                    tmatch.append(mrp.group()); // must be read before the text is truncated
                    textbatch.delete(0, mrp.end());
                    mrp.reset(textbatch);
                }
                tmatch.append(textbatch.toString());
                if (bufw == null) {
                    bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                }
                bufw.write(applyReplacement(tmatch.toString(), rp));
                textbatch.setLength(0);
                bufw.close();
            } else {
                while (mrp.find()) {
                    requireProgress(mrp);
                    bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                    bufw.write(applyReplacement(textbatch.substring(0, mrp.end()), rp));
                    bufw.close();
                    textbatch.delete(0, mrp.end());
                    mrp.reset(textbatch);
                    ab++;
                }
                bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                bufw.write(applyReplacement(textbatch.toString(), rp));
                textbatch.setLength(0);
                bufw.close();
            }
        } else if (fmatch == 2) {
            while ((line = bufferedReader.readLine()) != null) {
                mrp = prp.matcher(line);
                while (mrp.find()) {
                    requireProgress(mrp);
                    if (mrp.start() == 0) {
                        if (bufw == null) {
                            bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                            ab++;
                            // Only when nothing precedes the match may it be written straight away;
                            // otherwise the buffered earlier lines would end up after the match.
                            if (ab == 1 && textbatch.length() == 0) {
                                bufw.write(applyReplacement(mrp.group(), rp));
                                line = line.length() > mrp.end() ? line.substring(mrp.end()) : "";
                                mrp.reset(line);
                                continue;
                            }
                        }
                    } else {
                        textbatch.append(line.substring(0, mrp.start()));
                        if (bufw == null) {
                            bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                            ab++;
                        }
                    }
                    bufw.write(applyReplacement(textbatch.toString(), rp));
                    bufw.close();
                    bufw = null;
                    textbatch = new StringBuilder();
                    textbatch.append(mrp.group());
                    line = line.length() > mrp.end() ? line.substring(mrp.end()) : "";
                    mrp.reset(line);
                }
                textbatch.append(line).append(eol);
                if (textbatch.length() > 131072) {
                    // Flush a large buffer into the current piece to bound memory use.
                    if (bufw == null) {
                        bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                        ab++;
                    }
                    bufw.write(applyReplacement(textbatch.toString(), rp));
                    textbatch = new StringBuilder();
                }
            }
            if (bufw == null) {
                bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
            }
            bufw.write(applyReplacement(textbatch.toString(), rp));
            bufw.close();
        } else {
            while ((line = bufferedReader.readLine()) != null) {
                mrp = prp.matcher(line);
                while (mrp.find()) {
                    requireProgress(mrp);
                    textbatch.append(line.substring(0, mrp.end()));
                    if (bufw == null) {
                        bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                        ab++;
                    }
                    bufw.write(applyReplacement(textbatch.toString(), rp));
                    bufw.close();
                    bufw = null;
                    textbatch = new StringBuilder();
                    line = line.length() > mrp.end() ? line.substring(mrp.end()) : "";
                    mrp.reset(line);
                }
                textbatch.append(line).append(eol);
                if (textbatch.length() > 131072) {
                    // Flush a large buffer into the current piece to bound memory use.
                    if (bufw == null) {
                        bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
                        ab++;
                    }
                    bufw.write(applyReplacement(textbatch.toString(), rp));
                    textbatch = new StringBuilder();
                }
            }
            if (bufw == null) {
                bufw = openPieceWriter(piecePrefix + ab + pieceSuffix);
            }
            bufw.write(applyReplacement(textbatch.toString(), rp));
            bufw.close();
        }
    } finally {
        // Closing an already closed writer is a no-op; this only matters after an exception.
        if (bufw != null) {
            bufw.close();
        }
    }
}
/**
 * Concatenates, byte for byte, every file found under {@code FilePath} into a single file.
 *
 * <p>The files are ordered by {@link #sortFile(List)}. With sort mode 3 or 7 the output name is the
 * first file's name with the piece marker (name-flag head, digits, name-flag tail) removed;
 * otherwise it is {@code "0_op."} followed by the first file's name.
 * I/O failures are printed, not thrown.
 *
 * @param FilePath          folder holding the pieces
 * @param GoalFileDirectory folder for the merged file; null or empty means {@code FilePath}
 * @throws IllegalStateException if no file to merge is found
 */
public void Merge(String FilePath, String GoalFileDirectory) {
    SharedPreferences preferences = getContext().getPreferences();
    String sort = preferences.getString("filesort", "0");
    List<File> fileList = sortFile(getFiles(FilePath, ".*"));
    if (fileList.isEmpty()) {
        // Previously surfaced as a bare IndexOutOfBoundsException from get(0).
        throw new IllegalStateException("没有找到可合并的文件:" + FilePath);
    }

    String GoalFileName;
    if (sort.equals("3") || sort.equals("7")) {
        String name_flag = preferences.getString("name_flag", "-🆎标识");
        // nf[0] precedes the piece number and nf[1] follows it; the last 🆎 separates the two.
        String[] nf = new String[] { "", "" };
        int ab = name_flag.lastIndexOf("🆎");
        if (ab == -1) {
            if (name_flag.equals("")) {
                nf[0] = "-";
                nf[1] = "标识";
            } else {
                nf[0] = name_flag;
            }
        } else {
            nf[0] = name_flag.substring(0, ab);
            nf[1] = name_flag.substring(ab).replace("🆎", "");
        }
        // The flag is literal text, so it is quoted before being embedded in the expression.
        GoalFileName = fileList.get(0).getName()
                .replaceAll(Pattern.quote(nf[0]) + "\\d*" + Pattern.quote(nf[1]) + "\\.", ".");
    } else {
        GoalFileName = "0_op." + fileList.get(0).getName();
    }

    if (GoalFileDirectory == null || GoalFileDirectory.equals("")) {
        GoalFileDirectory = FilePath;
    }
    File goalDirectory = new File(GoalFileDirectory);
    if (!goalDirectory.exists()) {
        goalDirectory.mkdirs();
    }
    String CompleteGoalFilePath = GoalFileDirectory + File.separator + GoalFileName;

    byte[] bytes = new byte[1024 * 1024];
    try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(CompleteGoalFilePath))) {
        for (File piece : fileList) {
            String piecePath = piece.getPath();
            if (piecePath == null || "".equals(piecePath)) {
                // Skip an unusable entry; the original terminated the whole host process here.
                continue;
            }
            try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(piecePath))) {
                int len;
                while ((len = bis.read(bytes)) != -1) {
                    bos.write(bytes, 0, len);
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Concatenates the text files found under {@code dictionary} into {@code outfile}.
 *
 * <p>Files are selected by {@link #getFiles(String, String)} and ordered by {@link #sortFile(List)}.
 * Each file is read as UTF-8, passed through the find/replace rule stored in the
 * {@code Remover_regular} preference ("find🆎replace") and appended to the output, which is also
 * written as UTF-8. A line break is inserted between two files unless the following file is empty;
 * the last file gets no trailing line break.
 *
 * @param outfile    path of the merged file
 * @param dictionary folder to collect the input files from
 * @param filecata   regular-expression suffix the file names must end with (e.g. {@code "txt"})
 * @return always {@code true}; failures are reported through the exception
 * @throws IOException if a file cannot be read or the output cannot be written
 */
public boolean unionFile(String outfile, String dictionary, String filecata) throws IOException {
    List<File> fileList = sortFile(getFiles(dictionary, filecata));
    SharedPreferences preferences = getContext().getPreferences();
    String remove = preferences.getString("Remover_regular", "");
    // rp[0] is the expression to find, rp[1] its replacement; the last 🆎 separates the two.
    String[] rp = new String[] { "", "" };
    int ab = remove.lastIndexOf("🆎");
    if (ab == -1) {
        rp[0] = remove;
    } else if (ab == 0) {
        rp[0] = remove.replace("🆎", "");
    } else {
        rp[0] = remove.substring(0, ab);
        rp[1] = remove.substring(ab).replace("🆎", "");
        // A stored "\n" or "\t" in the replacement stands for the real control character.
        rp[1] = rp[1].replaceAll("\\\\n", "\n").replaceAll("\\\\t", "\t");
    }

    String eol = System.getProperty("line.separator");
    StringBuilder textbatch = new StringBuilder();
    // Write with the same charset the inputs are decoded with instead of the platform default.
    try (BufferedWriter fw = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(new File(outfile)), "UTF-8"))) {
        for (int i = 0; i < fileList.size(); i++) {
            boolean lastFile = i == fileList.size() - 1;
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(new FileInputStream(fileList.get(i)), "UTF-8"))) {
                String tem;
                String line = null; // one-line look-ahead so the final line of a file is recognisable
                do {
                    tem = line;
                    line = br.readLine();
                    if (tem == null) {
                        continue;
                    }
                    textbatch.append(tem);
                    boolean finalLine = line == null;
                    boolean suppressBreak = finalLine && (lastFile || fileList.get(i + 1).length() == 0);
                    if (!suppressBreak) {
                        textbatch.append(eol);
                    }
                } while (line != null);
            }
            fw.append(textbatch.toString().replaceAll("(?m)" + rp[0], rp[1]));
            textbatch.setLength(0);
        }
    }
    return true;
}
/**
 * Recursively collects the files under {@code path} whose names end with {@code fc}.
 *
 * <p>{@code fc} is spliced into a regular expression ({@code ".*" + fc + "$"}). Names that also
 * match {@code ".*0_op." + fc + "$"} are left out, which skips the {@code 0_op.*} files the merge
 * operations write. If {@code path} is not a directory it is tested itself.
 *
 * @param path file or folder to start from
 * @param fc   regular-expression suffix the file name must end with
 * @return the matching files, in the order the file system lists them; never null
 */
public static List<File> getFiles(String path, String fc) {
    File root = new File(path);
    List<File> files = new ArrayList<File>();
    if (!root.isDirectory()) {
        String name = root.getName();
        if (name.matches(".*" + fc + "$") && !name.matches(".*0_op." + fc + "$")) {
            files.add(root);
        }
        return files;
    }
    File[] subFiles = root.listFiles();
    if (subFiles == null) {
        // listFiles() returns null when the folder cannot be read; treat it as empty.
        return files;
    }
    for (File f : subFiles) {
        files.addAll(getFiles(f.getAbsolutePath(), fc));
    }
    return files;
}
/**
 * Orders files according to the {@code filesort} preference.
 *
 * <ul>
 * <li>{@code "1"} / {@code "5"}: last-modified time, ascending / descending</li>
 * <li>{@code "2"} / {@code "6"}: size, ascending / descending</li>
 * <li>{@code "3"} / {@code "7"}: piece number embedded between the two halves of the
 *     {@code name_flag} preference, ascending / descending; files without the marker are dropped
 *     (mode 3 additionally requires a '.' after the marker)</li>
 * <li>{@code "4"}: natural name order, reversed</li>
 * <li>anything else: natural name order (digit runs compare by numeric value)</li>
 * </ul>
 *
 * @param fileList files to order; sorted in place for the modes that keep every file
 * @return the ordered list (a new list for modes 3, 4 and 7)
 */
public List<File> sortFile(List<File> fileList) {
    SharedPreferences preferences = getContext().getPreferences();
    String sort = preferences.getString("filesort", "0");
    String name_flag = preferences.getString("name_flag", "-🆎标识");
    // The text before the last 🆎 precedes the piece number, the text after it follows the number.
    String flagHead = "";
    String flagTail = "";
    int ab = name_flag.lastIndexOf("🆎");
    if (ab == -1) {
        if (name_flag.equals("")) {
            flagHead = "-";
            flagTail = "标识";
        } else {
            flagHead = name_flag;
        }
    } else {
        flagHead = name_flag.substring(0, ab);
        flagTail = name_flag.substring(ab).replace("🆎", "");
    }
    // The flag is literal text, so it is quoted; group 1 captures the piece number.
    String numbered = ".*" + Pattern.quote(flagHead) + "([0-9]*)" + Pattern.quote(flagTail);

    switch (sort) {
    case "1":
        Collections.sort(fileList, byLastModified(false));
        return fileList;
    case "2":
        Collections.sort(fileList, byLength(false));
        return fileList;
    case "3":
        return sortNumberedPieces(fileList, Pattern.compile(numbered + "\\..*"), false);
    case "5":
        Collections.sort(fileList, byLastModified(true));
        return fileList;
    case "6":
        Collections.sort(fileList, byLength(true));
        return fileList;
    case "7":
        // Uses the configured flag like mode 3 instead of a hard-coded "-…标识".
        return sortNumberedPieces(fileList, Pattern.compile(numbered + ".*"), true);
    case "4": {
        Collections.sort(fileList, byNaturalName());
        List<File> reversed = new ArrayList<File>(fileList.size());
        for (int i = fileList.size() - 1; i >= 0; i--) {
            reversed.add(fileList.get(i));
        }
        return reversed;
    }
    default:
        Collections.sort(fileList, byNaturalName());
        return fileList;
    }
}

/** Compares by last-modified time. */
private static Comparator<File> byLastModified(final boolean descending) {
    return new Comparator<File>() {
        @Override
        public int compare(File f1, File f2) {
            int c = Long.compare(f1.lastModified(), f2.lastModified());
            return descending ? -c : c;
        }
    };
}

/** Compares by file size. */
private static Comparator<File> byLength(final boolean descending) {
    return new Comparator<File>() {
        @Override
        public int compare(File f1, File f2) {
            int c = Long.compare(f1.length(), f2.length());
            return descending ? -c : c;
        }
    };
}

/** Keeps the files whose name matches the piece pattern and orders them by piece number. */
private static List<File> sortNumberedPieces(List<File> fileList, final Pattern piecePattern,
        final boolean descending) {
    List<File> pieces = new ArrayList<File>();
    for (File candidate : fileList) {
        if (piecePattern.matcher(candidate.getName()).matches()) {
            pieces.add(candidate);
        }
    }
    Collections.sort(pieces, new Comparator<File>() {
        @Override
        public int compare(File o1, File o2) {
            if (o1.isDirectory() && o2.isFile()) {
                return -1;
            }
            if (o1.isFile() && o2.isDirectory()) {
                return 1;
            }
            long n1 = pieceNumber(o1.getName(), piecePattern);
            long n2 = pieceNumber(o2.getName(), piecePattern);
            // Equal numbers yield 0, as the Comparator contract requires.
            return descending ? Long.compare(n2, n1) : Long.compare(n1, n2);
        }
    });
    return pieces;
}

/** Extracts the piece number (group 1 of the pattern); a missing or empty number counts as 0. */
private static long pieceNumber(String name, Pattern piecePattern) {
    Matcher m = piecePattern.matcher(name);
    if (!m.matches() || m.group(1).length() == 0) {
        return 0;
    }
    try {
        return Long.parseLong(m.group(1));
    } catch (NumberFormatException e) {
        // More digits than a long can hold: order such a piece last.
        return Long.MAX_VALUE;
    }
}

/** Natural order on the name without extension; ties are broken by the full name. */
private static Comparator<File> byNaturalName() {
    return new Comparator<File>() {
        @Override
        public int compare(File o1, File o2) {
            if (o1.isDirectory() && o2.isFile()) {
                return -1;
            }
            if (o1.isFile() && o2.isDirectory()) {
                return 1;
            }
            int c = compareNaturally(stripExtension(o1.getName()), stripExtension(o2.getName()));
            if (c != 0) {
                return c;
            }
            // Same stem (e.g. a.txt / a.json): stay deterministic instead of reporting "less".
            return o1.getName().compareTo(o2.getName());
        }
    };
}

/** Removes the last ".ext" unless the dot is the first character. */
private static String stripExtension(String name) {
    int dot = name.lastIndexOf(".");
    return dot > 0 ? name.substring(0, dot) : name;
}

/**
 * Compares character by character, except that two digit runs starting at the same position are
 * compared by numeric value (fewer significant digits first, then digit by digit) and, when the
 * values are equal, by the number of leading zeros.
 */
private static int compareNaturally(String left, String right) {
    char[] a = left.toCharArray();
    char[] b = right.toCharArray();
    int i = 0;
    int j = 0;
    while (i < a.length && j < b.length) {
        boolean bothDigits = a[i] >= '0' && a[i] <= '9' && b[j] >= '0' && b[j] <= '9';
        if (!bothDigits) {
            if (a[i] != b[j]) {
                return a[i] - b[j];
            }
            i++;
            j++;
            continue;
        }
        int aEnd = i;
        while (aEnd < a.length && a[aEnd] >= '0' && a[aEnd] <= '9') {
            aEnd++;
        }
        int bEnd = j;
        while (bEnd < b.length && b[bEnd] >= '0' && b[bEnd] <= '9') {
            bEnd++;
        }
        // Only zeros in front of the first non-zero digit are "leading"; later zeros are significant.
        int aStart = i;
        while (aStart < aEnd && a[aStart] == '0') {
            aStart++;
        }
        int bStart = j;
        while (bStart < bEnd && b[bStart] == '0') {
            bStart++;
        }
        int aDigits = aEnd - aStart;
        int bDigits = bEnd - bStart;
        if (aDigits != bDigits) {
            return aDigits - bDigits;
        }
        for (int k = 0; k < aDigits; k++) {
            if (a[aStart + k] != b[bStart + k]) {
                return a[aStart + k] - b[bStart + k];
            }
        }
        int aZeros = aStart - i;
        int bZeros = bStart - j;
        if (aZeros != bZeros) {
            return aZeros - bZeros;
        }
        i = aEnd;
        j = bEnd;
    }
    return a.length - b.length;
}
/**
 * Returns the display name of this engine.
 *
 * <p>As a side effect, every plugin preference that has not been stored yet is initialised with
 * its default. A key counts as missing when reading it with the fallback {@code "true"} yields
 * {@code "true"}. Only the missing keys are written, so values the user already chose are kept.
 */
@NonNull
@Override
public String name() {
    SharedPreferences preferences = getContext().getPreferences();
    String[][] defaults = {
            { "match_split", "0" },
            { "singleFile_size", "90" },
            { "singleFile_unit", "MB" },
            { "operate", "fit" },
            { "filesort", "0" },
            { "segmentation_path", "" },
            { "merge_path", "" },
            { "Remover_regular", "🆎" },
            { "name_flag", "-🆎标识" } };
    SharedPreferences.Editor editor = null;
    for (String[] entry : defaults) {
        if (preferences.getString(entry[0], "true").equals("true")) {
            if (editor == null) {
                editor = preferences.edit();
            }
            editor.putString(entry[0], entry[1]);
        }
    }
    if (editor != null) {
        editor.apply();
    }
    return "文件分割与合并";
}
/**
 * Lists the codes offered in the "source language" selector, which this plugin uses to pick the
 * kind of file to work on.
 */
@NonNull
@Override
public List<String> loadSourceLanguages() {
    String[] fileKinds = { "file", "alltext", "txt", "json" };
    return Arrays.asList(fileKinds);
}
/**
 * Lists the codes offered in the "target language" selector, which this plugin uses to pick the
 * operation; the list is the same for every source code.
 */
@NonNull
@Override
public List<String> loadTargetLanguages(String sourceLanguage) {
    String[] operations = { "upper", "lower" };
    return Arrays.asList(operations);
}
/**
 * Maps a source or target code to the label shown to the user.
 * Keeping the labels in one place leaves room for localising them later.
 *
 * @param language one of the codes returned by the two load methods
 * @return the label, or {@code "???"} for an unknown code
 */
@NonNull
@Override
public String getLanguageDisplayName(String language) {
    if (language.equals("txt")) {
        return "txt格式";
    }
    if (language.equals("json")) {
        return "json格式";
    }
    if (language.equals("file")) {
        return "二进制文件工具";
    }
    if (language.equals("alltext")) {
        return "文本文件工具";
    }
    if (language.equals("upper")) {
        return "合并";
    }
    if (language.equals("lower")) {
        return "分割";
    }
    return "???";
}
/**
 * Entry point called by MT for every entry to "translate"; here the entry text is a path and the
 * language codes select the operation.
 *
 * <p>Target {@code "upper"} merges the files of the folder named by {@code text}: source
 * {@code "txt"}, {@code "json"} and {@code "alltext"} merge as text into {@code 0_op.txt} /
 * {@code 0_op.json} inside that folder, any other source merges byte for byte into the folder
 * given by the {@code merge_path} preference (default: the folder itself).
 * Target {@code "lower"} splits the file named by {@code text}, using the preferences
 * {@code singleFile_size} (decimal, or hexadecimal with a {@code 0x} prefix),
 * {@code singleFile_unit} and {@code segmentation_path}.
 *
 * @param text           path of the folder to merge or of the file to split
 * @param sourceLanguage file kind code
 * @param targetLanguage operation code
 * @return a status message for the user
 */
@NonNull
@Override
public String translate(String text, String sourceLanguage, String targetLanguage) {
    SharedPreferences preferences = getContext().getPreferences();
    switch (targetLanguage) {
    case "upper": {
        File ff = new File(text);
        if (!ff.exists() || !ff.isDirectory()) {
            return "文件夹路径不存在或者不符合格式!";
        }
        try {
            if (!text.matches("^/.*/$")) {
                text += "/";
            }
            if (sourceLanguage.equals("txt")) {
                return unionFile(text + "0_op.txt", ff.toString(), "txt") ? "txt合并处理成功" : "txt合并处理失败";
            }
            if (sourceLanguage.equals("json")) {
                return unionFile(text + "0_op.json", ff.toString(), "json") ? "json合并处理成功" : "json合并处理失败";
            }
            if (sourceLanguage.equals("alltext")) {
                return unionFile(text + "0_op.txt", ff.toString(), ".*") ? "文本文件合并处理成功" : "文本文件合并处理失败";
            }
            String hbPath = preferences.getString("merge_path", "");
            if (hbPath.equals("")) {
                hbPath = text;
            }
            Merge(text, hbPath);
            return "文件合并成功,文件路径:" + hbPath;
        } catch (Exception e) {
            return "文本文件合并异常" + e.toString();
        }
    }
    case "lower": {
        // Split only prints its failures, so a missing source would otherwise be reported as success.
        if (!new File(text).isFile()) {
            return "待分割文件不存在或不是文件!";
        }
        String _size = preferences.getString("singleFile_size", "90");
        long size;
        try {
            size = _size.indexOf("0x") == 0 ? Long.parseLong(_size.substring(2), 16) : Long.parseLong(_size, 10);
        } catch (NumberFormatException e) {
            // An unparsable setting falls back to the default instead of aborting the call.
            size = 90;
            _size = "90";
        }
        if (size <= 0) {
            size = 90;
            _size = "90";
        }
        String segmentation = preferences.getString("segmentation_path", "");
        String fileunit = preferences.getString("singleFile_unit", "MB");
        int format;
        if (sourceLanguage.equals("json")) {
            format = 3;
        } else if (sourceLanguage.equals("txt")) {
            format = 2;
        } else if (sourceLanguage.equals("alltext")) {
            format = 1;
        } else {
            format = 0;
        }
        Split(text, size, fileunit, segmentation, format);
        if (segmentation.equals("")) {
            // Pieces go to a folder named after the source without its extension.
            segmentation = text.replaceAll("\\..{1,5}$", "");
        }
        return _size + fileunit + " 文件分割完成,文件路径:" + segmentation;
    }
    default:
        return "正在开发......";
    }
}
}