private static final int THREAD_COUNT = 4; // number of worker threads
private static final int BUFFER_SIZE = 1024; // buffer size
/**
 * Converts a text file from one character encoding to another using {@code THREAD_COUNT} worker threads.
 *
 * <p>The input file is split into {@code THREAD_COUNT} contiguous byte ranges. Each range is handed to a
 * {@link FileReadTask}, which returns a temporary file holding its converted text. Once every task has
 * finished, the temporary files are appended to {@code outputFile} in chunk order. The temporary files
 * returned by the tasks are deleted before this method returns, whether it succeeds or fails.
 *
 * <p>Chunk boundaries are computed here as plain byte offsets, so a multi-byte character may straddle
 * two chunks; this method does not adjust for that.
 *
 * @param inputFile     file to convert, e.g. {@code new File("E:/02code/web/test.txt")}
 * @param sourceCharset name of the charset the input file is encoded in, e.g. {@code "GBK"}
 * @param outputFile    file that receives the converted text, e.g. {@code new File("E:/02code/web/tes1t.txt")}
 * @param targetCharset name of the charset to write, e.g. {@code "UTF-8"}
 * @throws InterruptedException if the calling thread is interrupted while waiting for a chunk
 * @throws IOException          if a temporary file cannot be read or the output file cannot be written
 * @throws ExecutionException   if converting any chunk fails; the first such failure is rethrown
 */
private static void conversionFileEncodingFormat(File inputFile, String sourceCharset, File outputFile, String targetCharset) throws InterruptedException, IOException, ExecutionException {
    long fileSize = inputFile.length();
    long chunkSize = fileSize / THREAD_COUNT;
    ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
    List<File> tempFiles = new ArrayList<>();
    try {
        List<Future<File>> futures = new ArrayList<>();
        for (int i = 0; i < THREAD_COUNT; i++) {
            long start = i * chunkSize;
            // The last chunk absorbs the remainder left over by the integer division.
            long end = (i == THREAD_COUNT - 1) ? fileSize : (start + chunkSize);
            futures.add(executor.submit(new FileReadTask(inputFile, start, end, i, sourceCharset, targetCharset)));
        }
        // No further tasks will be submitted; worker threads exit once the queue drains.
        executor.shutdown();

        // Wait for every chunk, not just up to the first failure, so that each temporary file
        // that was produced is known and can be removed in the finally block.
        ExecutionException firstFailure = null;
        for (Future<File> future : futures) {
            try {
                tempFiles.add(future.get());
            } catch (ExecutionException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            // Fail before the output file is opened, so a failed run does not truncate it.
            throw firstFailure;
        }

        // Concatenate the per-chunk results in chunk order.
        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outputFile))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (File tempFile : tempFiles) {
                try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(tempFile))) {
                    int bytesRead;
                    while ((bytesRead = bis.read(buffer)) != -1) {
                        bos.write(buffer, 0, bytesRead);
                    }
                }
            }
        }
    } finally {
        // A no-op after normal completion; on interruption it asks still-running tasks to stop.
        executor.shutdownNow();
        for (File tempFile : tempFiles) {
            if (!tempFile.delete()) {
                // Could not delete now (e.g. still locked); retry when the JVM exits.
                tempFile.deleteOnExit();
            }
        }
    }
}
/**
 * Converts one byte range of a file from the source charset to the target charset and returns a
 * temporary file containing the converted text.
 *
 * <p>Decoding is streamed through a single decoder, so a multi-byte character that happens to be
 * split across two internal reads is decoded correctly.
 *
 * <p>When the source charset encodes a line feed as the single byte {@code 0x0A} (true for GBK,
 * UTF-8 and other ASCII-compatible charsets), the requested {@code [start, end)} range is snapped to
 * line starts: each offset is moved forward to the byte following the first line feed found at or
 * after {@code offset - 1}. Offset 0 and offsets at or past the end of the file are kept as they are.
 * Because the same rule is applied to both ends, tasks created over adjacent ranges
 * ({@code end} of one equal to {@code start} of the next) still cover the file exactly once, and no
 * character is cut at a chunk boundary. For any other source charset the raw offsets are used
 * unchanged, and a character straddling a chunk boundary may still be decoded incorrectly.
 */
static class FileReadTask implements Callable<File> {
    private final File inputFile;
    private final long start;
    private final long end;
    private final int index;
    /**
     * Name of the charset the input file is encoded in.
     */
    private final String sourceCharset;
    /**
     * Name of the charset the temporary file is written in.
     */
    private final String targetCharset;

    /**
     * @param inputFile     file to read from
     * @param start         byte offset where the requested range begins (inclusive)
     * @param end           byte offset where the requested range ends (exclusive)
     * @param index         chunk number; only used in the temporary file's name
     * @param sourceCharset name of the charset the input file is encoded in
     * @param targetCharset name of the charset to write
     */
    public FileReadTask(File inputFile, long start, long end, int index, String sourceCharset, String targetCharset) {
        this.inputFile = inputFile;
        this.start = start;
        this.end = end;
        this.index = index;
        this.sourceCharset = sourceCharset;
        this.targetCharset = targetCharset;
    }

    /**
     * Converts the range and returns the temporary file holding the result. The caller owns the
     * returned file and is expected to delete it. If the conversion fails, the temporary file is
     * removed before the exception propagates.
     */
    @Override
    public File call() throws Exception {
        // A uniquely named file avoids collisions between concurrent or repeated conversions.
        File tempFile = File.createTempFile("temp_" + index + "_", ".txt");
        boolean converted = false;
        try {
            convertRangeTo(tempFile);
            converted = true;
            return tempFile;
        } finally {
            if (!converted && !tempFile.delete()) {
                tempFile.deleteOnExit();
            }
        }
    }

    /** Decodes this task's (possibly line-aligned) byte range and writes it, re-encoded, to {@code tempFile}. */
    private void convertRangeTo(File tempFile) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r");
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tempFile), targetCharset))) {
            long fileLength = raf.length();
            long from;
            long to;
            if (newlineIsSingleLineFeedByte(sourceCharset)) {
                from = lineStartAtOrAfter(raf, start, fileLength);
                to = lineStartAtOrAfter(raf, end, fileLength);
            } else {
                from = Math.max(0L, Math.min(start, fileLength));
                to = Math.max(from, Math.min(end, fileLength));
            }
            raf.seek(from);
            // One reader for the whole range: its decoder keeps incomplete byte sequences between
            // reads instead of turning them into replacement characters.
            java.io.Reader reader = new java.io.InputStreamReader(boundedStream(raf, to - from), sourceCharset);
            char[] chars = new char[BUFFER_SIZE];
            int charsRead;
            while ((charsRead = reader.read(chars)) != -1) {
                writer.write(chars, 0, charsRead);
            }
        }
    }

    /** Returns whether {@code charsetName} encodes {@code '\n'} as exactly the one byte {@code 0x0A}. */
    private static boolean newlineIsSingleLineFeedByte(String charsetName) throws IOException {
        byte[] encoded = "\n".getBytes(charsetName);
        return encoded.length == 1 && encoded[0] == '\n';
    }

    /**
     * Returns the offset of the first line start at or after {@code offset}: 0 for offsets at or
     * before the beginning of the file, {@code fileLength} for offsets at or past its end or when no
     * further line feed exists, otherwise the position just after the first line feed found at or
     * after {@code offset - 1}. Leaves the file pointer at an unspecified position.
     */
    private static long lineStartAtOrAfter(RandomAccessFile raf, long offset, long fileLength) throws IOException {
        if (offset <= 0) {
            return 0;
        }
        if (offset >= fileLength) {
            return fileLength;
        }
        // Start one byte early: if that byte is a line feed, offset itself is already a line start.
        long position = offset - 1;
        raf.seek(position);
        byte[] probe = new byte[BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = raf.read(probe)) != -1) {
            for (int i = 0; i < bytesRead; i++) {
                if (probe[i] == '\n') {
                    return position + i + 1;
                }
            }
            position += bytesRead;
        }
        return fileLength;
    }

    /** Wraps {@code raf} as a stream that reports end-of-stream after {@code limit} bytes; does not close {@code raf}. */
    private static java.io.InputStream boundedStream(final RandomAccessFile raf, final long limit) {
        return new java.io.InputStream() {
            private long remaining = limit;

            @Override
            public int read() throws IOException {
                if (remaining <= 0) {
                    return -1;
                }
                int value = raf.read();
                if (value != -1) {
                    remaining--;
                }
                return value;
            }

            @Override
            public int read(byte[] b, int off, int len) throws IOException {
                if (len == 0) {
                    return 0;
                }
                if (remaining <= 0) {
                    return -1;
                }
                int bytesRead = raf.read(b, off, (int) Math.min(len, remaining));
                if (bytesRead > 0) {
                    remaining -= bytesRead;
                }
                return bytesRead;
            }
        };
    }
}
java多线程转换文件格式
原创
©著作权归作者所有:来自51CTO博客作者痛而不觉的原创作品,请联系作者获取转载授权,否则将追究法律责任
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
awk转换文件格式,bash脚本检测目录属性(项目)
该文章还是不错的,如果有瑕疵敬请指点,谢谢!
bash awk 检测权限 用户 组 -
java 文本转公式 java转换文件格式
由于 GWT(Google Web Toolkit)要求源代码使用 UTF-8 编码(采用 GBK 会出现乱码),因此决定将所有 Java 文件的编码(encoding)改为 UTF-8。
java 文本转公式 java encoding gwt output