java 亿行文件分段 java文件分段下载

转载

mob6454cc6328d1 2023-11-09 14:45:32

文章标签 java 亿行文件分段 java 队列多线程字节数组 文章分类 Java 后端开发

原理

首先需要判断目标服务器是否支持断点续传
方法是在请求的 Header 中添加 Range 字段，值格式为：bytes={开始下标}-{结束下标}（下标从 0 开始，头尾都包含），如 Range: bytes=10-20 表示获取下标 10 到下标 20 的内容，共 11 个字节。

当 Range字段合法时服务器若返回206状态码，表示支持断点续传。

Range: bytes=0- 表示从第一个字节一直取到文件末尾。我们先发送一次这样的请求：只需读取响应头，就能从 Content-Length 得到文件的总长度（不必把响应体读完），同时判断状态码是否为 206。

然后使用线程池，让每个线程请求不同的Range分段，并用一个字节数组保存请求到的字节数据。当所有分段下载完成后把该字节数组写出到文件。

本例使用比较简单的实现方法，文件下载过程中数据全部保存在内存中，所以并不是真正的断点续传（重启程序就会丢失），且不适用于过大的文件。

依赖

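<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<!-- 注意：下文代码使用的是 Apache HttpClient 4.x（CloseableHttpClient、HttpGet 等），而不是 OkHttp；此外还需要 spring-context（ThreadPoolTaskExecutor）以及 SLF4J 日志依赖 -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.13</version>
        </dependency>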

基础方法

/**
     * Sends one GET request for a byte range of {@code url}.
     *
     * <p>The request carries a {@code Range: bytes=start-end} header (both ends inclusive,
     * zero-based) and a {@code Referer} header built from the scheme-and-host prefix of the URL.
     * A new HTTP client is obtained from {@link #getCloseableHttpClient()} for every call.
     *
     * @param url   URL to request
     * @param start first byte offset to request; {@code null} is sent as {@code 0}
     * @param end   last byte offset to request (inclusive); {@code null} leaves the range
     *              open-ended, i.e. "until the end of the resource"
     * @return the response; the caller is responsible for closing it
     * @throws IOException if executing the request fails
     */
    public static CloseableHttpResponse getResponse(String url, Integer start, Integer end) throws IOException {
        CloseableHttpClient client = getCloseableHttpClient();
        HttpGet get = new HttpGet(url);
        // Position of the first '/' after "scheme://", i.e. where the path begins.
        int endIndex = url.indexOf("/", url.indexOf("//") + 2);
        // A URL without a path (e.g. "http://host") has no such '/': indexOf returns -1 and
        // substring(0, -1) would throw, so the whole URL is used as the Referer in that case.
        String referer = endIndex >= 0 ? url.substring(0, endIndex) : url;
        get.addHeader("Referer", referer);
        get.addHeader("Range", "bytes=" + (start != null ? start : 0) + "-" + (end != null ? end : ""));
        return client.execute(get);
    }

    /**
     * Builds an HTTP client whose connection-request, connect and socket timeouts are all
     * set to 30 seconds.
     *
     * @return a newly built client (a fresh instance on every call)
     */
    private static CloseableHttpClient getCloseableHttpClient() {
        // One value, in milliseconds, shared by all three timeouts.
        final int timeoutMillis = 30 * 1000;

        RequestConfig.Builder timeouts = RequestConfig.custom();
        timeouts.setConnectionRequestTimeout(timeoutMillis);
        timeouts.setConnectTimeout(timeoutMillis);
        timeouts.setSocketTimeout(timeoutMillis);
        RequestConfig requestConfig = timeouts.build();

        return HttpClients.custom().setDefaultRequestConfig(requestConfig).build();
    }

    /**
     * Creates and initializes a fixed-size thread pool.
     *
     * @param name     prefix used for the names of the pool's threads
     * @param coreSize number of threads; used as both the core and the maximum pool size
     * @return the initialized executor
     */
    public static ThreadPoolTaskExecutor getExecutor(String name, Integer coreSize) {
        int poolSize = coreSize;
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();

        // Identification
        pool.setThreadNamePrefix(name);

        // Sizing: core == max, with a queue of 1000 tasks and a 300 second keep-alive
        pool.setCorePoolSize(poolSize);
        pool.setMaxPoolSize(poolSize);
        pool.setQueueCapacity(1000);
        pool.setKeepAliveSeconds(300);

        // When the pool cannot accept a task, CallerRunsPolicy executes it on the
        // submitting thread instead of handing it to a pool thread.
        ThreadPoolExecutor.CallerRunsPolicy runOnCaller = new ThreadPoolExecutor.CallerRunsPolicy();
        pool.setRejectedExecutionHandler(runOnCaller);

        // On shutdown, let already-submitted tasks finish before the pool is closed.
        pool.setWaitForTasksToCompleteOnShutdown(true);

        pool.initialize();
        return pool;
    }

核心方法

/**
     * Downloads {@code url} into the file at {@code filePath}.
     *
     * <p>A first request with {@code Range: bytes=0-} probes the server:
     * <ul>
     *   <li>HTTP 206: range requests are supported. The file is fetched as 40 KB chunks by a
     *       pool of 10 threads into one in-memory byte array, which is then written to the
     *       file. The whole file is held in memory, so this is only suitable for files that
     *       fit in an {@code int}-sized array.</li>
     *   <li>HTTP 200: the server ignored the Range header and is already sending the whole
     *       file, so the response body is streamed straight to the file.</li>
     *   <li>Anything else: an {@link IOException} is thrown.</li>
     * </ul>
     *
     * @param url      URL of the file to download
     * @param filePath path of the target file; missing parent directories are created
     * @throws IOException if the probe response is unusable, a chunk cannot be downloaded
     *                     after several attempts, the calling thread is interrupted while
     *                     waiting, or writing the file fails
     */
    private static void PoolDownload(String url, String filePath) throws IOException {
        // Start time, used for the throughput log line
        long start = System.currentTimeMillis();

        File file = new File(filePath);
        // No explicit delete of an existing file: FileOutputStream truncates it when the data
        // is written, so a failed download no longer destroys the previous file up front.
        File parentFile = file.getParentFile();
        // getParentFile() is null for a bare file name; there is nothing to create then.
        if (parentFile != null && !parentFile.exists()) {
            parentFile.mkdirs();
        }

        // Probe request: learn the total size and whether range requests are supported.
        // try-with-resources closes the probe so its connection is not left open.
        int contentLength;
        try (CloseableHttpResponse response = getResponse(url, null, null)) {
            int statusCode = response.getStatusLine().getStatusCode();
            HttpEntity probeEntity = response.getEntity();

            if (statusCode == HttpStatus.SC_OK && probeEntity != null) {
                // Range not honoured: this response already carries the complete file.
                try (FileOutputStream fos = new FileOutputStream(file)) {
                    probeEntity.writeTo(fos);
                }
                logDownloadFinished(file, start, file.length());
                return;
            }
            if (statusCode != HttpStatus.SC_PARTIAL_CONTENT || probeEntity == null) {
                throw new IOException("Unexpected response for " + url + ": HTTP " + statusCode);
            }

            // For "bytes=0-" the length of the 206 body is the length of the whole file.
            long declaredLength = probeEntity.getContentLength();
            if (declaredLength < 0 || declaredLength > Integer.MAX_VALUE) {
                throw new IOException("Cannot buffer " + url + " in memory, content length = " + declaredLength);
            }
            contentLength = (int) declaredLength;
        }

        // Holds the downloaded data; written to the file once every chunk has arrived
        byte[] bytesFile = new byte[contentLength];
        int k = 1024;
        // Chunk size: 40 KB
        int step = 40 * k;
        // Computed in long so that contentLength + step cannot overflow int
        int chunkCount = (int) (((long) contentLength + step - 1) / step);

        // One count per chunk; await() returns only when every task has finished, and
        // countDown()/await() also make the workers' array writes visible to this thread.
        java.util.concurrent.CountDownLatch pending = new java.util.concurrent.CountDownLatch(chunkCount);
        // First failure seen by any task; once set, tasks that have not started yet are skipped.
        java.util.concurrent.atomic.AtomicReference<Throwable> failure =
                new java.util.concurrent.atomic.AtomicReference<>();

        ThreadPoolTaskExecutor downloadExecutor = getExecutor("d", 10);
        try {
            for (int chunk = 0; chunk < chunkCount; chunk++) {
                // from < contentLength <= Integer.MAX_VALUE, so this multiplication is safe
                int from = chunk * step;
                // The last chunk may be shorter than step
                int length = Math.min(step, contentLength - from);
                downloadExecutor.execute(() -> {
                    try {
                        if (failure.get() == null) {
                            downloadChunkWithRetry(url, bytesFile, from, length);
                        }
                    } catch (IOException | RuntimeException e) {
                        failure.compareAndSet(null, e);
                    } finally {
                        pending.countDown();
                    }
                });
            }
            log.info("等待任务结束");
            pending.await();
        } catch (InterruptedException e) {
            // Stop queued chunks, restore the interrupt flag and report the abort to the caller.
            failure.compareAndSet(null, e);
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while downloading " + url, e);
        } finally {
            downloadExecutor.shutdown();
        }

        Throwable cause = failure.get();
        if (cause != null) {
            throw new IOException("Failed to download " + url, cause);
        }

        // Write the assembled data to the target file
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(bytesFile);
        }

        logDownloadFinished(file, start, contentLength);
    }

    /**
     * Fetches bytes {@code [from, from + length)} of {@code url} into {@code target} at the same
     * offsets, retrying a limited number of times when an I/O error occurs.
     *
     * @throws IOException if every attempt fails
     */
    private static void downloadChunkWithRetry(String url, byte[] target, int from, int length) throws IOException {
        final int maxAttempts = 5;
        int lastByte = from + length - 1;
        IOException lastError = null;
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            // try-with-resources closes the response even when the attempt fails
            try (CloseableHttpResponse res = getResponse(url, from, lastByte)) {
                int status = res.getStatusLine().getStatusCode();
                HttpEntity entity = res.getEntity();
                // A 200 here would carry the whole file and must not be copied into this slot.
                if (status != HttpStatus.SC_PARTIAL_CONTENT || entity == null) {
                    throw new IOException("Expected HTTP 206 for bytes " + from + "-" + lastByte + " but got " + status);
                }
                InputStream inputStream = entity.getContent();
                // Read straight into the chunk's slot; never past the end of the slot.
                int totalRead = 0;
                int readLength;
                while (totalRead < length
                        && (readLength = inputStream.read(target, from + totalRead, length - totalRead)) != -1) {
                    totalRead += readLength;
                }
                if (totalRead < length) {
                    throw new IOException("Premature end of stream for bytes " + from + "-" + lastByte
                            + ": got " + totalRead + " of " + length);
                }
                EntityUtils.consume(entity);
                return;
            } catch (IOException e) {
                lastError = e;
                log.warn("bytes {}-{} attempt {}/{} failed: {}", from, lastByte, attempt, maxAttempts, e.getMessage());
            }
        }
        throw new IOException("Giving up on bytes " + from + "-" + lastByte + " after " + maxAttempts + " attempts", lastError);
    }

    /** Logs the file name, elapsed time and average throughput of a finished download. */
    private static void logDownloadFinished(File file, long startMillis, long byteCount) {
        // At least 1 ms so the division below cannot be a division by zero
        long elapsed = Math.max(1, System.currentTimeMillis() - startMillis);
        // byteCount is a long, so byteCount * 1000 does not overflow for files larger than ~2 MB
        log.info("{} 下载完毕 用时 {}毫秒 总速度:{}KB/s", file.getName(), elapsed, byteCount * 1000 / 1024 / elapsed);
    }

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。