python 多线程下载 link

转载

网络安全专家 2024-11-02 23:49:52

文章标签 python 多线程下载 link python多线程下载断点下载 urlopen 进度条 文章分类 Python 后端开发

多线程下载步骤

使用 urlopen() 方法打开远程资源
获取指定的 URL 对象所指向资源的大小（通过 Content-Length 响应头获取）
计算每个线程应该下载网络资源的哪个部分（从哪个字节开始，到哪个字节结束）
依次创建并启动多个线程来下载网络资源的指定部分

相当于多个线程执行体，对同一资源进行分段下载

断点下载原理

下载之初生成与网络资源具有相同大小的空文件及配置文件
配置文件分别记录每个线程已经下载到哪个字节
当网络断开后再次开始下载时，每个线程根据配置文件中记录的位置向后下载

代码示例

# urlopen多线程下载

from urllib.request import *
import threading


class DownUtil:
    """Download one remote resource with several threads, one byte range each.

    Attributes:
        path: URL of the resource to download.
        target_file: Local path the resource is written to.
        thread_num: Number of parts (and therefore threads) to split into.
        threads: The DownThread workers created by the last download() call.
        file_size: Size in bytes reported by the server; 0 until download()
            has read the Content-Length header.
    """

    def __init__(self, path, target_file, thread_num):
        self.path = path
        self.thread_num = thread_num
        self.target_file = target_file
        # Worker threads; filled in by download().
        self.threads = []
        # Defined up front so get_complete_rate() is safe before download().
        self.file_size = 0

    def download(self):
        """Query the resource size, pre-allocate the file and start the workers.

        The method returns as soon as the worker threads have been started;
        it does not wait for them to finish.

        Raises:
            ValueError: If thread_num is smaller than 1, or if the server does
                not report a positive Content-Length (the resource cannot be
                split into parts without knowing its size).
        """
        if self.thread_num < 1:
            raise ValueError("thread_num must be at least 1")
        # Build the request used only to learn the resource size.
        req = Request(url=self.path, method="GET")
        req.add_header('Accept', '*/*')
        req.add_header('Charset', 'utf-8')
        req.add_header('Connection', 'Keep-Alive')
        f = urlopen(req)
        try:
            # Message.get() is case-insensitive, unlike a plain dict lookup.
            self.file_size = int(f.headers.get('Content-Length', 0))
        finally:
            f.close()
        if self.file_size <= 0:
            raise ValueError("server did not report a usable Content-Length")
        # Create (or reset) the target exactly once and size it.  Opening it
        # with 'wb' per thread would truncate it again each time and could
        # discard bytes that earlier threads had already written.
        with open(self.target_file, 'wb') as target:
            target.truncate(self.file_size)
        self.threads = []
        # Size of the part handled by each thread.
        current_part_size = self.file_size // self.thread_num + 1
        for i in range(self.thread_num):
            # First byte this thread is responsible for.
            start_pos = i * current_part_size
            if start_pos >= self.file_size:
                # Tiny resource: nothing is left for the remaining threads.
                break
            # The last part is usually shorter than the others.
            part_size = min(current_part_size, self.file_size - start_pos)
            # 'r+b' opens for writing without truncating the shared file.
            t = open(self.target_file, 'r+b')
            # Position this handle at the start of the thread's part.
            t.seek(start_pos, 0)
            td = DownThread(self.path, start_pos, part_size, t)
            self.threads.append(td)
            td.start()

    def get_complete_rate(self):
        """Return the downloaded fraction as a float between 0 and 1.

        Returns 0.0 while the resource size is still unknown (for example
        before download() has been called).
        """
        if self.file_size <= 0:
            return 0.0
        # Total number of bytes written so far by all workers.
        sum_size = sum(td.length for td in self.threads)
        # Clamp in case a worker reports more than its share.
        return min(1.0, sum_size / self.file_size)


class DownThread(threading.Thread):
    """Worker thread that downloads one contiguous part of a resource.

    Attributes:
        path: URL of the resource.
        start_pos: Offset of the first byte this thread downloads.
        current_part_size: Number of bytes this thread is responsible for.
        current_part: Writable binary file object, already positioned at
            start_pos; it is closed when run() finishes.
        length: Number of bytes this thread has written so far.
    """

    # Upper bound for a single read from the network stream.
    _CHUNK_SIZE = 1024

    def __init__(self, path, start_pos, current_part_size, current_part):
        super().__init__()
        self.path = path
        # Offset at which this thread starts downloading.
        self.start_pos = start_pos
        # Number of bytes this thread has to download.
        self.current_part_size = current_part_size
        # File object this thread writes its part into.
        self.current_part = current_part
        # Bytes downloaded by this thread so far.
        self.length = 0

    @staticmethod
    def _skip_bytes(stream, count):
        """Read and discard up to count bytes from stream, in large chunks."""
        remaining = count
        while remaining > 0:
            skipped = stream.read(min(65536, remaining))
            if not skipped:
                break
            remaining -= len(skipped)

    def run(self):
        """Download this thread's part and write it to current_part.

        A Range header asks the server for just this part.  If the response
        is not 206 Partial Content, the range was not honoured and the bytes
        before start_pos are read and discarded instead.  At most
        current_part_size bytes are written.  current_part is always closed
        on exit, including when an exception is raised.
        """
        # Imported here so the block does not rely on the file's star import.
        from urllib.error import HTTPError

        try:
            if self.current_part_size <= 0:
                return
            req = Request(url=self.path, method="GET")
            req.add_header('Accept', '*/*')
            req.add_header('Charset', 'UTF-8')
            req.add_header('Connection', 'Keep-Alive')
            # Range bounds are inclusive on both ends.
            end_pos = self.start_pos + self.current_part_size - 1
            req.add_header('Range', 'bytes=%d-%d' % (self.start_pos, end_pos))
            try:
                f = urlopen(req)
            except HTTPError as err:
                if err.code == 416:
                    # Range starts past the end of the resource: nothing to do.
                    err.close()
                    return
                raise
            with f:
                if getattr(f, 'status', None) != 206:
                    # Whole resource was returned; skip to our own part.
                    self._skip_bytes(f, self.start_pos)
                while self.length < self.current_part_size:
                    # Never read past the end of this thread's part.
                    want = min(self._CHUNK_SIZE,
                               self.current_part_size - self.length)
                    data = f.read(want)
                    if not data:
                        break
                    self.current_part.write(data)
                    # Accumulate the number of bytes downloaded so far.
                    self.length += len(data)
        finally:
            self.current_part.close()


# Demo: download a sample image using three worker threads.
du = DownUtil(
    path="https://desk-fd.zol-img.com.cn/t_s1920x1200c5/g5/M00/0F/00/ChMkJlwuAEuIMbmBAAMSpPUXEUkAAuKRAMRt0IAAxK8664.jpg",
    target_file='a.jpg',
    thread_num=3,
)

# Start the download threads; the call returns once they have been started.
du.download()


# Timer callback that reports download progress.
def show_process():
    """Print the current progress and re-arm the timer until the download ends.

    The completion rate is sampled once per call, so the printed value and
    the decision to stop always agree and the final line reaches 100 %.
    Rescheduling also stops when no worker thread is alive any more, so a
    failed download does not keep the timer running forever.
    """
    # The module-level name t always refers to the most recent timer.
    global t
    rate = du.get_complete_rate()
    # '\r' rewrites the same console line; flush because no newline is sent.
    print('\r 已完成：%2d %%' % (rate*100), end=" ", flush=True)
    workers_running = any(worker.is_alive() for worker in du.threads)
    if rate < 1 and workers_running:
        # Run show_process again after 0.1 seconds.
        t = threading.Timer(0.1, show_process)
        t.start()


# Schedule the first progress report 0.1 seconds from now.
t = threading.Timer(interval=0.1, function=show_process)
t.start()

效果

>python -u "urlopen_multi_thread_download.py"
 已完成：100 %

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。

上一篇：win mysql5数据库

提问和评论都可以，用心的回复会被更多人看到评论

发布评论

相关文章

官方博客	全部文章	热门标签	班级博客
了解我们	网站地图	意见反馈

鸿蒙开发者社区	51CTO学堂
51CTO	软考资讯

python 多线程 下载 link

python 多线程 下载 link

多线程下载步骤

断点下载原理

代码示例

51CTO博客

python 多线程下载 link

python 多线程下载 link