python下载M3U8文件

原创

wx5bbc67ce7b2af 2019-12-23 10:41:37 ©著作权

©著作权归作者所有：来自51CTO博客作者wx5bbc67ce7b2af的原创作品，请联系作者获取转载授权，否则将追究法律责任

M3U8 是 Unicode 版本的 M3U，用 UTF-8 编码。"M3U" 和 "M3U8" 文件都是苹果公司使用的 HTTP Live Streaming（HLS） 协议格式的基础，这种协议格式可以在 iPhone 和 Macbook 等设备播放。
上述文字定义来自于维基百科。
可以看到，m3u8 文件其实是 HTTP Live Streaming（缩写为 HLS） 协议的部分内容，而 HLS 是一个由苹果公司提出的基于 HTTP 的流媒体网络传输协议。
HLS 的工作原理是把整个流分成一个个小的基于 HTTP 的文件来下载，每次只下载一些。当媒体流正在播放时，客户端可以选择从许多不同的备用源中以不同的速率下载同样的资源，允许流媒体会话适应不同的数据速率。在开始一个流媒体会话时，客户端会下载一个包含元数据的 extended M3U (m3u8) playlist文件，用于寻找可用的媒体流。
HLS 只请求基本的 HTTP 报文，与实时传输协议（RTP）不同，HLS 可以穿过任何允许 HTTP 数据通过的防火墙或者代理服务器。它也很容易使用内容分发网络来传输媒体流。

简而言之，HLS 是新一代流媒体传输协议，其基本实现原理为将一个大的媒体文件进行分片，将该分片文件资源路径记录于 m3u8 文件（即 playlist）内，其中附带一些额外描述（比如该资源的多带宽信息···）用于提供给客户端。客户端依据该 m3u8 文件即可获取对应的媒体资源，进行播放。
因此，客户端获取 HLS 流文件，主要就是对 m3u8 文件进行解析操作。
简介参考：https://www.jianshu.com/p/e97f6555a070



ts 文件一般怎么处理?  
1 只有m3u8文件，需要下载ts文件
2 有ts文件，但因为被加密无法播放，需要解密
3 ts文件能正常播放，但数量多且单个文件很小，需要合并

代码使用：
修改main.py文件中的两个参数（m3u8_url 和 video_name），运行main.py下载即可

# URL of the m3u8 playlist to download
m3u8_url = 'https://jdvodrvfb210d.vod.126.net/mooc-video/nos/hls/2018/04/28/1009218006_cff89340b62041e396ec9a91a9974a81_sd.m3u8'
# Path of the merged output file (same value as the one set in main.py)
video_name = "result/h2.ts"

代码参考：https://www.jianshu.com/p/2a5403234b14

# combine_ts.py

# encoding=utf-8
import os


def file_walker(path):
    """Return the paths of the ``.ts`` files in *path*, in numeric order.

    Files are expected to be named ``<integer>.ts`` (``0.ts``, ``1.ts``, ...)
    and are ordered by that integer, so ``10.ts`` sorts after ``2.ts``.
    Entries that do not end in ``.ts`` are ignored. The scan is not
    recursive.

    Args:
        path: Directory to scan.

    Returns:
        A list of file paths (``path`` joined with each file name), sorted
        by segment number.

    Raises:
        ValueError: If a ``.ts`` file's name without the suffix is not an
            integer.
    """
    suffix = ".ts"
    # Slice the suffix off instead of calling str.strip(".ts"): strip()
    # removes any leading/trailing '.', 't' or 's' characters, so a file
    # such as "t7.ts" used to be silently turned into a nonexistent "7.ts".
    stems = [
        name[:-len(suffix)]
        for name in os.listdir(path)
        if name.endswith(suffix)
    ]
    stems.sort(key=int)
    return [os.path.join(path, stem + suffix) for stem in stems]


def combine(ts_path,file_name):
    """Concatenate the ``.ts`` segments found in *ts_path* into one file.

    Segments are appended in the order returned by ``file_walker``. Any
    existing file at *file_name* is overwritten.

    Args:
        ts_path: Directory containing the segment files.
        file_name: Path of the merged output file. Its parent directory is
            created if it does not exist yet.

    Returns:
        None.
    """
    segment_paths = file_walker(ts_path)

    # open() does not create missing directories, so a target such as
    # "result/hh" would fail on a fresh checkout without this.
    out_dir = os.path.dirname(file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(file_name, "wb") as merged:
        for segment_path in segment_paths:
            # Close every segment as soon as it is copied instead of
            # leaving one open handle per segment behind.
            with open(segment_path, "rb") as segment:
                merged.write(segment.read())


if __name__ == "__main__":
    # Script entry point: merge the segments under "ts_list" into "result/hh".
    combine(ts_path="ts_list", file_name="result/hh")

# down_ts.py

# encoding=utf-8

import datetime
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Silence urllib3's InsecureRequestWarning: download() below calls
# requests.get(..., verify=False), which would otherwise warn on every request.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

from package.mk_ts_url import get_ts_urls


def download(ts_urls, download_path):
    """Download each segment URL to ``<download_path>/<index>.ts``.

    Segments are fetched one after another and named by their position in
    *ts_urls* (``0.ts``, ``1.ts``, ...). Downloading stops at the first
    failed request: the error is printed and the function returns without
    fetching the remaining segments.

    Args:
        ts_urls: Sequence of segment URLs.
        download_path: Directory to write the segments into; it is not
            created here.

    Returns:
        None.
    """
    for index, ts_url in enumerate(ts_urls):
        try:
            response = requests.get(ts_url, stream=True, verify=False)
        except requests.exceptions.RequestException as e:
            # Format the exception itself: "%s" % e.args raises TypeError
            # whenever args does not hold exactly one item.
            print("异常请求：%s" % e)
            return

        # The context manager releases the streamed connection even if
        # writing the file fails part-way through.
        with response:
            if not response.ok:
                # Never save an HTTP error page as if it were segment data.
                print("异常请求：%s %s" % (response.status_code, ts_url))
                return

            ts_path = download_path + "/{0}.ts".format(index)
            with open(ts_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)


if __name__ == "__main__":
    # Script entry point: resolve the sample playlist's segment URLs and
    # download them into the "ts_list" directory.
    m3u8_url = 'https://jdvodrvfb210d.vod.126.net/mooc-video/nos/hls/2018/04/28/1009218006_cff89340b62041e396ec9a91a9974a81_sd.m3u8'
    download(get_ts_urls(m3u8_url), "ts_list")

# mk_ts_url.py

# encoding=utf-8
"""
# m3u8文件下载url地址
https://jdvodrvfb210d.vod.126.net/mooc-video/nos/hls/2018/04/28/1009218006_cff89340b62041e396ec9a91a9974a81_sd.m3u8

#ts地址
https://jdvodrvfb210d.vod.126.net/mooc-video/nos/hls/2018/04/28/1009218006_cff89340b62041e396ec9a91a9974a81_sd0.ts

＃
"""
import requests


def get_ts_urls(m3u8_url):
    """Fetch an m3u8 playlist and return the URLs of its ``.ts`` segments.

    Every non-tag playlist line whose path ends in ``.ts`` is resolved
    against *m3u8_url*, so relative entries become absolute URLs and
    entries that are already absolute are kept as they are.

    Args:
        m3u8_url: URL of the playlist file.

    Returns:
        A list of segment URLs in playlist order.

    Raises:
        requests.exceptions.RequestException: If the playlist cannot be
            fetched or the server answers with an HTTP error status.
    """
    # Function-scope import: only this function needs it.
    from urllib.parse import urljoin

    response = requests.get(m3u8_url)
    # Fail loudly rather than parsing an error page as a playlist.
    response.raise_for_status()

    urls = []
    # splitlines() + strip() also copes with CRLF playlists, where
    # split('\n') leaves a trailing '\r' and endswith(".ts") never matches.
    for raw_line in response.text.splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue  # blank line or playlist tag/comment
        # Check the extension without the query string ("seg.ts?token=...").
        if entry.split("?", 1)[0].endswith(".ts"):
            # urljoin replaces the playlist's last path component.
            # str.strip(file_name) strips a *set of characters* from both
            # ends and could eat into the scheme or the directory.
            urls.append(urljoin(m3u8_url, entry))
    return urls


if __name__ == "__main__":
    # Script entry point: print the sample playlist's segment URLs, one per line.
    m3u8_url = 'https://jdvodrvfb210d.vod.126.net/mooc-video/nos/hls/2018/04/28/1009218006_cff89340b62041e396ec9a91a9974a81_sd.m3u8'
    segment_urls = get_ts_urls(m3u8_url)
    print(*segment_urls, sep="\n")

# main.py

# encoding=utf-8
import os
import shutil  # 文件树操作需要
import datetime
from package.combine_ts import combine
from package.down_ts import download, get_ts_urls

# URL of the m3u8 playlist to download
m3u8_url = "https://jdvodrvfb210d.vod.126.net/mooc-video/nos/hls/2018/04/28/1009218006_cff89340b62041e396ec9a91a9974a81_sd.m3u8"
# Path of the merged output video file
video_name = 'result/h2.ts'


def down_m3u8(m3u8_url,video_name):
    """Download an m3u8 stream and merge its segments into *video_name*.

    The segments are downloaded into a temporary ``ts_list_temp`` directory
    (relative to the current working directory), merged into *video_name*,
    and the temporary directory is removed afterwards. The elapsed time is
    printed when the merge has finished.

    Args:
        m3u8_url: URL of the m3u8 playlist.
        video_name: Path of the merged output file. Its parent directory is
            created if it does not exist yet.

    Returns:
        None.
    """
    ts_urls = get_ts_urls(m3u8_url)
    m3u8_file_name = m3u8_url.split("/")[-1]
    temp_download_path = "ts_list_temp"

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(temp_download_path, exist_ok=True)
    # Make sure the merged file's directory (e.g. "result") exists.
    output_dir = os.path.dirname(video_name)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print("开始下载 %s" % m3u8_file_name, end="\t")
    start = datetime.datetime.now().replace(microsecond=0)
    try:
        download(ts_urls, temp_download_path)
        combine(temp_download_path, video_name)
    finally:
        # Always remove the temporary segments, even when downloading or
        # merging fails, so stale files cannot leak into a later run.
        shutil.rmtree(temp_download_path)
    end = datetime.datetime.now().replace(microsecond=0)
    print("耗时：%s" % (end - start))


if __name__ == "__main__":
    # Script entry point: download the playlist configured at module level.
    down_m3u8(m3u8_url=m3u8_url, video_name=video_name)