python 找色高分辨率低分辨率 python高清

转载

jacksky 2023-10-22 08:11:56

文章标签 python 找色高分辨率低分辨率 python 爬虫 html 服务器 文章分类 Python 后端开发

用Python实现一个播放壁纸的小程序

实现方法：通过python的基本语法以及第三方库实现
高清壁纸的网址：http://www.netbian.com/dongman/（里面有很多动漫的高清壁纸很棒，喜欢的朋友可以进去看看）

第一步：主函数内所有需要运行的函数

#主函数
def main():
    """Discover every listing page once, then keep changing wallpapers forever.

    The page count is read from the first listing page, the listing URLs are
    derived from it, and get_pictures_all is then called in an endless loop.
    """
    base_url = "http://www.netbian.com/dongman/"  # first page of the anime wallpaper listing
    request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"}
    # Highest page number shown by the site's pagination.
    page_total = get_pictures_nums(base_url, request_headers)
    # One URL per listing page.
    listing_pages = get_pictures_html(base_url, page_total)
    # Never terminates on its own: every pass handles one randomly chosen page.
    while True:
        get_pictures_all(listing_pages, request_headers)

# Start the wallpaper loop only when this file is run as a script.
if __name__ == "__main__":
    main()

第二步：获取该网址的最大壁纸页数，主函数中用nums接收这个函数的返回值(也就是该网址的最大壁纸页数)

#获取该网址的最大壁纸页数的函数
def get_pictures_nums(url,headers):
    """Return the number of listing pages advertised by the page at *url*.

    Args:
        url: Address of the first listing page.
        headers: HTTP headers sent with the request.

    Returns:
        The page count as an int, taken from the second-to-last link inside
        the element whose class is "page".

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the pagination element is missing, holds fewer than
            two links, or the link text is not a number.
    """
    req = requests.get(url=url,headers=headers)
    print(req.status_code)
    # Surface a refused request as an HTTP error here, instead of letting it
    # turn into an AttributeError on a missing element further down.
    req.raise_for_status()
    req.encoding = "gbk"  # decode the page as GBK
    souplist = BeautifulSoup(req.text,features="lxml")
    conslist = souplist.find(class_="page")
    if conslist is None:
        raise ValueError("no element with class 'page' found at " + url)
    cons_listes = conslist.find_all("a")
    if len(cons_listes) < 2:
        raise ValueError("pagination at " + url + " has fewer than two links")
    # Presumably the last link is "next page", which makes the second-to-last
    # one the highest page number -- TODO confirm against the live markup.
    return int(cons_listes[-2].get_text(strip=True))

第三步：获取所有页面的网址，并将这个存放所有壁纸网址的列表list_pics返回

#获取所有存放壁纸的网址
def get_pictures_html(url, nums):
    """Build the URL of every listing page.

    Args:
        url: Address of the first listing page; it is also the prefix of the
            remaining page addresses.
        nums: Highest page number.

    Returns:
        A list starting with *url*, followed by "<url>index_<n>.htm" for each
        n from 2 through *nums*.
    """
    later_pages = [
        "".join((url, "index_", str(page_no), ".htm"))
        for page_no in range(2, nums + 1)
    ]
    return [url, *later_pages]

第四步：get_pictures_all函数从list_htmls列表中随机获取一个壁纸页面的网址，解析出该页面中的壁纸，再随机选出一张交给get_pictures函数

#获取所有壁纸的函数
def get_pictures_all(list_htmls,headers):
    """Pick a random listing page and hand one of its wallpapers to get_pictures.

    Args:
        list_htmls: Listing-page URLs to choose from.
        headers: HTTP headers sent with the page request and forwarded to
            get_pictures.

    The page is skipped, after a one-second pause, when it contains no
    element with class "list", when no usable thumbnail is left after
    filtering, or when more than five thumbnail URLs contain "newc".
    """
    chance = random.choice(list_htmls)
    req = requests.get(chance, headers=headers)
    req.encoding = "gbk"  # decode the page as GBK
    soup = BeautifulSoup(req.text, features="lxml")
    cons = soup.find(class_="list")
    if cons is None:
        # No wallpaper list in the response (for instance an error page).
        # Pause so the caller's endless loop does not retry immediately.
        time.sleep(1)
        return
    # Keep only thumbnails carrying both attributes that get_pictures parses,
    # and drop the advertisement tile whose alt text is "4k壁纸".
    candidates = [
        img
        for img in cons.find_all("img")
        if img.has_attr("alt") and img.has_attr("src") and img["alt"] != "4k壁纸"
    ]
    # Per the original author's note, thumbnails whose URL contains "newc"
    # cannot be turned into a full-size image URL directly.
    counts = sum("newc" in img["src"] for img in candidates)
    if not candidates or counts > 5:
        time.sleep(1)
        return
    # get_pictures expects the serialized <img> tag, not the tag object.
    get_pictures(str(random.choice(candidates)), headers)

第五步：从列表cons_list中随机获取一张壁纸，交给get_pictures函数爬取高清壁纸

#爬取某一页网址的每张壁纸
def get_pictures(pictures,headers):
    """Download the full-size image for one thumbnail and set it as wallpaper.

    Args:
        pictures: HTML text of a single <img> tag; its alt and src attributes
            are extracted with regular expressions.
        headers: HTTP headers sent with the image request.

    Nothing is downloaded and the wallpaper is left untouched when the tag
    lacks an alt or src attribute, when it is the "4k壁纸" advertisement
    tile, or when the image request returns an error status.
    """
    nums = 1  # NOTE: local to each call, so the message below always reports 1
    img_save_path = r"C:/bizhi/"  # the same file is overwritten on every download
    img_name = re.findall('.*?alt="(.*?)".*?', pictures, re.S)
    img_path = re.findall('.*?src="(.*?)".*?', pictures, re.S)
    if not img_name or not img_path:
        return
    if img_name[0] == "4k壁纸":
        return
    img_one_path = img_path[0]
    # Create the target folder on first use; a no-op when it already exists.
    os.makedirs(img_save_path, exist_ok=True)
    # Derive the full-size URL from the thumbnail URL by fixed offsets:
    # keep the first 38 characters, skip the next 5, drop the 14 characters
    # before the end and re-append the 4-character extension.
    # NOTE(review): presumably this removes a "small" prefix and a timestamp
    # suffix from the file name -- confirm against current site URLs.
    high_img = img_one_path[:38] + img_one_path[43:-14] + img_one_path[-4:]
    response = requests.get(url=high_img, headers=headers)
    if not response.ok:
        # Do not overwrite the saved wallpaper with an error page.
        print(response.status_code)
        time.sleep(1)
        return
    filepath = img_save_path + "壁纸.jpg"
    with open(filepath, "wb") as f:
        f.write(response.content)
    time.sleep(1)  # pause between downloads
    print("爬取第" + str(nums) + "张壁纸成功!!!")
    set_img_as_wallpaper(filepath)

第六步：将获取的高清壁纸给到set_img_as_wallpaper函数，再用python的标准库ctypes将该高清壁纸设为桌面的壁纸

def set_img_as_wallpaper(filepath):
    """Set the Windows desktop wallpaper to the image at *filepath*.

    Args:
        filepath: Path of the image file handed to SystemParametersInfoW.

    Windows only: relies on ctypes.windll.
    """
    spi_setdeskwallpaper = 20  # uiAction value used for changing the wallpaper
    # A single call is enough. The former second call passed no arguments to
    # a function that takes four, which is invalid.
    ctypes.windll.user32.SystemParametersInfoW(spi_setdeskwallpaper, 0, filepath, 0)

将完整的代码展现给大家看一下，有兴趣的可以复制使用一下，但使用过程中如果爬取太快的话可能会被服务器那端监视到，导致ip被封，但是过一段时间后，就可以再使用了。

#完整代码(大家有兴趣的可以尝试一下，可以把爬取时间改长一点) time.sleep方法改时间
import ctypes
import random
import re
import time
import requests
from bs4 import BeautifulSoup
import os
# NOTE(review): NO_PROXY names only stackoverflow.com, while every request in
# this script targets netbian.com -- confirm which host was intended.
os.environ['NO_PROXY'] = 'stackoverflow.com'

def set_img_as_wallpaper(filepath):
    """Set the Windows desktop wallpaper to the image at *filepath*.

    Args:
        filepath: Path of the image file handed to SystemParametersInfoW.

    Windows only: relies on ctypes.windll.
    """
    spi_setdeskwallpaper = 20  # uiAction value used for changing the wallpaper
    # A single call is enough. The former second call passed no arguments to
    # a function that takes four, which is invalid.
    ctypes.windll.user32.SystemParametersInfoW(spi_setdeskwallpaper, 0, filepath, 0)

#获取网址的最大值
def get_pictures_nums(url,headers):
    """Return the number of listing pages advertised by the page at *url*.

    Args:
        url: Address of the first listing page.
        headers: HTTP headers sent with the request.

    Returns:
        The page count as an int, taken from the second-to-last link inside
        the element whose class is "page".

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the pagination element is missing, holds fewer than
            two links, or the link text is not a number.
    """
    req = requests.get(url=url,headers=headers)
    print(req.status_code)
    # Surface a refused request as an HTTP error here, instead of letting it
    # turn into an AttributeError on a missing element further down.
    req.raise_for_status()
    req.encoding = "gbk"  # decode the page as GBK
    souplist = BeautifulSoup(req.text,features="lxml")
    conslist = souplist.find(class_="page")
    if conslist is None:
        raise ValueError("no element with class 'page' found at " + url)
    cons_listes = conslist.find_all("a")
    if len(cons_listes) < 2:
        raise ValueError("pagination at " + url + " has fewer than two links")
    # Presumably the last link is "next page", which makes the second-to-last
    # one the highest page number -- TODO confirm against the live markup.
    return int(cons_listes[-2].get_text(strip=True))

#获取所有存放壁纸的网址
def get_pictures_html(url, nums):
    """Build the URL of every listing page.

    Args:
        url: Address of the first listing page; it is also the prefix of the
            remaining page addresses.
        nums: Highest page number.

    Returns:
        A list starting with *url*, followed by "<url>index_<n>.htm" for each
        n from 2 through *nums*.
    """
    later_pages = [
        "".join((url, "index_", str(page_no), ".htm"))
        for page_no in range(2, nums + 1)
    ]
    return [url, *later_pages]

#爬取某一页网址的每张壁纸
def get_pictures(pictures,headers):
    """Download the full-size image for one thumbnail and set it as wallpaper.

    Args:
        pictures: HTML text of a single <img> tag; its alt and src attributes
            are extracted with regular expressions.
        headers: HTTP headers sent with the image request.

    Nothing is downloaded and the wallpaper is left untouched when the tag
    lacks an alt or src attribute, when it is the "4k壁纸" advertisement
    tile, or when the image request returns an error status.
    """
    nums = 1  # NOTE: local to each call, so the message below always reports 1
    img_save_path = r"C:/bizhi/"  # the same file is overwritten on every download
    img_name = re.findall('.*?alt="(.*?)".*?', pictures, re.S)
    img_path = re.findall('.*?src="(.*?)".*?', pictures, re.S)
    if not img_name or not img_path:
        return
    if img_name[0] == "4k壁纸":
        return
    img_one_path = img_path[0]
    # Create the target folder on first use; a no-op when it already exists.
    os.makedirs(img_save_path, exist_ok=True)
    # Derive the full-size URL from the thumbnail URL by fixed offsets:
    # keep the first 38 characters, skip the next 5, drop the 14 characters
    # before the end and re-append the 4-character extension.
    # NOTE(review): presumably this removes a "small" prefix and a timestamp
    # suffix from the file name -- confirm against current site URLs.
    high_img = img_one_path[:38] + img_one_path[43:-14] + img_one_path[-4:]
    response = requests.get(url=high_img, headers=headers)
    if not response.ok:
        # Do not overwrite the saved wallpaper with an error page.
        print(response.status_code)
        time.sleep(1)
        return
    filepath = img_save_path + "壁纸.jpg"
    with open(filepath, "wb") as f:
        f.write(response.content)
    time.sleep(1)  # pause between downloads
    print("爬取第" + str(nums) + "张壁纸成功!!!")
    set_img_as_wallpaper(filepath)

#获取所有壁纸的函数
def get_pictures_all(list_htmls,headers):
    """Pick a random listing page and hand one of its wallpapers to get_pictures.

    Args:
        list_htmls: Listing-page URLs to choose from.
        headers: HTTP headers sent with the page request and forwarded to
            get_pictures.

    The page is skipped, after a one-second pause, when it contains no
    element with class "list", when no usable thumbnail is left after
    filtering, or when more than five thumbnail URLs contain "newc".
    """
    chance = random.choice(list_htmls)
    req = requests.get(chance, headers=headers)
    req.encoding = "gbk"  # decode the page as GBK
    soup = BeautifulSoup(req.text, features="lxml")
    cons = soup.find(class_="list")
    if cons is None:
        # No wallpaper list in the response (for instance an error page).
        # Pause so the caller's endless loop does not retry immediately.
        time.sleep(1)
        return
    # Keep only thumbnails carrying both attributes that get_pictures parses,
    # and drop the advertisement tile whose alt text is "4k壁纸".
    candidates = [
        img
        for img in cons.find_all("img")
        if img.has_attr("alt") and img.has_attr("src") and img["alt"] != "4k壁纸"
    ]
    # Per the original author's note, thumbnails whose URL contains "newc"
    # cannot be turned into a full-size image URL directly.
    counts = sum("newc" in img["src"] for img in candidates)
    if not candidates or counts > 5:
        time.sleep(1)
        return
    # get_pictures expects the serialized <img> tag, not the tag object.
    get_pictures(str(random.choice(candidates)), headers)

#主函数
def main():
    """Discover every listing page once, then keep changing wallpapers forever.

    The page count is read from the first listing page, the listing URLs are
    derived from it, and get_pictures_all is then called in an endless loop.
    """
    base_url = "http://www.netbian.com/dongman/"
    request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"}
    # Highest page number shown by the site's pagination.
    page_total = get_pictures_nums(base_url, request_headers)
    # One URL per listing page.
    listing_pages = get_pictures_html(base_url, page_total)
    # Never terminates on its own: every pass handles one randomly chosen page.
    while True:
        get_pictures_all(listing_pages, request_headers)

# Start the wallpaper loop only when this file is run as a script.
if __name__ == "__main__":
    main()

注：当爬取速度过快时可能会报这个错误。虽然错误信息没有直接提示服务器断开了你的连接，但实际上应该是服务器封了你的ip，导致没有抓取到相应的对象。从最上端的白色字体处也可以发现，服务器返回的状态码是503（web服务器不能处理HTTP请求，可能是临时超载或者是服务器正在维护）。我打印了服务器的连接状态，自己查了一下，说是服务器不能接受我们这边的请求。具体原因等我以后熟悉了、懂得多了再和大家详细讲一下。我本身也是个小白，还在学习中，如果有问题和不足，也希望和大家一同讨论分析。

python 找色高分辨率低分辨率 python高清_python