python模拟商场优惠打折商品打折python

转载

mob64ca1400bfa8 2024-03-07 14:50:02

文章标签 python模拟商场优惠打折大数据 html 字符串搜索 文章分类 Python 后端开发

之前写到获取网页信息，这回就把剩下的获取plus价格低于原价5折的商品的代码写完！
。
。
。
。
首先写一个获取plus价格的方法，把plus价格低于原价5折的商品爬取出来，代码如下：

def _to_float(text):
    """Return *text* parsed as a float, or None when it is not numeric."""
    try:
        return float(text)
    except (TypeError, ValueError):
        return None


def _parse_plus_deal(li):
    """Extract ``[url, price, plus_price, name]`` from one product ``<li>`` tag.

    Returns None when the item carries no PLUS price, when a required piece
    of markup is missing or not numeric, or when the PLUS price is not below
    half of the regular price.
    """
    price_box = li.find('div', class_='p-price')
    if price_box is None:
        return None
    price_tag = price_box.find('i')
    plus_tag = price_box.find(title="PLUS会员专享价")
    if price_tag is None or plus_tag is None:
        return None

    price = _to_float(price_tag.text)
    # The PLUS price sits inside the tag markup as <em>￥123.00</em>; the
    # non-greedy group keeps the capture to the first <em> element only.
    plus_match = re.search(r"<em>￥(.+?)</em>", str(plus_tag))
    plus_price = _to_float(plus_match.group(1)) if plus_match else None
    # A non-positive regular price would make the ratio meaningless
    # (or divide by zero), so such items are skipped.
    if price is None or plus_price is None or price <= 0:
        return None
    if plus_price / price >= 0.5:
        return None

    # The first href found in the item's markup is used as the product link.
    href_match = re.search(r'href="(.*?)"', str(li))
    if href_match is None:
        return None
    href = href_match.group(1)
    # Only add a scheme when the link does not already carry one.
    path = href if href.startswith(("http://", "https://")) else "http:" + href

    name_box = li.find('div', class_='p-name p-name-type-2')
    name_tag = name_box.find('em') if name_box is not None else None
    # A missing title should not hide an otherwise valid deal.
    name = name_tag.text if name_tag is not None else ''

    return [path, price, plus_price, name]


def get_price(html):
    """Collect products whose PLUS price is below 50% of the regular price.

    Args:
        html: Markup of one search-result page; it is parsed with
            BeautifulSoup using the 'lxml' parser.

    Returns:
        A list with one ``[url, price, plus_price, name]`` entry per
        qualifying ``<li class="gl-item">`` element. Items with missing or
        malformed markup are skipped instead of aborting the whole page.

    Side effects:
        For every qualifying product a one-line summary is passed to
        ``sent_message`` together with the window name '买京东便野'.
    """
    soup = BeautifulSoup(html, 'lxml')
    result_page = []
    for li in soup.find_all('li', class_='gl-item'):
        detail = _parse_plus_deal(li)
        if detail is None:
            continue
        result_page.append(detail)
        path, price, plus_price, name = detail
        msg = f"{path}   原价为：{price}    会员价为：{plus_price}      {name}"
        window_name = '买京东便野'
        sent_message(msg, window_name)
    return result_page

这个方法需要输入html参数，html的获取方式在上一篇文章里已经介绍过了。
。
。
。
。
。
因为京东搜索需要关键词，我就把关键词都放到了一个text文本里，这让我可以随时修改，所以这里也要写一个读取text文本内容的方法：

def read_keys(path):
    """Read search keywords from a UTF-8 text file, one keyword per line.

    Args:
        path: Path of the text file holding the keywords.

    Returns:
        A list with one string per line of the file, with the newline
        characters removed. Blank lines are kept as empty strings.
    """
    # The with-block closes the file even if reading or decoding fails.
    with open(path, encoding="utf8") as f:
        return [each_line.strip('\n') for each_line in f]

。
。
。
爬取到结果后还需要把结果储存到excel表：

def run(path, max_pages=199):
    """Scrape every keyword listed in *path* and save the hits to Excel.

    For each keyword returned by ``read_keys(path)`` a workbook named
    ``<keyword>.xlsx`` is created, search-result pages 1..max_pages are
    fetched with ``get_page`` and filtered with ``get_price``, and each
    result row is written to the 'plus' worksheet. Afterwards the workbook
    is deleted when it has no rows, otherwise it is renamed to
    ``<keyword><row count>.xlsx``.

    Args:
        path: Path of the keyword file handed to ``read_keys``.
        max_pages: Number of result pages to request per keyword. The
            default of 199 matches the previously hard-coded page range.
    """
    for key in read_keys(path):
        print(key)
        filename = key + '.xlsx'
        row_count = 0
        workbook = None
        try:
            workbook = xlsxwriter.Workbook(filename)
            sheet = workbook.add_worksheet('plus')
            sheet.write_row(0, 0, ('路径', '原价格', 'plus价格', '名称'))
            for n in range(1, max_pages + 1):
                # Per the original author's note, "scrolling=y" together with
                # page=n and s=1+(n-1)*30 (s being the index of the first
                # product on the page) already covers lazily loaded results,
                # so no second request per page is made.
                url = (
                    "https://search.jd.com/Search?keyword=" + "自营" + key
                    + "&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&stock=1&page=" + str(n)
                    + "&s=" + str(1 + (n - 1) * 30)
                    + "&click=0&scrolling=y"
                )
                html = get_page(url)
                # "or []" tolerates a falsy result (0, None or an empty list).
                for row in get_price(html) or []:
                    print(row)
                    # Row 0 holds the header, so data starts at row 1.
                    sheet.write_row(row_count + 1, 0, row)
                    row_count += 1
        except Exception as exc:
            # Best effort: report the failure, keep what was collected so far
            # and move on to the next keyword. KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print('报错了！！', repr(exc))
        finally:
            # The workbook only exists if its construction succeeded.
            if workbook is not None:
                workbook.close()
                if row_count == 0:
                    os.remove(filename)
                else:
                    # os.replace overwrites a stale file from an earlier run
                    # instead of failing on Windows.
                    os.replace(filename, key + str(row_count) + '.xlsx')

。
。
。
。
。
最后就是python的固定运行格式了，我这里用的是双进程（multiprocessing.Process），有需要的可以修改为4个或更多的进程，这个看个人的计算机是多少核心：

if __name__ == '__main__':
    # delete_excel() is defined elsewhere; presumably it clears workbooks
    # left over from an earlier run -- TODO confirm.
    delete_excel()
    # One worker process per keyword file; add more files here to use
    # more processes.
    key_files = ('keys1.txt', 'keys2.txt')
    workers = [Process(target=run, args=(key_file,)) for key_file in key_files]
    for worker in workers:
        worker.start()
    # Wait explicitly for every worker before the script exits.
    for worker in workers:
        worker.join()

。.
.
.
.
.

还有在开始前还得导入用到的库：

from bs4 import BeautifulSoup
import re
import xlsxwriter
import requests
import os
import numba
from multiprocessing import Process
import win32gui
import win32con
import win32clipboard as w

那么python之网络爬虫-爬取京东商品plus价格低于原价5折的商品这个项目就到此结束了。
在此声明，整个项目仅用于学习交流，如被用于违法用途，本人一律不负责任。如果涉及到版权或者利益，请联系本人删除本帖！

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。