import io
import random

import pandas
import requests

def random_user_agent():
    """Return one User-Agent string picked at random from a fixed pool.

    The pool holds three desktop Windows browser identifiers (Chrome,
    Internet Explorer 11 and Edge).
    """
    agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32",
    )
    # Draw an index in [0, len(agents) - 1]; randint is inclusive on both ends.
    last_index = len(agents) - 1
    picked = random.randint(0, last_index)
    return agents[picked]

def get_page(page):
    """Download one page of the stock listing and return its HTML.

    Args:
        page: Page number substituted into the ``pageNum`` query parameter.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, timeout, connection error or any
        other ``requests`` failure).
    """
    url = "https://s.askci.com/stock/a/0-0?reportTime=2022-06-30&pageNum={}".format(page)
    headers = {
        # Browser identification, rotated on every request.
        "user-agent": random_user_agent(),
        # Page the request claims to come from.
        # NOTE(review): the Referer and cookie below do not appear to belong
        # to s.askci.com -- confirm they are actually needed.
        "Referer": "https://www.yixue.com/",
        # Session / user information.
        "cookie": "newstatisticUUID=1651282597_602532090; _csrfToken=rmG7zCbbkD7QK34BymosH69Xve6eibDLGzeI2q8q; pageOps=1; fu=1403875312; qdrs=0|3|0|0|1; showSectionCommentGuide=1; qdgd=1; lrbc=1033272309|702228691|0; rcr=1033272309; bc=1033272309; _gid=GA1.2.364214729.1651282600; readadclose=1; _gat_gtag_UA_199934072_2=1; _ga_FZMMH98S83=GS1.1.1651282598.1.1.1651282753.0; _ga_PFYW0QLV3P=GS1.1.1651282598.1.1.1651282753.0; _ga=GA1.2.279552178.1651282599"
    }
    # Only network-level failures are treated as "page unavailable". A bare
    # ``except`` would also swallow KeyboardInterrupt/SystemExit and hide
    # genuine programming errors.
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None
    return response.text


def parse_page(html):
    """Extract the fourth HTML table of a page as a DataFrame.

    Args:
        html: HTML document text, as returned by ``get_page``.

    Returns:
        The table at index 3 among the tables ``pandas.read_html`` finds,
        with its first row used as the column header.
        NOTE(review): index 3 is presumably the listed-company table on the
        target site -- confirm against the live page.

    Raises:
        TypeError: If ``html`` is ``None`` (for example after a failed fetch).
        IndexError: If the document contains fewer than four tables.
    """
    if html is None:
        # io.StringIO(None) would silently produce an empty document, so
        # reject the missing page explicitly.
        raise TypeError("parse_page() requires HTML text, got None")
    # Passing literal HTML to read_html is deprecated in recent pandas
    # releases; a file-like wrapper is accepted by old and new versions.
    tables = pandas.read_html(io.StringIO(html), header=0)
    return tables[3]


def save_data(data, path="上市公司.xlsx"):
    """Concatenate the collected tables and write them to an Excel workbook.

    Args:
        data: Sequence of DataFrames, one per scraped page.
        path: Destination workbook; defaults to the original output file
            name in the current working directory.

    Raises:
        ValueError: Propagated from ``pandas.concat`` when ``data`` is empty.
    """
    combined = pandas.concat(data)
    # ``to_excel`` no longer accepts an ``encoding`` argument (deprecated in
    # pandas 1.5, removed in 2.0), so passing it raises TypeError there.
    combined.to_excel(path, index=False)


# Module-level accumulator: main() appends one parsed table per page here.
data=[]

def main():
    """Scrape pages 1 through 245 and store the combined table in Excel.

    Pages that could not be downloaded (``get_page`` returned ``None``) are
    skipped instead of being handed to the parser. The workbook is written
    once, after the loop, rather than being rewritten on every page; the
    ``finally`` clause still persists whatever was collected if the run is
    interrupted or a page fails to parse.
    """
    try:
        for page in range(1, 246):
            html = get_page(page)
            print(page)
            if html is None:
                # Fetch failed or returned a non-200 status: nothing to parse.
                continue
            data.append(parse_page(html))
    finally:
        # pandas.concat rejects an empty list, so only save when at least
        # one page was collected.
        if data:
            save_data(data)

# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()