import io
import random

import pandas
import requests
def random_user_agent():
    """Return a randomly chosen browser User-Agent string.

    The value is picked uniformly from a small fixed pool of Windows
    browser identifiers, so successive requests do not all carry the same
    ``user-agent`` header.

    Returns:
        str: One of the User-Agent strings in the pool.
    """
    user_agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32",
    )
    # random.choice is the idiomatic form of indexing with a random position.
    return random.choice(user_agents)
def get_page(page):
    """Download one page of the stock listing from s.askci.com as HTML text.

    Args:
        page: Page number substituted into the ``pageNum`` query parameter.

    Returns:
        str | None: The response body when the server answers with HTTP
        200; ``None`` for any other status code or when the request itself
        fails (connection error, timeout, ...).
    """
    url = "https://s.askci.com/stock/a/0-0?reportTime=2022-06-30&pageNum={}".format(page)
    headers = {
        # Browser identity, re-drawn for every request.
        "user-agent": random_user_agent(),
        # Page the request claims to come from.
        # NOTE(review): this Referer names a different site than the one
        # being requested - confirm it is intentional.
        "Referer": "https://www.yixue.com/",
        # User/session information.
        # NOTE(review): hard-coded session cookie; confirm it is still valid
        # and actually needed for this host.
        "cookie": "newstatisticUUID=1651282597_602532090; _csrfToken=rmG7zCbbkD7QK34BymosH69Xve6eibDLGzeI2q8q; pageOps=1; fu=1403875312; qdrs=0|3|0|0|1; showSectionCommentGuide=1; qdgd=1; lrbc=1033272309|702228691|0; rcr=1033272309; bc=1033272309; _gid=GA1.2.364214729.1651282600; readadclose=1; _gat_gtag_UA_199934072_2=1; _ga_FZMMH98S83=GS1.1.1651282598.1.1.1651282753.0; _ga_PFYW0QLV3P=GS1.1.1651282598.1.1.1651282753.0; _ga=GA1.2.279552178.1651282599",
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        # Best-effort fetch: a network failure is reported as "no page".
        # Only request errors are caught, so Ctrl-C and programming errors
        # are no longer silently swallowed.
        return None
    if response.status_code != 200:
        return None
    return response.text
def parse_page(html, table_index=3):
    """Extract one table from an HTML page as a DataFrame.

    Args:
        html: HTML text of the page (for example the value returned by
            ``get_page``). Non-string inputs are handed to
            ``pandas.read_html`` unchanged.
        table_index: Zero-based position of the wanted table among the
            tables found in the page. Defaults to 3, the table this script
            has always read.

    Returns:
        pandas.DataFrame: The selected table, with its first row used as
        the column header.

    Raises:
        ValueError: If ``html`` is ``None`` or empty.
        IndexError: If the page has no table at ``table_index``.
    """
    if not html:
        raise ValueError("parse_page() needs HTML text, got {!r}".format(html))
    # Newer pandas releases deprecate passing literal HTML as a plain string,
    # so wrap text in a file-like object.
    source = io.StringIO(html) if isinstance(html, str) else html
    tables = pandas.read_html(source, header=0)  # header=0: first row is the header
    try:
        return tables[table_index]
    except IndexError:
        raise IndexError(
            "page contains {} table(s); no table at index {}".format(
                len(tables), table_index
            )
        ) from None
def save_data(data, path="上市公司.xlsx"):
    """Concatenate the collected tables and write them to an Excel file.

    Args:
        data: List of ``pandas.DataFrame`` objects, one per parsed page.
        path: Destination workbook. Defaults to the file name this script
            has always written.

    Raises:
        ValueError: If ``data`` is empty, i.e. there is nothing to write.
    """
    if not data:
        raise ValueError("no tables to save")
    df = pandas.concat(data)
    # The ``encoding`` argument of to_excel was removed in pandas 2.0 and
    # raises TypeError there, so it is no longer passed.
    df.to_excel(path, index=False)
# Module-level list of parsed page tables: main() appends to it and then passes it to save_data().
data=[]
def main(first_page=1, last_page=245):
    """Download, parse and save every listing page in the given range.

    Each page is fetched with ``get_page``, parsed with ``parse_page`` and
    appended to the module-level ``data`` list; the collected tables are
    then written out with ``save_data``.

    Args:
        first_page: First page number to fetch (inclusive).
        last_page: Last page number to fetch (inclusive). The defaults
            cover pages 1 through 245, the range this script has always
            scraped.
    """
    for page in range(first_page, last_page + 1):
        html = get_page(page)
        print(page)
        if html is None:
            # get_page() reports a failed download as None. Skip the page
            # instead of crashing in parse_page() and losing everything
            # collected so far.
            print("page {} could not be downloaded, skipped".format(page))
            continue
        try:
            table = parse_page(html)
        except (ValueError, IndexError) as err:
            # The page came back but did not contain the expected table.
            print("page {} could not be parsed, skipped: {}".format(page, err))
            continue
        data.append(table)
    save_data(data)
if __name__ == "__main__":
    # Run the scraper only when executed as a script, not when imported.
    main()
爬取网页的股票信息
原创
©著作权归作者所有:来自51CTO博客作者bent5233的原创作品,请联系作者获取转载授权,否则将追究法律责任
上一篇:mysql安装文档教程
下一篇:Python抓取音乐音乐
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
js爬取网页文字图片 html爬取网页信息
js爬取网页文字图片 html爬取网页信息博主的话功能简述运行效果项目代码代码简述博主的话 可以爬取许多
js爬取图片 js爬取文字 爬取图片 html爬取 xml
-
从网易财经行情中心爬取股票信息
java json ajax 数据 xml