# Crawl all stock data with Python and save it to an Excel file

# -*- coding: utf-8 -*-
"""
Created on Thu Oct 22 10:00:27 2020

@author: LiKely
"""


import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time
import json
import xlwt

def get_list(url, timeout=10):
    """Fetch the stock index page and return every stock link found on it.

    Args:
        url: Address of the page that lists the stock codes.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of dicts, one per ``.stockTable a`` anchor in document order.
        Each dict has ``name`` (the anchor text) and ``url`` (the anchor's
        ``href`` attribute, ``None`` when the anchor has none).

    Raises:
        requests.RequestException: If the page cannot be fetched in time.
    """
    res = requests.get(url, timeout=timeout)
    # Decode the body as UTF-8 rather than trusting the guessed encoding.
    res.encoding = 'utf-8'

    # Full HTML document
    html = BeautifulSoup(res.text, 'html.parser')

    # Reading a parsed tag's text or attribute cannot fail, so no
    # try/except is needed around building the entries.
    return [
        {'name': item.text, 'url': item.get('href')}
        for item in html.select('.stockTable a')
    ]


def _text_at(html, selector, index, label, url):
    """Return the text of match number ``index`` for ``selector``, or ''.

    When the page has too few matches, a diagnostic naming ``label`` and
    ``url`` is printed and the empty string is returned.
    """
    matches = html.select(selector)
    if index < len(matches):
        return matches[index].text
    print('读取' + label + ',出现异常', url)
    return ''


def get_detail(url, timeout=10):
    """Fetch one stock page and extract its headline fields and indicators.

    Args:
        url: Address of the stock's detail page.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A dict with the keys ``title``, ``code``, ``state``, ``nowtime``,
        ``url``, ``maxheight`` and ``minheight`` (strings; '' when the
        element is missing from the page), plus ``childTitles`` and
        ``childValues`` (lists with the text of every ``.s_date dt`` and
        ``.s_date dd`` element respectively).

    Raises:
        requests.RequestException: If the page cannot be fetched in time.
    """
    res = requests.get(url, timeout=timeout)
    # Decode the body as UTF-8 rather than trusting the guessed encoding.
    res.encoding = 'utf-8'

    # Full HTML document
    html = BeautifulSoup(res.text, 'html.parser')

    # A missing element yields '' plus a printed warning instead of
    # aborting, so one incomplete page still produces a usable record.
    return {
        'title': _text_at(html, '.stock_title h1', 0, '股票名称', url),
        'code': _text_at(html, '.stock_title h2', 0, '股票代码', url),
        'state': _text_at(html, '.stock_title em', 0, '股票状态', url),
        'nowtime': _text_at(html, '.stock_title time', 0, '当前时间', url),
        'url': url,
        # The first and second ``.s_height dd`` hold the high and the low.
        'maxheight': _text_at(html, '.s_height dd', 0, '最高', url),
        'minheight': _text_at(html, '.s_height dd', 1, '最低', url),
        # Indicator titles and values; empty lists when the page has none.
        'childTitles': [item.text for item in html.select('.s_date dt')],
        'childValues': [item.text for item in html.select('.s_date dd')],
    }


if __name__ == "__main__":  # script entry point

    # Fetch the list of stock links
    stockList = get_list('https://hq.gucheng.com/gpdmylb.html')

    # Indicator titles come from the first stock's page; an empty list of
    # stocks simply produces a header-only workbook instead of an IndexError.
    # NOTE(review): assumes every stock page lists the same indicators in the
    # same order as the first one -- confirm against the site.
    if stockList:
        firstChildTitleList = get_detail(stockList[0]['url'])['childTitles']
    else:
        firstChildTitleList = []

    print('获取到' + str(len(stockList)) + '个股票代码')

    # Create the workbook and its single sheet
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('股票代码')

    # Header row: the fixed columns followed by the indicator titles
    head = ['股票名称', '股票代码', '状态', '时间', '网址', '最高', '最低']
    for col, title in enumerate(head + list(firstChildTitleList)):
        sheet.write(0, col, title)

    # Keys of the get_detail() result, in the same order as ``head``
    fixedKeys = ['title', 'code', 'state', 'nowtime', 'url',
                 'maxheight', 'minheight']

    try:
        for i, item in enumerate(stockList):
            try:
                stockObj = get_detail(item['url'])
                # Fixed columns first, then one column per indicator value
                row = [stockObj[key] for key in fixedKeys]
                row.extend(stockObj['childValues'])
                for col, value in enumerate(row):
                    sheet.write(i + 1, col, value)
                print(str(i), '写入成功')
            except Exception as exc:
                # Report the URL that was being fetched: ``stockObj`` may be
                # unbound or left over from the previous stock at this point.
                # ``Exception`` (not a bare except) lets Ctrl-C stop the crawl.
                print(str(i), '出现异常', item['url'], exc)
    finally:
        # Save whatever was collected, even if the crawl was interrupted.
        book.save('股票代码.xls')

    print('写入完毕!>>股票代码.xls')

# (Page residue from the source article: "Crawl all stock data with Python and save it to an Excel file")