使用Python爬虫从天气预报网站爬取天气数据并存储至MySQL,然后使用pyecharts实现绘图
本次代码可以在gitee下载:https://gitee.com/liuyueming/weatherSpider.git
一,环境查看
Python版本
C:\Users\liuym\Desktop\weatherSpider>python --version
Python 3.6.6
MySQL版本
mysql --version
mysql Ver 14.14 Distrib 5.7.22, for Linux (x86_64) using EditLine wrapper
二,代码
安装模块
pip3 install pymysql
pip3 install bs4
pip3 install lxml
pip3 install requests
pip3 install pyecharts
运行过程中如果遇到未安装的库,使用pip install安装即可
本次爬取的天气预报网站为 http://www.tianqihoubao.com/
主程序main.py
import pymysql
import requests
from bs4 import BeautifulSoup
# Open the MySQL connection used by this script and create its cursor.
# Adjust host, user, password and database name to match your environment.
db = pymysql.connect(
    host="localhost",
    user="root",
    passwd="123456",
    db="weather",
    charset='utf8',
)
cursor = db.cursor()
def get_html(url, timeout=10):
    """Download a page and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the ``lxml`` parser.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    response.raise_for_status()
    # Decode the body with the encoding detected from its content.
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, 'lxml')
# Years to crawl; each year is combined with every month below.
year = ['2020']
month = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
# One 'YYYYMM' key per monthly history page.
year_months = [y + m for y in year for m in month]

insert_sql = (
    "insert into weather_spider"
    "(time_local, link, weather_type, temperature, wind_power) "
    "values(%s, %s, %s, %s, %s)"
)


def _clean(cell):
    """Return the text of a table cell with spaces and CRLF line breaks removed."""
    return cell.get_text().strip().replace(' ', '').replace('\r\n', '')


try:
    for date in year_months:
        url = 'http://www.tianqihoubao.com/lishi/nanchang/month/' + date + '.html'
        soup = get_html(url)
        table = soup.find('table', attrs={'class': 'b'})
        if table is None:
            # No data table on this page: skip the month instead of crashing.
            print('未找到天气数据表,跳过:', url)
            continue
        # The first <tr> is the header row, so start from the second one.
        for row in table.find_all('tr')[1:]:
            td = row.find_all('td')
            if len(td) < 4:
                continue
            anchor = td[0].find('a')
            if anchor is None:
                continue
            href = anchor['href']    # link to the daily detail page
            title = anchor['title']  # title attribute of the link
            weather = _clean(td[1])  # weather condition
            wendu = _clean(td[2])    # temperature
            fengli = _clean(td[3])   # wind power
            cursor.execute(insert_sql, (title, href, weather, wendu, fengli))
        # Commit once per monthly page so each month is stored as a unit.
        db.commit()
finally:
    # Release the cursor and the connection even if the crawl fails part-way.
    cursor.close()
    db.close()
print('爬取完成')
代码解析
db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8' ) # 数据库连接信息,根据实际情况修改
year = ['2020'] # 需要爬取的年份信息
url = 'http://www.tianqihoubao.com/lishi/nanchang/month/'+ date +'.html' # 需要爬取的城市信息 本次为南昌
生成html程序myVisualize.py
import pymysql
import pyecharts.options as opts
from pyecharts.charts import Line, Pie
def create_temp():
    """Render the high/low temperature line chart to an HTML file.

    Reads ``time_local`` and ``temperature`` from every row of the
    ``weather_spider`` table. The temperature text is split on ``/``: the
    part before it is plotted as the high and the part after it as the low,
    with the ``℃`` sign removed. The first 11 characters of ``time_local``
    are used as the x-axis label.

    The chart is written to ``南昌2020气温变化表.html`` in the current
    directory.

    Raises:
        pymysql.MySQLError: If the database cannot be reached or queried.
        IndexError: If a temperature value contains no ``/`` separator.
    """
    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        with db.cursor() as cursor:
            # Name the columns explicitly instead of relying on their position.
            cursor.execute('SELECT time_local, temperature FROM weather_spider;')
            data = cursor.fetchall()
    finally:
        # Close the connection before rendering so a rendering error cannot leak it.
        db.close()

    max_temp_list = []
    min_temp_list = []
    day_list = []
    for time_local, temperature in data:
        # Both columns are nullable; skip rows that cannot be plotted.
        if not time_local or not temperature:
            continue
        parts = temperature.split('/')
        max_temp_list.append(parts[0].replace('℃', ''))
        min_temp_list.append(parts[1].replace('℃', ''))
        day_list.append(time_local[:11])

    line = Line()
    line.add_xaxis(day_list)
    # Both series share the same markers: max/min points and an average line.
    for series_name, values in (("最高气温", max_temp_list), ("最低气温", min_temp_list)):
        line.add_yaxis(
            series_name=series_name,
            y_axis=values,
            is_symbol_show=False,
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
            markline_opts=opts.MarkLineOpts(
                data=[opts.MarkLineItem(type_="average", name="平均值")]
            ),
        )
    line.set_global_opts(
        yaxis_opts=opts.AxisOpts(name="温度(℃)"),
        title_opts=opts.TitleOpts(title="南昌气温变化表"),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
    )
    line.render('南昌2020气温变化表.html')
    print('气温图生成成功')
def create_weather():
    """Render a pie chart of how many rows match each weather keyword.

    For every keyword in ``attr`` the function counts the rows of
    ``weather_spider`` whose ``weather_type`` contains that keyword. A row
    containing several keywords is counted once under each of them, so the
    slices can add up to more than the number of rows.

    The chart is written to ``南昌2020天气占比表.html`` in the current
    directory.

    Raises:
        pymysql.MySQLError: If the database cannot be reached or queried.
    """
    attr = ["雨", "多云", "晴", "阴", "雪", "雾", "霾"]
    # Let MySQL do the counting instead of fetching every matching row.
    count_sql = 'SELECT COUNT(*) FROM weather_spider WHERE weather_type LIKE %s;'
    counts = []

    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        with db.cursor() as cursor:
            for keyword in attr:
                cursor.execute(count_sql, ('%' + keyword + '%',))
                counts.append(cursor.fetchone()[0])
    finally:
        # Close the connection before rendering so a rendering error cannot leak it.
        db.close()

    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(attr, counts)])
        .set_global_opts(title_opts=opts.TitleOpts(title="天气占比表"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    pie.render('南昌2020天气占比表.html')
    print('天气图生成成功')
if __name__ == '__main__':
    # Build the charts in order: temperature line chart, then weather pie chart.
    for build_chart in (create_temp, create_weather):
        build_chart()
MySQL操作(安装MySQL不详述)
创建库
create database weather;
导入表
mysql -uroot -p123456 -h127.0.0.1 weather < weather.sql
建表语句weather.sql如下
-- Recreate the table from scratch: an existing weather_spider table, including its rows, is dropped first.
DROP TABLE IF EXISTS `weather_spider`;
-- Holds the rows inserted by main.py; every column is nullable free text.
CREATE TABLE `weather_spider` (
-- title attribute of the day's link; myVisualize.py uses its first 11 characters as the chart label
`time_local` varchar(255) DEFAULT NULL,
-- href attribute of the day's link
`link` varchar(255) DEFAULT NULL,
-- weather condition text; matched with LIKE by myVisualize.py
`weather_type` varchar(255) DEFAULT NULL,
-- temperature text; myVisualize.py splits it on '/' and strips the degree sign
`temperature` varchar(255) DEFAULT NULL,
-- wind power text
`wind_power` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;
三,运行
运行主程序
python main.py
运行正常时会向MySQL数据库写入数据,可登录数据库执行以下语句查询查看
select * from weather_spider;

运行生成html程序
python myVisualize.py
气温图生成成功
天气图生成成功
在当前目录会生成html,打开查看


















