python画图全国人口分布用python人口分布地图

转载

bingfeng 2024-01-05 16:15:31

文章标签 python画图全国人口分布 python 开发语言 json 数据 文章分类 Python 后端开发

第 16 章下载数据

16.1 CSV文件格式

16.1.1 分析CSV文件头

16.1.2 打印文件头及其位置

16.1.3 提取并读取数据

16.1.4 绘制气温图表

16.1.5 模块datetime

16.1.8 再绘制一个数据系列

16.1.9 给图表区域着色

16.1.10 错误检查

16.2 制作世界人口地图： JSON格式

16.2.1 下载世界人口数据

16.2.2 提取相关的数据

16.2.4 获取两个字母的国别码

16.2.5 制作世界地图

16.2.8 根据人口数量将国家分组

16.2.9 使用Pygal设置世界地图的样式

16.2.10 加亮颜色主题

代码

第 16 章下载数据

16.1 CSV文件格式

16.1.1 分析CSV文件头

highs_lows.py

import csv
# Print the header row of the weather CSV file.
filename = 'sitka_weather_07-2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    # The first row returned by the reader is the header row.
    header_row = next(reader)
    print(header_row)

16.1.2 打印文件头及其位置

highs_lows.py

import csv
# Print every column header together with its position in the row.
filename = 'sitka_weather_07-2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)

    # enumerate() yields (index, value) pairs, so each header is printed
    # next to the column index needed to read that field later.
    for index, column_header in enumerate(header_row):
        print(index, column_header)

python画图全国人口分布用python人口分布地图_python画图全国人口分布

对列表调用enumerate（）来获取每个元素的索引及其值

16.1.3 提取并读取数据

highs_lows.py

import csv

# Read the daily high temperatures from the file.
filename = 'sitka_weather_2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Field 5 of every data row is converted to an integer high temperature.
    highs = [int(row[5]) for row in reader]
    print(highs)

python画图全国人口分布用python人口分布地图_python画图全国人口分布_02

16.1.4 绘制气温图表

使用matplotlib创建一个显示每日最高气温的简单图形
highs_lows.py

import csv
from matplotlib import pyplot as plt

# Read the daily high temperatures from the file.
# NOTE(review): this file name carries no month while the chart title below
# says "July 2018" — confirm which data set is intended.
filename = 'sitka_weather_2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    # Field 5 of every data row is converted to an integer high temperature.
    highs = [int(row[5]) for row in reader]
    print(highs)

# Plot the data.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(highs, c='red')

# Format the chart.
plt.title("Daily high temperatures, July 2018", fontsize=24)
plt.xlabel('', fontsize=24)
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

python画图全国人口分布用python人口分布地图_python_03

16.1.5 模块datetime

使用模块datetime 中的方法strptime()
模块datetime中设置日期和时间格式的实参

实参	含义
%A	星期的名称，如Monday
%B	月份名，如January
%m	用数字表示的月份（01~12）
%d	用数字表示月份中的一天（01~31）
%Y	四位的年份，如2015
%y	两位的年份，如15
%H	24小时制的小时数（00~23）
%I	12小时制的小时数（01~12）
%p	am或pm
%M	分钟数（00~59）
%S	秒数（00~61）

highs_lows.py

import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Read the dates and daily high temperatures from the file.
filename = 'sitka_weather_07-2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    dates, highs = [], []
    for row in reader:
        # Field 2 is parsed as a YYYY-MM-DD date, field 5 as the integer high.
        dates.append(datetime.strptime(row[2], "%Y-%m-%d"))
        highs.append(int(row[5]))

# Plot the data with the dates on the x-axis.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')

# Format the chart.
plt.title("Daily high temperatures, July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

python画图全国人口分布用python人口分布地图_python画图全国人口分布_04

16.1.8 再绘制一个数据系列

再添加最低气温数据
highs_lows.py

import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Read the dates, daily highs and daily lows from the file.
filename = 'sitka_weather_07-2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    dates, highs, lows = [], [], []
    for row in reader:
        # Field 2 is parsed as a YYYY-MM-DD date; fields 5 and 6 are the
        # integer high and low temperatures.
        dates.append(datetime.strptime(row[2], "%Y-%m-%d"))
        highs.append(int(row[5]))
        lows.append(int(row[6]))

# Plot both series against the dates.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')
plt.plot(dates, lows, c='blue')

# Format the chart.
plt.title("Daily high and low temperatures, July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

python画图全国人口分布用python人口分布地图_数据_05

16.1.9 给图表区域着色

使用方法fill_between()，它接受一个 x 值系列和两个 y 值系列，并填充两个 y 值系列之间的空间：
highs_lows.py

import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Read the dates, daily highs and daily lows from the file.
filename = 'sitka_weather_07-2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    dates, highs, lows = [], [], []
    for row in reader:
        # Field 2 is parsed as a YYYY-MM-DD date; fields 5 and 6 are the
        # integer high and low temperatures.
        dates.append(datetime.strptime(row[2], "%Y-%m-%d"))
        highs.append(int(row[5]))
        lows.append(int(row[6]))

# Plot both series half-transparent and shade the area between them.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the chart.
plt.title("Daily high and low temperatures, July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

python画图全国人口分布用python人口分布地图_数据_06

16.1.10 错误检查

highs_lows.py

for row in reader:
    try:
        current_date = datetime.strptime(row[2], "%Y-%m-%d")
        high = int(row[5])
        low = int(row[6])
    except ValueError:
        # Report the raw date text of this row. current_date cannot be used
        # here: if parsing the date itself failed it still holds the previous
        # row's value, or is not defined at all on the first row.
        print(row[2], 'missing data')
    else:
        # Only rows where all three fields parsed are kept, so the three
        # lists always stay the same length.
        dates.append(current_date)
        highs.append(high)
        lows.append(low)

import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Read the dates, daily highs and daily lows from the file.
filename = 'death_valley_2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    dates, highs, lows = [], [], []
    for row in reader:
        try:
            current_date = datetime.strptime(row[2], "%Y-%m-%d")
            high = int(row[5])
            low = int(row[6])
        except ValueError:
            # Report the raw date text of this row. current_date cannot be
            # used here: if parsing the date itself failed it still holds the
            # previous row's value, or is not defined at all on the first row.
            print(row[2], 'missing data')
        else:
            # Only rows where all three fields parsed are kept, so the three
            # lists always stay the same length.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Plot both series half-transparent and shade the area between them.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the chart.
# NOTE(review): the title says "July 2018" but the file name carries no month —
# confirm the title matches the data being plotted.
plt.title("Daily high and low temperatures, July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()

python画图全国人口分布用python人口分布地图_json_07

python画图全国人口分布用python人口分布地图_开发语言_08

16.2 制作世界人口地图： JSON格式

16.2.1 下载世界人口数据

为了方便操作、加快学习进度，
这边附上直接下载csv和json压缩包的链接地址：https://pkgstore.datahub.io/core/population/population_zip/data/4e22dd3646c87e44fcaf119d34439291/population_zip.zip

16.2.2 提取相关的数据

world_population.py

import json         #导入json模块

# Load the population data set.
filename = 'population_json.json'
# JSON is normally UTF-8 text, so name the encoding instead of relying on the
# platform default, which differs between operating systems and locales.
with open(filename, encoding='utf-8') as f:
    # json.load() converts the file content into Python objects; the loop
    # below treats the result as a sequence of dictionaries.
    pop_data = json.load(f)

# Print the 2010 population of every entry.
for pop_dict in pop_data:
    if pop_dict['Year'] == 2010:
        country_name = pop_dict["Country Name"]
        # Converting through float() first also accepts values written with a
        # decimal point, which int() alone would reject for strings.
        population = int(float(pop_dict["Value"]))
        print(country_name + ":" + str(population))

python画图全国人口分布用python人口分布地图_python画图全国人口分布_09

16.2.4 获取两个字母的国别码

country_codes.py

from pygal_maps_world.i18n import COUNTRIES

# Walk the (code, name) pairs in code order and print each pair on one line.
for code, name in sorted(COUNTRIES.items()):
    print(code, name)

python画图全国人口分布用python人口分布地图_开发语言_10

编写一个函数，在COUNTRIES中查找并返回国别码。将函数放在country_codes的模块中。

country_codes.py

from pygal_maps_world.i18n import COUNTRIES

def get_country_code(country_name):
    """Return the two-letter Pygal country code for the given country name.

    COUNTRIES is searched for an entry whose name equals country_name
    exactly; the matching code is returned, or None when nothing matches.
    """
    for code, name in COUNTRIES.items():
        if name == country_name:
            return code
    # No entry carries exactly this name.
    return None


# Run the demo lookups only when this file is executed directly, so that
# importing get_country_code from another script does not print anything.
if __name__ == '__main__':
    print(get_country_code('China'))
    print(get_country_code('Andorra'))
    print(get_country_code('Afghanistan'))

python画图全国人口分布用python人口分布地图_python_11

在world_population.py文件中导入国别码

world_population.py

import json         #导入json模块
from country_codes import  get_country_code

# Load the population data set.
filename = 'population_json.json'
# JSON is normally UTF-8 text, so name the encoding instead of relying on the
# platform default, which differs between operating systems and locales.
with open(filename, encoding='utf-8') as f:
    # json.load() converts the file content into Python objects; the loop
    # below treats the result as a sequence of dictionaries.
    pop_data = json.load(f)

# Print the country code and 2010 population of every entry.
for pop_dict in pop_data:
    if pop_dict['Year'] == 2010:
        country_name = pop_dict["Country Name"]
        population = int(float(pop_dict["Value"]))
        code = get_country_code(country_name)
        if code:
            print(code + "：" + str(population))
        else:
            # Names without a matching country code are reported so they can
            # be inspected.
            print('ERROR-' + country_name)

python画图全国人口分布用python人口分布地图_数据_12

16.2.5 制作世界地图

americas.py

import pygal_maps_world.maps
# Create the world map chart and give it a title.
# NOTE(review): "Center" in the title and label below probably should read
# "Central" — confirm before changing the rendered text.
wm = pygal_maps_world.maps.World()
wm.title = 'North,Center,and South America'

# One series per region: series label -> two-letter country codes.
regions = {
    'North America': ['ca', 'mx', 'us'],
    'Center America': ['bz', 'cr', 'gt', 'hn', 'ni', 'pa', 'sv'],
    'South America': [
        'ar', 'bo', 'br', 'cl', 'co', 'ec', 'gf',
        'gy', 'pe', 'py', 'sr', 'uy', 've',
    ],
}
for label, codes in regions.items():
    wm.add(label, codes)

wm.render_to_file('americas.svg')

在世界地图上呈现数字数据

na_populations.py

import pygal_maps_world.maps
# Create the world map chart and give it a title.
wm = pygal_maps_world.maps.World()
wm.title = 'Populations of Countries in North America'

# Passing a dict of country code -> value attaches a number to each country.
# Canada's figure is 34126000 (about 34 million); the previous value 3412600
# was missing a digit and understated it tenfold.
wm.add('North America', {'ca': 34126000, 'us': 309349000, 'mx': 113423000})

wm.render_to_file('na_populations.svg')

鼠标放在对应的国家会显示人口数据

绘制完整的世界人口地图

world_population.py

import json         #导入json模块
from country_codes import  get_country_code
import pygal_maps_world.maps

# Load the population data set.
filename = 'population_json.json'
# JSON is normally UTF-8 text, so name the encoding instead of relying on the
# platform default, which differs between operating systems and locales.
with open(filename, encoding='utf-8') as f:
    # json.load() converts the file content into Python objects; the loop
    # below treats the result as a sequence of dictionaries.
    pop_data = json.load(f)

# Build a dictionary mapping country codes to 2010 populations.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == 2010:
        country_name = pop_dict["Country Name"]
        population = int(float(pop_dict["Value"]))
        code = get_country_code(country_name)
        # Entries whose name has no matching country code are left off the map.
        if code:
            cc_populations[code] = population

# Draw all countries as a single series.
wm = pygal_maps_world.maps.World()
wm.title = 'World Population in 2010,by Country'
wm.add('2010', cc_populations)

wm.render_to_file('World_Population.svg')

cc_populations={}
创建了一个空字典，用来按Pygal要求的字典格式（国别码为键、人口数量为值）存储数据
wm=pygal_maps_world.maps.World()
创建实例，并设置属性

16.2.8 根据人口数量将国家分组

world_population.py

import json         #导入json模块
from country_codes import  get_country_code
import pygal_maps_world.maps

# Load the population data set.
filename = 'population_json.json'
# JSON is normally UTF-8 text, so name the encoding instead of relying on the
# platform default, which differs between operating systems and locales.
with open(filename, encoding='utf-8') as f:
    # json.load() converts the file content into Python objects; the loop
    # below treats the result as a sequence of dictionaries.
    pop_data = json.load(f)

# Build a dictionary mapping country codes to 2010 populations.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == 2010:
        country_name = pop_dict["Country Name"]
        population = int(float(pop_dict["Value"]))
        code = get_country_code(country_name)
        # Entries whose name has no matching country code are left off the map.
        if code:
            cc_populations[code] = population

# Split the countries into three groups by population:
# below 10 million, below 1 billion, and everything else.
cc_pops_1, cc_pops_2, cc_pops_3 = {}, {}, {}
for cc, pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop

# Show how many countries ended up in each group.
print(len(cc_pops_1), len(cc_pops_2), len(cc_pops_3))

wm = pygal_maps_world.maps.World()
wm.title = 'World Population in 2010,by Country'

# One series per population group.
# NOTE(review): "bm" in the labels looks like a typo for "bn" (billion) —
# confirm before renaming the legend entries.
wm.add('0~10m', cc_pops_1)
wm.add('10m~1bm', cc_pops_2)
wm.add('>1bm', cc_pops_3)

wm.render_to_file('World_Population_8.svg')

python画图全国人口分布用python人口分布地图_python_13

结果显示：人口在1000万以内的国家有81个，在1000万~10亿之间的国家有71个，大于10亿的国家有2个。

16.2.9 使用Pygal设置世界地图的样式

world_population.py

import json         #导入json模块
from country_codes import  get_country_code
import pygal_maps_world.maps
from pygal.style import RotateStyle

# Load the population data set.
filename = 'population_json.json'
# JSON is normally UTF-8 text, so name the encoding instead of relying on the
# platform default, which differs between operating systems and locales.
with open(filename, encoding='utf-8') as f:
    # json.load() converts the file content into Python objects; the loop
    # below treats the result as a sequence of dictionaries.
    pop_data = json.load(f)

# Build a dictionary mapping country codes to 2010 populations.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == 2010:
        country_name = pop_dict["Country Name"]
        population = int(float(pop_dict["Value"]))
        code = get_country_code(country_name)
        # Entries whose name has no matching country code are left off the map.
        if code:
            cc_populations[code] = population

# Split the countries into three groups by population:
# below 10 million, below 1 billion, and everything else.
cc_pops_1, cc_pops_2, cc_pops_3 = {}, {}, {}
for cc, pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop

# Show how many countries ended up in each group.
print(len(cc_pops_1), len(cc_pops_2), len(cc_pops_3))

# Style the map with a RotateStyle built from the base colour '#336699'
# instead of creating the chart with its default style.
wm_style = RotateStyle('#336699')
wm = pygal_maps_world.maps.World(style=wm_style)
wm.title = 'World Population in 2010,by Country'

# One series per population group.
# NOTE(review): "bm" in the labels looks like a typo for "bn" (billion) —
# confirm before renaming the legend entries.
wm.add('0~10m', cc_pops_1)
wm.add('10m~1bm', cc_pops_2)
wm.add('>1bm', cc_pops_3)

wm.render_to_file('World_Population_9.svg')

Pygal样式存储在模块style 中，我们从这个模块中导入了样式RotateStyle（即代码中的 from pygal.style import RotateStyle 一行）。创建这个类的实例时，需要提供一个实参——十六进制的RGB颜色（即 RotateStyle('#336699') 中的 '#336699'）； Pygal将根据指定的颜色为每组选择颜色。十六进制格式的RGB颜色是一个以井号（#）打头的字符串，后面跟着6个字符，其中前两个字符表示红色分量，接下来的两个表示绿色分量，最后两个表示蓝色分量。每个分量的取值范围为00 （没有相应的颜色） ~FF （包含最多的相应颜色）。如果你在线搜索hex color chooser（十六进制颜色选择器），可找到让你能够尝试选择不同的颜色并显示其RGB值的工具。这里使用的颜色值（#336699）混合了少量的红色（33）、多一些的绿色（66）和更多一些的蓝色（99），它为RotateStyle 提供了一种淡蓝色基色。

16.2.10 加亮颜色主题

使用LightColorizedStyle

# LightColorizedStyle is one of the styles provided by the pygal.style module.
from pygal.style import LightColorizedStyle

# The style is assigned as-is (it is not called with a colour argument here).
wm_style=LightColorizedStyle

使用LightColorizedStyle类时，不能直接控制使用的颜色，所以Pygal使用了默认的基色；要设置颜色，则使用RotateStyle，并将LightColorizedStyle作为基本样式

# Import both styles under the short aliases LCS and RS.
from pygal.style import LightColorizedStyle as LCS,RotateStyle as RS

# Written without the aliases this would read:
# RotateStyle('#336699', base_style=LightColorizedStyle)
wm_style=RS('#336699',base_style=LCS)

代码

highs_lows.py
import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Read the dates, daily highs and daily lows from the file.
filename = 'sitka_weather_07-2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    dates, highs, lows = [], [], []
    for row in reader:
        # Field 2 is parsed as a YYYY-MM-DD date; fields 5 and 6 are the
        # integer high and low temperatures.
        dates.append(datetime.strptime(row[2], "%Y-%m-%d"))
        highs.append(int(row[5]))
        lows.append(int(row[6]))

# Plot both series half-transparent and shade the area between them.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the chart.
plt.title("Daily high and low temperatures, July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()
highs_lows_death.py
import csv
from datetime import datetime
from matplotlib import pyplot as plt

# Read the dates, daily highs and daily lows from the file.
filename = 'death_valley_2018_simple.csv'
# newline='' leaves newline handling to the csv module, as its documentation
# asks for any file object passed to csv.reader.
with open(filename, newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader)  # consume the header so only data rows remain

    dates, highs, lows = [], [], []
    for row in reader:
        try:
            current_date = datetime.strptime(row[2], "%Y-%m-%d")
            high = int(row[5])
            low = int(row[6])
        except ValueError:
            # Report the raw date text of this row. current_date cannot be
            # used here: if parsing the date itself failed it still holds the
            # previous row's value, or is not defined at all on the first row.
            print(row[2], 'missing data')
        else:
            # Only rows where all three fields parsed are kept, so the three
            # lists always stay the same length.
            dates.append(current_date)
            highs.append(high)
            lows.append(low)

# Plot both series half-transparent and shade the area between them.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)

# Format the chart.
# NOTE(review): the title says "July 2018" but the file name carries no month —
# confirm the title matches the data being plotted.
plt.title("Daily high and low temperatures, July 2018", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature(F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()
world_population.py
import json         #导入json模块
from country_codes import  get_country_code
import pygal_maps_world.maps
from pygal.style import RotateStyle
from pygal.style import LightColorizedStyle

# Load the population data set.
filename = 'population_json.json'
# JSON is normally UTF-8 text, so name the encoding instead of relying on the
# platform default, which differs between operating systems and locales.
with open(filename, encoding='utf-8') as f:
    # json.load() converts the file content into Python objects; the loop
    # below treats the result as a sequence of dictionaries.
    pop_data = json.load(f)

# Build a dictionary mapping country codes to 2010 populations.
cc_populations = {}
for pop_dict in pop_data:
    if pop_dict['Year'] == 2010:
        country_name = pop_dict["Country Name"]
        population = int(float(pop_dict["Value"]))
        code = get_country_code(country_name)
        # Entries whose name has no matching country code are left off the map.
        if code:
            cc_populations[code] = population

# Split the countries into three groups by population:
# below 10 million, below 1 billion, and everything else.
cc_pops_1, cc_pops_2, cc_pops_3 = {}, {}, {}
for cc, pop in cc_populations.items():
    if pop < 10000000:
        cc_pops_1[cc] = pop
    elif pop < 1000000000:
        cc_pops_2[cc] = pop
    else:
        cc_pops_3[cc] = pop

# Show how many countries ended up in each group.
print(len(cc_pops_1), len(cc_pops_2), len(cc_pops_3))

# Style choice: RotateStyle('#336699') alone was not bright enough, and
# LightColorizedStyle alone gives no direct control over the colour, so the
# two are combined: RotateStyle supplies the base colour and
# LightColorizedStyle serves as the base style.
wm_style = RotateStyle('#336699', base_style=LightColorizedStyle)
wm = pygal_maps_world.maps.World(style=wm_style)
wm.title = 'World Population in 2010,by Country'

# One series per population group.
# NOTE(review): "bm" in the labels looks like a typo for "bn" (billion) —
# confirm before renaming the legend entries.
wm.add('0~10m', cc_pops_1)
wm.add('10m~1bm', cc_pops_2)
wm.add('>1bm', cc_pops_3)

wm.render_to_file('World_Population_10.svg')
country_codes.py
from pygal_maps_world.i18n import COUNTRIES

def get_country_code(country_name):
    """Return the two-letter Pygal country code for the given country name.

    COUNTRIES is searched for an entry whose name equals country_name
    exactly; the matching code is returned, or None when nothing matches.
    """
    for code, name in COUNTRIES.items():
        if name == country_name:
            return code
    # No entry carries exactly this name.
    return None


# Run the demo lookups only when this file is executed directly, so that
# importing get_country_code from another script does not print anything.
if __name__ == '__main__':
    print(get_country_code('China'))
    print(get_country_code('Andorra'))
    print(get_country_code('Afghanistan'))
americas.py
import pygal_maps_world.maps
# Create the world map chart and give it a title.
# NOTE(review): "Center" in the title and label below probably should read
# "Central" — confirm before changing the rendered text.
wm = pygal_maps_world.maps.World()
wm.title = 'North,Center,and South America'

# One series per region: series label -> two-letter country codes.
regions = {
    'North America': ['ca', 'mx', 'us'],
    'Center America': ['bz', 'cr', 'gt', 'hn', 'ni', 'pa', 'sv'],
    'South America': [
        'ar', 'bo', 'br', 'cl', 'co', 'ec', 'gf',
        'gy', 'pe', 'py', 'sr', 'uy', 've',
    ],
}
for label, codes in regions.items():
    wm.add(label, codes)

wm.render_to_file('americas.svg')
na_populations.py
import pygal_maps_world.maps
# Create the world map chart and give it a title.
wm = pygal_maps_world.maps.World()
wm.title = 'Populations of Countries in North America'

# Passing a dict of country code -> value attaches a number to each country.
# Canada's figure is 34126000 (about 34 million); the previous value 3412600
# was missing a digit and understated it tenfold.
wm.add('North America', {'ca': 34126000, 'us': 309349000, 'mx': 113423000})

wm.render_to_file('na_populations.svg')

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。