Python环境配置与数据处理
1.numpy的基础练习
1.1 创建一个长度为10的一维全为0的ndarray对象,然后让第5个元素等于1
import numpy as np
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
nd1=np.zeros(shape=10)
print(nd1)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
nd1[4]=1
print(nd1)
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
1.2 使用np.random.random创建一个10*10的ndarray对象,并打印出最大最小元素
nd2=np.random.random(size=(10,10))
print(nd2)
[[0.61993158 0.31522033 0.28300058 0.17359121 0.80817087 0.75573108
0.2524076 0.75421005 0.50546551 0.19976574]
[0.88604238 0.11417706 0.65778908 0.86969322 0.62615679 0.41322621
0.717381 0.66991761 0.92522042 0.25820991]
[0.94981597 0.27551905 0.78895276 0.16181883 0.80214942 0.13796826
0.3016356 0.23770425 0.2730307 0.51625704]
[0.56275125 0.37429869 0.21132968 0.30695606 0.33033307 0.71280133
0.80531668 0.45105168 0.99316998 0.03509345]
[0.85817934 0.30642866 0.38654506 0.01980823 0.08947374 0.67045185
0.40023845 0.97259466 0.46046158 0.43818511]
[0.02108788 0.78284794 0.39210544 0.70737209 0.69294844 0.50113278
0.16415867 0.29838336 0.09521118 0.74832518]
[0.76483358 0.55966895 0.88143049 0.39842911 0.76445938 0.29011755
0.65311882 0.34509633 0.28306275 0.78555244]
[0.39036742 0.57327435 0.09607757 0.6195993 0.05590364 0.15495825
0.04236545 0.13066749 0.74040154 0.19906226]
[0.1035001 0.22343978 0.68190871 0.782599 0.42178278 0.3395189
0.5830641 0.29792577 0.6847517 0.57117258]
[0.22463793 0.56954224 0.98659375 0.80609581 0.76044268 0.66628508
0.58861037 0.63903257 0.56358177 0.79286617]]
nd2max=nd2.max()
nd2min=nd2.min()
print(nd2max,nd2min)
0.9931699783795565 0.019808226699720377
1.3 创建一个元素为从10到49的ndarray对象,并将所有元素位置反转
a=np.arange(10,50)
print(a)
down=a[::-1]
print(down)
[10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]
[49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28 27 26
25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10]
1.4 创建一个10*10的ndarray对象,且矩阵边界全为1,里面全为0
nd4=np.zeros((10,10))
nd4[0, :] = nd4[9, :] = nd4[:, 0] = nd4[:, 9] = 1
print(nd4)
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
nd4_1=np.ones((10,10))
nd4_1[1:-1,1:-1]=0
print(nd4_1)
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
1.5 创建一个每一行都是从0到4的5*5矩阵
# Stack one 0..4 row five times to form the 5x5 matrix.
I = [0, 1, 2, 3, 4]
nd5 = np.tile(I, (5, 1))
print(nd5)
[[0 1 2 3 4]
[0 1 2 3 4]
[0 1 2 3 4]
[0 1 2 3 4]
[0 1 2 3 4]]
1.6 创建一个范围在(0,10)之间的长度为15的等差数列
nd6=np.linspace(0,10,15)
print(nd6)
[ 0. 0.71428571 1.42857143 2.14285714 2.85714286 3.57142857
4.28571429 5. 5.71428571 6.42857143 7.14285714 7.85714286
8.57142857 9.28571429 10. ]
1.7 创建一个长度为10的随机数组并排序
nd7=np.random.random(10)
print(nd7)
[0.7642248 0.57989547 0.93747947 0.35400782 0.61238838 0.89160774
0.33170533 0.58857917 0.46881695 0.29894106]
nd7=np.sort(nd7)
print(nd7)
[0.29894106 0.33170533 0.35400782 0.46881695 0.57989547 0.58857917
0.61238838 0.7642248 0.89160774 0.93747947]
1.8 创建一个长度为10的随机数组并将最大值替换为0
nd8=np.random.randint(0,20,size=10)
display(nd8)
index_max=nd8.argmax()
print(index_max)
nd8[index_max]=0
display(nd8)
array([ 5, 3, 14, 11, 12, 0, 1, 2, 6, 16])
9
array([ 5, 3, 14, 11, 12, 0, 1, 2, 6, 0])
1.9 给定一个4维矩阵,得到最后两维的和
nd9=np.random.randint(0,100,size=(2,3,3,3))
print(nd9)
[[[[36 95 83]
[26 26 69]
[62 4 80]]
[[51 43 17]
[31 2 96]
[66 58 69]]
[[ 9 22 16]
[ 4 26 40]
[14 97 53]]]
[[[71 57 6]
[37 13 90]
[79 27 84]]
[[99 35 99]
[64 91 4]
[71 90 3]]
[[67 46 10]
[23 60 96]
[20 61 76]]]]
sum9_1=nd9.sum(axis=(2,3))
print(sum9_1)
[[481 433 281]
[464 556 459]]
sum9_2=nd9.sum(axis=(-1,-2))
print(sum9_2)
[[481 433 281]
[464 556 459]]
1.10 创建一个5*3随机矩阵和一个3*2随机矩阵,求矩阵积
nd10_53=np.random.randint(0,100,size=(5,3))
print(nd10_53)
[[ 5 56 85]
[38 5 42]
[21 92 70]
[88 49 49]
[75 6 45]]
nd10_32=np.random.randint(0,100,size=(3,2))
print(nd10_32)
[[96 51]
[16 3]
[ 7 49]]
sum10_1=np.dot(nd10_53,nd10_32)
print(sum10_1)
[[1971 4588]
[4022 4011]
[3978 4777]
[9575 7036]
[7611 6048]]
2. 《python机器学习原理、算法及案例实战》第二章例题
2.1 天天学习,天天向上
import random
def fib_loop(n):
    """Return the first ``n`` Fibonacci numbers, starting 1, 1, 2, ...

    The partially built list is printed after every step, prefixed with the
    zero-based step index.

    Args:
        n: How many terms to generate. ``range(n)`` yields nothing for
            ``n <= 0``, so the result is then an empty list.

    Returns:
        A list holding the generated terms in order.
    """
    listNum = []
    a, b = 0, 1
    for i in range(n):
        # Advance the pair before appending so the sequence starts at 1, not 0.
        a, b = b, a + b
        listNum.append(a)
        print(i, listNum)
    return listNum
# Nine candidate activities, indexed 0-8.
listPlan=['吃零食','学习','学习','学习','睡觉','吃饭','玩手机','学习','睡觉']
# fib_loop(6) prints each step; the recorded run ends with [1, 1, 2, 3, 5, 8].
listNum=fib_loop(6)
# random.randint includes both endpoints, so varIdx covers all six positions of listNum.
varIdx=random.randint(0,5)
# The Fibonacci value itself (at most 8 in the recorded run) is used as the index into listPlan.
varRandom=listNum[varIdx]
print('今日计划:',listPlan[varRandom])
0 [1]
1 [1, 1]
2 [1, 1, 2]
3 [1, 1, 2, 3]
4 [1, 1, 2, 3, 5]
5 [1, 1, 2, 3, 5, 8]
今日计划: 睡觉
2.2 字符串
# Named `text` rather than `str` so the built-in str type stays usable afterwards.
text = 'Python String'
print(text[1:3])      # characters at index 1 and 2
print(text[-3:-1])    # third- and second-to-last characters
print(text[3:-1])     # from index 3 up to, but excluding, the last character
print(text[-6:12])    # a negative start can be combined with a positive stop
print(text[2:])       # from index 2 to the end
print(text * 2)       # repetition
yt
in
hon Strin
Strin
thon String
Python StringPython String
# Named `sample_list` rather than `list` so the built-in list type stays usable afterwards.
sample_list = ['a', 56, 112, 1.366, 'hello', [7, 8, 9]]
print(sample_list)
print(sample_list[4])
print(sample_list[-2:6])
print(sample_list[2:])
# append() adds one element at the end.
sample_list.append('add')
print(sample_list)
# Replace a single element by index.
sample_list[2] = 0
print(sample_list)
# Slice assignment replaces the first three elements at once.
sample_list[:3] = [255, 142, 'c']
print(sample_list)
# pop(1) removes the element at index 1; its return value is not needed here.
sample_list.pop(1)
print(sample_list)
['a', 56, 112, 1.366, 'hello', [7, 8, 9]]
hello
['hello', [7, 8, 9]]
[112, 1.366, 'hello', [7, 8, 9]]
['a', 56, 112, 1.366, 'hello', [7, 8, 9], 'add']
['a', 56, 0, 1.366, 'hello', [7, 8, 9], 'add']
[255, 142, 'c', 1.366, 'hello', [7, 8, 9], 'add']
[255, 'c', 1.366, 'hello', [7, 8, 9], 'add']
2.3 列表
lis = ['兔子', '老鼠', '老鹰', '熊猫', '猫']

# 1) Iterate over the elements directly.
for item in lis:
    print(item)

# 2) enumerate() yields (index, element) tuples; each tuple is printed whole.
for i in enumerate(lis):
    print(i)

# 3) Index-based access, kept to show the positional form next to the others.
for i in range(len(lis)):
    print(lis[i])
兔子
老鼠
老鹰
熊猫
猫
(0, '兔子')
(1, '老鼠')
(2, '老鹰')
(3, '熊猫')
(4, '猫')
兔子
老鼠
老鹰
熊猫
猫
2.4 元组
tup=('windows',2022,3.1415,[22,63,'july'])
tup2=(56,'KKK')
print(tup)
print(tup[0])
print(tup[3:4])
print(tup+tup2)
print(tup[3])
tup[3][0]=0
tup[3][1]='kitty'
print(tup)
('windows', 2022, 3.1415, [22, 63, 'july'])
windows
([22, 63, 'july'],)
('windows', 2022, 3.1415, [22, 63, 'july'], 56, 'KKK')
[22, 63, 'july']
('windows', 2022, 3.1415, [0, 'kitty', 'july'])
2.5 字典
# Named `info` rather than `dict` so the built-in dict type stays usable afterwards.
info = {'name': ['July', 'Mary', 'Tom'], 'age': 18, 'class': 2001}
print('name:', info['name'])
# Two ways to add an entry: item assignment and update().
info['Sex'] = 'Man'
info.update({'No': '001'})
print(info)
# del removes a key together with its value.
del info['No']
print(info)
name: ['July', 'Mary', 'Tom']
{'name': ['July', 'Mary', 'Tom'], 'age': 18, 'class': 2001, 'Sex': 'Man', 'No': '001'}
{'name': ['July', 'Mary', 'Tom'], 'age': 18, 'class': 2001, 'Sex': 'Man'}
3. 第三章
3.1 numpy库
a=np.array([1,2,3,4,5],ndmin=2)
print(a)
b=np.array([1,2,3],dtype=complex)
print(b)
[[1 2 3 4 5]]
[1.+0.j 2.+0.j 3.+0.j]
x=np.float32(5)
print('x为:',x)
print('x对象的data属性:',x.data)
print('x对象的size属性:',x.size)
print('x对象的维度:',x.ndim)
y=np.bool_(x)
print('转换为bool类型的x为:',y)
z=np.float16(y)
print('True值转换为float16类型为:',z)
x为: 5.0
x对象的data属性: <memory at 0x000001A10F900860>
x对象的size属性: 1
x对象的维度: 0
转换为bool类型的x为: True
True值转换为float16类型为: 1.0
#使用astype()转换DataFrame
df=pd.DataFrame([{'qty':'3','num':'50'},{'qty':'7','num':'20'}])
print(df.dtypes)
print('-----------------')
df['qty']=df['qty'].astype('int')
df['num']=df['num'].astype('int')
print(df.dtypes)
qty object
num object
dtype: object
-----------------
qty int32
num int32
dtype: object
#使用axis参数设置当前轴
arr=np.array([[0,1,2],[3,4,5]])
print(arr)
print(arr.sum(axis=0))
print(arr.sum(axis=1))
[[0 1 2]
[3 4 5]]
[3 5 7]
[ 3 12]
# Use reshape() to change an array's shape
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
print('秩为:', arr.ndim)
arr3D = arr.reshape(2, 2, 2)
print(arr3D)
print('秩为:', arr3D.ndim)
# Show an array's dimensions
s = np.array([[1, 2, 3], [4, 5, 6]])
print(s.shape)
# reshape() is used instead of assigning to s.shape: the NumPy reference
# discourages setting the shape attribute in place.
s = s.reshape(3, 2)
print(s)
秩为: 1
[[[0 1]
[2 3]]
[[4 5]
[6 7]]]
秩为: 3
(2, 3)
[[1 2]
[3 4]
[5 6]]
3.2 Pandas库
3.2.1 Series
s1=pd.Series([1,1,1,1,1])
print(s1)
print('------ 字典创建--------')
s2=pd.Series({'Longitude':39,'latitude':116,'Temperature':23})
print('First value in s2:',s2['Longitude'])
print('------ 序列作索引------')
s3=pd.Series([3.4,0.8,2.1,0.3,1.5],range(5,10))
print('First value in s3:',s3[5])
0 1
1 1
2 1
3 1
4 1
dtype: int64
------ 字典创建--------
First value in s2: 39
------ 序列作索引------
First value in s3: 3.4
s2=pd.Series({'Longitude':39,'latitude':116,'Temperature':23})
s2['City']='Beijing'
s2['Temperature']+=2
print(s2)
Longitude 39
latitude 116
Temperature 25
City Beijing
dtype: object
s3=pd.Series([3.4,0.8,2.1,0.3,1.5],range(5,10))
s3[s3>2]
5 3.4
7 2.1
dtype: float64
3.2.2 DataFrame
dict1={'col1':[1,2,8,9],'col2':['a','b','c','d']}
df=pd.DataFrame(dict1)
df
col1 | col2 | |
0 | 1 | a |
1 | 2 | b |
2 | 8 | c |
3 | 9 | d |
lista=[1,2,3,9]
listb=['a','b','v','h']
df=pd.DataFrame({'col1':lista,'col2':listb})
df
col1 | col2 | |
0 | 1 | a |
1 | 2 | b |
2 | 3 | v |
3 | 9 | h |
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['BJ', 'SH', 'GZ', 'SZ'],
    columns=['q', 'r', 's', 't'],
)
# A single .loc[row, column] assignment writes into `data` itself. The chained
# form data['q']['BJ'] = 8 assigns into an intermediate Series, which pandas
# warns about and which leaves `data` unchanged when Copy-on-Write is enabled.
data.loc['BJ', 'q'] = 8
# A scalar assigned to a column is broadcast to every row.
data['t'] = 8
data
q | r | s | t | |
BJ | 8 | 1 | 2 | 8 |
SH | 4 | 5 | 6 | 8 |
GZ | 8 | 9 | 10 | 8 |
SZ | 12 | 13 | 14 | 8 |
data['u']=9
data.drop('SZ',axis=0)
q | r | s | t | u | |
BJ | 8 | 1 | 2 | 8 | 9 |
SH | 4 | 5 | 6 | 8 | 9 |
GZ | 8 | 9 | 10 | 8 | 9 |
df=pd.DataFrame(np.arange(16).reshape(4,4),index=['BJ','SH','GZ','SZ'],columns=['q','r','s','t'])
df.sum(axis=0)
q 24
r 28
s 32
t 36
dtype: int64
df.sum(axis=1)
BJ 6
SH 22
GZ 38
SZ 54
dtype: int64
df.mean(axis=1)
BJ 1.5
SH 5.5
GZ 9.5
SZ 13.5
dtype: float64
a=np.arange(0,60,5)
a=a.reshape(3,4)
df=pd.DataFrame(a)
print(df)
print('------------------------')
print(df.std())
0 1 2 3
0 0 5 10 15
1 20 25 30 35
2 40 45 50 55
------------------------
0 20.0
1 20.0
2 20.0
3 20.0
dtype: float64
3.3 Matplotlib
fig = plt.figure()
# Three panels of a 2x2 grid. Each handle gets its own name; previously the
# second add_subplot call rebound ax1 and the first axes handle was lost.
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
fig.savefig('./kb.jpg')
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
rect=plt.Rectangle((0.2,0.75),0.4,0.15,color='r',alpha=0.3)
circ=plt.Circle((0.7,0.2),0.15,color='b',alpha=0.3)
pgon=plt.Polygon([[0.15,0.15],[0.35,0.4],[0.2,0.6]],color='g',alpha=0.3)
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)
plt.show()
fig.savefig('./fang.jpg')
x=np.linspace(-10,10,100)
y=np.sin(x)
plt.plot(x,y,marker='o')
plt.savefig('./sin.jpg')
def randrange(n, randFloor, randCeil):
    """Draw ``n`` uniform random floats between ``randFloor`` and ``randCeil``.

    Args:
        n: Number of samples to draw.
        randFloor: Lower end of the range (included).
        randCeil: Upper end of the range (excluded when it is greater than
            ``randFloor``).

    Returns:
        A one-dimensional NumPy array of length ``n``.
    """
    # np.random.rand samples from [0, 1); scale and shift it to the requested range.
    rnd = np.random.rand(n)
    return (randCeil - randFloor) * rnd + randFloor
# SimHei is selected so the Chinese title and series labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
n = 100
# One scatter series per (z-min, z-max, colour, marker, label) tuple.
for zmin, zmax, c, m, l in [(4, 15, 'r', 'o', '低值'), (13, 40, 'g', '*', '高值')]:
    x = randrange(n, 0, 20)
    y = randrange(n, 0, 20)
    z = randrange(n, zmin, zmax)
    # Marker area grows with the z value.
    ax.scatter(x, y, z, c=c, marker=m, label=l, s=z * 6)
# Each axis has its own setter. Calling set_xlabel three times left only
# 'Z-value' on the x-axis and the y and z axes unlabeled.
ax.set_xlabel('X-value')
ax.set_ylabel('Y-value')
ax.set_zlabel('Z-value')
ax.set_title("高/低值3D散点图", alpha=0.6, size=15, weight='bold')
plt.show()
fig.savefig('./散点图.jpg')
4.什么是图灵测试
图灵测试最早出现在图灵于1950年发表的论文《计算机器与智能》(Computing Machinery and Intelligence)中,是判断机器是否具有人工智能的一套方法。图灵测试是人工智能最初的概念,它甚至早于“人工智能”这个词本身——“人工智能”一词直到1956年才被提出。图灵测试的方法很简单:让测试者与被测试者(一个人和一台机器)隔开,通过一些装置(如键盘)向被测试者随意提问。进行多次测试后,如果有超过30%的测试者不能确定被测试者是人还是机器,那么这台机器就通过了测试,并被认为具有人工智能。图灵测试的提出者图灵被誉为计算机科学之父、人工智能之父。
def randrange(n, randFloor, randCeil):
    """Draw ``n`` uniform random floats between ``randFloor`` and ``randCeil``."""
    # np.random.rand samples from [0, 1); scale and shift it to the requested range.
    rnd = np.random.rand(n)
    return (randCeil - randFloor) * rnd + randFloor


# SimHei is selected so the Chinese title and series labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
n = 100
# One scatter series per (z-min, z-max, colour, marker, label) tuple.
for zmin, zmax, c, m, l in [(4, 15, 'r', 'o', '低值'), (13, 40, 'g', '*', '高值')]:
    x = randrange(n, 0, 20)
    y = randrange(n, 0, 20)
    z = randrange(n, zmin, zmax)
    # Marker area grows with the z value.
    ax.scatter(x, y, z, c=c, marker=m, label=l, s=z * 6)
# Each axis has its own setter, so all three axes end up labeled.
ax.set_xlabel('X-value')
ax.set_ylabel('Y-value')
ax.set_zlabel('Z-value')
ax.set_title("高/低值3D散点图", alpha=0.6, size=15, weight='bold')
plt.show()
fig.savefig('./散点图.jpg')
[外链图片转存中...(img-9uIJlYUQ-1677907130908)]
## 4.什么是图灵测试
图灵测试最早出现在图灵于1950年发表的论文《计算机器与智能》(Computing Machinery and Intelligence)中,是判断机器是否具有人工智能的一套方法。图灵测试是人工智能最初的概念,它甚至早于“人工智能”这个词本身——“人工智能”一词直到1956年才被提出。图灵测试的方法很简单:让测试者与被测试者(一个人和一台机器)隔开,通过一些装置(如键盘)向被测试者随意提问。进行多次测试后,如果有超过30%的测试者不能确定被测试者是人还是机器,那么这台机器就通过了测试,并被认为具有人工智能。图灵测试的提出者图灵被誉为计算机科学之父、人工智能之父。