#3.1用Python获取本地数据(打开文件,读文件,写文件,关闭文件)
file_obj=open(filename,mode='r',buffering=-1,...)
mode为可选参数,默认值为r
buffering也为可选参数,默认值为-1(0代表不缓冲,1或大于1的值表示缓冲一行或指定缓冲区大小)
>>>f1=open('d:\\infile.txt')#从D盘读取文件,用系统默认缓冲区大小
>>>f2=open(r'd:\outfile.txt','w')#以写模式打开一个文件
>>>f3=open('record.dat','wb',0)#写一个二进制文件
w以写模式打开(清空原内容)
a以追加模式打开(从EOF开始,必要时创建新文件)
r+以读写模式打开
w+以读写模式打开(清空原内容)
a+以读和追加模式打开
rb以二进制读模式打开
wb以二进制写模式打开(参见w)
ab以二进制追加模式打开(参见a)
rb+以二进制读写模式打开(参见r+)
wb+以二进制读写模式打开(参见w+)
ab+以二进制读写模式打开(参见a+)
#文件相关函数
#有关闭和读写文件相关的函数/方法
f.read(),f.write(),f.readline(),f.readlines(),f.writelines(),f.close(),f.seek()
#将一个字符串写入文件
>>>f=open('firstpro.txt','w')
>>>f.write('Hello, World!')
>>>f.close()
output:Hello, World!
>>>with open('firstpro.txt','w') as f:
...    f.write('Hello, World!')
>>>with open('firstpro.txt') as f:
...    p1=f.read(5)
...    p2=f.read()#剩下的值
output:p1:'Hello'
p2:', World!'
>>>print(p1,p2)
# Number every line of companies.txt and save the result to scompanies.txt.
with open('companies.txt') as f1:
    cNames = f1.readlines()
# enumerate(..., start=1) supplies the 1-based line number directly.
# The '.' separator is chosen so the result matches the sample output
# listed after this snippet ("1.GOOGLE Inc.").
cNames = [str(i) + '.' + name for i, name in enumerate(cNames, start=1)]
with open('scompanies.txt', 'w') as f2:
    f2.writelines(cNames)  # write the numbered lines to the file
#通过readlines读取文件数据
Output:
1.GOOGLE Inc.
2.Microsoft Corporation
3.Apple Inc.
4.Facebook, Inc.
#标准文件(stdin,stdout,stderr)
#3.2网络数据获取(爬虫)
(1)抓取
urllib内建模块:urllib.request
Requests第三方库
Scrapy框架
(2)解析
BeautifulSoup库
re模块(正则表达式)
#用request库抓取网页内容
import requests
r = requests.get('')#抓取的网页URL,可能会改变
r.status_code
Out[3]: 200 #抓取正常标志
r.text # 显示抓取内容,解码(r.encoding,r.content,r.json())
#用BeautifulSoup库进行网页数据解析
from bs4 import BeautifulSoup
markup = '<p class="title"><b>The Little Prince</b></p>'
soup = BeautifulSoup(markup,"lxml")
soup.b #查看名称
Out[32]: <b>The Little Prince</b>
soup.find_all('b') #查找所有名称
Out[33]: [<b>The Little Prince</b>]
#获取短评
# Fetch a Douban book comment page and print the text of every short comment.
import requests
from bs4 import BeautifulSoup

r = requests.get('https://book.douban.com/subject/1084336/comments/')
soup = BeautifulSoup(r.text, 'lxml')
# Select every <span> element whose CSS class is "short".
pattern = soup.find_all('span', 'short')
for item in pattern:
    print(item.string)  # output the text of the matched element
#re正则表达式模块进行正则表达式处理(也是进行网页解析)
#3.3序列
aStr='Hello, World!'#字符串
aList=[2,3,5,7,11]#列表
aTuple=('Sunday','happy') #元组
#元组构成的列表
pList=[('AXP','American Express Company','78.51'),
('BA','The Boeing Company','184.76'),
('CAT','Caterpillar Inc.','96.39'),
('CSCO','Cisco Systems, Inc.','33.71'),
('CVX','Chevron Corporation','106.09')]
#标准类型运算符
>>> 'apple'<'banana'
True
>>> ('34'<'234')and('apple'<'banana')
False
#类型运算符
>>> week = ['1','2','3']
>>> print(week[1],week[1:2])
2 ['2']
#内建函数:类型转换list(),tuple(),str();其他len(),max(),sum(),zip(),sorted()
>>>list("Hello,World!") #字符串转成列表
['H','e','l','l','o',',','W','o','r','l','d','!']
>>>tuple("Hello,World!") #字符串转成元组
('H','e','l','l','o',',','W','o','r','l','d','!')
#字符串
pList=[('AXP','American Express Company','78.51'),
('BA','The Boeing Company','184.76'),
('CAT','Caterpillar Inc.','96.39'),
('CSCO','Cisco Systems, Inc.','33.71'),
('CVX','Chevron Corporation','106.09')]
>>>aStr='The Boeing Company'#单引号
>>>bStr="The Boeing Company"#双引号
>>>cStr="I'm a student."
>>>dStr="""The Boeing"""#三引号
#判断回文串
# Check whether a string is a palindrome, i.e. equal to its own reverse.
sStr = "acdhdcal"
# reversed() yields the characters back to front; join() rebuilds the string.
if sStr == ''.join(reversed(sStr)):
    print('Yes')
else:
    print('No')
#字符串操作
>>> song = "Blowing in the wind"
>>> song.find("the")
11
>>> song.find("the",8,12)
-1
>>> song
'Blowing in the wind'
>>> song.split(' ')
['Blowing', 'in', 'the', 'wind']
>>> song.replace("the","that")
'Blowing in that wind'
>>> aList = ["Hello","World"]
>>>' '.join(aList)
'Hello World'
#列表
>>>aList=list('Hello')
>>>aList
['H','e','l','l','o']
>>>aList=list('hello')
>>>aList
['h','e','l','l','o']
>>>aList[0]='H'
· aList=[1,2,3,4,5]
· names=['Zhao','Qian','Sun','Li']
· bList=[3,2,1,'Action']
· pList=[('AXP','American Express Company','78.51'),
('BA','The Boeing Company','184.76'),
('CAT','Caterpillar Inc.','96.39'),
('CSCO','Cisco Systems, Inc.','33.71'),
('CVX','Chevron Corporation','106.09')]
#某学校组织了一场校园歌手比赛,每个歌手的得分由10名评委和观众决定,最终得分的规则是去掉10名评委所打分数的一个最高分和一个最低分,再加上所有观众评委分数后的平均值。评委打出的10个分数为:9、9、8.5、10、7、8、8、9、8和10,观众评委打出的综合评分为9,请计算该歌手的最终得分。
# Final score of a contestant: discard the single highest and single lowest
# judge score, add the audience score, and average what remains.
jScores = [9, 9, 8.5, 10, 7, 8, 8, 9, 8, 10]
aScore = 9
# After sorting, the lowest score is first and the highest is last, so
# slicing off both ends drops exactly those two before the audience score
# is appended.
jScores = sorted(jScores)[1:-1] + [aScore]
aveScore = sum(jScores) / len(jScores)
print(aveScore)
[7,8,8,8,8.5,9,9,9,10,10]
[8,8,8,8.5,9,9,9,10]
[8,8,8,8.5,9,9,9,10,9]
8.72222222222
>>>numList=[3,11,5,8,16,1]
>>>fruitList=['apple','banana','pear','lemon','avocado']
>>>numList.sort(reverse=True)#原地按降序排序
>>>numList
[16,11,8,5,3,1]
>>>fruitList.sort(key=len) #按照长度排序
>>>fruitList
['pear','apple','lemon','banana','avocado']
#列表解析
>>>[x for x in range(10)]
[0,1,2,3,4,5,6,7,8,9]
>>>[x**2 for x in range(10)]
[0,1,4,9,16,25,36,49,64,81]
>>>[x**2 for x in range(10) if x**2<50]
[0,1,4,9,16,25,36,49]
>>>[(x+1,y+1)for x in range(2)for y in range(2)]
[(1,1),(1,2),(2,1),(2,2)]
#元组(圆括号表示,元素不可变)
>>>bTuple=(['Monday',1],2,3)
>>>bTuple
(['Monday',1],2,3)
>>>bTuple[0][1]
1
>>>len(bTuple)
3
>>>bTuple[1:]
(2,3)