#3.1用Python获取本地数据(打开文件,读文件,写文件,关闭文件)

file_obj=open(filename,mode='r',buffering=-1,...)
mode为可选参数,默认值为r
buffering也为可选参数,默认值为-1(0代表不缓冲,1或大于1的值表示缓冲一行或指定缓冲区大小)
>>>f1=open('d:\\infile.txt')#从D盘读取文件,用系统默认缓冲区大小
>>>f2=open(r'd:\outfile.txt','w')#以写模式打开一个文件
>>>f3=open('record.dat','wb',0)#写一个二进制文件
w以写模式打开(清空原内容)
a以追加模式打开(从EOF开始,必要时创建新文件)
r+以读写模式打开
w+以读写模式打开(清空原内容)
a+以读和追加模式打开
rb以二进制读模式打开
wb以二进制写模式打开(参见w)
ab以二进制追加模式打开(参见a)
rb+以二进制读写模式打开(参见r+)
wb+以二进制读写模式打开(参见w+)
ab+以二进制读写模式打开(参见a+)

#文件相关函数
#有关闭和读写文件相关的函数/方法
f.read(),f.write(),f.readline(),f.readlines(),f.writelines(),f.close(),f.seek()

#将一个字符串写入文件
>>>f=open('firstpro.txt','w')
>>>f.write("Hello, World!")
>>>f.close()
output:Hello, World!

>>>with open('firstpro.txt','w+') as f:#w+为读写模式;默认的r模式下不能调用write
       f.write('Hello, World!')
       f.seek(0)#写入后回到文件开头再读取
       p1=f.read(5)
       p2=f.read()#剩下的值
output:p1:'Hello'
       p2:', World!'
print(p1,p2) 

with open('companies.txt') as f1:
    cNames=f1.readlines()
    for i in range(0, len(cNames)):
        cNames[i]=str(i+1)+'.'+cNames[i]
with open('scompanies.txt','w') as f2:
    f2.writelines(cNames)#写入文件
#通过readlines读取文件数据
Output:
1.GOOGLE Inc.
2.Microsoft Corporation
3.Apple Inc.
4.Facebook, Inc.

#标准文件(stdin,stdout,stderr)

#3.2网络数据获取(爬虫)
(1)抓取
     urllib内建模块:urllib.request
     Requests第三方库
     Scrapy框架
(2)解析
     BeautifulSoup库
     re模块(正则表达式)

#用request库抓取网页内容
import requests
r = requests.get('')#抓取的网页URL,可能会改变
r.status_code
Out[3]: 200 #抓取正常标志
r.text # 显示抓取内容,解码(r.encoding,r.content,r.json())

#用BeautifulSoup库进行网页数据解析

from bs4 import BeautifulSoup
markup = '<p class="title"><b>The Little Prince</b></p>'
soup = BeautifulSoup(markup,"lxml")
soup.b #查看名称
Out[32]: <b>The Little Prince</b>
soup.find_all('b') #查找所有名称
Out[33]: [<b>The Little Prince</b>]

#获取短评
import requests
from bs4 import BeautifulSoup 
r=requests.get('https://book.douban.com/subject/1084336/comments/')
soup=BeautifulSoup(r.text,'lxml') 
pattern=soup.find_all('span','short') 
for item in pattern:
    print(item. string)#输出

#re正则表达式模块进行正则表达式处理(也是进行网页解析)

#3.3序列
aStr='Hello, World!'#字符串
alist=[2,3,5,7,11]#列表
aTuple=('Sunday','happy') #元组
#元组构成的列表
pList=[('AXP','American Express Company','78.51'),
       ('BA','The Boeing Company','184.76'),
       ('CAT','Caterpillar Inc.','96.39'),
       ('CSCO','Cisco Systems, Inc.','33.71'),
       ('CVX','Chevron Corporation','106.09')]

#标准类型运算符
>>> 'apple'<'banana'
True
>>> ('34'<'234')and('apple'<'banana')
False

#类型运算符
>>> week = ['1','2','3']
>>> print(week[1],week[1:2])
2 ['2']

#内建函数类型转换len(),max(),sum(),zip(),sorted()
>>>list("Hello,World!") #字符串转成列表
['H','e','l','l','o',',','W','o','r','l','d','!']
>>>tuple("Hello,World!") #字符串转成元组
('H','e','l','l','o',',','W','o','r','l','d','!')

#字符串
If=[('AXP','American Express Company','78.51'),
    ('BA','The Boeing Company','184.76'),
    ('CAT','Caterpillar Inc.','96.39'),
    ('CSCO','Cisco Systems, Inc.','33.71'),
    ('CVX','Chevron Corporation','106.09')]
>>>aStr='The Boeing Company'#单引号
>>>bStr="The Boeing Company"#双引号
>>>cStr="I'm a student."
>>>dStr="""The Boeing"""#三引号

#判断回文串 
sStr="acdhdcal"
if(sStr=="".join(reversed(sStr))):
  print("Yes")
else:
  print("No")

#字符串操作
>>> song = "Blowing in the wind"
>>> song.find("the")
11
>>> song.find("the",8,12)
-1
>>> song
'Blowing in the wind'
>>> song.split(' ')
['Blowing', 'in', 'the', 'wind']
>>> song.replace("the","that")
'Blowing in that wind'
>>> aList = ["Hello","World"]
>>>' '.join(aList)
'Hello World'

#列表
>>>aList=list("Hello")
>>>aList
['H','e','l','l','o']
>>>aList=list("hello")
>>>aList
['h','e','l','l','o']
>>>aList[0]='H'

· aList=[1,2,3,4,5]
· names=['Zhao','Qian','Sun','Li']
· bList=[3,2,1,'Action']
· pList=[('AXP','American Express Company','78.51'),
          ('BA','The Boeing Company','184.76'),
          ('CAT','Caterpillar Inc.','96.39'),
          ('CSCO','Cisco Systems, Inc.','33.71'),
          ('CVX','Chevron Corporation','106.09')]

#某学校组织了一场校园歌手比赛,每个歌手的得分由10名评委和观众决定,最终得分的规则是去掉10名评委所打分数的一个最高分和一个最低分,再加上所有观众评委分数后的平均值。评委打出的10个分数为:9、9、8.5、10、7、8、8、9、8和10,观众评委打出的综合评分为9,请计算该歌手的最终得分。
jScores=[9,9,8.5,10,7,8,8,9,8,10]
aScore=9
jScores. sort()
jScores. pop() 
jScores. pop(0) 
jScores. append(aScore) 
aveScore=sum(jScores)/len(jScores) 
print(aveScore)

[7,8,8,8,8.5,9,9,9,10,10]
[8,8,8,8.5,9,9,9,10]
[8,8,8,8.5,9,9,9,10,9]
8.72222222222

>>>numList=[3,11,5,8,16,1]
>>>fruitList=['apple','banana','pear','lemon','avocado']
>>>numList.sort(reverse=True)#原地降序排序
>>>numList
[16,11,8,5,3,1]
>>>fruitList.sort(key=len) #按照长度排序
>>>fruitList
['pear','apple','lemon','banana','avocado']

#列表解析
>>>[x for x in range(10)]
[0,1,2,3,4,5,6,7,8,9]
>>>[x**2 for x in range(10)]
[0,1,4,9,16,25,36,49,64,81]
>>>[x**2 for x in range(10) if x**2<50]
[0,1,4,9,16,25,36,49]
>>>[(x+1,y+1)for x in range(2)for y in range(2)]
[(1,1),(1,2),(2,1),(2,2)]
 
#元组(圆括号表示,元素不可变)
>>>bTuple=(['Monday',1],2,3) 
>>>bTuple 
(['Monday',1],2,3) 
>>>bTuple[0][1]
1
>>>len(bTuple) 
3 
>>>bTuple[1:]
(2,3)