# Scraper for the Kugou Music (酷狗音乐) chart pages: http://www.kugou.com/yy/rank/home
import requests
from bs4 import BeautifulSoup
import time
# Request headers sent with every page fetch: a desktop Chrome user-agent
# string (presumably so the site serves the regular page; not verified here).
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.122 Safari/537.36'
    ),
}
def _split_title(text):
    """Split a chart entry title into a ``(singer, song)`` pair.

    The padded separator ``' - '`` is tried first so that a hyphen inside a
    name does not cut the title in the wrong place; a bare ``'-'`` is the
    fallback.  Only the first separator splits, so any further hyphens stay
    in the song part.  Both parts are stripped of surrounding whitespace.
    A title with no hyphen at all is returned as ``('', title)``.
    """
    text = text.strip()
    for separator in (' - ', '-'):
        singer, found, song = text.partition(separator)
        if found:
            return singer.strip(), song.strip()
    return '', text


def get_info(url):
    """Fetch one chart page and print a record for every song found on it.

    Each record is a dict with the keys ``rank``, ``singer``, ``song`` and
    ``time``; records are printed to stdout one per line.  Nothing is
    returned.

    Args:
        url: Address of the chart page to download.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not answer within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')

    rank_tags = soup.select('span.pc_temp_num')
    title_tags = soup.select('div.pc_temp_songlist>ul>li>a')
    time_tags = soup.select('span.pc_temp_tips_r>span')

    # NOTE(review): zip() pairs the three lists by position and stops at the
    # shortest one, so this assumes every entry on the page matches all three
    # selectors - confirm against the live markup.
    for rank_tag, title_tag, time_tag in zip(rank_tags, title_tags, time_tags):
        singer, song = _split_title(title_tag.get_text())
        record = {
            'rank': rank_tag.get_text().strip(),
            'singer': singer,
            'song': song,
            'time': time_tag.get_text().strip(),
        }
        print(record)
if __name__ == '__main__':
    # Chart pages are numbered starting at 1.  Only the first page is fetched
    # here as a test run (the original note says each page holds 22 entries).
    url_template = 'http://www.kugou.com/yy/rank/home/{}-8888.html'
    for page_number in range(1, 2):
        get_info(url_template.format(page_number))
        time.sleep(2)  # wait two seconds after each page