# coding:utf-8
from lxml import etree
import requests
# Scrape the Douban Music Top 250 listing pages and print one line per entry:
# the values the XPaths below extract as title, link, author text and image URL.

PAGE_COUNT = 10        # number of listing pages requested
PAGE_SIZE = 25         # step of the `start` query parameter between pages
REQUEST_TIMEOUT = 10   # seconds; without a timeout requests.get can block forever

# Every field lives under the same table rows, so the shared prefix is kept once.
ROW_XPATH = '//*[@id="content"]/div/div[1]/div/table/tr'

for page in range(PAGE_COUNT):
    url = 'https://music.douban.com/top250?start={}'.format(page * PAGE_SIZE)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error status instead of silently parsing an
    # error page that contains none of the expected rows.
    response.raise_for_status()

    tree = etree.HTML(response.text)
    titles = tree.xpath(ROW_XPATH + '/td[2]/div/a/text()')
    hrefs = tree.xpath(ROW_XPATH + '/td[2]/div/a/@href')
    authors = tree.xpath(ROW_XPATH + '/td[2]/div/p/text()')
    imgs = tree.xpath(ROW_XPATH + '/td[1]/a/img/@src')

    # zip() stops at the shortest list, so a page that yields fewer results
    # than PAGE_SIZE no longer raises IndexError as fixed-range indexing did.
    for title, href, author, img in zip(titles, hrefs, authors, imgs):
        # A single-argument print(...) call parses under both Python 2 and 3
        # and emits the same space-separated line as the old print statement.
        print(' '.join((title, href, author, img)))