# coding=utf-8
"""根据搜索词下载百度图片"""
import os
import re
import sys
import urllib
import urllib.parse

import requests
def getURL(keyword, page):
    """Build the Baidu image-search "flip" URL for one request.

    Args:
        keyword: Search term. It is percent-encoded before being placed in
            the ``word`` query parameter.
        page: Integer sent as the ``pn`` query parameter; its hexadecimal
            form (as produced by ``hex``) is sent as ``gsm``.

    Returns:
        The complete request URL as a string.
    """
    # '/' is deliberately left unescaped (safe='/'); everything else that is
    # not URL-safe, including non-ASCII text, is percent-encoded as UTF-8.
    keyword = urllib.parse.quote(keyword, safe='/')
    prefix = "http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word="
    suffix = "&ct=&ic=0&lm=-1&width=0&height=0"
    return "{}{}&pn={}&gsm={}{}".format(
        prefix, keyword, page, hex(page), suffix)
def getImageURLs(url):
    """Fetch a search-result page and extract the image URLs embedded in it.

    Args:
        url: Address of the result page to download.

    Returns:
        A list with the text captured from every ``"objURL":"...",`` entry
        found in the page, in document order. An empty list is returned when
        the request fails or the server answers with an HTTP error status;
        the error is printed in that case.
    """
    try:
        # Bound the wait so a stalled connection cannot hang the whole run.
        response = requests.get(url, timeout=15)
        # Treat 4xx/5xx answers as failures instead of scanning an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return []
    # re.S lets '.' span newlines in case an entry is split across lines.
    return re.findall(r'"objURL":"(.*?)",', response.text, re.S)
def downImage(imageURLs, path):
    """Download every image in a list of URLs into a directory.

    Files are named ``1.jpg``, ``2.jpg``, ... after the 1-based position of
    each URL in ``imageURLs``. A failure on one URL is printed and skipped so
    the remaining downloads still run.

    Args:
        imageURLs: Iterable of image URLs to fetch.
        path: Directory that receives the files. It is created when missing.
            An empty string writes into the current working directory.

    Returns:
        None.
    """
    # Make sure the destination exists; an empty path needs no directory.
    if path:
        os.makedirs(path, exist_ok=True)
    for i, imageURL in enumerate(imageURLs, start=1):
        try:
            image = requests.get(imageURL, timeout=15)
            # Do not store an HTTP error body under a .jpg name.
            image.raise_for_status()
            # Join with a separator so the file lands inside `path` rather
            # than next to it with the directory name glued onto the file.
            with open(os.path.join(path, '%d.jpg' % i), 'wb') as f:
                f.write(image.content)
            print('成功下载第%s张图片: %s' % (str(i), str(imageURL)))
        except Exception as e:
            # Best effort: any error on one image must not stop the batch.
            print('下载第%s张图片时失败: %s' % (str(i), str(imageURL)))
            print(e)
if __name__ == '__main__':
    # Script entry point: ask for a keyword, collect image URLs from several
    # result pages, then download the unique ones.
    keyword = input("请输入关键字:")
    path = r"C:\Users\12703\Desktop\1\images"
    # Multiplier applied to the request index to get the value sent to getURL.
    pageNumber = 30
    # Index of the last request; requests 0..imageNumber (inclusive) are made.
    imageNumber = 10
    imageUrlLists = []
    for pageBegin in range(imageNumber + 1):
        # Report a 1-based request counter (previously a list literal such
        # as "[0]" was printed).
        print("第{}次请求数据".format(pageBegin + 1))
        url = getURL(keyword, pageBegin * pageNumber)
        imageUrlLists.extend(getImageURLs(url))
    # Drop duplicates while keeping first-seen order, so the numbering of the
    # downloaded files is stable between runs (dict keeps insertion order on
    # Python 3.7+).
    downImage(list(dict.fromkeys(imageUrlLists)), path)
















