1 import requests
2 from urllib.parse import urlencode
3 from pyquery import PyQuery as pq
4 from pymongo import MongoClient
5
# Highest page number the script requests (pages are numbered from 1).
max_page = 10

# Endpoint every page request is built on; the encoded query string is
# appended directly after the trailing '?'.
base_url = 'https://m.weibo.cn/api/container/getIndex?'

# Headers sent with every request to the endpoint above.
headers = {
    'Host': 'm.weibo.cn',
    'Referer': 'https://m.weibo.cn/u/2803301701',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.75 Safari/537.36'
    ),
    'X-Requested-With': 'XMLHttpRequest',
}

# MongoDB handles: a client built with default arguments, then the "weibo"
# database and the "weibo" collection inside it.
client = MongoClient()
db = client.weibo
collection = db.weibo
17
18
def get_page(page, timeout=10):
    """Fetch one page of the container feed and decode it as JSON.

    Args:
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait on the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        A ``(data, page)`` tuple. ``data`` is the decoded JSON body when the
        server answers with HTTP 200, and ``None`` on any failure (non-200
        status, network error, or a body that is not valid JSON). A tuple is
        returned in every case so the result can always be unpacked.
    """
    params = {
        'type': 'uid',
        'value': '2803301701',
        'containerid': '1076032803301701',
        'page': page,
    }
    url = base_url + urlencode(params)
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json(), page
        print('Error', 'unexpected status code', response.status_code)
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection failures and timeouts;
        # ValueError covers JSON decoding failures on requests versions
        # where that error is not a RequestException subclass.
        print('Error', e.args)
    return None, page
33
34
def parse_page(json, page: int):
    """Yield one simplified record per post found in a page of feed JSON.

    Args:
        json: Decoded response body (a dict), or a falsy value when the
            fetch failed. Cards are read from ``json['data']['cards']``.
        page: Page number the body was fetched for.

    Yields:
        A dict with keys ``id``, ``text`` (markup stripped via PyQuery),
        ``attitudes``, ``comments`` and ``reposts``, taken from each card's
        ``mblog`` entry.

    Cards without an ``mblog`` entry are skipped instead of raising, and a
    missing ``data`` or ``cards`` key yields nothing.
    """
    if not json:
        return
    cards = (json.get('data') or {}).get('cards') or []
    for index, card in enumerate(cards):
        # The card at index 1 of the first page is always skipped.
        # NOTE(review): presumably a non-post card on that page - confirm.
        if page == 1 and index == 1:
            continue
        mblog = card.get('mblog')
        if not mblog:
            # Not every card carries a post; nothing to extract from these.
            continue
        yield {
            'id': mblog.get('id'),
            'text': pq(mblog.get('text')).text(),
            'attitudes': mblog.get('attitudes_count'),
            'comments': mblog.get('comments_count'),
            'reposts': mblog.get('reposts_count'),
        }
50
51
52 # def save_to_mongo(result):
53 # if collection.insert(result):
54 # print('Saved to Mongo')
55
56
if __name__ == '__main__':
    # Walk pages 1..max_page and print every record parsed from each page.
    for page in range(1, max_page + 1):
        fetched = get_page(page)
        if fetched is None:
            # A failed fetch may come back as None; skip that page rather
            # than crash while unpacking the result below.
            continue
        for result in parse_page(*fetched):
            print(result)
            # save_to_mongo(result)
运行结果: