日期:2021.05.06
作者:杨传伟
完成任务:学习爬取后台 JSON 数据、使用 re 正则匹配字符串、XPath 解析,以及用 requests 请求 JSON 数据。爬取爱奇艺电影片库 5000+ 条电影数据(电影名字、播放链接、评分、播放权限)并存到数据库。
爬虫源码:
1 import time
2 import traceback
3 import requests
4 from lxml import etree
5 import re
6 from bs4 import BeautifulSoup
7 from lxml.html.diff import end_tag
8 import json
9 import pymysql
# Connect to the database and obtain a cursor.
def get_conn():
    """Open a pymysql connection to the ``MovieRankings`` database.

    :return: ``(conn, cursor)`` -- the connection and a cursor created from it.
    """
    # NOTE(review): host and credentials are hard-coded here; consider loading
    # them from configuration or the environment instead of source code.
    conn = pymysql.connect(host="82.157.112.34",
                           user="root",
                           password="root",
                           db="MovieRankings",
                           charset="utf8")
    # Default cursor: result rows come back as tuples.
    cursor = conn.cursor()
    # Identity comparison with a short-circuiting ``and`` instead of
    # ``!= None`` combined with the bitwise ``&`` operator.
    if conn is not None and cursor is not None:
        print("数据库连接成功!游标创建成功!")
    else:
        print("数据库连接失败!")
    return conn, cursor
# Close the cursor and the database connection.
def close_conn(conn, cursor):
    """Close ``cursor`` and then ``conn``, skipping whichever is falsy.

    The connection is closed even when closing the cursor raises, so a
    failing cursor can no longer leak the connection; the cursor's exception
    still propagates to the caller afterwards.

    :param conn: connection object exposing ``close()``, or ``None``.
    :param cursor: cursor object exposing ``close()``, or ``None``.
    :return: always ``1``.
    """
    try:
        if cursor:
            cursor.close()
    finally:
        if conn:
            conn.close()
    return 1
def _parse_pay_state(pay_mark_url):
    """Turn an iQiyi pay-mark badge URL into a playback-permission label.

    The API only exposes the permission through the badge image, e.g.
        exclusive: https://www.iqiyipic.com/common/fix/site-v4/video-mark/only.png
        VIP:       https://pic0.iqiyipic.com/common/20171106/ac/1b/vip_100000_v_601_0_21.png
        star:      https://www.iqiyipic.com/common/fix/site-v4/video-mark/star-movie.png
    so the label is chosen from the first three characters of the file name.

    :param pay_mark_url: badge image URL; empty or ``None`` means no badge.
    :return: "免费" when there is no badge, "独播" / "星钻" / "VIP" for the
        known badges, otherwise the raw three-character prefix.
    """
    if not pay_mark_url:
        # No badge image at all: the movie is free to watch.
        return "免费"
    file_name = pay_mark_url.split('/')[-1]
    if file_name.endswith(".png"):
        file_name = file_name[:-len(".png")]
    print(file_name)
    prefix = file_name[0:3]
    labels = {"onl": "独播", "sta": "星钻", "vip": "VIP"}
    return labels.get(prefix, prefix)


def _parse_movie(j):
    """Build one ``[name, score, play_url, state]`` row from a movie JSON dict."""
    name = j['name']
    print(name)
    # Some movies have no score; store a placeholder instead of failing.
    try:
        score = j['score']
        print(score)
    except KeyError:
        print("评分---KeyError")
        score = "iqy暂无评分"
    link = j['playUrl']
    # .get(): a movie without the badge key is treated like one with an empty
    # badge instead of aborting the whole crawl with a KeyError.
    pay_text = j.get('payMarkUrl')
    print(pay_text)
    state = _parse_pay_state(pay_text)
    print(state)
    return [name, score, link, state]


def get_iqy():
    """Crawl the iQiyi movie library API and return one row per movie.

    First prints how many rows the ``movieiqy`` table currently holds, then
    requests result pages 1-136 (48 movies per page, per ``ret_num=48``) and
    parses every movie in each page.  Crawling stops early, returning what
    has been collected so far, as soon as a response is not valid JSON or
    lacks ``data.list``.  Network errors and timeouts raised by ``requests``
    propagate to the caller.

    :return: list of ``[name, score, play_url, state]`` lists.
    """
    # Only the row count is needed from the database, so release the
    # connection before the (long) crawl instead of leaking it.
    conn, cursor = get_conn()
    try:
        cursor.execute("select count(*) from movieiqy")
        # fetchall() returns nested tuples such as ((count,),).
        all_num = cursor.fetchall()[0][0]
    finally:
        close_conn(conn, cursor)
    print("movieiqy数据库有", all_num, "条数据!")

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
    }
    url_prefix = "https://pcw-api.iqiyi.com/search/recommend/list?channel_id=1&data_type=1&mode=11&page_id="
    url_suffix = "&ret_num=48&session=ad1d98bb953b7e5852ff097c088d66f2"

    dataRes = []  # one [name, score, play_url, state] row per movie
    # Per the original author's note, page 137 came back empty, i.e. 1-136
    # covered the whole library when this was written.
    for i in range(1, 137):
        url_0 = url_prefix + str(i) + url_suffix
        print(url_0)  # show the assembled URL
        # A timeout keeps one stalled request from hanging the crawl forever.
        response = requests.get(url=url_0, headers=headers, timeout=30)
        response.encoding = "utf-8"
        try:
            json_list = json.loads(response.text)['data']['list']
        except (ValueError, KeyError, TypeError):
            # Empty or malformed page: treat it as the end of the data.
            print("捕获异常!")
            return dataRes
        for j in json_list or []:
            dataRes.append(_parse_movie(j))
        print('___________________________')
    return dataRes
131
def insert_iqy():
    """Crawl iQiyi movies with ``get_iqy()`` and insert them into ``movieiqy``.

    Rows that violate a key constraint are reported and skipped.  All
    inserts are committed together after the loop.  Any other error is
    printed with its traceback and the function returns normally; the
    connection and cursor are always closed.

    :return: ``None``.
    """
    cursor = None
    conn = None
    try:
        movies = get_iqy()
        print(f"{time.asctime()}开始插入爱奇艺电影数据")
        conn, cursor = get_conn()
        # NOTE(review): id is always sent as 0 -- presumably the column is
        # AUTO_INCREMENT and MySQL assigns the real id; confirm against schema.
        sql = "insert into movieiqy (id,name,score,path,state) values(%s,%s,%s,%s,%s)"
        for count, item in enumerate(movies, start=1):
            print(item)
            # Visual separator after every page-sized batch of 48 movies.
            if count % 48 == 0:
                print('___________________________')
            # Skip rows that collide with an existing key.
            try:
                cursor.execute(sql, [0, item[0], item[1], item[2], item[3]])
            except pymysql.err.IntegrityError:
                print("重复!跳过!")
        conn.commit()  # commit the inserts
        print(f"{time.asctime()}插入爱奇艺电影数据完毕")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not swallowed.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
159
if __name__ == '__main__':
    # Crawl and store in one go; call get_iqy() instead to crawl without
    # writing to the database.
    insert_iqy()
截图示例:
5.6 李楠
今日实现了“我的”页面中“想看”“在看”“看过”三个列表内容的显示。原先想使用 Fragment 嵌套,
但是没有成功,于是改为给 RadioGroup 设置选中状态变化的监听,根据被选中的 RadioButton 显示对应的数据。
注意这部分逻辑不能写在 Activity 里,要写在 Fragment 中,并在 onCreateView 中返回视图:
Fragment部分
1 package com.example.cloudlibrary.Fragment;
2
3 import androidx.annotation.NonNull;
4 import androidx.annotation.Nullable;
5 import androidx.fragment.app.Fragment;
6 import androidx.fragment.app.FragmentActivity;
7 import androidx.viewpager.widget.ViewPager;
8
9 import android.os.Bundle;
10 import android.view.LayoutInflater;
11 import android.view.View;
12 import android.view.ViewGroup;
13 import android.widget.ListView;
14 import android.widget.RadioButton;
15 import android.widget.RadioGroup;
16
17 import com.example.cloudlibrary.Adapter.MyPagerListAdapter;
18 import com.example.cloudlibrary.Data.ListData;
19 import com.example.cloudlibrary.R;
20
21 import java.util.ArrayList;
22 import java.util.List;
23
24 public class MyPageFragment extends Fragment implements RadioGroup.OnCheckedChangeListener{
25 private RadioGroup my_pager_group;
26 private RadioButton want_look;
27 private RadioButton now_look;
28 private RadioButton have_look;
29 private List<ListData> list_data=new ArrayList<>();
30 private ListView mypager_like_list;
31 private ListData listData;
32 private View view;
33 private MyPagerListAdapter myPagerListAdapter;
34
35
36 public static final int PAGE_ONE = 0;
37 public static final int PAGE_TWO = 1;
38 public static final int PAGE_THREE = 2;
39 public MyPageFragment(){
40 }
41 @Override
42 public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle savedInstanceState) {
43 view = inflater.inflate(R.layout.activity_my_page, container, false);
44 my_pager_group = (RadioGroup) view.findViewById(R.id.my_pager_group);
45 want_look = (RadioButton) view.findViewById(R.id.want_look);
46 now_look = (RadioButton) view.findViewById(R.id.now_look);
47 have_look = (RadioButton) view.findViewById(R.id.have_look);
48 my_pager_group.setOnCheckedChangeListener(this);
49 RadioButton[] rbs = new RadioButton[3];
50 rbs[0] =want_look;
51 rbs[1] = now_look;
52 rbs[2] = have_look;
53 return view;
54 }
55
56 @Override
57 public void onCheckedChanged(RadioGroup group, int checkedId) {
58 switch (checkedId) {
59 case R.id.want_look:
60 list_data=new ArrayList<>();
61 listData=new ListData("head1","吹响吧,上低音号!");
62 list_data.add(listData);
63 //list_list_Data= dataDao.queryData("",query_stuid.getText().toString());
64 mypager_like_list=(ListView)view.findViewById(R.id.mypager_like_list);
65 myPagerListAdapter=new MyPagerListAdapter(getContext(),list_data);
66 mypager_like_list.setAdapter(myPagerListAdapter);
67 break;
68 case R.id.now_look:
69 list_data=new ArrayList<>();
70 listData=new ListData("head2","AIR");
71 list_data.add(listData);
72 //list_list_Data= dataDao.queryData("",query_stuid.getText().toString());
73 mypager_like_list=(ListView)view.findViewById(R.id.mypager_like_list);
74 myPagerListAdapter=new MyPagerListAdapter(getContext(),list_data);
75 mypager_like_list.setAdapter(myPagerListAdapter);
76 break;
77 case R.id.have_look:
78 list_data=new ArrayList<>();
79 listData=new ListData("head3","百变小樱");
80 list_data.add(listData);
81 //list_list_Data= dataDao.queryData("",query_stuid.getText().toString());
82 mypager_like_list=(ListView)view.findViewById(R.id.mypager_like_list);
83 myPagerListAdapter=new MyPagerListAdapter(getContext(),list_data);
84 mypager_like_list.setAdapter(myPagerListAdapter);
85 break;
86 }
87 }
88
listview adapter部分:
1 package com.example.cloudlibrary.Adapter;
2
3 import android.content.Context;
4 import android.view.LayoutInflater;
5 import android.view.View;
6 import android.view.ViewGroup;
7 import android.widget.BaseAdapter;
8 import android.widget.ImageView;
9 import android.widget.ListAdapter;
10 import android.widget.TextView;
11
12 import com.example.cloudlibrary.Data.ListData;
13 import com.example.cloudlibrary.R;
14
15 import org.w3c.dom.Text;
16
17 import java.util.ArrayList;
18 import java.util.List;
19
20 public class MyPagerListAdapter extends BaseAdapter {
21 private List<ListData> list_data=new ArrayList<>();
22 private Context context;
23 public MyPagerListAdapter(Context context, List<ListData> list_data){
24 this.context=context;
25 this.list_data=list_data;
26 }
27 @Override
28 public int getCount() {
29 return list_data.size();
30 }
31
32 @Override
33 public Object getItem(int position) {
34 return null;
35 }
36
37 @Override
38 public long getItemId(int position) {
39 return 0;
40 }
41
42 @Override
43 public View getView(int position, View convertView, ViewGroup parent) {
44 if(convertView==null)
45 {
46 convertView= LayoutInflater.from(context).inflate(R.layout.mypager_like_list,null);
47 }
48 ImageView picture_list=(ImageView)convertView.findViewById(R.id.picture_list);
49 TextView name_list=(TextView)convertView.findViewById(R.id.name_list);
50 ListData listData=list_data.get(position);
51 name_list.setText(listData.getName());
52 switch (listData.getImg()){
53 case "head1":
54 picture_list.setImageResource(R.mipmap.head1);
55 break;
56 case "head2":
57 picture_list.setImageResource(R.mipmap.head2);
58 break;
59 case "head3":
60 picture_list.setImageResource(R.mipmap.head3);
61 break;
62 }
63 return convertView;
64 }
65
xml部分:
<?xml version="1.0" encoding="utf-8"?>
<!-- "My page" layout: profile header on top, then the favourites tab bar
     (want to watch / watching / watched) and the list for the chosen tab. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical">

    <!-- Profile header: avatar, vertical divider, name and phone number. -->
    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="80dp"
        android:orientation="horizontal">

        <ImageView
            android:id="@+id/first_head_picture"
            android:layout_width="60dp"
            android:layout_height="60dp"
            android:layout_marginTop="10dp"
            android:layout_marginLeft="10dp"
            android:contentDescription="头像"
            android:src="@mipmap/headpictrue" />

        <!-- dp instead of px so the divider scales with screen density. -->
        <View
            android:layout_width="1dp"
            android:layout_height="45dp"
            android:background="@color/login_line_color"
            android:layout_marginTop="20dp"
            android:layout_marginLeft="20dp" />

        <LinearLayout
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:orientation="vertical">

            <!-- Text sizes use sp so they follow the user's font-size setting. -->
            <TextView
                android:id="@+id/my_name"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_marginTop="10dp"
                android:textSize="20sp"
                android:layout_marginLeft="30dp"
                android:textColor="#EEAAFF"
                android:text="风吹过半夏" />

            <TextView
                android:id="@+id/my_phone"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_marginTop="10dp"
                android:textSize="20sp"
                android:layout_marginLeft="30dp"
                android:textColor="#EEAAFF"
                android:text="157****5171" />

        </LinearLayout>

    </LinearLayout>

    <!-- Favourites section. -->
    <LinearLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:orientation="vertical">

        <LinearLayout
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:orientation="horizontal">

            <TextView
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:text="我的收藏"
                android:textSize="20sp" />

            <!-- layout_alignParentBottom was dropped: it is a RelativeLayout
                 attribute and has no effect inside a LinearLayout. -->
            <RadioGroup
                android:id="@+id/my_pager_group"
                android:layout_width="match_parent"
                android:layout_height="30dp"
                android:background="#ffffff"
                android:orientation="horizontal">

                <RadioButton
                    android:id="@+id/want_look"
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    style="@style/tab_menu_item"
                    android:text="想看" />

                <RadioButton
                    android:id="@+id/now_look"
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    style="@style/tab_menu_item"
                    android:text="在看" />

                <RadioButton
                    android:id="@+id/have_look"
                    android:layout_width="wrap_content"
                    android:layout_height="wrap_content"
                    style="@style/tab_menu_item"
                    android:text="看过" />

            </RadioGroup>

        </LinearLayout>

        <!-- layout_above="@id/main_group" was dropped: it is a RelativeLayout
             attribute and referenced an id that this layout does not define. -->
        <View
            android:id="@+id/div_tab_bar"
            android:layout_width="match_parent"
            android:layout_height="1dp"
            android:background="#DFDBDB" />

        <!-- Weight-based height instead of wrap_content, which makes a
             ListView measure its rows repeatedly. -->
        <ListView
            android:id="@+id/mypager_like_list"
            android:layout_width="match_parent"
            android:layout_height="0dp"
            android:layout_weight="1" />

    </LinearLayout>

</LinearLayout>
5.6 章英杰
任务进度:完成了电影分类的多条件筛选功能,可按类型、年份和地区组合筛选。
产品页面:
电影分类部分主要代码:
1 <!--电影分类-->
2 <div id="classfiy">
3 <aside>
4 <i>类型:</i>
5 <div>
6 <span>全部</span>
7 <span>喜剧</span>
8 <span>动作</span>
9 <span>爱情</span>
10 <span>惊悚</span>
11 <span>犯罪</span>
12 <span>悬疑</span>
13 <span>战争</span>
14 <span>科幻</span>
15 <span>动画</span>
16 <span>恐怖</span>
17 <span>家庭</span>
18 <span>传记</span>
19 <span>冒险</span>
20 <span>奇幻</span>
21 <span>武侠</span>
22 <span>历史</span>
23 </div>
24 </aside>
25 <aside>
26 <i>年份:</i>
27 <div>
28 <span>全部</span>
29 <span>2021</span>
30 <span>2020</span>
31 <span>2019</span>
32 <span>2018</span>
33 <span>2017</span>
34 <span>2016</span>
35 <span>2015</span>
36 <span>2011-2014</span>
37 <span>2006-2010</span>
38 <span>2000-2005</span>
39 <span>90年代</span>
40 <span>80年代</span>
41 <span>其他</span>
42 </div>
43 </aside>
44 <aside>
45 <i>地区:</i>
46 <div>
47 <span>全部</span>
48 <span>内地</span>
49 <span>香港</span>
50 <span>美国</span>
51 <span>欧洲</span>
52 <span>台湾</span>
53 <span>日本</span>
54 <span>韩国</span>
55 <span>印度</span>
56 <span>泰国</span>
57 <span>英国</span>
58 <span>法国</span>
59 <span>德国</span>
60 <span>加拿大</span>
61 <span>西班牙</span>
62 <span>意大利</span>
63 <span>其他</span>
64 </div>
65 </aside>
66 <div class="last">已选择:
67 <div id="yi"></div>
68 </div>
69 </div>
70 <script>
71 var oDivLength = [];
72 var div = document.getElementsByTagName('div');
73 var divSpan = document.getElementsByTagName('span');
74 //判断有几个列表
75 for (var i = 0; i < div.length; i++) {
76 div[i].index = i;
77 }
78 for (var i = 0; i < divSpan.length; i++) {
79 divSpan[i].onclick = function() {
80 oDivLength[this.parentElement.index] = this.innerText;
81 var oChild = this.parentElement.children;
82 for (var j = 0; j < oChild.length; j++) {
83 oChild[j].className = '';
84 }
85 this.className = 'mystyle'; //已选中的当前列的当前元素添加样式
86 document.getElementById('yi').innerHTML = '';
87 for (var m = 0; m < oDivLength.length; m++) { //放到已选择里面
88 if (oDivLength[m] == '' || oDivLength[m] !== undefined) {
89 var para = document.createElement("span");
90 var node = document.createTextNode(oDivLength[m]);
91 para.appendChild(node);
92 document.getElementById('yi').appendChild(para);
93 }
94 }
95 }
96 }
97 </script>
98 <!--电影分类模块结束-->
任务看板
每日照片:
好看请赞,养成习惯 :) ,作者:靠谱杨
关于笔者:我可能不是天才,但我会努力成为人才。
更多日常分享尽在我的VX公众号:小杨的挨踢IT生活