第三周学习内容
1、爬取薄荷网热量为大创app数据计算添加数据库数据;
youmian.py
from youGet import youGet
if __name__ == '__main__':
data = youGet()
data.get_date()
data.parse_date()
import youdaosql
youdaosql.du_sql()
View Code
youGet.py
爬取所有食物需要更改url和每个next的数字就可以;
import json
import lxml.html
import requests
etree = lxml.html.etree
import time
from requests.adapters import HTTPAdapter
heatss = []
class youGet():
def get_date(self):
url = "http://www.boohee.com/food/view_menu"
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/80.0.3987.149 Safari/537.36 '
}
response = requests.get(url, headers=headers)
with open('htmlyou.txt', 'w', encoding="utf-8") as file:
file.write(response.text)
response.close()
def parse_date(self):
with open('htmlyou.txt', 'r', encoding='utf-8') as file:
text = file.read()
html = etree.HTML(text)
# hrefpage = html.xpath('//a[@class="next_page"]/@href')
page = html.xpath('//div[@class="pagination"]/@limit_page')[0]
a = 2
try:
if page!=0:
heats = html.xpath('//ul[@class="food-list"]/li/div/p/text()')
if len(heats)!=0:
names = html.xpath('//ul[@class="food-list"]/li/div/h4/a/@title')
hrefs = html.xpath('//ul[@class="food-list"]/li/div/h4/a/@href')
heat = html.xpath('//ul[@class="food-list"]/li/div/p/text()')
num = 0
for index in names:
info = {}
info["name"] = index
info["href"] = hrefs[num]
info["heat"] = heat[num]
heatss.append(info)
num += 1
# next = html.xpath('//div[@class="pagination"]/a[@class="next_page"]/@herf')
next ="http://www.boohee.com/food/view_menu?page="+str(a)
while(len(next)!=0):
if a==int(page)+1:
break
a+=1
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/80.0.3987.149 Safari/537.36 '
}
response = requests.get(next, headers=headers)
html1 = etree.HTML(response.text)
heats2 = html1.xpath('//ul[@class="food-list"]/li/div/p/text()')
if len(heats2)!=0:
names = html1.xpath('//ul[@class="food-list"]/li/div/h4/a/@title')
hrefs = html1.xpath('//ul[@class="food-list"]/li/div/h4/a/@href')
heat = html1.xpath('//ul[@class="food-list"]/li/div/p/text()')
num = 0
for index in names:
info = {}
info["name"] = index
info["href"] = hrefs[num]
info["heat"] = heat[num]
heatss.append(info)
num += 1
next ="http://www.boohee.com/food/view_menu?page="+str(a)
response.close()
result = json.dumps(heatss, sort_keys=True, indent=2)
with open('./data5.json', 'w', encoding='utf-8') as file:
for i in result:
file.write(i)
except requests.exceptions.ConnectionError:
result = json.dumps(heatss, sort_keys=True, indent=2)
with open('./data4.json', 'w', encoding='utf-8') as file:
for i in result:
file.write(i)
with open('./htmlhref1.txt', 'w', encoding="utf-8") as file:
for i in heatss:
file.write(i)
View Code
youdaosql.py
import mysql.connector
import json
with open('./data5.json', 'r') as file:
data = file.read()
data = json.loads(data)
def du_sql():
mydb = mysql.connector.connect(
host="localhost",
user="root",
password="password",
database="test1",
auth_plugin="mysql_native_password"
)
dbpath = mydb.cursor()
savaDataSql(dbpath)
print("ok")
mydb.commit()
def savaDataSql(dbpath):
cur = dbpath
for each in data:
name = each['name']
href = each['href']
heat = each['heat']
sql = "INSERT INTO heats (name,href,heat) values (%s,%s,%s)"
var = (name,href,heat)
cur.execute(sql,var)
View Code
2、学习oracle的使用
安装oracle19c——地址就是官网就可以下载 https://www.oracle.com/database/technologies/oracle-database-software-downloads.html
下载完后需要建立数据库,然后打开sql plus进行oracle数据库操作,具体可视化软件还没用
首次打开sql plus 需要登入 账号可以为 sys as sysdba 密码为自己设置的
登入后可以创建用户 create user 用户名 identified by 密码;
给用户修改密码 alter user 用户名 identified by 新密码;
删除用户 drop user 用户名 [cascade]可选参数 cascade
**给用户赋权限**
grant 权限/角色 to 用户名;
其中权力有
具体的权限使用可以单独查询,
但赋权需要使用管理员 可以使用conn /as sysdba进行切换
切换回去就是 conn 用户名/密码
创建表、序列、触发器、视图、以及查找、添加、修改、删除操作都和mysql一样;