python bag模块

转载

数码墨鱼 2024-10-15 09:45:09

文章标签 python bag模块 json shell python Python 文章分类 Python 后端开发

Python 的创始人为吉多·范罗苏姆(Guido van Rossum).1989年的圣诞节期间,吉多·范罗苏姆为了在阿姆斯特丹打发时间,决心开发一个新的脚本解释程序,作为ABC语言的一种继承.Python是纯粹的自由软件,源代码和解释器CPython遵循PSF许可协议(Python Software Foundation License),该协议与GPL(GNU General Public License)兼容.关于Python的哲学:Python崇尚"优雅"、"明确"、"简单",即用最简单、最优雅、最明确的方法来解决问题.

OS 基础模块

OS模块提供了多数操作系统的功能接口函数,当OS模块被导入后,它会自适应于不同的操作系统平台,根据不同的平台进行相应的操作,在Python编程时,经常和文件、目录打交道,所以离不了OS模块,OS模块也是在开发中最常用到的模块之一,本节内容将对OS模块提供的函数进行详细的解读,先来看一下OS模块的常用参数吧.

import os

os.getcwd()                 #获取当前工作目录,即当前python脚本工作的目录路径
os.chdir("dirname")         #改变当前脚本工作目录,相当于shell下cd
os.curdir                   #返回当前目录: ('.')
os.pardir                   #获取当前目录的父目录字符串名：('..')
os.makedirs('dir1/dir2')    #生成多层递归目录,此处递归生成./dir1/dir2
os.removedirs('dirname')    #若目录为空,则删除,并递归到上一级目录,如若也为空,则删除,依此类推
os.mkdir('dirname')         #创建目录,创建一个新的目录
os.rmdir('dirname')         #删除空目录,若目录不为空则无法删除,报错
os.listdir('dirname')       #列出指定目录下的所有文件和子目录,包括隐藏文件,并以列表方式打印
os.walk('dirname')          #遍历所有目录,包括子目录
os.remove()                 #删除一个文件
os.rename("oldname","new")  #重命名文件/目录
os.stat('path/filename')    #获取文件/目录信息
#-----------------------------------------------------------------------------------
os.sep                      #查系统特定的路径分隔符,win下为"\\"; Linux下为"/"
os.name                     #查看字符串指示当前使用平台.win->'nt'; Linux->'posix'
os.linesep                  #查看平台使用的行终止符,win下为"\r\n"; Linux下为"\n"
os.pathsep                  #查看用于分隔多个搜索路径(如PATH环境变量)的字符,win下为";"; Linux下为":"
os.system("shell")          #运行shell命令,直接显示,不能保存执行结果
os.popen("shell").read()    #运行shell命令,可以保存执行结果
os.environ                  #获取系统环境变量
#-----------------------------------------------------------------------------------
os.path.abspath(path)       #返回path规范化的绝对路径
os.path.split(path)         #将path分割成目录和文件名二元组返回
os.path.dirname(path)       #返回path的目录,其实就是os.path.split(path)的第一个元素
os.path.basename(path)      #返回path最后的文件名,如果path以/或\结尾,那么就会返回空字符串.
os.path.exists(path)        #如果path存在,返回True.如果path不存在,返回False
os.path.isabs(path)         #如果path是绝对路径,返回True
os.path.isfile(path)        #如果path是一个存在的文件,返回True,否则返回False
os.path.isdir(path)         #如果path是一个存在的目录,则返回True,否则返回False
os.path.join(path)          #将多个路径组合后返回,若某个参数是绝对路径,则它之前的所有参数都会被丢弃
os.path.getatime(path)      #返回path所指向的文件或者目录的最后存取时间
os.path.getmtime(path)      #返回path所指向的文件或者目录的最后修改时间
import os

os.getcwd()                 #获取当前工作目录,即当前python脚本工作的目录路径
os.chdir("dirname")         #改变当前脚本工作目录,相当于shell下cd
os.curdir                   #返回当前目录: ('.')
os.pardir                   #获取当前目录的父目录字符串名：('..')
os.makedirs('dir1/dir2')    #生成多层递归目录,此处递归生成./dir1/dir2
os.removedirs('dirname')    #若目录为空,则删除,并递归到上一级目录,如若也为空,则删除,依此类推
os.mkdir('dirname')         #创建目录,创建一个新的目录
os.rmdir('dirname')         #删除空目录,若目录不为空则无法删除,报错
os.listdir('dirname')       #列出指定目录下的所有文件和子目录,包括隐藏文件,并以列表方式打印
os.walk('dirname')          #遍历所有目录,包括子目录
os.remove()                 #删除一个文件
os.rename("oldname","new")  #重命名文件/目录
os.stat('path/filename')    #获取文件/目录信息
#-----------------------------------------------------------------------------------
os.sep                      #查系统特定的路径分隔符,win下为"\\"; Linux下为"/"
os.name                     #查看字符串指示当前使用平台.win->'nt'; Linux->'posix'
os.linesep                  #查看平台使用的行终止符,win下为"\r\n"; Linux下为"\n"
os.pathsep                  #查看用于分隔多个搜索路径(如PATH环境变量)的字符,win下为";"; Linux下为":"
os.system("shell")          #运行shell命令,直接显示,不能保存执行结果
os.popen("shell").read()    #运行shell命令,可以保存执行结果
os.environ                  #获取系统环境变量
#-----------------------------------------------------------------------------------
os.path.abspath(path)       #返回path规范化的绝对路径
os.path.split(path)         #将path分割成目录和文件名二元组返回
os.path.dirname(path)       #返回path的目录,其实就是os.path.split(path)的第一个元素
os.path.basename(path)      #返回path最后的文件名,如果path以/或\结尾,那么就会返回空字符串.
os.path.exists(path)        #如果path存在,返回True.如果path不存在,返回False
os.path.isabs(path)         #如果path是绝对路径,返回True
os.path.isfile(path)        #如果path是一个存在的文件,返回True,否则返回False
os.path.isdir(path)         #如果path是一个存在的目录,则返回True,否则返回False
os.path.join(path)          #将多个路径组合后返回,若某个参数是绝对路径,则它之前的所有参数都会被丢弃
os.path.getatime(path)      #返回path所指向的文件或者目录的最后存取时间
os.path.getmtime(path)      #返回path所指向的文件或者目录的最后修改时间

SYS 系统模块

Python的SYS模块提供访问解释器使用或维护的变量,和与解释器进行交互的函数.通俗来讲,SYS模块负责程序与Python解释器的交互,提供了一系列的函数和变量,用于操控Python运行时的环境,SYS模块也是Python默认集成的模块,它被集成在了Python的解释器里,是必须的模块.

import sys

sys.argv              #命令行参数列表,第一个元素是程序本身路径
sys.exit(n)           #退出程序,正常退出时exit(0)
sys.version           #获取Python解释程序的版本信息
sys.path              #返回模块的搜索路径,初始化时使用PYTHONPATH环境变量的值
sys.platform          #返回操作系统平台名称
sys.stdin             #输入相关
sys.stdout            #输出相关
sys.stderr            #错误相关
import sys

sys.argv              #命令行参数列表,第一个元素是程序本身路径
sys.exit(n)           #退出程序,正常退出时exit(0)
sys.version           #获取Python解释程序的版本信息
sys.path              #返回模块的搜索路径,初始化时使用PYTHONPATH环境变量的值
sys.platform          #返回操作系统平台名称
sys.stdin             #输入相关
sys.stdout            #输出相关
sys.stderr            #错误相关

取出命令行参数: 命令行参数列表,第一个元素是程序本身路径,可遍历出具体传入参数数量.

import sys

for x in sys.argv:
    print(x)
import sys

for x in sys.argv:
    print(x)

判断系统平台: 通过读取sys.platform属性(它是字符串而非函数),可以判断当前所在的操作系统平台.

>>> import sys
>>>
>>> sys.platform
'win32'
>>> import sys
>>>
>>> sys.platform
'win32'

返回模块搜索路径: sys.path是一个列表(而非函数),通过下标或遍历可取出Python的各个模块搜索路径.

>>> sys.path[0]
''
>>> sys.path[1]
'C:\\Users\\LyShark\\AppData\\Local\\Programs\\Python\\Python37\\python37.zip'
>>> sys.path[2]
'C:\\Users\\LyShark\\AppData\\Local\\Programs\\Python\\Python37\\DLLs'
>>> sys.path[3]
'C:\\Users\\LyShark\\AppData\\Local\\Programs\\Python\\Python37\\lib'
>>> sys.path[0]
''
>>> sys.path[1]
'C:\\Users\\LyShark\\AppData\\Local\\Programs\\Python\\Python37\\python37.zip'
>>> sys.path[2]
'C:\\Users\\LyShark\\AppData\\Local\\Programs\\Python\\Python37\\DLLs'
>>> sys.path[3]
'C:\\Users\\LyShark\\AppData\\Local\\Programs\\Python\\Python37\\lib'

实现动态进度条: 使用标准输入与输出,实现动态进度条小实例.

import sys
import time

def view_bar(num,total):
    """Redraw a one-line text progress bar on standard output.

    Writes a carriage return, then ``num`` ``>`` characters, then the
    completed percentage, and flushes so the bar updates in place.

    Args:
        num: Number of finished steps; also the bar length in characters.
        total: Total number of steps; must be non-zero.

    Raises:
        ZeroDivisionError: If ``total`` is 0.
    """
    # Integer floor division avoids float rounding: int(29 / 100 * 100)
    # yields 28, because 0.29 * 100 == 28.999999999999996.
    percent = num * 100 // total
    sys.stdout.write('\r%s%d%%' % (">" * num, percent))
    sys.stdout.flush()

if __name__ == '__main__':
    # Demo driver: advance the bar one step every 0.1 s.
    # range(0, 101) so the last update shows 100%; range(0, 100) stopped at 99%.
    for i in range(0, 101):
        time.sleep(0.1)
        view_bar(i, 100)
    # End the line so the shell prompt does not overwrite the finished bar.
    sys.stdout.write('\n')
import sys
import time

def view_bar(num,total):
    """Redraw a one-line text progress bar on standard output.

    Writes a carriage return, then ``num`` ``>`` characters, then the
    completed percentage, and flushes so the bar updates in place.

    Args:
        num: Number of finished steps; also the bar length in characters.
        total: Total number of steps; must be non-zero.

    Raises:
        ZeroDivisionError: If ``total`` is 0.
    """
    # Integer floor division avoids float rounding: int(29 / 100 * 100)
    # yields 28, because 0.29 * 100 == 28.999999999999996.
    percent = num * 100 // total
    sys.stdout.write('\r%s%d%%' % (">" * num, percent))
    sys.stdout.flush()

if __name__ == '__main__':
    # Demo driver: advance the bar one step every 0.1 s.
    # range(0, 101) so the last update shows 100%; range(0, 100) stopped at 99%.
    for i in range(0, 101):
        time.sleep(0.1)
        view_bar(i, 100)
    # End the line so the shell prompt does not overwrite the finished bar.
    sys.stdout.write('\n')

Hashlib 模块

Python里面的hashlib模块提供了很多加密的算法,该模块实现了许多不同安全散列和消息摘要算法的通用接口,包括FIPS安全散列算法SHA1,SHA224,SHA256,SHA384和SHA512以及RSA的MD5算法,"安全散列"和"消息摘要"是可互换的,较旧的算法称为消息摘要,现代术语是安全散列.

MD5加密: MD5消息摘要算法,被广泛使用的密码散列函数,可产生出一个128位的散列值(hash value).

import hashlib

# ######## md5 ########
hash = hashlib.md5()
# help(hash.update)
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())
print(hash.digest())
import hashlib

# ######## md5 ########
hash = hashlib.md5()
# help(hash.update)
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())
print(hash.digest())

SHA1加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA1会产生一个160位的消息摘要(已被淘汰).

import hashlib

######## sha1 ########
hash = hashlib.sha1()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())
import hashlib

######## sha1 ########
hash = hashlib.sha1()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())

SHA256加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA256算法的哈希值大小为256位.

import hashlib

# ######## sha256 ########
hash = hashlib.sha256()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())
import hashlib

# ######## sha256 ########
hash = hashlib.sha256()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())

SHA384加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA384算法的哈希值大小为384位.

import hashlib

# ######## sha384 ########
hash = hashlib.sha384()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())
import hashlib

# ######## sha384 ########
hash = hashlib.sha384()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())

SHA512加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA512算法的哈希值大小为512位.

import hashlib

# ######## sha512 ########
hash = hashlib.sha512()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())
import hashlib

# ######## sha512 ########
hash = hashlib.sha512()
hash.update(bytes('admin', encoding='utf-8'))
print(hash.hexdigest())

MD5加盐加密: 以上的几个加密算法通过撞库可被破解,所以有必要对加密算法中添加自定义KEY再来做双重加密.

import hashlib

# ######## md5 ########
hash = hashlib.md5(bytes('898oaFs09f',encoding="utf-8"))
hash.update(bytes('admin',encoding="utf-8"))
print(hash.hexdigest())
import hashlib

# ######## md5 ########
hash = hashlib.md5(bytes('898oaFs09f',encoding="utf-8"))
hash.update(bytes('admin',encoding="utf-8"))
print(hash.hexdigest())

计算文件HASH值: 我们可以通过两个文件的HASH数值,来对比文件是否被修改过,常用来检测文件是否被修改.

import hashlib


def _file_md5(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        # Read fixed-size chunks: iterating a binary file "by line" splits on
        # b'\n', so a file without newline bytes is loaded in one piece.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


# Print both digests; equal output means the two files have identical content.
print(_file_md5(r'C:/lyshark.png'))
print(_file_md5(r'D:/lyshark.png'))
import hashlib


def _file_md5(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        # Read fixed-size chunks: iterating a binary file "by line" splits on
        # b'\n', so a file without newline bytes is loaded in one piece.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


# Print both digests; equal output means the two files have identical content.
print(_file_md5(r'C:/lyshark.png'))
print(_file_md5(r'D:/lyshark.png'))

Random 模块

Random模块实现了一个伪随机数生成器,可用来生成随机数以及完成与随机数相关的功能,对于整数,从范围中统一选择,对于序列,随机元素的统一选择,用于生成列表的随机排列的函数,以及用于随机抽样而无需替换的函数,下面我们来介绍下该模块下常用的几个函数.

import random

random.shuffle()                           #随机打乱列表元素排列
random.randint(1,20)                       #生成1到20的整数包括20
random.uniform(10,20)                      #生成10到20之间的浮点数
random.randrange(1,10)                     #生成1到10的整数不包括10
random.choice()                            #从序列中随机选择数据
import random

random.shuffle()                           #随机打乱列表元素排列
random.randint(1,20)                       #生成1到20的整数包括20
random.uniform(10,20)                      #生成10到20之间的浮点数
random.randrange(1,10)                     #生成1到10的整数不包括10
random.choice()                            #从序列中随机选择数据

生成随机数: 通过使用random.randint()函数,随机生成整数,字符,大小写等.

>>> import random
>>>
>>> random.randint(1,10)
6
>>> random.randint(100,9999)
1189
>>> chr(random.randint(97,122))    #随机生成a-z
>>> chr(random.randint(65,90))     #随机生成A-Z
>>> chr(random.randint(48,57))     #随机生成0-9
>>> import random
>>>
>>> random.randint(1,10)
6
>>> random.randint(100,9999)
1189
>>> chr(random.randint(97,122))    #随机生成a-z
>>> chr(random.randint(65,90))     #随机生成A-Z
>>> chr(random.randint(48,57))     #随机生成0-9

随机打乱数据: 通过使用random.shuffle()函数,实现随机打乱一个列表中的数据.

>>> import random
>>>
>>> lists = [1,2,3,4,5,6,7,8,9]
>>> print(lists)
[1, 2, 3, 4, 5, 6, 7, 8, 9]
>>>
>>> random.shuffle(lists)
>>> print(lists)
[4, 7, 1, 8, 3, 9, 5, 6, 2]
>>> import random
>>>
>>> lists = [1,2,3,4,5,6,7,8,9]
>>> print(lists)
[1, 2, 3, 4, 5, 6, 7, 8, 9]
>>>
>>> random.shuffle(lists)
>>> print(lists)
[4, 7, 1, 8, 3, 9, 5, 6, 2]

随机弹出数据: 通过使用random.choice()函数,实现从指定列表中随机弹出一个元素.

>>> import random
>>>
>>> lists=[1,2,3,4,5,6,7,8,9]
>>> string=["admin","guest","lyshark"]
>>>
>>> random.choice(lists)
2
>>> random.choice(lists)
5
>>>
>>> random.choice(string)
'lyshark'
>>> random.choice(string)
'guest'
>>> import random
>>>
>>> lists=[1,2,3,4,5,6,7,8,9]
>>> string=["admin","guest","lyshark"]
>>>
>>> random.choice(lists)
2
>>> random.choice(lists)
5
>>>
>>> random.choice(string)
'lyshark'
>>> random.choice(string)
'guest'

随机生成验证码: 通过random()函数,配合循环语句,和选择语句来实现随机生成验证码.

import random

# Build a 6-character verification code from digits and uppercase letters.
# NOTE: the random module is not cryptographically secure; it is used here
# only to demonstrate randint()/randrange().
li = []
for _ in range(6):
    selector = random.randint(0, 4)
    if selector in (2, 4):
        # Two of the five selector values produce a digit 0-9.
        li.append(str(random.randrange(0, 10)))
        continue
    # Every other selector value produces an uppercase letter (code points 65-90).
    li.append(chr(random.randrange(65, 91)))

result = "".join(li)
print(result)
import random

# Build a 6-character verification code from digits and uppercase letters.
# NOTE: the random module is not cryptographically secure; it is used here
# only to demonstrate randint()/randrange().
li = []
for _ in range(6):
    selector = random.randint(0, 4)
    if selector in (2, 4):
        # Two of the five selector values produce a digit 0-9.
        li.append(str(random.randrange(0, 10)))
        continue
    # Every other selector value produces an uppercase letter (code points 65-90).
    li.append(chr(random.randrange(65, 91)))

result = "".join(li)
print(result)

Time 时间模块

Time模块是通过调用C库实现的,所以有些方法在某些平台上可能无法调用,但是其提供的大部分接口与C标准库time.h基本一致,尽管此模块始终可用,但并非所有平台上都提供所有功能,此模块中定义的大多数函数调用具有相同名称的平台C库函数,因为这些函数的语义因平台而异.

import time

time.sleep(4)                                    #暂停程序执行4秒
time.clock()                                     #返回处理器时间(Python 3.3起已弃用,3.8中已移除,请改用time.perf_counter()或time.process_time())
time.process_time()                              #返回处理器时间
time.time()                                      #返回当前系统时间戳
time.ctime()                                     #当前系统时间,输出字符串格式化
time.ctime(time.time()-86640)                    #将时间戳转为字符串格式
time.gmtime()                                    #获取结构化时间
time.gmtime(time.time()-86640)                   #将时间戳转换成结构化格式
time.localtime(time.time()-86640)                #将时间戳转换成结构格式,但返回本地时间
time.mktime(time.localtime())                    #与localtime()功能相反,将结构时间转换为时间戳
time.strftime("%Y-%m-%d %H:%M:%S",time.gmtime()) #将struct_time格式转成指定的字符串格式
time.strptime("2019-09-20","%Y-%m-%d")           #将字符串格式转换成struct_time格式
import time

time.sleep(4)                                    #暂停程序执行4秒
time.clock()                                     #返回处理器时间(Python 3.3起已弃用,3.8中已移除,请改用time.perf_counter()或time.process_time())
time.process_time()                              #返回处理器时间
time.time()                                      #返回当前系统时间戳
time.ctime()                                     #当前系统时间,输出字符串格式化
time.ctime(time.time()-86640)                    #将时间戳转为字符串格式
time.gmtime()                                    #获取结构化时间
time.gmtime(time.time()-86640)                   #将时间戳转换成结构化格式
time.localtime(time.time()-86640)                #将时间戳转换成结构格式,但返回本地时间
time.mktime(time.localtime())                    #与localtime()功能相反,将结构时间转换为时间戳
time.strftime("%Y-%m-%d %H:%M:%S",time.gmtime()) #将struct_time格式转成指定的字符串格式
time.strptime("2019-09-20","%Y-%m-%d")           #将字符串格式转换成struct_time格式

DateTime 模块

DateTime模块提供了处理日期和时间的类,既有简单的方式,又有复杂的方式,它虽然支持日期和时间算法,但其实现的重点是为输出格式化和操作提供高效的属性提取功能,该模块提供了以简单和复杂的方式操作日期和时间的类,虽然支持日期和时间算法,但实现的重点是有效的属性提取,用于输出格式和操作.

import datetime

datetime.date.today()                             #格式化输出今天时间
datetime.datetime.now()                           #格式化输出当前的时间
datetime.datetime.now().timetuple()               #以struct_time格式输出当前时间
datetime.date.fromtimestamp(time.time()-864400)   #将时间戳转成日期格式
#-----------------------------------------------------------------------------------
temp = datetime.datetime.now()                    #输出当前时间,并赋值给变量
temp.replace(2019,10,10)                          #替换输出内容中的,年月日为2019-10-10
#-----------------------------------------------------------------------------------
#时间替换关键字:[year,month,day,hour,minute,second,microsecond,tzinfo]
str_to_date = datetime.datetime.strptime("19/10/05 12:30", "%y/%m/%d %H:%M") #将字符串转换成日期格式
new_date = datetime.datetime.now() + datetime.timedelta(days=10)             #在当前基础上加10天
new_date = datetime.datetime.now() + datetime.timedelta(days=-10)            #在当前基础上减10天
new_date = datetime.datetime.now() + datetime.timedelta(hours=-10)           #在当前基础上减10小时
new_date = datetime.datetime.now() + datetime.timedelta(seconds=120)         #在当前基础上加120秒
import datetime

datetime.date.today()                             #格式化输出今天时间
datetime.datetime.now()                           #格式化输出当前的时间
datetime.datetime.now().timetuple()               #以struct_time格式输出当前时间
datetime.date.fromtimestamp(time.time()-864400)   #将时间戳转成日期格式
#-----------------------------------------------------------------------------------
temp = datetime.datetime.now()                    #输出当前时间,并赋值给变量
temp.replace(2019,10,10)                          #替换输出内容中的,年月日为2019-10-10
#-----------------------------------------------------------------------------------
#时间替换关键字:[year,month,day,hour,minute,second,microsecond,tzinfo]
str_to_date = datetime.datetime.strptime("19/10/05 12:30", "%y/%m/%d %H:%M") #将字符串转换成日期格式
new_date = datetime.datetime.now() + datetime.timedelta(days=10)             #在当前基础上加10天
new_date = datetime.datetime.now() + datetime.timedelta(days=-10)            #在当前基础上减10天
new_date = datetime.datetime.now() + datetime.timedelta(hours=-10)           #在当前基础上减10小时
new_date = datetime.datetime.now() + datetime.timedelta(seconds=120)         #在当前基础上加120秒

Shutil 压缩模块

该shutil模块对文件和文件集合提供了许多高级操作,特别是,提供了支持文件复制和删除的功能,特别针对文件拷贝和删除,主要功能为目录和文件操作以及压缩操作Shutil模块也是Python中默认自带的标准库.

文件拷贝(1): 将/etc/passwd文件中的内容,拷贝到/tmp/passwd文件中去.

>>> import shutil
>>>
>>> shutil.copyfileobj(open("/etc/passwd","r"),open("/tmp/passwd","w"))
>>> import shutil
>>>
>>> shutil.copyfileobj(open("/etc/passwd","r"),open("/tmp/passwd","w"))

文件拷贝(2): 将/etc/passwd文件中的内容,拷贝到/tmp/passwd文件中去,且目标文件无需存在.

>>> import shutil
>>>
>>> shutil.copyfile("/etc/passwd","/tmp/passwd")
>>> import shutil
>>>
>>> shutil.copyfile("/etc/passwd","/tmp/passwd")

递归拷贝: 递归拷贝/etc目录下的所有文件,拷贝到/tmp目录下,目标目录不能存在,ignore的意思是排除.

>>> import shutil
>>>
>>> shutil.copytree("/etc","/tmp", ignore=shutil.ignore_patterns('*.conf', 'tmp*'))
>>> import shutil
>>>
>>> shutil.copytree("/etc","/tmp", ignore=shutil.ignore_patterns('*.conf', 'tmp*'))

递归删除: 递归删除/etc文件夹及其中的所有内容(危险操作,此处仅作演示,请勿在真实系统上对/etc执行).

>>> import shutil
>>>
>>> shutil.rmtree("/etc")
>>> import shutil
>>>
>>> shutil.rmtree("/etc")

文件移动: 实现文件的移动,或者是给文件重命名.

>>> import shutil
>>>
>>> shutil.move("file1","file2")
>>> import shutil
>>>
>>> shutil.move("file1","file2")

文件归档: 实现将/etc/下的文件打包放置/home/目录下面.

>>> import shutil
>>>
>>> ret = shutil.make_archive("/etc/","gztar",root_dir='/home/')
>>> import shutil
>>>
>>> ret = shutil.make_archive("/etc/","gztar",root_dir='/home/')

ZIP文件压缩: 通过ZipFile模块,压缩指定目录下的指定文件.

>>> import zipfile
>>>
# 压缩
>>> z = zipfile.ZipFile('lyshark.zip', 'w')
>>> z.write('lyshark.log')
>>> z.write('data.data')
>>> z.close()

# 解压
>>> z = zipfile.ZipFile('lyshark.zip', 'r')
>>> z.extractall()
>>> z.close()
>>> import zipfile
>>>
# 压缩
>>> z = zipfile.ZipFile('lyshark.zip', 'w')
>>> z.write('lyshark.log')
>>> z.write('data.data')
>>> z.close()

# 解压
>>> z = zipfile.ZipFile('lyshark.zip', 'r')
>>> z.extractall()
>>> z.close()

TAR文件压缩: 通过TarFile模块,压缩指定目录下的指定文件.

>>> import tarfile
>>>
# 压缩
>>> tar = tarfile.open('your.tar','w')
>>> tar.add('/bbs2.log', arcname='bbs2.log')
>>> tar.add('/cmdb.log', arcname='cmdb.log')
>>> tar.close()

# 解压
>>> tar = tarfile.open('your.tar','r')
>>> tar.extractall()  # 可设置解压地址
>>> import tarfile
>>>
# 压缩
>>> tar = tarfile.open('your.tar','w')
>>> tar.add('/bbs2.log', arcname='bbs2.log')
>>> tar.add('/cmdb.log', arcname='cmdb.log')
>>> tar.close()

# 解压
>>> tar = tarfile.open('your.tar','r')
>>> tar.extractall()  # 可设置解压地址

Logging 模块

很多程序都有记录日志的需求,并且日志中包含的信息既有正常的程序访问日志,还可能有错误、警告等信息输出,Python的logging模块提供了标准的日志接口,你可以通过它存储各种格式的日志,logging的日志可以分为debug(),info(),warning(),error(),critical()这5个级别,下面我们看一下怎么用.

如果只想把日志输出到屏幕上,则可以直接执行以下操作.

>>> import logging
>>>
>>> logging.debug("hello debug")
>>> logging.warning("hello warning")
>>> logging.critical("hello critical")

#---输出结果(默认级别为WARNING,debug信息不会显示)---
WARNING:root:hello warning
CRITICAL:root:hello critical
>>> import logging
>>>
>>> logging.debug("hello debug")
>>> logging.warning("hello warning")
>>> logging.critical("hello critical")

#---输出结果(默认级别为WARNING,debug信息不会显示)---
WARNING:root:hello warning
CRITICAL:root:hello critical

以上可看到logging.后面跟了3个不同的方法,分别对应3种日志等级,logging完整支持的日志等级如下:

日志等级	日志数字	日志信息说明
DEBUG	10	详细信息,通常仅在调试阶段时才有意义
INFO	20	确认事情按预期工作,正常工作时发送
WARNING	30	警告等级,表示发生了不可预料的意外
ERROR	40	错误,比警告等级更加严重,软件已无法执行某些功能
CRITICAL	50	严重错误,表明程序本身可能无法继续运行

如果想把日志写入文件的话,只需要在程序启动时通过basicConfig()的filename参数指定日志文件路径即可.

import logging
 
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S %p',
                    filename='test.log',
                    filemode='w')

#---参数调用-------------------------------
logging.debug('debug message')
logging.info('info message')
logging.warning('warning message')
logging.error('error message')
logging.critical('critical message')
import logging
 
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S %p',
                    filename='test.log',
                    filemode='w')

#---参数调用-------------------------------
logging.debug('debug message')
logging.info('info message')
logging.warning('warning message')
logging.error('error message')
logging.critical('critical message')

日志的format()相关格式列表如下所示,以上的配置格式可以随意自定义.

格式名称	格式的作用
%(name)s	Logger的名字
%(levelno)s	数字形式的日志级别
%(levelname)s	文本形式的日志级别
%(pathname)s	调用日志输出函数的模块的完整路径名
%(filename)s	调用日志输出函数的模块的文件名
%(module)s	调用日志输出函数的模块名
%(funcName)s	调用日志输出函数的函数名
%(lineno)d	调用日志输出函数的语句所在的代码行
%(created)f	日志记录的创建时间,以UNIX时间戳(浮点数)表示
%(asctime)s	字符串形式的当前时间
%(thread)d	线程ID,可能没有
%(threadName)s	线程名,可能没有
%(process)d	进程ID,可能没有
%(message)s	用户输出的消息

其实日志文件的相关功能还很多,包括多文件日志记录功能等,笔者认为这些功能太过于繁琐,在开发中容易混用,掌握上面的常用方法就已经足够,所以不再继续往下延伸了.

Process 模块

早期的Python版本中,我们主要是通过os.system()、os.popen().read()等函数来执行命令行指令的,另外还有一个很少使用的commands模块(仅存在于Python 2,Python 3中已移除),但是现在官方文档中建议使用的是subprocess模块,所以os模块和commands模块的相关函数在这里只提供一个简单的使用示例,我们重点要介绍的是subprocess模块.

使用popen执行命令: 先来演示一下os.popen()函数,来执行一条命令的过程吧.

>>> import os
>>>
>>> temp=os.popen("ls -lh")
>>> temp
<open file 'ls -lh', mode 'r' at 0x7fd1d09b35d0>
>>> temp.read()
'total 4.0K\n-rw-------. 1 root root 1.2K Dec 20 01:53 anaconda-ks.cfg\n'
>>> import os
>>>
>>> temp=os.popen("ls -lh")
>>> temp
<open file 'ls -lh', mode 'r' at 0x7fd1d09b35d0>
>>> temp.read()
'total 4.0K\n-rw-------. 1 root root 1.2K Dec 20 01:53 anaconda-ks.cfg\n'

使用call()执行命令: 接下来通过使用subprocess.call()执行一个命令,返回状态码,shell=False,第一个参数必须是列表,shell=True,第一个参数就直接输入命令即可.

>>> import subprocess
>>>
>>> ret = subprocess.call(["ls","-lh"],shell=False)
>>> print(ret)
0
>>> ret = subprocess.call("ls -l", shell=True)
>>> print(ret)
0
>>> import subprocess
>>>
>>> ret = subprocess.call(["ls","-lh"],shell=False)
>>> print(ret)
0
>>> ret = subprocess.call("ls -l", shell=True)
>>> print(ret)
0

使用check_call()检查命令: 执行命令,如果执行状态码是0,则返回0,否则抛异常.

>>> import subprocess
>>>
>>> ret = subprocess.check_call(["ls", "-l"],shell=False)
>>> ret = subprocess.check_call("exit 1",shell=True)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib64/python2.7/subprocess.py", line 542, in check_call
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command 'exit 1' returned non-zero exit status 1
>>> import subprocess
>>>
>>> ret = subprocess.check_call(["ls", "-l"],shell=False)
>>> ret = subprocess.check_call("exit 1",shell=True)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib64/python2.7/subprocess.py", line 542, in check_call
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command 'exit 1' returned non-zero exit status 1

使用check_output()检查命令: 执行命令,如果状态码是0,则返回执行结果否则抛异常,注意这里返回的是字节类型,需要转换.

>>> import subprocess
>>>
>>> ret = subprocess.check_output(["echo", "Hello World!"],shell=False)
>>> print(str(ret,encoding='utf-8'))

>>> ret = subprocess.check_output("exit 1", shell=True)
>>> print(str(ret,encoding='utf-8'))
>>> import subprocess
>>>
>>> ret = subprocess.check_output(["echo", "Hello World!"],shell=False)
>>> print(str(ret,encoding='utf-8'))

>>> ret = subprocess.check_output("exit 1", shell=True)
>>> print(str(ret,encoding='utf-8'))

使用run()运行命令: python3.5新加的功能,代替os.system,os.spawn.

>>> import subprocess
>>> 
>>> subprocess.run(["ls", "-l"])
total 56
-rw-rw-r-- 1 tomcat tomcat    61  8月 11 23:27 a.py
CompletedProcess(args=['ls', '-l'], returncode=0)
>>> 
>>> subprocess.run(["ls", "-l", "/dev/null"], stdout=subprocess.PIPE)
CompletedProcess(args=['ls', '-l', '/dev/null'], returncode=0, stdout=b'crw-rw-rw- 1 root root 1, 3  8\xe6\x9c\x88 11 09:27 /dev/null\n')
>>> import subprocess
>>> 
>>> subprocess.run(["ls", "-l"])
total 56
-rw-rw-r-- 1 tomcat tomcat    61  8月 11 23:27 a.py
CompletedProcess(args=['ls', '-l'], returncode=0)
>>> 
>>> subprocess.run(["ls", "-l", "/dev/null"], stdout=subprocess.PIPE)
CompletedProcess(args=['ls', '-l', '/dev/null'], returncode=0, stdout=b'crw-rw-rw- 1 root root 1, 3  8\xe6\x9c\x88 11 09:27 /dev/null\n')

使用Popen()执行命令: 这里用的并非os.popen()函数,而是subprocess模块中的Popen类,用来执行一些更复杂的操作.

>>> import subprocess
>>> 
>>> p = subprocess.Popen("ls -lh",shell=True,stdout=subprocess.PIPE)
>>> print(p.stdout.read())
>>> import subprocess
>>> 
>>> p = subprocess.Popen("ls -lh",shell=True,stdout=subprocess.PIPE)
>>> print(p.stdout.read())

Urllib 模块

URLlib是Python提供的一个用于操作URL的模块,这个库在我们爬取网页的时候会经常用到,也是很多网站测试,网站状态检测等常用的模块之一,不过一般用来写爬虫的比较多,这里也应该了解一下它的作用.

快速抓取网页: 使用urllib最基本的抓取功能,将百度首页的内容保存到本地目录下.

>>> import urllib.request
>>>
>>> res=urllib.request.urlopen("https://www.baidu.com")
>>> print(res.read().decode("utf-8"))

>>> f=open("./test.html","wb")      #保存在本地
>>> f.write(res.read())
>>> f.close()
>>> import urllib.request
>>>
>>> res=urllib.request.urlopen("https://www.baidu.com")
>>> print(res.read().decode("utf-8"))

>>> f=open("./test.html","wb")      #保存在本地
>>> f.write(res.read())
>>> f.close()

实现POST请求: 上述的例子是通过GET请求获取百度首页,下面使用urllib发送POST请求.

>>> import urllib.parse
>>> import urllib.request
>>>
>>> data=bytes(urllib.parse.urlencode({"hello":"lyshark"}),encoding="utf-8")
>>> print(data)
>>> response = urllib.request.urlopen('http://www.baidu.com/post',data=data)
>>> print(response.read())
>>> import urllib.parse
>>> import urllib.request
>>>
>>> data=bytes(urllib.parse.urlencode({"hello":"lyshark"}),encoding="utf-8")
>>> print(data)
>>> response = urllib.request.urlopen('http://www.baidu.com/post',data=data)
>>> print(response.read())

设置TIMEOUT时间: 我们需要给请求设置一个超时时间,而不是让程序一直在等待结果.

import urllib.request

response = urllib.request.urlopen('http://www.baidu.com', timeout=1)
print(response.read())
import urllib.request

response = urllib.request.urlopen('http://www.baidu.com', timeout=1)
print(response.read())

获取网站状态: 我们可以通过status、getheaders(),getheader("server"),获取状态码以及头部信息.

>>> import urllib.request
>>>
>>> res=urllib.request.urlopen("https://www.python.org")
>>> print(type(res))
<class 'http.client.HTTPResponse'>
>>>
>>> res.status
>>> res.getheaders()
>>> res.getheader("server")
>>> import urllib.request
>>>
>>> res=urllib.request.urlopen("https://www.python.org")
>>> print(type(res))
<class 'http.client.HTTPResponse'>
>>>
>>> res.status
>>> res.getheaders()
>>> res.getheader("server")

伪装访问网站: 给请求添加头部信息,从而定制自己请求网站时的头部信息,防止被网站屏蔽.

from urllib import request,parse

url = 'http://www.baidu.com'
# Custom request headers: a browser-like User-Agent plus an explicit Host.
# NOTE(review): 'Host' is 'mkdirs.org' while the URL targets www.baidu.com;
# confirm this mismatch is intended.
headers = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    'Host': 'mkdirs.org'
}
# Named form_fields instead of dict so the builtin dict type is not shadowed.
form_fields = {
    'name': 'LyShark'
}
# urlopen() requires the POST body as bytes, so the encoded form is converted.
data = bytes(parse.urlencode(form_fields), encoding='utf8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
# The with-block closes the HTTP response once the body has been printed.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))
from urllib import request,parse

url = 'http://www.baidu.com'
headers = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    'Host': 'mkdirs.org'
}
dict = {
    'name': 'LyShark'
}
data = bytes(parse.urlencode(dict), encoding='utf8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
response = request.urlopen(req)
print(response.read().decode('utf-8'))

URL拼接功能: 我们有时候需要拼接一个网页地址,以实现下一步的访问.

>>> from urllib.parse import urljoin
>>>
>>> urljoin("http://www.baidu.com","abuot.html")
'http://www.baidu.com/abuot.html'
>>> from urllib.parse import urljoin
>>>
>>> urljoin("http://www.baidu.com","abuot.html")
'http://www.baidu.com/abuot.html'

Config 模块

ConfigParser模块用来读取配置文件,配置文件的格式跟windows下的ini配置文件相似,可以包含一个或多个节(section),每个节可以有多个参数(键=值),使用配置文件的好处就是一些参数无需写死,可以使程序更灵活的配置一些参数.

为了方便演示以下的例子,请在Python所在目录创建一个test.ini配置文件,写入以下内容.

[db]
db_host = 127.0.0.1
db_port = 69
db_user = root
db_pass = 123123
host_port = 69

[concurrent]
thread = 10
processor = 20

获取所有节点: 通过使用以下方式,我们可以获取到指定文件的所有主节点名称.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.sections()
>>> print(result)
['db', 'concurrent']
>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.sections()
>>> print(result)
['db', 'concurrent']

获取指定键值: 使用以下方式遍历,来获取指定节点(concurrent)下的所有键值对.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.items("concurrent")
>>> print(result)
[('thread', '10'), ('processor', '20')]
>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.items("concurrent")
>>> print(result)
[('thread', '10'), ('processor', '20')]

获取指定键: 使用以下方式遍历,来获取指定节点(concurrent)下的所有的键.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.options("concurrent")
>>> print(result)
['thread', 'processor']
>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.options("concurrent")
>>> print(result)
['thread', 'processor']

获取指定值: 使用以下方式遍历,来获取指定节点下指定键的对应值.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.get("concurrent","thread")
# result = config.getint("concurrent","thread")
# result = config.getfloat("concurrent","thread")
# result = config.getboolean("concurrent","thread")
>>> print(result)
10
>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.get("concurrent","thread")
# result = config.getint("concurrent","thread")
# result = config.getfloat("concurrent","thread")
# result = config.getboolean("concurrent","thread")
>>> print(result)
10

检查&添加&删除主节点: 检查、添加、删除指定的主节点数据.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")

#--检查主节点---------------------------------------------
>>> has_sec=config.has_section("db")
>>> print(has_sec)
True
#--添加主节点---------------------------------------------
>>> config.add_section("lyshark")
>>> config.write(open("test.ini","w"))
#--删除主节点---------------------------------------------
>>> config.remove_section("lyshark")
True
>>> config.write(open("test.ini","w"))
>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")

#--检查主节点---------------------------------------------
>>> has_sec=config.has_section("db")
>>> print(has_sec)
True
#--添加主节点---------------------------------------------
>>> config.add_section("lyshark")
>>> config.write(open("test.ini","w"))
#--删除主节点---------------------------------------------
>>> config.remove_section("lyshark")
True
>>> config.write(open("test.ini","w"))

检查&添加&删除指定键值对: 检查、删除、设置指定组内的键值对.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")

#--检查节点中的键值对--------------------------------------
>>> has_opt=config.has_option("db","db_host")
>>> print(has_opt)
True
#--设置节点中的键值对--------------------------------------
>>> config.set("db","db_host","8888888888")
>>> config.write(open("test.ini","w"))
#--删除节点中的键值对--------------------------------------
>>> config.remove_option("db","db_host")
True
>>> config.write(open("test.ini","w"))
>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")

#--检查节点中的键值对--------------------------------------
>>> has_opt=config.has_option("db","db_host")
>>> print(has_opt)
True
#--设置节点中的键值对--------------------------------------
>>> config.set("db","db_host","8888888888")
>>> config.write(open("test.ini","w"))
#--删除节点中的键值对--------------------------------------
>>> config.remove_option("db","db_host")
True
>>> config.write(open("test.ini","w"))

JSON 模块

JSON(JavaScript Object Notation),是一种轻量级的数据交换格式,它基于 ECMAScript(欧洲计算机协会制定的js规范)的一个子集,采用完全独立于编程语言的文本格式来存储和表示数据,简洁和清晰的层次结构使得JSON成为理想的数据交换语言,易于人阅读和编写,同时也易于机器解析和生成,并有效地提升网络传输效率,JSON实现了字符串和编程语言之间的数据共享与交互,通用于各种编程语言中,JSON模块提供了四个功能:dumps、dump、loads、load,下面将详细介绍它的应用场景.

dumps(): 将Python的基本数据类型转化成字符串形式.

>>> import json
>>>
>>> dic={"admin":"123","lyshark":"123123"}
>>>
>>> print(dic,type(dic))
{'admin': '123', 'lyshark': '123123'} <class 'dict'>
>>>
>>> result=json.dumps(dic)
>>> print(result,type(result))
{"admin": "123", "lyshark": "123123"} <class 'str'>
>>> import json
>>>
>>> dic={"admin":"123","lyshark":"123123"}
>>>
>>> print(dic,type(dic))
{'admin': '123', 'lyshark': '123123'} <class 'dict'>
>>>
>>> result=json.dumps(dic)
>>> print(result,type(result))
{"admin": "123", "lyshark": "123123"} <class 'str'>

loads(): 将Python字符串形式转化成基本数据类型.

>>> import json
>>>
>>> string='{"key":"value"}'
>>> print(string,type(string))
{"key":"value"} <class 'str'>

>>> dic=json.loads(string)
>>> print(dic,type(dic))
{'key': 'value'} <class 'dict'>
>>> import json
>>>
>>> string='{"key":"value"}'
>>> print(string,type(string))
{"key":"value"} <class 'str'>

>>> dic=json.loads(string)
>>> print(dic,type(dic))
{'key': 'value'} <class 'dict'>

dump(): 先将指定数据序列化,然后再写入文件中,持久化存储,一步到位.

>>> import json
>>>
>>> lists=[1,2,3,4,5,6,7,8,9,10]
>>> lists
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
>>>
>>> json.dump(lists,open("db.json","w",encoding="utf-8"))

>>> f=open("db.json","w")
>>> json.dump(lists,f)
>>> import json
>>>
>>> lists=[1,2,3,4,5,6,7,8,9,10]
>>> lists
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
>>>
>>> json.dump(lists,open("db.json","w",encoding="utf-8"))

>>> f=open("db.json","w")
>>> json.dump(lists,f)

load(): 读取一个序列文件,将其中的内容加载,反序列化到程序中.

>>> import json
>>>
>>> lists=json.load(open("db.json","r",encoding="utf-8"))
>>> lists
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
>>> import json
>>>
>>> lists=json.load(open("db.json","r",encoding="utf-8"))
>>> lists
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

XML 模块

XML即可扩展标记语言,XML的宗旨是传输数据,XML是实现不同语言或程序之间进行数据交换的协议,XML是目前数据交换的唯一公共语言,跟json差不多,但json使用起来更简单,不过,在json还没诞生的黑暗年代,大家只能选择用xml,至今很多传统公司如金融行业的很多系统的接口还主要是XML作为数据通信接口,如下我们就来学习一下这个模块的使用吧.

为了方便演示后续内容,请自行在Python当前目录下创建lyshark.xml文件,并写入以下XML文档.

<?xml version="1.0" encoding="UTF-8"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2019</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2020</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2029</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>

创建XML文档: 通过使用XML函数,创建一个XML文档,原生保存XML时默认无缩进.

<root>
    <son name="1号儿子">
        <grand name="1号孙子"></grand>
    </son>
    <son name="2号儿子">
        <grand name="2号孙子"></grand>
    </son>
</root>
#--以下代码则可创建如上格式-------------------------------------------------
>>> import xml.etree.ElementTree as ET
>>>
>>> root=ET.Element("root")
>>>
>>> son1=ET.Element("son",{"name":"1号儿子"})
>>> son2=ET.Element("son",{"name":"2号儿子"})
>>>
>>> grand1=ET.Element("grand",{"name":"1号孙子"})
>>> grand2=ET.Element("grand",{"name":"2号孙子"})
>>>
>>> son1.append(grand1)
>>> son2.append(grand2)
>>>
>>> root.append(son1)
>>> root.append(son2)
>>>
>>> tree=ET.ElementTree(root)
>>> tree.write('lyshark.xml',encoding='utf-8',short_empty_elements=False)
<root>
    <son name="1号儿子">
        <grand name="1号孙子"></grand>
    </son>
    <son name="2号儿子">
        <grand name="2号孙子"></grand>
    </son>
</root>
#--以下代码则可创建如上格式-------------------------------------------------
>>> import xml.etree.ElementTree as ET
>>>
>>> root=ET.Element("root")
>>>
>>> son1=ET.Element("son",{"name":"1号儿子"})
>>> son2=ET.Element("son",{"name":"2号儿子"})
>>>
>>> grand1=ET.Element("grand",{"name":"1号孙子"})
>>> grand2=ET.Element("grand",{"name":"2号孙子"})
>>>
>>> son1.append(grand1)
>>> son2.append(grand2)
>>>
>>> root.append(son1)
>>> root.append(son2)
>>>
>>> tree=ET.ElementTree(root)
>>> tree.write('lyshark.xml',encoding='utf-8',short_empty_elements=False)

打开XML文档: 通过使用xml.etree.ElementTree,来实现打开XML文件.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree = ET.parse("lyshark.xml")
>>> root = tree.getroot()
>>> print(root.tag)
>>> import xml.etree.ElementTree as ET
>>> 
>>> tree = ET.parse("lyshark.xml")
>>> root = tree.getroot()
>>> print(root.tag)

遍历XML文档(单层): 通过使用循环的方式,来实现对XML文件子树的遍历.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for child in root:
...     print(child.tag,child.attrib)
...
country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}
>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for child in root:
...     print(child.tag,child.attrib)
...
country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}

遍历XML文档(多层): 通过使用循环的方式遍历root下面的目录,来实现对XML文件子树的子树进行遍历.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>     # 遍历XML文档的第二层
>>> for x in root:
        # 第二层节点的标签名称和标签属性
...     print("主目录: %s"%x.tag)
        # 遍历XML文档的第三层
...     for y in x:
        # 第三层节点的标签名称和内容
...             print(y.tag,y.attrib,y.text)
...
主目录: country
rank {'updated': 'yes'}
year {}
gdppc {}
neighbor {'direction': 'E', 'name': 'Austria'}
neighbor {'direction': 'W', 'name': 'Switzerland'}
主目录: country
rank {'updated': 'yes'}
year {}
gdppc {}
neighbor {'direction': 'N', 'name': 'Malaysia'}
主目录: country
rank {'updated': 'yes'}
year {}
gdppc {}
neighbor {'direction': 'W', 'name': 'Costa Rica'}
neighbor {'direction': 'E', 'name': 'Colombia'}
>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>     # 遍历XML文档的第二层
>>> for x in root:
        # 第二层节点的标签名称和标签属性
...     print("主目录: %s"%x.tag)
        # 遍历XML文档的第三层
...     for y in x:
        # 第三层节点的标签名称和内容
...             print(y.tag,y.attrib,y.text)
...
主目录: country
rank {'updated': 'yes'}
year {}
gdppc {}
neighbor {'direction': 'E', 'name': 'Austria'}
neighbor {'direction': 'W', 'name': 'Switzerland'}
主目录: country
rank {'updated': 'yes'}
year {}
gdppc {}
neighbor {'direction': 'N', 'name': 'Malaysia'}
主目录: country
rank {'updated': 'yes'}
year {}
gdppc {}
neighbor {'direction': 'W', 'name': 'Costa Rica'}
neighbor {'direction': 'E', 'name': 'Colombia'}

遍历指定节点: 通过循环的方式,配合root.iter()来实现只遍历XML文档中的year节点.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for node in root.iter("year"):
...     print(node.tag,node.text)
...
year 2019
year 2020
year 2029
>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for node in root.iter("year"):
...     print(node.tag,node.text)
...
year 2019
year 2020
year 2029

修改XML字段: 通过遍历的方式,找到节点为year的数据行,并将其内容自动加1,并回写到XML文档.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for node in root.iter("year"):     #遍历并修改每个字段内容
...     new_year=int(node.text) + 1    #先将node.text变成整数,实现加法
...     node.text=str(new_year)        #然后变成字符串,复制给内存中的text
...     node.set("updated","yes")      #在每个year字段上加上一段属性,updated=yes
...
>>> tree.write("lyshark.xml")          #回写到配置文件中,覆盖成最新的数据
>>> del node.attrib["name"]            #删除节点中的指定属性字段
>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for node in root.iter("year"):     #遍历并修改每个字段内容
...     new_year=int(node.text) + 1    #先将node.text变成整数,实现加法
...     node.text=str(new_year)        #然后变成字符串,复制给内存中的text
...     node.set("updated","yes")      #在每个year字段上加上一段属性,updated=yes
...
>>> tree.write("lyshark.xml")          #回写到配置文件中,覆盖成最新的数据
>>> del node.attrib["name"]            #删除节点中的指定属性字段

删除XML字段: 通过遍历的方式,查找所有的country节点,并判断如果内部rank>50则删除这个country节点.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>     # 遍历data下的所有country节点
>>> for country in root.findall("country"):
        # 获取每一个country节点下rank节点的内容
...     rank=int(country.find("rank").text)
...     if rank > 50:
        # 删除指定country节点
...             root.remove(country)
...
>>> tree.write("output.xml",encoding="utf-8")
>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>     # 遍历data下的所有country节点
>>> for country in root.findall("country"):
        # 获取每一个country节点下rank节点的内容
...     rank=int(country.find("rank").text)
...     if rank > 50:
        # 删除指定country节点
...             root.remove(country)
...
>>> tree.write("output.xml",encoding="utf-8")

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。

上一篇：将指定文件夹下所有图片转成EXCEL PYTHON 修改复制

下一篇：写一个判断String类包含数字的汉字时间说法是否能转化成整分的java代码

提问和评论都可以，用心的回复会被更多人看到评论

发布评论

相关文章

官方博客	全部文章	热门标签	班级博客
了解我们	网站地图	意见反馈

鸿蒙开发者社区	51CTO学堂
51CTO	软考资讯