一:re模块:正则表达式进行字符串匹配的时候需要调用的包。
1:导包:import re
2:检测:result = re.match(正则表达式,需要匹配的字符串),匹配成功返回一个匹配对象(Match),不成功返回None。
3:取出匹配的内容:result.group()
import re

if __name__ == '__main__':
    # A raw string keeps "\d" as a regex escape instead of an invalid string
    # escape. Pattern: a literal "0", three digits, a hyphen, eight digits.
    result = re.match(r"0\d{3}-\d{8}", "0755-81234567")
    # re.match returns a Match object on success and None on failure.
    if result:
        print("匹配成功")
        print(result.group())
    else:
        print("匹配失败")
4:match和search的区别:match只从字符串开头开始匹配,开头不符合就失败(返回None)。search会扫描整个字符串,只要任意位置有匹配就成功。search与findall的区别:search只返回第一个匹配,findall返回所有匹配组成的列表。并且findall的结果直接用print打印就可以,不需要使用group获取。
import re

# re.search scans the whole string, so "itcast" is found even though it is
# not at the beginning.
result = re.search("itcast", "www.itcast.cn")
if result:
    print("匹配成功", result.group())
else:
    print("匹配失败")

# re.match only tries to match at position 0; the string starts with "www",
# so this returns None.
result = re.match("itcast", "www.itcast.cn")
if result:
    print("匹配成功", result.group())
else:
    print("匹配失败")
二:匹配单个字符:
1:点
import re

# "." matches any single character except a newline.
ret = re.match(".", "M")
print(ret.group())

# re.match returns None when nothing matches, so test the result directly
# instead of catching the AttributeError that None.group() would raise.
ret = re.match(".", "\n")
if ret:
    print(ret.group())
else:
    print("匹配失败,不允许是换行")

ret = re.match("t.o", "too")
print(ret.group())
# Output:
# M
# 匹配失败,不允许是换行
# too
2:方括号[]:
import re

# 1: "[hH]" matches either a lowercase or an uppercase leading "h".
ret = re.match("[hH]", "hello python")
print(ret.group())
ret = re.match("[hH]", "Hello python")
print(ret.group())
ret = re.match("[hH]ello python", "Hello python")
print(ret.group())
# Output:
# h
# H
# Hello python

# 2: "[0-9]" matches any single digit from 0 to 9.
ret = re.match("[0-9]hello", "1hello")
print(ret.group())
ret = re.match("[0-9]hello", "2hello")
print(ret.group())
ret = re.match("[0-9]hello", "9hello")
print(ret.group())
# Output:
# 1hello
# 2hello
# 9hello

# 3: "[0-35-9]" is two ranges, 0-3 and 5-9, so every digit except 4 matches.
ret = re.match("[0-35-9]hello", "1hello")
print(ret.group())
# "4" is outside both ranges, so re.match returns None; check the result
# rather than relying on the exception raised by None.group().
ret = re.match("[0-35-9]hello", "4hello")
if ret:
    print(ret.group())
else:
    print("匹配失败")
# Output:
# 1hello
# 匹配失败
3:\d 与\D:
import re

# \d matches any single digit. Patterns are raw strings so the backslash
# reaches the regex engine unchanged.
ret = re.match(r"python\d", "python1")
print(ret.group())
ret = re.match(r"python\d", "python9")
print(ret.group())

# \D matches any single character that is NOT a digit (a newline counts too).
ret = re.match(r"python\D", "python_")
print(ret.group())
ret = re.match(r"python\D", "python\n")
print(ret.group())
ret = re.match(r"python\D", "python&")
print(ret.group())
ret = re.match(r"python\D", "python.")
print(ret.group())
# Output:
# python1
# python9
# python_
# python
#
# python&
# python.
4:\s与\S:
import re

# 1: \s matches a single whitespace character (space, newline, tab, ...).
# Patterns are raw strings; the subject strings keep their real "\n" / "\t".
res = re.match(r"hello\sworld", "hello world")
print(res.group())
res = re.match(r"hello\sworld", "hello\nworld")
print(res.group())
res = re.match(r"hello\sworld", "hello\tworld")
print(res.group())
# Output:
# hello world
# hello
# world
# hello<TAB>world   (the two words are separated by a tab character)

# 2: \S matches a single character that is NOT whitespace.
res = re.match(r"hello\S", "hello*")
print(res.group())
res = re.match(r"hello\S", "hello-")
print(res.group())
# A trailing space is whitespace, so re.match returns None here; check the
# result instead of swallowing every exception with a bare "except:".
res = re.match(r"hello\S", "hello ")
if res:
    print(res.group())
else:
    print("匹配失败")
# Output:
# hello*
# hello-
# 匹配失败
5:\w 和\W
import re

# 1: \w matches a single "word" character: a-z, A-Z, 0-9, underscore and,
# for str patterns, other Unicode word characters such as Chinese characters.
res = re.match(r"hello\w", "helloa")
print(res.group())
res = re.match(r"hello\w", "helloA")
print(res.group())
res = re.match(r"hello\w", "hello0")
print(res.group())
res = re.match(r"hello\w", "hello_")
print(res.group())
res = re.match(r"hello\w", "hello任")
print(res.group())
# "$" is not a word character, so re.match returns None; test the result
# rather than catching the exception raised by None.group().
res = re.match(r"hello\w", "hello$")
if res:
    print(res.group())
else:
    print("匹配失败")
# Output:
# helloa
# helloA
# hello0
# hello_
# hello任
# 匹配失败

# 2: \W matches a single character that is NOT a word character.
res = re.match(r"hello\W", "hello#")
print(res.group())
res = re.match(r"hello\W", "hello*")
print(res.group())
res = re.match(r"hello\W", "hello ")
print(res.group())
res = re.match(r"hello\W", "hello%")
print(res.group())
res = re.match(r"hello\W", "hello/")
print(res.group())
# Output:
# hello#
# hello*
# hello    (followed by one space)
# hello%
# hello/
三:匹配多个字符:
import re

# "*": the preceding character ("n") may appear zero or more times.
res = re.match("python*", "pytho")
print(res.group())
res = re.match("python*", "pythonn")
print(res.group())
res = re.match("python*", "pythonnnnnnnnn")
print(res.group())
# Output:
# pytho
# pythonn
# pythonnnnnnnnn

# "+": the preceding character must appear at least once.
# With no "n" at all the match fails and re.match returns None; check the
# result instead of hiding every error behind a bare "except:".
res = re.match("python+", "pytho")
if res:
    print(res.group())
else:
    print("匹配失败")
res = re.match("python+", "python")
print(res.group())
res = re.match("python+", "pythonn")
print(res.group())
res = re.match("python+", "pythonnnnnn")
print(res.group())
# Output:
# 匹配失败
# python
# pythonn
# pythonnnnnn
print("--------------------------")

# "?": the preceding character may appear zero times or once.
res = re.match("python?", "pytho")
print(res.group())
res = re.match("python?", "python")
print(res.group())
res = re.match("python?", "pythonn")
if res:
    print(res.group())
else:
    print("匹配失败,只能匹配一个或没有")
# Output:
# pytho
# python
# python
# Note: the last case does not fail. re.match is satisfied once "python" has
# matched and does not require the rest of the string to be consumed.
print("-----------------------------------")

# "{2}": the preceding character must appear exactly twice.
res = re.match("python{2}", "pythonn")
print(res.group())
res = re.match("python{2}", "python")
if res:
    print(res.group())
else:
    print("少一个也不行")
res = re.match("python{2}", "pythonnn")
print(res.group())
# Note: an extra "n" is tolerated, but it is not part of the match.
# Output:
# pythonn
# 少一个也不行
# pythonn
print("------------------------------------")

# "{2,4}": the preceding character must appear between 2 and 4 times.
res = re.match("python{2,4}", "pythonn")
print(res.group())
res = re.match("python{2,4}", "pythonnn")
print(res.group())
res = re.match("python{2,4}", "pythonnnn")
print(res.group())
res = re.match("python{2,4}", "pythonnnnn")
print(res.group())
# Note: a fifth "n" is tolerated, but the match still stops after four.
res = re.match("python{2,4}", "python")
if res:
    print(res.group())
else:
    print("少一个也不行")
# Output:
# pythonn
# pythonnn
# pythonnnn
# pythonnnn
# 少一个也不行
四:匹配开头和结尾:
import re

# 1: match strings that start with a digit.
#   ^   anchors the match at the start of the string
#   \d  matches one digit 0-9
#   .   matches any single character except a newline
#   *   repeats the preceding item zero or more times
# Patterns are raw strings so "\d" reaches the regex engine unchanged.
mach_obj = re.match(r"^\d.*", "4acajks")
print(mach_obj.group())
# re.match returns None on failure; test it instead of catching the
# exception that None.group() would raise.
mach_obj = re.match(r"^\d.*", "hello")
if mach_obj:
    print(mach_obj.group())
else:
    print("匹配失败")
# Output:
# 4acajks
# 匹配失败

# 2: match strings that end with a digit ("$" anchors at the end).
mach_obj = re.match(r".*\d$", "hello333")
print(mach_obj.group())
mach_obj = re.match(r".*\d$", "helloaaa")
if mach_obj:
    print(mach_obj.group())
else:
    print("匹配失败")
# Output:
# hello333
# 匹配失败

# 3: start with a digit, end with a digit, anything in between.
mach_obj = re.match(r"^\d.*\d$", "111hello333")
print(mach_obj.group())
mach_obj = re.match(r"^\d.*\d$", "hello333")
if mach_obj:
    print(mach_obj.group())
else:
    print("匹配失败")
mach_obj = re.match(r"^\d.*\d$", "hello")
if mach_obj:
    print(mach_obj.group())
else:
    print("匹配失败")
# Output:
# 111hello333
# 匹配失败
# 匹配失败

# 4: inside brackets "^" negates the class: "[^aeiou]" matches any single
# character that is not one of a, e, i, o, u.
mach_obj = re.match("[^aeiou]", "hello333")
print(mach_obj.group())
# Note: only the first character "h" is matched; the rest is not examined.
mach_obj = re.match("[^aeiou]", "aello333")
if mach_obj:
    print(mach_obj.group())
else:
    print("匹配失败")
# Output:
# h
# 匹配失败
五:匹配分组:
import re

# 1: from the list ["apple", "banana", "orange", "pear"], match "apple" and
# "pear". "|" means "either the left or the right alternative".
my_list = ["apple", "banana", "orange", "pear"]
for i in my_list:
    res = re.match("apple|pear", i)
    if res:
        print("匹配成功")
    else:
        print("匹配失败")
# Output:
# 匹配成功
# 匹配失败
# 匹配失败
# 匹配成功
print("--------------------------")

# 2: match 163 / 126 / qq / sina / yahoo e-mail addresses.
# 4 to 20 letters, digits or underscores, then "@", then one of the listed
# providers, then ".com". The dot is escaped ("\.") so it matches a literal
# "." only; an unescaped "." would accept any character in that position.
result = re.match(r"[a-zA-Z0-9_]{4,20}@(163|126|qq|sina|yahoo)\.com", "hello@163.com")
print(result.group())
# group(1) is the text captured by the first pair of parentheses.
print("分组1的内容:" + result.group(1))
# Output:
# hello@163.com
# 分组1的内容:163

# 3: match data such as "qq:10567" and extract both the "qq" label and the
# number. Group 1 is the literal "qq"; group 2 is a digit 1-9 followed by
# 4 to 10 more digits.
match_obj = re.match(r"(qq):([1-9]\d{4,10})", "qq:10567")
print(match_obj.group())
print(match_obj.group(1))
print(match_obj.group(2))
# Output:
# qq:10567
# qq
# 10567

# 4: referring back to what a group matched.
# Naive attempt at matching <html>hh</html>: the closing tag is matched
# independently of the opening tag.
match_obj = re.match("<[a-zA-Z1-6]+>.*<[/a-zA-Z1-6]+>", "<html>hh</div>")
if match_obj:
    print(match_obj.group())
else:
    print("匹配失败。。。。")
# Output:
# <html>hh</div>
# Problem: mismatched tags are accepted, as the output above shows.
# Fix: capture the opening tag name in a group and reuse it on the right
# with the backreference \1 (the content of the first group).
match_obj = re.match(r"<([a-zA-Z1-6]+)>.*</\1>", "<html></html>")
if match_obj:
    print(match_obj.group())
else:
    print("匹配失败。。。。")

# 5: match <html><h1>www.itcast.cn</h1></html>.
# Note: closing tags appear in reverse order, so the backreferences are
# written in reverse as well (\2 first, then \1).
match_obj = re.match(r"<([a-zA-Z1-6]+)><([a-zA-Z1-6]+)>.*</\2></\1>", "<html><h1>www.itcast.cn</h1></html>")
if match_obj:
    print(match_obj.group())
else:
    print("匹配失败。。。。")
# Output:
# <html><h1>www.itcast.cn</h1></html>

# 6: named groups: (?P<name>...) defines a group, (?P=name) refers back to it.
match_obj = re.match("<(?P<name1>[a-zA-Z1-6]+)><(?P<name2>[a-zA-Z1-6]+)>.*</(?P=name2)></(?P=name1)>", "<html><h1>www.itcast.cn</h1></html>")
if match_obj:
    print(match_obj.group())
else:
    print("匹配失败。。。。")
# Output:
# <html><h1>www.itcast.cn</h1></html>