黑马程序员---正则表达式---记忆

原创

mb61037a3723f67 2021-07-30 14:05:12 博主文章分类：python ©著作权

文章标签 python html 字符串正则表达式特殊字符 文章分类 前端开发

©著作权归作者所有：来自51CTO博客作者mb61037a3723f67的原创作品，请联系作者获取转载授权，否则将追究法律责任

一：re模块：正则表达式进行字符串匹配的时候需要调用的包。
1:导包：import re
2:检测：result = re.match(正则表达式, 需要匹配的字符串)，匹配成功返回一个匹配对象（Match 对象），匹配失败返回 None。
3：取出匹配的内容：result.group()

import re

if __name__ == '__main__':
    # re.match only tries to match at the start of the subject string.
    # The pattern is a raw string so that "\d" is passed to the regex engine
    # unchanged instead of relying on an invalid string escape sequence.
    result = re.match(r"0\d{3}-\d{8}", "0755-81234567")
    if result:
        print("匹配成功")
        # group() without arguments returns the whole matched text.
        print(result.group())
    else:
        print("匹配失败")

4：match和search的区别：match只从字符串的开头开始匹配，开头不符合就失败；search会扫描整个字符串，只要其中有符合的部分就匹配成功。search与findall的区别：search只返回第一个匹配结果，findall以列表形式返回所有匹配到的字符串，可以直接用print打印，不需要使用group获取。

import re

# Run the same pattern against the same subject with both entry points:
#   re.search scans the whole subject and succeeds on the first occurrence;
#   re.match only tries at position 0, so it fails for this subject.
for finder in (re.search, re.match):
    result = finder("itcast", "www.itcast.cn")
    if result is None:
        print("匹配失败")
    else:
        print("匹配成功", result.group())

二：匹配单个字符：
黑马程序员---正则表达式---记忆_正则表达式
1：点

import  re

# "." matches any single character except a newline.
ret = re.match(".", "M")
print(ret.group())

# re.match returns None when nothing matches. Test for that directly rather
# than calling .group() on None and catching the resulting exception with a
# broad "except Exception".
ret = re.match(".", "\n")
if ret is None:
    print("匹配失败，不允许是换行")
else:
    print(ret.group())

# The dot stands for exactly one character between "t" and "o".
ret = re.match("t.o", "too")
print(ret.group())

运行结果：

M
匹配失败，不允许是换行
too

2：方括号 []：

import re

# 1: A character class matches exactly one of the listed characters, so
# "[hH]" accepts either an upper- or lower-case first letter.
for pattern, text in (
    ("[hH]", "hello python"),
    ("[hH]", "Hello python"),
    ("[hH]ello python", "Hello python"),
):
    ret = re.match(pattern, text)
    print(ret.group())
# Output:
# h
# H
# Hello python

# 2: "[0-9]" matches any single digit in front of "hello".
for text in ("1hello", "2hello", "9hello"):
    ret = re.match("[0-9]hello", text)
    print(ret.group())
# Output:
# 1hello
# 2hello
# 9hello



# 3: "[0-35-9]" combines the ranges 0-3 and 5-9, i.e. any digit except 4.
ret = re.match("[0-35-9]hello", "1hello")
print(ret.group())

# "4" is outside both ranges, so re.match returns None. Check for None
# explicitly instead of catching the exception raised by None.group().
ret = re.match("[0-35-9]hello", "4hello")
if ret is None:
    print("匹配失败")
else:
    print(ret.group())

# Output:
# 1hello
# 匹配失败

3：\d 与\D:

import  re

# \d matches any single decimal digit. Patterns are written as raw strings
# so the backslash reaches the regex engine unchanged instead of relying on
# an invalid string escape sequence.
for text in ("python1", "python9"):
    ret = re.match(r"python\d", text)
    print(ret.group())

# \D matches any single character that is not a digit, newline included.
for text in ("python_", "python\n", "python&", "python."):
    ret = re.match(r"python\D", text)
    print(ret.group())

# Output:
# python1
# python9
# python_
# python
#
# python&
# python.

4:\s与\S:

import re

# 1: \s matches a single whitespace character; a space, a newline and a tab
# are tried here. The pattern is a raw string so "\s" is not interpreted as
# an (invalid) string escape sequence.
for text in ("hello world", "hello\nworld", "hello\tworld"):
    res = re.match(r"hello\sworld", text)
    print(res.group())

# Output (the last line contains a tab between the two words):
# hello world
# hello
# world
# hello	world


# 2: \S matches a single non-whitespace character (raw string keeps the
# backslash intact). A trailing space is whitespace, so the last subject
# does not match; the None result is checked explicitly instead of being
# detected through a bare "except:".
for text in ("hello*", "hello-", "hello "):
    res = re.match(r"hello\S", text)
    if res is None:
        print("匹配失败")
    else:
        print(res.group())

# Output:
# hello*
# hello-
# 匹配失败

5:\w 和\W

import re

# \w matches one "word" character. As the subjects below show, that covers
# a-z, A-Z, 0-9, "_" and Chinese characters, but not "$".
# The pattern is a raw string so "\w" is not treated as a string escape, and
# a failed match is detected by checking for None rather than by catching
# the exception raised by None.group().
for text in ("helloa", "helloA", "hello0", "hello_", "hello任", "hello$"):
    res = re.match(r"hello\w", text)
    if res is None:
        print("匹配失败")
    else:
        print(res.group())

# Output:
# helloa
# helloA
# hello0
# hello_
# hello任
# 匹配失败

# 2: \W matches one non-word character (punctuation, space, ...).
# Raw string so the backslash reaches the regex engine unchanged.
for text in ("hello#", "hello*", "hello ", "hello%", "hello/"):
    res = re.match(r"hello\W", text)
    print(res.group())

# Output:
# hello#
# hello*
# hello 
# hello%
# hello/

三：匹配多个字符：
黑马程序员---正则表达式---记忆_python_02

import  re

# "*" applies to the preceding "n": zero or more repetitions are accepted.
for text in ("pytho", "pythonn", "pythonnnnnnnnn"):
    res = re.match("python*", text)
    print(res.group())
# Output:
# pytho
# pythonn
# pythonnnnnnnnn

# "+" applies to the preceding "n": at least one repetition is required, so
# "pytho" does not match. The failed match is detected by checking for None
# instead of a bare "except:" around None.group().
for text in ("pytho", "python", "pythonn", "pythonnnnnn"):
    res = re.match("python+", text)
    if res is None:
        print("匹配失败")
    else:
        print(res.group())

# Output:
# 匹配失败
# python
# pythonn
# pythonnnnnn
print("--------------------------")
# "?" applies to the preceding "n": zero or one repetition.
# re.match does not have to consume the whole subject, so "pythonn" still
# matches its first six characters and nothing fails here. The result is
# checked for None explicitly instead of wrapping the call in a bare
# "except:".
for text in ("pytho", "python", "pythonn"):
    res = re.match("python?", text)
    if res is None:
        print("匹配失败，只能匹配一个或没有")
    else:
        print(res.group())

# Output:
# pytho
# python
# python
# Note: the last subject is not an error; matching stops once the pattern is
# satisfied and the rest of the subject is not examined.

print("-----------------------------------")

# "{2}" applies to the preceding "n": exactly two repetitions are required.
# One "n" is too few (no match); a third "n" is simply left unmatched.
# A failed match is detected by checking for None rather than by catching
# the exception raised by None.group().
for text in ("pythonn", "python", "pythonnn"):
    res = re.match("python{2}", text)
    if res is None:
        print("少一个也不行")
    else:
        print(res.group())

# Output:
# pythonn
# 少一个也不行
# pythonn

print("------------------------------------")

# "{2,4}" applies to the preceding "n": between two and four repetitions.
# A fifth "n" is left unmatched, while a single "n" is too few and yields
# None, which is checked explicitly instead of catching an exception.
for text in ("pythonn", "pythonnn", "pythonnnn", "pythonnnnn", "python"):
    res = re.match("python{2,4}", text)
    if res is None:
        print("少一个也不行")
    else:
        print(res.group())

# Output:
# pythonn
# pythonnn
# pythonnnn
# pythonnnn
# 少一个也不行

四：匹配开头和结尾：
黑马程序员---正则表达式---记忆_正则表达式_03

import re

# Match subjects that start with a digit:
#   ^   anchors at the start of the subject
#   \d  one digit 0-9
#   .   any single character except a newline
#   *   zero or more of the preceding item
# The pattern is a raw string so "\d" is not treated as a string escape, and
# a failed match is detected by checking for None instead of catching the
# exception raised by None.group().
for text in ("4acajks", "hello"):
    mach_obj = re.match(r"^\d.*", text)
    if mach_obj is None:
        print("匹配失败")
    else:
        print(mach_obj.group())

# Output:
# 4acajks
# 匹配失败


# Match subjects that end with a digit: "$" anchors at the end of the
# subject. Raw string for the pattern; explicit None check on failure.
for text in ("hello333", "helloaaa"):
    mach_obj = re.match(r".*\d$", text)
    if mach_obj is None:
        print("匹配失败")
    else:
        print(mach_obj.group())

# Output:
# hello333
# 匹配失败


# Match subjects that start AND end with a digit; anything may sit between.
# Raw string for the pattern; explicit None check on failure.
for text in ("111hello333", "hello333", "hello"):
    mach_obj = re.match(r"^\d.*\d$", text)
    if mach_obj is None:
        print("匹配失败")
    else:
        print(mach_obj.group())

# Output:
# 111hello333
# 匹配失败
# 匹配失败

# Inside a character class a leading "^" negates it: "[^aeiou]" matches any
# single character that is not one of a, e, i, o, u.
# Only the first character is matched (and printed); the rest of the subject
# is ignored. A failed match is detected by checking for None instead of
# catching the exception raised by None.group().
for text in ("hello333", "aello333"):
    mach_obj = re.match("[^aeiou]", text)
    if mach_obj is None:
        print("匹配失败")
    else:
        print(mach_obj.group())

# Output:
# h
# 匹配失败

五：匹配分组：
黑马程序员---正则表达式---记忆_字符串_04

import re

# 1: From the list below, pick out "apple" and "pear" using alternation:
# "a|b" matches either the left-hand or the right-hand alternative.

my_list = ["apple", "banana", "orange", "pear"]

for fruit in my_list:
    res = re.match("apple|pear", fruit)
    print("匹配成功" if res else "匹配失败")

# Output:
# 匹配成功
# 匹配失败
# 匹配失败
# 匹配成功
print("--------------------------")



# 2: Match e-mail addresses at 163, 126, qq, sina or yahoo.
# Local part: 4 to 20 letters, digits or underscores, then "@", then one of
# the listed providers, then ".com".
# The dot before "com" is escaped so it only matches a literal "."; left
# unescaped it would match any character (e.g. "hello@163xcom").
email_pattern = r"[a-zA-Z0-9_]{4,20}@(163|126|qq|sina|yahoo)\.com"
result = re.match(email_pattern, "hello@163.com")
print(result.group())

# Parentheses create a capture group; group(1) is the first one, here the
# provider name.
print("分组1的内容：" + result.group(1))

# Output:
# hello@163.com
# 分组1的内容：163


# 3: Match data such as "qq:10567" and extract both the "qq" label and the
# number.
# Group 1 is the literal "qq"; group 2 is one digit 1-9 followed by 4 to 10
# further digits. The pattern is a raw string so "\d" is not treated as a
# string escape sequence.
match_obj = re.match(r"(qq):([1-9]\d{4,10})", "qq:10567")
print(match_obj.group())
print(match_obj.group(1))
print(match_obj.group(2))

# Output:
# qq:10567
# qq
# 10567


# 4: Referring back to what a group matched.
# Goal: match a pair of tags such as <html>hh</html>.

# First attempt: opening and closing tag are matched independently, so a
# mismatched pair is accepted as well.
match_obj = re.match("<[a-zA-Z1-6]+>.*<[/a-zA-Z1-6]+>", "<html>hh</div>")
print(match_obj.group() if match_obj else "匹配失败。。。。")
# Output:
# <html>hh</div>

# To require the same tag name on both sides, capture the opening name in a
# group and refer to it in the closing tag: "\\1" in this non-raw string is
# the regex backreference \1, i.e. "whatever group 1 matched".
match_obj = re.match("<([a-zA-Z1-6]+)>.*</\\1>", "<html></html>")
print(match_obj.group() if match_obj else "匹配失败。。。。")


# 5: Match <html><h1>www.itcast.cn</h1></html>.
# Tags close in the reverse order they were opened, so the backreferences
# appear in reverse order too: \2 (inner tag) first, then \1 (outer tag).
match_obj = re.match(
    "<([a-zA-Z1-6]+)><([a-zA-Z1-6]+)>.*</\\2></\\1>",
    "<html><h1>www.itcast.cn</h1></html>",
)
print(match_obj.group() if match_obj else "匹配失败。。。。")

# Output:
# <html><h1>www.itcast.cn</h1></html>


# 6: Give groups a name with (?P<name>...) and refer back to them by name
# with (?P=name) instead of by number.
match_obj = re.match(
    "<(?P<name1>[a-zA-Z1-6]+)><(?P<name2>[a-zA-Z1-6]+)>.*</(?P=name2)></(?P=name1)>",
    "<html><h1>www.itcast.cn</h1></html>",
)
print(match_obj.group() if match_obj else "匹配失败。。。。")

# Output:
# <html><h1>www.itcast.cn</h1></html>