python3原生字符 python中的元字符

转载

mob6454cc6e6a40 2024-04-16 20:49:04

文章标签 python3源生字符 re python 元字符正则表达式 文章分类 Python 后端开发

#!/usr/bin/env python
# -*- coding:utf-8  -*-

import re
# Regular expressions are an advanced text-search facility that supports
# both searching and replacing.
# A regular expression is itself a string made of special symbols and
# ordinary characters; it describes characters and how they may repeat.

# Why regular expressions:
#   - convenient for searching and modifying text
#   - supported by many languages
#   - flexible
# Typical uses: MongoDB's regex type, URL matching in frameworks such as
# Django, and web crawlers.

# re.findall(pattern, string)
#     pattern  the regular expression
#     string   the target string

# Metacharacters
# 1. Ordinary characters
#     Form: abc
#     Rule: an ordinary character matches that same character
s = "dsrfsagzigpdsrsadfsddsr"
res = re.compile("dsr").findall(s)
print(res)

# 2. Alternation
#     Form: ab|cd
#     Rule: matches when the expression on either side of "|" matches
# re.finditer(pattern, string)
#     Takes the same arguments as findall, but returns an iterator;
#     read each match's text with group().
res = re.finditer(pattern="ab|cd", string="abdcdsfsfagcd")
for i in res:
    print(i.group(0))
# The loop above has consumed the iterator, so this prints the iterator
# object itself, not the matches.
print(res)

# 3. Any single character
#     Metacharacter: .
#     Rule: matches any one character except a newline

res = re.compile("f.o").findall("foo is fao hefloat")
print(res)

# "." is not limited to ASCII: here it has to match the full-width comma.
res = re.compile("你好.").findall("你好，世界。")
print(res)

# 4. Start of string
#     Metacharacter: ^
#     Rule: matches the position at the start of the string

res = re.compile("^hel").findall("hello world,let me introduce")
print(res)

# 5. End of string
#     Metacharacter: $
#     Rule: matches the position at the end of the target string
#         Usage: <text>$

res = re.compile("on$").findall("hello python")
print(res)

# 6. Repetition
#     Metacharacter: *
#     Rule: matches the preceding character zero or more times
#     It cannot stand alone; it must follow another expression.
#     fo* means "f, then o zero or more times", so a bare f also matches,
#     because o* is treated as one unit.
#     .* means any run of characters

res = re.findall(pattern="fo*", string="f, fo,fooo,ffff,f,fffffffo")
print(res)

res = re.findall(pattern=".*", string="asfsdafas")
print(res)
# The result ends with an empty string:
# ['asfsdafas', '']

# 7. Repetition
#     Metacharacter: +
#     Rule: matches the preceding expression one or more times
#     Unlike *, the preceding character must occur at least once.

res = re.findall(pattern=".+", string="hello.py")
print(res)
res = re.findall(pattern=".py$", string="heea.asf   hello.py     python.py")
print(res)

# 8. Repetition
#     Metacharacter: ?
#     Rule: matches the preceding expression zero times or once

res = re.findall(pattern="ab?", string="abadsfasnasfab")
print(res)

# 9. Repetition
#     Metacharacter: {n}
#     Rule: the preceding expression repeated exactly n times
#     sb{3}  --->  sbbb
res = re.findall(pattern="sb{3}", string="sbbbb adsafasbbbasbb")
print(res)


# 10. Repetition
#     Metacharacter: {m,n}
#     Rule: matches the preceding character between m and n times
#     db{3,5}  --->  dbbb  dbbbb  dbbbbb

res = re.findall(pattern="db{2,5}", string="dbbbb adafabdbbbb ab db")
print(res)


# 11. Character sets
#     Metacharacter: []
#     Rule: matches any single character listed inside the brackets
# Examples:
#     [abslop]
#     [a-z]
#     [A-Z]
#     [0-9]
#     [123a-zA-Z]

res = re.compile("[a-p]").findall("dasfasdfkasjgaszzzzzzx")
print(res)

# 12. Negated character sets
#     Metacharacter: [^...]
#     Rule: matches any single character NOT in the given set

res = re.compile("[^a-z]").findall("21sadf0987f7asdfASDSF")
print(res)
# ['2', '1', '0', '9', '8', '7', '7', 'A', 'S', 'D', 'S', 'F']


# NOTE: every pattern below is a raw string (r"..."). Without the r prefix,
# sequences such as \d, \w and \s are invalid string escapes and Python
# emits a warning for them when compiling the file.

# 13. Digit / non-digit characters
#     Metacharacters: \d  \D
#     Rule: \d matches any digit character; \D matches any non-digit

res = re.findall(r"1\d{10}", "13298760023")
print(res)

res = re.findall(r"a\D{2}", "asfa1234u89knsad")
print(res)
# ['asf']

# 14. Word / non-word characters (word characters: digits, letters,
#     underscore)
#     Metacharacters: \w  \W
#     Rule: \w matches a word character; \W matches a non-word character

res = re.findall(r"\w+", "arefdzfdf&^&*")
print(res)

res = re.findall(r"\W+", "dasf658kjkf12es87&^%&^")
print(res)

# 15. Whitespace / non-whitespace characters
#     Whitespace characters include \n, \t, \r and the space character.
#     Metacharacters: \s  \S
#     Rule: \s matches any whitespace character
#           \S matches any non-whitespace character

res = re.findall(r"\s", "hello world")
print(res)

res = re.findall(r"\s", "hello \n \r \t")
print(res)
# [' ', '\n', ' ', '\r', ' ', '\t']

# Capitalised words: an uppercase letter followed by word characters.
a = "hello World nihao China**s"
res = re.findall(r"[A-Z]\w+", a)
print(res)

# NOTE: every pattern containing a backslash is written as a raw string
# (r"..."). Without the r prefix, \A, \Z and \w are invalid string escapes
# (Python warns about them), and \b would be read as a backspace character.

# 16. Start / end of string
#     Metacharacters: \A  \Z
#     Rule: \A matches the start position; \Z matches the end position
#     Anchoring both ends forces a whole-string match, e.g. ^abc$ matches
#     only when the string is exactly "abc".

res = re.findall(r"\A/\w+/\w+\Z", "/football/zhonchaoadfas")
print(res)


# 17. Word boundary / non-boundary positions
#     Metacharacters: \b  \B
#     Rule: \b matches at a word boundary; \B matches where there is no
#     word boundary
#
# A word boundary is the position between a word character (digit, letter
# or underscore) and any other character.
# e.g.  adc_1   hhhha

# "is" as a standalone word only.
res = re.findall(r"\bis\b", "This is a test")
print(res)

# "is" ending a word but not starting one (the "is" inside "This").
res = re.findall(r"\Bis\b", "This is a test")
print(res)

# Without boundary assertions both occurrences are found.
res = re.findall("is", "This is a test")
print(res)

# Extract an e-mail address ending in ".cn".
s = "My email is wei@tedu.cn"

res = re.findall(r"\w+@\w+\.cn", s)
print(res)

# A digit at the start of the string, followed by 7 to 9 word characters.
res = re.findall(r"^[0-9]\w{7,9}", "32asaassa64")
print(res)

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。