# Demo: plain substring lookup with str.find versus the three basic regex
# lookups (re.match, re.search, re.findall).
# The re module must be imported before any of its functions can be used.
import re

# Sample text used by every lookup below; "baidu" starts at index 4.
info = "www baidu com"

print("=======字符串自带find方法查找============")
# str.find returns the index of the first occurrence of the substring.
print(info.find("baidu"))  # -> 4
print(info.find("www"))  # -> 0

print("=======re.match方法从头查找==============")
# re.match only succeeds when the pattern matches at the very start of the
# string; it returns a Match object on success and None otherwise.
print("从头匹配OK:", re.match("www", info))  # -> span=(0, 3), match='www'
# Match.span() gives the (start, end) position of the match.
print("获取从头匹配OK结果:", str(re.match("www", info).span()))  # -> (0, 3)
print("从头不匹配:", re.match("baidu", info))  # -> None
print("忽略大小写:", re.match("www", "WWW.BAIDU.COM", re.IGNORECASE))  # -> match='WWW'

print("=======re.search任意位置查找==============")
# re.search scans the whole string and returns the first match found.
print("search匹配:", re.search("www", info))  # -> span=(0, 3), match='www'
print("search匹配:", re.search("baidu", info))  # -> span=(4, 9), match='baidu'
print("search忽略大小写:", re.search("www", "WWW.BAIDU.COM", re.IGNORECASE))  # -> match='WWW'

print("=======re.findall全局查找==============")
# re.findall returns every non-overlapping match as a list of strings.
print("findall匹配:", re.findall("www", info))  # -> ['www']
print("findall匹配:", re.findall("baidu", info))  # -> ['baidu']
print("findall忽略大小写:", re.findall("www", "WWW.BAIDU.COM", re.IGNORECASE))  # -> ['WWW']
# Demo: character classes ([...]), ranges ([a-z]) and negated classes ([^...]).
import re

info = "www.baidu.com.17745674567.127.o.0.1.hello.heLLo.heLlo"

# Three spellings of "he", two letters, "o"; with re.I they all accept the
# same mixed-case words.
mark1 = "he[lkn][lkn]o"
mark2 = "he[a-z][a-z]o"
mark3 = "he[a-zA-Z][a-zA-Z]o"

print("原始字符串:", info)
print("==========范围内匹配==========")

# (caption, pattern) pairs, printed in order; every search ignores case.
for caption, pattern in (
    ("匹配[lkn]范围字符:", mark1),  # -> ['hello', 'heLLo', 'heLlo']
    ("匹配[a-z]范围字符:", mark2),  # -> ['hello', 'heLLo', 'heLlo']
    ("匹配[a-zA-Z]范围字符:", mark3),  # -> ['hello', 'heLLo', 'heLlo']
    ("匹配[0-9]范围字符:", "[0-9]"),  # -> every digit, one per item
    ("匹配[^0-9a-t]范围取反匹配:", "[^0-9a-t]"),  # -> the w's, the u and the dots
):
    print(caption, re.compile(pattern, re.I).findall(info))
# Demo: anchors (^ and $), {n} quantifiers, re.split and alternation.
import re

# With an anchor (^ or $) the pattern is tied to one end of the string;
# without an anchor it may match anywhere.
text = "food hello Food"  # named `text` so the builtin `str` is not shadowed
pat = "fo[ol][dlk]$"  # anchored at the end of the string
print("============开始结束匹配==============")
pat22 = "^fo[ol][dlk]"  # anchored at the start of the string
pat33 = "fo[ol][dlk]"  # no anchor
# Each label is paired with the pattern that actually carries that anchor.
print("匹配开头:", re.findall(pat22, text, re.I))  # -> ['food']
print("匹配结尾:", re.findall(pat, text, re.I))  # -> ['Food']
print("匹配不限开头结尾:", re.findall(pat33, text, re.I))  # -> ['food', 'Food']

print("============多位简写匹配==============")
input_data = "2020-09-09"
mark = "[0-9]{4}-[0-9]{2}-[0-9]{2}"  # {n} repeats the preceding item n times
print("生日格式匹配为:", re.match(mark, input_data, re.I))  # -> span=(0, 10)

print("============拆分匹配==============")
info = "hjdfkj3243423.4324324kjkl343435353adf"
# The r prefix makes this a raw string, so the backslash reaches the regex
# engine untouched; it does not by itself mark the string as a regex.
mark2 = r"\d+"
print(re.split(mark2, info))  # -> ['hjdfkj', '.', 'kjkl', 'adf']
score = "119.9"
# Optional sign, integer part, optional fractional part; whole string only.
mark3 = r"^[+-]?\d+(\.\d+)?$"
print(re.match(mark3, score, re.I))  # -> span=(0, 5), match='119.9'

print("============逻辑匹配==============")
tel = "(010)-12343234"
# Building blocks of the phone pattern:
#   local number, 7-8 digits:             \d{7,8}
#   bare area code, 3-4 digits:           \d{3,4}
#   bracketed area code plus a hyphen:    \(\d{3,4}\)-
# The area code (either form) is optional as a whole.
mark4 = r"((\d{3,4})|(\(\d{3,4}\)-))?\d{7,8}"
print(re.match(mark4, tel))  # -> span=(0, 14), match='(010)-12343234'
# 正则匹配模式 (regex matching modes / flags)
# Demo: regex matching modes (flags) - re.I, re.M, re.S and re.X.
import re

# Multi-line sample text. The first line is a single space and the last text
# line ends with a trailing space, so the literal is spelled out with explicit
# "\n" instead of relying on invisible whitespace inside a triple-quoted block.
data = (
    " \n"
    "food is very good\n"
    "food is very good\n"
    "food is very good \n"
)
mark = "fo{2}d"
mark2 = ".+"

# re.I ignores case; re.M is passed as well but only changes ^ and $, which
# this pattern does not use.
print("多行匹配以及忽略大小写:", re.findall(mark, data, re.I | re.M))  # -> ['food', 'food', 'food']
# Default mode: "." does not match a newline, so ".+" yields one item per line.
print("默认匹配,不加其他后缀:", re.findall(mark2, data))
# re.S (DOTALL): "." also matches newlines, so the whole text is one match.
print("修改.匹配任意模式,可匹配换行等任意字符:", re.findall(mark2, data, re.S))
# re.X (VERBOSE): whitespace and comments inside the pattern are ignored;
# ".+" contains neither, so the result equals the default-mode result.
print("忽略空白和注释,进行匹配:", re.findall(mark2, data, re.X))
# 分组 (capture groups)
# Demo: capture groups split a match into parts without a second parsing step.
import re

info = "id:root, phone:11012341234, bir:2021-09-08"

# Same date shape twice: once with a group per field, once without groups.
mark = r"(\d{4})-(\d{2})-(\d{2})"
mark2 = r"\d{4}-\d{2}-\d{2}"

res = re.search(mark, info)
res2 = re.search(mark2, info)

# Group 0 is the whole match.
print("获取分组数据为 :", res.group(0))  # -> 2021-09-08

# Groups 1..3 are the year, month and day, in pattern order.
for caption, piece in zip(
    ("获取第1组数据为:", "获取第2组数据为:", "获取第3组数据为:"),
    res.groups(),
):
    print(caption, piece)  # -> 2021, 09, 08

# Without parentheses only the whole match is available.
print("不加分组括号为 :", res2.group(0))  # -> 2021-09-08
# 环视 (lookaround assertions)
# Demo: a lookbehind pins the position right after "id:" so that only the
# word following it is captured.
import re

info = "id:root,name:tom,age:33,id:root"

# (?<=id:) asserts that "id:" precedes the match without consuming it;
# the word itself is captured in the group called "name".
mark = r"(?<=id:)(?P<name>\w+)"

print([hit.group("name") for hit in re.finditer(mark, info)])  # -> ['root', 'root']