正则表达式语法实例演示-python

import re

# Anchors and basic wildcards:
#   ^  anchors the match at the start of the string
#   .  matches any single character
#   *  repeats the preceding item zero or more times
#   $  anchors the match at the end of the string
line = 'bobby123'

# Every pattern is tried against the same subject; only the ones that
# match are echoed back.
for regex_str in (
    '^b.*',    # starts with "b"                              -> matches
    '^a.*',    # starts with "a"                              -> no match
    '.*3$',    # ends with "3"                                -> matches
    '.*4$',    # ends with "4"                                -> no match
    '^b.3$',   # "b", exactly one character, then "3"         -> no match
    '^b.*3$',  # "b", zero or more characters, then "3"       -> matches
):
    if re.match(regex_str, line) is not None:
        print(regex_str)

^b.*
.*3$
^b.*3$

line = 'booooooobbbbby123'

# Goal: capture the leading "booooooob".
#
# Quantifiers are greedy by default; appending "?" makes them lazy
# ("?" on its own means "zero or one of the preceding item").
#   '.*(b.*b).*'    the greedy prefix swallows as much as it can, so the
#                   group is left with the right-most "bb"
#   '.*?(b.*b).*'   the lazy prefix lets the group start at the first "b",
#                   but the greedy ".*" inside the group runs to the last "b"
#   '.*?(b.*?b).*'  lazy prefix and lazy group body: the group stops at the
#                   nearest following "b"
for regex_str in ('.*(b.*b).*', '.*?(b.*b).*', '.*?(b.*?b).*'):
    match_obj = re.match(regex_str, line)
    if match_obj is None:
        continue
    print(regex_str)
    print(match_obj.group(1))

.*(b.*b).*
bb
.*?(b.*b).*
booooooobbbbb
.*?(b.*?b).*
booooooob

# Repetition counts:
#   +      one or more of the preceding item
#   {n}    exactly n repetitions
#   {n,}   n or more repetitions
#   {m,n}  at least m and at most n repetitions
#
# Each entry pairs a subject string with the pattern tried against it.
for line, regex_str in (
    ('boooobaaaooobbbbby123', '.*(b.+b).*'),
    ('boooobaaaooobbbaaby123', '.*(b.{2}b).*'),
    ('boooobaaaooobbbaaby123', '.*(b.{3,}b).*'),
    ('boooobaaaooobbbaaby123', '.*(b.{2,5}b).*'),
    ('boooobaaaooobbbaaaaaaby123', '.*(b.{2,5}b).*'),
):
    match_obj = re.match(regex_str, line)
    if match_obj is None:
        continue
    print(regex_str)
    print(match_obj.group(1))

.*(b.+b).*
bbb
.*(b.{2}b).*
baab
.*(b.{3,}b).*
bbaab
.*(b.{2,5}b).*
baab
.*(b.{2,5}b).*
boooob

# |  alternation: alternatives are tried left to right and the first one
#    that lets the overall match succeed wins, which is why the order of
#    "bobby" and "bobby123" changes what is captured.
for line, regex_str in (
    ('bobby123', '(bobby|bobby123)'),
    ('bobby123', '(bobby123|bobby)'),
    ('bobby123', '((bobby|boobby)123)'),
    ('boobby123', '((bobby|boobby)123)'),
):
    match_obj = re.match(regex_str, line)
    if match_obj is None:
        continue
    print(regex_str)
    # One line per capture group: a single group for the first two
    # patterns, outer then inner group for the nested ones.
    for captured in match_obj.groups():
        print(captured)

(bobby|bobby123)
bobby
(bobby123|bobby)
bobby123
((bobby|boobby)123)
bobby123
bobby
((bobby|boobby)123)
boobby123
boobby

# [...]  character class: matches any one of the listed characters.
regex_str = '([abcd]oobby123)'

# "b" and "a" are in the class, "e" is not, so the last subject is silent.
for line in ('boobby123', 'aoobby123', 'eoobby123'):
    match_obj = re.match(regex_str, line)
    if match_obj is None:
        continue
    print(regex_str)
    print(match_obj.group(1))

([abcd]oobby123)
boobby123
([abcd]oobby123)
aoobby123

line = '18782902222'

# [0-9]  a range, equivalent to [0123456789]; likewise [a-z], [A-Z], and
#        combinations such as [0-9a-z].
# [^1]   a negated class: any character that is NOT listed.
for regex_str in (
    '(1[34578][0-9]{9})',   # nine digits after the prefix      -> matches
    '(1[34578][^1]{9})',    # nine characters that are not "1"  -> matches
    '(1[34578][^12]{9})',   # the tail contains "2"             -> no match
):
    match_obj = re.match(regex_str, line)
    if match_obj is None:
        continue
    print(regex_str)
    print(match_obj.group(1))

(1[34578][0-9]{9})
18782902222
(1[34578][^1]{9})
18782902222

line = '12.1*'

# Inside [...] metacharacters such as "." and "*" lose their special
# meaning, so "[.]" matches a literal dot.
regex_str = '(.*[.].*)'
match_obj = re.compile(regex_str).match(line)
if match_obj is not None:
    print(regex_str, match_obj.group(1), sep='\n')

(.*[.].*)
12.1*

# \s  matches one whitespace character (space, tab, newline, ...)
# \S  matches one non-whitespace character
#
# The patterns are raw strings so the backslash is handed to the regex
# engine untouched; "\s" in a normal literal is an invalid string escape
# and triggers a warning on current Python versions.
for line, regex_str in (
    ('你 好', r'(你\s好)'),   # a space sits between the characters -> matches
    ('你s好', r'(你\s好)'),   # "s" is not whitespace               -> no match
    ('你s好', r'(你\S好)'),   # "s" is a non-whitespace character   -> matches
):
    match_obj = re.match(regex_str, line)
    if match_obj:
        print(regex_str)
        print(match_obj.group(1))

(你\s好)
你 好
(你\S好)
你s好

line = '你s好'

# \w  matches one "word" character. For str patterns this is Unicode-aware
#     (letters and digits of any script, plus "_"); it is equivalent to
#     [a-zA-Z0-9_] only when the re.ASCII flag is used.
# \W  matches one character that is NOT a word character.
#
# Raw strings keep the backslash intact; "\w" / "\W" in a normal literal
# are invalid string escapes and trigger a warning on current Python.
for regex_str in (
    r'(你\w好)',   # "s" is a word character -> matches
    r'(你\W好)',   # "s" is a word character -> no match
):
    match_obj = re.match(regex_str, line)
    if match_obj:
        print(regex_str)
        print(match_obj.group(1))

(你\w好)
你s好

# [\u4E00-\u9FA5]  matches one character in the U+4E00..U+9FA5 range, the
#                  block commonly used to detect Chinese characters.
#
# The pattern is a raw string on purpose: in a normal literal Python
# expands \u4E00 and \u9FA5 before the regex engine sees them, so
# print(regex_str) would show the two expanded characters instead of the
# escapes. The re module resolves \uXXXX itself, so matching is unchanged.
regex_str = r'([\u4E00-\u9FA5]+)'

# re.match anchors at the start, so the group is the leading run of
# in-range characters: "你好", "你好" (stops before "s"), "你" (stops at the space).
for line in ('你好', '你好s', '你 好s'):
    match_obj = re.match(regex_str, line)
    if match_obj:
        print(regex_str)
        print(match_obj.group(1))

([\u4E00-\u9FA5]+)
你好
([\u4E00-\u9FA5]+)
你好
([\u4E00-\u9FA5]+)
你

line = 'study in 清华大学'

# Raw strings keep the \uXXXX escapes literal so the printed pattern reads
# exactly as written; the re module resolves the escapes when compiling.
#   greedy prefix ".*"  leaves the group only the minimum it needs: "华大学"
#   lazy prefix ".*?"   lets the group start as early as possible: "清华大学"
for regex_str in (
    r'.*([\u4E00-\u9FA5]+大学)',
    r'.*?([\u4E00-\u9FA5]+大学)',
):
    match_obj = re.match(regex_str, line)
    if match_obj:
        print(regex_str)
        print(match_obj.group(1))

.*([\u4E00-\u9FA5]+大学)
华大学
.*?([\u4E00-\u9FA5]+大学)
清华大学

line = 'XXX出生于2001年'

# \d  matches one decimal digit.
# Raw string: "\d" in a normal literal is an invalid string escape and
# triggers a warning on current Python versions.
regex_str = r'.*?(\d+)年'
match_obj = re.match(regex_str, line)
if match_obj:
    print(regex_str)
    print(match_obj.group(1))

.*?(\d+)年
2001

# Sample sentences covering the supported date spellings: 年/月/日 markers,
# "/" separators and "-" separators, each with and without a day part and
# with and without zero padding.
line1 = 'XXX出生于2001年06月01日'
line2 = 'XXX出生于2001年6月1日'
line3 = 'XXX出生于2001年6月'
line4 = 'XXX出生于2001年06月'
line5 = 'XXX出生于2001/06/01'
line6 = 'XXX出生于2001/6/1'
line7 = 'XXX出生于2001/6'
line8 = 'XXX出生于2001/06'
line9 = 'XXX出生于2001-06-01'
line10 = 'XXX出生于2001-6-1'
line11 = 'XXX出生于2001-6'
line12 = 'XXX出生于2001-06'
lines = [line1, line2, line3, line4, line5, line6,
         line7, line8, line9, line10, line11, line12]

# Group 1 is the whole date: a four-digit year, a separator (年, "/" or "-"),
# a one- or two-digit month, then one of: "月<day>日", "<sep><day>", a bare
# "月", or nothing - each anchored at the end of the string.
# Raw string: "\d" in a normal literal is an invalid string escape.
regex_str = r'.*出生于(\d{4}[年/-]\d{1,2}((月\d{1,2}日$)|([/-]\d{1,2}$)|月$|$))'

# Compile once; the same pattern is applied to every sample line.
birth_date_pattern = re.compile(regex_str)
for line in lines:
    match_obj = birth_date_pattern.match(line)
    if match_obj:
        print(line)
        print(match_obj.group(1))

XXX出生于2001年06月01日
2001年06月01日
XXX出生于2001年6月1日
2001年6月1日
XXX出生于2001年6月
2001年6月
XXX出生于2001年06月
2001年06月
XXX出生于2001/06/01
2001/06/01
XXX出生于2001/6/1
2001/6/1
XXX出生于2001/6
2001/6
XXX出生于2001/06
2001/06
XXX出生于2001-06-01
2001-06-01
XXX出生于2001-6-1
2001-6-1
XXX出生于2001-6
2001-6
XXX出生于2001-06
2001-06

正则表达式语法实例演示-python

猜你喜欢