二十一、正则表达式


正则表达式:(普通字符串,元字符)

'''
1.普通字符:
re.findall():在已知的字符串中寻找指定的字符串,返回一个列表
'''
import re
r=re.findall("jin","1613265161651651dsadsadsajin365465416546dsa4ds")
print (r)
------------------------------------
['jin']
-------------------------------------------------------------------------------
'''
2.元字符:
. ^ $ * + ? {} [] | () \
'''

import re
'''
 2.1 .:匹配除换行符以外的任意一个字符
'''
r=re.findall("jin.com","1613265161651651dsadsadsajinqcom365465416546dsa4ds")
print (r)
------------------------------------
['jinqcom']
-------------------------------------------------------------------------------

'''
 2.2 ^:必须开头才可匹配到
'''
r=re.findall("^jin","1613265161651651dsadsadsa^jinqcom365465416546dsa4ds")
print (r)

r=re.findall("^jin","jinqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)

r=re.findall("^jin","1jinqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
'''
------------------------------------
[]
['jin']
[]
-------------------------------------------------------------------------------

 2.3 $:必须结尾才可匹配到
'''
r=re.findall("jin$","qcom1613265161651651dsadsadsa365465416546dsa4dsjin")
print (r)

r=re.findall("jin$","jinqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
------------------------------------
['jin']
[]
-------------------------------------------------------------------------------
'''
 2.4 * :匹配0-多次(重复)
'''
r=re.findall("jin*","xxxxxxxxxxxxjinqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)

r=re.findall("jin*","xxxxxxxxxxxxjinnnnnnnnnnnnnnnnnnnnnnnnqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
r=re.findall("jin*","xxxxxxxxxxxxjiqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
------------------------------------
['jin']
['jinnnnnnnnnnnnnnnnnnnnnnnn']
['ji']
-------------------------------------------------------------------------------
'''
 2.5 +:匹配1-多次(重复)
'''

r=re.findall("jin+","xxxxxxxxxxxxjinnnnnqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
r=re.findall("jin+","xxxxxxxxxxxxjiqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
------------------------------------
['jinnnnn']
[]
-------------------------------------------------------------------------------

'''
 2.6 ?:0-1次匹配
'''

r=re.findall("jin?","xxxxxxxxxxxxjinnnnnqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
r=re.findall("jin?","xxxxxxxxxxxxjiqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
------------------------------------
['jin']
['ji']
-------------------------------------------------------------------------------

'''
 2.7 {}:指定匹配次数,{n}表示恰好n次,{m,n}表示m到n次
'''

r=re.findall("jin{4}","xxxxxxxxxxxxjinnnnnqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
r=re.findall("jin{0}","xxxxxxxxxxxxjiqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)

r=re.findall("jin{3,5}","xxxxxxxxxxxxjinnnnqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
r=re.findall("jin{3,5}","xxxxxxxxxxxxjinnqcom1613265161651651dsadsadsa365465416546dsa4ds")
print (r)
------------------------------------
['jinnnn']
['ji']
['jinnnn']
[]
-------------------------------------------------------------------------------

'''
 2.8 \:反斜杠后面的元字符去除特殊功能;反斜杠后跟普通字符实现特殊功能;
\d:匹配十进制数,相当于[0-9]
\D:匹配任何非数字字符,相当[^0-9](非\d)
\s:匹配任何空白字符,相当于[ \t\n\r\f\v]
\S:匹配任何非空白字符,相当于[^ \t\n\r\f\v]
\w:匹配任何字母数字字符(含下划线),相当于[a-zA-Z0-9_]
\W:匹配任何非字母数字字符,相当于[^a-zA-Z0-9_]
\b:匹配一个单词的边界,也就是\w与\W之间(或字符串开头/结尾)的位置,例如单词和空格之间
'''
# \d: each \d matches exactly one decimal digit.
# Raw strings keep the backslash intact; a plain "\d" is an invalid escape
# sequence and triggers a warning on modern Python versions.
r = re.findall(r"\d", "66abc88")
print(r)
# Two consecutive \d match digits in pairs.
r = re.findall(r"\d\d", "66abc88")
print(r)
------------------------------------
['6','6','8','8']
['66','88']
-------------------------------------------------------------------------------

# \w: matches one word character (letter, digit or underscore); the dots are skipped.
# A raw string is used so the backslash reaches the regex engine unchanged.
r = re.findall(r"\w", "66abc88..com")
print(r)
------------------------------------
['6','6','a','b','c','8','8','c','o','m']
-------------------------------------------------------------------------------

# \s: matches one whitespace character; only the single space in the input qualifies.
# A raw string is used so the backslash reaches the regex engine unchanged.
r = re.findall(r"\s", "6666 6abc888..com")
print(r)
------------------------------------
[' ']
-------------------------------------------------------------------------------

# [\d]: \d keeps its meaning inside a character set, so this still matches single digits.
# A raw string is used so the backslash reaches the regex engine unchanged.
r = re.findall(r"[\d]", "66abc88..com")
print(r)
------------------------------------
['6','6','8','8']
-------------------------------------------------------------------------------

'''
 2.9 []:字符集,或
'''
r=re.findall("a[bc]d","333333ssssssabdc")
print (r)
------------------------------------
['abd']
-------------------------------------------------------------------------------

r=re.findall("a[bc]d","333333ssssssacdc")
print (r)
------------------------------------
['acd']
-------------------------------------------------------------------------------

r=re.findall("a[.]d","333333ssssssacdc")
print (r)
------------------------------------
[]
-------------------------------------------------------------------------------
r=re.findall("a[.]d","333333ssssssa.dc")
print (r)
------------------------------------
['a.d']
-------------------------------------------------------------------------------

r=re.findall("[a-z]","333333saa.dc")
print (r)
------------------------------------
['s','a','a','d','c']
-------------------------------------------------------------------------------

r=re.findall("[^a-z]","333ssssssa.dc")
print (r)
------------------------------------
['3','3','3','.']
-------------------------------------------------------------------------------
'''
 2.10 ():组
re.match只匹配字符串的开始,如果字符串开始不符合正则表达式,则匹配失败,函数返回None;而re.search匹配整个字符串,直到找到一个匹配。
'''
r=re.findall("(ab)","ababdddddab")
print (r)
------------------------------------
['ab','ab','ab']
-------------------------------------------------------------------------------
r=re.search("(ab)","dddddab").group()
print (r)
------------------------------------
ab
-------------------------------------------------------------------------------
r=re.match("(ab)","dddddab")
print (r)
------------------------------------
None
-------------------------------------------------------------------------------

r=re.match("(ab)","abdddddab").group()
print (r)
------------------------------------
ab
-------------------------------------------------------------------------------
r=re.search(r"a(\d+)","a23b").group()
print (r) #<_sre.SRE_Match object; span=(0, 3), match='a23'>
------------------------------------
a23
-------------------------------------------------------------------------------


#非贪婪匹配:在量词后加?,按最少次数匹配(+:1-无穷,*:0-无穷,?:0-1)
r=re.search(r"a(\d+?)","a23666666b").group()
print (r)
------------------------------------
a2
-------------------------------------------------------------------------------

r=re.search(r"a(\d*?)","a23666666b").group()
print (r) #<_sre.SRE_Match object; span=(0, 1), match='a'>
------------------------------------
a
-------------------------------------------------------------------------------

#只取()中的内容
r=re.findall(r"a(\d+)b","a23666666b")
print (r)
------------------------------------
['23666666']
-------------------------------------------------------------------------------

r=re.findall(r"a(\d+?)b","a23666666b")
print (r)
------------------------------------
['23666666']
-------------------------------------------------------------------------------
r=re.search(r"a(\d+)b","a23666666b").group()
print (r)
------------------------------------
a23666666b
-------------------------------------------------------------------------------

r=re.search(r"(tom)(jack)com\2","tomjackcomjack").group() #这里的\2相当于jack
print (r)
------------------------------------
tomjackcomjack
-------------------------------------------------------------------------------

r=re.search(r"(tom)(jack)com\2\1","tomjackcomjacktom").group()
print (r)
------------------------------------
tomjackcomjacktom
-------------------------------------------------------------------------------

猜你喜欢

转载自www.cnblogs.com/chushujin/p/9385620.html