Examples of commonly used regular expressions

import re

"""
   Using the match method:
   result = re.match(pattern, string)
   If the match succeeds, the group() method of the result can be used to extract the matched data.

   Note: if the beginning of the string matches the regular expression, match returns a match object (re.Match); otherwise it returns None (not an empty string).
       match always starts at the beginning of the string and works from left to right (because of this, a leading ^ is redundant and may be omitted); as soon as a position fails to match, matching stops and None is returned.

"""

 

 

"""
.: matches any single character except \n
[]: matches any one of the characters listed inside the brackets
\d: a digit, i.e. 0-9
\D: a non-digit
\s: a whitespace character, including space, \n, \t, \r (newline, tab, carriage return)
\S: a non-whitespace character
\w: a word character (the same characters that may appear in a Python variable name: a-z, A-Z, 0-9, _)
\W: a non-word character
"""

 

In [1]: import re

In [2]: re.match('admin','good.admin')

In [3]: re.match('.+','good.admin')
Out[3]: <re.Match object; span=(0, 10), match='good.admin'>

In [4]: re.match('.','good.admin')
Out[4]: <re.Match object; span=(0, 1), match='g'>

In [5]: re.match('..','good.admin')
Out[5]: <re.Match object; span=(0, 2), match='go'>

In [6]: re.match('.*','good.admin')
Out[6]: <re.Match object; span=(0, 10), match='good.admin'>

 

In [7]: re.match('\w','good')
Out[7]: <re.Match object; span=(0, 1), match='g'>

In [8]: re.match('\w','g1')
Out[8]: <re.Match object; span=(0, 1), match='g'>

In [9]: re.match('\W','g1')

# \w matches g, but \W does not match 1, so the overall match fails and None is returned
In [10]: re.match('\w\W','g1')
"""

Boundary matching

^: matches the beginning of the string
$: matches the end of the string
\b: matches a word boundary
\B: matches a non-word boundary

"""

 

 

"""

Quantifiers:

    *: the preceding character occurs zero or more times
    +: the preceding character occurs one or more times
    ?: the preceding character occurs zero or one time
    {m}: the preceding character occurs exactly m times
    {m,}: the preceding character occurs at least m times
    {m,n}: the preceding character occurs from m to n times


"""

 

"""

Boundary matching

^: matches the beginning of the string
$: matches the end of the string
\b: matches a word boundary
\B: matches a non-word boundary

"""

 

'''
Example 2: Match phone number
'''

r = re.match('1[35689]\d{9}$','13218545678')
print(r)  # <re.Match object; span=(0, 11), match='13218545678'>

r = re.match('1[35689]\d{9}$','132185456789')
print(r)  # None

r = re.match('1[35689]\d{9}','132185456789')  # no $ anchor, so the end of the string is not checked
print(r)  #<re.Match object; span=(0, 11), match='13218545678'>

r = re.match('1[35689]\d{9}','13218545aaaa6789')
print(r)  #None

 

'''
Example 3: Match a word boundary

'''

# r: raw string, backslashes are not processed as escapes; ^: beginning of the string; od\b: "od" followed by a word boundary
r = re.match(r'^\w+od\b','good')
print(r) # <re.Match object; span=(0, 4), match='good'>

r = re.match(r'^\w+od\b','gooder')
print(r) # None

r = re.match(r'^\w+\bod\b','good')
print(r) # None

r = re.match(r'^\w+\bod\b','jd od')
print(r) # None, because \b only marks a (zero-width) word boundary; it does not stand for the space itself

r = re.match(r'^\w+\s\bod\b','jd od')  # \s is used here to match the space between jd and od
print(r) # <re.Match object; span=(0, 5), match='jd od'>


r = re.match(r'^\w+od\B','gooder')
print(r) #<re.Match object; span=(0, 4), match='good'>

r = re.match(r'^\w+od\B','good')
print(r) #None

 

"""
Grouping:


|: matches either the expression on its left or the one on its right
(ab): treats the characters inside the parentheses as a group
\num: refers back to the string matched by group number num
(?P<name>): gives the group a name (alias)
(?P=name): refers back to the string matched by the group named name

"""

 

 

''' Example 4: match numbers between 0 and 100 '''

r = re.match(r'[1-9]\d?$|0$|100$','100' )
print(r) #<re.Match object; span=(0, 3), match='100'>

# Improved version
r = re.match(r'[1-9]?\d?$|100$','0')
print(r) # <re.Match object; span=(0, 1), match='0'>

 

'''
Example 5: extract specific characters from a string, here the text between the h1 tags
'''

s = '<h1>I am Chinese</h1>'

r = re.match(r"<h1>(.*)</h1>", s)
print(r.group())   # <h1>I am Chinese</h1>
print(r.group(1))  # I am Chinese; 1 refers to the first group in the regular expression

s = '<h1>Hello</h1>'
r = re.match(r"<h1>(.*)</h1>", s)
print(r.group())   # <h1>Hello</h1>
print(r.group(1))  # Hello; 1 refers to the first group in the regular expression

s = '<h1>Hello Python</h1>'
r = re.match(r"<h1>(\D*)\s(\D*)</h1>", s)
print(r.group())   # <h1>Hello Python</h1>
print(r.group(1))  # Hello: the value of the first group
print(r.group(2))  # Python: the value of the second group
print(r.groups())  # ('Hello', 'Python'): all groups returned as a tuple

 

 

'''
Example 6: match <html><h1>zhengqinfeng</h1></html>, requiring the outer pair of tags to have the same name and the inner pair of tags to have the same name
'''

s = '<html><h1>zhengqinfeng</h1></html>'

r = re.match(r'<.+><.+>.+</.+></.+>',s)
print(r)  #<re.Match object; span=(0, 34), match='<html><h1>zhengqinfeng</h1></html>'>

# The regex above appears to do the job but does not: it never checks that each closing tag matches its opening tag, so the correct regex is

r = re.match(r'<(.+)><(.+)>.+</\2></\1>', s)  # note the leading r: backslashes are not treated as escapes
print(r) # <re.Match object; span=(0, 34), match='<html><h1>zhengqinfeng</h1></html>'>

s = '<html><h1>zhengqinfeng</h2></html>'
r = re.match(r'<(.+)><(.+)>.+</\2></\1>', s)  # note the leading r: backslashes are not treated as escapes
print(r) # None

"""
    When part of a pattern is wrapped in (), re records what that group matched, and it can be referenced with \num
    In the example above, re records the first (.+) as num = 1 and the second (.+) as num = 2
    Later in the pattern, the recorded values can be referenced directly with \1 and \2

"""

 

Examples 5 and 6 show that groups serve two purposes:


1. Extracting specific characters from a string

2. Dynamically referencing the content matched by a group
'''
Example 7: extract the account name from an email address

        (163|126|135|qq|gmail): matches any one of the listed mail providers
        \.: matches the literal . in the email address
        (com|cn|net): matches the email suffix
        $: end of the string, i.e. the address must end with com, cn, or net

'''

p = r'(\w+)@(163|126|135|qq|gmail)\.(com|cn|net)$'

r = re.match(p, '132@163.com')
print(r)
print(r.groups())
print(r.group(1))  # extract the email account: 132

 

 

'''
Example 8: giving the groups in a regular expression names

'''

s = '<html><h1>zhengqinfeng</h1></html>'
r =  re.match('<(?P<key1>.+)><(?P<key2>.+)>.+</(?P=key2)></(?P=key1)>',s)
print(r)  # <re.Match object; span=(0, 34), match='<html><h1>zhengqinfeng</h1></html>'>
print(r.groups())  # ('html', 'h1')

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

Guess you like

Origin www.cnblogs.com/z-qinfeng/p/11999963.html
Recommended