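r'''
Online regular-expression tester: regex101 -- https://regex101.com/
'''

r'''
Regular-expression quick reference

Digit:                                          \d
Non-digit:                                      \D
Whitespace character:                           \s
Non-whitespace character:                       \S
Word character (letter, digit or underscore):   \w
Non-word character:                             \W
Zero or more of the preceding item:             *
Any single character (except a newline):        .
Zero or one of the preceding item:              ?
Between m and n of the preceding item:          {m,n}
One or more of the preceding item:              +
Start of the string:                            ^
End of the string:                              $
Alternation (either side):                      |    e.g. abc|bcd
Named group (the match is stored as `name`):    (?P<name>pattern)
'''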
''' Goal: match '<h1>xxx</h1>' '''
# reg = re.compile('<(?P<tagname>\w*)>.*</(?P=tagname)>')
# reg.match('<h1>xxx</h1>')
#
# Benchmark: a precompiled pattern object versus passing the pattern string to re.match() on every call.
#a = timeit.timeit(setup='''import re ; reg = re.compile('<(?P<tagname>\w*)>.*</(?P=tagname)>')''',stmt='''reg.match('<h1>xxx</h1>')''',number=1000000)
#b = timeit.timeit(setup='''import re''',stmt='''re.match('<(?P<tagname>\w*)>.*</(?P=tagname)>','<h1>xxx</h1>')''',number=1000000)
# print(a)
# print(b)
# Recorded results:
# 0.895464103505607   (a: precompiled pattern object)
# 2.401233610585925   (b: pattern string passed on every call)
# Conclusion: compile the regular expression once and match through the compiled object; it is noticeably faster.
r'''
On https://regex101.com/ enter one of these patterns:

    <(?P<tagname>\w*)>.*</(?P=tagname)>
    <(?P<tagname>\w*)>(.*)</(?P=tagname)>

For example, with the pattern and the test string:

    <(?P<tagname>\w*)>.*</(?P=tagname)>
    <h1>xxx</h1>

Wrap each part of the match that you want returned in parentheses to turn it
into a capture group:

    (<(?P<tagname>\w*)>(.*)</(?P=tagname)>)    # there are three groups
'''
# Demo: compile a pattern once, then use match / search / findall / split
# through the compiled object (faster than passing the pattern string to the
# module-level re functions on every call).
# Patterns can be debugged interactively at https://regex101.com/
import re  # nothing else in view imports it, so bring it into scope here

s = "aa<h1>xxx</h1> 到的 的 <html>xxx</html>"

# The r"" prefix keeps backslashes such as \w away from Python's own string
# escape processing.
# \w+ (instead of \w*) requires a non-empty tag name, so "<>...</>" is rejected.
# (?P<tagname>...) names the group; (?P=tagname) must repeat the same text.
# NOTE: ".*" is greedy, so two identical tags on one line would be matched as
# a single span from the first opening tag to the last closing tag.
reg = re.compile(r"(<(?P<tagname>\w+)>(.*)</(?P=tagname)>)")

# match() only succeeds at position 0; s starts with "aa", so there is no match.
a = reg.match(s)
print(a)  # None

# search() scans the string and returns the first match found anywhere.
# Run it once and read both results from the same match object.
_first_match = reg.search(s)
b = _first_match.groups()          # every capture group, in order
g = _first_match.group("tagname")  # a single group, looked up by name
print(b)  # ('<h1>xxx</h1>', 'h1', 'xxx')
print(g)  # h1

# findall() returns one tuple of groups per non-overlapping match.
t = reg.findall(s)
print(t)  # [('<h1>xxx</h1>', 'h1', 'xxx'), ('<html>xxx</html>', 'html', 'xxx')]

x = '1one2two3three4four'
reg1 = re.compile(r"\d")  # raw string: "\d" alone is an invalid string escape
c = reg1.findall(x)  # every digit in x
d = reg1.split(x)    # the digits act as separators; leading '' precedes "1"
print(c)  # ['1', '2', '3', '4']
print(d)  # ['', 'one', 'two', 'three', 'four']