Day29 Notes

# Exception handling
# try/except: be sure to write a prompt message or some handling code after except

# try:
  # '''Code where exceptions may occur'''
# except ValueError:
  # '''Print some prompts or processing content'''
# except NameError:
  # '''...'''
# # except Exception as e:
  # # '''Print e'''
# else:
  # '''The code in the try is executed normally'''
# finally:
  # '''This code will be executed regardless of whether the error occurs or not, Used to do some finishing touches'''

-------------*****-------------

1. Regular expressions

# regex
# Regular expressions
# The re module can read the regular expressions you write
# and perform tasks according to the expressions you write

 

# What do regular expressions do?

# Regular expressions operate on strings
# Use some rules to detect whether a string meets my requirements - form validation
# Find content that meets my requirements from a string - crawler

# Exactly equal strings can be matched ==

# Character group: a character group [] lists everything that can appear at one character position
# Range:
# Ranges follow ASCII code order and must be written from small to large
# A character group can contain multiple ranges

#Phone number verification
#common practice (without regular expressions)
 number = input('please input your phone number : ')
 if number.isdigit() and number.startswith('13')\
     or number.startswith('14')\
     or number.startswith('15')\
     or number.startswith('16')\
     or number.startswith('17')\
     or number.startswith('18')\
     or number.startswith('19'):
     print('Passed the initial check')
 else:
     print('Format error')

 #Regular expression practice
 import re
 number = input('please input your phone number : ')
 ret = re.match('(13|14|15|17|18|19)[0-9]{9}', number)
 if ret:
     print('Pass the initial check')

example:

# The ID number is a string with a length of 15 or 18 characters, the first digit cannot be 0
# If it is 15 digits, it consists of numbers;
# If it is 18 digits, the first 17 digits are all digits, and the last digit may be Number or x,
# Let's try to use regular expressions below:

# 15 digits
a = '[1-9]\d{14}'
# 18 digits
b = '[1-9]\d{16}[\dx]'

# [1-9]\d{13,16}[\dx]
# [1-9]\d{16}[\dx]|[1-9]\d{14}
# a | b [ab]

#[1-9]\d{16}[\dx]|[1-9]\d{14} or
# If two regular expressions are connected by "or", and some regular rules are the same,
# Then be sure to put the long rule in the front

# [1-9]\d{13,16}[\dx]
# [1-9]\d{14}(\d{2}[\dx]){0,1}

# [1-9]\d{14}(\d{2}[\dx])? Grouping
# If a quantifier must constrain a set of regular rules as a whole, put those rules into a group
# and write the quantifier outside the group

# r'\\n',r'\n'

# greedy match

 

2.re module

 # findall receives two parameters: the regular expression and the string to be matched
 # It returns a list: all results that match the regular expression

 import re
 ret = re.findall('\d', '1')
 print (ret)
 ret = re.findall('\d+', 'dsaglhlkdfh1892494kashdgkjh127839')
 print (ret)

 

Example: # Find all mobile phone numbers from a file - regular

# The difference between search and findall:
#   1. search returns as soon as it finds one match; findall finds all matches
#   2. findall directly returns a list of results; search returns an object
# If search finds a match, it returns a result object
# If it doesn't match, it returns None

 import re
 with open('a',encoding='utf-8') as f:
     l = []
     for line in f:
         ret = re.findall('1[3-9]\d{9}',line)
         l.extend(ret)
 print(l)

 

#search

 import re
 ret = re.search ('b', 'eva egon yuan' )
 if ret:
     print(ret.group()) # get the result from the result object

 

# match
# 1. match is equivalent to adding a ^ at the start of the regular expression, so it only matches at the beginning of the string
# 2. Like search, it returns a result object on a match and None when nothing matches
# 3. Like search, the value is obtained from the result object with group

 import re
 ret = re.match ('a', 'eva egon yuan' )
 if ret:
     print (ret.group ())

 

#subn

 import re
 ret = re.subn('\d', 'H', 'eva3egon4yuan4')#Replace the number with 'H' and return a tuple (result of replacement, how many times it has been replaced)
 print (ret)

 

# Regular expression --> match strings according to the rules
# Find a string that matches the rules from a string --> python
# Regular rules - compile -> a form that python understands
# Multiple executions require multiple compilations, which wastes time, e.g. re.findall('1[3-9]\d{9}',line)
# Compile once in advance instead: re.compile('\d{3}')

 import re
 obj = re.compile('\d{3}') # Compilation only applies when the same regular rule is executed multiple times
 ret1 = obj.search('abc123eeee')
 ret2 = obj.findall('abc123eeee')
 print(ret1.group())
 print(ret2)
 import re
 ret = re.finditer('\d', 'ds3sy4784a') #finditer is suitable for cases where there are many results, which can effectively save memory
 print(ret)  # <callable_iterator object at 0x10195f940>
 print(ret.__next__().group())
 for i in ret:
     print(i.group())
 ret = re.finditer('\d', 'ds3sy4784a') #The loop above exhausted the iterator, so create a fresh one
 print(next(ret).group()) #View the first result
 print(next(ret).group()) #View the second result
 print([i.group() for i in ret]) #View the remaining results

 

# When the group encounters the re module

 import re
 ret1 = re.findall('www.(baidu|oldboy).com', 'www.oldboy.com' )
 ret2 = re.findall('www.(?:baidu|oldboy).com', 'www.baidu.com' )
 print(ret1) #Result: ['oldboy']
 print(ret2) #Result: ['www.baidu.com']
 # findall gives priority to the content captured by the group. If you want to cancel the group priority effect, add ?: at the beginning of the group.
 import re
 ret=re.split("\d+","eva3egon4yuan")
 print(ret) #Result: ['eva', 'egon', 'yuan']
 ret=re.split("(\d+)","eva162784673egon44yuan")
 print(ret) #Result: ['eva', '162784673', 'egon', '44', 'yuan']

# split splits a string. By default, the matched delimiter will not appear in the result list.
# If the matched regex is placed in a group, the delimiter will be placed in the result list.

 

Group naming and search encounter group
# tag.html web page file tag file
import re
# Purposes of grouping
  # 1. Constrain a set of regular rules as a whole with a quantifier
  # 2. From the result matched by the whole regular rule, give priority to displaying the content inside the group

"<h1>hello</h1>"
 ret = re.findall('<\w+>(\w+)</\w+>',"<h1>hello</h1>")
 print (ret)

 

# group name

 ret = re.search("<(?P<tag>\w+)>(?P<content>\w+)</(?P=tag)>","<h1>hello</h1>")
 print (ret)
 print(ret.group()) # There is no concept of group priority in search
 print (ret.group ('tag' ))
 print(ret.group('content'))

 

 #If you don't name the group, you can also use \<sequence number> to refer to the corresponding group, meaning the content at that position must be identical to what the earlier group matched
 #The matching result can then be read directly with group(<sequence number>)

 ret = re.search(r"<(\w+)>(\w+)</\1>","<h1>hello</h1>")
 print (ret.group ())
 print(ret.group(0))  #Result: <h1>hello</h1>
 print(ret.group(1)) #Result: h1
 print(ret.group(2)) #Result: hello

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325168583&siteId=291194637