6.16 re module
A regular expression is a combination of ordinary characters and symbols with special meaning that describes a pattern for matching strings; in other words, it is a rule that describes a whole class of strings. In Python, regular expressions are part of the standard library and are implemented by the re module.
Pattern | Example | Explanation | Result |
---|---|---|---|
\w | print ( re.findall ('\w','ab 12+- *&_') ) | Match alphanumeric characters and underscores | ['a', 'b', '1', '2', '_'] |
\W | print ( re.findall ('\W','ab 12+- *&_') ) | Matches any character that is not alphanumeric or an underscore | [' ', '+', '-', ' ', '*', '&'] |
\s | print(re.findall('\s','ab \r1\n2\t+- *&_')) | Matches any whitespace character [ \t\n\r\f\v] | [' ', '\r', '\n', '\t', ' '] |
\S | print(re.findall('\S','ab \r1\n2\t+- *&_')) | Matches any non-whitespace character | ['a', 'b', '1', '2', '+', '-', '*', '&', '_'] |
\d | print(re.findall('\d','ab \r1\n2\t+- *&_')) | Matches any number [0-9] | ['1', '2'] |
\D | print(re.findall('\D','ab \r1\n2\t+- *&_')) | Matches any non-digit character | ['a', 'b', ' ', '\r', '\n', '\t', '+', '-', ' ', '*', '&', '_'] |
\A | print(re.findall('\Aalex','abcalex is sb')) | Matches the beginning of the string | [ ] |
^ | print(re.findall('^alex','alex is salexb')) | Matches at the start of the string (same as \A unless re.MULTILINE is set) | ['alex'] |
\Z | print(re.findall('sb\Z','alex is alexbsb')) | Matches only at the end of the string | ['sb'] |
$ | print(re.findall('sb$','alex is alexbsb')) | Matches at the end of the string (like \Z, but also just before a trailing newline) | ['sb'] |
\n | print(re.findall('a\nc','a\nc a\tc a1c')) | Matches a newline | ['a\nc'] |
. | print(re.findall('a.c','abc a1c aaca\nc')) | Matches any single character except a newline | ['abc', 'a1c', 'aac'] |
re.DOTALL | print(re.findall('a.c','abc a1c aaca\nc',re.DOTALL)) | Makes . match any character, including a newline | ['abc', 'a1c', 'aac', 'a\nc'] |
? | print(re.findall('ab?','a ab abb abbb abbbb abbbb')) | Matches the preceding character 0 or 1 times | ['a', 'ab', 'ab', 'ab', 'ab', 'ab'] |
* | print(re.findall('ab*','a ab abb abbb abbbb abbbb a1bbbbbbb')) | Matches the preceding character 0 or more times | ['a', 'ab', 'abb', 'abbb', 'abbbb', 'abbbb', 'a'] |
+ | print(re.findall('ab+','a ab abb abbb abbbb abbbb a1bbbbbbb')) | Matches the preceding character 1 or more times | ['ab', 'abb', 'abbb', 'abbbb', 'abbbb'] |
{m,n} | print(re.findall('ab{0,1}','a ab abb abbb abbbb abbbb')) | Matches the preceding character at least m and at most n times | ['a', 'ab', 'ab', 'ab', 'ab', 'ab'] |
.* | print(re.findall('a.*c','ac a123c aaaac a *123)()c asdfasfdsadf')) | Greedy match: any characters (except newline), as many as possible | ['ac a123c aaaac a *123)()c'] |
.*? | print(re.findall('a.*?c','a123c456c')) | Non-greedy match | ['a123c'] |
() | print(re.findall('(alex)_sb','alex_sb asdfsafdafdaalex_sb')) | Grouping: findall returns only the text captured by the parentheses | ['alex', 'alex'] |
# Anchored at both ends with ^ and $, so the extra trailing "1" prevents any match.
whole_string_hits = re.findall('^ebn$','ebn1')
print(whole_string_hits) #[]

# The lazy group (.*?) captures only the quoted href value.
nav_item_html = '<li><a id="blog_nav_sitehome" class="menu"href="http://www.cnblogs.com/">博客园</a></li>'
href_values = re.findall('href="(.*?)"',nav_item_html)
print(href_values) #['http://www.cnblogs.com/']

# []: matches exactly one character, taken from the set written inside the brackets.
two_digit_hits = re.findall('a[0-9][0-9]c','a1c a+c a2c a9c a11c a-c acc aAc')
print(two_digit_hits) #['a11c']

# Sample text shared by the next two character-class examples.
class_sample = 'a1c a+c a2c a9c a*c a11c a-c acc aAc'
# To match "-" literally, put it first or last inside [] (escaping it also works).
print(re.findall('a[-+*]c',class_sample)) #['a+c', 'a*c', 'a-c']
print(re.findall('a[a-zA-Z]c',class_sample)) #['acc', 'aAc']

# A leading ^ inside [] negates the class: any one character NOT listed.
non_letter_hits = re.findall('a[^a-zA-Z]c','a c a1c a+c a2c a9c a*c a11c a-c acc aAc')
print(non_letter_hits) #['a c', 'a1c', 'a+c', 'a2c', 'a9c', 'a*c', 'a-c']

# With one capturing group, findall returns the group text, not the whole match.
print(re.findall('([a-z]+)_sb','egon alex_sb123123wxxxxxxxxxxxxx_sb,lxx_sb')) #['alex', 'wxxxxxxxxxxxxx', 'lxx']

bankrupt_sentence = 'Too many companies have gone bankrupt, and the next one is my company'
# | means "or"; the capturing group makes findall return only the alternative that matched.
print(re.findall('compan(ies|y)',bankrupt_sentence)) #['ies', 'y']
# (?:...) groups without capturing, so findall returns the complete match instead.
print(re.findall('compan(?:ies|y)',bankrupt_sentence)) #['companies', 'company']
re模块的其他方法:
# One alternation pattern reused by the findall and search examples.
either_word = 'alex|sb'
# findall reports every occurrence of either alternative, in order of appearance.
print(re.findall(either_word,'123123 alex sb sadfsadfasdfegon alex sb egon')) #['alex', 'sb', 'alex', 'sb']

digits_first = '123213 alex sb sadfsadfasdfegon alex sb egon'
alex_first = 'alex sb sadfsadfasdfegon alex sb egon'

# search stops at the first hit and returns a match object; group() extracts the matched text.
first_hit = re.search(either_word,digits_first)
print(first_hit.group()) #alex

# When nothing matches, search returns None rather than an empty list.
print(re.search('^alex',digits_first)) #None
print(re.search('^alex',alex_first).group()) #alex

# match only ever tries at the very start of the string.
print(re.match('alex',alex_first).group()) #alex
print(re.match('alex',digits_first)) #None
# search with a leading ^ can stand in for match.
# For a single fixed separator, str.split and re.split produce the same list.
info='a:b:c:d'
print(info.split(':')) #['a', 'b', 'c', 'd']
print(re.split(':',info)) #['a', 'b', 'c', 'd']

# re.split can cut on any of several separators at once.
info=r'get :a.txt\3333/rwx'
# The pattern is a raw string so that no invalid "\/" escape appears in the literal.
# The class holds: space, colon, backslash (written \\ for the regex engine) and slash.
# The adjacent " :" pair yields the empty string between 'get' and 'a.txt'.
print(re.split(r'[ :\\/]',info)) #['get', '', 'a.txt', '3333', 'rwx']
# str.replace with a count of 1 rewrites only the first (leftmost) occurrence.
print('egon is beutifull egon'.replace('egon','EGON',1)) #EGON is beutifull egon

# To upper-case only the SECOND "egon", capture the pieces and rebuild the text from them.
# Groups: ('123 ')(egon)(' is beutifull ')(egon)('') - the lazy last group is empty,
# so the trailing " 123" lies outside the match and is left untouched.
second_egon = '(.*?)(egon)(.*?)(egon)(.*?)'
rebuilt = re.sub(second_egon,r'\1\2\3EGON\5','123 egon is beutifull egon 123')
print(rebuilt) #123 egon is beutifull EGON 123

# Swap the first and last captured words: (lqz)( is )(SB) -> \3\2\1
print(re.sub('(lqz)(.*?)(SB)',r'\3\2\1',r'lqz is SB')) #SB is lqz

# Same swap for arbitrary words: letter runs alternate with non-letter runs.
# Groups: (lqzzzz)(123+ )(is)( )(SB) -> \5\2\3\4\1
word_gap_word = '([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)'
print(re.sub(word_gap_word,r'\5\2\3\4\1',r'lqzzzz123+ is SB')) #SB123+ is lqzzzz
# Compile the pattern once, then reuse the compiled object's own findall method.
pattern=re.compile('alex')
for subject in ('alex is alex alex','alexasdfsadfsadfasdfasdfasfd is alex alex'):
    # Each subject contains three occurrences.
    print(pattern.findall(subject)) #['alex', 'alex', 'alex']
6.17 hashlib模块
hash是一种算法,该算法接受传入的内容,经过运算得到一串hash值
hash值的特点是:
1 只要传入的内容一样,得到的hash值必然一样(用于明文传输密码、文件完整性校验) 2 不能由hash值返解成内容=======》把密码做成hash值,不应该在网络传输明文密码 3 只要使用的hash算法不变,无论校验的内容有多大,得到的hash值长度是固定的
import hashlib

# Feed the message to a single md5 object piece by piece; the digest
# covers the concatenation of everything passed to update().
m=hashlib.md5()
for piece in ('hello','world','egon'):
    m.update(piece.encode('utf-8'))
print(m.hexdigest()) #3801fab9b8c8d9fcb481017969843ed5
import hashlib

# The same bytes split at different boundaries: how the data is chunked
# across update() calls does not affect the resulting digest.
m=hashlib.md5()
for piece in ('h','e','lloworld','egon'):
    m.update(piece.encode('utf-8'))
print(m.hexdigest()) #3801fab9b8c8d9fcb481017969843ed5
注意:m.update()无论是多次传值还是一次传值,得到的hash值相同
import hashlib

# Hash a file by feeding it to md5 one line at a time instead of reading it whole.
m=hashlib.md5()
# 'rb' yields bytes, which is what update() requires; no decoding is involved.
with open(r'C:\Users\Desktop\上节课复习','rb') as f:
    for line in f:
        m.update(line)
# The file is closed at this point; the digest covers every byte that was read.
hv=m.hexdigest()
print(hv) #98416536bdf1f0dc0776629f501ae469
密码加盐
import hashlib

pwd='alex3714'
# Salting: fixed extra text is hashed before and after the password, so the
# digest differs from the plain md5 of the password alone.
m=hashlib.md5()
for part in ('天王盖地虎',pwd,'小鸡炖蘑菇'):
    m.update(part.encode('utf-8'))
print(m.hexdigest()) #ab44c43ea02e8c1083346ca707a6f572
hashlib.sha256(),hashlib.sha512()
import hashlib

# Hash the same message with md5, sha256 and sha512 in turn; the three
# constructors share one interface, only the digest length differs.
for new_hash in (hashlib.md5,hashlib.sha256,hashlib.sha512):
    m=new_hash()
    m.update('helloworld'.encode('utf-8'))
    print(m.hexdigest())
# Printed digests, in order:
# md5    fc5e038d38a57032085441e7fe7010b0
# sha256 936a185caaa266bb9cbe981e9e05cb78cd732b0b3280eb944412bb6f8f8f07af
# sha512 1594244d52f2d8c12b142bb61f47bc2eaf503d6d9ca8480cae9fcf112f66e4967dc5e8fa98285e36db8af1b8ffa8b84cb15e0fbcf836c3deb803c13f37659a60
hmac 模块 ,它内部对我们创建 key 和 内容 进行进一步的处理然后再加密
import hmac

# hmac combines a secret key with the message before hashing, so the digest
# depends on both the key and the content.
# digestmod is passed explicitly: Python 3.8+ raises TypeError when it is
# omitted. 'md5' is the algorithm older versions used as the implicit default.
m=hmac.new('天王盖地虎,小鸡炖模块'.encode('utf-8'),digestmod='md5')
m.update('alex3814'.encode('utf-8'))
print(m.hexdigest())