table of Contents
One, re module
1, the import mode
import re
2, the role of
Finds specific content in a string
3, the basic grammar
^
Matches the beginning
s = 'abdhgtsab'
print(re.findall('^abd',s)) # ['abd'] - returns ['abd'] if the string starts with abd, otherwise []
$ Matches the end
s = 'abdhgtsab'
# '$' anchors the pattern at the end of the string:
# the result is ['ab'] if the string ends with ab, otherwise []
print(re.findall('ab$', s))  # ['ab']
[]
Matches any single character listed inside the []
s = 'acefghjkacefsdfsdf'
print(re.findall('[acef]', s)) # any single character listed inside the []
-------------------------------------------------------------
['a', 'c', 'e', 'f', 'a', 'c', 'e', 'f', 'f', 'f']
[^]
Negates the elements inside the []: matches every single character that is not listed inside the []
s = 'acefghjkacefsdfsdf'
print(re.findall('[^acef]', s))
------------------------------------------------------
['g', 'h', 'j', 'k', 's', 'd', 's', 'd']
.
Matches any character (except \n)
s = 'acefghjkacefsdfsdf'
print(re.findall('a..', s))
------------------------------------------------------
['ace', 'ace']
*
Matches the preceding character 0 or more times (0 to infinity)
s = 'abaacaaaaa'
print(re.findall('a*', s))
------------------------------------------------------
['a', '', 'aa', '', 'aaaaa', '']
+
Matches the preceding character 1 or more times (1 to infinity)
s = 'abaacaaaaa'
print(re.findall('a+', s))
------------------------------------------------------
['a', 'aa', 'aaaaa']
?
Matches the preceding character 0 or 1 times
s = 'abaacaaaaa'
print(re.findall('a?', s))
------------------------------------------------------
['a', '', 'a', 'a', '', 'a', 'a', 'a', 'a', 'a', '']
{m}
Matches the preceding character exactly m times
s = 'abaacaaaaa'
print(re.findall('a{5}', s))
------------------------------------------------------
['aaaaa']
{m,n}
Matches the preceding character m to n times
s = 'abaacaaaaa'
print(re.findall('a{2,5}', s))
------------------------------------------------------
['aa', 'aaaaa']
\d
Matches a digit
s = 's 1 s+\n=$\t2_s 3'
# Raw string so that \d reaches the regex engine unchanged
print(re.findall(r'\d', s))
------------------------------------------------------
['1', '2', '3']
\D
Non-numeric
s = 's 1 s+\n=$\t2_s 3'
# Raw string so that \D reaches the regex engine unchanged
print(re.findall(r'\D', s))
------------------------------------------------------
['s', ' ', ' ', ' ', ' ', ' ', 's', '+', '\n', '=', '$', '\t', '_', 's', ' ', ' ']
\w
Matches a digit, letter, or underscore
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\w', s))
------------------------------------------------------
['s', '1', 's', '2', '_', 's', '3']
\W
Matches anything that is not a digit, letter, or underscore
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\W', s))
------------------------------------------------------
[' ', ' ', ' ', ' ', ' ', '+', '\n', '=', '$', '\t', ' ', ' ']
\s
Matches whitespace: space, \t, \n
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\s', s))
------------------------------------------------------
[' ', ' ', ' ', ' ', ' ', '\n', '\t', ' ', ' ']
\S
Matches anything that is not whitespace (space, \t, \n)
s = 's 1 s+\n=$\t2_s 3'
print(re.findall('\S', s))
------------------------------------------------------
['s', '1', 's', '+', '=', '$', '2', '_', 's', '3']
\
Escape: cancels the special meaning of the character that follows
s = 'aba\d'
print(re.findall(r'a\\d', s))
------------------------------------------------------
['a\\d']
.*
Greedy mode (maximize), keep looking to find, so to maximize results
s = 'abbbcabc'
print(re.findall('a.*c', s))
------------------------------------------------------
['abbbcabc']
.*?
Non-greedy mode (minimize), found immediately stop
s = 'abbbcabc'
print(re.findall('a.*?c', s))
------------------------------------------------------
['abbbc', 'abc']
()
Group: only the part matched inside the parentheses is returned
s = 'abacad'
print(re.findall('a(.)', s))
------------------------------------------------------
['b', 'c', 'd']
A|B
Matches either A or B
s = 'abacad'
print(re.findall('a|b', s))
------------------------------------------------------
['a', 'b', 'a', 'a']
4, the module method
re.match(): matches only at the beginning of the string; returns a match object if found, otherwise None
s = 'abc123\ndef456'
res = re.match('\d+', s) #从开头搜索数字,搜索到了就有,没搜索到就是none
print(res)
----------------------------------------------
None
s = '123abc123\ndef456'
res = re.match('\d+', s)
print(res) #返回的是一个对象
print(res.group()) #对象必须用group()返回
-----------------------------------------------------
<re.Match object; span=(0, 3), match='123'>
123
re.search(): scans the whole string and returns the first match; returns None if nothing is found
s = '123abc123\ndef456'
res = re.search('\d+', s)
print(res)
print(res.group())
------------------------------------------------------
<re.Match object; span=(0, 3), match='123'>
123
re.split(): splits the string wherever the pattern matches
s1 = 'abc324asdfk234lkjsf324lkj'
print(re.split('\d+', s1))
-----------------------------------------------
['abc', 'asdfk', 'lkjsf', 'lkj']
re.sub(): replaces every match of the pattern (important)
s1 = 'abc324asdfk234lkjsf324lkj'
print(re.sub('\d+', '***', s1))
-----------------------------------------------
abc***asdfk***lkjsf***lkj
re.subn(): replaces every match of the pattern and also returns the number of replacements
s1 = 'abc324asdfk234lkjsf324lkj'
print(re.subn('\d+', '***', s1))
-----------------------------------------------
('abc***asdfk***lkjsf***lkj', 3)
5, Application Examples
example:
1. Given the string: Life234234is beautiful234because234of persistence
2. Use the re module to restore the sentence in one line of code: Life is beautiful because of persistence
import re
s = 'Life234234is beautiful234because234of persistence'
# 结果为:Life is beautiful because of persistence
answer:
print(" ".join(re.sub('[0-9]', " ", s).split()))
Two, typing module
1, the import mode
from typing import xxx
2, the role of
Provides three types - generator (Generator), iterable (Iterable), and iterator (Iterator) - for annotating (constraining) the data types of a function
3, method
from typing import Generator,Iterable,Iterator
# Data type of each parameter, and of the return value
def func(i: int, f: float, b: bool, lt: list, tup: tuple, dic: dict, g: Generator) -> tuple:
    """Pack the first six arguments into a tuple, in order.

    The annotations only document the intended types; Python does not
    enforce them at call time.

    Args:
        i, f, b, lt, tup, dic: values returned, in this order, as a tuple.
        g: accepted to demonstrate the ``Generator`` annotation; it is
            not included in the result.

    Returns:
        ``(i, f, b, lt, tup, dic)``.
    """
    return (i, f, b, lt, tup, dic)
# i, f, b, lt, tup, dic = func(1,2,3,4,5,6) # 不错误,只是不规范
def ger():
    """Minimal generator function: yields a single ``None`` and finishes."""
    yield
res = func(1, 2, True, [1, 2], (1, 2), {'a': 1},ger())
print(res)
-----------------------------------------------------
(1, 2, True, [1, 2], (1, 2), {'a': 1})
Three, collections module
1, the import mode
from collections import xxx
2, the role of
For complex data types
3, method
3.1 Named tuple: namedtuple
from collections import namedtuple
point = namedtuple('point',['x','y'])
p = point(1,2)
print(p.x)
print(p.y)
---------------------------------------------------
1
2
3.2 default dictionary defaultdict
from collections import defaultdict
# dic = {'a':1}
# print(dic['b'])
dic = defaultdict(lambda :'nan') # dic = {} # if a key is not found, it is assigned a default value
dic['a'] = 1
print(dic['a'])
print(dic['c']) # key 'c' is not found, so it is assigned the default value 'nan'
----------------------------------------------------------
1
nan
3.3 Double-ended queue: deque
When data is stored in a list, accessing elements by index is fast, but inserting and removing elements is slow because a list is stored linearly; with a large amount of data, insertion and deletion become very inefficient.
deque is a doubly linked list that supports efficient insertion and deletion, which makes it suitable for queues and stacks:
# lis = [1,2,3] # 线性表
# lis.append(4) #在后面追加4
# print(lis)
from collections import deque # linked list
de = deque([1,2,3])
de.append(4) # append 4 at the right end
print(de)
de.appendleft(0) # insert 0 at the left end
print(de)
de.popleft() # removes the leftmost element
print(de)
-------------------------------------------------------------
deque([1, 2, 3, 4])
deque([0, 1, 2, 3, 4])
deque([1, 2, 3, 4])
3.4 Counter
'''比较麻烦的方法'''
s= 'programming'
# dic = {}
# for i in s:
# if i in dic:
# dic[i]+=1
# else:
# dic[i] =1
# print(dic)
-------------------------------------------------------------
{'p': 1, 'r': 2, 'o': 1, 'g': 2, 'a': 1, 'm': 2, 'i': 1, 'n': 1}
# New approach: Counter does the "is this key already present?" check itself
from collections import Counter

s = 'programming'
c = Counter()  # behaves like a dictionary
for i in s:
    c[i] += 1  # a missing key counts as 0, so no explicit check is needed
print(c)
-----------------------------------------------------
Counter({'r': 2, 'g': 2, 'm': 2, 'p': 1, 'o': 1, 'a': 1, 'i': 1, 'n': 1})