# Python regex


# coding: utf-8

# In[1]:


import re


# In[2]:


# Sample text: a leading space, one single dot and a run of four dots.
# Regex literals below are raw strings so that '\.' and '\s' reach the re
# module unchanged instead of being parsed as (invalid) string escapes.
text = ' alpha. beta....gamma delta'


# In[3]:


# Split on every individual dot; adjacent dots leave empty strings behind:
# [' alpha', ' beta', '', '', '', 'gamma delta']
re.split(r'[\.]', text)


# In[4]:


# '+' treats a run of dots as one separator:
# [' alpha', ' beta', 'gamma delta']
re.split(r'[\.]+', text)


# In[5]:


# Split on runs of whitespace; the leading space yields a leading '':
# ['', 'alpha.', 'beta....gamma', 'delta']
re.split(r'[\s]+', text)


# In[6]:


re.split(r'[\s]+', text)


# In[7]:


# Split on the literal letter 'a':
# [' ', 'lph', '. bet', '....g', 'mm', ' delt', '']
re.split('[a]', text)


# In[10]:


# maxsplit=1 stops after the first separator:
# [' alpha', ' beta....gamma delta']
re.split(r'[\.]+', text, maxsplit=1)


# In[11]:


# One or more ASCII letters, i.e. one "word".
regex = '[a-zA-Z]+'


# In[13]:


# ['alpha', 'beta', 'gamma', 'delta']
re.findall(regex, text)


# In[14]:


# Three single letters separated by spaces.
s = 'a s d'


# In[15]:


# Character class: every 'a', 's' or 'd' becomes 'good'.
re.sub(pattern='[asd]', repl='good', string=s)


# In[16]:


# Alternation spelling of the same substitution.
re.sub(pattern='a|s|d', repl='good', string=s)


# In[17]:


# re.match only matches at the start of the string: it returns a match
# object on success and None otherwise.
print(re.match(pattern='done|quit', string='doned'))


# In[18]:


# 'don' is only a prefix of 'done', so this prints None.
print(re.match(pattern='done|quit', string='don'))


# In[19]:


# The '!' breaks the literal 'done', so this prints None as well.
print(re.match(pattern='done|quit', string='d!one!'))


# In[20]:


import re


# In[21]:


# Words separated by one or more spaces.
# Regex literals below are raw strings so that '\s' reaches the re module
# unchanged instead of being parsed as an (invalid) string escape.
s = "aaa     bbb cccc"


# In[22]:


# Splitting on a single whitespace character leaves one empty string for
# every extra space inside a run.
re.split(r'\s', s)


# In[24]:


# Same result: a character class holding only \s is equivalent to \s.
re.split(r'[\s]', s)


# In[25]:


# Irregular spacing plus trailing whitespace.
s = 'aa bbb cc ddd   dd ef ff             '


# In[26]:


# Runs of whitespace collapse into one separator, but the trailing run still
# produces a final empty string.
re.split(r'[\s]+', s)


# In[27]:


# Stripping first removes that trailing empty string:
# ['aa', 'bbb', 'cc', 'ddd', 'dd', 'ef', 'ff']
re.split(r'[\s]+', s.strip())


# In[28]:


# Re-join with single spaces: 'aa bbb cc ddd dd ef ff'
' '.join(re.split(r'[\s]+', s.strip()))


# In[29]:


# Any other separator works the same way: 'aa*bbb*cc*ddd*dd*ef*ff'
'*'.join(re.split(r'[\s]+', s.strip()))


# In[30]:


# The steps above implement string clean-up (whitespace normalisation).


# In[31]:


# Regex literals below are raw strings so that '\d', '\.' and '\s' reach the
# re module unchanged instead of being parsed as (invalid) string escapes.
example = 'NanJing Audit University'


# In[32]:


# Words starting with a lowercase 'a' (case-sensitive): none here -> []
re.findall(r'\ba.+?\b', example)


# In[34]:


# Same pattern, case-insensitive: ['Audit']
re.findall(r'\ba.+?\b', example, re.I)


# In[37]:


# \B is "not a word boundary": match an 'i' that is NOT the first letter of
# a word, then lazily continue to the next word boundary. The result is the
# tail of each word from its first non-initial 'i':
# ['ing', 'it', 'iversity']
re.findall(r'\Bi.+?\b', example)


# In[40]:


# digit.digit.<exactly two digits>: ['3.6.10']
re.findall(r'\d\.\d\.\d{2}', 'Python 3.6.10')


# In[41]:


# digit.digit.<one or more digits>: ['3.6.10']
re.findall(r'\d\.\d\.\d+', 'Python 3.6.10')


# In[42]:


# Split on single whitespace characters: ['NanJing', 'Audit', 'University']
re.split(r'[\s]', example)


# In[44]:


import re


# In[45]:


example = "NanJing University of Science and Technology"


# In[47]:


# Words that start with 's' or 'S'.
pattern = re.compile(r"\bs\w+", flags=re.IGNORECASE)


# In[48]:


pattern.findall(example)


# In[49]:


# Words that end with 'y'.
pattern = re.compile(r"\w+y\b")


# In[50]:


pattern.findall(example)


# In[59]:


# Words that are exactly three ASCII letters long.
pattern = re.compile(r"\b[a-zA-Z]{3}\b")


# In[60]:


string = "a d fdfd ef fdafds adf dfads dfd ddd dde NanJing University of Science and Technology"


# In[61]:


pattern.findall(string)


# In[62]:


# match() only tries at position 0; 'NanJing' is longer than three letters,
# so the result is None.
pattern.match(example)


# In[63]:


print(pattern.match(example))


# In[64]:


# Also None: the text starts with the one-letter word 'a'.
print(pattern.match(string))


# In[65]:


# search() scans forward and returns the first three-letter word it finds.
pattern.search(string)


# In[66]:


print(example)


# In[67]:


# Words that contain a lowercase 'n' anywhere.
pattern = re.compile(r"\b\w*n\w*\b")


# In[69]:


pattern.findall(example)


# In[82]:


# Lazily match from '@' up to and including the next dot, i.e. the '@host.'
# part of an e-mail address. Raw string so '\.' is not parsed as an
# (invalid) string escape.
pattern = re.compile(r'@.+?\.')


# In[83]:


# NOTE: the scraped text previously held a web-page e-mail obfuscation
# placeholder here, which contains no '@' and therefore matched nothing.
# Placeholder addresses are used so the demo produces matches again.
message = 'alice@example.com fads bob@example.org'


# In[84]:


# ['@example.', '@example.']
pattern.findall(message)


# In[85]:


example = '''Beautiful is * than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.'''


# In[86]:


# Case-insensitive: every word that begins with 'b' or 'B'.
pattern = re.compile(r"\bb\w*\b", flags=re.IGNORECASE)


# In[87]:


pattern.findall(example)


# In[93]:


# Mask every matched word with five asterisks.
print(pattern.sub("*****", example))


# In[94]:


# Same, but stop after the first two replacements.
print(pattern.sub("*****", example, count=2))


# In[98]:


import re
# Text containing three phone numbers of the form <area code>-<number>.
telNumber = '''Suppose my Phone No. is 0513-1234567,
yours is 010-12345678, his is 025-87654321.'''
# Group 1 captures the 3-4 digit area code, group 2 the 7-8 digit number.
pattern = re.compile(r'(\d{3,4})-(\d{7,8})')
index = 0
# search(string, pos): pos=i starts the search at the (i+1)-th character of
# telNumber. Prime the loop with the first search, then repeat it from the
# end of each hit until nothing more is found.
matchResult = pattern.search(telNumber, index)
while matchResult:
    # Group 0 is the whole match; groups 1..pattern.groups are the captures.
    for i in range(pattern.groups + 1):
        print(
            'Searched content:', matchResult.group(i),
            ' Start from:', matchResult.start(i),
            'End at:', matchResult.end(i),
            ' Its span is:', matchResult.span(i),
        )
    # Resume right after the second captured group of this match.
    index = matchResult.end(2)
    matchResult = pattern.search(telNumber, index)


# In[111]:


exampleString = '''There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than right now.'''


# In[100]:


# Lookbehind + lookahead: 'never' must be preceded by "<word char><space>"
# and followed by "<space><word char>", i.e. a 'never' in the middle of the
# text, not at the start or end of a sentence.
pattern = re.compile(r"(?<=\w\s)never(?=\s\w)")


# In[101]:


pattern.findall(exampleString)


# In[104]:


matchResult = pattern.search(exampleString)


# In[105]:


print(matchResult)


# In[106]:


# (?:...) groups without capturing: 'is ' is still part of the overall match
# (group(0)), but it gets no group number, so group(1) is the '\sthan' part.
pattern = re.compile(r"(?:is\s)better(\sthan)")


# In[109]:


matchResult = pattern.search(exampleString)
print(matchResult)
print(matchResult.group(0))
print(matchResult.group(1))


# In[113]:


exampleString = '''There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than right now.'''
index = 0
# Walk through every occurrence: search from ``index``, report the hit, then
# continue from the end of that hit until search() returns None.
matchResult = pattern.search(exampleString, index)
while matchResult:
    print(matchResult.group(0), ":", matchResult.span(0))
    index = matchResult.end()
    matchResult = pattern.search(exampleString, index)
# Published 44 original articles · 7 likes · 10k+ views
#
# You may also like
#
# Reposted from blog.csdn.net/qq_38060122/article/details/83793826