正则表达式re库学习笔记

版权声明: https://blog.csdn.net/t1anyuan/article/details/79561202
import re
# Sample text that the commented-out re.match() examples below run against.
content = "Hello 123 4567 World_This is a Demo"

泛匹配

# result = re.match('^Hello\s\d',content)
# print(result)
# print(result.group())
#
# result1 = re.match('^Hello(.*)mo$',content)
# print(result1.group(1))
#group(1) 匹配第一个小括号里面的(.*)

贪婪匹配

# .* 匹配尽可能多的
#非贪婪匹配
# .*?
# result1 = re.match('^Hello.*?(\d+).*mo$',content)
# print(result1.group(1))

转义

# content1 = 'price is $5.00'
# r = re.match('price is \$5\.00', content1)
# print(r)

findall方法

# Sample fragment of a bilibili search-result page, used as the text to scan.
html = '''
<span title="上传时间" class="so-icon time"><i class="icon-date"></i>
          2018-03-12
        </span>
        <span title="up主" class="so-icon">
        <i class="icon-uper">
        </i>
        <a href="//space.bilibili.com/50329118?from=search&amp;seid=4013545761942438181" target="_blank" class="up-name">
        bilibili英雄联盟赛事</a></span>
        </span><span title="up主" class="so-icon"><i class="icon-uper"></i><a href="//space.bilibili.com/111369485?from=search&amp;seid=4013545761942438181" target="_blank" class="up-name">GENJI丶虚无</a></span></div></div></li><li class="video matrix"><a href="http://www.bilibili.com/video/av20564642?from=search&amp;seid=4013545761942438181" target="_blank" title="[LPL春季赛]3月9日 IG vs BLG"><div class="img"><div class="lazy-img"><img alt="" src="//i0.hdslb.com/bfs/archive/3972fa1be2d1329483b80ef46cf92ecc16236e53.jpg@400w_250h.webp"></div><span class="so-imgTag_rb">
          02:17:54
        </span><div class="watch-later-trigger watch-later"></div></div></a><div class="info"><div class="headline clearfix"><span class="type avid">av20564642</span><span class="type hide">电子竞技</span><a title="[LPL春季赛]3月9日 IG vs BLG" href="http://www.bilibili.com/video/av20564642?from=search&amp;seid=4013545761942438181" target="_blank" class="title">[<em class="keyword">LPL</em>春季赛]3月9日 IG vs BLG</a></div><div class="des hide">
        相关游戏: 英雄联盟
简介补充: [LPL春季赛]3月9日 IG vs BLG
      </div><div class="tags"><span title="观看" class="so-icon watch-num"><i class="icon-playtime"></i>
          14.5万
        </span><span title="弹幕" class="so-icon hide"><i class="icon-subtitle"></i>
          5045
        </span><span title="上传时间" class="so-icon time"><i class="icon-date"></i>
 '''

# Extract (profile URL, uploader name) pairs from every "up-name" anchor.
#  - Raw string: "\s" in a normal string literal is an invalid escape sequence.
#  - The closing quote of the href attribute is matched outside group 1 so the
#    captured URL does not end with a stray '"'.
#  - "\s*" on both sides of group 2 drops the newline/indentation that may
#    surround the uploader name inside the anchor.
#  - re.S lets "." cross line breaks, since an anchor's text can start on the
#    line after its opening tag.
r = re.findall(
    r'<a\shref="(.*?)"\starget="_blank"\sclass="up-name">\s*(.*?)\s*</a></span>',
    html,
    re.S,
)

# Print each (url, name) tuple followed by a separator line.
for pair in r:
    print(pair)
    print('--------')

猜你喜欢

转载自blog.csdn.net/t1anyuan/article/details/79561202