Python 爬虫

import urllib2

import re

# Fetch the Baidu home page. The response object is referenced only by the
# commented-out regex experiments later in this script.
# NOTE(review): urllib2 is a Python 2 module, so this line needs Python 2
# and network access to run.
response = urllib2.urlopen('http://www.baidu.com/')

 

# Sample document with two <title> elements, used to compare regex strategies.
text = 'JGood is<title>sdfa</title>  a handsome <title> boy, </title>he is cool, clever, and so on...'

# Same sample with every 'y' removed ("boy" becomes "bo").
text2 = text.replace('y', '')

# Earlier experiments against the downloaded page and the sample, kept for
# reference:
#m = re.search(r'<title>(.*)<\/title>',response.read())
#m = re.match(r'.*<title>(.*)<\/title>.*',response.read())
#m = re.match(r'.*<title>(.*)<\/title>.*',text2)

# Greedy search: `.*` extends to the LAST </title> in the input, so on this
# sample group(1) spans both elements and the markup between them.
m = re.search(r'<title>(.*)<\/title>', text2)

title = m.group(1)
# On Python 2 the match is a byte string and is decoded to unicode as before;
# on Python 3 it is already text, and str has no .decode() method.
if isinstance(title, bytes):
    title = title.decode('utf-8', 'ignore')
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(title)

# Greedy variant over the unmodified sample:
#m = re.finditer(r'<title>(.*)</title>',text)

# [^<title>] is a character class: it rejects any single character out of
# '<', 't', 'i', 'l', 'e', '>', not the substring "<title>".
#m = re.finditer(r'<title>([^<title>]*)</title>',text)

# Every repeated character carries a negative lookahead, so it may not be
# immediately followed by "<title>". On this sample that yields one match per
# element, with group(1) equal to 'sdfa' and ' boy, '. The trailing `.` means
# an empty <title></title> is not matched on its own.
# `m` is left as the unconsumed iterator returned by re.finditer.
m = re.finditer(r'<title>((.(?!<title>))*.)</title>', text)

猜你喜欢

转载自zhangmingwei.iteye.com/blog/2173900