Python爬虫爬取IP记录网站信息并存入数据库

 1 import requests
 2 import re
 3 import pymysql
 4 #10页 仔细观察路由
 5 db = pymysql.connect("localhost","root","root","testdb" )
 6 cursor = db.cursor()
 7 
 8 for i in range(1,10):
 9     url = 'http://*******8****'
10     url=url+'index_'+str(i)+'.html'
11     response = requests.get(url)
12     HTML = response.text
13     pattern= re.compile(r'(((2(5[0-5]|[0-4]\d))|[0-1]?\d{1,2})(\.((2(5[0-5]|[0-4]\d))|[0-1]?\d{1,2})){3})')
14     compile_1 = re.compile(r'\s+(\d{3,6})\s')
15     compile_2 = re.compile(r'\s+([\u4e00-\u9fa5]{1,9})\s?(?:省|新疆|内蒙古|市|县|区])')
16     # compile_4 = re.compile(r's+(\d{4}/\d{2}/\d{2}\s+\d{2}:\d{2}:\d{2})\s')
17     res2 = compile_2.findall(HTML) 
18     res1 = compile_1.findall(HTML)
19     result = pattern.findall(HTML) 
20     # res3 = compile_4.findall(HTML)
21     # print(res3,len(result))    
22     for ip_ in result:
23         print(ip_[0])  
24         sql = "INSERT INTO test (ip,port,place) VALUES ('%s','%s','%s')" %(ip_[0],res1[0],res2[0])
25         print(sql)
26         cursor.execute(sql)
27 db.close()
28 
29 # 使用cursor()方法获取操作游标 

猜你喜欢

转载自www.cnblogs.com/Y139147/p/11306733.html