使用动态IP拨号服务器
import os
import time
# Dial-up settings consumed by the Adsl class: the connection entry name
# plus the broadband login credentials (placeholders here).
g_adsl_account = dict(
    name="adsl",
    username="...",
    password="...",
)
class Adsl(object):
    """Dial and hang up a broadband connection with the ``rasdial`` command.

    The connection entry name and the login credentials are taken from the
    module-level ``g_adsl_account`` mapping when the object is created.
    """

    # Seconds to pause after each rasdial command before returning, so the
    # link state has time to change.
    SETTLE_SECONDS = 5

    def __init__(self):
        """Copy the entry name, username and password from g_adsl_account."""
        self.name = g_adsl_account["name"]
        self.username = g_adsl_account["username"]
        self.password = g_adsl_account["password"]

    def connect(self):
        """Dial the broadband connection, then wait SETTLE_SECONDS."""
        cmd_str = "rasdial %s %s %s" % (self.name, self.username, self.password)
        os.system(cmd_str)
        time.sleep(self.SETTLE_SECONDS)

    def disconnect(self):
        """Hang up the broadband connection, then wait SETTLE_SECONDS."""
        cmd_str = "rasdial %s /disconnect" % self.name
        os.system(cmd_str)
        time.sleep(self.SETTLE_SECONDS)

    def reconnect(self):
        """Redial: disconnect first, then connect again."""
        self.disconnect()
        self.connect()
if __name__ == '__main__':
    # Executed as a script: hang up and redial the connection once.
    Adsl().reconnect()
import requests
import time
import random
import changeIP
# Page the crawler downloads.
link = "http://www.santostang.com/"

# Request headers sent with every fetch; only a browser-style User-Agent.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) '
        'Gecko/20091201 Firefox/3.5.6'
    ),
}
def scrapy(url, num_try=3, timeout=10):
    """Download *url* and return its text, redialing for a new IP on failure.

    Args:
        url: Address of the page to fetch.
        num_try: Number of redial-and-retry rounds still allowed after a
            failed request.
        timeout: Seconds to wait for the server before the request counts
            as failed (without it a stalled connection would block forever
            and the redial path would never run).

    Returns:
        The response body as text, or None when the request and every
        retry failed.
    """
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        html = r.text
        # Random pause of 0 to just under 3 seconds after a successful fetch.
        time.sleep(random.randint(0, 2) + random.random())
    except Exception as e:
        print(e)
        html = None
        if num_try > 0:
            # Redial to obtain a different IP, then retry the same URL with
            # one fewer attempt left.
            x = changeIP.Adsl()
            x.reconnect()
            html = scrapy(url, num_try - 1, timeout)
    return html
# Fetch the target page once when this script runs; `result` is whatever
# scrapy returns (the page text, or None if the fetch failed).
result = scrapy(link)
使用Tor服务器
from stem import Signal
from stem.control import Controller
import socket
import socks
import requests
import time
# Number of fetch / identity-change rounds that are timed and averaged.
NUM_ROUNDS = 10
# Seconds slept after each NEWNYM signal; subtracted again from the measured
# change time so only the signalling cost is reported.
NEWNYM_WAIT = 5

# The bare numbers printed below are progress markers around the controller
# setup steps.
print(0)
# Port 9151 is presumably the local Tor control port (confirm against the Tor
# configuration). The `with` block closes the control connection when the
# script finishes or fails, instead of leaving it open in the interpreter.
with Controller.from_port(port=9151) as controller:
    print(2)
    controller.authenticate()
    print(1)

    # From here on every new socket goes through the SOCKS5 proxy at
    # 127.0.0.1:9150, because socket.socket itself is replaced process-wide.
    # NOTE(review): the replacement persists for the life of the interpreter,
    # so re-running this script in the same session also opens the controller
    # connection through the proxy - confirm whether that is intended.
    socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 9150)
    socket.socket = socks.socksocket

    total_scrappy_time = 0
    total_changeIP_time = 0
    for x in range(NUM_ROUNDS):
        # Show the public IP currently seen by an external service.
        a = requests.get("http://checkip.amazonaws.com").text
        print("第", x+1, "次IP:", a)

        # Time one page download.
        time1 = time.time()
        a = requests.get("http://www.santostang.com/").text
        time2 = time.time()
        total_scrappy_time = total_scrappy_time + time2-time1
        print("第", x+1, "次抓取花费时间:", time2-time1)

        # Time the identity change, excluding the fixed wait.
        time3 = time.time()
        controller.signal(Signal.NEWNYM)
        time.sleep(NEWNYM_WAIT)
        time4 = time.time()
        total_changeIP_time = total_changeIP_time + time4-time3-NEWNYM_WAIT
        print("第", x+1, "次更换IP花费时间:", time4-time3-NEWNYM_WAIT)

print("平均抓取花费时间:", total_scrappy_time/NUM_ROUNDS)
print("平均更换IP花费时间:", total_changeIP_time/NUM_ROUNDS)
我在 Jupyter Notebook 上运行时出现如下错误:
SocketError: Socket error: 0x01: General SOCKS server failure
而在 Spyder 上运行则没有问题。