Proxy Server
- source code
# proxyServer.py
from socket import *
# A minimal caching HTTP proxy.
#
# The proxy accepts one client at a time, looks the requested URL up in an
# on-disk cache (one file per URL, stored in the current working directory)
# and either answers from that file or relays the request to the origin
# server and stores the response for next time.

server_port = 22500
ORIGIN_PORT = 80        # origin servers are always contacted over plain HTTP
BUFFER_SIZE = 4096      # bytes requested per recv() call on the origin socket
ORIGIN_TIMEOUT = 5.0    # seconds to wait for the origin before giving up
NOT_FOUND = b"HTTP/1.1 404 Not Found\r\n\r\n"
HTML_MARKER = "<html>\n"


def _parse_target(message):
    """Derive the cache file name and the origin host from a proxy request.

    The request target is expected in absolute form, e.g.
    ``GET http://host/path HTTP/1.1``.

    Returns:
        ``(filename, hostname)``. Both are empty strings when the request
        has no target; ``hostname`` is empty when the target carries no
        ``//host`` part.
    """
    fields = message.split()
    if len(fields) < 2:
        return "", ""
    location = fields[1].partition("//")[2]
    filename = location.replace('/', '_')
    hostname = location.partition("/")[0]
    # Strip a *leading* "www." only; str.replace() would also remove a
    # "www." that happens to sit in the middle of a host name.
    if hostname.startswith("www."):
        hostname = hostname[len("www."):]
    return filename, hostname


def _cached_body(lines):
    """Return the entity-body lines of a cached response.

    A cache file holds the raw response (status line, headers, blank line,
    body). The body is taken from the ``<html>`` line when that exact line
    exists, otherwise from the line after the first blank line, otherwise
    the whole file is returned unchanged.
    """
    if HTML_MARKER in lines:
        return lines[lines.index(HTML_MARKER):]
    for idx, line in enumerate(lines):
        if not line.strip():
            return lines[idx + 1:]
    return lines


def _response_complete(data):
    """Tell whether *data* already holds a full HTTP response.

    Only responses that announce a ``Content-Length`` can be recognised as
    complete; for anything else this returns False and the caller keeps
    reading until the origin closes the connection or times out.
    """
    head, sep, body = bytes(data).partition(b"\r\n\r\n")
    if not sep:
        return False
    for header in head.split(b"\r\n")[1:]:  # [0] is the status line
        name, _, value = header.partition(b":")
        if name.strip().lower() == b"content-length":
            try:
                return len(body) >= int(value.strip())
            except ValueError:
                return False
    return False


def _fetch_from_origin(hostname, request):
    """Send *request* (bytes) to *hostname* and return the response bytes.

    Raises:
        OSError: if the host cannot be resolved or connected to, or the
            connection fails while sending or receiving.
    """
    response = bytearray()
    with socket(AF_INET, SOCK_STREAM) as c:
        c.settimeout(ORIGIN_TIMEOUT)
        c.connect((hostname, ORIGIN_PORT))
        c.sendall(request)
        # One recv() is not enough: a response may span many segments.
        while not _response_complete(response):
            try:
                chunk = c.recv(BUFFER_SIZE)
            except timeout:  # socket.timeout, from `from socket import *`
                # Keep-alive origin that sent no Content-Length: use what
                # has arrived so far instead of blocking forever.
                break
            if not chunk:
                break
            response += chunk
    return bytes(response)


def _store_in_cache(filename, response):
    """Write *response* to the cache file for *filename*, best effort."""
    try:
        text = response.decode(encoding="utf-8")
    except UnicodeDecodeError:
        # The cache is text based; binary payloads are relayed, not cached.
        return
    try:
        # newline="" keeps the response's own line endings on every platform.
        with open("./" + filename, "w", encoding="utf-8", newline="") as tmpFile:
            tmpFile.write(text)
    except OSError as err:
        print("Could not cache", filename, err)


def handle_client(tcpCliSock):
    """Serve a single proxy request arriving on *tcpCliSock*.

    The socket is left open; closing it is the caller's job.

    Raises:
        OSError: if writing to the client socket fails.
    """
    request = tcpCliSock.recv(1024)
    message = request.decode(encoding="utf-8", errors="replace")
    print(message)
    # Extract the filename and the origin host from the given message
    filename, hostname = _parse_target(message)
    print(filename)
    if not hostname:
        # Empty request (client hung up) or a target without "//host".
        print("Illegal request")
        if request:
            tcpCliSock.sendall(NOT_FOUND)
        return

    # Check whether the file exists in the cache
    try:
        with open(filename, "r", encoding="utf-8", errors="replace") as f:
            outputdata = f.readlines()
    except OSError:
        outputdata = []

    if outputdata:
        # ProxyServer finds a cache hit and generates a response message.
        # The cached status line and headers are replaced by a minimal
        # header section, terminated by an explicit blank line.
        body = _cached_body(outputdata)
        print(body)
        reply = "HTTP/1.1 200 OK\r\n" + "Content-Type:text/html\r\n" + "\r\n"
        tcpCliSock.sendall((reply + "".join(body)).encode(encoding="utf-8"))
        print('Read from cache')
        return

    # Cache miss: ask the origin server on port 80
    print("Host name:", hostname)
    try:
        response = _fetch_from_origin(hostname, request)
    except OSError:
        response = b""
    if not response:
        # HTTP response message for file not found
        print("Illegal request")
        tcpCliSock.sendall(NOT_FOUND)
        return
    # Relay the response to the client, then keep a copy in the cache
    tcpCliSock.sendall(response)
    _store_in_cache(filename, response)


def main():
    """Accept proxy clients forever, one connection at a time."""
    tcpSerSock = socket(AF_INET, SOCK_STREAM)
    try:
        tcpSerSock.bind(('', server_port))
        tcpSerSock.listen(1)
        while True:
            print('Ready to serve...')
            tcpCliSock, addr = tcpSerSock.accept()
            print('Received a connection from:', addr)
            try:
                handle_client(tcpCliSock)
            except OSError as err:
                # A broken client connection must not stop the server.
                print("Connection error:", err)
            finally:
                tcpCliSock.close()
    finally:
        # Reached on Ctrl-C or a fatal error, so the port is released.
        tcpSerSock.close()


if __name__ == "__main__":
    main()
-
搜索中输入代理服务器,打开系统设置的代理服务器设置,地址设为
http://localhost
,端口设置为程序中的server_port
-
访问http://gaia.cs.umass.edu/wireshark-labs/INTRO-wireshark-file1.html,通过代理服务器请求html对象
由于代理服务器没有缓存,因此代理服务器会向服务器gaia.cs.umass.edu请求该对象,见图中Host name: gaia.cs.umass.edu
,并将该文件对象存在代理服务器的文件夹中,结果如下
-
再次访问该网页,通过代理服务器请求对象
由于代理服务器有该对象的缓存,因此直接从缓存中取出对象发送给浏览器,见图中Read from cache
![proxy5](proxy5.png)
// 代理服务器缓存的文件,除html外还包括响应报文的状态行和首部行
HTTP/1.1 200 OK
Date: Fri, 06 Mar 2020 07:35:34 GMT
Server: Apache/2.4.6 (CentOS) OpenSSL/1.0.2k-fips PHP/5.4.16 mod_perl/2.0.11 Perl/v5.16.3
Last-Modified: Fri, 06 Mar 2020 06:59:02 GMT
ETag: "51-5a02a3045f9f3"
Accept-Ranges: bytes
Content-Length: 81
Content-Type: text/html; charset=UTF-8
<html>
Congratulations! You've downloaded the first Wireshark lab file!
</html>
值得一提的是,该文件除包含响应报文的entity body,即html部分内容外,还包括状态行、首部行等,若直接发送给浏览器显示,则这些状态行和首部行也会出现在页面中
可对该对象进行一定的处理后再发送给浏览器,保证只显示有用信息,或在保存该文件对象时便对其进行处理
outputdata = f.readlines()
outputdata = outputdata[outputdata.index("<html>\n")-1:]
-
其他注意事项
代理服务器运行在本机时,若将IP地址设为
127.0.0.1
,可能无法正常运行。解决方法:地址直接传入空字符串,此时套接字会绑定到本机的所有网络接口(INADDR_ANY),通过localhost即可访问
# tcpSerSock.bind(('127.0.0.1', server_port))
tcpSerSock.bind(('', server_port))