1. urllib.parse.urlparse(urlstring, scheme='', allow_fragments=True)
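- 功能: 将 url 拆分为 6 个部分, 返回一个具名元组 (ParseResult);
- 6 个部分依次为: 协议(scheme), 服务器的地址(netloc, 即 ip:port), 文件路径即访问的页面(path), 路径参数(params), 查询字符串(query), 片段/锚点(fragment)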
from urllib import parse
# Demo: break a Baidu search URL into its components with urlparse().
# The address is assembled from adjacent string literals purely for
# readability; the resulting value is one unbroken URL string.
url = (
    'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=0&rsv_idx=1&tn=baidu&wd=hello'
    '&rsv_pq=d0f841b10001fab6'
    '&rsv_t=2d43603JgfgVkvPtTiNX%2FIYssE6lWfmSKxVCtgi0Ix5w1mnjks2eEMG%2F0Gw'
    '&rqlang=cn&rsv_enter=1&rsv_sug3=6&rsv_sug1=4&rsv_sug7=101&rsv_sug2=0'
    '&inputT=838&rsv_sug4=1460'
)
parsed_tuple = parse.urlparse(url)
# Show the full parse result, then the host and the path, one per line.
print(parsed_tuple, parsed_tuple.netloc, parsed_tuple.path, sep='\n')
2. urllib.parse.urlencode: 将字典形式的参数编码为 url 查询字符串
from urllib.parse import urlencode
# Demo: encode a mapping of query parameters and append it to a base URL.
base_url = 'http://www.baidu.com?'
params = dict(name='westos', age=20)
# urlencode() renders the mapping as "key=value" pairs joined by "&".
query_string = urlencode(params)
url = ''.join((base_url, query_string))
print(url)
url异常处理
- 异常
exception urllib.error.URLError
exception urllib.error.HTTPError
exception urllib.error.ContentTooShortError(msg, content)
** 超时异常处理
from urllib import request, error
import socket
#
# Demo: request a page with a deliberately tiny timeout so that the
# timeout error paths below are exercised.
url = 'https://www.baidu.com'
try:
    # The context manager closes the connection even if read()/decode() fails.
    with request.urlopen(url, timeout=0.01) as response:
        print(response.read().decode('utf-8'))
except error.HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be handled first.
    print(e.reason, e.code, e.headers, sep='\n')
except error.URLError as e:
    print(e.reason)
    # A timeout while connecting is wrapped: it arrives as URLError.reason.
    if isinstance(e.reason, socket.timeout):
        print("超时")
except socket.timeout:
    # A timeout while reading the body is raised directly, not wrapped.
    print("超时")
else:
    # Runs only when the request and the read both completed without error.
    print("成功")