Joining URLs in Python

Copyright notice: This is the blogger's original article, released under the CC 4.0 BY-SA license. Please include the original source link and this notice when reposting.
Original link: https://blog.csdn.net/Yellow_python/article/details/96157042

urlsplit, urljoin

from urllib.parse import urlsplit, urljoin

# urlsplit breaks a URL into its five parts: (scheme, netloc, path, query, fragment)
print('\033[035m{}\033[0m\n'.format(urlsplit('https://www.baidu.com/s?ie=UTF-8&wd=scrapy')))


def join_url(url, postfix, real):
    # Rebuild the scheme://netloc base, then join the postfix onto it
    tu = urlsplit(url)
    domain = tu[0] + '://' + tu[1]
    url_total = urljoin(domain, postfix)
    # Print each variable name next to its value; eval() looks the name up in the local scope
    for i in ['url', 'domain', 'url_total', 'real', 'url_total==real']:
        print('\033[033m%-15s\033[0m' % i, eval(i))
    print()


ls = [
    ('https://blog.csdn.net/Yellow_python',
     'https://blog.csdn.net/Yellow_python/article/details/94435972',
     'https://blog.csdn.net/Yellow_python/article/details/94435972'),
    ('https://k.autohome.com.cn/314/#pvareaid=2099126',
     '/spec/36144/',
     'https://k.autohome.com.cn/spec/36144/'),
]

for url, postfix, real in ls:
    join_url(url, postfix, real)
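
The tuple indexing above (tu[0], tu[1]) works, but urlsplit actually returns a SplitResult with named attributes, which reads more clearly. A minimal sketch of the same base-building step using those attributes (the variable names here are only for illustration):

from urllib.parse import urlsplit, urljoin

parts = urlsplit('https://k.autohome.com.cn/314/#pvareaid=2099126')
# SplitResult exposes scheme, netloc, path, query and fragment by name
base = parts.scheme + '://' + parts.netloc   # 'https://k.autohome.com.cn'
print(urljoin(base, '/spec/36144/'))         # https://k.autohome.com.cn/spec/36144/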

Function (for copy-paste)

from urllib.parse import urlsplit, urljoin


def join_url(url, postfix):
    # Keep only scheme://netloc from url, then join postfix onto that base
    tu = urlsplit(url)
    domain = tu[0] + '://' + tu[1]
    return urljoin(domain, postfix)


url = 'https://github.com/AryeYellow'
postfix = '/AryeYellow/NLP'
url_total = join_url(url, postfix)
print(url_total)
Output:
https://github.com/AryeYellow/NLP
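
Note that the helper strips url down to its scheme and netloc before joining, so a root-relative postfix like '/AryeYellow/NLP' always resolves against the site root. If the postfix might already be a full URL, urljoin handles that case too, as the first test pair above showed; a small sketch (the example URLs are illustrative):

from urllib.parse import urljoin

# An absolute postfix simply replaces the base entirely
print(urljoin('https://github.com', 'https://blog.csdn.net/Yellow_python'))
# -> https://blog.csdn.net/Yellow_python

# A root-relative postfix is resolved against the base's scheme and netloc
print(urljoin('https://github.com/AryeYellow', '/AryeYellow/NLP'))
# -> https://github.com/AryeYellow/NLP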

parse

from urllib import parse
keyword = 'Python爬虫'
# URL-encode a dict into a query string
dic = {"wd": keyword, 'q': 120}
wd = parse.urlencode(dic)
print(wd)
# quote: percent-encode a single string
quote = parse.quote(keyword)
print(quote)
# URL-decode (the reverse of quote)
unquote = parse.unquote(quote)
print(unquote)
Output:
wd=Python%E7%88%AC%E8%99%AB&q=120
Python%E7%88%AC%E8%99%AB
Python爬虫
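
Going the other way, urllib.parse can also decode a whole query string back into a dict; a brief sketch using parse_qs (note that values come back as lists):

from urllib import parse

qs = 'wd=Python%E7%88%AC%E8%99%AB&q=120'
print(parse.parse_qs(qs))  # {'wd': ['Python爬虫'], 'q': ['120']}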

Function (for copy-paste)

from urllib import parse


def encode_url(url, dt):
    # Encode the parameter dict and append it to the base URL (which already ends in '?')
    wd = parse.urlencode(dt)
    return url + wd


url = 'https://www.baidu.com/s?'
dt = {'ie': 'UTF-8', 'wd': 'K房'}
print(encode_url(url, dt))  # https://www.baidu.com/s?ie=UTF-8&wd=K%E6%88%BF
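
The helper above assumes the base URL already ends in '?'. If that is not guaranteed, a hypothetical variant (the name build_url is mine, not from the original) can pick the separator itself:

from urllib import parse


def build_url(base, params):
    # Use '&' if base already contains '?', otherwise start the query with '?'
    sep = '&' if '?' in base else '?'
    return base + sep + parse.urlencode(params)


print(build_url('https://www.baidu.com/s', {'ie': 'UTF-8', 'wd': 'Python爬虫'}))
# https://www.baidu.com/s?ie=UTF-8&wd=Python%E7%88%AC%E8%99%AB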
