学习Python的日子 爬虫(2)

随机选择ip案例

# Example: issue a GET request through a randomly chosen proxy.
#
# FIX: the original list held entries shaped {"ip": "port"}.  The requests
# library expects the `proxies` mapping to be keyed by URL scheme, i.e.
# {"http": "http://host:port"}; any other keys are silently ignored, so the
# original code never actually used a proxy.
import requests
import random

proxy_list = [
    {"http": "http://123.139.56.238:9999"},
    {"http": "http://113.119.58.230:3128"},
    {"http": "http://122.72.18.35:80"},
    {"http": "http://122.72.18.34:80"},
    {"http": "http://203.174.112.13:3128"},
]
# Pick one proxy at random for this request.
proxy = random.choice(proxy_list)
print(proxy)
# NOTE(review): these are public demo proxies and may well be dead — a
# requests.exceptions.ProxyError here means the proxy, not the code, failed.
response = requests.get("http://www.atguigu.com", proxies=proxy)
print(response.content.decode())

使用Cookie进入京东商城账号主页

# Example: reuse a logged-in browser session by copying its Cookie header,
# then fetch the JD account home page with urllib.
from urllib.request import Request, urlopen

# Headers copied from the browser's "Request Headers" panel so the server
# treats this request like the original browser session.
jd_headers = {
    "Host": "home.jd.com",
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.15 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Referer": "https://www.jd.com/",
    # "Accept-Encoding" is deliberately omitted so the response arrives
    # uncompressed and can be decoded directly.
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cookie": "粘贴Request Headers下的Cookie",  # paste your own session cookie here
}

req = Request("https://home.jd.com/", headers=jd_headers)
resp = urlopen(req)
print(resp.read().decode())

URLError与HTTPError案例

# Example: urllib error handling.  HTTPError (a subclass of URLError) carries
# an HTTP status code; URLError covers lower-level failures such as DNS
# errors or refused connections, so it must be caught second.
from urllib.request import Request, urlopen, HTTPError, URLError

try:
    req = Request("http://www.baidu.com")
    resp = urlopen(req)
except HTTPError as err:
    # Server responded, but with an error status.
    print("捕获HTTPError异常:", err.code)
    print("捕获HTTPError异常:", err)
except URLError as err:
    # Request never completed (bad host, refused connection, ...).
    print("捕获URLError异常:", err)
else:
    # Success path: only read the body when no exception occurred.
    print(resp.read().decode())
    print("没有错误,正常执行!")

使用Requests的get请求

# Example: minimal GET with requests — show the underlying prepared request
# object, then the decoded response body.
import requests

resp = requests.get("http://www.baidu.com/")
print(resp.request)
print(resp.content.decode())

使用Requests的post请求

# Example: POST form data with requests; httpbin.org echoes the request back,
# so the response shows exactly what was sent.
import requests

payload = {
    "name": "python",
    "age": "666",
}
ua_only = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.15 Safari/537.36"
}

resp = requests.post("http://httpbin.org/post", data=payload, headers=ua_only)

# Inspect the response three ways: raw text, final URL, parsed JSON.
separator = "==" * 20
print(resp.text)
print(separator)
print(resp.url)
print(separator)
print(resp.json())

下载图片

# Example: download an image with requests and save it to disk.
#
# FIX: the original printed the success message "下载完成!" unconditionally,
# even when the status code was not 200 and no file had been written.  The
# message now lives inside the success branch, and failures are reported
# explicitly.
import requests

headers = {
    "Host": "mm.chinasareview.com",
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.15 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cookie": "__jsluid=154c798edd315176df82a315d002bd77",
    # NOTE(review): these conditional headers invite a 304 Not Modified
    # response, in which case the body is empty and nothing is saved —
    # drop them if you always want the image bytes.
    "If-None-Match": "f67cd360b83ed31: 108f",
    "If-Modified-Since": "Fri, 06 Oct 2017 15:32:54 GMT",
}

response = requests.get("http://mm.chinasareview.com/wp-content/uploads/2017a/07/18/07.jpg", headers=headers)
print("code--", response.status_code)
if response.status_code == 200:
    # Binary mode: response.content is raw bytes.
    with open("huskie.jpg", "wb") as f:
        f.write(response.content)
    print("下载完成!")
else:
    print("下载失败, 状态码:", response.status_code)










猜你喜欢

转载自blog.csdn.net/qq_42240071/article/details/80904130
今日推荐