crawler request 2

#!/usr/bin/env python
# -*- coding:utf-8 -*-

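# Cookie: literally "small cake" / "biscuit"
# Characteristics
# 1. Used to store certain information about the user (it should not contain private information)
# 2. Only used to store a small amount of data
# 3. A cookie is a file kept on the client side, by the browser
# 4. A cookie has a lifetime: a session cookie (one without an explicit expiry)
#    becomes invalid once the browser session ends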
import requests
# Fetch the Chouti home page and print the cookies the server set on the response.
url = "http://dig.chouti.com"
# Pass an explicit timeout: without one, requests waits indefinitely when the
# server accepts the connection but never answers.
response = requests.get(url, timeout=10)
print(response.cookies)

# When a site requires login and the login form also has a captcha,
# use a cookie to simulate a logged-in visit to Zhihu.
# NOTE(review): the Cookie header below is a hard-coded session credential
# pasted from a browser; it should not live in source control.
url = "https://www.zhihu.com"
response = requests.get(
    url,
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:49.0) Gecko/20100101 Firefox/49.0",
        "Cookie": '''q_c1=dd70a0398f1b4836a5160b9f036fb447|1519631377000|1519631377000; capsion_ticket="2|1:0|10:1519724534|14:capsion_ticket|44:Y2QxYTJlNzNmN2ZkNDBkZTk4MGZjYzQyNGUxNmRlNjE=|1e9c827a7211c81987a5eb811d9f4f4acf80756838f6a10ae0c626f07ba54262"; _zap=5a78f8c6-2120-4eb3-a98d-6e7be333baba; aliyungf_tc=AQAAADKpiieNwAcALROfdUDjSU0fRvqS; d_c0="ALBr4aKQNQ2PTgyHuBjhaOLB-kTqUZQ_mgw=|1519712984"; _xsrf=37942126-c9d8-41f8-9749-4910c1aea54b; z_c0="2|1:0|10:1519724580|4:z_c0|92:Mi4xUE5TUkJRQUFBQUFBc0d2aG9wQTFEU1lBQUFCZ0FsVk5KSGFDV3dBUG5JdVVENkJlZGhTUTVyZWY1V2hrTUtYYUpB|e58c9866486e68edaf8e39d2da0d2892b3e484de609ca171783175ff54637cc7"''',
    },
    # Without a timeout the call can block forever on an unresponsive server.
    timeout=10,
)
# response.content is the raw body as bytes (not decoded text).
print(response.content)

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-
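# 1. If logging in to a site requires typing a captcha, consider logging in directly with a cookie.
# Example: Zhihu
# Note: the cookie has to be copied and pasted after logging in manually.

# 2. If logging in to a site does not require a captcha, consider logging in automatically from code.
# Example: Chouti
# Note: its cookie is obtained automatically by the code.

# Chouti's login flow (does not apply to other sites)
# 1. First get the cookie returned by the home page, because it is the user's credential.
# 2. Log in (carrying the cookie from step 1).
# 3. Start upvoting / commenting ...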
import requests
from random_agent import RandomAgent
from fake_useragent import UserAgent
# `agent.random` is used below as the value of the User-Agent header.
agent = UserAgent()


# Don't reinvent the wheel: look for an existing package first.
# pip search <package name>

# pip install fake_useragent

url = "http://dig.chouti.com/"
# Step 1: what a browser does to load a page is normally a GET request.
response = requests.get(
    url,
    headers={
        # Alternative source for this value: RandomAgent.rand() from random_agent.
        "User-Agent": agent.random,
    },
    # Without a timeout the call can block forever on an unresponsive server.
    timeout=10,
)
# Keep the cookies set by the home-page response as a plain dict so they can
# be passed to the following requests.
cookies = response.cookies.get_dict()

# Step 2: log in, sending the cookies obtained from the home-page response.
# NOTE(review): the phone number and password are hard-coded credentials;
# they should be supplied from outside the source file.
url = "http://dig.chouti.com/login"
response = requests.post(
    url,
    data={"phone": "8615896901897", "password": "qweqweqwe1", "oneMonth": "1"},
    headers={"User-Agent": agent.random},
    cookies=cookies,
    # Without a timeout the call can block forever on an unresponsive server.
    timeout=10,
)

# Cookies set by the login response, printed next to the home-page cookies
# so the two sets can be compared.
cookies1 = response.cookies.get_dict()
print(cookies)
print(cookies1)

# Step 3: upvote a link. This deliberately sends `cookies` (from the
# home-page response), not `cookies1` (from the login response).
url = "http://dig.chouti.com/link/vote?linksId=17717073"
response = requests.post(
    url,
    headers={"User-Agent": agent.random},
    cookies=cookies,
    # Without a timeout the call can block forever on an unresponsive server.
    timeout=10,
)
print(response.text)


# 1. On the first visit to the home page, the server returns a cookie.
# 2. Logging in while carrying the cookie from step 1 makes the server return another cookie.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
from fake_useragent import UserAgent
# `agent.random` is used below as the value of the User-Agent header.
agent = UserAgent()

# Requests sent through a Session carry the cookies automatically, so no
# cookie dict has to be passed by hand between the three steps.
# requests.Session() replaces the deprecated requests.session() factory, and
# the `with` block closes the session's connections when the flow is done.
with requests.Session() as session:
    # Step 1: visit the home page.
    s1 = session.get(
        "http://dig.chouti.com/",
        headers={"User-Agent": agent.random},
        timeout=10,
    )
    # Step 2: log in.
    # NOTE(review): the phone number and password are hard-coded credentials;
    # they should be supplied from outside the source file.
    s2 = session.post(
        "http://dig.chouti.com/login",
        data={"phone": "8615896901897", "password": "qweqweqwe1", "oneMonth": "1"},
        headers={"User-Agent": agent.random},
        timeout=10,
    )
    # Step 3: upvote a link.
    s3 = session.post(
        "http://dig.chouti.com/link/vote?linksId=17717071",
        headers={"User-Agent": agent.random},
        timeout=10,
    )
print(s3.text)




# Opening "1.txt" for writing, Python 2.7 style versus Python 3.6 style.
# Each handle is opened in a `with` block so it is closed again instead of
# being leaked when `f` is rebound to the next handle.

# python2.7
with open("1.txt", "w") as f:
    pass  # writes would go here
import codecs
# codecs.open() takes an explicit encoding.
with codecs.open("1.txt", "w", encoding="utf-8") as f:
    pass  # writes would go here

# python3.6
# The builtin open() is given the encoding directly.
with open("1.txt", "w", encoding="utf-8") as f:
    pass  # writes would go here


# What are the protocols of the seven network (OSI) layers, from bottom to top?
# What is the difference between a Cookie and a Session?

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325333849&siteId=291194637