requests实现登陆、点赞、批量点赞、取消点赞

Python 标准库中提供了:urllib、urllib2、httplib 等模块以供 HTTP 请求,但是它们的 API 太难用了。它们是为另一个时代、另一个互联网所创建的,需要巨量的工作(甚至包括各种方法覆盖)来完成最简单的任务。

Requests 是使用 Apache2 Licensed 许可证的 基于Python开发的HTTP 库,其在Python内置模块的基础上进行了高度的封装,从而使得Pythoner进行网络请求时,变得美好了许多,使用Requests可以轻而易举的完成浏览器可有的任何操作。

入门实例

import requests
from bs4 import BeautifulSoup

# Step 1: fetch the autohome news listing page.
response = requests.get(url='https://www.autohome.com.cn/news/')
response.encoding = response.apparent_encoding
# print(response.text)

# Step 2: parse the HTML with BeautifulSoup and extract each article.
soup = BeautifulSoup(response.text, 'html.parser')  # 'lxml' also works

article_div = soup.find(name='div', id='auto-channel-lazyload-article')
for item in article_div.find_all(name='li'):
    headline = item.find(name='h3')
    # Some <li> entries carry no headline (ads/placeholders); skip them.
    if not headline:
        continue

    summary = item.find(name='p')
    link = item.find('a')

    print(headline.text, link.get('href'))
    print(summary.text)
    print('=' * 25)

输出如下:

=========================
内外焕然一新 全新长安CS35谍照曝光 //www.autohome.com.cn/news/201805/917066.html#pvareaid=102624
[汽车之家 国内谍照]  日前,我们从汽车拍客阿睿的微博处获取到了一组长安全新CS35的路试谍照。根据此前信息来看,新车将会在今年下半年正式上市销售。...
=========================
涉及7款新车型 北汽幻速公布产品规划 //www.autohome.com.cn/news/201805/917062.html#pvareaid=102624
[汽车之家 新闻]  日前,我们从北汽幻速官方获悉,其未来将推出全新“X”系列车型,并透露了其未来在新能源车型的布局以及将在未来推出一款全新MPV车型...
=========================

下载图片

import requests
from bs4 import BeautifulSoup

# 1. Download the news listing page.
ret = requests.get(url='https://www.autohome.com.cn/news/')
ret.encoding = ret.apparent_encoding
# print(ret.text)

# 2. Parse out each news item and save its thumbnail image to disk.
soup = BeautifulSoup(ret.text, 'html.parser')  # 'lxml' also works

div = soup.find(name='div', id='auto-channel-lazyload-article')
li_list = div.find_all(name='li')

for li in li_list:
    h3 = li.find(name='h3')
    # Some <li> entries carry no headline (ads/placeholders); skip them.
    if not h3:
        continue

    img = li.find('img')
    # Guard: not every item has a thumbnail with a usable src.
    if not img:
        continue
    # Image URLs are protocol-relative, e.g. //www2.autoimg.cn/...__name.jpg
    src = img.get('src')
    if not src:
        continue

    # Derive a local file name: the site puts the real name after the last
    # '__'; fall back to the URL basename when that marker is absent
    # (the original code raised IndexError in that case).
    if '__' in src:
        file_name = src.rsplit('__', maxsplit=1)[1]
    else:
        file_name = src.rsplit('/', maxsplit=1)[-1]

    # Fetch the image bytes.
    ret_img = requests.get(
        url="https:" + src
    )
    # Write them to the local file.
    with open(file_name, 'wb') as f:
        f.write(ret_img.content)

1、网站反爬虫

# A plain GET with no headers: the site's firewall detects the crawler
# and returns its block page instead of the content.
r1 = requests.get(url='https://dig.chouti.com/all/hot/recent/1')

print(r1.text)

输出如下:会提示网站防火墙

<html xmlns="http://www.w3.org/1999/xhtml"><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>网站防火墙</title>
<style>
p {
    line-height:20px;
}

2、伪造request请求

# Supplying a browser User-Agent header is enough to get past the firewall.
browser_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
}
r1 = requests.get(
    url='https://dig.chouti.com/all/hot/recent/1',
    headers=browser_headers,
)

print(r1.text)

添加headers,就可以爬取网站数据了

3、伪造登陆

# POST the login form; 'oneMonth' asks the site to keep the session alive.
login_payload = {
    'phone': 'XXXXX',
    'password': "XXXXX",
    'oneMonth': '1',
}
response_login = requests.post(
    url='https://dig.chouti.com/login',
    data=login_payload,
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    },
)

print(response_login.text)

输出如下:

{"result":{"code":"9999", "message":"", "data":{"complateReg":"0","destJid":"cdu_49803354421"}}}

4、错误的伪造点赞

# Deliberately WRONG approach (kept for the tutorial): vote using only the
# cookies returned by the login response. The vote is rejected, because the
# site authorizes the cookie handed out on the first anonymous visit, not
# the one set by the login response.
ua_header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}

response_login = requests.post(
    url='https://dig.chouti.com/login',
    data={'phone': '顺丰到付', 'password': "十分到", 'oneMonth': '1'},
    headers=ua_header,
)

# Cookies from the login response only — not sufficient for voting.
r1_cookie_dict = response_login.cookies.get_dict()

ret = requests.post(
    url="https://dig.chouti.com/link/vote?linksId=19329006",
    headers=ua_header,
    cookies=r1_cookie_dict,
)
print(ret.text)

以上是错误的,不能进行点赞

5、伪造点赞调试

点赞操作,网站cookie

Cookie: gpsd=9f1955c3dc27771fd2c1400a4d210b7d; JSESSIONID=aaaaCRNkvJO6KdAwZ6fmw; route=340ad5ec7bdbaaaa2d4d12be04eae5d2; gpid=9ed52f3ca2b64e49996d64cc9c86827a; _pk_ref.1.a2d5=%5B%22%22%2C%22%22%2C1525683488%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DuR2E6enPPD46TEft2mcKxrSqHCpayNgKIXfIAAEQKQuiypGVXfAdhuXBWfHubt-K%26wd%3D%26eqid%3Db497a0aa00025657000000035af01516%22%5D; _pk_ses.1.a2d5=*; puid=cdu_49803354421; puid=cb55d2189f99a8d3a19c7441d09affe5; _pk_id.1.a2d5=d0e24c0126d32bf5.1525683488.1.1525687100.1525683488.
# Vote using cookie values copied from a real, logged-in browser session.
# Of the whole browser cookie jar, only 'gpsd' and 'gpid' turn out to be
# required for the vote to succeed.
session_cookies = {
    'gpsd': '9f1955c3dc27771fd2c1400a4d210b7d',
    'gpid': '9ed52f3ca2b64e49996d64cc9c86827a',
}
ret = requests.post(
    url="https://dig.chouti.com/link/vote?linksId=19329006",
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    },
    cookies=session_cookies,
)
print(ret.text)

输出如下:

{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_49803354421","likedTime":"1525687086616000","lvCount":"10","nick":"似懂非懂发","uvCount":"508","voteTime":"小于1分钟前"}}}

6、通过代码进行点赞

import requests
from bs4 import BeautifulSoup

# User-Agent shared by the login and vote requests below.
CHROME_UA = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

# 1. Visit the front page anonymously; the site hands out a cookie that is
#    not yet authorized.
r1 = requests.get(
    url='https://dig.chouti.com/all/hot/recent/1',
    headers={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
    }
)
# print(r1.text)

r1_cookie_dict = r1.cookies.get_dict()

# 2. Log in while presenting that first-visit cookie so the server
#    authorizes it for subsequent actions.
response_login = requests.post(
    url='https://dig.chouti.com/login',
    data={
        'phone': '顺丰顺丰的',
        'password': "随风倒十分",
        'oneMonth': '1',
    },
    headers={'User-Agent': CHROME_UA},
    cookies=r1_cookie_dict,
)

# 3. Vote with the now-authorized first-visit cookie.
ret = requests.post(
    url="https://dig.chouti.com/link/vote?linksId=19329006",
    headers={'User-Agent': CHROME_UA},
    cookies=r1_cookie_dict,
)
print(ret.text)

输出如下:

{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_49803354421","likedTime":"1525687678292000","lvCount":"14","nick":"水水水水水","uvCount":"508","voteTime":"小于1分钟前"}}}

7、批量点赞

import requests
from bs4 import BeautifulSoup

# User-Agent shared by the login, listing and vote requests below.
CHROME_UA = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

# 1. Visit the front page anonymously to obtain the (not yet authorized) cookie.
r1 = requests.get(
    url='https://dig.chouti.com/all/hot/recent/1',
    headers={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
    }
)

r1_cookie_dict = r1.cookies.get_dict()

# 2. Log in with username/password plus the first-visit cookie, which the
#    server then authorizes for voting.
response_login = requests.post(
    url='https://dig.chouti.com/login',
    data={
        'phone': '算法大是大非',
        'password': "士大夫似的",
        'oneMonth': '1',
    },
    headers={'User-Agent': CHROME_UA},
    cookies=r1_cookie_dict,
)

# 3. Walk the hot-news pages and up-vote every item found.
for page_num in range(1, 2):

    response_index = requests.get(
        url='https://dig.chouti.com/all/hot/recent/%s' % page_num,
        headers={'User-Agent': CHROME_UA},
    )
    soup = BeautifulSoup(response_index.text, 'html.parser')
    div = soup.find(attrs={'id': 'content-list'})
    # Guard: layout change or a firewall page leaves no content list.
    if div is None:
        continue
    for item in div.find_all(attrs={'class': 'item'}):
        tag = item.find(attrs={'class': 'part2'})
        # Guard: some items lack the 'part2' tag or its share-linkid
        # attribute (the original code crashed with AttributeError here).
        if tag is None:
            continue
        nid = tag.get('share-linkid')
        if not nid:
            continue

        # Up-vote the news item by its ID.
        r1 = requests.post(
            url='https://dig.chouti.com/link/vote?linksId=%s' % nid,
            headers={'User-Agent': CHROME_UA},
            cookies=r1_cookie_dict,
        )
        print(r1.text)

输出如下:

E:\python\python_sdk\python.exe E:/python/py_dev/python/day132/2.登录抽屉.py
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_49803354421","likedTime":"1525688272054000","lvCount":"13","nick":"士大夫","uvCount":"509","voteTime":"小于1分钟前"}}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_49803354421","likedTime":"1525688273598000","lvCount":"10","nick":"士大夫","uvCount":"510","voteTime":"小于1分钟前"}}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_49803354421","likedTime":"1525688280154000","lvCount":"18","nick":"顺丰顺丰的","uvCount":"511","voteTime":"小于1分钟前"}}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_49803354421","likedTime":"1525688282204000","lvCount":"48","nick":"所发生的","uvCount":"512","voteTime":"小于1分钟前"}}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}

Process finished with exit code 0

8、取消点赞

只需要改成如下代码即可

# Cancel previously-cast votes: same page crawl as batch voting, but POST
# each news ID to the cancel endpoint instead of the vote URL.
CANCEL_UA = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
for page_num in range(1, 5):

    response_index = requests.get(
        url='https://dig.chouti.com/all/hot/recent/%s' % page_num,
        headers=CANCEL_UA,
    )
    page_soup = BeautifulSoup(response_index.text, 'html.parser')
    content_div = page_soup.find(attrs={'id': 'content-list'})
    for news_item in content_div.find_all(attrs={'class': 'item'}):
        part2_tag = news_item.find(attrs={'class': 'part2'})
        news_id = part2_tag.get('share-linkid')

        # Revoke the vote on this news item.
        r1 = requests.post(
            url='https://dig.chouti.com/vote/cancel/vote.do',
            headers=CANCEL_UA,
            data={
                'linksId': news_id,
            },
            cookies=r1_cookie_dict,
        )
        print(r1.text)

模拟登陆github

import requests
from bs4 import BeautifulSoup

# 1. GET the login page: it sets the session cookie and embeds a CSRF token
#    in a hidden <input name="authenticity_token"> field.
r1 = requests.get(
    url='https://github.com/login'
)
s1 = BeautifulSoup(r1.text, 'html.parser')
token = s1.find(name='input', attrs={'name': 'authenticity_token'}).get('value')
print(token)

r1_cookie_dict = r1.cookies.get_dict()

# 2. POST credentials + the CSRF token + the first-visit cookie to the
#    session endpoint, mirroring the browser's form submission.
login_form = {
    'commit': 'Sign in',
    'utf8': '✓',
    'authenticity_token': token,
    'login': '[email protected]',
    'password': 'xxx*+',
}
r2 = requests.post(
    url='https://github.com/session',
    data=login_form,
    headers={
        'Host': 'github.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    },
    cookies=r1_cookie_dict,
)

print(r2.text)

猜你喜欢

转载自blog.csdn.net/u013210620/article/details/80227049
今日推荐