处理登录表单

处理登录表单

session.cookies.save()
import requests
session = requests.session()
​
post_url = 'https://passport.csdn.net/account/login'
agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
headers = {
    "Host": "passport.csdn.net",
   
    "Referer":"https://www.csdn.net/",
    'User-Agent': agent
}
postdata = {
    'username': 'liudongdong19',
    'password': 'ASDF)(*&7890liu'
}
​
login_page = session.post(post_url, data=postdata, headers=headers)
print(login_page.status_code)
session.cookies.save()
200
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-7-29c347632458> in <module>()
     17 login_page = session.post(post_url, data=postdata, headers=headers)
     18 print(login_page.status_code)
---> 19 session.cookies.save()

AttributeError: 'RequestsCookieJar' object has no attribute 'save'

10.1.2 处理cookies,让网页记得你的登录

import requests
import http.cookiejar as cookielib

session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies')
try:
    session.cookies.load(ignore_discard=True)
except:
print("Cookie 未能加载")
  File "<ipython-input-9-5044f0703fca>", line 6
    print("Cookie 未能加载")
        ^
IndentationError: expected an indented block



def isLogin():
    """Report whether the module-level session is already logged in.

    Requests the wp-admin profile page without following redirects and
    treats an HTTP 200 answer as "logged in".

    Returns:
        bool: True when the profile page responds with status 200,
        False for any other status code.
    """
    profile_url = "http://www.santostang.com/wp-admin/profile.php"
    response = session.get(profile_url, headers=headers, allow_redirects=False)
    return response.status_code == 200

# Script entry point: define the request headers that isLogin() reads from
# module scope, then report whether the session is already logged in.
if __name__ == '__main__':
    # User-Agent string sent with each request.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    # Headers passed along with every session request.
    headers = {
        "Host": "www.santostang.com",
        "Origin":"http://www.santostang.com",
        "Referer":"http://www.santostang.com/wp-login.php",
        'User-Agent': agent
    }
    if isLogin():
        print('您已经登录')
10.1.3 完整的登录代码

import requests
import http.cookiejar as cookielib
​
# Use one HTTP session for every request and back its cookies with an
# LWP-format file named "cookies", so they can be reloaded on a later run.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies')
try:
    # ignore_discard=True also restores cookies marked as session-only.
    session.cookies.load(ignore_discard=True)
except OSError:
    # Raised when the cookie file is missing or unreadable; LoadError
    # (malformed file) is an OSError subclass. Continue with an empty jar.
    print("Cookie 未能加载")
​
def isLogin():
    """Check the login state by requesting the user's profile page.

    The profile page is fetched through the module-level session without
    following redirects; an HTTP 200 answer is taken to mean "logged in".

    Returns:
        bool: True when the profile page responds with status 200,
        False for any other status code.
    """
    profile_url = "http://www.santostang.com/wp-admin/profile.php"
    response = session.get(profile_url, headers=headers, allow_redirects=False)
    return response.status_code == 200
    
def login(secret, account):
    """Post the login form and persist the session's cookies.

    Sends the credentials to wp-login.php through the module-level
    ``session`` and prints the HTTP status code of the response. When the
    request completes, the session's cookie jar is saved so a later run can
    reuse it (assumes ``session.cookies`` is a file-backed jar such as
    ``LWPCookieJar``).

    Args:
        secret: Password, sent as the ``pwd`` form field.
        account: User name, sent as the ``log`` form field.

    Returns:
        None. A failed request is reported on stdout and nothing is saved.
    """
    post_url = 'http://www.santostang.com/wp-login.php'
    postdata = {
        'pwd': secret,
        'log': account,
        'rememberme': 'true',
        'redirect_to': 'http://www.santostang.com/wp-admin/',
        'testcookie': 1,
    }
    try:
        # No captcha is involved here, so a plain POST is enough.
        login_page = session.post(post_url, data=postdata, headers=headers)
    except requests.RequestException as err:
        # Report the failure instead of silently swallowing it; there are
        # no new cookies to persist in this case.
        print("Login request failed: %s" % err)
        return
    print(login_page.status_code)
    # Keep session-only cookies as well, mirroring load(ignore_discard=True);
    # with the default they would be dropped from the saved file.
    session.cookies.save(ignore_discard=True)
    
# Script entry point: define the request headers that isLogin() and login()
# read from module scope, then log in only if the session is not already
# authenticated.
if __name__ == '__main__':
    # User-Agent string sent with each request.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    # Headers passed along with every session request.
    headers = {
        "Host": "www.santostang.com",
        "Origin":"http://www.santostang.com",
        "Referer":"http://www.santostang.com/wp-login.php",
        'User-Agent': agent
    }
    if isLogin():
        print('您已经登录') 
    else:
        # login() takes the password first, then the account name.
        login('a12345', 'test') 
Cookie 未能加载
200
10.2 验证码的处理
10.2.2 人工方法处理验证码

def get_si_code():
    """Fetch the registration page and extract its hidden ``si_code_reg`` value.

    The value changes dynamically, so it is scraped from the page with the
    module-level ``session`` before registering.

    Returns:
        str: The ``value`` attribute of the hidden ``si_code_reg`` field.

    Raises:
        ValueError: If the hidden field is not present in the page.
    """
    index_url = 'http://www.santostang.com/wp-login.php?action=register'
    index_page = session.get(index_url, headers=headers)
    pattern = r'name="si_code_reg" type="hidden"  value="(.*?)"'
    match = re.search(pattern, index_page.text)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("si_code_reg field not found in %s" % index_url)
    return match.group(1)

def get_captcha(si_code):
    """Download the captcha image and ask the user to type what it shows.

    The image is written to ``captcha.jpg`` in the current working
    directory and opened with PIL's viewer. If it cannot be displayed, the
    file's absolute path is printed so the user can open it manually.

    Args:
        si_code: Captcha identifier string appended to the captcha URL.

    Returns:
        str: The text the user typed at the prompt.
    """
    captcha_url = "http://www.santostang.com/wp-content/plugins/si-captcha-for-wordpress/captcha/securimage_show.php?si_sm_captcha=1&si_form_id=reg" + si_code
    r = session.get(captcha_url, headers=headers)
    # The with-block closes the file; no explicit close() is needed.
    with open('captcha.jpg', 'wb') as f:
        f.write(r.content)
    try:
        # The context manager releases the image even if show() fails.
        with Image.open('captcha.jpg') as im:
            im.show()
    except Exception:
        # Best effort: fall back to telling the user where the file is,
        # without also swallowing KeyboardInterrupt/SystemExit.
        print(u'请到 %s 目录找到captcha.jpg 手动输入' % os.path.abspath('captcha.jpg'))
    return input("please input the captcha\n>")

def register(account, email, si_code):
    """Submit the registration form and print the response status code.

    Args:
        account: Value sent as the ``user_login`` form field.
        email: Value sent as the ``user_email`` form field.
        si_code: Value sent as ``si_code_reg``; also passed to
            ``get_captcha`` to obtain the captcha text.
    """
    post_url = 'http://www.santostang.com/wp-login.php?action=register'
    form = {
        'user_login': account,
        'user_email': email,
        'si_code_reg': si_code,
        'redirect_to': '',
        # Ask the user for the captcha text via get_captcha().
        'captcha': get_captcha(si_code),
    }
    # Submit the registration form as a POST request.
    response = session.post(post_url, data=form, headers=headers)
    # The original author treats a printed 200 as a successful registration.
    print(response.status_code)

import requests
import re
import os
from PIL import Image
# Script entry point: set up the headers and session that the helper
# functions read from module scope, then run the registration flow.
if __name__ == '__main__':
    # User-Agent string sent with each request.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    # Headers passed along with every session request.
    headers = {
        "Host": "www.santostang.com",
        "Origin":"http://www.santostang.com",
        "Referer":"http://www.santostang.com/wp-login.php",
        'User-Agent': agent
    }
    session = requests.session()
    # Fetch the si_code value needed to request the matching captcha.
    si_code = get_si_code()
    # Call the registration function to register the account.
    account = '18341432113'
    email = '[email protected]'
    register(account, email, si_code)
please input the captcha
>3p5e
200
10.2.3 OCR处理验证码

from PIL import Image

# Step 1: convert the captcha to 8-bit grayscale, preview it and save a copy.
im = Image.open('captcha.jpg')
gray = im.convert('L')
gray.show()
gray.save("captcha_gray.jpg")

# Step 2: binarize. The lookup table maps each of the 256 gray levels to 0
# when it is below the threshold and to 1 otherwise.
threshold = 150
table = [0 if level < threshold else 1 for level in range(256)]
out = gray.point(table, '1')
out.show()
out.save("captcha_thresholded.jpg")

import pytesseract

# Run OCR on the thresholded captcha and print the recognized text.
# (The same four statements were previously repeated twice, which showed
# the image and ran the OCR a second time for no benefit.)
th = Image.open('captcha_thresholded.jpg')
th.show()
print(pytesseract.image_to_string(th))

猜你喜欢

转载自blog.csdn.net/liudongdong19/article/details/81139725