处理登录表单
# NOTE(review): this call appears before `session` is created below; nothing
# earlier in this listing defines `session`.
session.cookies.save()
import requests
# A Session object is used so that `session.cookies` is available after the POST.
session = requests.session()
post_url = 'https://passport.csdn.net/account/login'
agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
# Headers sent with the login POST.
headers = {
"Host": "passport.csdn.net",
"Referer":"https://www.csdn.net/",
'User-Agent': agent
}
# NOTE(review): the account name and password are hard-coded in plain text.
postdata = {
'username': 'liudongdong19',
'password': 'ASDF)(*&7890liu'
}
# Submit the login form and print the HTTP status code of the response.
login_page = session.post(post_url, data=postdata, headers=headers)
print(login_page.status_code)
# NOTE(review): the session's default cookie jar is a RequestsCookieJar, which
# has no save() method, so this line raises AttributeError.
session.cookies.save()
200
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-7-29c347632458> in <module>()
17 login_page = session.post(post_url, data=postdata, headers=headers)
18 print(login_page.status_code)
---> 19 session.cookies.save()
AttributeError: 'RequestsCookieJar' object has no attribute 'save'
10.1.2 处理cookies,让网页记得你的登录
import requests
import http.cookiejar as cookielib

# Replace the session's default cookie jar with a file-backed LWPCookieJar so
# cookies can be written to, and read back from, the local file "cookies".
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies')
try:
    # ignore_discard=True also loads cookies that are marked to be discarded.
    session.cookies.load(ignore_discard=True)
except OSError:
    # Covers a missing or unreadable file; cookielib.LoadError (malformed
    # cookie file) is a subclass of OSError too.
    print("Cookie 未能加载")
File "<ipython-input-9-5044f0703fca>", line 6
print("Cookie 未能加载")
^
IndentationError: expected an indented block
def isLogin():
    """Report whether the current session is already logged in.

    Requests the profile page without following redirects and treats an
    HTTP 200 response as "logged in"; any other status code counts as
    "not logged in".

    Uses the module-level ``session`` and ``headers`` objects.

    Returns:
        bool: True if the profile page answered with status code 200.
    """
    url = "http://www.santostang.com/wp-admin/profile.php"
    response = session.get(url, headers=headers, allow_redirects=False)
    return response.status_code == 200
if __name__ == '__main__':
    # Request headers for www.santostang.com; isLogin() reads this
    # module-level `headers` dict.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    headers = {
        "Host": "www.santostang.com",
        "Origin": "http://www.santostang.com",
        "Referer": "http://www.santostang.com/wp-login.php",
        'User-Agent': agent,
    }
    if isLogin():
        print('您已经登录')
10.1.3 完整的登录代码
import requests
import http.cookiejar as cookielib

# Replace the session's default cookie jar with a file-backed LWPCookieJar so
# cookies can be written to, and read back from, the local file "cookies".
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies')
try:
    # ignore_discard=True also loads cookies that are marked to be discarded.
    session.cookies.load(ignore_discard=True)
except OSError:
    # Covers a missing or unreadable file; cookielib.LoadError (malformed
    # cookie file) is a subclass of OSError too.
    print("Cookie 未能加载")
def isLogin():
    """Report whether the current session is already logged in.

    The check is done by requesting the user's profile page without
    following redirects: an HTTP 200 response is treated as "logged in",
    any other status code as "not logged in".

    Uses the module-level ``session`` and ``headers`` objects.

    Returns:
        bool: True if the profile page answered with status code 200.
    """
    url = "http://www.santostang.com/wp-admin/profile.php"
    response = session.get(url, headers=headers, allow_redirects=False)
    return response.status_code == 200
def login(secret, account):
    """Submit the login form and save the session cookies to disk.

    Posts the form to wp-login.php using the module-level ``session`` and
    ``headers``, prints the HTTP status code of the response, and then
    calls ``session.cookies.save()``.

    Args:
        secret: Password, sent as the ``pwd`` form field.
        account: User name, sent as the ``log`` form field.
    """
    post_url = 'http://www.santostang.com/wp-login.php'
    postdata = {
        'pwd': secret,
        'log': account,
        'rememberme': 'true',
        'redirect_to': 'http://www.santostang.com/wp-admin/',
        'testcookie': 1,
    }
    try:
        # No captcha is needed here, so a plain form POST is enough.
        login_page = session.post(post_url, data=postdata, headers=headers)
    except requests.RequestException as exc:
        # Still best-effort, but report the failure instead of hiding it.
        print('Login request failed: %s' % exc)
    else:
        print(login_page.status_code)
    # NOTE(review): save() runs with its defaults, while this file loads the
    # jar with ignore_discard=True - confirm whether cookies marked to be
    # discarded should be persisted as well.
    session.cookies.save()
if __name__ == '__main__':
    # Request headers for www.santostang.com; isLogin() and login() read this
    # module-level `headers` dict.
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    headers = {
        "Host": "www.santostang.com",
        "Origin": "http://www.santostang.com",
        "Referer": "http://www.santostang.com/wp-login.php",
        'User-Agent': agent,
    }
    # Only submit the login form when the current session is not logged in.
    if isLogin():
        print('您已经登录')
    else:
        login('a12345', 'test')
Cookie 未能加载
200
10.2 验证码的处理
10.2.2 人工方法处理验证码
def get_si_code():
    """Fetch the registration page and return its ``si_code_reg`` value.

    ``si_code`` is a dynamically changing parameter carried in a hidden form
    field, so it is read from the page each time it is needed. Uses the
    module-level ``session`` and ``headers`` objects.

    Returns:
        str: The value of the hidden ``si_code_reg`` input.

    Raises:
        ValueError: If the page contains no ``si_code_reg`` hidden field.
    """
    index_url = 'http://www.santostang.com/wp-login.php?action=register'
    # Download the registration form that carries the si_code.
    index_page = session.get(index_url, headers=headers)
    pattern = r'name="si_code_reg" type="hidden" value="(.*?)"'
    match = re.search(pattern, index_page.text)
    if match is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError('si_code_reg field not found in %s' % index_url)
    return match.group(1)
def get_captcha(si_code):
    """Download the captcha image and ask the user to type its text.

    The image is written to ``captcha.jpg`` in the working directory and
    opened in an image viewer; if that fails, the file's absolute path is
    printed so the user can open it manually. Uses the module-level
    ``session`` and ``headers`` objects.

    Args:
        si_code: Value appended to the captcha URL.

    Returns:
        str: The text entered by the user.
    """
    # NOTE(review): si_code is appended directly after "si_form_id=reg" with
    # no separate parameter name - confirm this is what the endpoint expects.
    captcha_url = "http://www.santostang.com/wp-content/plugins/si-captcha-for-wordpress/captcha/securimage_show.php?si_sm_captcha=1&si_form_id=reg" + si_code
    r = session.get(captcha_url, headers=headers)
    # The with-block closes the file; no explicit close() is needed.
    with open('captcha.jpg', 'wb') as f:
        f.write(r.content)
    try:
        # Context manager releases the image handle even if show() fails.
        with Image.open('captcha.jpg') as im:
            im.show()
    except Exception:
        # Displaying is best-effort; fall back to telling the user where the
        # file is. KeyboardInterrupt/SystemExit are no longer swallowed.
        print(u'请到 %s 目录找到captcha.jpg 手动输入' % os.path.abspath('captcha.jpg'))
    captcha = input("please input the captcha\n>")
    return captcha
def register(account, email,si_code):
    """Submit the registration form, including a user-entered captcha.

    Uses the module-level ``session`` and ``headers`` objects and prints the
    HTTP status code of the response.

    Args:
        account: User name, sent as the ``user_login`` form field.
        email: E-mail address, sent as the ``user_email`` form field.
        si_code: Token sent as ``si_code_reg`` and passed to get_captcha().
    """
    post_url = 'http://www.santostang.com/wp-login.php?action=register'
    postdata = {
        'user_login': account,
        'user_email': email,
        'si_code_reg': si_code,
        'redirect_to': '',
    }
    # Ask the user for the captcha text via get_captcha().
    postdata["captcha"] = get_captcha(si_code)
    # Send the registration form.
    register_page = session.post(post_url, data=postdata, headers=headers)
    # The original note treats a printed 200 as a successful registration.
    # NOTE(review): a 200 status only shows that the request completed -
    # confirm against the response body before relying on it.
    print(register_page.status_code)
import requests
import re
import os
from PIL import Image
if __name__ == '__main__':
    # Module-level `headers` and `session` are read by get_si_code(),
    # get_captcha() and register().
    agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    headers = {
        "Host": "www.santostang.com",
        "Origin": "http://www.santostang.com",
        "Referer": "http://www.santostang.com/wp-login.php",
        'User-Agent': agent,
    }
    session = requests.session()
    # Fetch the si_code that has to accompany the captcha.
    si_code = get_si_code()
    # Run the registration with the account details below.
    account = '18341432113'
    # NOTE(review): this literal is not a valid address; it looks like the
    # original e-mail was replaced by the page's e-mail protection. Supply a
    # real address before running.
    email = '[email protected]'
    register(account, email, si_code)
please input the captcha
>3p5e
200
10.2.3 OCR处理验证码
from PIL import Image

# Step 1: convert the captcha to grayscale (mode 'L'), display it, and save it.
im = Image.open('captcha.jpg')
gray = im.convert('L')
gray.show()
gray.save("captcha_gray.jpg")

# Step 2: binarise. Gray levels below the threshold map to 0, all others to 1.
threshold = 150
table = [0 if level < threshold else 1 for level in range(256)]
out = gray.point(table, '1')
out.show()
out.save("captcha_thresholded.jpg")
import pytesseract

# Run OCR on the thresholded captcha and print the recognised text.
# (The listing repeated this cell twice verbatim; one copy is enough.)
th = Image.open('captcha_thresholded.jpg')
th.show()
print(pytesseract.image_to_string(th))
处理登录表单
猜你喜欢
转载自blog.csdn.net/liudongdong19/article/details/81139725
今日推荐
周排行