跟着网上的一个教学视频,编写了爬取公众号文章的程序。代码结构如下:login.py 是登录模块;cookies.txt 保存登录后获取到的 cookie 信息;cookie.py 读取 cookie 文件,登录公众号平台并查询公众号文章,然后对结果进行解析。
一、模拟登录获取cookie,登录过程中需要用手机扫描二维码
#-*- coding:UTF-8 -*- #编码设置 from selenium import webdriver import time import json driver = webdriver.Chrome() #谷歌驱动 driver.get('https://mp.weixin.qq.com/') #微信公众号网址 #用户名 driver.find_element_by_xpath('//*[@id="header"]/div[2]/div/div/form/div[1]/div[1]/div/span/input').clear() driver.find_element_by_xpath('//*[@id="header"]/div[2]/div/div/form/div[1]/div[1]/div/span/input').send_keys('[email protected]') time.sleep(1) #密码 driver.find_element_by_xpath('//*[@id="header"]/div[2]/div/div/form/div[1]/div[2]/div/span/input').clear() driver.find_element_by_xpath('//*[@id="header"]/div[2]/div/div/form/div[1]/div[2]/div/span/input').send_keys('123456') time.sleep(1) #记住密码 driver.find_element_by_xpath('//*[@id="header"]/div[2]/div/div/form/div[3]/label').click() time.sleep(1) #登录 driver.find_element_by_xpath('//*[@id="header"]/div[2]/div/div/form/div[4]/a').click() time.sleep(15) #获取cookie cookies = driver.get_cookies() print cookies cookie = {} for items in cookies: cookie[items.get('name')] = items.get('value') with open('cookies.txt','w') as file: file.write(json.dumps(cookie)) driver.close()
二、读取cookie信息,自动登录并查询公众号文章
#-*- coding:UTF-8 -*- #编码设置 import requests import json import re import random import time #打开cookie文件 with open('cookies.txt','r') as file1: cookie = file1.read() #将cookie转成json格式 cookies = json.loads(cookie) #微信公众号网址,根据cookie登录系统,获取返回的信息,获取token url = 'https://mp.weixin.qq.com/' response = requests.get(url,cookies = cookies) token = re.findall(r'token=(\d+)',str(response.url))[0] #获取返回的url token print token #查询条件 query = 'python' #查询链接 search_url = 'https://mp.weixin.qq.com/cgi-bin/operate_appmsg?sub=check_appmsg_copyright_stat' #http header headers = { 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', 'Referer':'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=10&share=1&token=327625735&lang=zh_CN', 'Host':'mp.weixin.qq.com', } #请求的数据 data = { 'token':token, #token 'lang':'zh_CN', #中文 'f':'json', #格式 'ajax':1, #异步 'random':random.random(), #随机数 'url':query,#查询条件 'begin':'0', 'count':'3',#每页多少条 } search_response = requests.post(search_url,cookies = cookies,data = data,headers = headers) #print search_response.text #总条数 max_num = search_response.json().get('total') #计算总页数 num = int(int(max_num/3)) begin = 0 while num +1>0: data = { 'token':token, 'lang':'zh_CN', 'f':'json', 'ajax':1, 'random':random.random(), 'url':query, 'begin':'{}'.format(str(begin)), 'count':'3', } search_response = requests.post(search_url,cookies = cookies,data = data,headers = headers) #print search_response.text #获取查询结果list content = search_response.json().get('list') #遍历获取title try: if content: for items in content: print items.get('title') print items.get('url') num -=1 begin = int(begin) begin += 3 time.sleep(1) except: print 'error'
三、查询结果
1267686864 Intermediate Python(中级PYTHON) http://mp.weixin.qq.com/s?__biz=MzA4MTA5MjE5Mw==&mid=400572886&idx=1&sn=d3c9c4d37351bc4ee8a8758c0d70c13a#rd 【Python编程】Python Style Guide http://mp.weixin.qq.com/s?__biz=MzI2NzUxMDg2NQ==&mid=2247483873&idx=1&sn=2314305172636f4817f17c4be09aaf30&chksm=eafcf4dfdd8b7dc918d7bfa12586425c697ecf4500315d01c8c42d0d68da1817c002bb974b28#rd 用 Python 实现 Python 解释器 http://mp.weixin.qq.com/s?__biz=MjM5NjQ4MjYwMQ==&mid=2664608140&idx=2&sn=ab178acde33b8007ab448fdfaa7895e9#rd Python进阶:Python魔法方法 http://mp.weixin.qq.com/s?__biz=MzIzMDQyMjcxOA==&mid=2247484471&idx=1&sn=c8dac6b9f475c84462609b0ebddfd41b&chksm=e8b2e5e6dfc56cf0af063df06d61c7917f685369497fae47b7d9a4aa64ac1da0e2505e94867a#rd 【Python编程】Python轻量级数据库SQLite http://mp.weixin.qq.com/s?__biz=MzI2NzUxMDg2NQ==&mid=2247484161&idx=1&sn=4a63700c7c418a912a954b86c4649019&chksm=eafcf63fdd8b7f296deea71b8db87fa25258f12bc6b166ef43664df2aef4f03146cc28168a48#rd Python For Data Analysis|Python书籍 http://mp.weixin.qq.com/s?__biz=MzA4OTg5NzY3NA==&mid=2649344821&idx=1&sn=c194b5190a52775348e082220f6231a1#rd Python入门教程脱水版 | 2. 
Python风格 http://mp.weixin.qq.com/s?__biz=MzIzMzI0NjkwMw==&mid=2652210070&idx=1&sn=09eb2d16f90c16391e94788a6429100a&chksm=f369ba4ec41e3358f24b76ce492ae0045df4dc918675b56d16802574bd5bf35739f43557ee39#rd 用 Python 实现 Python 解释器(上) http://mp.weixin.qq.com/s?__biz=MjM5NjQ4MjYwMQ==&mid=2664608140&idx=1&sn=f915e7ac0d9f2bc1eedf37f86dd722ea#rd 继续浅谈Python Python web开发 http://mp.weixin.qq.com/s?__biz=MzI5NzYwNjE3Ng==&mid=2247483915&idx=1&sn=bd4a62384236f55bf8e59017fe1704e2&chksm=ecb3cf44dbc44652882e8b27321642a6dd805a928e4a88d60f54eecc349058a8800b0c13ad92#rd Python pip http://mp.weixin.qq.com/s?__biz=MzI4MzIzNTUxMw==&mid=2247483836&idx=1&sn=5bb143dca38dda8daec4230ad6486ae5&chksm=eb8c846adcfb0d7ce33f65aa7461efc6b250be96202336ffbacc640c5ebd2790239adc5183d0#rd Python讲义 http://mp.weixin.qq.com/s?__biz=MzI2MTQ2NDM5Nw==&mid=2247484246&idx=1&sn=6e5f340dd5618c13c0ca731f2cd12625&chksm=ea5b4b74dd2cc26225f81729ea62f86a38407e72ea84d76df5db9a58d9ffb325ff81d764e0fb#rd Python 之旅 http://mp.weixin.qq.com/s?__biz=MzA3NDk1NjI0OQ==&mid=2247483882&idx=1&sn=33a42d9b74fc2d4df2dba86faa6b7d7d&chksm=9f76ad5ca801244a59585c3c53b43baceeab3dc04157113e4d0e244cadb6cb653e2ed836086f#rd