# -*- coding: utf-8 -*-
import scrapy
import sys
import io
# BUG FIX: the original wrote `sys.stout = ...` (typo), which only created an
# unused attribute on the sys module. Rebinding sys.stdout actually re-encodes
# console output (gb18030) so Chinese text prints correctly on a GBK console.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="gb18030")
from scrapy.selector import Selector, HtmlXPathSelector
from pyquery import PyQuery
from scrapy.http import Request
# NOTE(review): scrapy.dupefilter is the legacy module name (scrapy.dupefilters
# in newer Scrapy); import kept as-is to match the project's Scrapy version.
from scrapy.dupefilter import RFPDupeFilter
from scrapy.http.cookies import CookieJar
class ChoutiSpider(scrapy.Spider):
    """Log in to dig.chouti.com and POST an upvote for every front-page item."""
    name = 'chouti'
    allowed_domains = ['chouti.com']
    start_urls = ['http://dig.chouti.com/']
    # Cookie dict captured from the first response; reused on login/vote requests.
    cookies_list = None

    def parse(self, response):
        """First-visit callback: capture session cookies, then POST the login form."""
        cookie_obj = CookieJar()
        # BUG FIX: the original never called extract_cookies(), so
        # cookie_obj._cookies stayed empty and every subsequent request was
        # sent without the session cookie, making login/votes ineffective.
        cookie_obj.extract_cookies(response, response.request)
        self.cookies_list = cookie_obj._cookies
        yield Request(
            url="https://dig.chouti.com/login",
            method="POST",
            # NOTE(review): placeholder credentials — substitute a real phone
            # number and password before running.
            body="phone=86XXXXXXXXX&password=XXXXXXXXXX&oneMonth=1",
            headers={"content-type": "application/x-www-form-urlencoded; charset=UTF-8"},
            cookies=self.cookies_list,
            callback=self.login)

    def login(self, response):
        """Login-POST callback: log the server's reply, then fetch the front page."""
        # (fixed parameter typo: was `resposne`; Scrapy passes it positionally)
        print(response.text)
        yield Request(url="https://dig.chouti.com/", callback=self.good)

    def good(self, response):
        """Front-page callback: issue one vote POST per listed item."""
        content = str(response.body, encoding="utf-8")
        pq = PyQuery(content)
        items = pq.find(".item .part2")
        for item in items.items():
            # share-linkid carries the numeric id the vote endpoint expects.
            url = "https://dig.chouti.com/link/vote?linksId=%s" % item.attr("share-linkid")
            yield Request(url=url,
                          method="POST",
                          cookies=self.cookies_list,
                          callback=self.show)

    def show(self, response):
        """Vote-request callback: print the server's reply for inspection."""
        print(response.text)
# Scrapy auto-login and upvote bot for chouti (抽屉).
# Adapted from blog.csdn.net/u014248032/article/details/83084580