1. FormRequest: automatic login by submitting the form manually
# -*- coding: utf-8 -*-
import scrapy
import re


class GithubSpider(scrapy.Spider):
    """Log in to GitHub by building the login POST request by hand.

    Fetches the login page, extracts the hidden form fields (CSRF token
    etc.), and replays them together with the credentials via
    ``scrapy.FormRequest``.
    """

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Extract the hidden login-form fields and POST the credentials."""
        # GitHub's login form carries hidden bookkeeping fields that must be
        # echoed back for the POST to be accepted.
        authenticity_token = response.xpath(
            "//input[@name='authenticity_token']/@value").extract_first()
        utf8 = response.xpath("//input[@name='utf8']/@value").extract_first()
        commit = response.xpath("//input[@name='commit']/@value").extract_first()

        post_data = dict(
            login="812******[email protected]",
            password="******",
            authenticity_token=authenticity_token,
            utf8=utf8,
            commit=commit,
        )

        # BUG FIX: the original URL literal was " https://github.com/session "
        # with leading/trailing spaces, which is not a valid request URL.
        yield scrapy.FormRequest(
            "https://github.com/session",
            formdata=post_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Crude login check: look for the account id in the landing page."""
        # with open("a.html", "w", encoding="utf-8") as f:
        #     f.write(response.body.decode())
        print(re.findall("812406210", response.body.decode()))
2. FormRequest.from_response: automatic login by simulating form submission
# -*- coding: utf-8 -*-
import scrapy
import re


class Github2Spider(scrapy.Spider):
    """Log in to GitHub using ``FormRequest.from_response``.

    ``from_response`` locates the <form> in the page and pre-populates its
    hidden inputs (CSRF token etc.), so only the credentials need to be
    supplied; each dict key must match an input tag's ``name`` attribute.
    """

    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Submit the login form found in the response."""
        # BUG FIX: the keyword argument must be `formdata` (the original
        # used `FormData`, a TypeError), and the keys must match the form's
        # input names exactly — "login"/"password", with no padding spaces
        # or extra words (the original had " the Login ").
        yield scrapy.FormRequest.from_response(
            response,
            formdata={
                "login": "****@qq.com",
                "password": "***********",
            },
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print any matches of the probe pattern in the landing page."""
        print(re.findall("........", response.body.decode()))
3. Notes
a) FormRequest — build the POST request yourself, supplying every form field (including the hidden ones) explicitly.
b) FormRequest.from_response — let Scrapy find the form in the response and fill in the hidden fields for you.