pyspider_demo

#编写脚本文件
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2017-11-03 15:02:38
# Project: 11_03

from pyspider.libs.base_handler import *
from pyspider.database.mysql.mysqldb import SQL

class Handler(BaseHandler):
    """pyspider handler that crawls a fjsen.com news list and stores titles.

    Crawl flow: ``on_start`` -> ``index_page`` (list pages and pagination)
    -> ``detail_page`` (extract the title) -> ``on_result`` (insert into the
    ``news1`` MySQL table through the ``SQL`` helper).
    """

    crawl_config = {
        'itag': '2'
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the first list page."""
        self.crawl('http://www.fjsen.com/j/node_94962.htm', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every article link and every pagination link on a list page."""
        # Article links go to detail_page for extraction.
        for each in response.doc('ul.list_page>li>a').items():
            self.crawl(each.attr.href, callback=self.detail_page)
        # Pagination links are fed back into index_page.
        for each in response.doc('div#displaypagenum>center>a').items():
            self.crawl(each.attr.href, callback=self.index_page)

    @config(priority=2)
    def detail_page(self, response):
        """Return a dict holding the article title taken from the page header."""
        return {
            "title": response.doc('div.cont_head>h1').text(),
        }

    def on_result(self, result):
        """Insert a result dict into the ``news1`` table.

        ``None`` results are ignored. NOTE(review): presumably pyspider calls
        this for every callback, including ones that return nothing such as
        ``index_page`` -- confirm against the pyspider docs.
        """
        # Check before constructing SQL(): its constructor opens a MySQL
        # connection, which is wasted work when there is nothing to store.
        if result is None:
            return
        # NOTE(review): a new SQL() (and therefore a new connection) is
        # created for every result and nothing visible here closes it.
        sql = SQL()
        sql.insert('news1', **result)
#将数据存到mysql数据库中:把下面的文件保存到 pyspider\database\mysql 目录下,文件命名为 mysqldb.py
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from six import itervalues
import MySQLdb

class SQL(object):
    """Small MySQLdb wrapper that inserts keyword/value rows into a table.

    Attributes:
        connection: True when the connection and cursor were created
            successfully, False otherwise.
        conn: the MySQLdb connection object, or None if connecting failed.
        cursor: the cursor obtained from ``conn``, or None if connecting
            failed.
    """

    def __init__(self, hosts='127.0.0.1', username='root', password='123456',
                 database='game_main', charsets='utf8'):
        """Connect to MySQL and prepare a cursor.

        All parameters are optional; the defaults are the connection settings
        this module has always used, so ``SQL()`` behaves as before.

        Args:
            hosts: MySQL server host.
            username: MySQL user name.
            password: MySQL password.
            database: database (schema) to use.
            charsets: connection character set; also sent via ``set names``.

        Connection errors are not raised: they are printed and
        ``self.connection`` stays False.
        """
        self.connection = False
        # Define the attributes up front so they exist even if connecting fails.
        self.conn = None
        self.cursor = None
        try:
            self.conn = MySQLdb.connect(host=hosts, user=username, passwd=password,
                                        db=database, charset=charsets)
            self.cursor = self.conn.cursor()
            self.cursor.execute("set names " + charsets)
            self.connection = True
        except Exception as e:
            print("Cannot Connect To Mysql!\n%s" % e)

    def escape(self, string):
        """Return ``string`` formatted with ``%s``.

        NOTE: despite the name, no quoting or escaping is performed; table
        and column names passed to ``insert`` must come from trusted code.
        """
        return '%s' % string

    def insert(self, tablename=None, **values):
        """Insert one row into ``tablename`` and commit.

        Args:
            tablename: target table name (interpolated into the statement).
            **values: column name -> value; values are passed to the driver
                as query parameters, column names are interpolated.

        Returns:
            True if the statement executed and was committed; False if there
            is no connection or if executing/committing raised.
        """
        if not self.connection:
            return False

        tablename = self.escape(tablename)
        if values:
            # Fix the column order once so names and parameters always align.
            columns = list(values)
            _keys = ",".join(self.escape(k) for k in columns)
            _values = ",".join(['%s'] * len(columns))
            sql_query = "insert into %s (%s) values (%s)" % (tablename, _keys, _values)
            params = [values[k] for k in columns]
        else:
            sql_query = "replace into %s default values" % tablename
            params = None

        try:
            if params is None:
                self.cursor.execute(sql_query)
            else:
                self.cursor.execute(sql_query, params)
            self.conn.commit()
            return True
        except Exception as e:
            print("An Error Occured: %s" % e)
            return False
#pyspider中如需js渲染,在self.crawl调用中加上参数 fetch_type='js'


猜你喜欢

转载自blog.csdn.net/shuangyueliao/article/details/78443320