# pyspider handler script
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2017-11-03 15:02:38
# Project: 11_03
from pyspider.libs.base_handler import *
from pyspider.database.mysql.mysqldb import SQL
class Handler(BaseHandler):
    """pyspider crawler for the news section index on fjsen.com.

    Crawls the section index once a day, follows article links and
    pagination links, extracts each article's title, and persists the
    result to MySQL via on_result.
    """

    crawl_config = {
        'itag': '2'  # bump this tag to force re-crawling of already-seen pages
    }

    @every(minutes=24 * 60)  # run once per day
    def on_start(self):
        """Seed the crawl with the section index page."""
        self.crawl('http://www.fjsen.com/j/node_94962.htm', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)  # treat an index page as fresh for 10 days
    def index_page(self, response):
        """Queue every article link and every pagination link on the page."""
        for each in response.doc('ul.list_page>li>a').items():
            self.crawl(each.attr.href, callback=self.detail_page)
        for each in response.doc('div#displaypagenum>center>a').items():
            self.crawl(each.attr.href, callback=self.index_page)

    @config(priority=2)  # detail pages are scheduled ahead of index pages
    def detail_page(self, response):
        """Extract the article title from a detail page."""
        return {
            "title": response.doc('div.cont_head>h1').text(),
        }

    def on_result(self, result):
        """Persist one scraped record into the `news1` MySQL table.

        Fix: check for an empty result BEFORE constructing SQL() — the
        original opened a database connection even when there was
        nothing to store. Also uses `is None` instead of `== None`.
        """
        if result is None:
            return
        sql = SQL()
        sql.insert('news1', **result)
# Persist data to MySQL: save the code below as mysqldb.py under the
# pyspider/database/mysql/ directory.
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from six import itervalues
import MySQLdb
class SQL(object):
    """Thin MySQL helper used by pyspider's on_result hook.

    A connection is opened at construction time; `self.connection`
    records whether the connect succeeded so later calls can no-op
    instead of crashing.
    """

    def __init__(self):
        # NOTE(review): credentials are hard-coded — move to config/env
        # before using this outside a local demo.
        hosts = '127.0.0.1'
        username = 'root'
        password = '123456'
        database = 'game_main'
        charsets = 'utf8'
        self.connection = False  # flips to True only after a successful connect
        try:
            self.conn = MySQLdb.connect(host=hosts, user=username,
                                        passwd=password, db=database,
                                        charset=charsets)
            self.cursor = self.conn.cursor()
            self.cursor.execute("set names " + charsets)
            self.connection = True
        except Exception as e:
            # fixed: original printed a literal "/n" instead of a newline
            print("Cannot Connect To Mysql!\n%s" % e)

    def escape(self, string):
        """Backtick-quote an identifier (table or column name).

        Fix: the original returned the string unchanged, so identifiers
        were interpolated raw into the SQL text. Backtick quoting makes
        reserved words and odd names safe, but this is still NOT full
        escaping — identifier names must come from trusted code, never
        from user input.
        """
        return '`%s`' % string

    def insert(self, tablename=None, **values):
        """Insert one row into `tablename`; return True on success.

        Column VALUES are bound parameters (safe); identifier names are
        interpolated via escape() and must be trusted. With no values a
        default-valued row is written via REPLACE INTO.
        """
        if not self.connection:
            # connect failed earlier; silently skip, matching original behavior
            return
        tablename = self.escape(tablename)
        if values:
            _keys = ",".join(self.escape(k) for k in values)
            _values = ",".join(['%s'] * len(values))
            sql_query = "insert into %s (%s) values (%s)" % (tablename, _keys, _values)
        else:
            sql_query = "replace into %s default values" % tablename
        try:
            if values:
                # dict.values() iterates in the same order as the keys
                # joined above (replaces the six.itervalues dependency)
                self.cursor.execute(sql_query, list(values.values()))
            else:
                self.cursor.execute(sql_query)
            self.conn.commit()
            return True
        except Exception as e:
            print("An Error Occurred: %s" % e)  # fixed typo "Occured"
            try:
                # best-effort rollback so the connection stays usable
                self.conn.rollback()
            except Exception:
                pass
            return False
# To have pyspider render JavaScript, add the parameter fetch_type='js'
# to the self.crawl(...) call.