python---爬取中国彩票网的双色球数据,保存txt与xls格式。object has no attribute 'pipelines'

python—爬取中国彩票网的双色球数据,保存txt与xls格式。object has no attribute ‘pipelines’

一、保存txt格式的源代码文件:
1、源码文件 getWinningNum.py

root@kali:~/python/zhcw# ls
getWinningNum.log  getWinningNum.py  mylog.py  mylog.pyc


root@kali:~/python/zhcw# cat getWinningNum.py 
#!/usr/bin/python
# --*-- coding:utf-8 --*--

import requests
import re
from bs4 import BeautifulSoup
import urllib2
from mylog import MyLog as mylog

class DoubleColorBallItem(object):
        date = None #开奖日期
        order = None #当年的顺序
        red1 = None #第一个红球号码
        red2 = None #第二个红球号码
        red3 = None #第三个红球号码
        red4 = None #第四个红球号码
        red5 = None #第五个红球号码
        red6 = None #第六个红球号码
        blue = None #蓝色球号码
        money = None #彩池金额
        firstPrize = None #一等奖中奖人数
        secondPrize = None #二等奖中奖人数

class GetDoubleColorBallNumber(object):#用于获取双色球中奖号码,返回一个txt文件
        def __init__(self):
                self.urls = []
                self.log = mylog()
                self.getUrls()
                self.items = self.spider(self.urls)
                self.pipelines(self.items)

        def getUrls(self):#获取数据来源网页
                URL = r'http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html'
                htmlContent = self.getResponseContent(URL)
                soup = BeautifulSoup(htmlContent,'lxml')
                tag = soup.find_all(re.compile('p'))[-1]
                pages = tag.strong.get_text()
                for i in xrange(1,int(pages)+1):
                        url = r'http://kaijiang.zhcw.com/zhcw/html/ssq/list_'+str(i)+'.html'
                        self.urls.append(url)
                        self.log.info(u'添加URL:%s到URLS \r\n' %url)

        def getResponseContent(self,url):#单独一个函数返回,后期代理抓包使用
                try:
                        response = urllib2.urlopen(url.encode('utf8'))
                except:
                        self.log.error(u'Python 返回URL:%s 数据失败\r\n' %url)
                else:
                        self.log.info(u'Python 返回URL:%s 数据失败\r\n' %url)
                        return response.read()

        def spider(self, urls):#从获取的数据中过滤得到中奖信息
                items = []
                for url in urls:
                        htmlContent = self.getResponseContent(url)
                        soup = BeautifulSoup(htmlContent,'lxml')
                        tags = soup.find_all('tr',attrs={})
                        for tag in tags:
                                if tag.find('em'):
                                        item =  DoubleColorBallItem()
                                        tagTd = tag.find_all('td')
                                        item.date = tagTd[0].get_text()
                                        item.order = tagTd[1].get_text()
                                        tagEm = tagTd[2].find_all('em')
                                        item.red1 = tagEm[0].get_text()
                                        item.red2 = tagEm[1].get_text()
                                        item.red3 = tagEm[2].get_text()
                                        item.red4 = tagEm[3].get_text()
                                        item.red5 = tagEm[4].get_text()
                                        item.red6 = tagEm[5].get_text()
                                        item.blue = tagEm[6].get_text()
                                        item.money = tagTd[3].find("strong").get_text()
                                        item.firstPrize = tagTd[4].find("strong").get_text()
                                        item.secondPrize = tagTd[5].find("strong").get_text()
                                        items.append(item)
                                        self.log.info(u'获取日期为:%s 的数据成功' %(item.date))
                        return items

        def pipeliens(self, items):
                fileName = u'双色球.txt'.encode('GBK')
                with open(fileName,'w') as fp:
                        for item in items:
                                fp.write('%s %s \t %s %s %s %s %s %s %s \t %s \t %s %s \n' %(item.date,item.order,item.red1,item.red2,item.red3,item.red4,item.red5,item.red6,item.blue,item.money,item.firstPrize,item.secondPrize))
                                self.log.info(u'将日期为:%s的数据存入"%s"...' %(item.data,filename.decode('GBK')))

if __name__ == "__main__":
    GDCBN = GetDoubleColorBallNumber()

2、源码文件 mylog.py

root@kali:~/python/zhcw# ls
getWinningNum.log  getWinningNum.py  mylog.py  mylog.pyc


root@kali:~/python/zhcw# cat mylog.py
#!/usr/bin/python
# --*-- coding:utf-8 --*--

import logging
import getpass
import sys

class MyLog(object):#类MyLog的构造函数
        def __init__(self):
                self.user = getpass.getuser()
                self.logger = logging.getLogger(self.user)
                self.logger.setLevel(logging.DEBUG)

                #日志文件名
                self.logFile = sys.argv[0][0:-3] + '.log'
                self.formatter = logging.Formatter('%(asctime)-12s %(levelname)-8s %(name)-10s %(message)-12s\r\n')

                #日志显示到屏幕上并输出到日志文件内
                self.logHand = logging.FileHandler(self.logFile,encoding='utf8')
                self.logHand.setFormatter(self.formatter)
                self.logHand.setLevel(logging.DEBUG)

                self.logHandSt = logging.StreamHandler()
                self.logHandSt.setFormatter(self.formatter)
                self.logHandSt.setLevel(logging.DEBUG)

                self.logger.addHandler(self.logHand)
                self.logger.addHandler(self.logHandSt)

        #日志的5个级别对应以下的5个函数
        def debug(self,msg):
                self.logger.debug(msg)

        def info(self,msg):
                self.logger.info(msg)

        def warn(self,msg):
                self.logger.warn(msg)

        def error(self,msg):
                self.logger.error(msg)

        def critical(self,msg):
                self.logger.critical(msg)

if __name__ == "__mian__":
    mylog = MyLog()
    mylog.debug(u"I 'm debug 测试中文")
    mylog.info("I 'm info")
    mylog.warn("I 'm warn")
    mylog.info(u"I 'm error 测试中文")
    mylog.critical("I 'm critical")

3、保存txt格式的脚本运行情况:

.....................
.........................
...........................
2018-01-12 00:13:19,570 INFO     root       添加URL:http://kaijiang.zhcw.com/zhcw/html/ssq/list_108.html到URLS 


2018-01-12 00:13:19,570 INFO     root       添加URL:http://kaijiang.zhcw.com/zhcw/html/ssq/list_109.html到URLS 


2018-01-12 00:13:19,571 INFO     root       添加URL:http://kaijiang.zhcw.com/zhcw/html/ssq/list_110.html到URLS 


2018-01-12 00:13:19,571 INFO     root       添加URL:http://kaijiang.zhcw.com/zhcw/html/ssq/list_111.html到URLS 


2018-01-12 00:13:20,795 INFO     root       Python 返回URL:http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html 数据失败


2018-01-12 00:13:22,577 INFO     root       获取日期为:2018-01-11 的数据成功

2018-01-12 00:13:22,579 INFO     root       获取日期为:2018-01-09 的数据成功

2018-01-12 00:13:22,581 INFO     root       获取日期为:2018-01-07 的数据成功

2018-01-12 00:13:22,582 INFO     root       获取日期为:2018-01-04 的数据成功

2018-01-12 00:13:22,583 INFO     root       获取日期为:2018-01-02 的数据成功

2018-01-12 00:13:22,584 INFO     root       获取日期为:2017-12-31 的数据成功

2018-01-12 00:13:22,586 INFO     root       获取日期为:2017-12-28 的数据成功

2018-01-12 00:13:22,587 INFO     root       获取日期为:2017-12-26 的数据成功

2018-01-12 00:13:22,588 INFO     root       获取日期为:2017-12-24 的数据成功

2018-01-12 00:13:22,589 INFO     root       获取日期为:2017-12-21 的数据成功

2018-01-12 00:13:22,591 INFO     root       获取日期为:2017-12-19 的数据成功

2018-01-12 00:13:22,592 INFO     root       获取日期为:2017-12-17 的数据成功

2018-01-12 00:13:22,592 INFO     root       获取日期为:2017-12-14 的数据成功

2018-01-12 00:13:22,593 INFO     root       获取日期为:2017-12-12 的数据成功

2018-01-12 00:13:22,594 INFO     root       获取日期为:2017-12-10 的数据成功

2018-01-12 00:13:22,595 INFO     root       获取日期为:2017-12-07 的数据成功

2018-01-12 00:13:22,595 INFO     root       获取日期为:2017-12-05 的数据成功

2018-01-12 00:13:22,596 INFO     root       获取日期为:2017-12-03 的数据成功

2018-01-12 00:13:22,597 INFO     root       获取日期为:2017-11-30 的数据成功

2018-01-12 00:13:22,598 INFO     root       获取日期为:2017-11-28 的数据成功

Traceback (most recent call last):
  File "getWinningNum.py", line 87, in <module>
    GDCBN = GetDoubleColorBallNumber()
  File "getWinningNum.py", line 30, in __init__
    self.pipelines(self.items)
AttributeError: 'GetDoubleColorBallNumber' object has no attribute 'pipelines'
root@kali:~/python/zhcw# 

二、保存xls格式的源代码文件

1、源码文件 getWinningNum_excel.py

#!/usr/bin/python
# --*-- coding:utf-8 --*--

import requests
import re
from bs4 import BeautifulSoup
import urllib2
from mylog import MyLog as mylog
from SaveExcel import SaveBallDate

class DoubleColorBallItem(object):
    date = None #开奖日期
    order = None #当年的顺序
    red1 = None #第一个红球号码
    red2 = None #第二个红球号码
    red3 = None #第三个红球号码
    red4 = None #第四个红球号码
    red5 = None #第五个红球号码
    red6 = None #第六个红球号码
    blue = None #蓝色球号码
    money = None #彩池金额
    firstPrize = None #一等奖中奖人数
    secondPrize = None #二等奖中奖人数

class GetDoubleColorBallNumber(object):#用于获取双色球中奖号码,返回一个txt文件
    def __init__(self):
        self.urls = []
        self.log = mylog()
        self.getUrls()
        self.items = self.spider(self.urls)
        self.pipelines(self.items)
        self.log.info('beging save data to excel \r\n')
        SaveBallData(self.items)
        self.log.info('save data to excel end ...\r\n')

    def getUrls(self):#获取数据来源网页
        URL = r'http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html'
        htmlContent = self.getResponseContent(URL)
        soup = BeautifulSoup(htmlContent,'lxml')
        tag = soup.find_all(re.compile('p'))[-1]
        pages = tag.strong.get_text()
        for i in xrange(1,int(pages)+1):
            url = r'http://kaijiang.zhcw.com/zhcw/html/ssq/list_'+str(i)+'.html'
            self.urls.append(url)
            self.log.info(u'添加URL:%s到URLS \r\n' %url)

    def getResponseContent(self,url):#单独一个函数返回,后期代理抓包使用
        try:
            response = urllib2.urlopen(url.encode('utf8'))
        except:
            self.log.error(u'Python 返回URL:%s 数据失败\r\n' %url)
        else:
            self.log.info(u'Python 返回URL:%s 数据失败\r\n' %url)
            return response.read()

    def spider(self, urls):#从获取的数据中过滤得到中奖信息
        items = []
        for url in urls:
            htmlContent = self.getResponseContent(url)
            soup = BeautifulSoup(htmlContent,'lxml')
            tags = soup.find_all('tr',attrs={})
            for tag in tags:
                if tag.find('em'):
                    item =  DoubleColorBallItem()
                    tagTd = tag.find_all('td')
                    item.date = tagTd[0].get_text()
                    item.order = tagTd[1].get_text()
                    tagEm = tagTd[2].find_all('em')
                    item.red1 = tagEm[0].get_text()
                    item.red2 = tagEm[1].get_text()
                    item.red3 = tagEm[2].get_text()
                    item.red4 = tagEm[3].get_text()
                    item.red5 = tagEm[4].get_text()
                    item.red6 = tagEm[5].get_text()
                    item.blue = tagEm[6].get_text()
                    item.money = tagTd[3].find("strong").get_text()
                    item.firstPrize = tagTd[4].find("strong").get_text()
                    item.secondPrize = tagTd[5].find("strong").get_text()
                    items.append(item)
                    self.log.info(u'获取日期为:%s 的数据成功' %(item.date))
        return items

    def pipeliens(self, items):
        fileName = u'双色球.txt'.encode('GBK')
        with open(fileName,'w') as fp:
            for item in items:
                fp.write('%s %s \t %s %s %s %s %s %s %s \t %s \t %s %s \n' %(item.date,item.order,item.red1,item.red2,item.red3,item.red4,item.red5,item.red6,item.blue,item.money,item.firstPrize,item.secondPrize))
                self.log.info(u'将日期为:%s的数据存入"%s"...' %(item.data,filename.decode('GBK')))

if __name__ == "__main__":
    GDCBN = GetDoubleColorBallNumber()

2、源码文件 SaveExcel.py

# -*- coding: utf-8 -*-

import xlwt

class SaveBallDate(object):
    def __inti__(self,items):
        self.items = items
        self.run(self.items)

    def run(self,items):
        fileName = u'双色球.xls'.encode('GBK')
        book = xlwt.Workbook(encodeing='utf8')
        sheet = book.add_sheet('ball',cell_overwrite_ok = True)
        sheet.write(0,0,u'开奖日期'.encode('utf8'))
        sheet.write(0,1,u'期号'.encode('utf8'))
        sheet.write(0,2,u'红1'.encode('utf8'))
        sheet.write(0,3,u'红2'.encode('utf8'))
        sheet.write(0,4,u'红3'.encode('utf8'))
        sheet.write(0,5,u'红4'.encode('utf8'))
        sheet.write(0,6,u'红5'.encode('utf8'))
        sheet.write(0,7,u'红6'.encode('utf8'))
        sheet.write(0,8,u'蓝'.encode('utf8'))
        sheet.write(0,9,u'销售金额'.encode('utf8'))
        sheet.write(0,10,u'一等奖'.encode('utf8'))
        sheet.write(0,12,u'二等奖'.encode('utf8'))
        i = 1
        while i <= len(items):
            item = items[-1]
            sheet.write(i,0,item.date)
            sheet.write(i,1,item.order)
            sheet.write(i,2,item.red1)
            sheet.write(i,3,item.red2)
            sheet.write(i,4,item.red3)
            sheet.write(i,5,item.red4)
            sheet.write(i,6,item.red5)
            sheet.write(i,7,item.red6)
            sheet.write(i,8,item.blue)
            sheet.write(i,9,item.money)
            sheet.write(i,10,item.firstPrize)
            sheet.write(i,11,item.secondPrize)
            i += 1
        book.save(fileName)

if __name__ == "__main__":
    pass     

3、源码文件 mylog.py

#!/usr/bin/python
# --*-- coding:utf-8 --*--

import logging
import getpass
import sys

class MyLog(object):#类MyLog的构造函数
    def __init__(self):
        self.user = getpass.getuser()
        self.logger = logging.getLogger(self.user)
        self.logger.setLevel(logging.DEBUG)

        #日志文件名
        self.logFile = sys.argv[0][0:-3] + '.log'
        self.formatter = logging.Formatter('%(asctime)-12s %(levelname)-8s %(name)-10s %(message)-12s\r\n')

        #日志显示到屏幕上并输出到日志文件内
        self.logHand = logging.FileHandler(self.logFile,encoding='utf8')
        self.logHand.setFormatter(self.formatter)
        self.logHand.setLevel(logging.DEBUG)

        self.logHandSt = logging.StreamHandler()
        self.logHandSt.setFormatter(self.formatter)
        self.logHandSt.setLevel(logging.DEBUG)

        self.logger.addHandler(self.logHand)
        self.logger.addHandler(self.logHandSt)

    #日志的5个级别对应以下的5个函数
    def debug(self,msg):
        self.logger.debug(msg)

    def info(self,msg):
        self.logger.info(msg)

    def warn(self,msg):
        self.logger.warn(msg)

    def error(self,msg):
        self.logger.error(msg)

    def critical(self,msg):
        self.logger.critical(msg)

if __name__ == "__mian__":
    mylog = MyLog()
    mylog.debug(u"I 'm debug 测试中文")
    mylog.info("I 'm info")
    mylog.warn("I 'm warn")
    mylog.info(u"I 'm error 测试中文")
    mylog.critical("I 'm critical")

4、运行情况

2018-01-13 20:19:38,615 INFO     Administrator 获取日期为:2003-08-31 的数据成功

2018-01-13 20:19:38,617 INFO     Administrator 获取日期为:2003-08-28 的数据成功

...........................................................................
...........................................................................
...........................................................................

2018-01-13 20:19:39,444 INFO     Administrator 获取日期为:2003-03-27 的数据成功

2018-01-13 20:19:39,447 INFO     Administrator 获取日期为:2003-03-23 的数据成功

2018-01-13 20:19:39,448 INFO     Administrator 获取日期为:2003-03-20 的数据成功

2018-01-13 20:19:39,450 INFO     Administrator 获取日期为:2003-03-16 的数据成功

2018-01-13 20:19:39,453 INFO     Administrator 获取日期为:2003-03-13 的数据成功

2018-01-13 20:19:39,454 INFO     Administrator 获取日期为:2003-03-09 的数据成功

2018-01-13 20:19:39,457 INFO     Administrator 获取日期为:2003-03-06 的数据成功

2018-01-13 20:19:39,459 INFO     Administrator 获取日期为:2003-03-02 的数据成功

2018-01-13 20:19:39,460 INFO     Administrator 获取日期为:2003-02-27 的数据成功

2018-01-13 20:19:39,461 INFO     Administrator 获取日期为:2003-02-23 的数据成功

Traceback (most recent call last):

  File "<ipython-input-4-3a22468d5a14>", line 1, in <module>
    runfile('F:/SOFT/pythonpro/getWinningNum_excel.py', wdir='F:/SOFT/pythonpro')

  File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
    execfile(filename, namespace)

  File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 86, in execfile
    exec(compile(scripttext, filename, 'exec'), glob, loc)

  File "F:/SOFT/pythonpro/getWinningNum_excel.py", line 91, in <module>
    GDCBN = GetDoubleColorBallNumber()

  File "F:/SOFT/pythonpro/getWinningNum_excel.py", line 31, in __init__
    self.pipelines(self.items)

AttributeError: 'GetDoubleColorBallNumber' object has no attribute 'pipelines'

猜你喜欢

转载自blog.csdn.net/xwbk12/article/details/79039740
今日推荐