Notes: recording Baidu index-check results and exporting them to an Excel table

# coding=utf-8
#@auther:Mana_菜小刀
import requests
import queue
import threading
import xlrd
import xlwt
from lxml import etree
from xlutils.copy import copy
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# The target URLs use plain http://, so suppress urllib3's InsecureRequestWarning noise.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Desktop Chrome User-Agent sent with every request so Baidu serves the regular results page.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"
}

# Create result.xls with a header row; the spider threads append result rows below it.
myxls = xlwt.Workbook()
sheet1 = myxls.add_sheet('收录search')
lst_name = ['url', '收录/未收录', '图片']
# enumerate() instead of range(len(...)) — idiomatic index/value iteration.
for col, title in enumerate(lst_name):
    sheet1.write(0, col, title)
myxls.save('result.xls')

def log(*values, **options):
    """Minimal logging shim: forwards everything to print().

    Kept as a single indirection point so output could later be redirected
    (e.g. to a file) without touching call sites.
    """
    print(*values, **options)


class baiduSpider(threading.Thread):
    """Worker thread for the Baidu index check.

    Pops Baidu search URLs off a shared queue, decides for each whether the
    query is indexed (and whether the result carries a thumbnail image), and
    appends one row per URL to result.xls.
    """

    def __init__(self, queue_li, name):
        threading.Thread.__init__(self)
        self._queue = queue_li  # shared queue.Queue of full search URLs
        self._name = name

    def run(self):
        # Drain the queue; a failure on one URL is logged and must not kill the thread.
        while not self._queue.empty():
            url = self._queue.get()
            try:
                self.get_url(url)
            except Exception as e:
                log(e)

    def get_url(self, url):
        """Fetch the Baidu results page for `url` and record the verdict in result.xls."""
        requests.adapters.DEFAULT_RETRIES = 5
        session = requests.session()
        session.keep_alive = False  # close connections so many requests don't exhaust sockets
        resp = session.get(url=url, headers=headers)
        page = etree.HTML(resp.text)

        # Result-count banner text and result-thumbnail images on the SERP.
        strs = page.xpath('//span[@class="nums_text"]//text()')
        imgs = page.xpath('//img[@class="c-img c-img6"]/@src')
        search_mo = ['收录', 'Not included']
        img_mo = ['有图', '无图']
        url_mo = url.replace('http://www.baidu.com/s?wd=', '')

        # Re-open the workbook each time so this thread appends after the current
        # last row. NOTE(review): this read-copy-save cycle has no lock, so
        # concurrent threads can clobber each other's rows — consider a
        # threading.Lock around this whole section.
        workbook = xlrd.open_workbook('result.xls', formatting_info=True)
        sheet = workbook.sheet_by_index(0)
        row_num = sheet.nrows
        newbook = copy(workbook)
        newsheet = newbook.get_sheet(0)

        # NOTE(review): this zero-results banner text came through machine
        # translation; the live page shows it in Chinese (roughly
        # "百度为您找到相关结果约0个") — verify against a real response.
        zero_banner = " Baidu for you to find relevant results about 0 "
        # Guard against an empty xpath result (original code let the IndexError
        # bubble up to run(); an empty banner now counts as "not indexed").
        indexed = bool(strs) and strs[0] != zero_banner

        if indexed and len(imgs) > 0:
            verdict, picture = search_mo[0], img_mo[0]
        elif indexed:
            verdict, picture = search_mo[0], img_mo[1]
        else:
            verdict, picture = search_mo[1], img_mo[1]

        newsheet.write(row_num, 0, url_mo)
        newsheet.write(row_num, 1, verdict)
        newsheet.write(row_num, 2, picture)
        # '丨' separator reconstructed from the garbled ' Shu ' in the source — confirm.
        log(verdict, '丨', picture, '丨', url_mo)
        newbook.save('result.xls')

def main():
    """Read one URL per line from 'urls.txt', queue Baidu search URLs for them,
    and run a pool of baiduSpider threads until the queue is drained.

    NOTE(review): the input filename was garbled in the source ("' urls '");
    'urls.txt' is reconstructed from the original comment about a txt document —
    confirm the intended name.
    """
    queue_li = queue.Queue()
    threads = []
    thread_count = 10

    with open('urls.txt', 'r', encoding='utf-8', errors="ignore") as f:
        urls = f.read().split('\n')

    url_search = ''  # last non-empty URL; reused below as the thread name
    for url in urls:
        if len(url) > 0:
            url_search = url
            queue_li.put('http://www.baidu.com/s?wd={}'.format(url_search))

    for _ in range(thread_count):
        threads.append(baiduSpider(queue_li, url_search))

    for t in threads:
        t.start()
    for t in threads:
        t.join()

# Script entry point: print the author's banner, then run the crawler.
if __name__ == '__main__':
    log("Mana好伟大!(^-^)V")
    main()

 

You may also like:

Origin www.cnblogs.com/mana66ccff/p/11184899.html