工具类(Excel)[二]

import numpy as np
import pandas as pd
import xlwt
import xlrd
from excelTools import ExcelT
from pandas import DataFrame
# from pandas.core.frame import DataFrame

def makeExcel(ta_one,sh_one_li,ta_two,sh_two_li):
    """Total the amounts of one workbook per company listed in another.

    For every index ``x`` the sheet ``sh_one_li[x]`` of ``ta_one`` is read as
    the data sheet and the sheet ``sh_two_li[x]`` of ``ta_two`` as the company
    sheet. Each data row whose second column equals a company's first column
    adds its fifth column to that company's total. The totals are written to
    ``<sh_two_li[x]>.xlsx`` in a sheet named after ``sh_two_li[x]``, with the
    columns ``company`` and ``money``.

    Args:
        ta_one: Data workbook, in any form ``pd.read_excel`` accepts.
        sh_one_li: Sheet identifiers to read from ``ta_one``.
        ta_two: Company workbook, in any form ``pd.read_excel`` accepts.
        sh_two_li: Sheet identifiers to read from ``ta_two``; needs at least
            as many entries as ``sh_one_li``.

    Raises:
        IndexError: If ``sh_two_li`` is shorter than ``sh_one_li``, or a row
            has fewer columns than the positions read here.
    """
    for x, sheet_one in enumerate(sh_one_li):
        sheet_two = sh_two_li[x]
        # DataFrame -> ndarray -> plain nested lists, one inner list per row.
        data_rows = np.array(pd.read_excel(ta_one, sheet_name=sheet_one)).tolist()
        company_rows = np.array(pd.read_excel(ta_two, sheet_name=sheet_two)).tolist()

        company = [row[0] for row in company_rows]
        # Companies without any matching data row keep the integer total 0.
        money = [
            sum(row[4] for row in data_rows if row[1] == name)
            for name in company
        ]

        result = DataFrame({
            'company': company,
            'money': money
        })
        # str() keeps non-string sheet identifiers (e.g. 1901) usable for both
        # the output file name and the sheet name.
        result.to_excel(str(sheet_two) + '.xlsx', sheet_name=str(sheet_two))
def dictChangeDataF():
    """Write a fixed two-column demo table to ``b.xlsx``.

    Shows the dict-of-lists -> DataFrame -> Excel round trip: the keys ``a``
    and ``b`` become the column headers of the sheet ``测试``.
    """
    columns = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
    }
    frame = DataFrame(columns)
    frame.to_excel('b.xlsx', sheet_name='测试')

def endTable():
    """Merge the per-table amounts into one summary workbook.

    Company names come from the first column of the first sheet of
    ``final.xlsx``; its first row is skipped as a header. For each workbook
    named in ``ta_name`` (``<name>.xlsx``, first sheet, first row skipped),
    rows whose second column equals a company name contribute their third
    column to that company's row in the summary. Table ``k`` (1-based) fills
    summary column ``k``; the company names themselves are written to column 0
    while processing the first table. The result is saved as ``结果(三).xls``.

    NOTE(review): the sheet is created with xlwt's default settings, which
    presumably reject a second write to the same cell, so a company listed
    twice in one table would raise - confirm against the input data.
    """
    totalData = xlrd.open_workbook('final.xlsx')
    company_names = totalData.sheets()[0].col_values(0)[1:]

    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('总表')
    sheet.write(0, 0, '公司名字')
    # Header cells for the ten table columns are intentionally left blank.
    for header_col in range(1, 11):
        sheet.write(0, header_col, '')

    ta_name = ['four', 'five', 'six(二)', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'one(一)']

    # Without company names there is nothing to look up, so no table workbook
    # is opened at all.
    tables = ta_name if company_names else []
    for column, na in enumerate(tables, start=1):
        # Open and parse each source workbook once per table instead of once
        # per company name.
        table = xlrd.open_workbook(na + '.xlsx').sheets()[0]
        rows = [table.row_values(x) for x in range(1, table.nrows)]
        first_table = column == 1
        # Only the first table is responsible for emitting the name column.
        names_in_table = table.col_values(1) if first_table else ()

        for row_idx, comname in enumerate(company_names, start=1):
            for data_row in rows:
                if data_row[1] == comname:
                    if first_table:
                        sheet.write(row_idx, 0, comname)
                    sheet.write(row_idx, column, data_row[2])
            # Companies missing from the first table still get their name
            # written, together with an empty amount cell.
            if first_table and comname not in names_in_table:
                sheet.write(row_idx, 0, comname)
                sheet.write(row_idx, column, '')
    workbook.save('结果(三).xls')
if __name__ == '__main__':
    # Placeholders: set the two workbook paths and their matching sheet lists
    # before running. With the empty lists below the call does nothing.
    ta_one = ''
    ta_two = ''
    sh_one_li = []
    sh_two_li = []
    # makeExcel takes four arguments (data workbook, its sheets, company
    # workbook, its sheets); the previous two-argument call raised TypeError.
    makeExcel(ta_one, sh_one_li, ta_two, sh_two_li)


===============================================================

# -*- coding:utf-8*-

import time
import requests
# Wall-clock timestamp captured when the module is loaded.
# NOTE(review): no live code in this section reads it - confirm before removing.
time1=time.time()
import pandas as pd
import  json
import xlwt

# workbook = xlwt.Workbook(encoding='utf-8')
# sheet = workbook.add_sheet('黑名单')
# sheet.write(0,0,'姓名')
# sheet.write(0,1,'身份证号')
################定义数据结构列表存储数据####################

def getName():
    """Scrape names and ID-card numbers from the Baidu API and save them.

    Requests result page 1 of the ``resource_id=6899`` endpoint for the query
    ``老赖`` (URL-encoded in the request), collects the ``iname`` and
    ``cardNum`` fields of every entry under ``data[*].result``, drops
    duplicate rows, prints the table and writes it to ``c.xlsx`` with the
    columns ``name`` and ``IDCard``.

    Raises:
        KeyError: If the response JSON lacks the ``data``, ``result``,
            ``iname`` or ``cardNum`` keys.
    """
    iname=[]
    icard=[]

    # The headers are identical for every page, so build them once.
    head={
    "Host": "sp0.baidu.com",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
    "Accept": "*/*",
    "Referer": "https://www.baidu.com/s?ie=utf-8&f=3&rsv_bp=1&tn=95943715_hao_pg&wd=%E8%80%81%E8%B5%96&oq=%25E8%2580%2581%25E8%25B5%2596&rsv_pq=ec5e631d0003d8eb&rsv_t=b295wWZB5DEWWt%2FICZvMsf2TZJVPmof2YpTR0MpCszb28dLtEQmdjyBEidZohtPIr%2FBmMrB3&rqlang=cn&rsv_enter=0&prefixsug=%25E8%2580%2581%25E8%25B5%2596&rsp=0&rsv_sug9=es_0_1&rsv_sug=9",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.8"
    }

    for i in range(1,2):
        # Pause before every request to avoid hammering the endpoint.
        time.sleep(10)
        print('正在抓取第'+str(i)+"页.................................")
        url="https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?resource_id=6899&query=%E8%80%81%E8%B5%96&pn="+str(i*10)+"&ie=utf-8&oe=utf-8&format=json"

        # A timeout keeps a stalled connection from blocking the script forever.
        html=requests.get(url,headers=head,timeout=30).content
        html_json=json.loads(html)
        for group in html_json['data']:
            for person in group['result']:
                print(person['iname'],person['cardNum'])
                iname.append(person['iname'])
                icard.append(person['cardNum'])

    # Assemble the collected columns and drop exact duplicate rows.
    data=pd.DataFrame({"name":iname,"IDCard":icard})
    data1=data.drop_duplicates()
    print(data1)

    # The ``encoding`` keyword was removed from ``to_excel`` in pandas 2.0,
    # so it is no longer passed.
    data1.to_excel("c.xlsx",header=True,index=False)

def getTel():
    """Call the fxhyd.cn ``appapi.aspx`` endpoint and print the raw response.

    The query string uses ``actionid=getmobile``; the response body is
    printed as text and not parsed.
    """
    # NOTE(review): the access token is hard-coded in the URL - consider
    # loading it from configuration instead of source control.
    url = 'http://api.fxhyd.cn/appapi.aspx?callback=jQuery22308425623862046725_1550671913748&jsonp=MobileSeachJsonCallback&actionid=getmobile&token=01126710166b7d58cbf047af98f36a729bff30b6f801&itemid=352&province=0&city=0&isp=0&'
    # A timeout keeps a stalled connection from blocking the caller forever.
    res = requests.get(url, timeout=30).text
    print(res)

========================================================================

class ExcelT(object):
    """Small xlwt wrapper that fills one sheet with eleven fixed columns.

    The workbook holds a single sheet named ``sheet1`` whose header row is
    labelled ``A`` through ``K``.
    """

    def __init__(self):
        """Create the workbook and sheet, then write the ``A``..``K`` header row."""
        self.workbook = xlwt.Workbook(encoding='utf-8')
        self.sheet = self.workbook.add_sheet('sheet1')
        for col, label in enumerate('ABCDEFGHIJK'):
            self.sheet.write(0, col, label)

    def writeExcel(self,data_lis,*args):
        """Write each entry of ``data_lis`` as one row below the header.

        Args:
            data_lis: Iterable of indexable rows; positions 0-10 of every row
                are written, so shorter rows raise ``IndexError``.
            *args: Accepted but not used.
        """
        row_no = 0
        for data_li in data_lis:
            row_no += 1  # row 0 holds the header, so data starts at row 1
            for col in range(11):
                self.sheet.write(row_no, col, data_li[col])

    def saveExcel(self,tableN):
        """Save the workbook as ``<tableN>.xls``."""
        target = tableN+'.xls'
        self.workbook.save(target)

猜你喜欢