Python:通过下载链接获取 Excel 文件,并将其中的数据直接写入数据库

from urllib.request import urlopen
import sys
import datetime
import psycopg2
import os

# 用来操作数据库的类
class GPCommand(object):
    """Small wrapper around a psycopg2 connection for the leads-report table.

    The instance keeps the open connection in ``self.connect`` and a cursor
    in ``self.cursor``; both are created by :meth:`connectGp` and released by
    :meth:`closeMysql`.
    """

    def __init__(self, hostname='XXXXXXXXX', username='XXXXXX',
                 password='XXXXXXXXX', database='XXXX'):
        """Store the connection settings; no connection is opened here.

        Args:
            hostname: Database host.
            username: Database user.
            password: Password for ``username``.
            database: Name of the database to connect to.

        The defaults are the placeholder values this script shipped with, so
        ``GPCommand()`` behaves exactly as before.
        """
        self.hostname = hostname
        self.username = username
        self.password = password
        self.database = database

    def connectGp(self):
        """Open the connection and create a cursor.

        Returns:
            ``'con_successful'`` on success, otherwise a string starting with
            ``'con_error'`` followed by the formatted ``sys.exc_info()``.
            A ``psycopg2.Error`` is reported through the return value, not
            raised.
        """
        try:
            # Connect using the settings stored on the instance.
            self.connect = psycopg2.connect(
                host=self.hostname,
                user=self.username,
                password=self.password,
                dbname=self.database,
            )
            # One cursor is shared by every statement issued by this object.
            self.cursor = self.connect.cursor()
            print("connect gp successful." + '\n' + '数据库连接成功')
            return 'con_successful'
        except psycopg2.Error:
            error = 'Failed to setup Postgres environment.\n{0}'.format(sys.exc_info())
            print('connect gp error.' + '\n' + '数据库连接失败')
            return 'con_error' + error

    def closeMysql(self):
        """Close the cursor and the connection.

        The method name is kept for existing callers even though the
        connection is made through psycopg2.
        """
        try:
            self.cursor.close()
        finally:
            # Release the connection even if closing the cursor failed.
            self.connect.close()
        print("数据库已关闭")

    def insert_data(self, dict):
        """Insert one lead row and commit it.

        Args:
            dict: Mapping with the keys ``date``, ``name``, ``email``,
                ``institution``, ``validatecode``, ``formhash``,
                ``return_message``, ``return_url`` and ``url_type``.
                (The parameter name shadows the builtin; it is kept so
                existing callers keep working.)

        Raises:
            KeyError: If one of the keys above is missing.
            Exception: Whatever the driver raises for the INSERT or the
                commit; the transaction is rolled back before re-raising.
        """
        row = dict
        # Values are handed to the driver as parameters instead of being
        # pasted into the SQL text, so quotes, backslashes or SQL fragments
        # in the downloaded data cannot change the statement.
        params = (
            row['date'],
            row['name'],
            row['email'],
            row['institution'],
            row['validatecode'],
            row['formhash'],
            row['return_message'],
            row['return_url'],
            row['url_type'],
        )
        insertsql = (
            "INSERT INTO dw_ana.whitepaper_application_note_leads_report "
            "(date,name,email,institution,validatecode,formhash,"
            "return_message,return_url,load_dt,url_type) "
            "values(%s,%s,%s,%s,%s,%s,%s,%s,now(),%s) "
        )
        try:
            self.cursor.execute(insertsql, params)
            self.connect.commit()
        except Exception:
            # Leave the connection usable and let the caller see the failure;
            # exiting the process with status 0 here would report success.
            self.connect.rollback()
            raise

    def delete_date(self):
        """Delete every row whose ``date`` falls in the previous calendar month.

        No commit is issued here: the deletion only becomes permanent with a
        later commit on the same connection (``insert_data`` commits after
        each row).
        """
        delete_sql = (
            "delete from dw_ana.whitepaper_application_note_leads_report "
            "where to_char(cast(date as date),'YYYY-MM') = "
            "to_char(cast(CURRENT_DATE - interval '1 month' as date),'YYYY-MM')"
        )
        self.cursor.execute(delete_sql)



def escape_character(string):
    """Double every single quote in *string*.

    ``'`` becomes ``''``, the escape for a quote inside a single-quoted SQL
    string literal. Text without a single quote is returned unchanged.

    Args:
        string: The text to escape; expected to be a ``str``.

    Returns:
        The text with each single quote doubled.
    """
    # str.replace does this in one pass; the manual index bookkeeping with
    # list.index/list.insert was quadratic in the number of quotes.
    return string.replace("'", "''")
def last_month(now_time):
    """Return the same day-of-month in the month before *now_time*.

    Args:
        now_time: A ``datetime.date`` or ``datetime.datetime``; only its
            ``year``, ``month`` and ``day`` are used.

    Returns:
        A ``datetime.datetime`` at midnight in the previous month. When the
        previous month is shorter than ``now_time.day`` (for example
        31 March), the day is clamped to that month's last day instead of
        raising ``ValueError``.
    """
    first_of_this_month = datetime.datetime(year=now_time.year, month=now_time.month, day=1)
    # One day before the first of this month is the last day of the previous
    # month; this also handles the January -> December year rollover.
    previous_month_end = first_of_this_month - datetime.timedelta(days=1)
    return previous_month_end.replace(day=min(now_time.day, previous_month_end.day))


def _strip_wrapper(field):
    """Return *field* without its first and last character.

    Presumably these are a pair of wrapping quote characters in the export;
    confirm against the actual file format.
    """
    return field[1:-1]


def _fetch_rows(file_url, min_columns):
    """Download a tab-separated export and return its usable data rows.

    The payload is decoded as UTF-8 and split on CRLF; the first line is
    skipped as a header. Rows with fewer than *min_columns* tab-separated
    columns are dropped so later indexing cannot fail.
    """
    with urlopen(file_url) as response:
        text = response.read().decode('utf-8')
    lines = text.split('\r\n')
    rows = (line.split('\t') for line in lines[1:])
    return [row for row in rows if len(row) >= min_columns]


def _build_record(row, url_type, has_validatecode):
    """Map one parsed row to the dict shape expected by ``insert_data``.

    Column 0 (the date) is used as-is; every other column has its first and
    last character removed. Layouts without a validate-code column get an
    empty string for ``validatecode``.
    """
    fields = [_strip_wrapper(value) for value in row[1:]]
    if has_validatecode:
        (name, email, institution, validatecode,
         formhash, return_message, return_url) = fields[:7]
    else:
        name, email, institution, formhash, return_message, return_url = fields[:6]
        validatecode = ''
    return {
        "date": row[0],
        "name": name,
        "email": email,
        "institution": institution,
        "validatecode": validatecode,
        "formhash": formhash,
        "return_message": return_message,
        "return_url": return_url,
        "url_type": url_type,
    }


def main():
    """Reload last month's lead exports into the database.

    Steps: compute the previous month as ``YYYYMM``, connect, delete the
    rows already stored for that month, then download each export
    (``<url><YYYYMM>.xls``) and insert its rows one by one. The connection is
    closed even when a download or insert fails.

    Raises:
        SystemExit: If the database connection cannot be established; the
            message is the error string returned by ``connectGp``.
    """
    report_month = last_month(datetime.date.today()).strftime("%Y%m")

    gpCommand = GPCommand()
    connect_result = gpCommand.connectGp()
    if connect_result != 'con_successful':
        # Without a cursor every later call would fail with AttributeError;
        # stop here with the real reason instead.
        raise SystemExit(connect_result)

    urls = [
        'https://www.xxxxx.com/monthly/down?file=protein-white-paper11',
        'https://www.xxxx.com/monthly/down?file=protein-application-note11',
    ]
    try:
        gpCommand.delete_date()
        for url in urls:
            url_type = url.split('=')[1]
            # The layout is chosen from the file name rather than by comparing
            # against a full URL literal: any difference in host or suffix
            # would silently route white-paper files to the wrong layout.
            has_validatecode = url_type.startswith('protein-white-paper')
            # White-paper exports carry an extra validatecode column
            # (8 columns); the other export has 7.
            min_columns = 8 if has_validatecode else 7
            for row in _fetch_rows(url + report_month + '.xls', min_columns):
                gpCommand.insert_data(_build_record(row, url_type, has_validatecode))
    finally:
        gpCommand.closeMysql()  # always release the connection

# Script entry point: run the monthly import.
# NOTE: this call is not guarded by `if __name__ == "__main__"`, so it also
# runs whenever this module is imported.
main()

猜你喜欢

转载自blog.csdn.net/qq_22994783/article/details/83025575