python图形化界面+爬取淘宝信息+生成柱状分析图(淘宝查询系统)

注册登录用户信息
在这里插入图片描述
未连接数据库,采用 Python 标准库 pickle 将用户信息持久化保存到本地文件。
登陆成功后,界面跳转
在这里插入图片描述
输入爬取的内容,爬取44条信息,爬取结果如图
在这里插入图片描述
将爬取内容保存到csv文件中,根据csv文件生成柱状分析图:
csv文件
在这里插入图片描述
柱状分析图:
在这里插入图片描述
代码如下:

import re
import requests
import time
import tkinter as tk
import tkinter.messagebox
import pickle
import csv
import matplotlib.pyplot as plt

#a=""
fileName=''
# Main login window.
window = tk.Tk()
window.title('欢迎进入淘宝爬取系统')
window.geometry('450x300')
# Canvas reserved for an optional banner image (image loading is disabled below).
canvas = tk.Canvas(window, height=300, width=500)
# imagefile = tk.PhotoImage(file='qm.png')
# image = canvas.create_image(0, 0, anchor='nw', image=imagefile)
canvas.pack(side='top')
# Captions for the user-name and password fields.
tk.Label(window, text='用户名:').place(x=100, y=150)
tk.Label(window, text='密码:').place(x=100, y=190)
# User-name entry; its value is read through var_usr_name.
var_usr_name = tk.StringVar()
entry_usr_name = tk.Entry(window, textvariable=var_usr_name)
entry_usr_name.place(x=160, y=150)
# Password entry; typed characters are masked with '*'.
var_usr_pwd = tk.StringVar()
entry_usr_pwd = tk.Entry(window, textvariable=var_usr_pwd, show='*')
entry_usr_pwd.place(x=160, y=190)


# 登录函数
def usr_log_in():
    """Check the entered credentials and open the crawler window on success.

    Reads the user name and password from the login form and compares them
    with the accounts stored in ``usr_info.pickle`` (the file is created with
    a default ``admin``/``admin`` account when it does not exist). Then:

    * on a match, the login window is destroyed and the crawler window is
      shown; this call returns once that window's event loop ends;
    * on a wrong password or an empty field, an error dialog is shown;
    * for an unknown user name, the user is offered the sign-up dialog.
    """
    usr_name = var_usr_name.get()
    usr_pwd = var_usr_pwd.get()

    # NOTE(security): pickle.load can execute arbitrary code from a tampered
    # file, and the passwords are stored in plain text. Acceptable only for a
    # local demo; do not load a usr_info.pickle that comes from elsewhere.
    try:
        with open('usr_info.pickle', 'rb') as usr_file:
            usrs_info = pickle.load(usr_file)
    except FileNotFoundError:
        # First run: create the local account store with a default account.
        usrs_info = {'admin': 'admin'}
        with open('usr_info.pickle', 'wb') as usr_file:
            pickle.dump(usrs_info, usr_file)

    if usr_name not in usrs_info:
        if usr_name == '' or usr_pwd == '':
            tk.messagebox.showerror(message='用户名或密码为空')
        elif tk.messagebox.askyesno('欢迎', '您还没有注册,是否现在注册'):
            usr_sign_up()
        return
    if usr_pwd != usrs_info[usr_name]:
        tk.messagebox.showerror(message='密码错误')
        return

    def getNowTime(form='%Y-%m-%d_%H-%M-%S'):
        """Return the current local time formatted with *form*."""
        return time.strftime(form, time.localtime())

    def finxi(csv_path):
        """Plot price against sales label for the rows of the crawled CSV.

        Rows whose ``view_price`` is not a number (for example the ``null``
        placeholder written for missing fields) or whose ``view_sales`` is
        missing are skipped.
        """
        print(csv_path)
        view_price = []
        view_sales = []
        # NOTE: the 'ANSI' codec name exists only on Windows builds of Python;
        # it has to match the encoding used when the file was written.
        with open(csv_path, 'r', encoding='ANSI') as f:
            for row in csv.DictReader(f):
                sales = row['view_sales']
                try:
                    # Bar lengths must be numeric; plotting the raw strings
                    # makes matplotlib treat every price as a category.
                    price = float(row['view_price'])
                except (TypeError, ValueError):
                    continue
                if sales is None:
                    continue
                view_price.append(price)
                view_sales.append(sales)

        print(view_price[:44])
        print(view_sales[:44])

        fig = plt.figure(dpi=128, figsize=(10, 6))
        view_price.reverse()
        view_sales.reverse()
        # SimHei is needed so the Chinese title and labels render.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.xlabel('view_price')
        plt.ylabel('view_sales')
        plt.title('销量价格分析表')

        plt.barh(view_sales, view_price)
        fig.autofmt_xdate()  # slant the x tick labels to avoid crowding
        plt.show()

    def showinfo():
        """Crawl Taobao search results for the entered keyword and plot them.

        Writes one CSV row per item found on each of ``pageTotal`` result
        pages, reports completion in a dialog, then charts the CSV.
        """
        searchKey = entry.get()
        if not searchKey.strip():
            tkinter.messagebox.showwarning('提示', '请输入爬取的关键字')
            return
        # Output encoding: 'ansi' so that Excel opens the CSV without
        # mojibake. NOTE: this codec name is only available on Windows.
        encode = 'ansi'
        # Item attributes extracted from the search response.
        keys = ('raw_title', 'view_price', 'item_loc', 'view_sales', 'comment_count', 'nick')

        url = 'https://s.taobao.com/search'
        params = {'q': searchKey, 'ie': 'utf8'}
        # NOTE(security): this is a hard-coded personal session cookie; it
        # will expire and should not live in source control.
        header = {
            "cookie": "cna=EYnEFeatJWUCAbfhIw4Sd0GO; x=__ll%3D-1%26_ato%3D0; hng=CN%7Czh-CN%7CCNY%7C156; uc1=cookie14=UoTaHYecARKhrA%3D%3D; uc3=vt3=F8dBy32hRyZzP%2FF7mzQ%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D&nk2=1DsN4FjjwTp04g%3D%3D&id2=UondHPobpDVKHQ%3D%3D; t=ad1fbf51ece233cf3cf73d97af1b6a71; tracknick=%5Cu4F0F%5Cu6625%5Cu7EA22013; lid=%E4%BC%8F%E6%98%A5%E7%BA%A22013; uc4=nk4=0%401up5I07xsWKbOPxFt%2BwuLaZ8XIpO&id4=0%40UOE3EhLY%2FlTwLmADBuTfmfBbGpHG; lgc=%5Cu4F0F%5Cu6625%5Cu7EA22013; enc=ieSqdE6T%2Fa5hYS%2FmKINH0mnUFINK5Fm1ZKC0431E%2BTA9eVjdMzX9GriCY%2FI2HzyyntvFQt66JXyZslcaz0kXgg%3D%3D; _tb_token_=536fb5e55481b; cookie2=157aab0a58189205dd5030a17d89ad52; _m_h5_tk=150df19a222f0e9b600697737515f233_1565931936244; _m_h5_tk_enc=909fba72db21ef8ca51c389f65d5446c; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; l=cBa4gFrRqYHNUtVvBOfiquI8a17O4IJ51sPzw4_G2ICP9B5DeMDOWZezto8kCnGVL6mpR3RhSKO4BYTKIPaTlZXRFJXn9MpO.; isg=BI6ORhr9X6-NrOuY33d_XmZFy2SQp1Ju1qe4XLjXJRHsGyp1IJ9IG0kdUwfSA0oh",
            "referer": "https://detail.tmall.com/item.htm",
            "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36"
        }
        startPage = 1  # first result page to fetch
        pageTotal = 3  # number of pages reported and last page fetched
        waitTime = 2  # pause between requests, to avoid hammering the site
        rowWrited = 0
        startTime = time.time()
        print('任务启动\n{} | 初始化存储文件...'.format(getNowTime()))
        # Replace characters that are not allowed in file names so that any
        # keyword still yields a path that can be opened.
        safeKey = re.sub(r'[\\/:*?"<>|]', '_', searchKey)
        fileName = r'tb_{}_{}_{}_{}.csv'.format(safeKey, startPage, pageTotal, getNowTime())
        print(fileName)
        with open(fileName, 'w', encoding=encode) as saveFile:
            saveFile.write(','.join(keys) + '\n')

        print('关键词:{} 起始页面:{} 爬取页面数:{}, 开始执行..'.format(searchKey, startPage, pageTotal))
        for page in range(startPage, pageTotal + 1):
            print('\npage{}: 获取数据...'.format(page))
            time.sleep(waitTime)
            # NOTE(review): 's' looks like the item offset of the page; the
            # page * 44 step is kept as written - confirm it does not skip
            # a page's worth of items.
            params['s'] = str(page * 44) if page > 1 else '1'
            try:
                resp = requests.get(url, params, headers=header, timeout=10)
            except requests.RequestException as err:
                tkinter.messagebox.showerror('错误', '请求失败: {}'.format(err))
                return
            # Commas are stripped from the whole response so that no extracted
            # field can break the comma-separated output.
            text = resp.text.replace('\n', '').replace('\r', '').replace(',', '').strip()
            # One list of matches per key. NOTE: the lists are paired up by
            # position, so an item lacking a key shifts that column.
            results = [re.findall(r'"{}":"([^"]+)"'.format(key), text, re.I) for key in keys]
            print('page{}: 正在写入数据...'.format(page))
            with open(fileName, 'a', encoding=encode) as saveFile:
                for row in range(len(results[0])):
                    print('\r写入第{}条..'.format(row + 1), end='')
                    rowWrited += 1
                    for col, column in enumerate(results):
                        sep = ',' if col + 1 < len(results) else '\n'
                        try:
                            saveFile.write('{}{}'.format(column[row], sep))
                        except (IndexError, UnicodeEncodeError):
                            # Missing value, or text the output encoding
                            # cannot represent: keep the row shape.
                            saveFile.write('null{}'.format(sep))
            print('page{}完成...'.format(page))

        print(
            '\n任务完成!! 页面总数: {} | 写入数据: {}条 | 用时: {:.2f}s'.format(pageTotal, rowWrited, time.time() - startTime))
        tkinter.messagebox.showinfo('提示', '爬取完成!')
        finxi(fileName)

    tk.messagebox.showinfo(title='welcome',
                           message='欢迎您:' + usr_name)
    window.destroy()
    win = tkinter.Tk()
    win.title("淘宝爬取")
    win.geometry("400x400+200+50")
    menubar = tkinter.Menu(win)
    win.config(menu=menubar)
    menu1 = tkinter.Menu(menubar, tearoff=False)
    menu1.add_separator()
    # destroy (not quit): quit would only end the nested mainloop below and
    # leave this window open.
    menu1.add_command(label='退出', command=win.destroy)
    menubar.add_cascade(label='系统', menu=menu1)

    label = tkinter.Label(win, text="请输入爬取的关键字")
    label.pack()
    entry = tkinter.Entry(win)
    entry.pack()
    button = tkinter.Button(win, text="点击爬取", command=showinfo)
    button.pack()
    win.mainloop()


# 注册函数
def usr_sign_up():
    """Open the registration dialog and save the new account once it validates."""

    def signtowcg():
        """Validate the form and write the new account to ``usr_info.pickle``."""
        name = new_name.get()
        password = new_pwd.get()
        repeated = new_pwd_confirm.get()

        # Start from the accounts already on disk, or from none on first use.
        try:
            with open('usr_info.pickle', 'rb') as usr_file:
                known_users = pickle.load(usr_file)
        except FileNotFoundError:
            known_users = {}

        # Checks run in this order: duplicate name, empty field, mismatch.
        if name in known_users:
            tk.messagebox.showerror('错误', '用户名已存在')
            return
        if not password or not name:
            tk.messagebox.showerror('错误', '用户名或密码为空')
            return
        if password != repeated:
            tk.messagebox.showerror('错误', '密码前后不一致')
            return

        # Everything is valid: persist the account and close the dialog.
        known_users[name] = password
        with open('usr_info.pickle', 'wb') as usr_file:
            pickle.dump(known_users, usr_file)
        tk.messagebox.showinfo('欢迎', '注册成功')
        window_sign_up.destroy()

    # Registration dialog, a child of the login window.
    window_sign_up = tk.Toplevel(window)
    window_sign_up.geometry('350x200')
    window_sign_up.title('注册')

    # User-name row.
    new_name = tk.StringVar()
    name_caption = tk.Label(window_sign_up, text='用户名:')
    name_caption.place(x=10, y=10)
    name_entry = tk.Entry(window_sign_up, textvariable=new_name)
    name_entry.place(x=150, y=10)

    # Password row (masked input).
    new_pwd = tk.StringVar()
    pwd_caption = tk.Label(window_sign_up, text='请输入密码:')
    pwd_caption.place(x=10, y=50)
    pwd_entry = tk.Entry(window_sign_up, textvariable=new_pwd, show='*')
    pwd_entry.place(x=150, y=50)

    # Password confirmation row (masked input).
    new_pwd_confirm = tk.StringVar()
    confirm_caption = tk.Label(window_sign_up, text='请再次输入密码:')
    confirm_caption.place(x=10, y=90)
    confirm_entry = tk.Entry(window_sign_up, textvariable=new_pwd_confirm, show='*')
    confirm_entry.place(x=150, y=90)

    # Confirm button runs the validation above.
    bt_confirm_sign_up = tk.Button(window_sign_up, text='确认注册', command=signtowcg)
    bt_confirm_sign_up.place(x=150, y=130)



# 退出的函数
def usr_sign_quit():
    """Destroy the login window; used as the command of the quit button."""
    window.destroy()


# 登录 注册按钮
# Login, sign-up and quit buttons along the bottom of the login form.
bt_login = tk.Button(window, text='登录', command=usr_log_in)
bt_login.place(x=140, y=230)
bt_logup = tk.Button(window, text='注册', command=usr_sign_up)
bt_logup.place(x=210, y=230)
bt_logquit = tk.Button(window, text='退出', command=usr_sign_quit)
bt_logquit.place(x=280, y=230)

# Run the Tk event loop once; the script ends when it returns.
window.mainloop()










发布了3 篇原创文章 · 获赞 1 · 访问量 94

猜你喜欢

转载自blog.csdn.net/qq_44026036/article/details/104109976