Python GUI + Taobao information crawler + bar-chart analysis generation (Taobao query system)

Login User information
Here Insert Picture Description
No database is connected; user information is stored persistently with Python's pickle module (part of the standard library).
After a successful login, the program switches to the crawler window:
Here Insert Picture Description
Enter the content to crawl; 44 items of information are crawled, and the crawl results are shown below:
Here Insert Picture Description
The crawled content is saved to a CSV file, and a bar chart is generated from that CSV file for analysis:
csv file
Here Insert Picture Description
Bar chart analysis:
Here Insert Picture Description
The code is as follows:

import re
import requests
import time
import tkinter as tk
import tkinter.messagebox
import pickle
import csv
import matplotlib.pyplot as plt

# Module-level placeholder for the output CSV name.
# NOTE(review): no function visible in this file declares it ``global``, so it
# appears to be unused — confirm before removing.
fileName=''
# Login window (root Tk instance used by the login and sign-up functions).
window = tk.Tk()
window.title('欢迎进入淘宝爬取系统')
window.geometry('450x300')
# Canvas reserved for a background image; the image loading itself is
# currently commented out, so the canvas is packed empty.
canvas = tk.Canvas(window, height=300, width=500)
# imagefile = tk.PhotoImage(file='qm.png')
# image = canvas.create_image(0, 0, anchor='nw', image=imagefile)
canvas.pack(side='top')
# Labels for the user name and password fields.
tk.Label(window, text='用户名:').place(x=100, y=150)
tk.Label(window, text='密码:').place(x=100, y=190)
# User name entry; its value is read through var_usr_name.
var_usr_name = tk.StringVar()
entry_usr_name = tk.Entry(window, textvariable=var_usr_name)
entry_usr_name.place(x=160, y=150)
# Password entry; typed characters are masked with '*'.
var_usr_pwd = tk.StringVar()
entry_usr_pwd = tk.Entry(window, textvariable=var_usr_pwd, show='*')
entry_usr_pwd.place(x=160, y=190)


# 登录函数
def usr_log_in():
    """Validate the entered credentials and, on success, open the crawler window.

    Reads the user name and password from the login entry fields and compares
    them with the accounts stored in ``usr_info.pickle`` (created with a
    default ``admin``/``admin`` account when the file does not exist). Then:

    * on a match, destroys the login window, builds the crawler window and
      runs its event loop;
    * on a wrong password or empty input, shows an error dialog;
    * for an unknown user, offers to register via ``usr_sign_up``.
    """
    # Read the user name and password from the entry fields.
    usr_name = var_usr_name.get()
    usr_pwd = var_usr_pwd.get()
    # Load the locally stored accounts; create the store on first run.
    # NOTE(review): pickle.load can execute code embedded in the file, and the
    # passwords are stored in plain text — acceptable only for a local demo.
    try:
        with open('usr_info.pickle', 'rb') as usr_file:
            usrs_info = pickle.load(usr_file)
    except FileNotFoundError:
        with open('usr_info.pickle', 'wb') as usr_file:
            usrs_info = {'admin': 'admin'}
            pickle.dump(usrs_info, usr_file)
    # Check whether the user name and password match.
    if usr_name in usrs_info:
        if usr_pwd == usrs_info[usr_name]:
            tk.messagebox.showinfo(title='welcome',
                                   message='欢迎您:' + usr_name)
            window.destroy()
            win = tkinter.Tk()
            win.title("淘宝爬取")
            win.geometry("400x400+200+50")
            menubar = tkinter.Menu(win)
            win.config(menu=menubar)
            menu1 = tkinter.Menu(menubar, tearoff=False)

            # "System" menu with a separator followed by the exit entry.
            # destroy() is used instead of quit(): this window's mainloop()
            # runs nested inside the login window's mainloop(), and quit()
            # only ends the innermost loop while leaving the window open.
            menu1.add_separator()
            menu1.add_command(label='退出', command=win.destroy)
            menubar.add_cascade(label='系统', menu=menu1)

            def getNowTime(form='%Y-%m-%d_%H-%M-%S'):
                """Return the current local time formatted with *form*."""
                return time.strftime(form, time.localtime())

            def finxi(a):
                """Plot sales against price from the CSV file at path *a*.

                Reads the ``view_price`` and ``view_sales`` columns and shows
                them as a horizontal bar chart. The values are plotted as the
                strings read from the file; they are not converted to numbers.
                """
                print(a)
                # NOTE(review): the 'ANSI' codec name is, as far as I know,
                # only available on Windows — confirm if portability matters.
                with open(a, 'r', encoding='ANSI') as f:
                    reader = csv.DictReader(f)
                    view_price = []
                    view_sales = []
                    for row in reader:
                        view_price.append(row['view_price'])
                        view_sales.append(row['view_sales'])

                print(view_price[:44])
                print(view_sales[:44])

                # Figure size and resolution.
                fig = plt.figure(dpi=128, figsize=(10, 6))
                view_price.reverse()
                view_sales.reverse()
                # Use a font with CJK glyphs so the Chinese title renders.
                plt.rcParams['font.sans-serif'] = ['SimHei']
                plt.xlabel('view_price')
                plt.ylabel('view_sales')
                plt.title('销量价格分析表')

                # Horizontal bar chart.
                plt.barh(view_sales, view_price)
                fig.autofmt_xdate()  # slant the x tick labels to avoid crowding
                plt.show()

            def showinfo():
                """Crawl Taobao search results for the entered keyword.

                Fetches ``pageTotal`` result pages, appends the fields listed
                in ``keys`` to a timestamped CSV file, then shows a completion
                dialog and plots the file with ``finxi``.
                """
                # Keyword typed into the entry field.
                searchKey = entry.get()
                # Output encoding; per the original author, 'ansi' is used
                # because Excel showed garbled text for the utf-8 output.
                encode = 'ansi'
                # Item attributes extracted from each result page.
                keys = ('raw_title', 'view_price', 'item_loc', 'view_sales', 'comment_count', 'nick')

                url = 'https://s.taobao.com/search'
                params = {'q': searchKey, 'ie': 'utf8'}
                # NOTE(review): this is a hard-coded, account-specific session
                # cookie committed in source. It is kept so behaviour does not
                # change, but it should be loaded from configuration instead.
                header = {
                    "cookie": "cna=EYnEFeatJWUCAbfhIw4Sd0GO; x=__ll%3D-1%26_ato%3D0; hng=CN%7Czh-CN%7CCNY%7C156; uc1=cookie14=UoTaHYecARKhrA%3D%3D; uc3=vt3=F8dBy32hRyZzP%2FF7mzQ%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D&nk2=1DsN4FjjwTp04g%3D%3D&id2=UondHPobpDVKHQ%3D%3D; t=ad1fbf51ece233cf3cf73d97af1b6a71; tracknick=%5Cu4F0F%5Cu6625%5Cu7EA22013; lid=%E4%BC%8F%E6%98%A5%E7%BA%A22013; uc4=nk4=0%401up5I07xsWKbOPxFt%2BwuLaZ8XIpO&id4=0%40UOE3EhLY%2FlTwLmADBuTfmfBbGpHG; lgc=%5Cu4F0F%5Cu6625%5Cu7EA22013; enc=ieSqdE6T%2Fa5hYS%2FmKINH0mnUFINK5Fm1ZKC0431E%2BTA9eVjdMzX9GriCY%2FI2HzyyntvFQt66JXyZslcaz0kXgg%3D%3D; _tb_token_=536fb5e55481b; cookie2=157aab0a58189205dd5030a17d89ad52; _m_h5_tk=150df19a222f0e9b600697737515f233_1565931936244; _m_h5_tk_enc=909fba72db21ef8ca51c389f65d5446c; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; l=cBa4gFrRqYHNUtVvBOfiquI8a17O4IJ51sPzw4_G2ICP9B5DeMDOWZezto8kCnGVL6mpR3RhSKO4BYTKIPaTlZXRFJXn9MpO.; isg=BI6ORhr9X6-NrOuY33d_XmZFy2SQp1Ju1qe4XLjXJRHsGyp1IJ9IG0kdUwfSA0oh",
                    "referer": "https://detail.tmall.com/item.htm",
                    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36"
                }
                startPage = 1  # first page to fetch
                pageTotal = 3  # last page to fetch (inclusive)
                waitTime = 2  # seconds to pause before each request
                rowWrited = 0
                startTime = time.time()
                print('任务启动\n{} | 初始化存储文件...'.format(getNowTime()))
                fileName = r'tb_{}_{}_{}_{}.csv'.format(searchKey, startPage, pageTotal, getNowTime())
                print(fileName)
                # Create the file and write the header row.
                with open(fileName, 'w', encoding=encode) as saveFile:
                    saveFile.write(','.join(keys) + '\n')

                print('关键词:{} 起始页面:{} 爬取页面数:{}, 开始执行..'.format(searchKey, startPage, pageTotal))
                for page in range(startPage, pageTotal + 1):
                    # Everything below must run once per page. Previously only
                    # the print was inside the loop, so a single request was
                    # made for the last page.
                    print('\npage{}: 获取数据...'.format(page))
                    time.sleep(waitTime)
                    # 's' is sent as the paging parameter; the value formula is
                    # kept as-is. NOTE(review): presumably a result offset —
                    # verify against the endpoint.
                    params['s'] = str(page * 44) if page > 1 else '1'
                    # A timeout keeps a stalled connection from freezing the
                    # GUI forever.
                    resp = requests.get(url, params, headers=header, timeout=30)
                    # Strip newlines and commas once so extracted values
                    # cannot break the hand-written CSV rows.
                    text = resp.text.replace('\n', '').replace('\r', '').replace(',', '').strip()
                    results = [re.findall(r'"{}":"([^"]+)"'.format(key), text, re.I)
                               for key in keys]
                    print('page{}: 正在写入数据...'.format(page))
                    lastColumn = len(results) - 1
                    with open(fileName, 'a', encoding=encode) as saveFile:
                        for row in range(len(results[0])):
                            print('\r写入第{}条..'.format(row + 1), end='')
                            rowWrited += 1
                            for column, values in enumerate(results):
                                separator = ',' if column < lastColumn else '\n'
                                try:
                                    saveFile.write('{}{}'.format(values[row], separator))
                                except (IndexError, UnicodeError):
                                    # A column with fewer matches than the
                                    # first one, or a value the output
                                    # encoding cannot represent, becomes
                                    # 'null' so the row stays aligned.
                                    saveFile.write('null{}'.format(separator))
                    print('page{}完成...'.format(page))

                print(
                    '\n任务完成!! 页面总数: {} | 写入数据: {}条 | 用时: {:.2f}s'.format(pageTotal, rowWrited, time.time() - startTime))
                tkinter.messagebox.showinfo('提示', '爬取完成!')
                finxi(fileName)

            # Crawler window widgets: prompt, keyword entry and start button.
            label = tkinter.Label(win, text="请输入爬取的关键字")
            label.pack()
            entry = tkinter.Entry(win)
            entry.pack()
            button = tkinter.Button(win, text="点击爬取", command=showinfo)
            button.pack()
            win.mainloop()

        else:
            tk.messagebox.showerror(message='密码错误')
    # User name and password must not be empty.
    elif usr_name == '' or usr_pwd == '':
        tk.messagebox.showerror(message='用户名或密码为空')
    # Unknown user: ask whether to register now.
    else:
        is_signup = tk.messagebox.askyesno('欢迎', '您还没有注册,是否现在注册')
        if is_signup:
            usr_sign_up()


# 注册函数
def usr_sign_up():
    """Open the registration dialog as a child of the login window.

    The dialog has a user name field and two masked password fields. Pressing
    the confirm button validates the input and, when it is acceptable, stores
    the new account in ``usr_info.pickle`` and closes the dialog.
    """
    def confirm_registration():
        """Validate the form and persist the new account if it is valid."""
        name = name_var.get()
        password = password_var.get()
        password_again = confirm_var.get()

        # Load the existing accounts; start from an empty store if the file
        # does not exist yet.
        try:
            with open('usr_info.pickle', 'rb') as usr_file:
                accounts = pickle.load(usr_file)
        except FileNotFoundError:
            accounts = {}

        # Reject, in this order: duplicate user name, empty input, mismatch
        # between the two password fields.
        if name in accounts:
            tk.messagebox.showerror('错误', '用户名已存在')
            return
        if password == '' or name == '':
            tk.messagebox.showerror('错误', '用户名或密码为空')
            return
        if password != password_again:
            tk.messagebox.showerror('错误', '密码前后不一致')
            return

        # Input is valid: save the account and close the dialog.
        accounts[name] = password
        with open('usr_info.pickle', 'wb') as usr_file:
            pickle.dump(accounts, usr_file)
        tk.messagebox.showinfo('欢迎', '注册成功')
        dialog.destroy()

    def add_field(caption, y, masked):
        """Place a caption and an entry at height *y*; return the entry's variable."""
        value = tk.StringVar()
        tk.Label(dialog, text=caption).place(x=10, y=y)
        entry_options = {'show': '*'} if masked else {}
        tk.Entry(dialog, textvariable=value, **entry_options).place(x=150, y=y)
        return value

    # Registration dialog.
    dialog = tk.Toplevel(window)
    dialog.geometry('350x200')
    dialog.title('注册')

    # User name, password and password confirmation fields.
    name_var = add_field('用户名:', 10, False)
    password_var = add_field('请输入密码:', 50, True)
    confirm_var = add_field('请再次输入密码:', 90, True)

    # Confirm button.
    confirm_button = tk.Button(dialog, text='确认注册',
                               command=confirm_registration)
    confirm_button.place(x=150, y=130)



# 退出的函数
def usr_sign_quit():
    """Close the login window by destroying the module-level ``window``."""
    window.destroy()


# 登录 注册按钮
# Login, register and quit buttons, placed in a row below the entry fields.
bt_login = tk.Button(window, text='登录', command=usr_log_in)
bt_login.place(x=140, y=230)
bt_logup = tk.Button(window, text='注册', command=usr_sign_up)
bt_logup.place(x=210, y=230)
bt_logquit = tk.Button(window, text='退出', command=usr_sign_quit)
bt_logquit.place(x=280, y=230)

# Run the login window's event loop.
window.mainloop()
# def fenxi(fileName):
#     filename = '%r.csv' % (fileName)
#     with open(filename, 'r', encoding='ANSI') as f:
#         reader = csv.DictReader(f)
#         view_price = []
#         view_sales = []
#         for row in reader:
#             # TODO
#             # 将 'Home Team Goals' 、'Away Team Goals'中的每个元素以整型数据分别添加在相应的列表中
#             view_price.append(row['view_price'])
#             view_sales.append(row['view_sales'])
#
#             # 转变数据内形式,str2int
#             # home_team_goals.append(int(row['Home Team Goals']))
#             # away_team_goals.append(int(row['Away Team Goals']))
#
#     print(view_price[:44])
#     print(view_sales[:44])
#
#     fig = plt.figure(dpi=128, figsize=(10, 6))
#     # 设置图片大小
#     view_price.reverse()
#     view_sales.reverse()
#     # 设置图表的字体微软雅黑 防止中文乱码的
#     # zh_font = font_manager.FontProperties(fname='C:\\Windows\\Fonts\\msyh.ttf')
#     # 使用横向条形图表
#     plt.rcParams['font.sans-serif'] = ['SimHei']  # 显示中文标签
#
#     plt.barh(view_sales, view_price)
#     fig.autofmt_xdate()  # 让x轴标签斜着打印避免拥挤
#
#     # for x,y in enumerate(view_price):
#     #     plt.text(y+0.1,x,'%s' %y,va='center')
#     plt.show()

# NOTE(review): second call to mainloop(); the identical call earlier in the
# file has already run the event loop, so this one looks redundant — confirm
# before removing.
window.mainloop()










Released three original articles · won praise 1 · views 94

Guess you like

Origin blog.csdn.net/qq_44026036/article/details/104109976
Recommended