基于HTTP的电影搜索引擎

本文实现了一个带有少量社交功能的简单电影搜索引擎。服务器为客户端提供API(应用程序编程接口),客户端可以通过它使用搜索电影、留言等功能。服务器和客户端程序都用Python实现。

使用来自互联网电影数据库(IMDb)的数据集来填充我们的搜索引擎的内容。数据集可以在Kaggle上找到(https://www.kaggle.com/PromptCloudHQ/imdb-data/data)。

详细说明

服务器

该服务器提供了几个功能:

[使用给定查询字符串搜索电影]:客户端可以选择在电影标题或演员/女演员列表中搜索查询字符串。符合条件的电影列表将返回给客户端,按照请求中指定的方式进行排序。
[检索电影信息]:当给出电影的ID时,服务器将返回关于该电影的所有信息,包括用户留下的评论列表(请参阅下一个功能)。
[用户评论]:用户可以向服务器提交请求,指定电影的ID和留言内容,从而对该电影留言。
启动服务器时,需要在命令行中给出服务器监听客户端连接所用的端口号:

$ python3 server.py 5000

下面是代码:

server.py

import pandas
import nltk
import datetime
from flask import Flask
from flask import current_app
from flask import request
from flask import jsonify

# Default port the server listens on.
portNum = 5001

# Flask application object. The three attributes start out empty and are
# filled in by init().
app = Flask(__name__)
app.movie_dic, app.title_index, app.actor_index = {}, {}, {}

def _build_inverted_index(field_by_movie, field):
    """Tokenize one text field of every movie and index the tokens.

    Args:
        field_by_movie: ``{movie_id: {field: text}}`` for a single field.
        field: Name of that field (the inner dictionary key).

    Returns:
        ``{token: [movie_id, ...]}``. Tokens keep their original case and
        each movie ID appears at most once per token.
    """
    index = {}
    for movie_id, record in field_by_movie.items():
        for token in nltk.word_tokenize(record[field]):
            # setdefault guarantees the ID is added to *this* token's own
            # posting list, whether or not the token was seen before.
            postings = index.setdefault(token, [])
            # Movies are visited one at a time, so a repeated token within
            # the same movie can only duplicate the last entry.
            if not postings or postings[-1] != movie_id:
                postings.append(movie_id)
    return index


def init():
    """Load the movie dataset and build the in-memory search indexes.

    Downloads the CSV file and stores the following on the Flask ``app``
    object:

    * ``app.movie_data``  -- the transposed DataFrame (one column per movie).
    * ``app.movie_dic``   -- ``{movie_id: {field_name: value}}``.
    * ``app.title_index`` -- ``{title token: [movie_id, ...]}``.
    * ``app.actor_index`` -- ``{actor token: [movie_id, ...]}``.

    Must be called before the server starts answering requests.
    """
    app.movie_data = pandas.read_csv(
        'http://iems5703.albertauyeung.com/files/imdb_top1000.csv').transpose()
    app.movie_dic = app.movie_data.to_dict()

    # After the transpose every movie is a column, so slicing out a single
    # row yields {movie_id: {row_label: text}} for that one field.
    title_dic = app.movie_data[1:2].to_dict()
    actor_dic = app.movie_data[5:6].to_dict()

    app.title_index = _build_inverted_index(title_dic, 'Title')
    app.actor_index = _build_inverted_index(actor_dic, 'Actors')

@app.route('/search', methods=['GET'])
def search():
    """Search movies by a single token in their title and/or actor list.

    Query parameters:
        query: Token to look up. It is compared case-insensitively with the
            indexed tokens; a missing value matches nothing.
        attribute: ``'title'``, ``'actor'`` or ``'both'`` -- which index(es)
            to search. Any other value matches nothing.
        sortby: Only ``'year'`` is handled; any other value produces an
            empty result.
        order: ``'descending'`` sorts from high to low; anything else sorts
            from low to high.

    Returns:
        A JSON object mapping movie ID to the movie record, holding at most
        the first ten matches in the requested order.

    NOTE(review): the result is an object keyed by movie ID, so the sort
    order mainly decides *which* ten movies are kept; confirm whether
    clients rely on the key order of the JSON object.
    """
    query = (request.args.get('query') or '').lower()
    attribute = request.args.get('attribute')
    sortby = request.args.get('sortby')
    descending = request.args.get('order') == "descending"

    # Pick the index(es) to consult for the requested attribute.
    indexes = []
    if attribute in ('title', 'both'):
        indexes.append(current_app.title_index)
    if attribute in ('actor', 'both'):
        indexes.append(current_app.actor_index)

    # Index keys keep their original case, so several keys ("The", "the")
    # can match one query; merge all of their posting lists.
    matched_ids = set()
    for index in indexes:
        for token, movie_ids in index.items():
            if query == token.lower():
                matched_ids.update(movie_ids)

    # Collect the sort key of every match. Iterating in ID order makes the
    # order of movies with equal keys deterministic.
    order_dic = {}
    if sortby == "year":
        for movie_id in sorted(matched_ids):
            order_dic[movie_id] = current_app.movie_dic[movie_id]['Year']

    ranked = sorted(order_dic.items(), key=lambda item: item[1],
                    reverse=descending)

    # Keep only the first ten results.
    result_dic = {movie_id: current_app.movie_dic[movie_id]
                  for movie_id, _ in ranked[:10]}
    return jsonify(result_dic)

@app.route('/movie/<movie_id>', methods=['GET'])
def movie(movie_id, user_name=None, comment=None, timestamp=None):
    """Return one movie record, optionally adding a user comment first.

    Used both as the ``GET /movie/<movie_id>`` view and as a helper called
    by ``comment()``.

    Args:
        movie_id: Movie ID as an int or a numeric string.
        user_name: Author of the comment; when ``None`` no comment is added.
        comment: Text of the comment.
        timestamp: Time the comment was made, as a string.

    Returns:
        The movie record as JSON, including its ``comments`` list (with the
        comment that was just added, if any). If ``movie_id`` is not a
        number or no such movie exists, a JSON error with status 404.
    """
    try:
        record = current_app.movie_dic[int(movie_id)]
    except (KeyError, TypeError, ValueError):
        return jsonify({"error": "movie not found"}), 404

    comments = record.setdefault('comments', [])
    if user_name is not None:
        comments.append({"comment": comment, "timestamp": timestamp,
                         "user_name": user_name})

    # Serialize only after the record has been updated so that the response
    # reflects the stored state.
    return jsonify(record)


@app.route('/comment', methods=['POST'])
def comment():
    """Attach a user comment to a movie.

    Form fields:
        user_comment: Text of the comment.
        user_name: Name of the commenting user.
        movie_id: ID of the movie being commented on.

    Returns:
        Whatever ``movie()`` returns for the given movie, or a JSON error
        with status 400 when one of the form fields is missing.
    """
    fields = {name: request.form.get(name)
              for name in ('user_comment', 'user_name', 'movie_id')}
    missing = [name for name, value in fields.items() if value is None]
    if missing:
        # Without this check a missing user_name would silently store
        # nothing, and a missing movie_id would fail inside movie().
        return jsonify(
            {"error": "missing form field(s): " + ", ".join(missing)}), 400

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return movie(fields['movie_id'], fields['user_name'],
                 fields['user_comment'], timestamp=timestamp)



if __name__ == '__main__':
    import sys  # only needed when the module is run as a script

    # Usage: python3 server.py [port]
    # Fall back to the module-level default when no port is given.
    listen_port = portNum
    if len(sys.argv) > 1:
        try:
            listen_port = int(sys.argv[1])
        except ValueError:
            sys.exit("Usage: python3 server.py [port]")

    # Load the dataset and build the indexes before accepting requests.
    init()
    app.run(host="localhost", port=listen_port)

client.py

import sys
import json
import requests


def argv():
    """Build the request parameters from the command line.

    Usage::

        client.py [ip] [port] search  [query] [attribute] [sortby] [order]
        client.py [ip] [port] movie   [movie_id]
        client.py [ip] [port] comment [user_name] [movie_id]

    Every argument is optional; an argument that is not given falls back to
    a built-in default. Values taken from the command line are strings.

    Returns:
        A dict with the keys ``ipAddress``, ``portNum`` and ``type`` plus
        the keys specific to the request type: ``query``, ``attribute``,
        ``sortby`` and ``order`` for "search"; ``movie_id`` for "movie";
        ``userName`` and ``movie_id`` for "comment". Any other type gets
        only the three common keys.
    """
    def arg(position, default):
        # Return sys.argv[position] when it was supplied, else the default.
        return sys.argv[position] if len(sys.argv) > position else default

    args = {
        'ipAddress': arg(1, 'localhost'),
        'portNum': arg(2, 5001),
        'type': arg(3, 'comment'),
    }

    if args['type'] == "search":
        args['query'] = arg(4, 'leonardo')
        args['attribute'] = arg(5, 'actor')
        args['sortby'] = arg(6, 'year')
        args['order'] = arg(7, 'descending')
    elif args['type'] == "movie":
        args['movie_id'] = arg(4, 5)
    elif args['type'] == "comment":
        args['userName'] = arg(4, 'albert')
        args['movie_id'] = arg(5, 85)

    return args


def _format_response(response):
    """Pretty-print a response body as JSON, falling back to its raw text."""
    try:
        return json.dumps(response.json(), indent=4, sort_keys=True)
    except ValueError:
        # The body is not JSON (for example an HTML error page); show the
        # status and raw text instead of failing with a traceback.
        return "HTTP {}: {}".format(response.status_code, response.text)


def excute(args):
    """Send the request described by ``args`` and print the outcome.

    Args:
        args: Dict as produced by ``argv()``. ``args['type']`` selects the
            request: "search" and "movie" issue a GET, "comment" prompts the
            user for the comment text and issues a POST. Any other type
            sends nothing and prints the placeholder result.

    Returns:
        None. The request summary and the formatted response are printed.
    """
    result = "Defult"
    request_type = args['type']

    if request_type == "search":
        param = {key: args[key]
                 for key in ('query', 'attribute', 'sortby', 'order')}
        url = 'http://{}:{}/search'.format(args['ipAddress'], args['portNum'])
        result = _format_response(requests.get(url, params=param))
        print("Request: /search?query={}&attribute={}&sortby={}&order={}".format(args['query'], args['attribute'],args['sortby'], args['order']))
        print("\r")
        print("Response: ")
    elif request_type == "movie":
        url = 'http://{}:{}/{}/{}'.format(args['ipAddress'], args['portNum'], args['type'], args['movie_id'])
        result = _format_response(requests.get(url))
        print("Request: /movie/{}".format(args['movie_id']))
        print("\r")
        print("Response:")
    elif request_type == "comment":
        input_comment = input("What is your comment? <User inputs his/her comment here and press enter>")
        url = 'http://{}:{}/comment'.format(args['ipAddress'], args['portNum'])
        payload = {'user_comment': input_comment,
                   'user_name': args['userName'],
                   'movie_id': args['movie_id']}
        result = _format_response(requests.post(url, data=payload))
        print("Request: /comment")
        print("user_name = ", args['userName'])
        print("movie_id = ", args['movie_id'])
        print("comment = ", input_comment)
        print("\r")
        print("Response: ")

    print(result)

if __name__ == "__main__":
    # Collect the request parameters, then send the request and print the
    # server's reply.
    excute(argv())

猜你喜欢

转载自blog.csdn.net/ninnyyan/article/details/80178143