本项目实现一个带有简单社交功能的电影搜索引擎。服务器向客户端提供API(应用程序编程接口),客户端可通过这些API使用搜索电影、留言等功能。服务器和客户端程序都用Python实现。
使用来自互联网电影数据库(IMDb)的数据集来填充我们的搜索引擎的内容。数据集可以在Kaggle上找到(https://www.kaggle.com/PromptCloudHQ/imdb-data/data)。
详细说明
服务器
该服务器提供了几个功能:
[使用给定查询字符串搜索电影]:客户端可以选择在电影标题或演员/女演员列表中搜索查询字符串。符合条件的电影列表将返回给客户端,按照请求中指定的方式进行排序。
[检索电影信息]:给定电影的ID时,服务器将返回该电影的全部信息,其中包括用户留下的评论列表(参见下一个功能)。
[用户评论]:用户可以向服务器提交请求,指定电影的ID和留言内容,对该电影发表评论。
启动服务器时,需要在命令行中给出它监听客户端连接所用的端口号:
$ python3 server.py 5000
下面是代码:
server.py
import datetime
import sys

import nltk
import pandas
from flask import Flask
from flask import current_app
from flask import jsonify
from flask import request
# Port the server listens on: taken from the first command-line argument
# (``python3 server.py 5000``). Falls back to DEFAULT_PORT when the argument
# is missing or is not an integer, so the module can still be imported by
# other programs whose argv has a different meaning.
DEFAULT_PORT = 5001
try:
    portNum = int(sys.argv[1])
except (IndexError, ValueError):
    portNum = DEFAULT_PORT
# The Flask application. The movie table and the two inverted indexes are
# stored as attributes on it so request handlers can reach them through
# ``current_app``; all three start out empty and are filled in by init().
app = Flask(__name__)
app.movie_dic, app.title_index, app.actor_index = {}, {}, {}
def _build_inverted_index(token_lists):
    """Map every token to the list of movie ids whose field contains it.

    Args:
        token_lists: dict of ``{movie_id: [token, ...]}``.

    Returns:
        dict of ``{token: [movie_id, ...]}``; ids appear in the iteration
        order of *token_lists* and each id is listed at most once per token.
    """
    index = {}
    for movie_id, tokens in token_lists.items():
        for token in tokens:
            # Always extend the postings list that belongs to *this* token.
            postings = index.setdefault(token, [])
            # A token repeated inside one field must not be listed twice.
            if not postings or postings[-1] != movie_id:
                postings.append(movie_id)
    return index


def init():
    """Load the IMDb data set and build the title and actor search indexes.

    Downloads the CSV over HTTP, so it needs network access. Fills in
    ``app.movie_data`` (the transposed DataFrame), ``app.movie_dic``
    (``{movie_id: {field: value}}``), ``app.title_index`` and
    ``app.actor_index`` (``{token: [movie_id, ...]}``).

    Assumes the CSV provides 'Title' and 'Actors' columns holding strings.
    """
    # Transposed so that every column is one movie; to_dict() then yields one
    # record per movie id.
    app.movie_data = pandas.read_csv(
        'http://iems5703.albertauyeung.com/files/imdb_top1000.csv').transpose()
    app.movie_dic = app.movie_data.to_dict()

    # Split the two searchable fields of every movie into tokens.
    title_tokens = {}
    actor_tokens = {}
    for movie_id, record in app.movie_dic.items():
        title_tokens[movie_id] = nltk.word_tokenize(record['Title'])
        actor_tokens[movie_id] = nltk.word_tokenize(record['Actors'])

    # Build the inverted indexes used by the /search endpoint.
    app.title_index = _build_inverted_index(title_tokens)
    app.actor_index = _build_inverted_index(actor_tokens)
def _lookup_ids(index, query):
    """Return the set of movie ids listed under tokens equal to *query*.

    The comparison ignores the case of the index tokens; *query* must already
    be lower-cased. Ids from all matching tokens are merged.
    """
    hits = set()
    for token, movie_ids in index.items():
        if token.lower() == query:
            hits.update(movie_ids)
    return hits


@app.route('/search', methods=['GET'])
def search():
    """Handle ``GET /search``: find movies whose title or cast has the query token.

    Query-string parameters:
        query: the token to look for (matched case-insensitively against
            whole tokens of the index). Missing or empty matches nothing.
        attribute: 'title', 'actor' or 'both' - which index to search.
        sortby: 'year' sorts the matches by their 'Year' field; any other
            value leaves them in ascending id order.
        order: 'descending' reverses the sort; anything else is ascending.

    Returns:
        A JSON object mapping movie id to the movie record, limited to the
        first 10 matches after sorting.
    """
    query = (request.args.get('query') or '').lower()
    attribute = request.args.get('attribute')
    sortby = request.args.get('sortby')
    descending = request.args.get('order') == "descending"

    matches = set()
    if attribute in ('title', 'both'):
        matches |= _lookup_ids(current_app.title_index, query)
    if attribute in ('actor', 'both'):
        matches |= _lookup_ids(current_app.actor_index, query)

    movies = current_app.movie_dic
    # Start from id order so ties and the unsorted fallback are deterministic.
    ranked = sorted(matches)
    if sortby == "year":
        # Sort the matches by release year.
        ranked.sort(key=lambda movie_id: movies[movie_id]['Year'],
                    reverse=descending)

    # Keep only the first 10 results.
    result_dic = {movie_id: movies[movie_id] for movie_id in ranked[:10]}
    # NOTE(review): depending on the Flask JSON configuration, jsonify may
    # re-sort the keys, in which case the order above only decides which
    # 10 movies are kept - confirm against the deployed Flask version.
    return jsonify(result_dic)
@app.route('/movie/<movie_id>', methods=['GET'])
def movie(movie_id, user_name=None, comment=None, timestamp=None):
    """Return one movie record as JSON, optionally adding a user comment first.

    Serves ``GET /movie/<movie_id>`` and is also called directly by the
    comment handler with the three optional arguments filled in.

    Args:
        movie_id: id of the movie; anything ``int()`` accepts.
        user_name: author of the comment; when None no comment is added.
        comment: text of the comment.
        timestamp: creation time of the comment, as a string.

    Returns:
        The JSON response for the movie record, including its 'comments'
        list, or a ``(response, 404)`` pair when the id is not a known movie.
    """
    try:
        record = current_app.movie_dic[int(movie_id)]
    except (TypeError, ValueError, KeyError):
        return jsonify({"error": "movie not found"}), 404

    # Every record served gets a 'comments' list, created on first access.
    comments = record.setdefault('comments', [])
    if user_name is not None:
        comments.append(
            {"comment": comment, "timestamp": timestamp, "user_name": user_name})

    # Serialise only after the update so the reply contains the new comment.
    return jsonify(record)
@app.route('/comment', methods=['POST'])
def comment():
    """Handle ``POST /comment``: attach a user comment to a movie.

    Form fields:
        user_comment: text of the comment.
        user_name: name of the author.
        movie_id: id of the movie being commented on.

    Returns:
        Whatever ``movie()`` returns for the commented movie; a JSON error
        with status 400 when a field is missing or the id is not an integer;
        status 404 when the id is not a known movie.
    """
    text = request.form.get('user_comment')
    user_name = request.form.get('user_name')
    movie_id = request.form.get('movie_id')

    if text is None or user_name is None or movie_id is None:
        return jsonify(
            {"error": "user_comment, user_name and movie_id are required"}), 400
    try:
        movie_key = int(movie_id)
    except ValueError:
        return jsonify({"error": "movie_id must be an integer"}), 400
    if movie_key not in current_app.movie_dic:
        return jsonify({"error": "movie not found"}), 404

    # The comment is stamped with the server's local time.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return movie(movie_id, user_name, text, timestamp=timestamp)
if __name__ == "__main__":
    # Load the data set and build the indexes before accepting requests;
    # the handlers read them through current_app.
    init()
    app.run(port=portNum, host="localhost")
client.py
import sys
import json
import requests
def argv():
    """Build the request description from the command-line arguments.

    Supported invocations::

        client.py <ip> <port> search <query> <attribute> <sortby> <order>
        client.py <ip> <port> movie <movie_id>
        client.py <ip> <port> comment <user_name> <movie_id>

    Every argument that is omitted falls back to the development default
    given in the code below, so running the client without arguments behaves
    as it did when these values were hard-coded.

    Returns:
        dict: always holds 'ipAddress', 'portNum' and 'type', plus the keys
        specific to the request type. Values taken from the command line are
        strings. "Error!" is printed when the type is not a known one.
    """
    cli = sys.argv[1:]

    def pick(position, default):
        # The positional argument when supplied, otherwise the default.
        return cli[position] if position < len(cli) else default

    args = {
        'ipAddress': pick(0, 'localhost'),
        'portNum': pick(1, 5001),
        'type': pick(2, 'comment'),
    }

    kind = args['type']
    if kind == "search":
        args['query'] = pick(3, 'leonardo')
        args['attribute'] = pick(4, 'actor')
        args['sortby'] = pick(5, 'year')
        args['order'] = pick(6, 'descending')
    elif kind == "movie":
        args['movie_id'] = pick(3, 5)
    elif kind == "comment":
        args['userName'] = pick(3, 'albert')
        args['movie_id'] = pick(4, 85)
    else:
        print("Error!")
    return args
def _format_body(response):
    """Pretty-print a JSON reply, or return the raw text when it is not JSON."""
    try:
        payload = response.json()
    except ValueError:
        # The body is not JSON (for example an HTML error page); show it
        # as received instead of crashing.
        return response.text
    return json.dumps(payload, indent=4, sort_keys=True)


def excute(args):
    """Send the request described by *args* to the server and print the reply.

    Args:
        args: dict holding 'type' ('search', 'movie' or 'comment'),
            'ipAddress', 'portNum' and the keys of that request type
            ('query', 'attribute', 'sortby', 'order' / 'movie_id' /
            'userName', 'movie_id'). For 'comment' the text is read from
            standard input.

    Prints a summary of the request followed by the server's reply. For any
    other type no request is made and only the placeholder result is printed.
    """
    result = "Defult"
    kind = args['type']

    if kind == "search":
        params = {name: args[name]
                  for name in ('query', 'attribute', 'sortby', 'order')}
        url = 'http://{}:{}/search'.format(args['ipAddress'], args['portNum'])
        result = _format_body(requests.get(url, params=params))
        print("Request: /search?query={}&attribute={}&sortby={}&order={}".format(
            args['query'], args['attribute'], args['sortby'], args['order']))
        print("\r")
        print("Response: ")
    elif kind == "movie":
        # The request type doubles as the URL path segment.
        url = 'http://{}:{}/{}/{}'.format(
            args['ipAddress'], args['portNum'], kind, args['movie_id'])
        result = _format_body(requests.get(url))
        print("Request: /movie/{}".format(args['movie_id']))
        print("\r")
        print("Response:")
    elif kind == "comment":
        input_comment = input("What is your comment? <User inputs his/her comment here and press enter>")
        url = 'http://{}:{}/comment'.format(args['ipAddress'], args['portNum'])
        payload = {'user_comment': input_comment,
                   'user_name': args['userName'],
                   'movie_id': args['movie_id']}
        result = _format_body(requests.post(url, data=payload))
        print("Request: /comment")
        print("user_name = ", args['userName'])
        print("movie_id = ", args['movie_id'])
        print("comment = ", input_comment)
        print("\r")
        print("Response: ")

    print(result)
if __name__ == '__main__':
    # Work out which request to make, then send it and print the reply.
    args = argv()
    excute(args)