引言
利用 Jupyter Notebook 实现一个简单的推荐系统,数据来源是 MovieLens 的 ml-100k 数据集(约 5M 的压缩文件)
数据导入
准备三份数据来进行训练
(1)数据是拥有1682份数据的电影集
(2)用户点评数据
(3)电影明细表
#先把三份文件的数据读到内存里,用pandas来做数据读取。
import os
import time

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
def init(data_dir="E:\\Mycode\\Recommended_system\\ml-100k"):
    """Read the user, rating and movie-detail tables from disk.

    :param data_dir: directory containing the files ``u.user``, ``u.data``
        and ``u.item``. Defaults to the location the notebook was written
        against, so calling ``init()`` with no argument behaves as before.
    :return: tuple ``(users, ratings, items)`` of pandas DataFrames.
    """
    # None of the files is read with a header row; names= supplies the labels.
    u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
    r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
    i_cols = [
        'movie_id', 'movie_title', 'release date', 'video release date',
        'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
        'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
        'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
        'Thriller', 'War', 'Western',
    ]

    user_file = os.path.join(data_dir, "u.user")
    data_file = os.path.join(data_dir, "u.data")
    item_file = os.path.join(data_dir, "u.item")

    # u.user and u.item are pipe-separated; u.data is tab-separated.
    users = pd.read_csv(user_file, sep="|", names=u_cols, encoding='latin-1')
    ratings = pd.read_csv(data_file, sep='\t', names=r_cols, encoding='latin-1')
    items = pd.read_csv(item_file, sep='|', names=i_cols, encoding='latin-1')
    return users, ratings, items
# `users` has only been assigned inside init() so far and init() has not been
# called, so this cell fails with the NameError shown in the output below.
users.head()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-2-505e5cd46540> in <module>
----> 1 users.head()
NameError: name 'users' is not defined
# Column labels passed to read_csv through names=.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
user_file = "E:\\Mycode\\Recommended_system\\ml-100k\\u.user"
# Read the pipe-separated user table into the module-level `users` frame.
users = pd.read_csv(user_file, names=u_cols, sep="|", encoding='latin-1')
# Bare expression: the notebook renders the frame as the cell output.
users
user_id | age | sex | occupation | zip_code | |
---|---|---|---|---|---|
0 | 1 | 24 | M | technician | 85711 |
1 | 2 | 53 | F | other | 94043 |
2 | 3 | 23 | M | writer | 32067 |
3 | 4 | 24 | M | technician | 43537 |
4 | 5 | 33 | F | other | 15213 |
5 | 6 | 42 | M | executive | 98101 |
6 | 7 | 57 | M | administrator | 91344 |
7 | 8 | 36 | M | administrator | 05201 |
8 | 9 | 29 | M | student | 01002 |
9 | 10 | 53 | M | lawyer | 90703 |
10 | 11 | 39 | F | other | 30329 |
11 | 12 | 28 | F | other | 06405 |
12 | 13 | 47 | M | educator | 29206 |
13 | 14 | 45 | M | scientist | 55106 |
14 | 15 | 49 | F | educator | 97301 |
15 | 16 | 21 | M | entertainment | 10309 |
16 | 17 | 30 | M | programmer | 06355 |
17 | 18 | 35 | F | other | 37212 |
18 | 19 | 40 | M | librarian | 02138 |
19 | 20 | 42 | F | homemaker | 95660 |
20 | 21 | 26 | M | writer | 30068 |
21 | 22 | 25 | M | writer | 40206 |
22 | 23 | 30 | F | artist | 48197 |
23 | 24 | 21 | F | artist | 94533 |
24 | 25 | 39 | M | engineer | 55107 |
25 | 26 | 49 | M | engineer | 21044 |
26 | 27 | 40 | F | librarian | 30030 |
27 | 28 | 32 | M | writer | 55369 |
28 | 29 | 41 | M | programmer | 94043 |
29 | 30 | 7 | M | student | 55436 |
... | ... | ... | ... | ... | ... |
913 | 914 | 44 | F | other | 08105 |
914 | 915 | 50 | M | entertainment | 60614 |
915 | 916 | 27 | M | engineer | N2L5N |
916 | 917 | 22 | F | student | 20006 |
917 | 918 | 40 | M | scientist | 70116 |
918 | 919 | 25 | M | other | 14216 |
919 | 920 | 30 | F | artist | 90008 |
920 | 921 | 20 | F | student | 98801 |
921 | 922 | 29 | F | administrator | 21114 |
922 | 923 | 21 | M | student | E2E3R |
923 | 924 | 29 | M | other | 11753 |
924 | 925 | 18 | F | salesman | 49036 |
925 | 926 | 49 | M | entertainment | 01701 |
926 | 927 | 23 | M | programmer | 55428 |
927 | 928 | 21 | M | student | 55408 |
928 | 929 | 44 | M | scientist | 53711 |
929 | 930 | 28 | F | scientist | 07310 |
930 | 931 | 60 | M | educator | 33556 |
931 | 932 | 58 | M | educator | 06437 |
932 | 933 | 28 | M | student | 48105 |
933 | 934 | 61 | M | engineer | 22902 |
934 | 935 | 42 | M | doctor | 66221 |
935 | 936 | 24 | M | other | 32789 |
936 | 937 | 48 | M | educator | 98072 |
937 | 938 | 38 | F | technician | 55038 |
938 | 939 | 26 | F | student | 33319 |
939 | 940 | 32 | M | administrator | 02215 |
940 | 941 | 20 | M | student | 97229 |
941 | 942 | 48 | F | librarian | 78209 |
942 | 943 | 22 | M | student | 77841 |
943 rows × 5 columns
# Column labels passed to read_csv through names=.
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
data_file = "E:\\Mycode\\Recommended_system\\ml-100k\\u.data"
# Read the tab-separated rating table into the module-level `ratings` frame.
ratings = pd.read_csv(data_file, names=r_cols, sep='\t', encoding='latin-1')
# Bare expression: the notebook renders the frame as the cell output.
ratings
user_id | movie_id | rating | unix_timestamp | |
---|---|---|---|---|
0 | 196 | 242 | 3 | 881250949 |
1 | 186 | 302 | 3 | 891717742 |
2 | 22 | 377 | 1 | 878887116 |
3 | 244 | 51 | 2 | 880606923 |
4 | 166 | 346 | 1 | 886397596 |
5 | 298 | 474 | 4 | 884182806 |
6 | 115 | 265 | 2 | 881171488 |
7 | 253 | 465 | 5 | 891628467 |
8 | 305 | 451 | 3 | 886324817 |
9 | 6 | 86 | 3 | 883603013 |
10 | 62 | 257 | 2 | 879372434 |
11 | 286 | 1014 | 5 | 879781125 |
12 | 200 | 222 | 5 | 876042340 |
13 | 210 | 40 | 3 | 891035994 |
14 | 224 | 29 | 3 | 888104457 |
15 | 303 | 785 | 3 | 879485318 |
16 | 122 | 387 | 5 | 879270459 |
17 | 194 | 274 | 2 | 879539794 |
18 | 291 | 1042 | 4 | 874834944 |
19 | 234 | 1184 | 2 | 892079237 |
20 | 119 | 392 | 4 | 886176814 |
21 | 167 | 486 | 4 | 892738452 |
22 | 299 | 144 | 4 | 877881320 |
23 | 291 | 118 | 2 | 874833878 |
24 | 308 | 1 | 4 | 887736532 |
25 | 95 | 546 | 2 | 879196566 |
26 | 38 | 95 | 5 | 892430094 |
27 | 102 | 768 | 2 | 883748450 |
28 | 63 | 277 | 4 | 875747401 |
29 | 160 | 234 | 5 | 876861185 |
... | ... | ... | ... | ... |
99970 | 449 | 120 | 1 | 879959573 |
99971 | 661 | 762 | 2 | 876037121 |
99972 | 721 | 874 | 3 | 877137447 |
99973 | 821 | 151 | 4 | 874792889 |
99974 | 764 | 596 | 3 | 876243046 |
99975 | 537 | 443 | 3 | 886031752 |
99976 | 618 | 628 | 2 | 891308019 |
99977 | 487 | 291 | 3 | 883445079 |
99978 | 113 | 975 | 5 | 875936424 |
99979 | 943 | 391 | 2 | 888640291 |
99980 | 864 | 685 | 4 | 888891900 |
99981 | 750 | 323 | 3 | 879445877 |
99982 | 279 | 64 | 1 | 875308510 |
99983 | 646 | 750 | 3 | 888528902 |
99984 | 654 | 370 | 2 | 887863914 |
99985 | 617 | 582 | 4 | 883789294 |
99986 | 913 | 690 | 3 | 880824288 |
99987 | 660 | 229 | 2 | 891406212 |
99988 | 421 | 498 | 4 | 892241344 |
99989 | 495 | 1091 | 4 | 888637503 |
99990 | 806 | 421 | 4 | 882388897 |
99991 | 676 | 538 | 4 | 892685437 |
99992 | 721 | 262 | 3 | 877137285 |
99993 | 913 | 209 | 2 | 881367150 |
99994 | 378 | 78 | 3 | 880056976 |
99995 | 880 | 476 | 3 | 880175444 |
99996 | 716 | 204 | 5 | 879795543 |
99997 | 276 | 1090 | 1 | 874795795 |
99998 | 13 | 225 | 2 | 882399156 |
99999 | 12 | 203 | 3 | 879959583 |
100000 rows × 4 columns
# Column labels passed to read_csv through names=: five descriptive fields
# followed by one 0/1 flag column per genre.
i_cols = [
    'movie_id', 'movie_title', 'release date', 'video release date', 'IMDb URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]
item_file = "E:\\Mycode\\Recommended_system\\ml-100k\\u.item"
# Read the pipe-separated movie table into the module-level `items` frame.
items = pd.read_csv(item_file, names=i_cols, sep='|', encoding='latin-1')
# Bare expression: the notebook renders the frame as the cell output.
items
movie_id | movie_title | release date | video release date | IMDb URL | unknown | Action | Adventure | Animation | Children's | ... | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Toy Story (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Toy%20Story%2... | 0 | 0 | 0 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 2 | GoldenEye (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?GoldenEye%20(... | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
2 | 3 | Four Rooms (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Four%20Rooms%... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
3 | 4 | Get Shorty (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Get%20Shorty%... | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 5 | Copycat (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Copycat%20(1995) | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
5 | 6 | Shanghai Triad (Yao a yao yao dao waipo qiao) ... | 01-Jan-1995 | NaN | http://us.imdb.com/Title?Yao+a+yao+yao+dao+wai... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | 7 | Twelve Monkeys (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Twelve%20Monk... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
7 | 8 | Babe (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Babe%20(1995) | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
8 | 9 | Dead Man Walking (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Dead%20Man%20... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
9 | 10 | Richard III (1995) | 22-Jan-1996 | NaN | http://us.imdb.com/M/title-exact?Richard%20III... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
10 | 11 | Seven (Se7en) (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Se7en%20(1995) | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
11 | 12 | Usual Suspects, The (1995) | 14-Aug-1995 | NaN | http://us.imdb.com/M/title-exact?Usual%20Suspe... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
12 | 13 | Mighty Aphrodite (1995) | 30-Oct-1995 | NaN | http://us.imdb.com/M/title-exact?Mighty%20Aphr... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
13 | 14 | Postino, Il (1994) | 01-Jan-1994 | NaN | http://us.imdb.com/M/title-exact?Postino,%20Il... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
14 | 15 | Mr. Holland's Opus (1995) | 29-Jan-1996 | NaN | http://us.imdb.com/M/title-exact?Mr.%20Holland... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
15 | 16 | French Twist (Gazon maudit) (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Gazon%20maudi... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
16 | 17 | From Dusk Till Dawn (1996) | 05-Feb-1996 | NaN | http://us.imdb.com/M/title-exact?From%20Dusk%2... | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
17 | 18 | White Balloon, The (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Badkonake%20S... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
18 | 19 | Antonia's Line (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Antonia%20(1995) | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
19 | 20 | Angels and Insects (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Angels%20and%... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
20 | 21 | Muppet Treasure Island (1996) | 16-Feb-1996 | NaN | http://us.imdb.com/M/title-exact?Muppet%20Trea... | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 |
21 | 22 | Braveheart (1995) | 16-Feb-1996 | NaN | http://us.imdb.com/M/title-exact?Braveheart%20... | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
22 | 23 | Taxi Driver (1976) | 16-Feb-1996 | NaN | http://us.imdb.com/M/title-exact?Taxi%20Driver... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
23 | 24 | Rumble in the Bronx (1995) | 23-Feb-1996 | NaN | http://us.imdb.com/M/title-exact?Hong%20Faan%2... | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
24 | 25 | Birdcage, The (1996) | 08-Mar-1996 | NaN | http://us.imdb.com/M/title-exact?Birdcage,%20T... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
25 | 26 | Brothers McMullen, The (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Brothers%20Mc... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
26 | 27 | Bad Boys (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Bad%20Boys%20... | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
27 | 28 | Apollo 13 (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Apollo%2013%2... | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
28 | 29 | Batman Forever (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Batman%20Fore... | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
29 | 30 | Belle de jour (1967) | 01-Jan-1967 | NaN | http://us.imdb.com/M/title-exact?Belle%20de%20... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1652 | 1653 | Entertaining Angels: The Dorothy Day Story (1996) | 27-Sep-1996 | NaN | http://us.imdb.com/M/title-exact?Entertaining%... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1653 | 1654 | Chairman of the Board (1998) | 01-Jan-1998 | NaN | http://us.imdb.com/Title?Chairman+of+the+Board... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1654 | 1655 | Favor, The (1994) | 01-Jan-1994 | NaN | http://us.imdb.com/M/title-exact?Favor,%20The%... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
1655 | 1656 | Little City (1998) | 20-Feb-1998 | NaN | http://us.imdb.com/M/title-exact?Little+City+(... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
1656 | 1657 | Target (1995) | 28-Feb-1996 | NaN | http://us.imdb.com/M/title-exact?Target%20(1995) | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1657 | 1658 | Substance of Fire, The (1996) | 06-Dec-1996 | NaN | http://us.imdb.com/M/title-exact?Substance%20o... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1658 | 1659 | Getting Away With Murder (1996) | 12-Apr-1996 | NaN | http://us.imdb.com/Title?Getting+Away+With+Mur... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1659 | 1660 | Small Faces (1995) | 09-Aug-1996 | NaN | http://us.imdb.com/M/title-exact?Small%20Faces... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1660 | 1661 | New Age, The (1994) | 01-Jan-1994 | NaN | http://us.imdb.com/M/title-exact?New%20Age,%20... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1661 | 1662 | Rough Magic (1995) | 30-May-1997 | NaN | http://us.imdb.com/M/title-exact?Rough%20Magic... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
1662 | 1663 | Nothing Personal (1995) | 30-Apr-1997 | NaN | http://us.imdb.com/M/title-exact?Nothing%20Per... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
1663 | 1664 | 8 Heads in a Duffel Bag (1997) | 18-Apr-1997 | NaN | http://us.imdb.com/Title?8+Heads+in+a+Duffel+B... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1664 | 1665 | Brother's Kiss, A (1997) | 25-Apr-1997 | NaN | http://us.imdb.com/M/title-exact?Brother%27s%2... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1665 | 1666 | Ripe (1996) | 02-May-1997 | NaN | http://us.imdb.com/M/title-exact?Ripe%20%28199... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1666 | 1667 | Next Step, The (1995) | 13-Jun-1997 | NaN | http://us.imdb.com/M/title-exact?Next%20Step%2... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1667 | 1668 | Wedding Bell Blues (1996) | 13-Jun-1997 | NaN | http://us.imdb.com/M/title-exact?Wedding%20Bel... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1668 | 1669 | MURDER and murder (1996) | 20-Jun-1997 | NaN | http://us.imdb.com/M/title-exact?MURDER+and+mu... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
1669 | 1670 | Tainted (1998) | 01-Feb-1998 | NaN | http://us.imdb.com/M/title-exact?Tainted+(1998) | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
1670 | 1671 | Further Gesture, A (1996) | 20-Feb-1998 | NaN | http://us.imdb.com/M/title-exact?Further+Gestu... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1671 | 1672 | Kika (1993) | 01-Jan-1993 | NaN | http://us.imdb.com/M/title-exact?Kika%20(1993) | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1672 | 1673 | Mirage (1995) | 01-Jan-1995 | NaN | http://us.imdb.com/M/title-exact?Mirage%20(1995) | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
1673 | 1674 | Mamma Roma (1962) | 01-Jan-1962 | NaN | http://us.imdb.com/M/title-exact?Mamma%20Roma%... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1674 | 1675 | Sunchaser, The (1996) | 25-Oct-1996 | NaN | http://us.imdb.com/M/title-exact?Sunchaser,%20... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1675 | 1676 | War at Home, The (1996) | 01-Jan-1996 | NaN | http://us.imdb.com/M/title-exact?War%20at%20Ho... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1676 | 1677 | Sweet Nothing (1995) | 20-Sep-1996 | NaN | http://us.imdb.com/M/title-exact?Sweet%20Nothi... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1677 | 1678 | Mat' i syn (1997) | 06-Feb-1998 | NaN | http://us.imdb.com/M/title-exact?Mat%27+i+syn+... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1678 | 1679 | B. Monkey (1998) | 06-Feb-1998 | NaN | http://us.imdb.com/M/title-exact?B%2E+Monkey+(... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
1679 | 1680 | Sliding Doors (1998) | 01-Jan-1998 | NaN | http://us.imdb.com/Title?Sliding+Doors+(1998) | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
1680 | 1681 | You So Crazy (1994) | 01-Jan-1994 | NaN | http://us.imdb.com/M/title-exact?You%20So%20Cr... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1681 | 1682 | Scream of Stone (Schrei aus Stein) (1991) | 08-Mar-1996 | NaN | http://us.imdb.com/M/title-exact?Schrei%20aus%... | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1682 rows × 24 columns
构建用户矩阵
# Number of distinct users in the user table.
num_users = len(users.user_id.unique())
# Number of distinct movies that appear in the rating table.
num_items = len(ratings.movie_id.unique())
# Zero-filled array with one row per user and one column per movie.
data_matrix = np.zeros(shape=(num_users, num_items))
def constructUserMovieMatrix(users, ratings):
    """Build the user x movie rating matrix.

    Row ``user_id - 1`` and column ``movie_id - 1`` hold that user's rating
    of that movie; cells with no rating stay 0.

    :param users: user table with a ``user_id`` column
    :param ratings: rating table with ``user_id``, ``movie_id`` and
        ``rating`` columns
    :return: 2-D float array of ratings
    """
    has_ratings = len(ratings) > 0

    # Cells are addressed by id - 1, so each axis must be as long as the
    # largest id, not the number of distinct ids: with a gap in the ids the
    # distinct count is too small and the assignment below would go out of
    # bounds. For contiguous ids starting at 1 both sizes are equal.
    max_user_id = users.user_id.max() if len(users) else 0
    if has_ratings:
        max_user_id = max(max_user_id, ratings.user_id.max())
    num_users = int(max_user_id)
    num_items = int(ratings.movie_id.max()) if has_ratings else 0

    data_matrix = np.zeros((num_users, num_items))
    # Rows are visited in table order, so if a (user, movie) pair occurs more
    # than once the last occurrence wins.
    for line in ratings.itertuples():
        data_matrix[line.user_id - 1, line.movie_id - 1] = line.rating
    return data_matrix
# Build the user x movie rating matrix; as the cell's last expression, the
# returned array is what the notebook displays below.
constructUserMovieMatrix(users, ratings)
array([[5., 3., 4., ..., 0., 0., 0.],
[4., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[5., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 5., 0., ..., 0., 0., 0.]])
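1.unique()方法去重;
2.shape返回表示数组维度的元组,shape[0]是第一维的长度,这里即去重后的元素个数;
3.line的内容:itertuples()返回的每一行元组,line[0]是行索引,line[1]、line[2]、line[3]依次是user_id、movie_id、rating。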
def calculationSimilarity(data_matrix):
    """Compute the movie-to-movie cosine similarity matrix.

    The rating matrix has one row per user and one column per movie, so it
    is transposed before the call in order to compare movies; passing it
    untransposed would compare users instead.

    :param data_matrix: user x movie rating matrix
    :return: movie x movie similarity matrix
    """
    # Only the item similarity is returned, so the user-user matrix that used
    # to be computed alongside it (and then discarded) is no longer built.
    return cosine_similarity(data_matrix.T, dense_output=True)
def rec_sys(items, ratings, item_similarity, keywords, k):
    """Recommend the movies most similar to the one matching ``keywords``.

    :param items: movie-detail table with ``movie_id`` and ``movie_title``
    :param ratings: rating table with ``movie_id`` and ``rating``
    :param item_similarity: movie x movie similarity matrix, indexed by
        ``movie_id - 1`` on both axes
    :param keywords: movie title or a fragment of it, matched literally and
        case-sensitively
    :param k: number of recommendations wanted
    :return: list with up to ``k`` entries, most similar first, each of the
        form ``[title, similarity, mean rating, number of ratings]``;
        an empty list when no title contains ``keywords`` or ``k <= 0``
    """
    # regex=False: titles contain regex metacharacters such as "(1995)", so a
    # regex search for a full title would not match that title.
    # na=False: rows with a missing title count as non-matching.
    title_hits = items['movie_title'].str.contains(keywords, regex=False, na=False)
    matched_ids = items.loc[title_hits, 'movie_id']
    if matched_ids.empty or k <= 0:
        return []

    # The first matching movie is the query.
    movie_index = matched_ids.iloc[0] - 1
    movie_similarity = item_similarity[movie_index]

    # Rank every movie by descending similarity, then drop the query movie by
    # index. Skipping position 0 instead is wrong whenever the query does not
    # sort first (e.g. another movie ties with it at similarity 1.0).
    ranked = np.argsort(-movie_similarity, kind="stable")
    ranked = ranked[ranked != movie_index][:k]

    movie_list = []
    for index in ranked:
        movie_ratings = ratings.loc[ratings['movie_id'] == index + 1, 'rating']
        rec_movie = list(items.loc[items['movie_id'] == index + 1, 'movie_title'])  # title
        rec_movie.append(movie_similarity[index])  # similarity to the query
        rec_movie.append(movie_ratings.mean())  # mean rating
        rec_movie.append(len(movie_ratings))  # number of ratings
        movie_list.append(rec_movie)
    return movie_list
if __name__ == '__main__':
    # Start of the timed section: loading, matrix construction, similarity
    # computation and the lookup are all included in the figure printed last.
    beginTime = time.time()

    # Number of recommendations to produce.
    k = 5
    # Title-case the query because the title search is case-sensitive.
    keywords = "Assassins"
    keywords = keywords.title()

    # Load the tables, build the rating matrix, derive movie similarities.
    users, ratings, items = init()
    data_matrix = constructUserMovieMatrix(users, ratings)
    similarity = calculationSimilarity(data_matrix)

    # Look up and print the k movies closest to the query.
    movie_list = rec_sys(items, ratings, similarity, keywords, k)
    print(movie_list)
    print("推荐耗时:", time.time()-beginTime)
[['Outbreak (1995)', 0.477148560717635, 3.2403846153846154, 104], ['Client, The (1994)', 0.4185073174104336, 3.381443298969072, 97], ['Net, The (1995)', 0.40070132781155243, 3.0083333333333333, 120], ['Under Siege 2: Dark Territory (1995)', 0.3980230845871082, 2.4583333333333335, 48], ['Demolition Man (1993)', 0.39672213644098525, 3.152173913043478, 92]]
推荐耗时: 0.5808792114257812