


  1. 找到和目标用户兴趣相似的用户集合;
  2. 找到这个集合中的用户喜欢的,且目标用户没有听说过的物品推荐给目标用户。




  1. 尽量选取更精准的rating值。
  2. 选择合适的距离公式,如:欧氏距离公式、余弦相似度距离公式、皮尔逊相关系数、曼哈顿距离公式、闵可夫斯基距离公式、杰卡德相似系数等。
  3. 根据应用场景选择合适的topN。




from math import sqrt, pow
import operator


class UserCf():
    """User-based collaborative filtering over a ``{user: {item: rating}}`` mapping.

    The recommender finds the users most similar to a target user and
    suggests items those users rated that the target user has not rated.
    """

    def __init__(self, data):
        """Store the rating data.

        Args:
            data: mapping of user id -> mapping of item id -> numeric rating,
                e.g. ``{'Toby': {'Snakes on a Plane': 4.5, ...}, ...}``.
        """
        self.data = data

    def getItems(self, username1, username2):
        """Return the rating dicts of two users (debugging helper).

        Raises:
            KeyError: if either user is not present in the data.
        """
        return self.data[username1], self.data[username2]

    def pearsonDist(self, user1, user2):
        """Return the Pearson correlation coefficient of two users.

        Only items rated by both users contribute.

        Args:
            user1: mapping of item -> rating for the first user.
            user2: mapping of item -> rating for the second user.

        Returns:
            The coefficient in [-1, 1]; 0.0 when the users share no items,
            when either user's shared ratings have no variance, or when the
            ratings cannot be processed as numbers.
        """
        n = 0
        sumXY = sumX = sumY = sumX2 = sumY2 = 0.0
        try:
            for movie, score1 in user1.items():
                if movie not in user2:
                    continue
                # Accumulate sums over the co-rated items only.
                score2 = user2[movie]
                n += 1
                sumXY += score1 * score2
                sumX += score1
                sumY += score2
                sumX2 += pow(score1, 2)
                sumY2 += pow(score2, 2)
            if n == 0:
                # No co-rated items: correlation is undefined, treat as none.
                return 0.0
            molecule = sumXY - (sumX * sumY) / n
            spread = (sumX2 - pow(sumX, 2) / n) * (sumY2 - pow(sumY, 2) / n)
            if spread <= 0:
                # Zero variance (or rounding pushed the product below zero).
                return 0.0
            return molecule / sqrt(spread)
        except (ArithmeticError, TypeError, ValueError) as e:
            print("异常信息:", e)
            return 0.0

    def euclidDist(self, user1, user2):
        """Return the Euclidean distance between two users.

        Only items rated by both users contribute. Note this is a distance:
        smaller values mean more similar users.

        Returns:
            The distance, or 0.0 when the users share no items or the
            ratings cannot be processed as numbers.
        """
        squared = 0.0
        try:
            for movie, score1 in user1.items():
                if movie in user2:
                    squared += pow(user2[movie] - score1, 2)
            return sqrt(squared)
        except (ArithmeticError, TypeError, ValueError) as e:
            print("异常信息:", e)
            return 0.0

    def cosineDist(self, user1, user2):
        """Return the cosine similarity of two users' rating vectors.

        The dot product uses the co-rated items; each norm uses all of that
        user's ratings.

        Returns:
            The similarity, or 0.0 when either user has a zero-length rating
            vector or the ratings cannot be processed as numbers.
        """
        sumXY = 0.0
        sumX = 0.0
        sumY = 0.0
        try:
            for movie, score1 in user1.items():
                sumX += pow(score1, 2)
                if movie in user2:
                    sumXY += score1 * user2[movie]
            for score2 in user2.values():
                sumY += pow(score2, 2)
            denominator = sqrt(sumX) * sqrt(sumY)
            if denominator == 0:
                # An empty or all-zero rating vector has no direction.
                return 0.0
            return sumXY / denominator
        except (ArithmeticError, TypeError, ValueError) as e:
            print("异常信息:", e)
            return 0.0

    def nearstUser(self, username, n=1):
        """Return the ``n`` users most similar to ``username``.

        Similarity is measured with ``cosineDist``. ``pearsonDist`` could be
        swapped in directly; ``euclidDist`` is a distance, so using it would
        require sorting in ascending order instead.

        Args:
            username: id of the target user; must be a key of ``self.data``.
            n: number of neighbours to return.

        Returns:
            A list of ``(user, similarity)`` tuples, most similar first.

        Raises:
            KeyError: if ``username`` is not present in the data.
        """
        target = self.data[username]
        distances = {}  # other user -> similarity
        for otherUser, items in self.data.items():
            # Compare ids for equality; a substring test would wrongly drop
            # e.g. user '5' when the target is '15'.
            if otherUser != username:
                distances[otherUser] = self.cosineDist(target, items)
        print("排序前的用户为:", distances)
        sortedDistance = sorted(distances.items(), key=operator.itemgetter(1), reverse=True)
        print("排序后的用户为:", sortedDistance)
        return sortedDistance[:n]

    def recomand(self, username, n=1):
        """Recommend items rated by the nearest users but not by ``username``.

        Args:
            username: id of the target user; must be a key of ``self.data``.
            n: number of nearest users to draw recommendations from.

        Returns:
            A list of ``(item, rating)`` tuples sorted by rating, highest
            first. When several neighbours rated the same item, the rating
            of the most similar neighbour is kept.
        """
        seen = self.data[username]
        recommand = {}  # item -> rating of the first neighbour that rated it
        for user, score in self.nearstUser(username, n):
            print("推荐的用户:", user, score)
            for movies, scores in self.data[user].items():
                if movies in seen:
                    continue
                print("%s为该用户推荐的电影:%s" % (user, movies))
                recommand.setdefault(movies, scores)
        return sorted(recommand.items(), key=operator.itemgetter(1), reverse=True)


def loadData(path):
    """Load ratings from a comma-separated text file.

    Each non-blank line must have the form ``userId,itemId,rating``; any
    further fields are ignored.

    Args:
        path: path of the ratings file.

    Returns:
        Mapping of user id (str) -> mapping of item id (str) -> rating (float).

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if a non-blank line has fewer than three fields.
        ValueError: if a rating is not a valid float.
    """
    users = {}
    # The context manager closes the file even if a line fails to parse.
    with open(path, 'r') as f:
        for data in f:
            if not data.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataArr = data.strip('\n').split(",")
            userId = dataArr[0]
            itemId = dataArr[1]
            rating = float(dataArr[2])
            users.setdefault(userId, {})[itemId] = rating
    return users


def main():
    """Load the rating file and print recommendations for user '5'."""
    # Sample in-memory data set that can be used instead of loading a file:
    # users = {'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
    #                        'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5,
    #                        'The Night Listener': 3.0},
    #          'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
    #                           'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0,
    #                           'You, Me and Dupree': 3.5},
    #          'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
    #                               'Superman Returns': 3.5, 'The Night Listener': 4.0},
    #          'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
    #                           'The Night Listener': 4.5, 'Superman Returns': 4.0,
    #                           'You, Me and Dupree': 2.5},
    #          'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
    #                           'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0,
    #                           'You, Me and Dupree': 2.0},
    #          'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
    #                            'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5},
    #          'Toby': {'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0, 'Superman Returns': 4.0}
    #          }
    path = "/home/ai/recommandSystem/ALS1_rating.txt"
    users = loadData(path)
    print("data:", users)
    userCf = UserCf(data=users)
    recommandList = userCf.recomand('5', 2)
    print("最终推荐:%s" % recommandList)


if __name__ == '__main__':
    main()