数据挖掘实践指南读书笔记1

原创

人生代码_公众号 2021-07-19 11:49:02 ©著作权

©著作权归作者所有：来自51CTO博客作者人生代码_公众号的原创作品，请联系作者获取转载授权，否则将追究法律责任

写在之前

本书涉及的源程序和数据都可以在以下网站中找到：http://guidetodatamining.com/ 这本书理论比较简单，书中错误较少，动手锻炼较多，如果每个代码都自己写出来，收获不少。总结：适合入门。欢迎转载，转载请注明出处，如有问题欢迎指正。合集地址：https://www.zybuluo.com/hainingwyx/note/559139

协同过滤

相似用户评判标准：曼哈顿距离、欧氏距离、明氏距离。

# Manhattan.py	
# Sample data set: user name -> {band name -> rating given by that user}.
users = {
    "Angelica": {
        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
        "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
        "The Strokes": 4.0, "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
        "Slightly Stoopid": 2.5, "The Strokes": 3.0,
    },
}
def manhattan(rating1, rating2):
    """Return the Manhattan distance between two rating dictionaries.

    Both arguments map an item name to a numeric rating, e.g.
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}.  Only items that appear
    in both dictionaries contribute to the distance.

    Returns -1 when the two dictionaries have no item in common.
    """
    shared = [item for item in rating1 if item in rating2]
    if not shared:
        return -1  # sentinel: no ratings in common
    return sum(abs(rating1[item] - rating2[item]) for item in shared)
def computeNearestNeighbor(username, users):
    """Return (distance, user) pairs for every other user, closest first.

    username -- key in `users` of the person to compare against.
    users    -- dict mapping user name -> {item -> rating}.

    Distances come from manhattan().  Users that share no rated item with
    `username` keep their -1 sentinel distance but are placed at the END of
    the list instead of the front.
    """
    distances = [
        (manhattan(users[user], users[username]), user)
        for user in users
        if user != username
    ]
    # A plain sort would rank the -1 "no ratings in common" sentinel as the
    # closest neighbour; sort those entries last, then by distance and name.
    distances.sort(key=lambda pair: (pair[0] < 0, pair[0], pair[1]))
    return distances
def recommend(username, users):
    """Return recommendations for `username` as (artist, rating) pairs.

    The nearest neighbour (first entry of computeNearestNeighbor) is printed,
    and every artist that neighbour rated but `username` did not is returned,
    highest neighbour rating first.  Returns an empty list when there is no
    other user to compare against.
    """
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        # Only one user in the data set: nothing to recommend from.
        return []
    nearest = neighbors[0][1]
    # print() with a single argument behaves the same on Python 2 and 3.
    print(nearest)
    neighborRatings = users[nearest]
    userRatings = users[username]
    # Keep only the bands the neighbour rated that the user has not.
    recommendations = [
        (artist, neighborRatings[artist])
        for artist in neighborRatings
        if artist not in userRatings
    ]
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1],
                  reverse=True)

# -*- coding: utf-8 -*-	
from math import sqrt	
# Sample data set: user name -> {band name -> rating given by that user}.
users = {
    "Angelica": {
        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
        "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
        "The Strokes": 4.0, "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
        "Slightly Stoopid": 2.5, "The Strokes": 3.0,
    },
}
#明氏距离	
def minkowski(rating1, rating2, r):
    """Return the Minkowski distance of order `r` between two rating dicts.

    rating1, rating2 -- dicts mapping item name -> numeric rating.
    r                -- order of the distance (1 gives Manhattan, 2 gives
                        Euclidean); must be non-zero.

    Only items present in both dictionaries contribute.  Returns -1 when the
    two dictionaries have no item in common.
    """
    total = 0
    commonRatings = False
    for key in rating1:
        if key in rating2:
            total += pow(abs(rating1[key] - rating2[key]), r)
            commonRatings = True
    if not commonRatings:
        return -1  # Indicates no ratings in common
    # Take the r-th root once, after ALL shared items have been accumulated.
    return pow(total, 1.0 / r)
def computeNearestNeighbor(username, users):
    """Return (distance, user) pairs for every other user, closest first.

    username -- key in `users` of the person to compare against.
    users    -- dict mapping user name -> {item -> rating}.

    Distances come from minkowski() with r=3.  Users that share no rated
    item with `username` keep their -1 sentinel distance but are placed at
    the END of the list instead of the front.
    """
    distances = []
    for user in users:
        if user != username:
            distance = minkowski(users[user], users[username], 3)
            distances.append((distance, user))
    # Sort once, after every user has been measured.  The -1 "no ratings in
    # common" sentinel must not rank as the closest neighbour, so those
    # entries go last; the rest are ordered by distance, then by name.
    distances.sort(key=lambda pair: (pair[0] < 0, pair[0], pair[1]))
    return distances
def recommend(username, users):
    """Return recommendations for `username` as (artist, rating) pairs.

    The nearest neighbour (first entry of computeNearestNeighbor) is printed,
    and every artist that neighbour rated but `username` did not is returned,
    highest neighbour rating first.  Returns an empty list when there is no
    other user to compare against.
    """
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        # Only one user in the data set: nothing to recommend from.
        return []
    nearest = neighbors[0][1]
    # print() with a single argument behaves the same on Python 2 and 3.
    print(nearest)
    neighborRatings = users[nearest]
    userRatings = users[username]
    # Keep only the bands the neighbour rated that the user has not.
    recommendations = [
        (artist, neighborRatings[artist])
        for artist in neighborRatings
        if artist not in userRatings
    ]
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1],
                  reverse=True)

但是上述距离无法处理这样的问题：两个用户的评分之间存在常数差别（评级尺度不同），而两者的爱好其实相同。此时可以使用皮尔逊相关系数：

# Pearson.py	
from math import sqrt	
# Sample data set: user name -> {band name -> rating given by that user}.
users = {
    "Angelica": {
        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
        "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
        "The Strokes": 4.0, "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
        "Slightly Stoopid": 2.5, "The Strokes": 3.0,
    },
}
# 这里为了简单使用近似代替	
def pearson(rating1, rating2):
    """Return the Pearson correlation of two rating dicts (one-pass formula).

    rating1, rating2 -- dicts mapping item name -> numeric rating.

    Only items present in both dictionaries are used.  Returns 0 when the
    dictionaries share no item, or when either user's shared ratings have
    zero variance (the correlation is undefined in that case).
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2
    if n == 0:
        # No ratings in common: avoid dividing by zero below.
        return 0
    # Force true division even for all-integer ratings under Python 2.
    n = float(n)
    # Rounding can leave these a hair below zero, which would make sqrt()
    # raise; clamp them so that case is reported as "no correlation".
    spread_x = max(sum_x2 - (sum_x ** 2) / n, 0.0)
    spread_y = max(sum_y2 - (sum_y ** 2) / n, 0.0)
    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator
def cos_like(rating1, rating2):
    """Return the cosine similarity of two rating dicts.

    Only items rated in both dictionaries enter the dot product and the two
    vector lengths.  Returns 0 when the product of the lengths is zero
    (for example when nothing is shared).
    """
    dot = 0
    length_sq_1 = 0
    length_sq_2 = 0
    for item in rating1:
        if item not in rating2:
            continue
        a = rating1[item]
        b = rating2[item]
        dot += a * b
        length_sq_1 += a ** 2
        length_sq_2 += b ** 2
    scale = sqrt(length_sq_1) * sqrt(length_sq_2)
    return dot / scale if scale != 0 else 0

余弦相似度：$$\cos(x, y)=\frac{x \cdot y}{\|x\| \times \|y\|}$$ 总结：如果数据稠密，使用欧氏距离；如果数据稀疏，使用余弦相似度；如果用户评级范围不同，使用皮尔逊相关系数。但是如果仅仅基于一个最相似的用户进行推荐，该用户的个人怪癖也会被推荐。

k近邻：利用k个最相似的用户确定推荐结果，k的取值和具体应用有关。利用皮尔逊相关系数来确定每个近邻的影响因子（权重）。

# Movie critics and the ratings each gave to a small set of movies:
# critic name -> {movie title -> rating}.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0, 'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5, 'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5, 'Superman Returns': 5.0,
        'The Night Listener': 3.0, 'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5, 'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5, 'Just My Luck': 3.0,
        'The Night Listener': 4.5, 'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0, 'Superman Returns': 3.0,
        'The Night Listener': 3.0, 'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0, 'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}
from math import sqrt	
# Returns a distance-based similarity score for person1 and person2	
def sim_distance(prefs, person1, person2):
    """Distance-based similarity score for two people in `prefs`.

    prefs maps person -> {item -> rating}.  The score is
    1 / (1 + sum of squared rating differences over the shared items), so
    identical ratings give 1.  Returns 0 when the two people have no rated
    item in common.
    """
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    if not shared:
        return 0
    sum_of_squares = 0
    for item in shared:
        sum_of_squares += pow(prefs[person1][item] - prefs[person2][item], 2)
    return 1 / (1 + sum_of_squares)
# Returns the Pearson correlation coefficient for p1 and p2	
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient for p1 and p2.

    prefs maps person -> {item -> rating}.  Only items rated by both people
    are used.  Returns 0 when they share no item or when the denominator of
    the correlation is zero.
    """
    # Items rated by both people.
    si = [item for item in prefs[p1] if item in prefs[p2]]
    if not si:
        return 0
    # float() keeps the divisions below exact for all-integer ratings under
    # Python 2 as well.
    n = float(len(si))
    # Sums of all the preferences
    sum1 = sum(prefs[p1][it] for it in si)
    sum2 = sum(prefs[p2][it] for it in si)
    # Sums of the squares
    sum1Sq = sum(pow(prefs[p1][it], 2) for it in si)
    sum2Sq = sum(pow(prefs[p2][it], 2) for it in si)
    # Sum of the products
    pSum = sum(prefs[p1][it] * prefs[p2][it] for it in si)
    # Calculate r (Pearson score)
    num = pSum - (sum1 * sum2 / n)
    den = sqrt((sum1Sq - pow(sum1, 2) / n) * (sum2Sq - pow(sum2, 2) / n))
    if den == 0:
        return 0
    return num / den
# Returns the best matches for person from the prefs dictionary.	
# Number of results and similarity function are optional params.	
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the best matches for `person` from the prefs dictionary.

    prefs      -- dict mapping person -> {item -> rating}.
    person     -- key in `prefs` to find matches for.
    n          -- maximum number of results to return.
    similarity -- function (prefs, a, b) -> score; higher means more alike.

    Returns a list of (score, other_person) tuples, highest score first.
    """
    scores = [(similarity(prefs, person, other), other)
              for other in prefs if other != person]
    # Ascending sort followed by reverse puts the highest score first.
    scores.sort()
    scores.reverse()
    return scores[0:n]
# Gets recommendations for a person by using a weighted average	
# of every other user's rankings	
def getRecommendations(prefs, person, similarity=sim_pearson):
    """Recommend items for `person` using a similarity-weighted average.

    prefs      -- dict mapping person -> {item -> rating}.
    person     -- key in `prefs` to produce recommendations for.
    similarity -- function (prefs, a, b) -> score.

    Every other person with a positive similarity contributes their ratings
    of items `person` has not rated (or rated 0), weighted by similarity.
    Returns a list of (predicted_rating, item) tuples, highest first.
    """
    totals = {}
    simSums = {}
    for other in prefs:
        # don't compare me to myself
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        # ignore scores of zero or lower
        if sim <= 0:
            continue
        for item in prefs[other]:
            # only score items the person hasn't rated yet
            if item not in prefs[person] or prefs[person][item] == 0:
                # Similarity * Score
                totals.setdefault(item, 0)
                totals[item] += prefs[other][item] * sim
                # Sum of similarities
                simSums.setdefault(item, 0)
                simSums[item] += sim
    # Create the normalized list; simSums[item] > 0 because only positive
    # similarities are accumulated above.
    rankings = [(total / simSums[item], item) for item, total in totals.items()]
    # Return the sorted list, highest predicted rating first
    rankings.sort()
    rankings.reverse()
    return rankings
def transformPrefs(prefs):
    """Invert a preference dictionary.

    Turns {person: {item: rating}} into {item: {person: rating}} and returns
    the new dictionary; `prefs` itself is not modified.
    """
    result = {}
    for person in prefs:
        for item in prefs[person]:
            # Flip item and person
            result.setdefault(item, {})[person] = prefs[person][item]
    return result
def calculateSimilarItems(prefs, n=10):
    """Build a dictionary of items and the items most similar to each.

    prefs -- dict mapping person -> {item -> rating}.
    n     -- number of similar items to keep per item.

    Returns {item: [(similarity, other_item), ...]} as produced by
    topMatches() with sim_distance on the item-centric (inverted) matrix.
    Prints a progress line after every 100 items.
    """
    result = {}
    # Invert the preference matrix to be item-centric
    itemPrefs = transformPrefs(prefs)
    for c, item in enumerate(itemPrefs, 1):
        # Status updates for large datasets; print() with a single argument
        # behaves the same on Python 2 and 3.
        if c % 100 == 0:
            print("%d / %d" % (c, len(itemPrefs)))
        # Find the most similar items to this one
        result[item] = topMatches(itemPrefs, item, n=n, similarity=sim_distance)
    return result
def getRecommendedItems(prefs, itemMatch, user):
    """Item-based recommendations for `user`.

    prefs     -- dict mapping person -> {item -> rating}.
    itemMatch -- dict mapping item -> [(similarity, other_item), ...].
    user      -- key in `prefs` to produce recommendations for.

    Each unrated item is scored by the similarity-weighted average of the
    user's own ratings of the items it is similar to.  Returns a list of
    (predicted_rating, item) tuples, highest first.
    """
    userRatings = prefs[user]
    scores = {}
    totalSim = {}
    # Loop over items rated by this user
    for (item, rating) in userRatings.items():
        # Loop over items similar to this one
        for (similarity, item2) in itemMatch[item]:
            # Ignore if this user has already rated this item
            if item2 in userRatings:
                continue
            # Weighted sum of rating times similarity
            scores.setdefault(item2, 0)
            scores[item2] += similarity * rating
            # Sum of all the similarities
            totalSim.setdefault(item2, 0)
            totalSim[item2] += similarity
    # Divide each total score by total weighting to get an average.  Items
    # whose total similarity is zero carry no information and would divide
    # by zero, so they are left out.
    rankings = [(score / totalSim[item], item)
                for item, score in scores.items()
                if totalSim[item] != 0]
    # Return the rankings from highest to lowest
    rankings.sort()
    rankings.reverse()
    return rankings
def loadMovieLens(path=r'C:\Users\WangYixin\Desktop\PCI_Code\PCI_Code Folder\chapter2\data'):
    """Load MovieLens ratings from the directory `path`.

    Reads `path + '/u.item'` ('|'-separated; the first two fields are used as
    movie id and title) and `path + '/u.data'` (tab-separated user, movie id,
    rating, timestamp).

    Returns {user_id: {movie_title: float(rating)}}; ids are kept as the
    strings read from the files.
    """
    # The default path is a raw string: in a normal literal '\U' starts a
    # unicode escape, which is a syntax error on Python 3.
    # NOTE(review): files are opened with the platform default encoding --
    # confirm that matches the data files being used.

    # Get movie titles
    movies = {}
    with open(path + '/u.item') as itemFile:
        for line in itemFile:
            (movieId, title) = line.split('|')[0:2]
            movies[movieId] = title
    # Load data
    prefs = {}
    with open(path + '/u.data') as dataFile:
        for line in dataFile:
            (user, movieid, rating, ts) = line.split('\t')
            prefs.setdefault(user, {})
            prefs[user][movies[movieid]] = float(rating)
    return prefs

# -*- coding: utf-8 -*-	
# 推荐类	
import codecs	
from math import sqrt	
# Sample data set: user name -> {band name -> rating given by that user}.
users = {
    "Angelica": {
        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
        "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
        "The Strokes": 4.0, "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
        "Slightly Stoopid": 2.5, "The Strokes": 3.0,
    },
}
class recommender:
    """k-nearest-neighbour collaborative-filtering recommender.

    Ratings live in `self.data` as {user id: {product id: rating}}.  They are
    either passed to the constructor as a dictionary or loaded from the
    Book-Crossing CSV files with loadBookDB().
    """

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Initialize the recommender.

        data   -- if it is a dictionary, it becomes the rating data; for any
                  other type no data is set (call loadBookDB() instead).
        k      -- the k value for k nearest neighbor.
        metric -- which similarity formula to use; only 'pearson' installs a
                  similarity function.
        n      -- the maximum number of recommendations to make.
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        # The metric name is kept alongside the function it selects.
        self.metric = metric
        if self.metric == 'pearson':
            self.fn = self.pearson
        # If data is a dictionary, set recommender data to it.
        if isinstance(data, dict):
            self.data = data

    def convertProductID2name(self, id):
        """Given a product id, return the product name (or the id itself
        when no name is known)."""
        if id in self.productid2name:
            return self.productid2name[id]
        return id

    def userRatings(self, id, n):
        """Print the n top ratings for the user with the given id."""
        print("Ratings for " + self.userid2name[id])
        ratings = self.data[id]
        print(len(ratings))
        ratings = [(self.convertProductID2name(k), v)
                   for (k, v) in ratings.items()]
        # Highest rating first, then keep the first n.
        ratings.sort(key=lambda artistTuple: artistTuple[1], reverse=True)
        for rating in ratings[:n]:
            print("%s\t%i" % (rating[0], rating[1]))

    def loadBookDB(self, path=''):
        """Load the BX book dataset.

        `path` is the prefix (normally a directory ending in a separator)
        where BX-Book-Ratings.csv, BX-Books.csv and BX-Users.csv are located.
        Fills self.data, self.productid2name, self.userid2name and
        self.username2id, then prints the total number of lines processed.
        """
        self.data = {}
        i = 0
        #
        # First load book ratings into self.data
        #
        with codecs.open(path + "BX-Book-Ratings.csv", 'r', 'utf8') as f:
            f.readline()  # skip the header line
            for line in f:
                i += 1
                # Fields are ';'-separated and still wrapped in quotes.
                fields = line.split(';')
                user = fields[0].strip('"')
                book = fields[1].strip('"')
                rating = int(fields[2].strip().strip('"'))
                # Record the rating for new AND already-seen users.
                self.data.setdefault(user, {})[book] = rating
        #
        # Now load books into self.productid2name
        # Books contains isbn, title, and author among other fields
        #
        with codecs.open(path + "BX-Books.csv", 'r', 'utf8') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                isbn = fields[0].strip('"')
                title = fields[1].strip('"')
                author = fields[2].strip().strip('"')
                self.productid2name[isbn] = title + ' by ' + author
        #
        # Now load user info into both self.userid2name and
        # self.username2id
        #
        with codecs.open(path + "BX-Users.csv", 'r', 'utf8') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                userid = fields[0].strip('"')
                location = fields[1].strip('"')
                # NOTE(review): the age is only read when a line has MORE
                # than three fields -- confirm against the file layout.
                if len(fields) > 3:
                    age = fields[2].strip().strip('"')
                else:
                    age = 'NULL'
                if age != 'NULL':
                    value = location + ' (age: ' + age + ')'
                else:
                    value = location
                # Register every user, with or without a known age.
                self.userid2name[userid] = value
                self.username2id[location] = userid
        print(i)

    def pearson(self, rating1, rating2):
        """Return the Pearson correlation of two rating dictionaries.

        Only items present in both dictionaries are used.  Returns 0 when
        nothing is shared or when the correlation's denominator is zero.
        """
        sum_xy = 0
        sum_x = 0
        sum_y = 0
        sum_x2 = 0
        sum_y2 = 0
        n = 0
        for key in rating1:
            if key in rating2:
                n += 1
                x = rating1[key]
                y = rating2[key]
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += pow(x, 2)
                sum_y2 += pow(y, 2)
        if n == 0:
            return 0
        # Force true division even for integer ratings under Python 2.
        n = float(n)
        # Rounding can leave these a hair below zero, which would make
        # sqrt() raise; clamp them instead.
        spread_x = max(sum_x2 - pow(sum_x, 2) / n, 0.0)
        spread_y = max(sum_y2 - pow(sum_y, 2) / n, 0.0)
        denominator = sqrt(spread_x) * sqrt(spread_y)
        if denominator == 0:
            return 0
        return (sum_xy - (sum_x * sum_y) / n) / denominator

    def computeNearestNeighbor(self, username):
        """Return (user, similarity) pairs for every other user.

        The list is ordered by the value of self.fn, highest (most similar)
        first.
        """
        distances = []
        for instance in self.data:
            if instance != username:
                distance = self.fn(self.data[username], self.data[instance])
                distances.append((instance, distance))
        # self.fn is a similarity, so the largest value is the closest user.
        distances.sort(key=lambda artistTuple: artistTuple[1], reverse=True)
        return distances

    def recommend(self, user):
        """Return up to self.n recommendations for `user`.

        The ratings of the k nearest neighbours are combined, each neighbour
        weighted by its share of the neighbours' total similarity.  Only
        products `user` has not rated are considered.  Returns a list of
        (product name, weighted rating) tuples, highest first; the list is
        empty when there are no neighbours or their total similarity is 0.
        """
        recommendations = {}
        # first get list of users ordered by nearness
        nearest = self.computeNearestNeighbor(user)
        # now get the ratings for the user
        userRatings = self.data[user]
        # Never look at more neighbours than actually exist.
        k = min(self.k, len(nearest))
        # determine the total distance
        totalDistance = 0.0
        for i in range(k):
            totalDistance += nearest[i][1]
        if totalDistance == 0:
            # No neighbours, or no usable weights: nothing to recommend.
            return []
        # now iterate through the k nearest neighbors
        # accumulating their ratings
        for i in range(k):
            # compute slice of pie
            weight = nearest[i][1] / totalDistance
            # get the ratings for this person
            neighborRatings = self.data[nearest[i][0]]
            # now find products the neighbor rated that the user didn't
            for artist in neighborRatings:
                if artist not in userRatings:
                    recommendations[artist] = (
                        recommendations.get(artist, 0)
                        + neighborRatings[artist] * weight)
        # now make list from dictionary
        recommendations = [(self.convertProductID2name(k), v)
                           for (k, v) in recommendations.items()]
        # finally sort and return the first n items
        recommendations.sort(key=lambda artistTuple: artistTuple[1],
                             reverse=True)
        return recommendations[:self.n]
  ############test code############	
  #r = recommender(users)	
  #r.recommend('Jordyn')	
  #r.recommend('Hailey')	
  #r.loadBookDB('BX-CSV-Dump/')	
  #r.recommend('171118')	
  #r.userRatings('171118', 5)

数据挖掘实践指南读书笔记1_数据