0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 """
Collaborative Filtering Recommendation Example.
0020 """
0021 from __future__ import print_function
0022
0023 from pyspark import SparkContext
0024
0025
0026 from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
0027
0028
if __name__ == "__main__":
    # Driver script: train an ALS matrix-factorization model on the ratings
    # file, report its mean squared error on that same data, then save the
    # model to disk and load it back.
    sc = SparkContext(appName="PythonCollaborativeFilteringExample")
    try:
        # Load and parse the data. Each input line is split on commas and its
        # first three fields become Rating(int, int, float).
        data = sc.textFile("data/mllib/als/test.data")
        ratings = (
            data.map(lambda l: l.split(','))
            .map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
        )

        # Build the recommendation model using Alternating Least Squares.
        rank = 10
        numIterations = 10
        model = ALS.train(ratings, rank, numIterations)

        # Evaluate the model on the training data: strip the rating from each
        # record, predict it back, and join predictions with the actual values
        # on the (first field, second field) key.
        testdata = ratings.map(lambda p: (p[0], p[1]))
        predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
        ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
        # After the join each value is (actual, predicted); average the
        # squared differences.
        MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
        print("Mean Squared Error = " + str(MSE))

        # Save and load the model.
        # NOTE(review): the save presumably fails if the target path already
        # exists from a previous run -- confirm and clean up beforehand if so.
        model.save(sc, "target/tmp/myCollaborativeFilter")
        sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    finally:
        # Always shut the context down, even when a step above raises, so the
        # application does not leave the SparkContext running.
        sc.stop()
0052