Back to home page

OSCL-LXR

 
 

    


0001 #
0002 # Licensed to the Apache Software Foundation (ASF) under one or more
0003 # contributor license agreements.  See the NOTICE file distributed with
0004 # this work for additional information regarding copyright ownership.
0005 # The ASF licenses this file to You under the Apache License, Version 2.0
0006 # (the "License"); you may not use this file except in compliance with
0007 # the License.  You may obtain a copy of the License at
0008 #
0009 #    http://www.apache.org/licenses/LICENSE-2.0
0010 #
0011 # Unless required by applicable law or agreed to in writing, software
0012 # distributed under the License is distributed on an "AS IS" BASIS,
0013 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014 # See the License for the specific language governing permissions and
0015 # limitations under the License.
0016 #
0017 
0018 """
0019 Collaborative Filtering Recommendation Example.
0020 """
0021 from __future__ import print_function
0022 
0023 from pyspark import SparkContext
0024 
0025 # $example on$
0026 from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
0027 # $example off$
0028 
if __name__ == "__main__":
    # Script entry point: train an ALS recommender on a small ratings file,
    # report its mean squared error on the training data, then round-trip the
    # model through save/load.
    sc = SparkContext(appName="PythonCollaborativeFilteringExample")
    try:
        # $example on$
        # Load and parse the data: each input line is split on commas and its
        # first three fields become Rating(int, int, float).
        data = sc.textFile("data/mllib/als/test.data")
        ratings = data.map(lambda line: line.split(','))\
            .map(lambda fields: Rating(int(fields[0]), int(fields[1]), float(fields[2])))

        # Build the recommendation model using Alternating Least Squares
        rank = 10
        numIterations = 10
        model = ALS.train(ratings, rank, numIterations)

        # Evaluate the model on training data.
        # Keep only the first two Rating fields as the pairs to predict for
        # (user and product, per the pyspark Rating documentation).
        testdata = ratings.map(lambda p: (p[0], p[1]))
        # Key both the predictions and the observed ratings by that pair so
        # they can be joined; each joined value is (observed, predicted).
        predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
        ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
        # Mean of the squared differences between observed and predicted.
        MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
        print("Mean Squared Error = " + str(MSE))

        # Save and load model
        model.save(sc, "target/tmp/myCollaborativeFilter")
        sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
        # $example off$
    finally:
        # Always shut the SparkContext down, even if a step above raised, so
        # the application does not leave its Spark resources held.
        sc.stop()