|
||||
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Train an ALS recommender on sample ratings and report regression metrics.

The script reads ``user::product::rating`` records, fits an ALS model, predicts
a rating for every user-product pair present in the input, and prints the RMSE
and R-squared of those predictions against the observed ratings.
"""

# $example on$
from pyspark.mllib.recommendation import ALS, Rating
from pyspark.mllib.evaluation import RegressionMetrics
# $example off$
from pyspark import SparkContext

if __name__ == "__main__":
    sc = SparkContext(appName="Ranking Metrics Example")

    # Several of the methods available in scala are currently missing from pyspark
    # $example on$
    # Read in the ratings data
    lines = sc.textFile("data/mllib/sample_movielens_data.txt")

    def parseLine(line):
        """Convert one ``user::product::rating`` text record into a Rating.

        The rating is shifted down by 2.5 before being stored.
        NOTE(review): presumably this centers the rating scale around zero;
        confirm against the data set's rating range.
        """
        fields = line.split("::")
        return Rating(int(fields[0]), int(fields[1]), float(fields[2]) - 2.5)
    ratings = lines.map(parseLine)

    # Train a model to predict user-product ratings
    # (positional arguments: rank=10, iterations=10, lambda_=0.01)
    model = ALS.train(ratings, 10, 10, 0.01)

    # Get predicted ratings on all existing user-product pairs
    testData = ratings.map(lambda p: (p.user, p.product))
    predictions = model.predictAll(testData).map(lambda r: ((r.user, r.product), r.rating))

    # Key the observed ratings the same way so they can be joined to predictions
    ratingsTuple = ratings.map(lambda r: ((r.user, r.product), r.rating))
    # After the join each value is (predicted rating, observed rating)
    scoreAndLabels = predictions.join(ratingsTuple).map(lambda tup: tup[1])

    # Instantiate regression metrics to compare predicted and actual ratings
    metrics = RegressionMetrics(scoreAndLabels)

    # Root mean squared error
    print("RMSE = %s" % metrics.rootMeanSquaredError)

    # R-squared
    print("R-squared = %s" % metrics.r2)
    # $example off$

    # Shut the context down explicitly once the metrics have been printed
    sc.stop()
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.1.0 LXR engine. The LXR team |