Back to home page

OSCL-LXR

 
 

    


0001 #
0002 # Licensed to the Apache Software Foundation (ASF) under one or more
0003 # contributor license agreements.  See the NOTICE file distributed with
0004 # this work for additional information regarding copyright ownership.
0005 # The ASF licenses this file to You under the Apache License, Version 2.0
0006 # (the "License"); you may not use this file except in compliance with
0007 # the License.  You may obtain a copy of the License at
0008 #
0009 #    http://www.apache.org/licenses/LICENSE-2.0
0010 #
0011 # Unless required by applicable law or agreed to in writing, software
0012 # distributed under the License is distributed on an "AS IS" BASIS,
0013 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014 # See the License for the specific language governing permissions and
0015 # limitations under the License.
0016 #
0017 # $example on$
0018 from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD
0019 from pyspark.mllib.evaluation import RegressionMetrics
0020 from pyspark.mllib.linalg import DenseVector
0021 # $example off$
0022 
0023 from pyspark import SparkContext
0024 
if __name__ == "__main__":
    sc = SparkContext(appName="Regression Metrics Example")

    # $example on$
    # Load and parse the data
    def parsePoint(line):
        """Parse one whitespace-separated text line into a LabeledPoint.

        The first token is the label. Every remaining token is split on
        ':' and only the part after the colon is kept as a feature value,
        so the text before the colon is discarded and features are stored
        densely in the order they appear on the line.
        """
        values = line.split()
        return LabeledPoint(float(values[0]),
                            DenseVector([float(x.split(':')[1]) for x in values[1:]]))

    data = sc.textFile("data/mllib/sample_linear_regression_data.txt")
    parsedData = data.map(parsePoint)

    # Build the model
    model = LinearRegressionWithSGD.train(parsedData)

    # Get (prediction, observation) pairs; RegressionMetrics takes the
    # prediction first and the observed label second
    predictionAndObservations = parsedData.map(
        lambda p: (float(model.predict(p.features)), p.label))

    # Instantiate metrics object
    metrics = RegressionMetrics(predictionAndObservations)

    # Squared Error
    print("MSE = %s" % metrics.meanSquaredError)
    print("RMSE = %s" % metrics.rootMeanSquaredError)

    # R-squared
    print("R-squared = %s" % metrics.r2)

    # Mean absolute error
    print("MAE = %s" % metrics.meanAbsoluteError)

    # Explained variance
    print("Explained variance = %s" % metrics.explainedVariance)
    # $example off$

    # Shut down the SparkContext explicitly so the application releases its
    # resources instead of relying on interpreter exit
    sc.stop()