0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 """
0019 FMRegressor Example.
0020 """
0021 from __future__ import print_function
0022
0023
0024 from pyspark.ml import Pipeline
0025 from pyspark.ml.regression import FMRegressor
0026 from pyspark.ml.feature import MinMaxScaler
0027 from pyspark.ml.evaluation import RegressionEvaluator
0028
0029 from pyspark.sql import SparkSession
0030
if __name__ == "__main__":
    # Train and evaluate a factorization-machine regressor on the sample
    # libsvm dataset, then print the test RMSE and the learned parameters.
    spark = SparkSession \
        .builder \
        .appName("FMRegressorExample") \
        .getOrCreate()

    # Everything after session creation runs under try/finally so that the
    # session is stopped even when loading, fitting or evaluation raises.
    try:
        # Load the libsvm-formatted data file into a DataFrame.
        data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")

        # Fit the min-max scaler that writes the "scaledFeatures" column.
        # NOTE(review): the scaler is fit on the full dataset, before the
        # train/test split below, so held-out rows also contribute to the
        # scaling range -- confirm this is intended for the example.
        featureScaler = MinMaxScaler(inputCol="features", outputCol="scaledFeatures").fit(data)

        # Split with weights 0.7 / 0.3 into training and test sets. No seed is
        # passed to randomSplit, so the split is not pinned.
        (trainingData, testData) = data.randomSplit([0.7, 0.3])

        # Configure the FM regressor to read the scaled feature column.
        fm = FMRegressor(featuresCol="scaledFeatures", stepSize=0.001)

        # Chain the (already fitted) scaler and the regressor.
        pipeline = Pipeline(stages=[featureScaler, fm])

        # Fit the pipeline on the training split.
        model = pipeline.fit(trainingData)

        # Score the held-out split.
        predictions = model.transform(testData)

        # Show a few example rows.
        predictions.select("prediction", "label", "features").show(5)

        # Compare "prediction" against "label" using the RMSE metric.
        evaluator = RegressionEvaluator(
            labelCol="label", predictionCol="prediction", metricName="rmse")
        rmse = evaluator.evaluate(predictions)
        print("Root Mean Squared Error (RMSE) on test data = %g" % rmse)

        # Stage 0 is the scaler; stage 1 is the fitted FM model.
        fmModel = model.stages[1]
        print("Factors: " + str(fmModel.factors))
        print("Linear: " + str(fmModel.linear))
        print("Intercept: " + str(fmModel.intercept))
    finally:
        # Always release the Spark session, on success and on failure.
        spark.stop()