0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 """
0019 An example demonstrating generalized linear regression.
0020 Run with:
0021 bin/spark-submit examples/src/main/python/ml/generalized_linear_regression_example.py
0022 """
0023 from __future__ import print_function
0024
0025 from pyspark.sql import SparkSession
0026
0027 from pyspark.ml.regression import GeneralizedLinearRegression
0028
0029
if __name__ == "__main__":
    # Script entry point: fit a generalized linear regression model on the
    # bundled sample data and print the fitted parameters and summary metrics.
    spark = SparkSession\
        .builder\
        .appName("GeneralizedLinearRegressionExample")\
        .getOrCreate()

    # All work that uses the session sits inside try/finally so the session
    # is stopped even when loading, fitting or summarizing raises.
    try:
        # Read the sample data set through the "libsvm" data source.
        dataset = spark.read.format("libsvm")\
            .load("data/mllib/sample_linear_regression_data.txt")

        # Gaussian family with the identity link; maxIter and regParam are
        # passed exactly as configured below.
        glr = GeneralizedLinearRegression(
            family="gaussian", link="identity", maxIter=10, regParam=0.3)

        # Fit the model on the loaded data.
        model = glr.fit(dataset)

        # Print the fitted coefficients and intercept.
        print("Coefficients: " + str(model.coefficients))
        print("Intercept: " + str(model.intercept))

        # Print the metrics exposed by the fitted model's summary object.
        summary = model.summary
        print("Coefficient Standard Errors: " + str(summary.coefficientStandardErrors))
        print("T Values: " + str(summary.tValues))
        print("P Values: " + str(summary.pValues))
        print("Dispersion: " + str(summary.dispersion))
        print("Null Deviance: " + str(summary.nullDeviance))
        print("Residual Degree Of Freedom Null: " + str(summary.residualDegreeOfFreedomNull))
        print("Deviance: " + str(summary.deviance))
        print("Residual Degree Of Freedom: " + str(summary.residualDegreeOfFreedom))
        print("AIC: " + str(summary.aic))
        print("Deviance Residuals: ")
        summary.residuals().show()
    finally:
        # Always release the session, on success and on failure alike.
        spark.stop()