0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 """
0019 An example of Multiclass to Binary Reduction with One Vs Rest,
0020 using Logistic Regression as the base classifier.
0021 Run with:
0022 bin/spark-submit examples/src/main/python/ml/one_vs_rest_example.py
0023 """
0024 from __future__ import print_function
0025
0026
0027 from pyspark.ml.classification import LogisticRegression, OneVsRest
0028 from pyspark.ml.evaluation import MulticlassClassificationEvaluator
0029
0030 from pyspark.sql import SparkSession
0031
if __name__ == "__main__":
    # Script entry point: fit a One-vs-Rest multiclass model that uses
    # logistic regression as its base binary classifier, score it on a
    # held-out split, and print the resulting test error.
    spark = (
        SparkSession.builder
        .appName("OneVsRestExample")
        .getOrCreate()
    )

    # Everything that uses the session lives inside try/finally so that the
    # session is stopped even when loading, fitting or evaluation raises.
    try:
        # Read the sample data set in libsvm format. The path is relative,
        # so the script presumably has to be launched from the Spark root
        # directory (see the module docstring) -- confirm for other setups.
        inputData = (
            spark.read.format("libsvm")
            .load("data/mllib/sample_multiclass_classification_data.txt")
        )

        # Split with weights 0.8 / 0.2 into training and test sets. No seed
        # is passed, so the split (and therefore the printed error) is not
        # expected to be reproducible from one run to the next.
        (train, test) = inputData.randomSplit([0.8, 0.2])

        # Base binary classifier that One-vs-Rest applies to each class.
        lr = LogisticRegression(maxIter=10, tol=1E-6, fitIntercept=True)

        # Wrap the base classifier in the One-vs-Rest reduction.
        ovr = OneVsRest(classifier=lr)

        # Fit the multiclass model on the training split.
        ovrModel = ovr.fit(train)

        # Score the held-out test split with the fitted model.
        predictions = ovrModel.transform(test)

        # Evaluator configured to report accuracy on the predictions.
        evaluator = MulticlassClassificationEvaluator(metricName="accuracy")

        # The reported test error is the complement of the accuracy.
        accuracy = evaluator.evaluate(predictions)
        print("Test Error = %g" % (1.0 - accuracy))
    finally:
        # Always stop the session, on success and on failure alike.
        spark.stop()