"""
Multi-class classification metrics example (RDD-based MLlib API).

Trains a multinomial logistic regression model with L-BFGS on a
LibSVM-formatted dataset, then reports per-class and weighted
precision / recall / F-measure via MulticlassMetrics.

Run with:  spark-submit multi_class_metrics_example.py
"""
from pyspark import SparkContext
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.evaluation import MulticlassMetrics
from pyspark.mllib.util import MLUtils

if __name__ == "__main__":
    sc = SparkContext(appName="MultiClassMetricsExample")

    # Load LibSVM data as an RDD[LabeledPoint]; path is relative to the
    # Spark home directory when run as a bundled example.
    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt")

    # 60/40 train/test split; fixed seed keeps the example reproducible.
    training, test = data.randomSplit([0.6, 0.4], seed=11)
    # L-BFGS iterates over the training set many times, so cache it.
    training.cache()

    # Train a multinomial logistic regression model for the 3 classes
    # present in the sample dataset.
    model = LogisticRegressionWithLBFGS.train(training, numClasses=3)

    # Pair each test point's predicted label with its true label.
    predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp.label))

    # Evaluation over the (prediction, label) pairs.
    metrics = MulticlassMetrics(predictionAndLabels)

    # Stats for class 1.0 (precision/recall/F1 take a label argument).
    precision = metrics.precision(1.0)
    recall = metrics.recall(1.0)
    f1Score = metrics.fMeasure(1.0)
    print("Summary Stats")
    print("Precision = %s" % precision)
    print("Recall = %s" % recall)
    print("F1 Score = %s" % f1Score)

    # Statistics by class: collect the distinct labels and report each.
    labels = data.map(lambda lp: lp.label).distinct().collect()
    for label in sorted(labels):
        print("Class %s precision = %s" % (label, metrics.precision(label)))
        print("Class %s recall = %s" % (label, metrics.recall(label)))
        print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))

    # Label-frequency-weighted aggregate statistics.
    print("Weighted recall = %s" % metrics.weightedRecall)
    print("Weighted precision = %s" % metrics.weightedPrecision)
    print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
    print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
    print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)

    # Release driver/executor resources before exit.
    sc.stop()