0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 """
0019 An example demonstrating bisecting k-means clustering.
0020 Run with:
0021 bin/spark-submit examples/src/main/python/ml/bisecting_k_means_example.py
0022 """
0023 from __future__ import print_function
0024
0025
0026 from pyspark.ml.clustering import BisectingKMeans
0027 from pyspark.ml.evaluation import ClusteringEvaluator
0028
0029 from pyspark.sql import SparkSession
0030
if __name__ == "__main__":
    # Build (or reuse) the session that drives every operation below.
    spark = SparkSession\
        .builder\
        .appName("BisectingKMeansExample")\
        .getOrCreate()

    # Everything after session creation runs inside try/finally so that the
    # session is stopped even when loading, fitting or evaluation raises.
    try:
        # Load the sample data set through the "libsvm" data source.
        dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")

        # Configure bisecting k-means with k=2 and seed=1, then fit it.
        bkm = BisectingKMeans().setK(2).setSeed(1)
        model = bkm.fit(dataset)

        # Apply the fitted model to the same data it was trained on.
        predictions = model.transform(dataset)

        # Score the predictions with a default-configured ClusteringEvaluator;
        # the message below reports the value as the silhouette score.
        evaluator = ClusteringEvaluator()
        silhouette = evaluator.evaluate(predictions)
        print("Silhouette with squared euclidean distance = " + str(silhouette))

        # Print every cluster center of the fitted model, one per line.
        print("Cluster Centers: ")
        for center in model.clusterCenters():
            print(center)
    finally:
        spark.stop()