0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.examples.ml;
0019
0020
0021 import org.apache.spark.ml.clustering.BisectingKMeans;
0022 import org.apache.spark.ml.clustering.BisectingKMeansModel;
0023 import org.apache.spark.ml.evaluation.ClusteringEvaluator;
0024 import org.apache.spark.ml.linalg.Vector;
0025 import org.apache.spark.sql.Dataset;
0026 import org.apache.spark.sql.Row;
0027
0028 import org.apache.spark.sql.SparkSession;
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038 public class JavaBisectingKMeansExample {
0039
0040 public static void main(String[] args) {
0041 SparkSession spark = SparkSession
0042 .builder()
0043 .appName("JavaBisectingKMeansExample")
0044 .getOrCreate();
0045
0046
0047
0048 Dataset<Row> dataset = spark.read().format("libsvm").load("data/mllib/sample_kmeans_data.txt");
0049
0050
0051 BisectingKMeans bkm = new BisectingKMeans().setK(2).setSeed(1);
0052 BisectingKMeansModel model = bkm.fit(dataset);
0053
0054
0055 Dataset<Row> predictions = model.transform(dataset);
0056
0057
0058 ClusteringEvaluator evaluator = new ClusteringEvaluator();
0059
0060 double silhouette = evaluator.evaluate(predictions);
0061 System.out.println("Silhouette with squared euclidean distance = " + silhouette);
0062
0063
0064 System.out.println("Cluster Centers: ");
0065 Vector[] centers = model.clusterCenters();
0066 for (Vector center : centers) {
0067 System.out.println(center);
0068 }
0069
0070
0071 spark.stop();
0072 }
0073 }