0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.examples.mllib;
0019
0020
0021 import java.util.Arrays;
0022 import java.util.List;
0023
0024 import org.apache.spark.SparkConf;
0025 import org.apache.spark.api.java.JavaSparkContext;
0026
0027 import org.apache.spark.api.java.JavaRDD;
0028 import org.apache.spark.mllib.clustering.BisectingKMeans;
0029 import org.apache.spark.mllib.clustering.BisectingKMeansModel;
0030 import org.apache.spark.mllib.linalg.Vector;
0031 import org.apache.spark.mllib.linalg.Vectors;
0032
0033
0034
0035
0036
0037 public class JavaBisectingKMeansExample {
0038 public static void main(String[] args) {
0039 SparkConf sparkConf = new SparkConf().setAppName("JavaBisectingKMeansExample");
0040 JavaSparkContext sc = new JavaSparkContext(sparkConf);
0041
0042
0043 List<Vector> localData = Arrays.asList(
0044 Vectors.dense(0.1, 0.1), Vectors.dense(0.3, 0.3),
0045 Vectors.dense(10.1, 10.1), Vectors.dense(10.3, 10.3),
0046 Vectors.dense(20.1, 20.1), Vectors.dense(20.3, 20.3),
0047 Vectors.dense(30.1, 30.1), Vectors.dense(30.3, 30.3)
0048 );
0049 JavaRDD<Vector> data = sc.parallelize(localData, 2);
0050
0051 BisectingKMeans bkm = new BisectingKMeans()
0052 .setK(4);
0053 BisectingKMeansModel model = bkm.run(data);
0054
0055 System.out.println("Compute Cost: " + model.computeCost(data));
0056
0057 Vector[] clusterCenters = model.clusterCenters();
0058 for (int i = 0; i < clusterCenters.length; i++) {
0059 Vector clusterCenter = clusterCenters[i];
0060 System.out.println("Cluster Center " + i + ": " + clusterCenter);
0061 }
0062
0063
0064 sc.stop();
0065 }
0066 }