0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.examples.mllib;
0019
0020 import org.apache.spark.SparkConf;
0021 import org.apache.spark.api.java.JavaSparkContext;
0022
0023 import java.util.Arrays;
0024
0025 import org.apache.spark.api.java.JavaRDD;
0026 import org.apache.spark.mllib.linalg.Vector;
0027 import org.apache.spark.mllib.linalg.Vectors;
0028 import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
0029 import org.apache.spark.mllib.stat.Statistics;
0030
0031
0032 public class JavaSummaryStatisticsExample {
0033 public static void main(String[] args) {
0034
0035 SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample");
0036 JavaSparkContext jsc = new JavaSparkContext(conf);
0037
0038
0039 JavaRDD<Vector> mat = jsc.parallelize(
0040 Arrays.asList(
0041 Vectors.dense(1.0, 10.0, 100.0),
0042 Vectors.dense(2.0, 20.0, 200.0),
0043 Vectors.dense(3.0, 30.0, 300.0)
0044 )
0045 );
0046
0047
0048 MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd());
0049 System.out.println(summary.mean());
0050 System.out.println(summary.variance());
0051 System.out.println(summary.numNonzeros());
0052
0053
0054 jsc.stop();
0055 }
0056 }