0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.examples.mllib;
0019
0020 import org.apache.spark.SparkConf;
0021 import org.apache.spark.api.java.JavaSparkContext;
0022
0023 import java.util.Arrays;
0024
0025 import org.apache.spark.api.java.JavaDoubleRDD;
0026 import org.apache.spark.api.java.JavaRDD;
0027 import org.apache.spark.mllib.linalg.Matrix;
0028 import org.apache.spark.mllib.linalg.Vector;
0029 import org.apache.spark.mllib.linalg.Vectors;
0030 import org.apache.spark.mllib.stat.Statistics;
0031
0032
0033 public class JavaCorrelationsExample {
0034 public static void main(String[] args) {
0035
0036 SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample");
0037 JavaSparkContext jsc = new JavaSparkContext(conf);
0038
0039
0040 JavaDoubleRDD seriesX = jsc.parallelizeDoubles(
0041 Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0));
0042
0043
0044 JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
0045 Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0));
0046
0047
0048
0049 double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
0050 System.out.println("Correlation is: " + correlation);
0051
0052
0053 JavaRDD<Vector> data = jsc.parallelize(
0054 Arrays.asList(
0055 Vectors.dense(1.0, 10.0, 100.0),
0056 Vectors.dense(2.0, 20.0, 200.0),
0057 Vectors.dense(5.0, 33.0, 366.0)
0058 )
0059 );
0060
0061
0062
0063
0064 Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
0065 System.out.println(correlMatrix.toString());
0066
0067
0068 jsc.stop();
0069 }
0070 }
0071