Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.examples.mllib;
0019 
0020 import org.apache.spark.SparkConf;
0021 import org.apache.spark.api.java.JavaSparkContext;
0022 // $example on$
0023 import java.util.Arrays;
0024 
0025 import org.apache.spark.api.java.JavaDoubleRDD;
0026 import org.apache.spark.api.java.JavaRDD;
0027 import org.apache.spark.mllib.linalg.Matrix;
0028 import org.apache.spark.mllib.linalg.Vector;
0029 import org.apache.spark.mllib.linalg.Vectors;
0030 import org.apache.spark.mllib.stat.Statistics;
0031 // $example off$
0032 
0033 public class JavaCorrelationsExample {
0034   public static void main(String[] args) {
0035 
0036     SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample");
0037     JavaSparkContext jsc = new JavaSparkContext(conf);
0038 
0039     // $example on$
0040     JavaDoubleRDD seriesX = jsc.parallelizeDoubles(
0041       Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0));  // a series
0042 
0043     // must have the same number of partitions and cardinality as seriesX
0044     JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
0045       Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0));
0046 
0047     // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
0048     // If a method is not specified, Pearson's method will be used by default.
0049     double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
0050     System.out.println("Correlation is: " + correlation);
0051 
0052     // note that each Vector is a row and not a column
0053     JavaRDD<Vector> data = jsc.parallelize(
0054       Arrays.asList(
0055         Vectors.dense(1.0, 10.0, 100.0),
0056         Vectors.dense(2.0, 20.0, 200.0),
0057         Vectors.dense(5.0, 33.0, 366.0)
0058       )
0059     );
0060 
0061     // calculate the correlation matrix using Pearson's method.
0062     // Use "spearman" for Spearman's method.
0063     // If a method is not specified, Pearson's method will be used by default.
0064     Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
0065     System.out.println(correlMatrix.toString());
0066     // $example off$
0067 
0068     jsc.stop();
0069   }
0070 }
0071