0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.examples.mllib;
0019
0020
0021 import java.util.Arrays;
0022 import java.util.List;
0023
0024
0025 import org.apache.spark.SparkConf;
0026 import org.apache.spark.SparkContext;
0027
0028 import org.apache.spark.api.java.JavaRDD;
0029 import org.apache.spark.api.java.JavaSparkContext;
0030 import org.apache.spark.mllib.linalg.Matrix;
0031 import org.apache.spark.mllib.linalg.SingularValueDecomposition;
0032 import org.apache.spark.mllib.linalg.Vector;
0033 import org.apache.spark.mllib.linalg.Vectors;
0034 import org.apache.spark.mllib.linalg.distributed.RowMatrix;
0035
0036
0037
0038
0039
0040 public class JavaSVDExample {
0041 public static void main(String[] args) {
0042 SparkConf conf = new SparkConf().setAppName("SVD Example");
0043 SparkContext sc = new SparkContext(conf);
0044 JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
0045
0046
0047 List<Vector> data = Arrays.asList(
0048 Vectors.sparse(5, new int[] {1, 3}, new double[] {1.0, 7.0}),
0049 Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
0050 Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
0051 );
0052
0053 JavaRDD<Vector> rows = jsc.parallelize(data);
0054
0055
0056 RowMatrix mat = new RowMatrix(rows.rdd());
0057
0058
0059 SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(5, true, 1.0E-9d);
0060 RowMatrix U = svd.U();
0061 Vector s = svd.s();
0062 Matrix V = svd.V();
0063
0064 Vector[] collectPartitions = (Vector[]) U.rows().collect();
0065 System.out.println("U factor is:");
0066 for (Vector vector : collectPartitions) {
0067 System.out.println("\t" + vector);
0068 }
0069 System.out.println("Singular values are: " + s);
0070 System.out.println("V factor is:\n" + V);
0071
0072 jsc.stop();
0073 }
0074 }