0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.mllib.regression;
0019
0020 import java.util.List;
0021
0022 import org.junit.Assert;
0023 import org.junit.Test;
0024
0025 import org.apache.spark.SharedSparkSession;
0026 import org.apache.spark.api.java.JavaRDD;
0027 import org.apache.spark.mllib.util.LinearDataGenerator;
0028
0029 public class JavaLassoSuite extends SharedSparkSession {
0030
0031 int validatePrediction(List<LabeledPoint> validationData, LassoModel model) {
0032 int numAccurate = 0;
0033 for (LabeledPoint point : validationData) {
0034 Double prediction = model.predict(point.features());
0035
0036 if (Math.abs(prediction - point.label()) <= 0.5) {
0037 numAccurate++;
0038 }
0039 }
0040 return numAccurate;
0041 }
0042
0043 @Test
0044 public void runLassoUsingConstructor() {
0045 int nPoints = 10000;
0046 double A = 0.0;
0047 double[] weights = {-1.5, 1.0e-2};
0048
0049 JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
0050 weights, nPoints, 42, 0.1), 2).cache();
0051 List<LabeledPoint> validationData =
0052 LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
0053
0054 LassoWithSGD lassoSGDImpl = new LassoWithSGD(1.0, 20, 0.01, 1.0);
0055 LassoModel model = lassoSGDImpl.run(testRDD.rdd());
0056
0057 int numAccurate = validatePrediction(validationData, model);
0058 Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
0059 }
0060
0061 @Test
0062 public void runLassoUsingStaticMethods() {
0063 int nPoints = 10000;
0064 double A = 0.0;
0065 double[] weights = {-1.5, 1.0e-2};
0066
0067 JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
0068 weights, nPoints, 42, 0.1), 2).cache();
0069 List<LabeledPoint> validationData =
0070 LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
0071
0072 LassoModel model = new LassoWithSGD(1.0, 100, 0.01, 1.0).run(testRDD.rdd());
0073
0074 int numAccurate = validatePrediction(validationData, model);
0075 Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
0076 }
0077
0078 }