0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package org.apache.spark.mllib.classification;
0019
0020 import java.util.List;
0021
0022 import org.junit.Assert;
0023 import org.junit.Test;
0024
0025 import org.apache.spark.SharedSparkSession;
0026 import org.apache.spark.api.java.JavaRDD;
0027 import org.apache.spark.mllib.regression.LabeledPoint;
0028
0029 public class JavaSVMSuite extends SharedSparkSession {
0030
0031 int validatePrediction(List<LabeledPoint> validationData, SVMModel model) {
0032 int numAccurate = 0;
0033 for (LabeledPoint point : validationData) {
0034 Double prediction = model.predict(point.features());
0035 if (prediction == point.label()) {
0036 numAccurate++;
0037 }
0038 }
0039 return numAccurate;
0040 }
0041
0042 @Test
0043 public void runSVMUsingConstructor() {
0044 int nPoints = 10000;
0045 double A = 2.0;
0046 double[] weights = {-1.5, 1.0};
0047
0048 JavaRDD<LabeledPoint> testRDD = jsc.parallelize(SVMSuite.generateSVMInputAsList(A,
0049 weights, nPoints, 42), 2).cache();
0050 List<LabeledPoint> validationData =
0051 SVMSuite.generateSVMInputAsList(A, weights, nPoints, 17);
0052
0053 SVMWithSGD svmSGDImpl = new SVMWithSGD();
0054 svmSGDImpl.setIntercept(true);
0055 svmSGDImpl.optimizer().setStepSize(1.0)
0056 .setRegParam(1.0)
0057 .setNumIterations(100);
0058 SVMModel model = svmSGDImpl.run(testRDD.rdd());
0059
0060 int numAccurate = validatePrediction(validationData, model);
0061 Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
0062 }
0063
0064 @Test
0065 public void runSVMUsingStaticMethods() {
0066 int nPoints = 10000;
0067 double A = 0.0;
0068 double[] weights = {-1.5, 1.0};
0069
0070 JavaRDD<LabeledPoint> testRDD = jsc.parallelize(SVMSuite.generateSVMInputAsList(A,
0071 weights, nPoints, 42), 2).cache();
0072 List<LabeledPoint> validationData =
0073 SVMSuite.generateSVMInputAsList(A, weights, nPoints, 17);
0074
0075 SVMModel model = SVMWithSGD.train(testRDD.rdd(), 100, 1.0, 1.0, 1.0);
0076
0077 int numAccurate = validatePrediction(validationData, model);
0078 Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
0079 }
0080 }