Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.mllib.regression;
0019 
0020 import java.util.List;
0021 
0022 import org.junit.Assert;
0023 import org.junit.Test;
0024 
0025 import org.apache.spark.SharedSparkSession;
0026 import org.apache.spark.api.java.JavaRDD;
0027 import org.apache.spark.mllib.util.LinearDataGenerator;
0028 
0029 public class JavaLassoSuite extends SharedSparkSession {
0030 
0031   int validatePrediction(List<LabeledPoint> validationData, LassoModel model) {
0032     int numAccurate = 0;
0033     for (LabeledPoint point : validationData) {
0034       Double prediction = model.predict(point.features());
0035       // A prediction is off if the prediction is more than 0.5 away from expected value.
0036       if (Math.abs(prediction - point.label()) <= 0.5) {
0037         numAccurate++;
0038       }
0039     }
0040     return numAccurate;
0041   }
0042 
0043   @Test
0044   public void runLassoUsingConstructor() {
0045     int nPoints = 10000;
0046     double A = 0.0;
0047     double[] weights = {-1.5, 1.0e-2};
0048 
0049     JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
0050       weights, nPoints, 42, 0.1), 2).cache();
0051     List<LabeledPoint> validationData =
0052       LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
0053 
0054     LassoWithSGD lassoSGDImpl = new LassoWithSGD(1.0, 20, 0.01, 1.0);
0055     LassoModel model = lassoSGDImpl.run(testRDD.rdd());
0056 
0057     int numAccurate = validatePrediction(validationData, model);
0058     Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
0059   }
0060 
0061   @Test
0062   public void runLassoUsingStaticMethods() {
0063     int nPoints = 10000;
0064     double A = 0.0;
0065     double[] weights = {-1.5, 1.0e-2};
0066 
0067     JavaRDD<LabeledPoint> testRDD = jsc.parallelize(LinearDataGenerator.generateLinearInputAsList(A,
0068       weights, nPoints, 42, 0.1), 2).cache();
0069     List<LabeledPoint> validationData =
0070       LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1);
0071 
0072     LassoModel model = new LassoWithSGD(1.0, 100, 0.01, 1.0).run(testRDD.rdd());
0073 
0074     int numAccurate = validatePrediction(validationData, model);
0075     Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0);
0076   }
0077 
0078 }