/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

0018 package org.apache.spark.examples.mllib;
0019
0020
0021 import scala.Tuple2;
0022 import org.apache.spark.api.java.JavaPairRDD;
0023 import org.apache.spark.api.java.JavaRDD;
0024 import org.apache.spark.api.java.JavaSparkContext;
0025 import org.apache.spark.mllib.classification.NaiveBayes;
0026 import org.apache.spark.mllib.classification.NaiveBayesModel;
0027 import org.apache.spark.mllib.regression.LabeledPoint;
0028 import org.apache.spark.mllib.util.MLUtils;
0029
0030 import org.apache.spark.SparkConf;
0031
0032 public class JavaNaiveBayesExample {
0033 public static void main(String[] args) {
0034 SparkConf sparkConf = new SparkConf().setAppName("JavaNaiveBayesExample");
0035 JavaSparkContext jsc = new JavaSparkContext(sparkConf);
0036
0037 String path = "data/mllib/sample_libsvm_data.txt";
0038 JavaRDD<LabeledPoint> inputData = MLUtils.loadLibSVMFile(jsc.sc(), path).toJavaRDD();
0039 JavaRDD<LabeledPoint>[] tmp = inputData.randomSplit(new double[]{0.6, 0.4});
0040 JavaRDD<LabeledPoint> training = tmp[0];
0041 JavaRDD<LabeledPoint> test = tmp[1];
0042 NaiveBayesModel model = NaiveBayes.train(training.rdd(), 1.0);
0043 JavaPairRDD<Double, Double> predictionAndLabel =
0044 test.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
0045 double accuracy =
0046 predictionAndLabel.filter(pl -> pl._1().equals(pl._2())).count() / (double) test.count();
0047
0048
0049 model.save(jsc.sc(), "target/tmp/myNaiveBayesModel");
0050 NaiveBayesModel sameModel = NaiveBayesModel.load(jsc.sc(), "target/tmp/myNaiveBayesModel");
0051
0052
0053 jsc.stop();
0054 }
0055 }