spark/sql/JavaDatasetAggregatorSuiteBase.java

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017
0018 package test.org.apache.spark.sql;
0019
0020 import java.io.Serializable;
0021 import java.util.Arrays;
0022 import java.util.List;
0023
0024 import scala.Tuple2;
0025
0026 import org.junit.After;
0027 import org.junit.Before;
0028
0029 import org.apache.spark.api.java.function.MapFunction;
0030 import org.apache.spark.sql.Dataset;
0031 import org.apache.spark.sql.Encoder;
0032 import org.apache.spark.sql.Encoders;
0033 import org.apache.spark.sql.KeyValueGroupedDataset;
0034 import org.apache.spark.sql.test.TestSparkSession;
0035
0036 /**
0037  * Common test base shared across this and Java8DatasetAggregatorSuite.
0038  */
0039 public class JavaDatasetAggregatorSuiteBase implements Serializable {
0040   private transient TestSparkSession spark;
0041
0042   @Before
0043   public void setUp() {
0044     // Trigger static initializer of TestData
0045     spark = new TestSparkSession();
0046     spark.loadTestData();
0047   }
0048
0049   @After
0050   public void tearDown() {
0051     spark.stop();
0052     spark = null;
0053   }
0054
0055   protected KeyValueGroupedDataset<String, Tuple2<String, Integer>> generateGroupedDataset() {
0056     Encoder<Tuple2<String, Integer>> encoder = Encoders.tuple(Encoders.STRING(), Encoders.INT());
0057     List<Tuple2<String, Integer>> data =
0058       Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("a", 2), new Tuple2<>("b", 3));
0059     Dataset<Tuple2<String, Integer>> ds = spark.createDataset(data, encoder);
0060
0061     return ds.groupByKey((MapFunction<Tuple2<String, Integer>, String>) value -> value._1(),
0062       Encoders.STRING());
0063   }
0064 }
0065