0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 package test.org.apache.spark.sql;
0019
0020 import java.io.Serializable;
0021 import java.util.Arrays;
0022 import java.util.List;
0023
0024 import scala.Tuple2;
0025
0026 import org.junit.After;
0027 import org.junit.Before;
0028
0029 import org.apache.spark.api.java.function.MapFunction;
0030 import org.apache.spark.sql.Dataset;
0031 import org.apache.spark.sql.Encoder;
0032 import org.apache.spark.sql.Encoders;
0033 import org.apache.spark.sql.KeyValueGroupedDataset;
0034 import org.apache.spark.sql.test.TestSparkSession;
0035
0036
0037
0038
0039 public class JavaDatasetAggregatorSuiteBase implements Serializable {
0040 private transient TestSparkSession spark;
0041
0042 @Before
0043 public void setUp() {
0044
0045 spark = new TestSparkSession();
0046 spark.loadTestData();
0047 }
0048
0049 @After
0050 public void tearDown() {
0051 spark.stop();
0052 spark = null;
0053 }
0054
0055 protected KeyValueGroupedDataset<String, Tuple2<String, Integer>> generateGroupedDataset() {
0056 Encoder<Tuple2<String, Integer>> encoder = Encoders.tuple(Encoders.STRING(), Encoders.INT());
0057 List<Tuple2<String, Integer>> data =
0058 Arrays.asList(new Tuple2<>("a", 1), new Tuple2<>("a", 2), new Tuple2<>("b", 3));
0059 Dataset<Tuple2<String, Integer>> ds = spark.createDataset(data, encoder);
0060
0061 return ds.groupByKey((MapFunction<Tuple2<String, Integer>, String>) value -> value._1(),
0062 Encoders.STRING());
0063 }
0064 }
0065