0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 package org.apache.spark.examples.sql;
0018
0019
0020 import org.apache.spark.sql.*;
0021 import org.apache.spark.sql.api.java.UDF1;
0022 import org.apache.spark.sql.expressions.UserDefinedFunction;
0023 import static org.apache.spark.sql.functions.udf;
0024 import org.apache.spark.sql.types.DataTypes;
0025
0026
0027 public class JavaUserDefinedScalar {
0028
0029 public static void main(String[] args) {
0030
0031
0032 SparkSession spark = SparkSession
0033 .builder()
0034 .appName("Java Spark SQL UDF scalar example")
0035 .getOrCreate();
0036
0037
0038
0039 UserDefinedFunction random = udf(
0040 () -> Math.random(), DataTypes.DoubleType
0041 );
0042 random.asNondeterministic();
0043 spark.udf().register("random", random);
0044 spark.sql("SELECT random()").show();
0045
0046
0047
0048
0049
0050
0051
0052 spark.udf().register("plusOne", new UDF1<Integer, Integer>() {
0053 @Override
0054 public Integer call(Integer x) {
0055 return x + 1;
0056 }
0057 }, DataTypes.IntegerType);
0058 spark.sql("SELECT plusOne(5)").show();
0059
0060
0061
0062
0063
0064
0065
0066 UserDefinedFunction strLen = udf(
0067 (String s, Integer x) -> s.length() + x, DataTypes.IntegerType
0068 );
0069 spark.udf().register("strLen", strLen);
0070 spark.sql("SELECT strLen('test', 1)").show();
0071
0072
0073
0074
0075
0076
0077
0078 spark.udf().register("oneArgFilter", new UDF1<Long, Boolean>() {
0079 @Override
0080 public Boolean call(Long x) {
0081 return x > 5;
0082 }
0083 }, DataTypes.BooleanType);
0084 spark.range(1, 10).createOrReplaceTempView("test");
0085 spark.sql("SELECT * FROM test WHERE oneArgFilter(id)").show();
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096 spark.stop();
0097 }
0098 }