Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 package org.apache.spark.examples.sql;
0018 
0019 // $example on:udf_scalar$
0020 import org.apache.spark.sql.*;
0021 import org.apache.spark.sql.api.java.UDF1;
0022 import org.apache.spark.sql.expressions.UserDefinedFunction;
0023 import static org.apache.spark.sql.functions.udf;
0024 import org.apache.spark.sql.types.DataTypes;
0025 // $example off:udf_scalar$
0026 
0027 public class JavaUserDefinedScalar {
0028 
0029   public static void main(String[] args) {
0030 
0031     // $example on:udf_scalar$
0032     SparkSession spark = SparkSession
0033       .builder()
0034       .appName("Java Spark SQL UDF scalar example")
0035       .getOrCreate();
0036 
0037     // Define and register a zero-argument non-deterministic UDF
0038     // UDF is deterministic by default, i.e. produces the same result for the same input.
0039     UserDefinedFunction random = udf(
0040       () -> Math.random(), DataTypes.DoubleType
0041     );
0042     random.asNondeterministic();
0043     spark.udf().register("random", random);
0044     spark.sql("SELECT random()").show();
0045     // +-------+
0046     // |UDF()  |
0047     // +-------+
0048     // |xxxxxxx|
0049     // +-------+
0050 
0051     // Define and register a one-argument UDF
0052     spark.udf().register("plusOne", new UDF1<Integer, Integer>() {
0053       @Override
0054       public Integer call(Integer x) {
0055         return x + 1;
0056       }
0057     }, DataTypes.IntegerType);
0058     spark.sql("SELECT plusOne(5)").show();
0059     // +----------+
0060     // |plusOne(5)|
0061     // +----------+
0062     // |         6|
0063     // +----------+
0064 
0065     // Define and register a two-argument UDF
0066     UserDefinedFunction strLen = udf(
0067       (String s, Integer x) -> s.length() + x, DataTypes.IntegerType
0068     );
0069     spark.udf().register("strLen", strLen);
0070     spark.sql("SELECT strLen('test', 1)").show();
0071     // +------------+
0072     // |UDF(test, 1)|
0073     // +------------+
0074     // |           5|
0075     // +------------+
0076 
0077     // UDF in a WHERE clause
0078     spark.udf().register("oneArgFilter", new UDF1<Long, Boolean>() {
0079       @Override
0080       public Boolean call(Long x) {
0081         return  x > 5;
0082       }
0083     }, DataTypes.BooleanType);
0084     spark.range(1, 10).createOrReplaceTempView("test");
0085     spark.sql("SELECT * FROM test WHERE oneArgFilter(id)").show();
0086     // +---+
0087     // | id|
0088     // +---+
0089     // |  6|
0090     // |  7|
0091     // |  8|
0092     // |  9|
0093     // +---+
0094 
0095     // $example off:udf_scalar$
0096     spark.stop();
0097   }
0098 }