Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.sql.connector.expressions;
0019 
0020 import java.util.Arrays;
0021 
0022 import org.apache.spark.annotation.Evolving;
0023 import scala.collection.JavaConverters;
0024 
0025 import org.apache.spark.sql.types.DataType;
0026 
0027 /**
0028  * Helper methods to create logical transforms to pass into Spark.
0029  *
0030  * @since 3.0.0
0031  */
0032 @Evolving
0033 public class Expressions {
0034   private Expressions() {
0035   }
0036 
0037   /**
0038    * Create a logical transform for applying a named transform.
0039    * <p>
0040    * This transform can represent applying any named transform.
0041    *
0042    * @param name the transform name
0043    * @param args expression arguments to the transform
0044    * @return a logical transform
0045    */
0046   public static Transform apply(String name, Expression... args) {
0047     return LogicalExpressions.apply(name,
0048         JavaConverters.asScalaBuffer(Arrays.asList(args)).toSeq());
0049   }
0050 
0051   /**
0052    * Create a named reference expression for a (nested) column.
0053    *
0054    * @param name The column name. It refers to nested column if name contains dot.
0055    * @return a named reference for the column
0056    */
0057   public static NamedReference column(String name) {
0058     return LogicalExpressions.parseReference(name);
0059   }
0060 
0061   /**
0062    * Create a literal from a value.
0063    * <p>
0064    * The JVM type of the value held by a literal must be the type used by Spark's InternalRow API
0065    * for the literal's {@link DataType SQL data type}.
0066    *
0067    * @param value a value
0068    * @param <T> the JVM type of the value
0069    * @return a literal expression for the value
0070    */
0071   public static <T> Literal<T> literal(T value) {
0072     return LogicalExpressions.literal(value);
0073   }
0074 
0075   /**
0076    * Create a bucket transform for one or more columns.
0077    * <p>
0078    * This transform represents a logical mapping from a value to a bucket id in [0, numBuckets)
0079    * based on a hash of the value.
0080    * <p>
0081    * The name reported by transforms created with this method is "bucket".
0082    *
0083    * @param numBuckets the number of output buckets
0084    * @param columns input columns for the bucket transform
0085    * @return a logical bucket transform with name "bucket"
0086    */
0087   public static Transform bucket(int numBuckets, String... columns) {
0088     NamedReference[] references = Arrays.stream(columns)
0089       .map(Expressions::column)
0090       .toArray(NamedReference[]::new);
0091     return LogicalExpressions.bucket(numBuckets, references);
0092   }
0093 
0094   /**
0095    * Create an identity transform for a column.
0096    * <p>
0097    * This transform represents a logical mapping from a value to itself.
0098    * <p>
0099    * The name reported by transforms created with this method is "identity".
0100    *
0101    * @param column an input column
0102    * @return a logical identity transform with name "identity"
0103    */
0104   public static Transform identity(String column) {
0105     return LogicalExpressions.identity(Expressions.column(column));
0106   }
0107 
0108   /**
0109    * Create a yearly transform for a timestamp or date column.
0110    * <p>
0111    * This transform represents a logical mapping from a timestamp or date to a year, such as 2018.
0112    * <p>
0113    * The name reported by transforms created with this method is "years".
0114    *
0115    * @param column an input timestamp or date column
0116    * @return a logical yearly transform with name "years"
0117    */
0118   public static Transform years(String column) {
0119     return LogicalExpressions.years(Expressions.column(column));
0120   }
0121 
0122   /**
0123    * Create a monthly transform for a timestamp or date column.
0124    * <p>
0125    * This transform represents a logical mapping from a timestamp or date to a month, such as
0126    * 2018-05.
0127    * <p>
0128    * The name reported by transforms created with this method is "months".
0129    *
0130    * @param column an input timestamp or date column
0131    * @return a logical monthly transform with name "months"
0132    */
0133   public static Transform months(String column) {
0134     return LogicalExpressions.months(Expressions.column(column));
0135   }
0136 
0137   /**
0138    * Create a daily transform for a timestamp or date column.
0139    * <p>
0140    * This transform represents a logical mapping from a timestamp or date to a date, such as
0141    * 2018-05-13.
0142    * <p>
0143    * The name reported by transforms created with this method is "days".
0144    *
0145    * @param column an input timestamp or date column
0146    * @return a logical daily transform with name "days"
0147    */
0148   public static Transform days(String column) {
0149     return LogicalExpressions.days(Expressions.column(column));
0150   }
0151 
0152   /**
0153    * Create an hourly transform for a timestamp column.
0154    * <p>
0155    * This transform represents a logical mapping from a timestamp to a date and hour, such as
0156    * 2018-05-13, hour 19.
0157    * <p>
0158    * The name reported by transforms created with this method is "hours".
0159    *
0160    * @param column an input timestamp column
0161    * @return a logical hourly transform with name "hours"
0162    */
0163   public static Transform hours(String column) {
0164     return LogicalExpressions.hours(Expressions.column(column));
0165   }
0166 
0167 }