Back to home page

OSCL-LXR

 
 

    


0001 #
0002 # Licensed to the Apache Software Foundation (ASF) under one or more
0003 # contributor license agreements.  See the NOTICE file distributed with
0004 # this work for additional information regarding copyright ownership.
0005 # The ASF licenses this file to You under the Apache License, Version 2.0
0006 # (the "License"); you may not use this file except in compliance with
0007 # the License.  You may obtain a copy of the License at
0008 #
0009 #    http://www.apache.org/licenses/LICENSE-2.0
0010 #
0011 # Unless required by applicable law or agreed to in writing, software
0012 # distributed under the License is distributed on an "AS IS" BASIS,
0013 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014 # See the License for the specific language governing permissions and
0015 # limitations under the License.
0016 #
0017 
0018 # Column Class
0019 
0020 #' @include generics.R jobj.R schema.R
0021 NULL
0022 
# Register "jobj" with the S4 system so it can be used as a slot type below.
setOldClass("jobj")

#' S4 class that represents a SparkDataFrame column
#'
#' The Column class supports unary and binary operations on SparkDataFrame columns.
#'
#' @rdname column
#'
#' @slot jc reference to JVM SparkDataFrame column
#' @note Column since 1.4.0
setClass("Column", slots = c(jc = "jobj"))
0035 
0036 #' A set of operations working with SparkDataFrame columns
0037 #' @rdname columnfunctions
0038 #' @name columnfunctions
0039 NULL
0040 
# Initializer for Column objects: stores the supplied JVM reference in the
# `jc` slot and returns the populated object.
setMethod("initialize",
          signature(.Object = "Column"),
          function(.Object, jc) {
            .Object@jc <- jc
            .Object
          })
0045 
#' @rdname column
#' @name column
#' @aliases column,jobj-method
setMethod("column", signature(x = "jobj"), function(x) {
  # Wrap the JVM reference in a Column object; it ends up in the `jc` slot.
  new("Column", jc = x)
})
0054 
#' @rdname show
#' @name show
#' @aliases show,Column-method
#' @note show(Column) since 1.4.0
setMethod("show", signature(object = "Column"), function(object) {
  # Print the word "Column" followed by the JVM-side string form of the column.
  description <- callJMethod(object@jc, "toString")
  cat("Column", description, "\n")
})
0063 
# Maps each R operator to the name of the JVM method invoked for it.
# `&&` and `||` cannot be overridden, so `&` and `|` are mapped instead.
operators <- list(
  "+" = "plus",
  "-" = "minus",
  "*" = "multiply",
  "/" = "divide",
  "%%" = "mod",
  "==" = "equalTo",
  ">" = "gt",
  "<" = "lt",
  "!=" = "notEqual",
  "<=" = "leq",
  ">=" = "geq",
  "&" = "and",
  "|" = "or",
  "^" = "pow"
)

# Names of Column methods that are generated as one-argument wrappers.
column_functions1 <- c(
  "asc",
  "desc",
  "isNaN",
  "isNull",
  "isNotNull"
)

# Names of Column methods that are generated as two-argument wrappers.
column_functions2 <- c(
  "like",
  "rlike",
  "getField",
  "getItem",
  "contains"
)
0072 
# Registers an S4 method for the operator `op` whose first operand is a Column.
#
# The generated method looks up the JVM method name in `operators` and returns
# the result wrapped in a Column:
#   * unary use (`e2` missing): `-` calls "unary_$minus"; any other operator
#     calls its mapped method with no argument.
#   * binary use: a Column `e2` is replaced by its JVM reference; `^` is routed
#     through the static `org.apache.spark.sql.functions` class, every other
#     operator is called as a method on `e1`'s JVM column.
#
# @param op operator symbol as a string; must be a name of `operators`.
createOperator <- function(op) {
  setMethod(op,
            signature(e1 = "Column"),
            function(e1, e2) {
              jc <- if (missing(e2)) {
                if (op == "-") {
                  callJMethod(e1@jc, "unary_$minus")
                } else {
                  callJMethod(e1@jc, operators[[op]])
                }
              } else {
                # inherits() rather than class(e2) == "Column": class() may
                # return several names (e.g. POSIXct), which would make the
                # `if` condition longer than one and fail on R >= 4.2.
                if (inherits(e2, "Column")) {
                  e2 <- e2@jc
                }
                if (op == "^") {
                  callJStatic("org.apache.spark.sql.functions", operators[[op]], e1@jc, e2)
                } else {
                  callJMethod(e1@jc, operators[[op]], e2)
                }
              }
              column(jc)
            })
}
0096 
# Registers an S4 method called `name` for Column that invokes the JVM method
# of the same name with no arguments and wraps the result in a Column.
createColumnFunction1 <- function(name) {
  setMethod(name, signature(x = "Column"), function(x) {
    jc <- callJMethod(x@jc, name)
    column(jc)
  })
}
0104 
# Registers an S4 method called `name` for Column that takes one extra
# argument `data`. The generated method invokes the JVM method of the same
# name with `data` and wraps the result in a Column.
#
# @param name name of both the R generic and the JVM Column method.
createColumnFunction2 <- function(name) {
  setMethod(name,
            signature(x = "Column"),
            function(x, data) {
              # inherits() rather than class(data) == "Column": class() may
              # return several names, which would make the `if` condition
              # longer than one and fail on R >= 4.2.
              if (inherits(data, "Column")) {
                # Pass the underlying JVM reference, not the R wrapper.
                data <- data@jc
              }
              jc <- callJMethod(x@jc, name, data)
              column(jc)
            })
}
0116 
# Registers all generated Column methods: one per entry of `operators`,
# `column_functions1` and `column_functions2`, in that order.
createMethods <- function() {
  lapply(names(operators), createOperator)
  lapply(column_functions1, createColumnFunction1)
  lapply(column_functions2, createColumnFunction2)
  invisible(NULL)
}

# Run the registration when this file is evaluated.
createMethods()
0130 
#' @rdname alias
#' @name alias
#' @aliases alias,Column-method
#' @family colum_func
#' @examples
#' \dontrun{
#' df <- createDataFrame(iris)
#'
#' head(select(
#'   df, alias(df$Sepal_Length, "slength"), alias(df$Petal_Length, "plength")
#' ))
#' }
#' @note alias(Column) since 1.4.0
setMethod("alias", signature(object = "Column"), function(object, data) {
  # Only character input is accepted.
  if (!is.character(data)) {
    stop("data should be character")
  }
  jc <- callJMethod(object@jc, "as", data)
  column(jc)
})
0153 
#' substr
#'
#' An expression that returns a substring.
#'
#' @rdname substr
#' @name substr
#' @family colum_func
#' @aliases substr,Column-method
#'
#' @param x a Column.
#' @param start starting position. It should be 1-based.
#' @param stop ending position.
#' @examples
#' \dontrun{
#' df <- createDataFrame(list(list(a="abcdef")))
#' collect(select(df, substr(df$a, 1, 4))) # the result is `abcd`.
#' collect(select(df, substr(df$a, 2, 4))) # the result is `bcd`.
#' }
#' @note substr since 1.4.0
setMethod("substr",
          signature(x = "Column"),
          function(x, start, stop) {
            # The second value passed to the JVM is the substring length
            # (stop - start + 1), not the end position.
            column(callJMethod(x@jc, "substr", as.integer(start), as.integer(stop - start + 1)))
          })
0178 
#' startsWith
#'
#' Determines if entries of x start with string (entries of) prefix respectively,
#' where strings are recycled to common lengths.
#'
#' @rdname startsWith
#' @name startsWith
#' @family colum_func
#' @aliases startsWith,Column-method
#'
#' @param x vector of character string whose "starts" are considered
#' @param prefix character vector (often of length one)
#' @note startsWith since 1.4.0
setMethod("startsWith",
          signature(x = "Column"),
          function(x, prefix) {
            # `prefix` is passed through as.vector() before the JVM call.
            column(callJMethod(x@jc, "startsWith", as.vector(prefix)))
          })
0197 
#' endsWith
#'
#' Determines if entries of x end with string (entries of) suffix respectively,
#' where strings are recycled to common lengths.
#'
#' @rdname endsWith
#' @name endsWith
#' @family colum_func
#' @aliases endsWith,Column-method
#'
#' @param x vector of character string whose "ends" are considered
#' @param suffix character vector (often of length one)
#' @note endsWith since 1.4.0
setMethod("endsWith",
          signature(x = "Column"),
          function(x, suffix) {
            # `suffix` is passed through as.vector() before the JVM call.
            column(callJMethod(x@jc, "endsWith", as.vector(suffix)))
          })
0216 
#' between
#'
#' Test if the column is between the lower bound and upper bound, inclusive.
#'
#' @rdname between
#' @name between
#' @family colum_func
#' @aliases between,Column-method
#'
#' @param x a Column
#' @param bounds a vector of length two holding the lower and upper bounds
#' @note between since 1.5.0
setMethod("between",
          signature(x = "Column"),
          function(x, bounds) {
            # Reject anything that is not a two-element vector.
            if (!is.vector(bounds) || length(bounds) != 2) {
              stop("bounds should be a vector of lower and upper bounds")
            }
            lower <- bounds[1]
            upper <- bounds[2]
            column(callJMethod(x@jc, "between", lower, upper))
          })
0238 
#' Casts the column to a different data type.
#'
#' @param x a Column.
#' @param dataType a character object describing the target data type.
#'        See
# nolint start
#'        \href{https://spark.apache.org/docs/latest/sparkr.html#data-type-mapping-between-r-and-spark}{
#'        Spark Data Types} for available data types.
# nolint end
#' @rdname cast
#' @name cast
#' @family colum_func
#' @aliases cast,Column-method
#'
#' @examples
#' \dontrun{
#'   cast(df$age, "string")
#' }
#' @note cast since 1.4.0
setMethod("cast", signature(x = "Column"), function(x, dataType) {
  # Only a character data type description is accepted.
  if (!is.character(dataType)) {
    stop("dataType should be character")
  }
  jc <- callJMethod(x@jc, "cast", dataType)
  column(jc)
})
0267 
#' Match a column with given values.
#'
#' @param x a Column.
#' @param table a collection of values (coercible to list) to compare with.
#' @rdname match
#' @name %in%
#' @aliases %in%,Column-method
#' @return A Column holding the result of comparing \code{x} with the given values.
#' @examples
#' \dontrun{
#' filter(df, "age in (10, 30)")
#' where(df, df$age %in% c(10, 30))
#' }
#' @note \%in\% since 1.5.0
setMethod("%in%", signature(x = "Column"), function(x, table) {
  # The candidate values are handed to the JVM `isin` method as a list.
  candidates <- as.list(table)
  column(callJMethod(x@jc, "isin", candidates))
})
0288 
#' otherwise
#'
#' If values in the specified column are null, returns the value.
#' Can be used in conjunction with \code{when} to specify a default value for expressions.
#'
#' @param x a Column.
#' @param value value to replace when the corresponding entry in \code{x} is NA.
#'              Can be a single value or a Column.
#' @rdname otherwise
#' @name otherwise
#' @family colum_func
#' @aliases otherwise,Column-method
#' @note otherwise since 1.5.0
setMethod("otherwise",
          signature(x = "Column", value = "ANY"),
          function(x, value) {
            # inherits() rather than class(value) == "Column": class() may
            # return several names (e.g. POSIXct), which would make the `if`
            # condition longer than one and fail on R >= 4.2.
            if (inherits(value, "Column")) {
              # Pass the underlying JVM reference, not the R wrapper.
              value <- value@jc
            }
            jc <- callJMethod(x@jc, "otherwise", value)
            column(jc)
          })
0309 
#' \%<=>\%
#'
#' Equality test that is safe for null values.
#'
#' Can be used, unlike standard equality operator, to perform null-safe joins.
#' Equivalent to Scala \code{Column.<=>} and \code{Column.eqNullSafe}.
#'
#' @param x a Column
#' @param value a value to compare; can be a Column
#' @rdname eq_null_safe
#' @name %<=>%
#' @aliases %<=>%,Column-method
#' @examples
#' \dontrun{
#' df1 <- createDataFrame(data.frame(
#'   x = c(1, NA, 3, NA), y = c(2, 6, 3, NA)
#' ))
#'
#' head(select(df1, df1$x == df1$y, df1$x %<=>% df1$y))
#'
#' df2 <- createDataFrame(data.frame(y = c(3, NA)))
#' count(join(df1, df2, df1$y == df2$y))
#'
#' count(join(df1, df2, df1$y %<=>% df2$y))
#' }
#' @note \%<=>\% since 2.3.0
setMethod("%<=>%",
          signature(x = "Column", value = "ANY"),
          function(x, value) {
            # inherits() rather than class(value) == "Column": class() may
            # return several names (e.g. POSIXct), which would make the `if`
            # condition longer than one and fail on R >= 4.2.
            if (inherits(value, "Column")) {
              # Pass the underlying JVM reference, not the R wrapper.
              value <- value@jc
            }
            jc <- callJMethod(x@jc, "eqNullSafe", value)
            column(jc)
          })
0343 
#' !
#'
#' Inversion of boolean expression.
#'
#' @rdname not
#' @name not
#' @aliases !,Column-method
#' @examples
#' \dontrun{
#' df <- createDataFrame(data.frame(x = c(-1, 0, 1)))
#'
#' head(select(df, !column("x") > 0))
#' }
#' @note ! since 2.3.0
setMethod("!",
          signature(x = "Column"),
          function(x) {
            # Delegate to not() for the actual negation.
            not(x)
          })