Back to home page

OSCL-LXR

 
 

    


0001 # Licensed to the Apache Software Foundation (ASF) under one or more
0002 # contributor license agreements.  See the NOTICE file distributed with
0003 # this work for additional information regarding copyright ownership.
0004 # The ASF licenses this file to You under the Apache License, Version 2.0
0005 # (the "License"); you may not use this file except in compliance with
0006 # the License.  You may obtain a copy of the License at
0007 #
0008 #    http://www.apache.org/licenses/LICENSE-2.0
0009 #
0010 # Unless required by applicable law or agreed to in writing, software
0011 # distributed under the License is distributed on an "AS IS" BASIS,
0012 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013 # See the License for the specific language governing permissions and
0014 # limitations under the License.
0015 #
0016 # types.R. This file handles the data type mapping between Spark and R
0017 
# Lookup table for primitive data types: each name is a Scala/SQL type and each value is
# the class (or class vector) of the equivalent R type. The table is kept in an environment
# so that entries are looked up by name.
PRIMITIVE_TYPES <- list2env(
  list(
    tinyint = "integer",
    smallint = "integer",
    int = "integer",
    bigint = "numeric",
    float = "numeric",
    double = "numeric",
    decimal = "numeric",
    string = "character",
    binary = "raw",
    boolean = "logical",
    # A timestamp maps to the two-element class vector of a POSIXct value.
    timestamp = c("POSIXct", "POSIXt"),
    date = "Date",
    # The remaining entries are not SQL types returned by dtypes(). They are kept here
    # because checkType() in schema.R uses them.
    # TODO: refactor checkType() in schema.R.
    byte = "integer",
    integer = "integer"
  ),
  parent = emptyenv()
)
0040 
# Complex data types. None of them has a direct R counterpart, so every entry is NA.
COMPLEX_TYPES <- structure(
  list(NA, NA, NA),
  names = c("map", "array", "struct")
)
0046 
# Every known data type: the primitive entries followed by the complex ones, stored in a
# single environment for lookup by type name.
DATA_TYPES <- local({
  allTypes <- append(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES)
  list2env(allTypes, parent = emptyenv())
})
0049 
# Abbreviated labels for type names: each name is an R class or complex type name and each
# value is its short form.
SHORT_TYPES <- as.environment(as.list(c(
  character = "chr",
  logical = "logi",
  POSIXct = "POSIXct",
  integer = "int",
  numeric = "num",
  raw = "raw",
  Date = "Date",
  map = "map",
  array = "array",
  struct = "struct"
)))
0062 
# Reverse lookup from R to Scala: each name is an R type and each value is the matching
# Scala type.
rToSQLTypes <- list2env(
  list(
    integer = "integer",  # R integers are 32bit
    numeric = "double",   # R numeric is double, which is 64bit
    double = "double",
    character = "string",
    logical = "boolean"
  ),
  parent = emptyenv()
)
0070 
# Helper function for converting the decimal type. When the backend returns a column type in
# the format decimal(precision, scale) (e.g., decimal(10, 0)), this function converts the
# column type to the double type. It handles backend returned types that are not keys of
# PRIMITIVE_TYPES, but should be treated as PRIMITIVE_TYPES.
# @param type A type returned from the JVM backend. Only the first element is inspected.
# @return "double" if type is a parameterized decimal type, i.e. a key of PRIMITIVE_TYPES;
#         NULL otherwise (including for missing or zero-length input).
specialtypeshandle <- function(type) {
  returntype <- NULL
  # The parentheses are escaped so that they match literally. Left unescaped they would form
  # a capture group, and any string that merely starts with "decimal" would be accepted.
  if (length(type) > 0 && isTRUE(grepl("^decimal\\(.+\\)$", type[[1]]))) {
    returntype <- "double"
  }
  returntype
}
0086 
# Helper function that checks whether every field type of a schema is supported by the Arrow
# optimization in R. It stops with an error on the first unsupported type that it finds.
# @param schema An object inheriting from "structType".
# @return NULL, invisibly, when no unsupported type is found.
checkSchemaInArrow <- function(schema) {
  stopifnot(inherits(schema, "structType"))

  if (!requireNamespace("arrow", quietly = TRUE)) {
    stop("'arrow' package should be installed.")
  }

  # vapply() always yields a character vector, even for a schema without fields, where
  # sapply() would return an empty list that startsWith() rejects.
  # NOTE(review): assumes dataType.toString() returns a single string per field - confirm.
  field_strings <- vapply(schema$fields(), function(x) x$dataType.toString(), character(1))

  # The cases below produce a corrupt value for unknown reason. It needs to be investigated.
  if (any(field_strings == "FloatType")) {
    stop("Arrow optimization in R does not support float type yet.")
  }
  if (any(field_strings == "BinaryType")) {
    stop("Arrow optimization in R does not support binary type yet.")
  }
  if (any(startsWith(field_strings, "ArrayType"))) {
    stop("Arrow optimization in R does not support array type yet.")
  }

  # Arrow optimization in Spark does not yet support both cases below.
  if (any(startsWith(field_strings, "StructType"))) {
    stop("Arrow optimization in R does not support nested struct type yet.")
  }
  if (any(startsWith(field_strings, "MapType"))) {
    stop("Arrow optimization in R does not support map type yet.")
  }

  invisible(NULL)
}