0001 # Licensed to the Apache Software Foundation (ASF) under one or more
0002 # contributor license agreements. See the NOTICE file distributed with
0003 # this work for additional information regarding copyright ownership.
0004 # The ASF licenses this file to You under the Apache License, Version 2.0
0005 # (the "License"); you may not use this file except in compliance with
0006 # the License. You may obtain a copy of the License at
0007 #
0008 # http://www.apache.org/licenses/LICENSE-2.0
0009 #
0010 # Unless required by applicable law or agreed to in writing, software
0011 # distributed under the License is distributed on an "AS IS" BASIS,
0012 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013 # See the License for the specific language governing permissions and
0014 # limitations under the License.
0015 #
0016 # types.R. This file handles the data type mapping between Spark and R
0017
# Lookup table for the primitive data types: each key is a Scala type name and
# each value is the equivalent R type (a class name, or a class vector for
# timestamps). It is kept in an environment so that a type can be resolved
# directly by name.
PRIMITIVE_TYPES <- list2env(
  list(
    "tinyint" = "integer",
    "smallint" = "integer",
    "int" = "integer",
    "bigint" = "numeric",
    "float" = "numeric",
    "double" = "numeric",
    "decimal" = "numeric",
    "string" = "character",
    "binary" = "raw",
    "boolean" = "logical",
    "timestamp" = c("POSIXct", "POSIXt"),
    "date" = "Date",
    # The entries below are not SQL types returned by dtypes(). They are kept
    # here only because checkType() in schema.R looks them up.
    # TODO: refactor checkType() in schema.R.
    "byte" = "integer",
    "integer" = "integer"
  ),
  parent = emptyenv()
)
0040
# The complex data types. None of them maps directly onto an R type, so every
# entry carries NA as a placeholder value.
COMPLEX_TYPES <- structure(
  rep(list(NA), 3L),
  names = c("map", "array", "struct")
)
0046
# The full set of data types in one lookup environment: the entries of
# PRIMITIVE_TYPES followed by the COMPLEX_TYPES placeholders.
DATA_TYPES <- list2env(
  c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES),
  parent = emptyenv()
)
0049
# Short labels for types: keys are R class names or complex type names, values
# are the abbreviated strings used in their place.
SHORT_TYPES <- list2env(
  list(
    "character" = "chr",
    "logical" = "logi",
    "POSIXct" = "POSIXct",
    "integer" = "int",
    "numeric" = "num",
    "raw" = "raw",
    "Date" = "Date",
    "map" = "map",
    "array" = "array",
    "struct" = "struct"
  ),
  parent = emptyenv()
)
0062
# Lookup environment for the R-to-Scala direction: keys are R types, values are
# the corresponding Scala types.
rToSQLTypes <- list2env(
  list(
    "integer" = "integer",   # an R integer is 32 bit
    "numeric" = "double",    # an R numeric is a double, which is 64 bit
    "double" = "double",
    "character" = "string",
    "logical" = "boolean"
  ),
  parent = emptyenv()
)
0070
# Helper function for converting decimal types. When the backend returns a
# column type in the format decimal(,) (e.g., decimal(10, 0)), this function
# converts the column type to the double type. It handles backend-returned types
# that are not themselves keys of PRIMITIVE_TYPES but should be treated as one.
# Only the first element of `type` is inspected.
# @param type A type returned from the JVM backend.
# @return "double" (a key of PRIMITIVE_TYPES) when `type` is a parameterized
#         decimal type; NULL otherwise, including for empty or NA input.
specialtypeshandle <- function(type) {
  # Nothing to inspect: report "no special handling" instead of failing.
  if (length(type) == 0) {
    return(NULL)
  }

  # The parentheses are escaped so that they match literally. Left unescaped
  # they form a capture group, and any string that merely starts with "decimal"
  # (e.g. "decimalfoo") would be reported as a double.
  is_decimal <- grepl("^decimal\\(.+\\)$", type[[1]])

  if (isTRUE(is_decimal)) {
    "double"
  } else {
    NULL
  }
}
0086
# Helper function that checks supported types in Arrow. It inspects the data
# type string of every field in the schema and stops with an error at the first
# type that the Arrow optimization in R does not support.
# @param schema A structType object; anything else fails the initial check.
# @return NULL, invisibly, when no unsupported type is found.
checkSchemaInArrow <- function(schema) {
  stopifnot(inherits(schema, "structType"))

  if (!requireNamespace("arrow", quietly = TRUE)) {
    stop("'arrow' package should be installed.")
  }

  # vapply() always produces a character vector, even for a schema without any
  # fields. sapply() would return an empty list in that case, which the
  # startsWith() calls below reject with an error.
  field_strings <- vapply(
    schema$fields(),
    function(x) x$dataType.toString(),
    character(1)
  )

  # The cases below produce a corrupt value for unknown reason. It needs to be
  # investigated.
  if (any(field_strings == "FloatType")) {
    stop("Arrow optimization in R does not support float type yet.")
  }
  if (any(field_strings == "BinaryType")) {
    stop("Arrow optimization in R does not support binary type yet.")
  }
  if (any(startsWith(field_strings, "ArrayType"))) {
    stop("Arrow optimization in R does not support array type yet.")
  }

  # Arrow optimization in Spark does not yet support both cases below.
  if (any(startsWith(field_strings, "StructType"))) {
    stop("Arrow optimization in R does not support nested struct type yet.")
  }
  if (any(startsWith(field_strings, "MapType"))) {
    stop("Arrow optimization in R does not support map type yet.")
  }

  invisible(NULL)
}