Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.sql.connector.catalog;
0019 
0020 import java.util.Map;
0021 
0022 import org.apache.spark.annotation.Evolving;
0023 import org.apache.spark.sql.connector.expressions.Transform;
0024 import org.apache.spark.sql.types.StructType;
0025 import org.apache.spark.sql.util.CaseInsensitiveStringMap;
0026 
0027 /**
0028  * The base interface for v2 data sources which don't have a real catalog. Implementations must
0029  * have a public, 0-arg constructor.
0030  * <p>
0031  * Note that, TableProvider can only apply data operations to existing tables, like read, append,
0032  * delete, and overwrite. It does not support the operations that require metadata changes, like
0033  * create/drop tables.
0034  * <p>
0035  * The major responsibility of this interface is to return a {@link Table} for read/write.
0036  * </p>
0037  *
0038  * @since 3.0.0
0039  */
0040 @Evolving
0041 public interface TableProvider {
0042 
0043   /**
0044    * Infer the schema of the table identified by the given options.
0045    *
0046    * @param options an immutable case-insensitive string-to-string map that can identify a table,
0047    *                e.g. file path, Kafka topic name, etc.
0048    */
0049   StructType inferSchema(CaseInsensitiveStringMap options);
0050 
0051   /**
0052    * Infer the partitioning of the table identified by the given options.
0053    * <p>
0054    * By default this method returns empty partitioning, please override it if this source support
0055    * partitioning.
0056    *
0057    * @param options an immutable case-insensitive string-to-string map that can identify a table,
0058    *                e.g. file path, Kafka topic name, etc.
0059    */
0060   default Transform[] inferPartitioning(CaseInsensitiveStringMap options) {
0061     return new Transform[0];
0062   }
0063 
0064   /**
0065    * Return a {@link Table} instance with the specified table schema, partitioning and properties
0066    * to do read/write. The returned table should report the same schema and partitioning with the
0067    * specified ones, or Spark may fail the operation.
0068    *
0069    * @param schema The specified table schema.
0070    * @param partitioning The specified table partitioning.
0071    * @param properties The specified table properties. It's case preserving (contains exactly what
0072    *                   users specified) and implementations are free to use it case sensitively or
0073    *                   insensitively. It should be able to identify a table, e.g. file path, Kafka
0074    *                   topic name, etc.
0075    */
0076   Table getTable(StructType schema, Transform[] partitioning, Map<String, String> properties);
0077 
0078   /**
0079    * Returns true if the source has the ability of accepting external table metadata when getting
0080    * tables. The external table metadata includes user-specified schema from
0081    * `DataFrameReader`/`DataStreamReader` and schema/partitioning stored in Spark catalog.
0082    * <p>
0083    * By default this method returns false, which means the schema and partitioning passed to
0084    * `getTable` are from the infer methods. Please override it if this source has expensive
0085    * schema/partitioning inference and wants external table metadata to avoid inference.
0086    */
0087   default boolean supportsExternalMetadata() {
0088     return false;
0089   }
0090 }