|
||||
0001 /* 0002 * Licensed to the Apache Software Foundation (ASF) under one or more 0003 * contributor license agreements. See the NOTICE file distributed with 0004 * this work for additional information regarding copyright ownership. 0005 * The ASF licenses this file to You under the Apache License, Version 2.0 0006 * (the "License"); you may not use this file except in compliance with 0007 * the License. You may obtain a copy of the License at 0008 * 0009 * http://www.apache.org/licenses/LICENSE-2.0 0010 * 0011 * Unless required by applicable law or agreed to in writing, software 0012 * distributed under the License is distributed on an "AS IS" BASIS, 0013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0014 * See the License for the specific language governing permissions and 0015 * limitations under the License. 0016 */ 0017 0018 package org.apache.spark.sql.connector.read; 0019 0020 import org.apache.spark.annotation.Evolving; 0021 import org.apache.spark.sql.connector.read.streaming.ContinuousStream; 0022 import org.apache.spark.sql.connector.read.streaming.MicroBatchStream; 0023 import org.apache.spark.sql.types.StructType; 0024 import org.apache.spark.sql.connector.catalog.Table; 0025 import org.apache.spark.sql.connector.catalog.TableCapability; 0026 0027 /** 0028 * A logical representation of a data source scan. This interface is used to provide logical 0029 * information, like what the actual read schema is. 0030 * <p> 0031 * This logical representation is shared between batch scan, micro-batch streaming scan and 0032 * continuous streaming scan. Data sources must implement the corresponding methods in this 0033 * interface, to match what the table promises to support. For example, {@link #toBatch()} must be 0034 * implemented, if the {@link Table} that creates this {@link Scan} returns 0035 * {@link TableCapability#BATCH_READ} support in its {@link Table#capabilities()}. 0036 * </p> 0037 * 0038 * @since 3.0.0 0039 */ 0040 @Evolving 0041 public interface Scan { 0042 0043 /** 0044 * Returns the actual schema of this data source scan, which may be different from the physical 0045 * schema of the underlying storage, as column pruning or other optimizations may happen. 0046 */ 0047 StructType readSchema(); 0048 0049 /** 0050 * A description string of this scan, which may includes information like: what filters are 0051 * configured for this scan, what's the value of some important options like path, etc. The 0052 * description doesn't need to include {@link #readSchema()}, as Spark already knows it. 0053 * <p> 0054 * By default this returns the class name of the implementation. Please override it to provide a 0055 * meaningful description. 0056 * </p> 0057 */ 0058 default String description() { 0059 return this.getClass().toString(); 0060 } 0061 0062 /** 0063 * Returns the physical representation of this scan for batch query. By default this method throws 0064 * exception, data sources must overwrite this method to provide an implementation, if the 0065 * {@link Table} that creates this scan returns {@link TableCapability#BATCH_READ} support in its 0066 * {@link Table#capabilities()}. 0067 * 0068 * @throws UnsupportedOperationException 0069 */ 0070 default Batch toBatch() { 0071 throw new UnsupportedOperationException(description() + ": Batch scan are not supported"); 0072 } 0073 0074 /** 0075 * Returns the physical representation of this scan for streaming query with micro-batch mode. By 0076 * default this method throws exception, data sources must overwrite this method to provide an 0077 * implementation, if the {@link Table} that creates this scan returns 0078 * {@link TableCapability#MICRO_BATCH_READ} support in its {@link Table#capabilities()}. 0079 * 0080 * @param checkpointLocation a path to Hadoop FS scratch space that can be used for failure 0081 * recovery. Data streams for the same logical source in the same query 0082 * will be given the same checkpointLocation. 0083 * 0084 * @throws UnsupportedOperationException 0085 */ 0086 default MicroBatchStream toMicroBatchStream(String checkpointLocation) { 0087 throw new UnsupportedOperationException(description() + ": Micro-batch scan are not supported"); 0088 } 0089 0090 /** 0091 * Returns the physical representation of this scan for streaming query with continuous mode. By 0092 * default this method throws exception, data sources must overwrite this method to provide an 0093 * implementation, if the {@link Table} that creates this scan returns 0094 * {@link TableCapability#CONTINUOUS_READ} support in its {@link Table#capabilities()}. 0095 * 0096 * @param checkpointLocation a path to Hadoop FS scratch space that can be used for failure 0097 * recovery. Data streams for the same logical source in the same query 0098 * will be given the same checkpointLocation. 0099 * 0100 * @throws UnsupportedOperationException 0101 */ 0102 default ContinuousStream toContinuousStream(String checkpointLocation) { 0103 throw new UnsupportedOperationException(description() + ": Continuous scan are not supported"); 0104 } 0105 }
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.1.0 LXR engine. The LXR team |