Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 package org.apache.spark.sql.connector.read;
0019 
0020 import org.apache.spark.annotation.Evolving;
0021 
0022 /**
0023  * A physical representation of a data source scan for batch queries. This interface is used to
0024  * provide physical information, like how many partitions the scanned data has, and how to read
0025  * records from the partitions.
0026  *
0027  * @since 3.0.0
0028  */
0029 @Evolving
0030 public interface Batch {
0031 
0032   /**
0033    * Returns a list of {@link InputPartition input partitions}. Each {@link InputPartition}
0034    * represents a data split that can be processed by one Spark task. The number of input
0035    * partitions returned here is the same as the number of RDD partitions this scan outputs.
0036    * <p>
0037    * If the {@link Scan} supports filter pushdown, this Batch is likely configured with a filter
0038    * and is responsible for creating splits for that filter, which is not a full scan.
0039    * </p>
0040    * <p>
0041    * This method will be called only once during a data source scan, to launch one Spark job.
0042    * </p>
0043    */
0044   InputPartition[] planInputPartitions();
0045 
0046   /**
0047    * Returns a factory to create a {@link PartitionReader} for each {@link InputPartition}.
0048    */
0049   PartitionReaderFactory createReaderFactory();
0050 }