read/streaming/SupportsAdmissionControl.java

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017
0018 package org.apache.spark.sql.connector.read.streaming;
0019
0020 import org.apache.spark.annotation.Evolving;
0021
0022 /**
0023  * A mix-in interface for {@link SparkDataStream} streaming sources to signal that they can control
0024  * the rate of data ingested into the system. These rate limits can come implicitly from the
0025  * contract of triggers, e.g. Trigger.Once() requires that a micro-batch process all data
0026  * available to the system at the start of the micro-batch. Alternatively, sources can decide to
0027  * limit ingest through data source options.
0028  *
0029  * Through this interface, a MicroBatchStream should be able to return the next offset that it will
0030  * process until given a {@link ReadLimit}.
0031  *
0032  * @since 3.0.0
0033  */
0034 @Evolving
0035 public interface SupportsAdmissionControl extends SparkDataStream {
0036
0037   /**
0038    * Returns the read limits potentially passed to the data source through options when creating
0039    * the data source.
0040    */
0041   default ReadLimit getDefaultReadLimit() { return ReadLimit.allAvailable(); }
0042
0043   /**
0044    * Returns the most recent offset available given a read limit. The start offset can be used
0045    * to figure out how much new data should be read given the limit. Users should implement this
0046    * method instead of latestOffset for a MicroBatchStream or getOffset for Source.
0047    *
0048    * When this method is called on a `Source`, the source can return `null` if there is no
0049    * data to process. In addition, for the very first micro-batch, the `startOffset` will be
0050    * null as well.
0051    *
0052    * When this method is called on a MicroBatchStream, the `startOffset` will be `initialOffset`
0053    * for the very first micro-batch. The source can return `null` if there is no data to process.
0054    */
0055   Offset latestOffset(Offset startOffset, ReadLimit limit);
0056 }