#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This script builds and pushes docker images when run from a release of Spark
# with Kubernetes support.
set -x

function error {
  echo "$@" 1>&2
  exit 1
}

if [ -z "${SPARK_HOME}" ]; then
  SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"
fi
. "${SPARK_HOME}/bin/load-spark-env.sh"

CTX_DIR="$SPARK_HOME/target/tmp/docker"

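# Dev builds are detected by the absence of the RELEASE marker file, which is
# present at the top of a packaged, runnable Spark distribution.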
function is_dev_build {
  [ ! -f "$SPARK_HOME/RELEASE" ]
}

function cleanup_ctx_dir {
  if is_dev_build; then
    rm -rf "$CTX_DIR"
  fi
}
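
# Dev builds stage their docker context under $CTX_DIR, so remove it on exit.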
trap cleanup_ctx_dir EXIT

function image_ref {
  local image="$1"
  local add_repo="${2:-1}"
  if [ "$add_repo" = 1 ] && [ -n "$REPO" ]; then
    image="$REPO/$image"
  fi
  if [ -n "$TAG" ]; then
    image="$image:$TAG"
  fi
  echo "$image"
}
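
# Example (hypothetical values): with REPO=docker.io/myrepo and TAG=v3.0.0,
# `image_ref spark` prints "docker.io/myrepo/spark:v3.0.0", while
# `image_ref spark 0` skips the repo prefix and prints "spark:v3.0.0".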

function docker_push {
  local image_name="$1"
  # `docker images -q` prints nothing when the image is absent locally; the
  # substitution is quoted so an empty result cannot break the test.
  if [ -n "$(docker images -q "$(image_ref "${image_name}")")" ]; then
    docker push "$(image_ref "${image_name}")"
    if [ $? -ne 0 ]; then
      error "Failed to push $image_name Docker image."
    fi
  else
    echo "$(image_ref "${image_name}") image not found. Skipping push for this image."
  fi
}
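
# Images that were never built are skipped with a message rather than failing,
# so `push` works even when only a subset of spark/spark-py/spark-r exists.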

function resolve_file {
  local FILE=$1
  if [ -n "$FILE" ]; then
    local DIR=$(dirname "$FILE")
    DIR=$(cd "$DIR" && pwd)
    FILE="${DIR}/$(basename "$FILE")"
  fi
  echo "$FILE"
}
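
# Example (hypothetical input): `resolve_file ./Dockerfile` prints an absolute
# path such as /path/to/cwd/Dockerfile; an empty argument is echoed unchanged.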

# Create a smaller build context for docker in dev builds to make the build
# faster. Docker uploads all of the current directory to the daemon, and it
# can get pretty big with dev builds that contain test log files and other
# build artifacts.
#
# Three build contexts are created, one for each image: base, pyspark, and
# sparkr. For them to have the desired effect, the docker command needs to be
# executed inside the appropriate context directory.
#
# Note: docker does not support symlinks in the build context.
function create_dev_build_context {(
  set -e
  local BASE_CTX="$CTX_DIR/base"
  mkdir -p "$BASE_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$BASE_CTX/kubernetes/dockerfiles"

  cp -r "assembly/target/scala-$SPARK_SCALA_VERSION/jars" "$BASE_CTX/jars"
  cp -r "resource-managers/kubernetes/integration-tests/tests" \
    "$BASE_CTX/kubernetes/tests"

  mkdir "$BASE_CTX/examples"
  cp -r "examples/src" "$BASE_CTX/examples/src"

  # Copy only the examples jars that are not already in the main jars dir.
  mkdir "$BASE_CTX/examples/jars"
  for i in examples/target/scala-$SPARK_SCALA_VERSION/jars/*; do
    if [ ! -f "$BASE_CTX/jars/$(basename "$i")" ]; then
      cp "$i" "$BASE_CTX/examples/jars"
    fi
  done

  for other in bin sbin data; do
    cp -r "$other" "$BASE_CTX/$other"
  done

  local PYSPARK_CTX="$CTX_DIR/pyspark"
  mkdir -p "$PYSPARK_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$PYSPARK_CTX/kubernetes/dockerfiles"
  mkdir "$PYSPARK_CTX/python"
  cp -r "python/lib" "$PYSPARK_CTX/python/lib"
  cp -r "python/pyspark" "$PYSPARK_CTX/python/pyspark"

  local R_CTX="$CTX_DIR/sparkr"
  mkdir -p "$R_CTX/kubernetes"
  cp -r "resource-managers/kubernetes/docker/src/main/dockerfiles" \
    "$R_CTX/kubernetes/dockerfiles"
  cp -r "R" "$R_CTX/R"
)}
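
# Resulting dev-build context layout (one directory per image):
#   $CTX_DIR/base     - jars, bin, sbin, data, examples, kubernetes/{dockerfiles,tests}
#   $CTX_DIR/pyspark  - python/lib, python/pyspark, kubernetes/dockerfiles
#   $CTX_DIR/sparkr   - R, kubernetes/dockerfiles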

function img_ctx_dir {
  if is_dev_build; then
    echo "$CTX_DIR/$1"
  else
    echo "$SPARK_HOME"
  fi
}
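
# e.g. `img_ctx_dir pyspark` prints "$CTX_DIR/pyspark" in a dev build and
# "$SPARK_HOME" when run from a packaged distribution.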

function build {
  local SPARK_ROOT="$SPARK_HOME"

  if is_dev_build; then
    create_dev_build_context || error "Failed to create docker build context."
    SPARK_ROOT="$CTX_DIR/base"
  fi

  # Verify that the Docker image content directory is present.
  if [ ! -d "$SPARK_ROOT/kubernetes/dockerfiles" ]; then
    error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
  fi

  # Verify that Spark has actually been built or this is a runnable
  # distribution, i.e. the Spark JARs that the Dockerfiles will place into
  # the image are present.
  local TOTAL_JARS=$(ls "$SPARK_ROOT"/jars/spark-* 2>/dev/null | wc -l)
  TOTAL_JARS=$(( TOTAL_JARS ))
  if [ "${TOTAL_JARS}" -eq 0 ]; then
    error "Cannot find Spark JARs. This script assumes that Apache Spark has first been built locally or this is a runnable distribution."
  fi

  # BUILD_PARAMS is intentionally left unquoted so each "--build-arg k=v"
  # pair becomes its own array element.
  local BUILD_ARGS=(${BUILD_PARAMS})

  # If a custom SPARK_UID was set, add it to the build arguments.
  if [ -n "$SPARK_UID" ]; then
    BUILD_ARGS+=(--build-arg spark_uid="$SPARK_UID")
  fi

  local BINDING_BUILD_ARGS=(
    "${BUILD_ARGS[@]}"
    --build-arg
    base_img=$(image_ref spark)
  )

  local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"}
  local PYDOCKERFILE=${PYDOCKERFILE:-false}
  local RDOCKERFILE=${RDOCKERFILE:-false}
  local ARCHS=${ARCHS:-"--platform linux/amd64,linux/arm64"}

  (cd "$(img_ctx_dir base)" && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
    -t $(image_ref spark) \
    -f "$BASEDOCKERFILE" .)
  if [ $? -ne 0 ]; then
    error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
  fi
  if [ "${CROSS_BUILD}" != "false" ]; then
    # buildx publishes the multi-arch manifest as part of the build, which is
    # why cross builds push automatically (see -X in the usage text below).
    (cd "$(img_ctx_dir base)" && docker buildx build $ARCHS $NOCACHEARG "${BUILD_ARGS[@]}" \
      --push \
      -t $(image_ref spark) \
      -f "$BASEDOCKERFILE" .)
  fi

  if [ "${PYDOCKERFILE}" != "false" ]; then
    (cd "$(img_ctx_dir pyspark)" && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
      -t $(image_ref spark-py) \
      -f "$PYDOCKERFILE" .)
    if [ $? -ne 0 ]; then
      error "Failed to build PySpark Docker image, please refer to Docker build output for details."
    fi
    if [ "${CROSS_BUILD}" != "false" ]; then
      (cd "$(img_ctx_dir pyspark)" && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
        --push \
        -t $(image_ref spark-py) \
        -f "$PYDOCKERFILE" .)
    fi
  fi

  if [ "${RDOCKERFILE}" != "false" ]; then
    (cd "$(img_ctx_dir sparkr)" && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
      -t $(image_ref spark-r) \
      -f "$RDOCKERFILE" .)
    if [ $? -ne 0 ]; then
      error "Failed to build SparkR Docker image, please refer to Docker build output for details."
    fi
    if [ "${CROSS_BUILD}" != "false" ]; then
      (cd "$(img_ctx_dir sparkr)" && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
        --push \
        -t $(image_ref spark-r) \
        -f "$RDOCKERFILE" .)
    fi
  fi
}

function push {
  docker_push "spark"
  docker_push "spark-py"
  docker_push "spark-r"
}
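
# push assumes the images were built beforehand with the same -r and -t
# values; see the examples in the usage text below.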

function usage {
  cat <<EOF
Usage: $0 [options] [command]
Builds or pushes the built-in Spark Docker image.

Commands:
  build       Build image. Requires a repository address to be provided if the image will be
              pushed to a different registry.
  push        Push a pre-built image to a registry. Requires a repository address to be provided.

Options:
  -f file     Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
  -p file     (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
              Skips building PySpark docker image if not specified.
  -R file     (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
              Skips building SparkR docker image if not specified.
  -r repo     Repository address.
  -t tag      Tag to apply to the built image, or to identify the image to be pushed.
  -m          Use minikube's Docker daemon.
  -n          Build docker image with --no-cache.
  -u uid      UID to use in the USER directive to set the user the main Spark process runs as inside the
              resulting container.
  -X          Use docker buildx to cross build. Automatically pushes.
              See https://docs.docker.com/buildx/working-with-buildx/ for steps to set up buildx.
  -b arg      Build arg to build or push the image. For multiple build args, this option needs to
              be used separately for each build arg.

Using minikube when building images will do so directly into minikube's Docker daemon.
There is no need to push the images into minikube in that case, they'll be automatically
available when running applications inside the minikube cluster.

Check the following documentation for more information on using the minikube Docker daemon:

  https://kubernetes.io/docs/getting-started-guides/minikube/#reusing-the-docker-daemon

Examples:
  - Build image in minikube with tag "testing"
    $0 -m -t testing build

  - Build PySpark docker image
    $0 -r docker.io/myrepo -t v2.3.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build

  - Build and push image with tag "v2.3.0" to docker.io/myrepo
    $0 -r docker.io/myrepo -t v2.3.0 build
    $0 -r docker.io/myrepo -t v2.3.0 push

  - Build and push JDK11-based image with tag "v3.0.0" to docker.io/myrepo
    $0 -r docker.io/myrepo -t v3.0.0 -b java_image_tag=11-jre-slim build
    $0 -r docker.io/myrepo -t v3.0.0 push

  - Build and push JDK11-based image for multiple archs to docker.io/myrepo
    $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build
    # Note: buildx, which does cross building, needs to do the push during build
    # so there is no separate push step with -X

EOF
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
  usage
  exit 0
fi

REPO=
TAG=
BASEDOCKERFILE=
PYDOCKERFILE=
RDOCKERFILE=
NOCACHEARG=
BUILD_PARAMS=
SPARK_UID=
CROSS_BUILD="false"
while getopts f:p:R:mr:t:Xnb:u: option
do
  case "${option}" in
    f) BASEDOCKERFILE=$(resolve_file "${OPTARG}");;
    p) PYDOCKERFILE=$(resolve_file "${OPTARG}");;
    R) RDOCKERFILE=$(resolve_file "${OPTARG}");;
    r) REPO=${OPTARG};;
    t) TAG=${OPTARG};;
    n) NOCACHEARG="--no-cache";;
    b) BUILD_PARAMS=${BUILD_PARAMS}" --build-arg "${OPTARG};;
    X) CROSS_BUILD=1;;
    m)
      if ! which minikube 1>/dev/null; then
        error "Cannot find minikube."
      fi
      if ! minikube status 1>/dev/null; then
        error "Cannot contact minikube. Make sure it's running."
      fi
      eval $(minikube docker-env --shell bash)
      ;;
    u) SPARK_UID=${OPTARG};;
  esac
done

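# The command to run (build or push) is expected to be the last argument.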
case "${@: -1}" in
  build)
    build
    ;;
  push)
    if [ -z "$REPO" ]; then
      usage
      exit 1
    fi
    push
    ;;
  *)
    usage
    exit 1
    ;;
esac