#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

SELF=$(cd $(dirname $0) && pwd)
. "$SELF/release-util.sh"

function exit_with_usage {
  cat << EOF
usage: release-build.sh <package|docs|publish-snapshot|publish-release>
Creates build deliverables from a Spark commit.

Top-level targets are:
  package: Create binary packages and commit them to dist.apache.org/repos/dist/dev/spark/
  docs: Build docs and commit them to dist.apache.org/repos/dist/dev/spark/
  publish-snapshot: Publish snapshot release to Apache snapshots
  publish-release: Publish a release to Apache release repo

All other inputs are environment variables:

GIT_REF - Release tag or commit to build from
SPARK_PACKAGE_VERSION - Release identifier in top level package directory (e.g. 2.1.2-rc1)
SPARK_VERSION - (optional) Version of Spark being built (e.g. 2.1.2)

ASF_USERNAME - Username of ASF committer account
ASF_PASSWORD - Password of ASF committer account

GPG_KEY - GPG key used to sign release artifacts
GPG_PASSPHRASE - Passphrase for GPG key
EOF
  exit 1
}
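
# Example invocation (all values hypothetical):
#   GIT_REF=v2.4.0-rc2 SPARK_PACKAGE_VERSION=2.4.0-rc2 \
#     ASF_USERNAME=someuser ASF_PASSWORD=... \
#     GPG_KEY=ABCD1234 GPG_PASSPHRASE=... \
#     ./release-build.sh package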

set -e

if [ $# -eq 0 ]; then
  exit_with_usage
fi

if [[ $@ == *"help"* ]]; then
  exit_with_usage
fi

if [[ -z "$ASF_PASSWORD" ]]; then
  echo 'The environment variable ASF_PASSWORD is not set. Enter the password.'
  echo
  stty -echo && printf "ASF password: " && read ASF_PASSWORD && printf '\n' && stty echo
fi

if [[ -z "$GPG_PASSPHRASE" ]]; then
  echo 'The environment variable GPG_PASSPHRASE is not set. Enter the passphrase to'
  echo 'unlock the GPG signing key that will be used to sign the release!'
  echo
  stty -echo && printf "GPG passphrase: " && read GPG_PASSPHRASE && printf '\n' && stty echo
fi

for env in ASF_USERNAME GPG_PASSPHRASE GPG_KEY; do
  if [ -z "${!env}" ]; then
    echo "ERROR: $env must be set to run this script"
    exit_with_usage
  fi
done

export LC_ALL=C.UTF-8
export LANG=C.UTF-8

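# Commit ref to check out when building; defaults to master.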
GIT_REF=${GIT_REF:-master}

RELEASE_STAGING_LOCATION="https://dist.apache.org/repos/dist/dev/spark"

GPG="gpg -u $GPG_KEY --no-tty --batch --pinentry-mode loopback"
NEXUS_ROOT=https://repository.apache.org/service/local/staging
NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
BASE_DIR=$(pwd)

init_java
init_maven_sbt

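# Check out the requested ref into ./spark, reusing an existing clone if present.
# ASF_REPO is defined in release-util.sh.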
if [ ! -d spark ]; then
  git clone "$ASF_REPO"
fi
cd spark
git fetch
git checkout $GIT_REF
git_hash=`git rev-parse --short HEAD`
echo "Checked out Spark git hash $git_hash"

if [ -z "$SPARK_VERSION" ]; then
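  # Run $MVN as its own command so that 'set -e' still aborts the script if Maven fails.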
  TMP=$(mktemp)
  $MVN help:evaluate -Dexpression=project.version > $TMP
  SPARK_VERSION=$(cat $TMP | grep -v INFO | grep -v WARNING | grep -vi Download)
  rm $TMP
fi

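# Profiles enabled for every build; Kubernetes support only exists in Spark 2.3+.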
BASE_PROFILES="-Pmesos -Pyarn"
if [[ $SPARK_VERSION > "2.3" ]]; then
  BASE_PROFILES="$BASE_PROFILES -Pkubernetes"
fi

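# Scala 2.11 artifacts are published for Spark < 3.0 only; 2.4.x builds also
# bundle the Kafka 0.8 and Flume modules, which were dropped in 3.0.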
PUBLISH_SCALA_2_11=1
SCALA_2_11_PROFILES="-Pscala-2.11"
if [[ $SPARK_VERSION > "2.3" ]]; then
  if [[ $SPARK_VERSION < "3.0." ]]; then
    SCALA_2_11_PROFILES="-Pkafka-0-8 -Pflume $SCALA_2_11_PROFILES"
  else
    PUBLISH_SCALA_2_11=0
  fi
fi

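# Scala 2.12 artifacts are published for Spark 2.4+; pre-3.0 builds also include Flume.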
PUBLISH_SCALA_2_12=0
SCALA_2_12_PROFILES="-Pscala-2.12"
if [[ $SPARK_VERSION < "3.0." ]]; then
  SCALA_2_12_PROFILES="-Pscala-2.12 -Pflume"
fi
if [[ $SPARK_VERSION > "2.4" ]]; then
  PUBLISH_SCALA_2_12=1
fi

# Hive integration profiles, used by several of the builds below.
HIVE_PROFILES="-Phive -Phive-thriftserver"

# Profiles used when publishing to the Maven repositories.
PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Phive-2.3 -Pspark-ganglia-lgpl -Pkinesis-asl"

# Profiles common to all binary release builds.
BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr"

if [[ $JAVA_VERSION < "1.8." ]]; then
  echo "Java version $JAVA_VERSION is less than the 1.8 required for Spark 2.2+."
  echo "Please set JAVA_HOME correctly."
  exit 1
fi

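# Some build machines only ship lsof under /usr/sbin, so fall back to the
# absolute path when it is not on the PATH.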
LSOF=lsof
if ! hash $LSOF 2>/dev/null; then
  LSOF=/usr/sbin/lsof
fi

if [ -z "$SPARK_PACKAGE_VERSION" ]; then
  SPARK_PACKAGE_VERSION="${SPARK_VERSION}-$(date +%Y_%m_%d_%H_%M)-${git_hash}"
fi

DEST_DIR_NAME="$SPARK_PACKAGE_VERSION"

git clean -d -f -x
rm .gitignore
cd ..

if [[ "$1" == "package" ]]; then
  echo "Packaging release source tarballs"
  cp -r spark spark-$SPARK_VERSION

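  # The binary license/notice files only apply to binary releases, so drop them
  # from the source tarball for Spark 2.4+.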
  if [[ $SPARK_VERSION > "2.4" ]]; then
    rm spark-$SPARK_VERSION/LICENSE-binary
    rm spark-$SPARK_VERSION/NOTICE-binary
    rm -r spark-$SPARK_VERSION/licenses-binary
  fi

  tar cvzf spark-$SPARK_VERSION.tgz --exclude spark-$SPARK_VERSION/.git spark-$SPARK_VERSION
  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour --output spark-$SPARK_VERSION.tgz.asc \
    --detach-sig spark-$SPARK_VERSION.tgz
  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
    SHA512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha512
  rm -rf spark-$SPARK_VERSION

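  # Base port for the zinc incremental compiler; incremented for each binary build.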
  ZINC_PORT=3035

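  # make_binary_release <name> <maven-flags> <package-flags> <scala-version>
  # Builds one binary distribution with the given Maven flags, optionally building
  # pip ("withpip") and R ("withr") packages as well, and signs all artifacts.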
  make_binary_release() {
    NAME=$1
    FLAGS="$MVN_EXTRA_OPTS -B $BASE_RELEASE_PROFILES $2"
    BUILD_PACKAGE=$3
    SCALA_VERSION=$4

    PIP_FLAG=""
    if [[ $BUILD_PACKAGE == *"withpip"* ]]; then
      PIP_FLAG="--pip"
    fi
    R_FLAG=""
    if [[ $BUILD_PACKAGE == *"withr"* ]]; then
      R_FLAG="--r"
    fi

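    # Use a fresh zinc port for every build so that parallel builds on the same
    # machine do not step on each other.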
    ZINC_PORT=$((ZINC_PORT + 1))

    echo "Building binary dist $NAME"
    cp -r spark spark-$SPARK_VERSION-bin-$NAME
    cd spark-$SPARK_VERSION-bin-$NAME

    ./dev/change-scala-version.sh $SCALA_VERSION

    export ZINC_PORT=$ZINC_PORT
    echo "Creating distribution: $NAME ($FLAGS)"

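    # Rewrite the Spark version into a PEP 440-compatible PySpark version:
    # "-" becomes ".", SNAPSHOT becomes dev0, and preview becomes dev.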
    PYSPARK_VERSION=`echo "$SPARK_VERSION" | sed -e "s/-/./" -e "s/SNAPSHOT/dev0/" -e "s/preview/dev/"`
    echo "__version__='$PYSPARK_VERSION'" > python/pyspark/version.py

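    # Ask Maven itself where its home is, so make-distribution.sh runs the same mvn.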
    MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'`

    echo "Creating distribution"
    ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz \
      $PIP_FLAG $R_FLAG $FLAGS \
      -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log
    cd ..

    if [[ -n $R_FLAG ]]; then
      echo "Copying and signing R source package"
      R_DIST_NAME=SparkR_$SPARK_VERSION.tar.gz
      cp spark-$SPARK_VERSION-bin-$NAME/R/$R_DIST_NAME .

      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
        --output $R_DIST_NAME.asc \
        --detach-sig $R_DIST_NAME
      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
        SHA512 $R_DIST_NAME > \
        $R_DIST_NAME.sha512
    fi

    if [[ -n $PIP_FLAG ]]; then
      echo "Copying and signing python distribution"
      PYTHON_DIST_NAME=pyspark-$PYSPARK_VERSION.tar.gz
      cp spark-$SPARK_VERSION-bin-$NAME/python/dist/$PYTHON_DIST_NAME .

      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
        --output $PYTHON_DIST_NAME.asc \
        --detach-sig $PYTHON_DIST_NAME
      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
        SHA512 $PYTHON_DIST_NAME > \
        $PYTHON_DIST_NAME.sha512
    fi

    echo "Copying and signing regular binary distribution"
    cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz .
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
      --output spark-$SPARK_VERSION-bin-$NAME.tgz.asc \
      --detach-sig spark-$SPARK_VERSION-bin-$NAME.tgz
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
      SHA512 spark-$SPARK_VERSION-bin-$NAME.tgz > \
      spark-$SPARK_VERSION-bin-$NAME.tgz.sha512
  }

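  # BINARY_PKGS_ARGS maps each binary package name to the Maven profiles that
  # build it. Hadoop 2.6 packages are only built for Spark < 3.0; the Hive 1.2
  # and Hadoop 3.2 variants only for 3.0+. Dry runs build just hadoop2.7.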
  declare -A BINARY_PKGS_ARGS
  BINARY_PKGS_ARGS["hadoop2.7"]="-Phadoop-2.7 $HIVE_PROFILES"
  if ! is_dry_run; then
    BINARY_PKGS_ARGS["without-hadoop"]="-Phadoop-provided"
    if [[ $SPARK_VERSION < "3.0." ]]; then
      BINARY_PKGS_ARGS["hadoop2.6"]="-Phadoop-2.6 $HIVE_PROFILES"
    else
      BINARY_PKGS_ARGS["hadoop2.7-hive1.2"]="-Phadoop-2.7 -Phive-1.2 $HIVE_PROFILES"
      BINARY_PKGS_ARGS["hadoop3.2"]="-Phadoop-3.2 $HIVE_PROFILES"
    fi
  fi

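  # Extra package types (pip, R) built alongside a given binary package.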
  declare -A BINARY_PKGS_EXTRA
  BINARY_PKGS_EXTRA["hadoop2.7"]="withpip,withr"

  if [[ $PUBLISH_SCALA_2_11 = 1 ]]; then
    key="without-hadoop-scala-2.11"
    args="-Phadoop-provided"
    extra=""
    if ! make_binary_release "$key" "$SCALA_2_11_PROFILES $args" "$extra" "2.11"; then
      error "Failed to build $key package. Check logs for details."
    fi
  fi

  if [[ $PUBLISH_SCALA_2_12 = 1 ]]; then
    echo "Packages to build: ${!BINARY_PKGS_ARGS[@]}"
    for key in ${!BINARY_PKGS_ARGS[@]}; do
      args=${BINARY_PKGS_ARGS[$key]}
      extra=${BINARY_PKGS_EXTRA[$key]}
      if ! make_binary_release "$key" "$SCALA_2_12_PROFILES $args" "$extra" "2.12"; then
        error "Failed to build $key package. Check logs for details."
      fi
    done
  fi

  rm -rf spark-$SPARK_VERSION-bin-*/

  if ! is_dry_run; then
    svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
    rm -rf "svn-spark/${DEST_DIR_NAME}-bin"
    mkdir -p "svn-spark/${DEST_DIR_NAME}-bin"

    echo "Copying release tarballs"
    cp spark-* "svn-spark/${DEST_DIR_NAME}-bin/"
    cp pyspark-* "svn-spark/${DEST_DIR_NAME}-bin/"
    cp SparkR_* "svn-spark/${DEST_DIR_NAME}-bin/"
    svn add "svn-spark/${DEST_DIR_NAME}-bin"

    cd svn-spark
    svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION" --no-auth-cache
    cd ..
    rm -rf svn-spark
  fi

  exit 0
fi

if [[ "$1" == "docs" ]]; then
  cd spark
  echo "Building Spark docs"
  cd docs

  PRODUCTION=1 RELEASE_VERSION="$SPARK_VERSION" jekyll build
  cd ..
  cd ..

  if ! is_dry_run; then
    svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
    rm -rf "svn-spark/${DEST_DIR_NAME}-docs"
    mkdir -p "svn-spark/${DEST_DIR_NAME}-docs"

    echo "Copying release documentation"
    cp -R "spark/docs/_site" "svn-spark/${DEST_DIR_NAME}-docs/"
    svn add "svn-spark/${DEST_DIR_NAME}-docs"

    cd svn-spark
    svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION docs" --no-auth-cache
    cd ..
    rm -rf svn-spark
  fi

  mv "spark/docs/_site" docs/
  exit 0
fi

if [[ "$1" == "publish-snapshot" ]]; then
  cd spark

  echo "Deploying Spark SNAPSHOT at '$GIT_REF' ($git_hash)"
  echo "Publish version is $SPARK_VERSION"
  if [[ ! $SPARK_VERSION == *"SNAPSHOT"* ]]; then
    echo "ERROR: Snapshots must have a version containing SNAPSHOT"
    echo "ERROR: You gave version '$SPARK_VERSION'"
    exit 1
  fi

  $MVN versions:set -DnewVersion=$SPARK_VERSION

  # Write a throwaway settings.xml that carries the ASF snapshot-repository credentials.
  tmp_settings="tmp-settings.xml"
  echo "<settings><servers><server>" > $tmp_settings
  echo "<id>apache.snapshots.https</id><username>$ASF_USERNAME</username>" >> $tmp_settings
  echo "<password>$ASF_PASSWORD</password>" >> $tmp_settings
  echo "</server></servers></settings>" >> $tmp_settings

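  # Pick a random zinc port so that concurrent snapshot builds do not collide.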
  export ZINC_PORT=$(python -S -c "import random; print(random.randrange(3030,4030))")

  $MVN -DzincPort=$ZINC_PORT --settings $tmp_settings -DskipTests $SCALA_2_12_PROFILES $PUBLISH_PROFILES deploy

  rm $tmp_settings
  cd ..
  exit 0
fi

if [[ "$1" == "publish-release" ]]; then
  cd spark

  echo "Publishing Spark checkout at '$GIT_REF' ($git_hash)"
  echo "Publish version is $SPARK_VERSION"

  $MVN versions:set -DnewVersion=$SPARK_VERSION

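  # Create a staging repository via the Nexus staging REST API before deploying
  # the artifacts.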
  if ! is_dry_run; then
    echo "Creating Nexus staging repository"
    repo_request="<promoteRequest><data><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
    out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
      -H "Content-Type:application/xml" -v \
      $NEXUS_ROOT/profiles/$NEXUS_PROFILE/start)
    staged_repo_id=$(echo $out | sed -e "s/.*\(orgapachespark-[0-9]\{4\}\).*/\1/")
    echo "Created Nexus staging repository: $staged_repo_id"
  fi

  tmp_repo=$(mktemp -d spark-repo-XXXXX)

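  # Pick a random zinc port so that concurrent builds do not collide.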
  export ZINC_PORT=$(python -S -c "import random; print(random.randrange(3030,4030))")

  # Install the artifacts into the temporary local repository, once per published
  # Scala version, so that only the files meant for upload are staged.
  if [[ $PUBLISH_SCALA_2_11 = 1 ]]; then
    ./dev/change-scala-version.sh 2.11
    $MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -DskipTests \
      $SCALA_2_11_PROFILES $PUBLISH_PROFILES clean install
  fi

  if [[ $PUBLISH_SCALA_2_12 = 1 ]]; then
    ./dev/change-scala-version.sh 2.12
    $MVN -DzincPort=$((ZINC_PORT + 2)) -Dmaven.repo.local=$tmp_repo -DskipTests \
      $SCALA_2_12_PROFILES $PUBLISH_PROFILES clean install
  fi

  pushd $tmp_repo/org/apache/spark

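  # Remove any extra files generated during install; only jars and poms are uploaded.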
  find . -type f | grep -v \.jar | grep -v \.pom | xargs rm

  echo "Creating hash and signature files"

  for file in $(find . -type f)
  do
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --output $file.asc \
      --detach-sig --armour $file;
    if [ $(command -v md5) ]; then
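      # md5 is available on macOS; -q prints only the hash.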
      md5 -q $file > $file.md5
    else
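      # md5sum is available on Linux; cut keeps only the hash field.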
      md5sum $file | cut -f1 -d' ' > $file.md5
    fi
    sha1sum $file | cut -f1 -d' ' > $file.sha1
  done

  if ! is_dry_run; then
    nexus_upload=$NEXUS_ROOT/deployByRepositoryId/$staged_repo_id
    echo "Uploading files to $nexus_upload"
    for file in $(find . -type f)
    do
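      # Strip the leading ./ that find prepends to each path.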
      file_short=$(echo $file | sed -e "s/\.\///")
      dest_url="$nexus_upload/org/apache/spark/$file_short"
      echo "  Uploading $file_short"
      curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
    done

    echo "Closing nexus staging repository"
    repo_request="<promoteRequest><data><stagedRepositoryId>$staged_repo_id</stagedRepositoryId><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
    out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
      -H "Content-Type:application/xml" -v \
      $NEXUS_ROOT/profiles/$NEXUS_PROFILE/finish)
    echo "Closed Nexus staging repository: $staged_repo_id"
  fi

  popd
  rm -rf $tmp_repo
  cd ..
  exit 0
fi

rm -rf spark
echo "ERROR: expects to be called with 'package', 'docs', 'publish-release' or 'publish-snapshot'"
exit_with_usage