#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

SELF=$(cd $(dirname $0) && pwd)
. "$SELF/release-util.sh"

function exit_with_usage {
  cat << EOF
usage: release-build.sh <package|docs|publish-snapshot|publish-release>
Creates build deliverables from a Spark commit.

Top level targets are
  package: Create binary packages and commit them to dist.apache.org/repos/dist/dev/spark/
  docs: Build docs and commit them to dist.apache.org/repos/dist/dev/spark/
  publish-snapshot: Publish snapshot release to Apache snapshots
  publish-release: Publish a release to Apache release repo

All other inputs are environment variables

GIT_REF - Release tag or commit to build from
SPARK_PACKAGE_VERSION - Release identifier in top level package directory (e.g. 2.1.2-rc1)
SPARK_VERSION - (optional) Version of Spark being built (e.g. 2.1.2)

ASF_USERNAME - Username of ASF committer account
ASF_PASSWORD - Password of ASF committer account

GPG_KEY - GPG key used to sign release artifacts
GPG_PASSPHRASE - Passphrase for GPG key
EOF
  exit 1
}
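
# A typical invocation looks roughly like the following (a sketch only; the tag,
# versions, and credentials shown here are hypothetical):
#
#   GIT_REF=v2.4.0-rc1 SPARK_PACKAGE_VERSION=2.4.0-rc1 SPARK_VERSION=2.4.0 \
#   ASF_USERNAME=someuser ASF_PASSWORD=... \
#   GPG_KEY=ABCD1234 GPG_PASSPHRASE=... \
#   ./release-build.sh package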

set -e

if [ $# -eq 0 ]; then
  exit_with_usage
fi

if [[ $@ == *"help"* ]]; then
  exit_with_usage
fi

if [[ -z "$ASF_PASSWORD" ]]; then
  echo 'The environment variable ASF_PASSWORD is not set. Enter the password.'
  echo
  stty -echo && printf "ASF password: " && read ASF_PASSWORD && printf '\n' && stty echo
fi

if [[ -z "$GPG_PASSPHRASE" ]]; then
  echo 'The environment variable GPG_PASSPHRASE is not set. Enter the passphrase to'
  echo 'unlock the GPG signing key that will be used to sign the release!'
  echo
  stty -echo && printf "GPG passphrase: " && read GPG_PASSPHRASE && printf '\n' && stty echo
fi

for env in ASF_USERNAME GPG_PASSPHRASE GPG_KEY; do
  if [ -z "${!env}" ]; then
    echo "ERROR: $env must be set to run this script"
    exit_with_usage
  fi
done

export LC_ALL=C.UTF-8
export LANG=C.UTF-8

# Commit ref to checkout when building
GIT_REF=${GIT_REF:-master}

RELEASE_STAGING_LOCATION="https://dist.apache.org/repos/dist/dev/spark"

GPG="gpg -u $GPG_KEY --no-tty --batch --pinentry-mode loopback"
NEXUS_ROOT=https://repository.apache.org/service/local/staging
NEXUS_PROFILE=d63f592e7eac0 # Profile for Spark staging uploads
BASE_DIR=$(pwd)

init_java
init_maven_sbt

# Only clone repo fresh if not present, otherwise use checkout from the tag step
if [ ! -d spark ]; then
  git clone "$ASF_REPO"
fi
cd spark
git fetch
git checkout $GIT_REF
git_hash=`git rev-parse --short HEAD`
echo "Checked out Spark git hash $git_hash"

if [ -z "$SPARK_VERSION" ]; then
  # Run $MVN in a separate command so that 'set -e' does the right thing.
  TMP=$(mktemp)
  $MVN help:evaluate -Dexpression=project.version > $TMP
  SPARK_VERSION=$(cat $TMP | grep -v INFO | grep -v WARNING | grep -vi Download)
  rm $TMP
fi

# Depending on the version being built, certain extra profiles need to be activated, and
# different versions of Scala are supported.
BASE_PROFILES="-Pmesos -Pyarn"
if [[ $SPARK_VERSION > "2.3" ]]; then
  BASE_PROFILES="$BASE_PROFILES -Pkubernetes"
fi

# TODO: revisit for Scala 2.13

PUBLISH_SCALA_2_11=1
SCALA_2_11_PROFILES="-Pscala-2.11"
if [[ $SPARK_VERSION > "2.3" ]]; then
  if [[ $SPARK_VERSION < "3.0." ]]; then
    SCALA_2_11_PROFILES="-Pkafka-0-8 -Pflume $SCALA_2_11_PROFILES"
  else
    PUBLISH_SCALA_2_11=0
  fi
fi

PUBLISH_SCALA_2_12=0
SCALA_2_12_PROFILES="-Pscala-2.12"
if [[ $SPARK_VERSION < "3.0." ]]; then
  SCALA_2_12_PROFILES="-Pscala-2.12 -Pflume"
fi
if [[ $SPARK_VERSION > "2.4" ]]; then
  PUBLISH_SCALA_2_12=1
fi

# Hive-specific profiles for some builds
HIVE_PROFILES="-Phive -Phive-thriftserver"
# Profiles for publishing snapshots and release to Maven Central
# We use Apache Hive 2.3 for publishing
PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Phive-2.3 -Pspark-ganglia-lgpl -Pkinesis-asl"
# Profiles for building binary releases
BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr"

if [[ $JAVA_VERSION < "1.8." ]]; then
  echo "Java version $JAVA_VERSION is less than the required 1.8 for Spark 2.2+"
  echo "Please set JAVA_HOME correctly."
  exit 1
fi

# This is a band-aid fix to avoid the failure of Maven nightly snapshot in some Jenkins
# machines by explicitly calling /usr/sbin/lsof. Please see SPARK-22377 and the discussion
# in its pull request.
LSOF=lsof
if ! hash $LSOF 2>/dev/null; then
  LSOF=/usr/sbin/lsof
fi

if [ -z "$SPARK_PACKAGE_VERSION" ]; then
  SPARK_PACKAGE_VERSION="${SPARK_VERSION}-$(date +%Y_%m_%d_%H_%M)-${git_hash}"
fi
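
# For example, with SPARK_VERSION=2.4.0 and a checkout at commit abc1234, the default
# above would come out roughly as 2.4.0-2018_10_29_13_05-abc1234 (the timestamp and
# hash here are purely illustrative).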

DEST_DIR_NAME="$SPARK_PACKAGE_VERSION"

git clean -d -f -x
rm .gitignore
cd ..

if [[ "$1" == "package" ]]; then
  # Source and binary tarballs
  echo "Packaging release source tarballs"
  cp -r spark spark-$SPARK_VERSION

  # For source release in v2.4+, exclude copy of binary license/notice
  if [[ $SPARK_VERSION > "2.4" ]]; then
    rm spark-$SPARK_VERSION/LICENSE-binary
    rm spark-$SPARK_VERSION/NOTICE-binary
    rm -r spark-$SPARK_VERSION/licenses-binary
  fi

  tar cvzf spark-$SPARK_VERSION.tgz --exclude spark-$SPARK_VERSION/.git spark-$SPARK_VERSION
  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour --output spark-$SPARK_VERSION.tgz.asc \
    --detach-sig spark-$SPARK_VERSION.tgz
  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
    SHA512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha512
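
  # A downstream consumer could verify the tarball roughly as follows (a sketch;
  # assumes the release signing key has already been imported into the local keyring):
  #
  #   gpg --verify spark-$SPARK_VERSION.tgz.asc spark-$SPARK_VERSION.tgz
  #   gpg --print-md SHA512 spark-$SPARK_VERSION.tgz | diff - spark-$SPARK_VERSION.tgz.sha512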
  rm -rf spark-$SPARK_VERSION

  ZINC_PORT=3035

  # Updated for each binary build
  make_binary_release() {
    NAME=$1
    FLAGS="$MVN_EXTRA_OPTS -B $BASE_RELEASE_PROFILES $2"
    # BUILD_PACKAGE can be "withpip", "withr", or both as "withpip,withr"
    BUILD_PACKAGE=$3
    SCALA_VERSION=$4

    PIP_FLAG=""
    if [[ $BUILD_PACKAGE == *"withpip"* ]]; then
      PIP_FLAG="--pip"
    fi
    R_FLAG=""
    if [[ $BUILD_PACKAGE == *"withr"* ]]; then
      R_FLAG="--r"
    fi

    # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
    # share the same Zinc server.
    ZINC_PORT=$((ZINC_PORT + 1))

    echo "Building binary dist $NAME"
    cp -r spark spark-$SPARK_VERSION-bin-$NAME
    cd spark-$SPARK_VERSION-bin-$NAME

    ./dev/change-scala-version.sh $SCALA_VERSION

    export ZINC_PORT=$ZINC_PORT
    echo "Creating distribution: $NAME ($FLAGS)"

    # Write out the VERSION to the PySpark version info; we rewrite the - into a . and
    # SNAPSHOT into dev0 to be closer to PEP 440.
    PYSPARK_VERSION=`echo "$SPARK_VERSION" |  sed -e "s/-/./" -e "s/SNAPSHOT/dev0/" -e "s/preview/dev/"`
    echo "__version__='$PYSPARK_VERSION'" > python/pyspark/version.py
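    # For instance, the sed rules above turn "3.0.0-SNAPSHOT" into "3.0.0.dev0" and
    # "3.0.0-preview2" into "3.0.0.dev2" (illustrative examples, not an exhaustive list).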

    # Get maven home set by MVN
    MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'`

    echo "Creating distribution"
    ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz \
      $PIP_FLAG $R_FLAG $FLAGS \
      -DzincPort=$ZINC_PORT 2>&1 >  ../binary-release-$NAME.log
    cd ..

    if [[ -n $R_FLAG ]]; then
      echo "Copying and signing R source package"
      R_DIST_NAME=SparkR_$SPARK_VERSION.tar.gz
      cp spark-$SPARK_VERSION-bin-$NAME/R/$R_DIST_NAME .

      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
        --output $R_DIST_NAME.asc \
        --detach-sig $R_DIST_NAME
      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
        SHA512 $R_DIST_NAME > \
        $R_DIST_NAME.sha512
    fi

    if [[ -n $PIP_FLAG ]]; then
      echo "Copying and signing python distribution"
      PYTHON_DIST_NAME=pyspark-$PYSPARK_VERSION.tar.gz
      cp spark-$SPARK_VERSION-bin-$NAME/python/dist/$PYTHON_DIST_NAME .

      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
        --output $PYTHON_DIST_NAME.asc \
        --detach-sig $PYTHON_DIST_NAME
      echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
        SHA512 $PYTHON_DIST_NAME > \
        $PYTHON_DIST_NAME.sha512
    fi

    echo "Copying and signing regular binary distribution"
    cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz .
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \
      --output spark-$SPARK_VERSION-bin-$NAME.tgz.asc \
      --detach-sig spark-$SPARK_VERSION-bin-$NAME.tgz
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
      SHA512 spark-$SPARK_VERSION-bin-$NAME.tgz > \
      spark-$SPARK_VERSION-bin-$NAME.tgz.sha512
  }

  # List of binary packages built. Populates two associative arrays, where the key is the "name" of
  # the package being built, and the values are respectively the needed maven arguments for building
  # the package, and any extra package needed for that particular combination.
  #
  # In dry run mode, only build the first one. The keys in BINARY_PKGS_ARGS are used as the
  # list of packages to be built, so it's ok for things to be missing in BINARY_PKGS_EXTRA.

  declare -A BINARY_PKGS_ARGS
  BINARY_PKGS_ARGS["hadoop2.7"]="-Phadoop-2.7 $HIVE_PROFILES"
  if ! is_dry_run; then
    BINARY_PKGS_ARGS["without-hadoop"]="-Phadoop-provided"
    if [[ $SPARK_VERSION < "3.0." ]]; then
      BINARY_PKGS_ARGS["hadoop2.6"]="-Phadoop-2.6 $HIVE_PROFILES"
    else
      BINARY_PKGS_ARGS["hadoop2.7-hive1.2"]="-Phadoop-2.7 -Phive-1.2 $HIVE_PROFILES"
      BINARY_PKGS_ARGS["hadoop3.2"]="-Phadoop-3.2 $HIVE_PROFILES"
    fi
  fi

  declare -A BINARY_PKGS_EXTRA
  BINARY_PKGS_EXTRA["hadoop2.7"]="withpip,withr"
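
  # As an illustration (not executed here), the hadoop2.7 entry above ends up being
  # built through a call roughly like:
  #
  #   make_binary_release "hadoop2.7" "$SCALA_2_12_PROFILES -Phadoop-2.7 $HIVE_PROFILES" "withpip,withr" "2.12"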

  if [[ $PUBLISH_SCALA_2_11 = 1 ]]; then
    key="without-hadoop-scala-2.11"
    args="-Phadoop-provided"
    extra=""
    if ! make_binary_release "$key" "$SCALA_2_11_PROFILES $args" "$extra" "2.11"; then
      error "Failed to build $key package. Check logs for details."
    fi
  fi

  if [[ $PUBLISH_SCALA_2_12 = 1 ]]; then
    echo "Packages to build: ${!BINARY_PKGS_ARGS[@]}"
    for key in ${!BINARY_PKGS_ARGS[@]}; do
      args=${BINARY_PKGS_ARGS[$key]}
      extra=${BINARY_PKGS_EXTRA[$key]}
      if ! make_binary_release "$key" "$SCALA_2_12_PROFILES $args" "$extra" "2.12"; then
        error "Failed to build $key package. Check logs for details."
      fi
    done
  fi

  rm -rf spark-$SPARK_VERSION-bin-*/

  if ! is_dry_run; then
    svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
    rm -rf "svn-spark/${DEST_DIR_NAME}-bin"
    mkdir -p "svn-spark/${DEST_DIR_NAME}-bin"

    echo "Copying release tarballs"
    cp spark-* "svn-spark/${DEST_DIR_NAME}-bin/"
    cp pyspark-* "svn-spark/${DEST_DIR_NAME}-bin/"
    cp SparkR_* "svn-spark/${DEST_DIR_NAME}-bin/"
    svn add "svn-spark/${DEST_DIR_NAME}-bin"

    cd svn-spark
    svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION" --no-auth-cache
    cd ..
    rm -rf svn-spark
  fi

  exit 0
fi

if [[ "$1" == "docs" ]]; then
  # Documentation
  cd spark
  echo "Building Spark docs"
  cd docs
  # TODO: Make configurable to add this: PRODUCTION=1
  PRODUCTION=1 RELEASE_VERSION="$SPARK_VERSION" jekyll build
  cd ..
  cd ..

  if ! is_dry_run; then
    svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
    rm -rf "svn-spark/${DEST_DIR_NAME}-docs"
    mkdir -p "svn-spark/${DEST_DIR_NAME}-docs"

    echo "Copying release documentation"
    cp -R "spark/docs/_site" "svn-spark/${DEST_DIR_NAME}-docs/"
    svn add "svn-spark/${DEST_DIR_NAME}-docs"

    cd svn-spark
    svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION docs" --no-auth-cache
    cd ..
    rm -rf svn-spark
  fi

  mv "spark/docs/_site" docs/
  exit 0
fi

if [[ "$1" == "publish-snapshot" ]]; then
  cd spark
  # Publish Spark to the Apache Maven snapshots repo
  echo "Deploying Spark SNAPSHOT at '$GIT_REF' ($git_hash)"
  echo "Publish version is $SPARK_VERSION"
  if [[ ! $SPARK_VERSION == *"SNAPSHOT"* ]]; then
    echo "ERROR: Snapshots must have a version containing SNAPSHOT"
    echo "ERROR: You gave version '$SPARK_VERSION'"
    exit 1
  fi
  # Coerce the requested version
  $MVN versions:set -DnewVersion=$SPARK_VERSION
  tmp_settings="tmp-settings.xml"
  echo "<settings><servers><server>" > $tmp_settings
  echo "<id>apache.snapshots.https</id><username>$ASF_USERNAME</username>" >> $tmp_settings
  echo "<password>$ASF_PASSWORD</password>" >> $tmp_settings
  echo "</server></servers></settings>" >> $tmp_settings

  # Generate random port for Zinc
  export ZINC_PORT=$(python -S -c "import random; print(random.randrange(3030,4030))")

  $MVN -DzincPort=$ZINC_PORT --settings $tmp_settings -DskipTests $SCALA_2_12_PROFILES $PUBLISH_PROFILES deploy

  rm $tmp_settings
  cd ..
  exit 0
fi

if [[ "$1" == "publish-release" ]]; then
  cd spark
  # Publish Spark to Maven release repo
  echo "Publishing Spark checkout at '$GIT_REF' ($git_hash)"
  echo "Publish version is $SPARK_VERSION"
  # Coerce the requested version
  $MVN versions:set -DnewVersion=$SPARK_VERSION

  # Using Nexus API documented here:
  # https://support.sonatype.com/entries/39720203-Uploading-to-a-Staging-Repository-via-REST-API
  if ! is_dry_run; then
    echo "Creating Nexus staging repository"
    repo_request="<promoteRequest><data><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
    out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
      -H "Content-Type:application/xml" -v \
      $NEXUS_ROOT/profiles/$NEXUS_PROFILE/start)
    staged_repo_id=$(echo $out | sed -e "s/.*\(orgapachespark-[0-9]\{4\}\).*/\1/")
    echo "Created Nexus staging repository: $staged_repo_id"
  fi
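
  # The staging "start" call above returns XML containing the new repository id; the sed
  # expression keeps only the orgapachespark-NNNN token. For example, a response carrying
  # <stagedRepositoryId>orgapachespark-1234</stagedRepositoryId> would leave
  # staged_repo_id=orgapachespark-1234 (the number shown here is purely illustrative).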

  tmp_repo=$(mktemp -d spark-repo-XXXXX)

  # Generate random port for Zinc
  export ZINC_PORT=$(python -S -c "import random; print(random.randrange(3030,4030))")

  # TODO: revisit for Scala 2.13 support

  if [[ $PUBLISH_SCALA_2_11 = 1 ]]; then
    ./dev/change-scala-version.sh 2.11
    $MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -DskipTests \
      $SCALA_2_11_PROFILES $PUBLISH_PROFILES clean install
  fi

  if [[ $PUBLISH_SCALA_2_12 = 1 ]]; then
    ./dev/change-scala-version.sh 2.12
    $MVN -DzincPort=$((ZINC_PORT + 2)) -Dmaven.repo.local=$tmp_repo -DskipTests \
      $SCALA_2_12_PROFILES $PUBLISH_PROFILES clean install
  fi

  pushd $tmp_repo/org/apache/spark

  # Remove any extra files generated during install
  find . -type f |grep -v \.jar |grep -v \.pom | xargs rm

  echo "Creating hash and signature files"
  # this must have .asc, .md5 and .sha1 - Nexus really doesn't like anything else there
  for file in $(find . -type f)
  do
    echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --output $file.asc \
      --detach-sig --armour $file;
    if [ $(command -v md5) ]; then
      # Available on OS X; -q to keep only hash
      md5 -q $file > $file.md5
    else
      # Available on Linux; cut to keep only hash
      md5sum $file | cut -f1 -d' ' > $file.md5
    fi
    sha1sum $file | cut -f1 -d' ' > $file.sha1
  done
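
  # After this loop, each artifact is accompanied by its signature and checksums, e.g.
  # (hypothetical artifact shown, following the standard Maven repository layout):
  #
  #   spark-core_2.12/2.4.0/spark-core_2.12-2.4.0.jar
  #   spark-core_2.12/2.4.0/spark-core_2.12-2.4.0.jar.asc
  #   spark-core_2.12/2.4.0/spark-core_2.12-2.4.0.jar.md5
  #   spark-core_2.12/2.4.0/spark-core_2.12-2.4.0.jar.sha1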

  if ! is_dry_run; then
    nexus_upload=$NEXUS_ROOT/deployByRepositoryId/$staged_repo_id
    echo "Uploading files to $nexus_upload"
    for file in $(find . -type f)
    do
      # strip leading ./
      file_short=$(echo $file | sed -e "s/\.\///")
      dest_url="$nexus_upload/org/apache/spark/$file_short"
      echo "  Uploading $file_short"
      curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url
    done

    echo "Closing nexus staging repository"
    repo_request="<promoteRequest><data><stagedRepositoryId>$staged_repo_id</stagedRepositoryId><description>Apache Spark $SPARK_VERSION (commit $git_hash)</description></data></promoteRequest>"
    out=$(curl -X POST -d "$repo_request" -u $ASF_USERNAME:$ASF_PASSWORD \
      -H "Content-Type:application/xml" -v \
      $NEXUS_ROOT/profiles/$NEXUS_PROFILE/finish)
    echo "Closed Nexus staging repository: $staged_repo_id"
  fi

  popd
  rm -rf $tmp_repo
  cd ..
  exit 0
fi

cd ..
rm -rf spark
echo "ERROR: expects to be called with 'package', 'docs', 'publish-release' or 'publish-snapshot'"