Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env bash
0002 
0003 #
0004 # Licensed to the Apache Software Foundation (ASF) under one or more
0005 # contributor license agreements.  See the NOTICE file distributed with
0006 # this work for additional information regarding copyright ownership.
0007 # The ASF licenses this file to You under the Apache License, Version 2.0
0008 # (the "License"); you may not use this file except in compliance with
0009 # the License.  You may obtain a copy of the License at
0010 #
0011 #    http://www.apache.org/licenses/LICENSE-2.0
0012 #
0013 # Unless required by applicable law or agreed to in writing, software
0014 # distributed under the License is distributed on an "AS IS" BASIS,
0015 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 # See the License for the specific language governing permissions and
0017 # limitations under the License.
0018 #
0019 
0020 #
0021 # Script to create a binary distribution for easy deploys of Spark.
0022 # The distribution directory defaults to dist/ but can be overridden below.
0023 # The distribution contains fat (assembly) jars that include the Scala library,
0024 # so it is completely self contained.
0025 # It does not contain source or *.class files.
0026 
# Abort on the first failing command (-e), fail a pipeline when any stage
# fails (pipefail), and trace every command (-x) as a build-debugging aid.
set -e -x -o pipefail
0030 
# Figure out where the Spark framework is installed: this script lives in a
# subdirectory of SPARK_HOME, so resolve the parent of the script's directory.
# Use $( ) instead of nested backticks, which are fragile inside double quotes.
SPARK_HOME="$(cd "$(dirname "$0")/.."; pwd)"
DISTDIR="$SPARK_HOME/dist"

# Defaults for the command-line flags parsed below.
MAKE_TGZ=false
MAKE_PIP=false
MAKE_R=false
NAME=none
MVN="$SPARK_HOME/build/mvn"
0040 
# Print the help text for this script and abort with status 1.
# Command tracing (set -x) is switched off first so the message stays clean.
function exit_with_usage {
  set +x
  cat <<'USAGE'
make-distribution.sh - tool for making binary distributions of Spark

usage:
make-distribution.sh [--name] [--tgz] [--pip] [--r] [--mvn <mvn-command>] <maven build options>
See Spark's "Building Spark" doc for correct Maven options.

USAGE
  exit 1
}
0052 
# Walk the command line.  Flags belonging to this script are consumed here;
# the first token that looks like a Maven option ("-Pyarn", "-Dfoo=bar", ...)
# stops parsing so the remainder of "$@" can be handed to Maven untouched.
while (( "$#" )); do
  case "$1" in
    --tgz)  MAKE_TGZ=true ;;
    --pip)  MAKE_PIP=true ;;
    --r)    MAKE_R=true ;;
    --mvn)  MVN="$2"; shift ;;
    --name) NAME="$2"; shift ;;
    --help) exit_with_usage ;;
    --*)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
    -*)
      # Single-dash token: assume a Maven build option and stop consuming.
      break
      ;;
    *)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
  esac
  shift
done
0090 
# Locate a usable JAVA_HOME when the caller did not provide one, then fail
# fast if none could be found (the Maven build below requires it).
if [ -z "$JAVA_HOME" ]; then
  # Fall back on JAVA_HOME from rpm, if found.  Test the command directly
  # instead of "[ $(command -v rpm) ]", which breaks on paths with spaces.
  if command -v rpm >/dev/null 2>&1; then
    RPM_JAVA_HOME="$(rpm -E %java_home 2>/dev/null)"
    # rpm echoes the macro name back verbatim when the macro is undefined.
    if [ "$RPM_JAVA_HOME" != "%java_home" ]; then
      JAVA_HOME="$RPM_JAVA_HOME"
      echo "No JAVA_HOME set, proceeding with '$JAVA_HOME' learned from rpm"
    fi
  fi

  if [ -z "$JAVA_HOME" ]; then
    if command -v java >/dev/null 2>&1; then
      # If java is in /usr/bin/java, we want /usr
      JAVA_HOME="$(dirname "$(dirname "$(command -v java)")")"
    fi
  fi
fi

if [ -z "$JAVA_HOME" ]; then
  echo "Error: JAVA_HOME is not set, cannot proceed." >&2
  # "exit -1" is not a valid POSIX exit status (bash maps it to 255); use 1.
  exit 1
fi
0113 
# Capture the abbreviated git revision (when building inside a git checkout)
# so it can be embedded in the RELEASE file; a harmless no-op otherwise.
# Test the command directly rather than via an unquoted substitution in [ ].
if command -v git >/dev/null 2>&1; then
    GITREV=$(git rev-parse --short HEAD 2>/dev/null || :)
    if [ -n "$GITREV" ]; then
        GITREVSTRING=" (git revision $GITREV)"
    fi
    unset GITREV
fi
0121 
0122 
# Refuse to continue without a runnable Maven; everything below depends on it.
if ! command -v "$MVN" >/dev/null 2>&1; then
    # Diagnostics go to stderr; plain echo suffices (no escapes in the text).
    echo "Could not locate Maven command: '$MVN'." >&2
    echo "Specify the Maven command with the --mvn flag" >&2
    # "exit -1" is not a valid POSIX exit status (bash maps it to 255); use 1.
    exit 1
fi
0128 
# Ask Maven for the versions this build will actually produce.  The caller's
# extra Maven options are forwarded quoted as "$@" (the unquoted $@ re-split
# any option containing spaces) because profiles can change these values.
VERSION=$("$MVN" help:evaluate -Dexpression=project.version "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | tail -n 1)
SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | tail -n 1)
SPARK_HADOOP_VERSION=$("$MVN" help:evaluate -Dexpression=hadoop.version "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | tail -n 1)
# Count of "<id>hive</id>" in the active profiles: 0 when hive is inactive.
# Use grep -F (fixed string) instead of the deprecated fgrep alias.
SPARK_HIVE=$("$MVN" help:evaluate -Dexpression=project.activeProfiles -pl sql/hive "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | grep -F --count "<id>hive</id>";\
    # Reset exit status to 0, otherwise the script stops here if the last grep finds nothing\
    # because we use "set -o pipefail"
    echo -n)
0148 
# Default the distribution name to the Hadoop version it was built against.
if [ "$NAME" == "none" ]; then
  NAME=$SPARK_HADOOP_VERSION
fi

echo "Spark version is $VERSION"

if [ "$MAKE_TGZ" == "true" ]; then
  echo "Making spark-$VERSION-bin-$NAME.tgz"
else
  echo "Making distribution for Spark $VERSION in '$DISTDIR'..."
fi

# Build uber fat JAR
cd "$SPARK_HOME"

# Give Maven a generous default heap/code cache unless the caller set one.
export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:ReservedCodeCacheSize=1g}"

# Store the command as an array because $MVN variable might have spaces in it.
# Normal quoting tricks don't work.
# See: http://mywiki.wooledge.org/BashFAQ/050
# "$@" must be quoted here, otherwise user options containing spaces are
# re-split into separate words before reaching Maven.
BUILD_COMMAND=("$MVN" clean package -DskipTests "$@")

# Actually build the jar
echo -e "\nBuilding with..."
echo -e "\$ ${BUILD_COMMAND[*]}\n"

"${BUILD_COMMAND[@]}"
0176 
# Make directories
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/jars"
echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
echo "Build flags: $@" >> "$DISTDIR/RELEASE"

# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"

# Only create the yarn directory if the yarn artifacts were built.
# Expand the glob into an array first: "[ -f <glob> ]" raises a syntax error
# under set -e if the pattern ever matches more than one file.
YARN_SHUFFLE_JARS=("$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar)
if [ -f "${YARN_SHUFFLE_JARS[0]}" ]; then
  mkdir "$DISTDIR/yarn"
  cp "${YARN_SHUFFLE_JARS[@]}" "$DISTDIR/yarn"
fi

# Only create and copy the dockerfiles directory if the kubernetes artifacts were built.
if [ -d "$SPARK_HOME"/resource-managers/kubernetes/core/target/ ]; then
  mkdir -p "$DISTDIR/kubernetes/"
  cp -a "$SPARK_HOME"/resource-managers/kubernetes/docker/src/main/dockerfiles "$DISTDIR/kubernetes/"
  cp -a "$SPARK_HOME"/resource-managers/kubernetes/integration-tests/tests "$DISTDIR/kubernetes/"
fi
0198 
# Copy examples and dependencies
mkdir -p "$DISTDIR/examples/jars"
cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"

# Deduplicate jars that have already been packaged as part of the main Spark dependencies.
for example_jar in "$DISTDIR"/examples/jars/*; do
  jar_name=${example_jar##*/}
  if [ -f "$DISTDIR/jars/$jar_name" ]; then
    rm "$DISTDIR/examples/jars/$jar_name"
  fi
done

# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$SPARK_HOME/examples/src/main" "$DISTDIR/examples/src/"

# Ship the binary-distribution license/notice set when present.
if [ -e "$SPARK_HOME/LICENSE-binary" ]; then
  cp "$SPARK_HOME/LICENSE-binary" "$DISTDIR/LICENSE"
  cp -r "$SPARK_HOME/licenses-binary" "$DISTDIR/licenses"
  cp "$SPARK_HOME/NOTICE-binary" "$DISTDIR/NOTICE"
else
  echo "Skipping copying LICENSE files"
fi

if [ -e "$SPARK_HOME/CHANGES.txt" ]; then
  cp "$SPARK_HOME/CHANGES.txt" "$DISTDIR"
fi

# Copy data files
cp -r "$SPARK_HOME/data" "$DISTDIR"
0230 
# Make pip package
if [ "$MAKE_PIP" == "true" ]; then
  echo "Building python distribution package"
  (
    cd "$SPARK_HOME/python"
    # Delete the egg info file if it exists, this can cache older setup files.
    rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
    python3 setup.py sdist
  )
else
  echo "Skipping building python distribution package"
fi

# Make R package - this is used for both CRAN release and packing R layout into distribution
if [ "$MAKE_R" == "true" ]; then
  echo "Building R source package"
  R_PACKAGE_VERSION=$(grep Version "$SPARK_HOME/R/pkg/DESCRIPTION" | awk '{print $NF}')
  (
    cd "$SPARK_HOME/R"
    # Build source package and run full checks
    # Do not source the check-cran.sh - it should be run from where it is for it to set SPARK_HOME
    NO_TESTS=1 "$SPARK_HOME/R/check-cran.sh"

    # Move R source package to match the Spark release version if the versions are not the same.
    # NOTE(shivaram): `mv` throws an error on Linux if source and destination are same file
    if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then
      mv "$SPARK_HOME/R/SparkR_$R_PACKAGE_VERSION.tar.gz" "$SPARK_HOME/R/SparkR_$VERSION.tar.gz"
    fi

    # Install source package to get it to generate vignettes rds files, etc.
    VERSION=$VERSION "$SPARK_HOME/R/install-source-package.sh"
  )
else
  echo "Skipping building R source package"
fi
0264 
# Copy other things
mkdir "$DISTDIR/conf"
cp "$SPARK_HOME"/conf/*.template "$DISTDIR/conf"
# README, launcher scripts and the python tree all land at the dist root.
cp -r "$SPARK_HOME/README.md" "$SPARK_HOME/bin" "$SPARK_HOME/python" "$DISTDIR"

# Remove the python distribution from dist/ if we built it
if [ "$MAKE_PIP" == "true" ]; then
  rm -f "$DISTDIR"/python/dist/pyspark-*.tar.gz
fi

cp -r "$SPARK_HOME/sbin" "$DISTDIR"

# Copy SparkR if it exists
if [ -d "$SPARK_HOME/R/lib/SparkR" ]; then
  mkdir -p "$DISTDIR/R/lib"
  cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR/R/lib"
  cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR/R/lib"
fi

# Optionally roll dist/ into a versioned tarball via a staging directory so
# the top-level folder inside the archive carries the release name.
if [ "$MAKE_TGZ" == "true" ]; then
  TARDIR_NAME=spark-$VERSION-bin-$NAME
  TARDIR="$SPARK_HOME/$TARDIR_NAME"
  rm -rf "$TARDIR"
  cp -r "$DISTDIR" "$TARDIR"
  tar czf "$TARDIR_NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME"
  rm -rf "$TARDIR"
fi
0292 fi