0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
# Fail a pipeline if any stage fails, abort on errors, trace commands.
set -o pipefail
set -e
set -x

# Figure out where Spark is installed: this script lives in a direct
# subdirectory of SPARK_HOME (modern $() instead of nested backticks).
SPARK_HOME="$(cd "$(dirname "$0")/.."; pwd)"
DISTDIR="$SPARK_HOME/dist"

# Defaults, overridden by command-line flags parsed below.
MAKE_TGZ=false
MAKE_PIP=false
MAKE_R=false
NAME=none
MVN="$SPARK_HOME/build/mvn"
0040
# Print usage information and terminate with a non-zero status.
# Turns off tracing first so the help text is readable.
exit_with_usage() {
  set +x
  cat <<'USAGE'
make-distribution.sh - tool for making binary distributions of Spark

usage:
make-distribution.sh [--name] [--tgz] [--pip] [--r] [--mvn <mvn-command>] <maven build options>
See Spark's "Building Spark" doc for correct Maven options.

USAGE
  exit 1
}
0052
0053
# Parse command-line flags. Parsing stops at the first unrecognized option
# beginning with a single '-': that and everything after it is left in "$@"
# and passed through to Maven as build options.
while (( "$#" )); do
  case "$1" in
    --tgz)
      MAKE_TGZ=true
      ;;
    --pip)
      MAKE_PIP=true
      ;;
    --r)
      MAKE_R=true
      ;;
    --mvn)
      # Require the value: previously a trailing --mvn silently set MVN=""
      # and then died on the extra shift under set -e with no diagnostic.
      if [ "$#" -lt 2 ]; then
        echo "Error: --mvn requires an argument" >&2
        exit_with_usage
      fi
      MVN="$2"
      shift
      ;;
    --name)
      if [ "$#" -lt 2 ]; then
        echo "Error: --name requires an argument" >&2
        exit_with_usage
      fi
      NAME="$2"
      shift
      ;;
    --help)
      exit_with_usage
      ;;
    --*)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
    -*)
      # First Maven option: stop parsing, keep the remainder for the build.
      break
      ;;
    *)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
  esac
  shift
done
0090
# Locate a usable JAVA_HOME when the caller did not set one.
if [ -z "$JAVA_HOME" ]; then
  # On RPM-based systems, ask rpm for the registered JDK location.
  if command -v rpm >/dev/null 2>&1; then
    RPM_JAVA_HOME="$(rpm -E %java_home 2>/dev/null)"
    # rpm echoes the macro back verbatim when it is undefined.
    if [ "$RPM_JAVA_HOME" != "%java_home" ]; then
      JAVA_HOME="$RPM_JAVA_HOME"
      echo "No JAVA_HOME set, proceeding with '$JAVA_HOME' learned from rpm"
    fi
  fi

  # Otherwise derive it from the java binary on PATH: JAVA_HOME is two
  # directory levels above bin/java.
  if [ -z "$JAVA_HOME" ]; then
    if command -v java >/dev/null 2>&1; then
      JAVA_HOME="$(dirname "$(dirname "$(command -v java)")")"
    fi
  fi
fi

if [ -z "$JAVA_HOME" ]; then
  echo "Error: JAVA_HOME is not set, cannot proceed." >&2
  exit 1
fi
0113
# Record the git revision (if building from a checkout) for the RELEASE file.
if command -v git >/dev/null 2>&1; then
  # '|| :' keeps this from aborting under set -e outside a git work tree.
  GITREV=$(git rev-parse --short HEAD 2>/dev/null || :)
  if [ -n "$GITREV" ]; then
    GITREVSTRING=" (git revision $GITREV)"
  fi
  unset GITREV
fi
0121
0122
# Bail out early if the Maven launcher cannot be found; diagnostics go to
# stderr and the exit status is a portable 1 (not the non-standard -1).
if ! command -v "$MVN" >/dev/null 2>&1; then
  echo "Could not locate Maven command: '$MVN'." >&2
  echo "Specify the Maven command with the --mvn flag" >&2
  exit 1
fi
0128
# Ask the Maven build for its effective versions so the distribution name
# and RELEASE file match the produced artifacts. Build options are passed
# through quoted ("$@") so options containing spaces survive intact.
VERSION=$("$MVN" help:evaluate -Dexpression=project.version "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | tail -n 1)
SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | tail -n 1)
SPARK_HADOOP_VERSION=$("$MVN" help:evaluate -Dexpression=hadoop.version "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | tail -n 1)
# Count of active 'hive' profiles (0 or 1). grep -F replaces deprecated
# fgrep; the trailing 'echo -n' resets the substitution's exit status to 0,
# because grep --count exits 1 on a zero count and would trip set -e.
SPARK_HIVE=$("$MVN" help:evaluate -Dexpression=project.activeProfiles -pl sql/hive "$@" 2>/dev/null\
    | grep -v "INFO"\
    | grep -v "WARNING"\
    | grep -F --count "<id>hive</id>";\
    echo -n)
0148
# When no --name was supplied, label the distribution by Hadoop version.
case "$NAME" in
  none)
    NAME=$SPARK_HADOOP_VERSION
    ;;
esac

echo "Spark version is $VERSION"

# Announce what is about to be produced.
if [ "$MAKE_TGZ" == "true" ]; then
  echo "Making spark-$VERSION-bin-$NAME.tgz"
else
  echo "Making distribution for Spark $VERSION in '$DISTDIR'..."
fi
0160
0161
cd "$SPARK_HOME"

# Give Maven generous defaults unless the caller already tuned MAVEN_OPTS.
export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:ReservedCodeCacheSize=1g}"

# Build the full command as an array; "$@" (quoted) keeps each pass-through
# Maven option as a single word even if it contains spaces.
BUILD_COMMAND=("$MVN" clean package -DskipTests "$@")

# Actually build! Echo the command first for the log ([*] joins the array
# into one display string).
echo -e "\nBuilding with..."
echo -e "\$ ${BUILD_COMMAND[*]}\n"

"${BUILD_COMMAND[@]}"
0176
0177
# Recreate the staging directory from scratch so no stale files leak into
# the distribution. ${DISTDIR:?} aborts rather than running 'rm -rf ""'
# should DISTDIR ever be empty/unset.
rm -rf -- "${DISTDIR:?}"
mkdir -p "$DISTDIR/jars"
echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
echo "Build flags: $@" >> "$DISTDIR/RELEASE"

# Copy the assembled jars into the dist layout.
cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
0185
0186
# Ship the YARN external shuffle service jar when it was built.
# NOTE(review): '[ -f glob ]' only behaves when the glob matches exactly one
# file — presumably a single scala-*/ target dir exists per build; verify.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
  mkdir "$DISTDIR/yarn"
  cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/yarn"
fi


# Ship Kubernetes dockerfiles and integration-test resources when the
# kubernetes module was part of this build (its target/ dir exists).
if [ -d "$SPARK_HOME"/resource-managers/kubernetes/core/target/ ]; then
  mkdir -p "$DISTDIR/kubernetes/"
  cp -a "$SPARK_HOME"/resource-managers/kubernetes/docker/src/main/dockerfiles "$DISTDIR/kubernetes/"
  cp -a "$SPARK_HOME"/resource-managers/kubernetes/integration-tests/tests "$DISTDIR/kubernetes/"
fi
0198
0199
# Stage the example jars.
mkdir -p "$DISTDIR/examples/jars"
cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"

# Deduplicate: drop any example jar that is already shipped in the main
# jars directory, so the tarball does not carry two copies.
for example_jar in "$DISTDIR"/examples/jars/*; do
  jar_name=$(basename "$example_jar")
  if [ -f "$DISTDIR/jars/$jar_name" ]; then
    rm "$example_jar"
  fi
done

# Stage the example sources alongside the jars.
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$SPARK_HOME/examples/src/main" "$DISTDIR/examples/src/"
0214
0215
# Ship the binary-distribution license material when present.
if [ ! -e "$SPARK_HOME/LICENSE-binary" ]; then
  echo "Skipping copying LICENSE files"
else
  cp "$SPARK_HOME/LICENSE-binary" "$DISTDIR/LICENSE"
  cp -r "$SPARK_HOME/licenses-binary" "$DISTDIR/licenses"
  cp "$SPARK_HOME/NOTICE-binary" "$DISTDIR/NOTICE"
fi

# Older release branches carry a CHANGES.txt; include it when it exists.
if [ -e "$SPARK_HOME/CHANGES.txt" ]; then
  cp "$SPARK_HOME/CHANGES.txt" "$DISTDIR"
fi

# Sample data used by the examples.
cp -r "$SPARK_HOME/data" "$DISTDIR"
0230
0231
# Optionally produce the PySpark source distribution for pip.
if [ "$MAKE_PIP" == "true" ]; then
  echo "Building python distribution package"
  # Subshell keeps the directory change local (replaces pushd/popd).
  (
    cd "$SPARK_HOME/python"
    rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
    python3 setup.py sdist
  )
else
  echo "Skipping building python distribution package"
fi
0242
0243
# Optionally build and install the SparkR source package.
if [ "$MAKE_R" == "true" ]; then
  echo "Building R source package"
  # Version is the last field of the "Version:" line in DESCRIPTION
  # ($() replaces the old backtick substitution).
  R_PACKAGE_VERSION=$(grep Version "$SPARK_HOME/R/pkg/DESCRIPTION" | awk '{print $NF}')
  pushd "$SPARK_HOME/R" > /dev/null

  # Build source package and run full checks, skipping tests for speed.
  NO_TESTS=1 "$SPARK_HOME/R/check-cran.sh"

  # The DESCRIPTION version may lag the Maven version; rename the tarball
  # so downstream steps find it under the build's version.
  if [ "$R_PACKAGE_VERSION" != "$VERSION" ]; then
    mv "$SPARK_HOME/R/SparkR_$R_PACKAGE_VERSION.tar.gz" "$SPARK_HOME/R/SparkR_$VERSION.tar.gz"
  fi

  # Install the source package so the dist ships a pre-built SparkR.
  VERSION=$VERSION "$SPARK_HOME/R/install-source-package.sh"
  popd > /dev/null
else
  echo "Skipping building R source package"
fi
0264
0265
# Stage configuration templates, docs and the runtime script trees.
mkdir "$DISTDIR/conf"
cp "$SPARK_HOME"/conf/*.template "$DISTDIR/conf"
cp "$SPARK_HOME/README.md" "$DISTDIR"
for tree in bin python; do
  cp -r "$SPARK_HOME/$tree" "$tree_unused_guard${DISTDIR}"
done

# When a pip package was built above, keep it out of the dist python tree.
if [ "$MAKE_PIP" == "true" ]; then
  rm -f "$DISTDIR"/python/dist/pyspark-*.tar.gz
fi

cp -r "$SPARK_HOME/sbin" "$DISTDIR"

# Include the pre-built SparkR library when it exists.
if [ -d "$SPARK_HOME/R/lib/SparkR" ]; then
  mkdir -p "$DISTDIR/R/lib"
  cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR/R/lib"
  cp "$SPARK_HOME/R/lib/sparkr.zip" "$DISTDIR/R/lib"
fi
0284
# Optionally wrap the dist directory in a versioned tarball.
if [ "$MAKE_TGZ" == "true" ]; then
  TARDIR_NAME=spark-$VERSION-bin-$NAME
  TARDIR="$SPARK_HOME/$TARDIR_NAME"
  # Stage a copy under the final name so the tar root directory is right.
  rm -rf "$TARDIR"
  cp -r "$DISTDIR" "$TARDIR"
  tar czf "$TARDIR_NAME.tgz" -C "$SPARK_HOME" "$TARDIR_NAME"
  rm -rf "$TARDIR"
fi