0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023 from __future__ import print_function
0024 import os
0025 import sys
0026
0027
def _find_spark_home():
    """Find the SPARK_HOME.

    Resolution order:

    1. The ``SPARK_HOME`` environment variable, returned as-is when it is set.
    2. Otherwise, the first candidate directory accepted by the nested
       ``is_spark_home`` check. Candidates, in order: ``../`` relative to the
       current working directory, the directory containing this file, the
       directory of the locatable ``pyspark`` package, and two levels above
       that package directory.

    Returns:
        The SPARK_HOME path as a string.

    Raises:
        SystemExit: with exit code -1, after printing a diagnostic to stderr,
            when no candidate directory qualifies.
    """
    # An explicitly exported SPARK_HOME wins and is returned without validation.
    if "SPARK_HOME" in os.environ:
        return os.environ["SPARK_HOME"]

    def is_spark_home(path):
        """Takes a path and returns true if the provided path could be a reasonable SPARK_HOME"""
        return (os.path.isfile(os.path.join(path, "bin/spark-submit")) and
                (os.path.isdir(os.path.join(path, "jars")) or
                 os.path.isdir(os.path.join(path, "assembly"))))

    paths = ["../", os.path.dirname(os.path.realpath(__file__))]

    # Locate the pyspark package without importing it. ``import_error_raised``
    # records that the lookup failed so the final diagnostic can add a hint.
    import_error_raised = False
    module_home = None
    # Compare the version tuple, not the version string: a lexicographic
    # comparison such as "10.0" < "3" would be true.
    if sys.version_info < (3,):
        import imp
        try:
            module_home = imp.find_module("pyspark")[1]
        except ImportError:
            import_error_raised = True
    else:
        from importlib.util import find_spec
        try:
            spec = find_spec("pyspark")
        except ImportError:
            spec = None
        # find_spec() signals "not found" by returning None rather than by
        # raising, and a spec may carry no origin; both mean there is no
        # package location to derive candidates from.
        origin = getattr(spec, "origin", None)
        if origin:
            module_home = os.path.dirname(origin)
        else:
            import_error_raised = True

    if module_home is not None:
        paths.append(module_home)
        # Also look two directories above the package (presumably to cover
        # development/editable installs -- confirm against packaging layout).
        paths.append(os.path.join(module_home, "../../"))

    # Normalize so that the result and the error message show absolute paths.
    paths = [os.path.abspath(p) for p in paths]

    for path in paths:
        if is_spark_home(path):
            return path

    print("Could not find valid SPARK_HOME while searching {0}".format(paths), file=sys.stderr)
    if import_error_raised:
        print(
            "\nDid you install PySpark via a package manager such as pip or Conda? If so,\n"
            "PySpark was not found in your Python environment. It is possible your\n"
            "Python environment does not properly bind with your package manager.\n"
            "\nPlease check your default 'python' and if you set PYSPARK_PYTHON and/or\n"
            "PYSPARK_DRIVER_PYTHON environment variables, and see if you can import\n"
            "PySpark, for example, 'python -c 'import pyspark'.\n"
            "\nIf you cannot import, you can install by using the Python executable directly,\n"
            "for example, 'python -m pip install pyspark [--user]'. Otherwise, you can also\n"
            "explicitly set the Python executable, that has PySpark installed, to\n"
            "PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON environment variables, for example,\n"
            "'PYSPARK_PYTHON=python3 pyspark'.\n", file=sys.stderr)
    sys.exit(-1)
0086
if __name__ == "__main__":
    # Script entry point: resolve SPARK_HOME and write it to stdout so that a
    # calling process can capture the value.
    resolved_home = _find_spark_home()
    print(resolved_home)