Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env python
0002 
0003 #
0004 # Licensed to the Apache Software Foundation (ASF) under one or more
0005 # contributor license agreements.  See the NOTICE file distributed with
0006 # this work for additional information regarding copyright ownership.
0007 # The ASF licenses this file to You under the Apache License, Version 2.0
0008 # (the "License"); you may not use this file except in compliance with
0009 # the License.  You may obtain a copy of the License at
0010 #
0011 #    http://www.apache.org/licenses/LICENSE-2.0
0012 #
0013 # Unless required by applicable law or agreed to in writing, software
0014 # distributed under the License is distributed on an "AS IS" BASIS,
0015 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 # See the License for the specific language governing permissions and
0017 # limitations under the License.
0018 #
0019 
# This script attempts to determine the correct setting for SPARK_HOME given
# that Spark may have been installed on the system with pip.
0022 
0023 from __future__ import print_function
0024 import os
0025 import sys
0026 
0027 
def _find_spark_home():
    """Return the location of SPARK_HOME.

    Resolution order:
      1. If the ``SPARK_HOME`` environment variable is set, trust it as-is.
      2. Otherwise probe candidate directories -- the parent of the current
         working directory, the directory containing this script, and (when
         pip-installed) the ``pyspark`` package location plus two levels up
         for editable installs -- for the layout of a Spark distribution.

    Exits the process with status -1 (after printing guidance to stderr)
    when no candidate looks like a valid Spark installation.
    """
    # If the environment has SPARK_HOME set trust it.
    if "SPARK_HOME" in os.environ:
        return os.environ["SPARK_HOME"]

    def is_spark_home(path):
        """Takes a path and returns true if the provided path could be a reasonable SPARK_HOME"""
        # A Spark home has bin/spark-submit plus either a packaged "jars"
        # directory (binary distribution) or an "assembly" directory (source tree).
        return (os.path.isfile(os.path.join(path, "bin/spark-submit")) and
                (os.path.isdir(os.path.join(path, "jars")) or
                 os.path.isdir(os.path.join(path, "assembly"))))

    # Candidates: parent of the CWD and the real directory of this script.
    paths = ["../", os.path.dirname(os.path.realpath(__file__))]

    # Add the path of the PySpark module if it exists
    import_error_raised = False
    if sys.version < "3":
        import imp  # deprecated in Python 3, but the only option on Python 2
        try:
            module_home = imp.find_module("pyspark")[1]
            paths.append(module_home)
            # If we are installed in edit mode also look two dirs up
            paths.append(os.path.join(module_home, "../../"))
        except ImportError:
            # Not pip installed no worries
            import_error_raised = True
    else:
        from importlib.util import find_spec
        try:
            # Bug fix: find_spec() returns None for a missing top-level module
            # instead of raising ImportError, so dereferencing `.origin`
            # unguarded would raise AttributeError and skip the friendly
            # error message below. Treat a None spec/origin as "not installed".
            spec = find_spec("pyspark")
            if spec is None or spec.origin is None:
                import_error_raised = True
            else:
                module_home = os.path.dirname(spec.origin)
                paths.append(module_home)
                # If we are installed in edit mode also look two dirs up
                paths.append(os.path.join(module_home, "../../"))
        except ImportError:
            # Not pip installed no worries
            import_error_raised = True

    # Normalize the paths
    paths = [os.path.abspath(p) for p in paths]

    try:
        # Return the first candidate that looks like a Spark home.
        return next(path for path in paths if is_spark_home(path))
    except StopIteration:
        print("Could not find valid SPARK_HOME while searching {0}".format(paths), file=sys.stderr)
        if import_error_raised:
            print(
                "\nDid you install PySpark via a package manager such as pip or Conda? If so,\n"
                "PySpark was not found in your Python environment. It is possible your\n"
                "Python environment does not properly bind with your package manager.\n"
                "\nPlease check your default 'python' and if you set PYSPARK_PYTHON and/or\n"
                "PYSPARK_DRIVER_PYTHON environment variables, and see if you can import\n"
                "PySpark, for example, 'python -c 'import pyspark'.\n"
                "\nIf you cannot import, you can install by using the Python executable directly,\n"
                "for example, 'python -m pip install pyspark [--user]'. Otherwise, you can also\n"
                "explicitly set the Python executable, that has PySpark installed, to\n"
                "PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON environment variables, for example,\n"
                "'PYSPARK_PYTHON=python3 pyspark'.\n", file=sys.stderr)
        sys.exit(-1)
0086 
if __name__ == "__main__":
    # Invoked as a script: print the resolved SPARK_HOME on stdout so shell
    # wrappers can capture it.
    spark_home = _find_spark_home()
    print(spark_home)