0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 import os
0019 import re
0020
0021 from collections import namedtuple
0022 from textwrap import dedent
0023
0024
0025 from mkdocs.structure.pages import markdown
0026
0027 from pyspark.java_gateway import launch_gateway
0028
0029
# One row of the generated documentation table. Within this file, `name` is
# rendered in the "Property Name" column, `default` in "Default",
# `description` (passed through Markdown) in "Meaning", and `version` in
# "Since Version".
SQLConfEntry = namedtuple("SQLConfEntry", "name default description version")
0032
0033
def get_sql_configs(jvm, group):
    """
    Fetch the SQL configuration entries of one group from the JVM.

    `group` selects the listing requested from `PythonSQLUtils`: the value
    "static" calls `listStaticSQLConfigs()`; any other value calls
    `listRuntimeSQLConfigs()`.

    Every item of the returned collection is read through its `_1()` to
    `_4()` accessors (presumably a 4-element Scala tuple -- confirm against
    the JVM side) and converted to a `SQLConfEntry` holding, in that order,
    the name, default, description and version.

    Returns a list of `SQLConfEntry`, in the order the JVM returned them.
    """
    sql_utils = jvm.org.apache.spark.sql.api.python.PythonSQLUtils
    if group == "static":
        raw_entries = sql_utils.listStaticSQLConfigs()
    else:
        raw_entries = sql_utils.listRuntimeSQLConfigs()

    entries = []
    for raw in raw_entries:
        entries.append(
            SQLConfEntry(
                name=raw._1(),
                default=raw._2(),
                description=raw._3(),
                version=raw._4(),
            )
        )
    return entries
0049
0050
def _resolve_displayed_default(config, value_reference_pattern):
    """Return the text shown in the "Default" column for `config`."""
    # These two defaults depend on the machine that generates the docs, so a
    # fixed description is shown instead of the reported value.
    if config.name == "spark.sql.session.timeZone":
        return "(value of local timezone)"
    if config.name == "spark.sql.warehouse.dir":
        return "(value of <code>$PWD/spark-warehouse</code>)"
    if config.default == "<undefined>":
        return "(none)"

    reference = value_reference_pattern.match(config.default)
    if reference is not None:
        return "(value of <code>{}</code>)".format(reference.group(1))

    # Anything else is returned untouched. That includes a malformed
    # "<value of ...>" marker, which the caller then rejects because it still
    # starts with "<".
    return config.default


def generate_sql_configs_table_html(sql_configs, path):
    """
    Generates an HTML table at `path` that lists all public SQL
    configuration options.

    `sql_configs` is an iterable of entries exposing `name`, `default`,
    `description` and `version` attributes; rows are written sorted by
    `name`. The file at `path` is (over)written as UTF-8.

    The table will look something like this:

    ```html
    <table class="table">
    <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>

    <tr>
    <td><code>spark.sql.adaptive.enabled</code></td>
    <td>false</td>
    <td><p>When true, enable adaptive query execution.</p></td>
    <td>2.1.0</td>
    </tr>

    ...

    </table>
    ```

    Raises `Exception` when the default to be displayed still starts with
    "<", i.e. it is a placeholder this function does not know how to render
    and that would otherwise be emitted as if it were an HTML tag.
    """
    value_reference_pattern = re.compile(r"^<value of (\S*)>$")

    # Explicit encoding so the output does not depend on the locale of the
    # machine that builds the docs.
    with open(path, 'w', encoding="utf-8") as f:
        f.write(dedent(
            """
            <table class="table">
            <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
            """
        ))
        for config in sorted(sql_configs, key=lambda x: x.name):
            default = _resolve_displayed_default(config, value_reference_pattern)

            if default.startswith("<"):
                raise Exception(
                    "Unhandled reference in SQL config docs. Config '{name}' "
                    "has default '{default}' that looks like an HTML tag."
                    .format(
                        name=config.name,
                        default=config.default,
                    )
                )

            # NOTE(review): dedent() runs after substitution, so a rendered
            # description that spans several lines can change how much margin
            # is stripped. The result is valid HTML either way.
            f.write(dedent(
                """
                <tr>
                <td><code>{name}</code></td>
                <td>{default}</td>
                <td>{description}</td>
                <td>{version}</td>
                </tr>
                """
                .format(
                    name=config.name,
                    default=default,
                    description=markdown.markdown(config.description),
                    version=config.version
                )
            ))
        f.write("</table>\n")
0123
0124
if __name__ == "__main__":
    # Script entry point: start a JVM gateway, then write one HTML table per
    # config group into the "docs" directory that sits next to this script's
    # parent directory.
    jvm = launch_gateway().jvm

    # Make the script path absolute before walking up the tree. On
    # interpreters where `__file__` is a bare relative file name, two
    # dirname() calls would both yield "" and "docs" would be resolved
    # against the current working directory instead.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    docs_root_dir = os.path.join(os.path.dirname(script_dir), "docs")

    # Produces generated-runtime-sql-config-table.html and
    # generated-static-sql-config-table.html.
    for group in ("runtime", "static"):
        sql_configs = get_sql_configs(jvm, group)
        sql_configs_table_path = os.path.join(
            docs_root_dir, "generated-{}-sql-config-table.html".format(group))
        generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)