0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 import os
0019 from collections import namedtuple
0020
0021 from pyspark.java_gateway import launch_gateway
0022
0023
# Immutable record describing one built-in SQL function as reported by the JVM.
ExpressionInfo = namedtuple(
    "ExpressionInfo",
    [
        "className",
        "name",
        "usage",
        "arguments",
        "examples",
        "note",
        "since",
        "deprecated",
    ])
0026
0027
def _list_function_infos(jvm):
    """
    Collects information about the built-in SQL functions through the JVM.

    Every entry returned by `PythonSQLUtils.listBuiltinFunctionInfos()` is wrapped in an
    `ExpressionInfo`, with the `_FUNC_` placeholder replaced by the function name in the
    usage, arguments, examples and note texts. The wrapped infos are returned as a list
    sorted by function name.
    """

    def wrap(jinfo):
        func_name = jinfo.getName()
        raw_usage = jinfo.getUsage()
        # Only the usage text is guarded against None; the other texts are
        # substituted unconditionally.
        if raw_usage is None:
            usage_text = raw_usage
        else:
            usage_text = raw_usage.replace("_FUNC_", func_name)
        return ExpressionInfo(
            className=jinfo.getClassName(),
            name=func_name,
            usage=usage_text,
            arguments=jinfo.getArguments().replace("_FUNC_", func_name),
            examples=jinfo.getExamples().replace("_FUNC_", func_name),
            note=jinfo.getNote().replace("_FUNC_", func_name),
            since=jinfo.getSince(),
            deprecated=jinfo.getDeprecated())

    sql_utils = jvm.org.apache.spark.sql.api.python.PythonSQLUtils
    wrapped = [wrap(jinfo) for jinfo in sql_utils.listBuiltinFunctionInfos()]
    wrapped.sort(key=lambda info: info.name)
    return wrapped
0050
0051
0052 def _make_pretty_usage(usage):
0053 """
0054 Makes the usage description pretty and returns a formatted string if `usage`
0055 is not an empty string. Otherwise, returns None.
0056 """
0057
0058 if usage is not None and usage.strip() != "":
0059 usage = "\n".join(map(lambda u: u.strip(), usage.split("\n")))
0060 return "%s\n\n" % usage
0061
0062
0063 def _make_pretty_arguments(arguments):
0064 """
0065 Makes the arguments description pretty and returns a formatted string if `arguments`
0066 starts with the argument prefix. Otherwise, returns None.
0067
0068 Expected input:
0069
0070 Arguments:
0071 * arg0 - ...
0072 ...
0073 * arg0 - ...
0074 ...
0075
0076 Expected output:
0077 **Arguments:**
0078
0079 * arg0 - ...
0080 ...
0081 * arg0 - ...
0082 ...
0083
0084 """
0085
0086 if arguments.startswith("\n Arguments:"):
0087 arguments = "\n".join(map(lambda u: u[6:], arguments.strip().split("\n")[1:]))
0088 return "**Arguments:**\n\n%s\n\n" % arguments
0089
0090
0091 def _make_pretty_examples(examples):
0092 """
0093 Makes the examples description pretty and returns a formatted string if `examples`
0094 starts with the example prefix. Otherwise, returns None.
0095
0096 Expected input:
0097
0098 Examples:
0099 > SELECT ...;
0100 ...
0101 > SELECT ...;
0102 ...
0103
0104 Expected output:
0105 **Examples:**
0106
0107 ```
0108 > SELECT ...;
0109 ...
0110 > SELECT ...;
0111 ...
0112 ```
0113
0114 """
0115
0116 if examples.startswith("\n Examples:"):
0117 examples = "\n".join(map(lambda u: u[6:], examples.strip().split("\n")[1:]))
0118 return "**Examples:**\n\n```\n%s\n```\n\n" % examples
0119
0120
0121 def _make_pretty_note(note):
0122 """
0123 Makes the note description pretty and returns a formatted string if `note` is not
0124 an empty string. Otherwise, returns None.
0125
0126 Expected input:
0127
0128 ...
0129
0130 Expected output:
0131 **Note:**
0132
0133 ...
0134
0135 """
0136
0137 if note != "":
0138 note = "\n".join(map(lambda n: n[4:], note.split("\n")))
0139 return "**Note:**\n%s\n" % note
0140
0141
0142 def _make_pretty_deprecated(deprecated):
0143 """
0144 Makes the deprecated description pretty and returns a formatted string if `deprecated`
0145 is not an empty string. Otherwise, returns None.
0146
0147 Expected input:
0148
0149 ...
0150
0151 Expected output:
0152 **Deprecated:**
0153
0154 ...
0155
0156 """
0157
0158 if deprecated != "":
0159 deprecated = "\n".join(map(lambda n: n[4:], deprecated.split("\n")))
0160 return "**Deprecated:**\n%s\n" % deprecated
0161
0162
def generate_sql_api_markdown(jvm, path):
    """
    Writes a markdown file at `path` documenting the functions listed via `jvm`.

    One section is written per function, in the order returned by
    `_list_function_infos`. Parts that are unavailable (None, or an empty `since`)
    are omitted. Layout of a full section:

        ### NAME

        USAGE

        **Arguments:**

        ARGUMENTS

        **Examples:**

        ```
        EXAMPLES
        ```

        **Note:**

        NOTE

        **Since:** SINCE

        **Deprecated:**

        DEPRECATED

        <br/>

    """

    with open(path, 'w') as mdfile:
        for info in _list_function_infos(jvm):
            # Format every part before anything is emitted for this function.
            usage = _make_pretty_usage(info.usage)
            arguments = _make_pretty_arguments(info.arguments)
            examples = _make_pretty_examples(info.examples)
            note = _make_pretty_note(info.note)
            since = info.since
            deprecated = _make_pretty_deprecated(info.deprecated)

            sections = ["### %s\n\n" % info.name]
            if usage is not None:
                sections.append("%s\n\n" % usage.strip())
            for optional_part in (arguments, examples, note):
                if optional_part is not None:
                    sections.append(optional_part)
            if not (since is None or since == ""):
                sections.append("**Since:** %s\n\n" % since.strip())
            if deprecated is not None:
                sections.append(deprecated)
            sections.append("<br/>\n\n")

            mdfile.write("".join(sections))
0221
0222
if __name__ == "__main__":
    # Launch the gateway first; its JVM view is used to list the functions.
    gateway_jvm = launch_gateway().jvm
    # The parent of the directory containing this script is treated as the Spark root.
    script_dir = os.path.dirname(__file__)
    spark_root_dir = os.path.dirname(script_dir)
    generate_sql_api_markdown(
        gateway_jvm, os.path.join(spark_root_dir, "sql/docs/index.md"))