0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021 import re
0022 import sys
0023 from subprocess import Popen, PIPE
0024
0025 try:
0026 from jira.client import JIRA
0027
0028 try:
0029 from jira.exceptions import JIRAError
0030 except ImportError:
0031 from jira.utils import JIRAError
0032 except ImportError:
0033 print("This tool requires the jira-python library")
0034 print("Install using 'sudo pip install jira'")
0035 sys.exit(-1)
0036
0037 try:
0038 from github import Github
0039 from github import GithubException
0040 except ImportError:
0041 print("This tool requires the PyGithub library")
0042 print("Install using 'sudo pip install PyGithub'")
0043 sys.exit(-1)
0044
0045 try:
0046 import unidecode
0047 except ImportError:
0048 print("This tool requires the unidecode library to decode obscure github usernames")
0049 print("Install using 'sudo pip install unidecode'")
0050 sys.exit(-1)
0051
# Python 2 compatibility: bind ``input`` to ``raw_input`` so that prompts
# return the typed line as a string instead of evaluating it.
# The check uses the structured ``sys.version_info`` tuple; comparing the
# free-form ``sys.version`` string lexicographically would misclassify any
# major version whose text sorts before "3" (e.g. "10.0").
if sys.version_info[0] < 3:
    input = raw_input  # noqa: F821 -- this name only exists on Python 2


# File name of the contributors list (its consumers are outside this section).
contributors_file_name = "contributors.txt"
0057
0058
0059
def yesOrNoPrompt(msg):
    """
    Ask a yes/no question on the console until a valid answer is typed.

    :param msg: question text; " [y/n]: " is appended to build the prompt
    :return: True if the answer was "y", False if it was "n"
    """
    prompt = "%s [y/n]: " % msg
    # Re-prompt with a plain loop: the previous version recursed once per
    # invalid answer, so a long run of bad input could exhaust the stack.
    while True:
        response = input(prompt)
        if response in ("y", "n"):
            return response == "y"
0065
0066
0067
def run_cmd(cmd):
    """
    Run a command and return everything it wrote to stdout, as text.

    :param cmd: the command and its arguments, as a list handed to ``Popen``
    :return: the captured stdout as a native ``str``
    """
    output = Popen(cmd, stdout=PIPE).communicate()[0]
    # On Python 3 the pipe yields bytes, but callers search and split the
    # result with str patterns. Decode there; on Python 2 the value is
    # already a ``str`` and is returned untouched. Undecodable bytes are
    # replaced rather than aborting the whole run.
    if not isinstance(output, str):
        output = output.decode("utf-8", "replace")
    return output
0070
0071
def run_cmd_error(cmd):
    """
    Run a command and return everything it wrote to stderr, as text.

    stdout is captured as well, but discarded.

    :param cmd: the command and its arguments, as a list handed to ``Popen``
    :return: the captured stderr as a native ``str``
    """
    errors = Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
    # On Python 3 the pipe yields bytes, which cannot be searched with a str
    # needle (``"error" not in <bytes>`` raises TypeError). Decode there; on
    # Python 2 the value is already a ``str`` and is returned untouched.
    if not isinstance(errors, str):
        errors = errors.decode("utf-8", "replace")
    return errors
0074
0075
def get_date(commit_hash):
    """
    Return the committer date of a commit, exactly as ``git show`` prints it.

    :param commit_hash: the commit to inspect
    :return: whatever ``run_cmd`` returns for the ``git show`` invocation
    """
    # ``%cd`` is git's placeholder for the committer date.
    git_show = ["git", "show", "--quiet", "--pretty=format:%cd", commit_hash]
    return run_cmd(git_show)
0078
0079
def tag_exists(tag):
    """
    Report whether ``git show <tag>`` ran without an "error" on stderr.

    :param tag: the tag name handed to ``git show``
    :return: False if the text "error" appears in git's stderr, True otherwise
    """
    # NOTE(review): this depends on the wording of git's diagnostics, not on
    # its exit status -- confirm it matches the messages of the git in use.
    return "error" not in run_cmd_error(["git", "show", tag])
0083
0084
0085
class Commit:
    """
    Plain record describing one commit.

    Holds the commit hash, the author, the title and, optionally, the number
    of the pull request the commit closed.
    """

    def __init__(self, _hash, author, title, pr_number=None):
        self._hash, self.author = _hash, author
        self.title, self.pr_number = title, pr_number

    def get_hash(self):
        """Return the commit hash."""
        return self._hash

    def get_author(self):
        """Return the commit author."""
        return self.author

    def get_title(self):
        """Return the commit title."""
        return self.title

    def get_pr_number(self):
        """Return the pull request number, or None if there is none."""
        return self.pr_number

    def __str__(self):
        # The "(Closes #N)" suffix is only rendered for a truthy PR number;
        # otherwise the last field is empty and the line ends with a space.
        if self.pr_number:
            closes_pr = "(Closes #%s)" % self.pr_number
        else:
            closes_pr = ""
        fields = (self._hash, self.author, self.title, closes_pr)
        return "%s %s %s %s" % fields
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
def get_commits(tag):
    """
    List the commits that ``git log <tag>`` reports, as Commit objects.

    Each log entry is printed with custom markers so that the hash, author
    name, subject and body can be split apart reliably. If the body contains
    "Closes #<n> from <user>/", the pull request number is recorded, and the
    GitHub user name replaces an author name that ``is_valid_author`` rejects.
    Author names are finally transliterated with ``unidecode`` and stripped.

    The process exits (``sys.exit``) if an entry does not contain exactly one
    end marker or exactly two field markers.

    :param tag: revision handed to ``git log``
    :return: list of Commit objects, in the order git printed them
    """
    commit_start_marker = "|=== COMMIT START MARKER ===|"
    commit_end_marker = "|=== COMMIT END MARKER ===|"
    field_end_marker = "|=== COMMIT FIELD END MARKER ===|"
    # Layout per entry: START hash FIELD author FIELD subject END body
    log_format = "".join([
        commit_start_marker, "%h",
        field_end_marker, "%an",
        field_end_marker, "%s",
        commit_end_marker, "%b",
    ])
    output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + log_format, tag])
    closes_pattern = re.compile("Closes #([0-9]+) from ([^/\\s]+)/")

    commits = []
    for raw in output.split(commit_start_marker):
        if not raw:
            # Splitting leaves an empty chunk before the first start marker.
            continue
        if raw.count(commit_end_marker) != 1:
            print("Commit end marker not found in commit: ")
            print(raw)
            sys.exit(1)

        # Separate the one-line digest from the free-form commit body.
        commit_digest, commit_body = raw.split(commit_end_marker)
        if commit_digest.count(field_end_marker) != 2:
            sys.exit("Unexpected format in commit: %s" % commit_digest)
        _hash, author, title = commit_digest.split(field_end_marker)

        # The PR number and GitHub user come from the merge message, if any.
        pr_number = None
        closes = closes_pattern.search(commit_body)
        if closes:
            pr_number, github_username = closes.groups()
            # Fall back to the GitHub user name when the git author name
            # does not look like a real name.
            if not is_valid_author(author):
                author = github_username

        # Python 2 decodes the byte string; Python 3 has no ``unicode``.
        try:
            author = unicode(author, "UTF-8")
        except NameError:
            author = str(author)
        author = unidecode.unidecode(author).strip()
        commits.append(Commit(_hash, author, title, pr_number))
    return commits
0163
0164
0165
0166
0167
0168
# Maps a lower-cased issue type name to the label it is reported under.
# Looked up by translate_issue_type().
known_issue_types = {
    # Fixes
    "bug": "bug fixes",
    "build": "build fixes",
    "dependency upgrade": "build fixes",
    # Enhancements
    "improvement": "improvements",
    "new feature": "new features",
    # Docs and tests
    "documentation": "documentation",
    "test": "test",
    # NOTE(review): these two use the singular "improvement" while the
    # "improvement" type above maps to the plural -- confirm this is intended.
    "task": "improvement",
    "sub-task": "improvement",
}
0180
0181
0182
0183
0184
# Display name shared by every component that is reported as part of the core.
CORE_COMPONENT = "Core"

# Maps a lower-cased component name to its display name.
# Looked up by translate_component() and find_components().
known_components = {
    "block manager": CORE_COMPONENT,
    "build": CORE_COMPONENT,
    "deploy": CORE_COMPONENT,
    "documentation": CORE_COMPONENT,
    "examples": CORE_COMPONENT,
    "graphx": "GraphX",
    "input/output": CORE_COMPONENT,
    "java api": "Java API",
    # Two spellings of the same component.
    "k8s": "Kubernetes",
    "kubernetes": "Kubernetes",
    "mesos": "Mesos",
    # Two spellings of the same component.
    "ml": "MLlib",
    "mllib": "MLlib",
    "project infra": "Project Infra",
    "pyspark": "PySpark",
    "shuffle": "Shuffle",
    "spark core": CORE_COMPONENT,
    "spark shell": CORE_COMPONENT,
    "sql": "SQL",
    "streaming": "Streaming",
    "web ui": "Web UI",
    "windows": "Windows",
    "yarn": "YARN",
}
0211
0212
0213
0214
def translate_issue_type(issue_type, issue_id, warnings):
    """
    Translate an issue type name into its label from ``known_issue_types``.

    The lookup is case-insensitive. An unknown type is returned lower-cased
    and a message naming it is appended to ``warnings``.

    :param issue_type: issue type name to translate
    :param issue_id: identifier quoted in the warning message
    :param warnings: list that receives a message for an unknown type
    :return: the translated label, or the lower-cased input if unknown
    """
    normalized = issue_type.lower()
    if normalized not in known_issue_types:
        warnings.append("Unknown issue type \"%s\" (see %s)" % (normalized, issue_id))
        return normalized
    return known_issue_types[normalized]
0222
0223
0224
0225
def translate_component(component, commit_hash, warnings):
    """
    Translate a component name into its display name from ``known_components``.

    The lookup is case-insensitive. An unknown component is returned
    lower-cased and a message naming it is appended to ``warnings``.

    :param component: component name to translate
    :param commit_hash: commit hash quoted in the warning message
    :param warnings: list that receives a message for an unknown component
    :return: the display name, or the lower-cased input if unknown
    """
    normalized = component.lower()
    if normalized not in known_components:
        warnings.append("Unknown component \"%s\" (see %s)" % (normalized, commit_hash))
        return normalized
    return known_components[normalized]
0233
0234
0235
0236
def find_components(commit, commit_hash):
    """
    Extract the known components tagged in square brackets in a commit text.

    Tags are matched case-insensitively against ``known_components``; tags
    that are not known components are dropped without a warning.

    :param commit: text to scan (presumably the commit title -- confirm
                   against callers), e.g. "[SPARK-1234][SQL] Fix something"
    :param commit_hash: commit hash forwarded to ``translate_component``
    :return: list of translated component names, in order of appearance
    """
    # Capture the text *inside* the brackets. The previous pattern returned
    # the tags with their brackets ("[sql]"), which can never equal a key of
    # known_components, so the result was always empty. Any character except
    # a bracket is allowed so that multi-word keys such as "web ui" or
    # "input/output" can match too.
    tags = re.findall(r"\[([^\[\]]+)\]", commit.lower())
    # Only known tags reach translate_component, so it never emits a warning
    # here and a throwaway list is enough.
    return [translate_component(tag, commit_hash, [])
            for tag in tags if tag in known_components]
0242
0243
0244
0245
0246
0247
def nice_join(str_list):
    """
    Join strings into an English enumeration.

    Zero items give "", one item is returned as is, two items are joined with
    " and ", and three or more are comma separated with ", and " before the
    last one (e.g. "a, b, and c").

    :param str_list: iterable of strings
    :return: the joined text
    """
    items = list(str_list)
    count = len(items)
    if count == 0:
        return ""
    if count == 1:
        return items[0]
    if count == 2:
        return " and ".join(items)
    leading, last = items[:-1], items[-1]
    return ", ".join(leading) + ", and " + last
0258
0259
0260
0261
def get_github_name(author, github_client):
    """
    Look up the ``name`` of a GitHub user.

    :param author: user name handed to ``github_client.get_user``
    :param github_client: client object; a falsy value disables the lookup
    :return: the user's ``name``, or None when there is no client or the
             lookup failed with status 404
    :raises GithubException: for any lookup failure other than status 404
    """
    if not github_client:
        return None
    try:
        return github_client.get_user(author).name
    except GithubException as e:
        # A 404 (presumably an unknown user) is not an error for callers;
        # anything else is re-raised.
        if e.status != 404:
            raise e
    return None
0271
0272
0273
0274
def get_jira_name(author, jira_client):
    """
    Look up the ``displayName`` of a JIRA user.

    :param author: user name handed to ``jira_client.user``
    :param jira_client: client object; a falsy value disables the lookup
    :return: the user's ``displayName``, or None when there is no client or
             the lookup failed with status code 404
    :raises JIRAError: for any lookup failure other than status code 404
    """
    if not jira_client:
        return None
    try:
        return jira_client.user(author).displayName
    except JIRAError as e:
        # A 404 (presumably an unknown user) is not an error for callers;
        # anything else is re-raised.
        if e.status_code != 404:
            raise e
    return None
0284
0285
0286
def is_valid_author(author):
    """
    Decide whether an author string looks like a real name.

    A name is accepted when it is non-empty, contains at least one space and
    contains no digit.

    :param author: author name to check; may be empty or None
    :return: True if the name is accepted, False otherwise
    """
    if not author:
        return False
    if " " not in author:
        return False
    return re.search("[0-9]", author) is None
0291
0292
0293
def capitalize_author(author):
    """
    Capitalize the first letter of every word in an author name.

    Only the first character of each word is changed, so inner capitals are
    kept ("mcDonald" becomes "McDonald"). Runs of spaces collapse to one.

    :param author: author name; may be empty or None
    :return: the capitalized name, or None for an empty or None input
    """
    if not author:
        return None
    capitalized = []
    for word in author.split(" "):
        if not word:
            # Consecutive spaces produce empty pieces; skip them.
            continue
        capitalized.append(word[0].capitalize() + word[1:])
    return " ".join(capitalized)