0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 import json
0029 import os
0030 import re
0031 import subprocess
0032 import sys
0033 import traceback
0034 if sys.version < '3':
0035 input = raw_input
0036 from urllib2 import urlopen
0037 from urllib2 import Request
0038 from urllib2 import HTTPError
0039 else:
0040 from urllib.request import urlopen
0041 from urllib.request import Request
0042 from urllib.error import HTTPError
0043
0044 try:
0045 import jira.client
0046 JIRA_IMPORTED = True
0047 except ImportError:
0048 JIRA_IMPORTED = False
0049
0050
# Directory the script changes into before running any git command (defaults to the cwd).
SPARK_HOME = os.getenv("SPARK_HOME", os.getcwd())

# Git remote from which pull request heads (pull/<N>/head) are fetched.
PR_REMOTE_NAME = os.getenv("PR_REMOTE_NAME", "apache-github")

# Git remote from which target branches are fetched and to which results are pushed.
PUSH_REMOTE_NAME = os.getenv("PUSH_REMOTE_NAME", "apache")

# Credentials handed to the JIRA client as basic auth; empty strings when unset.
JIRA_USERNAME = os.getenv("JIRA_USERNAME", "")

JIRA_PASSWORD = os.getenv("JIRA_PASSWORD", "")

# Optional token sent as "Authorization: token <key>" on GitHub API requests.
# When it is unset (None) the requests are made without that header.
GITHUB_OAUTH_KEY = os.getenv("GITHUB_OAUTH_KEY")


GITHUB_BASE = "https://github.com/apache/spark/pull"
GITHUB_API_BASE = "https://api.github.com/repos/apache/spark"
JIRA_BASE = "https://issues.apache.org/jira/browse"
JIRA_API_BASE = "https://issues.apache.org/jira"

# Prefix of the temporary local branches this script creates; clean_up() deletes every
# local branch whose name starts with it.
BRANCH_PREFIX = "PR_TOOL"
0073
0074
def get_json(url):
    """
    Fetch `url` and return its body decoded as JSON.

    When GITHUB_OAUTH_KEY is set, the request carries an ``Authorization: token <key>`` header.
    An HTTPError is reported on stdout and terminates the script with exit status -1; a
    dedicated hint is printed when the response headers show the rate limit is used up.
    """
    try:
        request = Request(url)
        if GITHUB_OAUTH_KEY:
            request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY)
        response = urlopen(request)
        try:
            return json.load(response)
        finally:
            # Close the connection explicitly instead of leaving it to garbage collection.
            # try/finally (rather than `with`) also works with the Python 2 urllib2 response.
            response.close()
    except HTTPError as e:
        if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0':
            print("Exceeded the GitHub API rate limit; see the instructions in " +
                  "dev/merge_spark_pr.py to configure an OAuth token for making authenticated " +
                  "GitHub requests.")
        else:
            print("Unable to fetch URL, exiting: %s" % url)
        sys.exit(-1)
0089
0090
def fail(msg):
    """Print `msg`, undo the temporary git state via clean_up(), then exit with status -1."""
    print(msg)
    clean_up()
    raise SystemExit(-1)
0095
0096
def run_cmd(cmd):
    """
    Echo `cmd`, run it, and return its stdout decoded as UTF-8.

    `cmd` is either an argument list, used as-is, or a string, which is split on single
    spaces (no shell-style quoting). A non-zero exit status surfaces as the exception
    raised by subprocess.check_output.
    """
    print(cmd)
    argv = cmd if isinstance(cmd, list) else cmd.split(" ")
    raw_output = subprocess.check_output(argv)
    return raw_output.decode('utf-8')
0103
0104
def continue_maybe(prompt):
    """Ask a yes/no question; any answer other than "y" (case-insensitive) aborts via fail()."""
    answer = input("\n%s (y/n): " % prompt)
    if answer.lower() == "y":
        return
    fail("Okay, exiting")
0109
0110
def clean_up():
    """
    Check out the ref recorded in the global `original_head` and delete the temporary
    local branches (those whose name starts with BRANCH_PREFIX).

    Does nothing when `original_head` has not been set yet.
    """
    if 'original_head' not in globals():
        return

    print("Restoring head pointer to %s" % original_head)
    run_cmd("git checkout %s" % original_head)

    # Spaces are stripped from the `git branch` listing, leaving one branch name per line.
    local_branches = run_cmd("git branch").replace(" ", "").split("\n")
    for name in local_branches:
        if not name.startswith(BRANCH_PREFIX):
            continue
        print("Deleting local branch %s" % name)
        run_cmd("git branch -D %s" % name)
0121
0122
0123
def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
    """
    Squash-merge pull request `pr_num` into `target_ref` and push the result.

    The PR head and the target branch are fetched into temporary local branches, the PR is
    squash-merged, a commit is created whose message is built from `title`, `body` and
    author trailers, and - after confirmation - the commit is pushed to PUSH_REMOTE_NAME.

    :param pr_num: pull request number
    :param target_ref: name of the branch to merge into
    :param title: first paragraph of the commit message
    :param body: second paragraph of the commit message, or None to omit it
    :param pr_repo_desc: description of the PR source, used in the "Closes #..." line
    :return: the first 8 characters of the pushed commit's hash
    """
    pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
    target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper())
    run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name))
    run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, target_ref, target_branch_name))
    run_cmd("git checkout %s" % target_branch_name)

    had_conflicts = False
    try:
        run_cmd(['git', 'merge', pr_branch_name, '--squash'])
    except Exception as e:
        msg = "Error merging: %s\nWould you like to manually fix-up this merge?" % e
        continue_maybe(msg)
        msg = "Okay, please fix any conflicts and 'git add' conflicting files... Finished?"
        continue_maybe(msg)
        had_conflicts = True

    commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
                              '--pretty=format:%an <%ae>']).split("\n")
    # Most frequent author first. Ties are broken by first appearance in the log output so
    # the suggested primary author does not depend on set iteration order.
    distinct_authors = sorted(
        set(commit_authors),
        key=lambda x: (-commit_authors.count(x), commit_authors.index(x)))
    primary_author = input(
        "Enter primary author in the format of \"name <email>\" [%s]: " %
        distinct_authors[0])
    if primary_author == "":
        primary_author = distinct_authors[0]
    else:
        # When the primary author is entered manually, de-duplicate it from the author list
        # and put it at the head of the list.
        distinct_authors = list(filter(lambda x: x != primary_author, distinct_authors))
        distinct_authors.insert(0, primary_author)

    merge_message_flags = []

    merge_message_flags += ["-m", title]
    if body is not None:
        # "@" characters are dropped from the body before it goes into the commit message.
        merge_message_flags += ["-m", body.replace("@", "")]

    committer_name = run_cmd("git config --get user.name").strip()
    committer_email = run_cmd("git config --get user.email").strip()

    if had_conflicts:
        message = "This patch had conflicts when merged, resolved by\nCommitter: %s <%s>" % (
            committer_name, committer_email)
        merge_message_flags += ["-m", message]

    # The string "Closes #%s" is what links the commit back to the pull request.
    merge_message_flags += ["-m", "Closes #%s from %s." % (pr_num, pr_repo_desc)]

    authors = "Authored-by:" if len(distinct_authors) == 1 else "Lead-authored-by:"
    authors += " %s" % (distinct_authors.pop(0))
    if len(distinct_authors) > 0:
        authors += "\n" + "\n".join(["Co-authored-by: %s" % a for a in distinct_authors])
    authors += "\n" + "Signed-off-by: %s <%s>" % (committer_name, committer_email)

    merge_message_flags += ["-m", authors]

    run_cmd(['git', 'commit', '--author="%s"' % primary_author] + merge_message_flags)

    continue_maybe("Merge complete (local ref %s). Push to %s?" % (
        target_branch_name, PUSH_REMOTE_NAME))

    try:
        run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, target_branch_name, target_ref))
    except Exception as e:
        # fail() performs the clean-up itself before exiting.
        fail("Exception while pushing: %s" % e)

    merge_hash = run_cmd("git rev-parse %s" % target_branch_name)[:8]
    clean_up()
    print("Pull request #%s merged!" % pr_num)
    print("Merge hash: %s" % merge_hash)
    return merge_hash
0202
0203
def cherry_pick(pr_num, merge_hash, default_branch):
    """
    Cherry-pick commit `merge_hash` into a branch chosen interactively and push it.

    :param pr_num: pull request number, used for messages and the temporary branch name
    :param merge_hash: hash of the commit to cherry-pick
    :param default_branch: branch used when the prompt is answered with an empty line
    :return: the name of the branch the commit was picked into
    """
    pick_ref = input("Enter a branch name [%s]: " % default_branch)
    if pick_ref == "":
        pick_ref = default_branch

    pick_branch_name = "%s_PICK_PR_%s_%s" % (BRANCH_PREFIX, pr_num, pick_ref.upper())

    run_cmd("git fetch %s %s:%s" % (PUSH_REMOTE_NAME, pick_ref, pick_branch_name))
    run_cmd("git checkout %s" % pick_branch_name)

    try:
        run_cmd("git cherry-pick -sx %s" % merge_hash)
    except Exception as e:
        msg = "Error cherry-picking: %s\nWould you like to manually fix-up this merge?" % e
        continue_maybe(msg)
        msg = "Okay, please fix any conflicts and finish the cherry-pick. Finished?"
        continue_maybe(msg)

    continue_maybe("Pick complete (local ref %s). Push to %s?" % (
        pick_branch_name, PUSH_REMOTE_NAME))

    try:
        run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, pick_branch_name, pick_ref))
    except Exception as e:
        # fail() performs the clean-up itself before exiting.
        fail("Exception while pushing: %s" % e)

    pick_hash = run_cmd("git rev-parse %s" % pick_branch_name)[:8]
    clean_up()

    print("Pull request #%s picked into %s!" % (pr_num, pick_ref))
    print("Pick hash: %s" % pick_hash)
    return pick_ref
0237
0238
def fix_version_from_branch(branch, versions):
    """
    Pick the version object from `versions` that corresponds to `branch`.

    `versions` is expected to be ordered newest to oldest (the caller in this file sorts it
    by name, descending). Each element must expose a `name` attribute.

    :param branch: "master" or a release branch name such as "branch-3.0"
    :param versions: non-empty list of version objects
    :return: the first element for "master"; otherwise the last element whose name equals
        the branch version or starts with it followed by "."
    :raises IndexError: if no element matches the branch (or `versions` is empty)
    """
    if branch == "master":
        return versions[0]

    branch_ver = branch.replace("branch-", "")
    # Match on a component boundary so that e.g. "2.1" does not pick up "2.10.0".
    prefix = branch_ver + "."
    matching = [v for v in versions if v.name == branch_ver or v.name.startswith(prefix)]
    if not matching:
        raise IndexError("No version found for branch %s" % branch)
    return matching[-1]
0246
0247
def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
    """
    Interactively transition one JIRA issue through "Resolve Issue" with resolution "Fixed".

    Prompts for the issue id and the fix version(s), offers to pick an assignee when the
    issue has none, and aborts through fail() when the issue cannot be fetched or already
    has status "Resolved" or "Closed".

    :param merge_branches: branch names the change went into; used to propose fix versions
    :param comment: comment attached to the transition
    :param default_jira_id: issue id used when the id prompt is answered with an empty line
    """
    asf_jira = jira.client.JIRA({'server': JIRA_API_BASE},
                                basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))

    jira_id = input("Enter a JIRA id [%s]: " % default_jira_id)
    if jira_id == "":
        jira_id = default_jira_id

    try:
        issue = asf_jira.issue(jira_id)
    except Exception as e:
        fail("ASF JIRA could not find %s\n%s" % (jira_id, e))

    cur_status = issue.fields.status.name
    cur_summary = issue.fields.summary
    cur_assignee = issue.fields.assignee
    if cur_assignee is None:
        cur_assignee = choose_jira_assignee(issue, asf_jira)
    # Check again: the user may have declined to choose an assignee.
    if cur_assignee is None:
        cur_assignee = "NOT ASSIGNED!!!"
    else:
        cur_assignee = cur_assignee.displayName

    if cur_status == "Resolved" or cur_status == "Closed":
        fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status))
    print("=== JIRA %s ===" % jira_id)
    print("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" %
          (cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))

    versions = asf_jira.project_versions("SPARK")
    versions = sorted(versions, key=lambda x: x.name, reverse=True)
    versions = list(filter(lambda x: x.raw['released'] is False, versions))
    # Keep only versions whose name starts with an x.y.z number.
    versions = list(filter(lambda x: re.match(r'\d+\.\d+\.\d+', x.name), versions))

    default_fix_versions = list(map(
        lambda x: fix_version_from_branch(x, versions).name, merge_branches))
    for v in default_fix_versions:
        # Drop an x.y.0 default when x.(y-1).0 is also a default, so that the two are not
        # both proposed as fix versions for the same change.
        # maxsplit=2 keeps the unpacking safe for names with more than three components.
        (major, minor, patch) = v.split(".", 2)
        if patch == "0":
            previous = "%s.%s.%s" % (major, int(minor) - 1, 0)
            if previous in default_fix_versions:
                default_fix_versions = list(filter(lambda x: x != v, default_fix_versions))
    default_fix_versions = ",".join(default_fix_versions)

    available_versions = set(list(map(lambda v: v.name, versions)))
    while True:
        try:
            fix_versions = input(
                "Enter comma-separated fix version(s) [%s]: " % default_fix_versions)
            if fix_versions == "":
                fix_versions = default_fix_versions
            fix_versions = fix_versions.replace(" ", "").split(",")
            if set(fix_versions).issubset(available_versions):
                break
            else:
                print("Specified version(s) [%s] not found in the available versions, try "
                      "again (or leave blank and fix manually)." % (", ".join(fix_versions)))
        except (KeyboardInterrupt, EOFError):
            # Closed stdin can never produce a valid answer; re-prompting would loop forever.
            raise
        except Exception:
            traceback.print_exc()
            print("Error setting fix version(s), try again (or leave blank and fix manually)")

    def get_version_json(version_str):
        return list(filter(lambda v: v.name == version_str, versions))[0].raw

    jira_fix_versions = list(map(lambda v: get_version_json(v), fix_versions))

    resolve = list(filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id)))[0]
    resolution = list(filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions()))[0]
    asf_jira.transition_issue(
        jira_id, resolve["id"], fixVersions=jira_fix_versions,
        comment=comment, resolution={'id': resolution.raw['id']})

    print("Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions))
0329
0330
def choose_jira_assignee(issue, asf_jira):
    """
    Prompt the user to choose who to assign the issue to in jira, given a list of candidates,
    including the original reporter and all commentors

    The answer is either the number shown next to a candidate or a user id that is looked
    up through `asf_jira`. On any error the traceback is printed and the prompt is repeated.

    :param issue: the JIRA issue to assign
    :param asf_jira: JIRA client used to look up users and assign the issue
    :return: the chosen user object, or None when the prompt is answered with an empty line
    """
    while True:
        try:
            reporter = issue.fields.reporter
            commentors = list(map(lambda x: x.author, issue.fields.comment.comments))
            candidates = set(commentors)
            candidates.add(reporter)
            candidates = list(candidates)
            print("JIRA is unassigned, choose assignee")
            for idx, author in enumerate(candidates):
                if author.key == "apachespark":
                    continue
                annotations = ["Reporter"] if author == reporter else []
                if author in commentors:
                    annotations.append("Commentor")
                print("[%d] %s (%s)" % (idx, author.displayName, ",".join(annotations)))
            raw_assignee = input(
                "Enter number of user, or userid, to assign to (blank to leave unassigned):")
            if raw_assignee == "":
                return None
            else:
                try:
                    index = int(raw_assignee)
                    assignee = candidates[index]
                except (ValueError, IndexError):
                    # Not a listed number: treat the answer as a user id. If the lookup
                    # fails, the outer handler prints the error and prompts again.
                    assignee = asf_jira.user(raw_assignee)
                asf_jira.assign_issue(issue.key, assignee.name)
                return assignee
        except (KeyboardInterrupt, EOFError):
            # Closed stdin can never produce a valid answer; re-prompting would loop forever.
            raise
        except Exception:
            traceback.print_exc()
            print("Error assigning JIRA, try again (or leave blank and fix manually)")
0369
0370
def resolve_jira_issues(title, merge_branches, comment):
    """
    Resolve every JIRA issue referenced as "SPARK-<number>" in `title`.

    Each distinct id is passed to resolve_jira_issue() once, in order of first appearance,
    as the default for its id prompt. When the title references no issue,
    resolve_jira_issue() is called once without a default id.

    :param title: pull request title to scan for issue ids
    :param merge_branches: branch names forwarded to resolve_jira_issue()
    :param comment: comment forwarded to resolve_jira_issue()
    """
    jira_ids = []
    # Match the whole number so that long ids are not truncated and short ones not skipped.
    for jira_id in re.findall(r"SPARK-[0-9]+", title):
        # An id mentioned twice must be resolved only once: the second attempt would find
        # the issue already resolved and abort the script.
        if jira_id not in jira_ids:
            jira_ids.append(jira_id)

    if not jira_ids:
        resolve_jira_issue(merge_branches, comment)
    for jira_id in jira_ids:
        resolve_jira_issue(merge_branches, comment, jira_id)
0378
0379
def standardize_jira_ref(text):
    """
    Standardize the [SPARK-XXXXX] [MODULE] prefix
    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to
    "[SPARK-XXX][MLLIB] Issue"

    >>> standardize_jira_ref(
    ...     "[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
    '[SPARK-5821][SQL] ParquetRelation2 CTAS should check if delete is successful'
    >>> standardize_jira_ref(
    ...     "[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
    '[SPARK-4123][PROJECT INFRA][WIP] Show new dependencies added in pull requests'
    >>> standardize_jira_ref("[MLlib] Spark  5954: Top by key")
    '[SPARK-5954][MLLIB] Top by key'
    >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
    '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
    >>> standardize_jira_ref(
    ...     "SPARK-1094 Support MiMa for reporting binary compatibility across versions.")
    '[SPARK-1094] Support MiMa for reporting binary compatibility across versions.'
    >>> standardize_jira_ref("[WIP]  [SPARK-1146] Vagrant support for Spark")
    '[SPARK-1146][WIP] Vagrant support for Spark'
    >>> standardize_jira_ref(
    ...     "SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
    '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
    >>> standardize_jira_ref(
    ...     "[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
    '[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.'
    >>> standardize_jira_ref("Additional information for users building from source code")
    'Additional information for users building from source code'
    """
    # Titles that already follow the standard layout are returned untouched.
    already_standard = re.search(r'^\[SPARK-[0-9]{3,6}\](\[[A-Z0-9_\s,]+\] )+\S+', text)
    if already_standard:
        return text

    # Pull out the JIRA reference(s): upper-case them, turn whitespace into a dash and wrap
    # them in brackets.
    bracketed_refs = []
    for raw_ref in re.findall(r'(SPARK[-\s]*[0-9]{3,6})+', text, re.IGNORECASE):
        bracketed_refs.append('[%s]' % re.sub(r'\s+', '-', raw_ref.upper()))
        text = text.replace(raw_ref, '')

    # Pull out the bracketed component tag(s): word characters, whitespace, commas, periods
    # and dashes between square brackets.
    upper_components = []
    for tag in re.findall(r'(\[[\w\s,.-]+\])', text, re.IGNORECASE):
        upper_components.append(tag.upper())
        text = text.replace(tag, '')

    # Drop any non-word characters left at the start of the remaining text.
    leading_symbols = re.search(r'^\W+(.*)', text, re.IGNORECASE)
    if leading_symbols is not None:
        text = leading_symbols.group(1)

    # Reassemble as: JIRA ref(s), component(s), remaining text.
    prefix = ''.join(bracketed_refs).strip() + ''.join(upper_components).strip()
    assembled = prefix + " " + text.strip()

    # Collapse whitespace runs, e.g. when there were no refs and/or components at all.
    return re.sub(r'\s+', ' ', assembled.strip())
0444
0445
def get_current_ref():
    """
    Return the ref that is currently checked out.

    This is the abbreviated ref name reported by git, unless that name is the literal
    'HEAD', in which case the full output of `git rev-parse HEAD` is returned instead.
    """
    abbreviated = run_cmd("git rev-parse --abbrev-ref HEAD").strip()
    if abbreviated != 'HEAD':
        return abbreviated
    # No symbolic name is available (presumably a detached HEAD), so fall back to the hash.
    return run_cmd("git rev-parse HEAD").strip()
0453
0454
def main():
    """
    Run the interactive merge workflow for one pull request.

    Steps: record the current git ref in the global `original_head`, ask for a pull request
    number, let the user pick the title and body for the commit message, then either

    * cherry-pick the existing merge commit when the PR events show it was already closed
      by "asfgit" (the script exits with status 0 afterwards), or
    * squash-merge the PR through merge_pr(), optionally cherry-pick the result into more
      branches, and optionally resolve the associated JIRA issue(s).
    """
    global original_head

    os.chdir(SPARK_HOME)
    original_head = get_current_ref()

    # Check the credentials up front so that a missing one is noticed before any merge.
    if not JIRA_USERNAME or not JIRA_PASSWORD:
        continue_maybe("The env-vars JIRA_USERNAME and/or JIRA_PASSWORD are not set. Continue?")

    branches = get_json("%s/branches" % GITHUB_API_BASE)
    branch_names = list(filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]))
    # The "latest" branch is simply the greatest name in plain string order.
    latest_branch = sorted(branch_names, reverse=True)[0]

    pr_num = input("Which pull request would you like to merge? (e.g. 34): ")
    pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
    pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))

    url = pr["url"]

    # Warn the user if the PR is still tagged as work-in-progress.
    if "[WIP]" in pr["title"]:
        msg = "The PR title has `[WIP]`:\n%s\nContinue?" % pr["title"]
        continue_maybe(msg)

    # Decide whether to use the standardized version of the title.
    modified_title = standardize_jira_ref(pr["title"]).rstrip(".")
    if modified_title != pr["title"]:
        print("I've re-written the title as follows to match the standard format:")
        print("Original: %s" % pr["title"])
        print("Modified: %s" % modified_title)
        result = input("Would you like to use the modified title? (y/n): ")
        if result.lower() == "y":
            title = modified_title
            print("Using modified title:")
        else:
            title = pr["title"]
            print("Using original title:")
        print(title)
    else:
        title = pr["title"]

    original_body = pr["body"]
    if original_body is None:
        # A pull request without a description has no body to clean up; merge_pr() accepts
        # None and then omits the body paragraph.
        body = None
    else:
        # Strip HTML comments (such as those left over from a PR template).
        modified_body = re.sub(
            re.compile(r'<!--[^>]*-->\n?', re.DOTALL), '', original_body).lstrip()
        if modified_body != original_body:
            print("=" * 80)
            print(modified_body)
            print("=" * 80)
            print("I've removed the comments from PR template like the above:")
            result = input("Would you like to use the modified body? (y/n): ")
            if result.lower() == "y":
                body = modified_body
                print("Using modified body:")
            else:
                body = original_body
                print("Using original body:")
            print("=" * 80)
            print(body)
            print("=" * 80)
        else:
            body = original_body
    target_ref = pr["base"]["ref"]
    user_login = pr["user"]["login"]
    base_ref = pr["head"]["ref"]
    pr_repo_desc = "%s/%s" % (user_login, base_ref)

    # A "closed" event by "asfgit" is taken to mean the PR was already merged.
    # The actor of an event may be missing, so it is read defensively.
    merge_commits = [
        e for e in pr_events
        if (e.get("actor") or {}).get("login") == "asfgit" and e["event"] == "closed"]

    if merge_commits:
        merge_hash = merge_commits[0]["commit_id"]
        message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]

        print("Pull request %s has already been merged, assuming you want to backport" % pr_num)
        try:
            commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
                                            "%s^{commit}" % merge_hash]).strip() != ""
        except subprocess.CalledProcessError:
            # run_cmd() raises when git exits non-zero, which is how a failed --verify is
            # reported; treat that as "commit not available locally".
            commit_is_downloaded = False
        if not commit_is_downloaded:
            fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)

        print("Found commit %s:\n%s" % (merge_hash, message))
        cherry_pick(pr_num, merge_hash, latest_branch)
        sys.exit(0)

    if not bool(pr["mergeable"]):
        msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
            "Continue? (experts only!)"
        continue_maybe(msg)

    print("\n=== Pull Request #%s ===" % pr_num)
    print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" %
          (title, pr_repo_desc, target_ref, url))
    continue_maybe("Proceed with merging pull request #%s?" % pr_num)

    merged_refs = [target_ref]

    merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc)

    pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
    while input("\n%s (y/n): " % pick_prompt).lower() == "y":
        merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]

    if JIRA_IMPORTED:
        if JIRA_USERNAME and JIRA_PASSWORD:
            continue_maybe("Would you like to update an associated JIRA?")
            jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % \
                (pr_num, GITHUB_BASE, pr_num)
            resolve_jira_issues(title, merged_refs, jira_comment)
        else:
            print("JIRA_USERNAME and JIRA_PASSWORD not set")
            print("Exiting without trying to close the associated JIRA.")
    else:
        print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
        print("Exiting without trying to close the associated JIRA.")
0570
if __name__ == "__main__":
    import doctest

    # Run the module's doctests first and refuse to start if any of them fails.
    doctest_failures = doctest.testmod()[0]
    if doctest_failures:
        sys.exit(-1)
    try:
        main()
    except BaseException:
        # Whatever interrupted the run, restore the git state before propagating it.
        clean_up()
        raise