dev/create-release/generate-contributors.py

0001 #!/usr/bin/env python
0002
0003 #
0004 # Licensed to the Apache Software Foundation (ASF) under one or more
0005 # contributor license agreements.  See the NOTICE file distributed with
0006 # this work for additional information regarding copyright ownership.
0007 # The ASF licenses this file to You under the Apache License, Version 2.0
0008 # (the "License"); you may not use this file except in compliance with
0009 # the License.  You may obtain a copy of the License at
0010 #
0011 #    http://www.apache.org/licenses/LICENSE-2.0
0012 #
0013 # Unless required by applicable law or agreed to in writing, software
0014 # distributed under the License is distributed on an "AS IS" BASIS,
0015 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 # See the License for the specific language governing permissions and
0017 # limitations under the License.
0018 #
0019 # This script automates the process of creating release notes.
0020
0021 import os
0022 import re
0023 import sys
0024
0025 from releaseutils import *
0026
# You must set the following before use!
# Each value is read from the environment variable of the same name and falls
# back to the default given here.
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
RELEASE_TAG = os.environ.get("RELEASE_TAG", "v1.2.0-rc2")
PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")

# `raw_input` only exists on Python 2; on Python 3 the equivalent builtin is
# `input`. Resolve the right one once so the prompts below work on either.
try:
    _read_line = raw_input  # Python 2 (or provided by an earlier import)
except NameError:
    _read_line = input  # Python 3

# While tag_exists() rejects a tag, keep prompting the user for another one
while not tag_exists(RELEASE_TAG):
    RELEASE_TAG = _read_line("Please provide a valid release tag: ")
while not tag_exists(PREVIOUS_RELEASE_TAG):
    print("Please specify the previous release tag.")
    PREVIOUS_RELEASE_TAG = _read_line(
        "For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
0039
# Work out which commits of the new tag were not already part of the old tag.
# A commit is treated as already released when either its git hash or its
# (non-empty) PR number shows up among the previous release's commits.
print("Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
release_commits = get_commits(RELEASE_TAG)
previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG)

# Index the previous release by hash and by PR number in a single pass
previous_release_hashes = set()
previous_release_prs = set()
for old_commit in previous_release_commits:
    previous_release_hashes.add(old_commit.get_hash())
    old_pr_number = old_commit.get_pr_number()
    if old_pr_number:
        previous_release_prs.add(old_pr_number)


def _in_previous_release(candidate):
    """Return True if the commit's hash or PR number was seen in the previous release."""
    if candidate.get_hash() in previous_release_hashes:
        return True
    pr_number = candidate.get_pr_number()
    # Commits without a PR number can only be matched by hash
    return bool(pr_number) and pr_number in previous_release_prs


new_commits = []
for this_commit in release_commits:
    if not _in_previous_release(this_commit):
        new_commits.append(this_commit)
if len(new_commits) == 0:
    sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
0063
# Summarize the commit range so the user can confirm that it is correct
print("\n==================================================================================")
for summary_template, summary_value in (
        ("JIRA server: %s", JIRA_API_BASE),
        ("Release tag: %s", RELEASE_TAG),
        ("Previous release tag: %s", PREVIOUS_RELEASE_TAG),
        ("Number of commits in this range: %s", len(new_commits))):
    print(summary_template % summary_value)
print("")
0071
0072
def print_indented(_list):
    """Print each element of ``_list`` on its own line, prefixed by two spaces."""
    for entry in _list:
        print("  %s" % entry)
# Optionally list every commit in the range, then require an explicit
# confirmation from the user before doing any further work
show_all_commits = yesOrNoPrompt("Show all commits?")
if show_all_commits:
    print_indented(new_commits)
print("==================================================================================\n")
range_confirmed = yesOrNoPrompt("Does this look correct?")
if not range_confirmed:
    sys.exit("Ok, exiting")
0081
# Buckets used to filter out special commits. Each name is bound to its own
# empty list; filtered_commits collects the commits that are kept.
releases, maintenance, reverts, nojiras, filtered_commits = [], [], [], [], []
0088
0089
def is_release(commit_title):
    """Return True if the commit title marks a release-related commit.

    The title matches when, ignoring case, it contains "[release]",
    "preparing spark release" or "preparing development version", or when it
    contains the exact (case-sensitive) text "CHANGES.txt".
    """
    lowered = commit_title.lower()
    markers = ("[release]", "preparing spark release", "preparing development version")
    return any(marker in lowered for marker in markers) or "CHANGES.txt" in commit_title
0095
0096
def is_maintenance(commit_title):
    """Return True if the title contains "maintenance" or "manually close" (case-insensitive)."""
    lowered = commit_title.lower()
    return any(phrase in lowered for phrase in ("maintenance", "manually close"))
0100
0101
def has_no_jira(commit_title):
    """Return True if the title contains no "SPARK-<digits>" issue key (case-insensitive)."""
    return re.search("SPARK-[0-9]+", commit_title.upper()) is None
0104
0105
def is_revert(commit_title):
    """Return True if the title contains "revert" anywhere (case-insensitive)."""
    lowered_title = commit_title.lower()
    return "revert" in lowered_title
0108
0109
def is_docs(commit_title):
    """Return True if the commit title looks documentation-related.

    The check is case-insensitive. A title matches when it contains the
    pattern "docs*" (i.e. "doc" followed by any number of "s", so any title
    containing "doc" matches) or the phrase "programming guide".

    Always returns a bool; previously the list of regex matches leaked out of
    this predicate when the pattern matched.
    """
    lowered = commit_title.lower()
    return bool(re.search(r"docs*", lowered)) or "programming guide" in lowered
0113
0114
# Sort every new commit into exactly one bucket. The special categories are
# checked in a fixed order (release, maintenance, revert) and the first match wins.
special_buckets = (
    (is_release, releases),
    (is_maintenance, maintenance),
    (is_revert, reverts),
)
for commit in new_commits:
    title = commit.get_title()
    if not title:
        # Commits without a title are dropped entirely
        continue
    for matches, bucket in special_buckets:
        if matches(title):
            bucket.append(commit)
            break
    else:
        # Not a special commit: keep it if it is a docs commit (docs may not
        # have JIRA numbers) or if its title references a JIRA issue
        if is_docs(title) or not has_no_jira(title):
            filtered_commits.append(commit)
        else:
            nojiras.append(commit)
0131
# Warn against ignored commits.
# Each group is (summary template, heading template, commits); only the
# non-empty groups are reported, in this fixed order.
ignored_groups = [
    group for group in (
        ("Found %d release commits", "Release (%d)", releases),
        ("Found %d maintenance commits", "Maintenance (%d)", maintenance),
        ("Found %d revert commits", "Revert (%d)", reverts),
        ("Found %d commits with no JIRA", "No JIRA (%d)", nojiras),
    )
    if group[2]
]
if ignored_groups:
    print("\n==================================================================================")
    for summary_template, _heading, ignored in ignored_groups:
        print(summary_template % len(ignored))
    print("* Warning: these commits will be ignored.\n")
    if yesOrNoPrompt("Show ignored commits?"):
        for _summary, heading_template, ignored in ignored_groups:
            print(heading_template % len(ignored))
            print_indented(ignored)
    print("==================== Warning: the above commits will be ignored ==================\n")

# Ask for confirmation before processing the commits that survived the filtering
prompt_msg = "%d commits left to process after filtering. Ok to proceed?" % len(filtered_commits)
proceed = yesOrNoPrompt(prompt_msg)
if not proceed:
    sys.exit("Ok, exiting.")
0161
# Contributions grouped by author, in the form
#   author name -> { contribution type -> set of Spark components }
# For instance:
# {
#   'Andrew Or': {
#     'bug fixes': {'windows', 'core', 'web ui'},
#     'improvements': {'core'}
#   },
#   'Tathagata Das': {
#     'bug fixes': {'streaming'},
#     'new feature': {'streaming'}
#   }
# }
author_info = {}

# Invalid author name -> set of JIRA issue keys taken from that author's commits,
# e.g. 'andrewor14' -> {'SPARK-2413', 'SPARK-3551', 'SPARK-3471'}
invalid_authors = {}

# Warnings accumulated during processing; they are shown to the user at the end
warnings = []

# JIRA client pointed at the server configured in JIRA_API_BASE
jira_options = {"server": JIRA_API_BASE}
jira_client = JIRA(options=jira_options)
print("\n=========================== Compiling contributor list ===========================")


def populate(issue_type, components):
    """Record ``components`` under ``issue_type`` for the current author in author_info.

    The author is the module-level ``author`` at the time of the call, i.e. the
    author of the commit currently being processed by the loop below.
    An empty or missing ``components`` is replaced by [CORE_COMPONENT].
    """
    components = components or [CORE_COMPONENT]  # assume core if no components provided
    author_info.setdefault(author, {}).setdefault(issue_type, set()).update(components)


for commit in filtered_commits:
    _hash = commit.get_hash()
    title = commit.get_title()
    issues = re.findall("SPARK-[0-9]+", title.upper())
    author = commit.get_author()
    date = get_date(_hash)
    if is_valid_author(author):
        author = capitalize_author(author)
    else:
        # Remember the invalid name together with all of its issues so that
        # it can be translated later
        invalid_authors.setdefault(author, set()).update(issues)
    # Components parsed from the commit title, if any
    commit_components = find_components(title, _hash)
    # Look up every referenced issue in JIRA and merge its type and components
    # into author_info; a failure for one issue is reported and skipped
    for issue in issues:
        try:
            jira_issue = jira_client.issue(issue)
            jira_type = translate_issue_type(jira_issue.fields.issuetype.name, issue, warnings)
            jira_components = []
            for jira_component in jira_issue.fields.components:
                jira_components.append(
                    translate_component(jira_component.name, _hash, warnings))
            populate(jira_type, set(jira_components + commit_components))
        except Exception as e:
            print("Unexpected error:", e)
    # Docs commits without an associated JIRA are credited as documentation
    if not issues and is_docs(title):
        populate("documentation", commit_components)
    print("  Processed commit %s authored by %s on %s" % (_hash, author, date))
print("==================================================================================\n")
0231
# Write the contributors file, one line per author, ordered by author name.
# A contribution summary is still built (and asserted non-empty) per author,
#   e.g. Bug fixes in Windows, Core, and Web UI; improvements in Core
#   e.g. Bug fixes and new features in Streaming
# but only the author name is currently written to the file.
#
# sorted() replaces keys() followed by sort(): on Python 3 keys() returns a
# view object that has no sort() method.
authors = sorted(author_info)
# The with-block guarantees the file is closed even if an error occurs midway
with open(contributors_file_name, "w") as contributors_file:
    for author in authors:
        contribution = ""
        components = set()
        issue_types = set()
        for issue_type, comps in author_info[author].items():
            components.update(comps)
            issue_types.add(issue_type)
        # If there is only one component, mention it only once
        # e.g. Bug fixes, improvements in MLlib
        if len(components) == 1:
            contribution = "%s in %s" % (nice_join(issue_types), next(iter(components)))
        # Otherwise, group contributions by issue types instead of modules
        # e.g. Bug fixes in MLlib, Core, and Streaming; documentation in YARN
        else:
            contributions = ["%s in %s" % (issue_type, nice_join(comps))
                             for issue_type, comps in author_info[author].items()]
            contribution = "; ".join(contributions)
        # Do not use python's capitalize() on the whole string to preserve case
        assert contribution
        contribution = contribution[0].capitalize() + contribution[1:]
        # If the author name is invalid, use an intermediate format that
        # can be translated through translate-contributors.py later
        # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
        # The issue keys are sorted so the output does not depend on set ordering
        author_issues = invalid_authors.get(author)
        if author_issues:
            author = author + "/" + "/".join(sorted(author_issues))
        # line = " * %s -- %s" % (author, contribution)
        line = author
        contributors_file.write(line + "\n")
print("Contributors list is successfully written to %s!" % contributors_file_name)
0269
# If any author names were invalid, add instructions for translating them
if invalid_authors:
    warnings.append("Found the following invalid authors:")
    warnings.extend("\t%s" % invalid_name for invalid_name in invalid_authors)
    warnings.append("Please run './translate-contributors.py' to translate them.")

# Show everything that was collected in warnings during the run
if len(warnings) > 0:
    print("\n============ Warnings encountered while creating the contributor list ============")
    for message in warnings:
        print(message)
    print("Please correct these in the final contributors list at %s." % contributors_file_name)
    print("==================================================================================\n")