Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env python
0002 
0003 #
0004 # Licensed to the Apache Software Foundation (ASF) under one or more
0005 # contributor license agreements.  See the NOTICE file distributed with
0006 # this work for additional information regarding copyright ownership.
0007 # The ASF licenses this file to You under the Apache License, Version 2.0
0008 # (the "License"); you may not use this file except in compliance with
0009 # the License.  You may obtain a copy of the License at
0010 #
0011 #    http://www.apache.org/licenses/LICENSE-2.0
0012 #
0013 # Unless required by applicable law or agreed to in writing, software
0014 # distributed under the License is distributed on an "AS IS" BASIS,
0015 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 # See the License for the specific language governing permissions and
0017 # limitations under the License.
0018 #
0019 # This file contains helper methods used in creating a release.
0020 
0021 import re
0022 import sys
0023 from subprocess import Popen, PIPE
0024 
0025 try:
0026     from jira.client import JIRA
0027     # Old versions have JIRAError in exceptions package, new (0.5+) in utils.
0028     try:
0029         from jira.exceptions import JIRAError
0030     except ImportError:
0031         from jira.utils import JIRAError
0032 except ImportError:
0033     print("This tool requires the jira-python library")
0034     print("Install using 'sudo pip install jira'")
0035     sys.exit(-1)
0036 
0037 try:
0038     from github import Github
0039     from github import GithubException
0040 except ImportError:
0041     print("This tool requires the PyGithub library")
0042     print("Install using 'sudo pip install PyGithub'")
0043     sys.exit(-1)
0044 
0045 try:
0046     import unidecode
0047 except ImportError:
0048     print("This tool requires the unidecode library to decode obscure github usernames")
0049     print("Install using 'sudo pip install unidecode'")
0050     sys.exit(-1)
0051 
# Python 2 compatibility: alias input() to raw_input() so prompts in this file
# always receive the typed line as a string.
# Compare the numeric major version; comparing the sys.version *string* is
# lexicographic and would misclassify a version string such as "10.0".
if sys.version_info[0] < 3:
    input = raw_input  # noqa

# Contributors list file name
contributors_file_name = "contributors.txt"
0057 
0058 
0059 # Prompt the user to answer yes or no until they do so
def yesOrNoPrompt(msg):
    """Ask a yes/no question on the console until the answer is "y" or "n".

    :param msg: the question text; " [y/n]: " is appended to form the prompt.
    :return: True if the user answered "y", False if the user answered "n".
    """
    prompt = "%s [y/n]: " % msg
    # Loop rather than recurse: every invalid answer used to add a stack
    # frame, so enough bad answers would exhaust the recursion limit.
    while True:
        response = input(prompt)
        if response in ("y", "n"):
            return response == "y"
0065 
0066 
# Utility functions to run git commands (written with Git 1.8.5)
def run_cmd(cmd):
    """Run a command and return everything it wrote to standard output.

    :param cmd: the command as an argument list, e.g. ["git", "log"].
    :return: the captured standard output as a native ``str``.
    """
    output = Popen(cmd, stdout=PIPE).communicate()[0]
    # On Python 3 the pipe yields bytes, but callers in this file split and
    # search the result with str patterns. Decode so a str is always returned;
    # on Python 2 the output already is a str and is passed through untouched.
    if not isinstance(output, str):
        output = output.decode("utf-8", "replace")
    return output
0070 
0071 
def run_cmd_error(cmd):
    """Run a command and return everything it wrote to standard error.

    Standard output is captured as well, but discarded.

    :param cmd: the command as an argument list, e.g. ["git", "show", "v1"].
    :return: the captured standard error as a native ``str``.
    """
    errors = Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
    # On Python 3 the pipe yields bytes, while callers search the result with
    # str patterns. Decode so a str is always returned; on Python 2 the value
    # already is a str and is passed through untouched.
    if not isinstance(errors, str):
        errors = errors.decode("utf-8", "replace")
    return errors
0074 
0075 
def get_date(commit_hash):
    """Return the ``%cd`` (committer date) field that ``git show`` prints for a commit.

    :param commit_hash: the revision handed to ``git show``.
    :return: whatever ``run_cmd`` returns for that git invocation.
    """
    git_args = ["git", "show", "--quiet", "--pretty=format:%cd", commit_hash]
    return run_cmd(git_args)
0078 
0079 
def tag_exists(tag):
    """Return whether ``git show <tag>`` ran without reporting an error.

    :param tag: the tag (or other revision name) to look up.
    :return: True when git's standard error output does not contain the word
        "error", False otherwise.
    """
    stderr = run_cmd_error(["git", "show", tag])
    # The captured stream may arrive as bytes on Python 3; searching bytes for
    # a str raises TypeError, so normalise to str before the substring test.
    if not isinstance(stderr, str):
        stderr = stderr.decode("utf-8", "replace")
    # NOTE(review): detection relies on the literal word "error" appearing in
    # git's stderr (this file was written against Git 1.8.5) -- confirm that
    # the git version in use words a missing revision this way.
    return "error" not in stderr
0083 
0084 
0085 # A type-safe representation of a commit
class Commit:
    """Value holder for a commit's hash, author, title and optional PR number."""

    def __init__(self, _hash, author, title, pr_number=None):
        # pr_number defaults to None when no pull request is known
        self._hash, self.author = _hash, author
        self.title, self.pr_number = title, pr_number

    def get_hash(self):
        """Return the commit hash given at construction."""
        return self._hash

    def get_author(self):
        """Return the commit author."""
        return self.author

    def get_title(self):
        """Return the commit title."""
        return self.title

    def get_pr_number(self):
        """Return the pull request number, or None when there is none."""
        return self.pr_number

    def __str__(self):
        # The PR suffix is only rendered for a truthy PR number; otherwise the
        # last field is empty, which leaves a trailing space in the result.
        if self.pr_number:
            closes_pr = "(Closes #%s)" % self.pr_number
        else:
            closes_pr = ""
        fields = (self._hash, self.author, self.title, closes_pr)
        return "%s %s %s %s" % fields
0108 
0109 
0110 # Return all commits that belong to the specified tag.
0111 #
0112 # Under the hood, this runs a `git log` on that tag and parses the fields
0113 # from the command output to construct a list of Commit objects. Note that
0114 # because certain fields reside in the commit description and cannot be parsed
0115 # through the Github API itself, we need to do some intelligent regex parsing
0116 # to extract those fields.
0117 #
0118 # This is written using Git 1.8.5.
def get_commits(tag):
    """Return the commits listed by ``git log <tag>`` as Commit objects.

    Each commit is printed with custom markers around the abbreviated hash,
    the author name and the subject, followed by the commit body. The body is
    searched for a "Closes #<number> from <user>/" line, which supplies the
    pull request number and the github username.

    :param tag: the tag (or other revision) passed to ``git log``.
    :return: a list of Commit objects, in the order git printed them.
    :raises SystemExit: if a commit in the output does not contain exactly one
        end marker or exactly two field markers.
    """
    commit_start_marker = "|=== COMMIT START MARKER ===|"
    commit_end_marker = "|=== COMMIT END MARKER ===|"
    field_end_marker = "|=== COMMIT FIELD END MARKER ===|"
    log_format =\
        commit_start_marker + "%h" +\
        field_end_marker + "%an" +\
        field_end_marker + "%s" +\
        commit_end_marker + "%b"
    output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + log_format, tag])
    # The command output may arrive as bytes on Python 3; the str markers
    # below cannot split bytes, so normalise to str first. On Python 2 the
    # output already is a str and is left untouched.
    if not isinstance(output, str):
        output = output.decode("utf-8", "replace")
    commits = []
    raw_commits = [c for c in output.split(commit_start_marker) if c]
    for raw_commit in raw_commits:
        if raw_commit.count(commit_end_marker) != 1:
            print("Commit end marker not found in commit: ")
            for line in raw_commit.split("\n"):
                print(line)
            sys.exit(1)
        # Separate commit digest from the body
        # From the digest we extract the hash, author and the title
        # From the body, we extract the PR number and the github username
        [commit_digest, commit_body] = raw_commit.split(commit_end_marker)
        if commit_digest.count(field_end_marker) != 2:
            sys.exit("Unexpected format in commit: %s" % commit_digest)
        [_hash, author, title] = commit_digest.split(field_end_marker)
        # The PR number and github username is in the commit message
        # itself and cannot be accessed through any Github API
        pr_number = None
        match = re.search(r"Closes #([0-9]+) from ([^/\s]+)/", commit_body)
        if match:
            [pr_number, github_username] = match.groups()
            # If the author name is not valid, use the github
            # username so we can translate it properly later
            if not is_valid_author(author):
                author = github_username
        # Guard against special characters
        try:               # Python 2
            author = unicode(author, "UTF-8")
        except NameError:  # Python 3
            author = str(author)
        author = unidecode.unidecode(author).strip()
        commits.append(Commit(_hash, author, title, pr_number))
    return commits
0163 
# Maintain a mapping for translating issue types to contributions in the release notes
# This serves an additional function of warning the user against unknown issue types
# Note: This list is partially derived from this link:
# https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/issuetypes
# Keep these in lower case
#
# Keys are issue type names; translate_issue_type() lower-cases its argument
# before looking it up here, which is why the keys must be lower case.
# Values are the phrases returned for each known issue type.
known_issue_types = {
    "bug": "bug fixes",
    "build": "build fixes",
    "dependency upgrade": "build fixes",
    "improvement": "improvements",
    "new feature": "new features",
    "documentation": "documentation",
    "test": "test",
    # NOTE(review): the two entries below map to the singular "improvement",
    # while the "improvement" key maps to the plural "improvements" -- confirm
    # whether these are meant to be distinct categories.
    "task": "improvement",
    "sub-task": "improvement"
}
0180 
# Maintain a mapping for translating component names when creating the release notes
# This serves an additional function of warning the user against unknown components
# Note: This list is largely derived from this link:
# https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/components
#
# Keys are lower-case component names; translate_component() lower-cases its
# argument before looking it up here. Values are the names returned for each
# known component. Several keys share the value CORE_COMPONENT.
CORE_COMPONENT = "Core"
known_components = {
    "block manager": CORE_COMPONENT,
    "build": CORE_COMPONENT,
    "deploy": CORE_COMPONENT,
    "documentation": CORE_COMPONENT,
    "examples": CORE_COMPONENT,
    "graphx": "GraphX",
    "input/output": CORE_COMPONENT,
    "java api": "Java API",
    # "k8s" and "kubernetes" both translate to the same name
    "k8s": "Kubernetes",
    "kubernetes": "Kubernetes",
    "mesos": "Mesos",
    # "ml" and "mllib" both translate to the same name
    "ml": "MLlib",
    "mllib": "MLlib",
    "project infra": "Project Infra",
    "pyspark": "PySpark",
    "shuffle": "Shuffle",
    "spark core": CORE_COMPONENT,
    "spark shell": CORE_COMPONENT,
    "sql": "SQL",
    "streaming": "Streaming",
    "web ui": "Web UI",
    "windows": "Windows",
    "yarn": "YARN"
}
0211 
0212 
0213 # Translate issue types using a format appropriate for writing contributions
0214 # If an unknown issue type is encountered, warn the user
def translate_issue_type(issue_type, issue_id, warnings):
    """Map an issue type to the phrase registered in ``known_issue_types``.

    :param issue_type: the issue type name; matched case-insensitively.
    :param issue_id: identifier quoted in the warning for unknown types.
    :param warnings: list that receives a message when the type is unknown.
    :return: the registered phrase, or the lower-cased issue type itself when
        it is not registered.
    """
    normalized = issue_type.lower()
    try:
        return known_issue_types[normalized]
    except KeyError:
        # Unknown type: record a warning and fall back to the raw name
        warnings.append("Unknown issue type \"%s\" (see %s)" % (normalized, issue_id))
        return normalized
0222 
0223 
0224 # Translate component names using a format appropriate for writing contributions
0225 # If an unknown component is encountered, warn the user
def translate_component(component, commit_hash, warnings):
    """Map a component name to the name registered in ``known_components``.

    :param component: the component name; matched case-insensitively.
    :param commit_hash: identifier quoted in the warning for unknown components.
    :param warnings: list that receives a message when the component is unknown.
    :return: the registered name, or the lower-cased component itself when it
        is not registered.
    """
    normalized = component.lower()
    try:
        return known_components[normalized]
    except KeyError:
        # Unknown component: record a warning and fall back to the raw name
        warnings.append("Unknown component \"%s\" (see %s)" % (normalized, commit_hash))
        return normalized
0233 
0234 
0235 # Parse components in the commit message
0236 # The returned components are already filtered and translated
def find_components(commit, commit_hash):
    """Return the known components named in ``[...]`` tags of a commit message.

    The message is lower-cased, every bracketed tag is extracted, and each tag
    that is a key of ``known_components`` is translated to its registered
    name. Unknown tags are silently skipped.

    :param commit: the commit message (or title) to scan.
    :param commit_hash: passed through to ``translate_component``.
    :return: a list of translated component names, in order of appearance.
    """
    # Capture the text *inside* the brackets. Matching the brackets themselves
    # produced strings like "[sql]", which never equal the bracket-less keys
    # of known_components, so nothing was ever found. Allowing any character
    # except brackets also lets keys such as "web ui" or "input/output" match.
    tags = re.findall(r"\[([^\[\]]+)\]", commit.lower())
    components = []
    for tag in tags:
        name = tag.strip()
        if name in known_components:
            # Only known names reach this call, so the throwaway warnings
            # list never receives anything.
            components.append(translate_component(name, commit_hash, []))
    return components
0242 
0243 
0244 # Join a list of strings in a human-readable manner
0245 # e.g. ["Juice"] -> "Juice"
0246 # e.g. ["Juice", "baby"] -> "Juice and baby"
0247 # e.g. ["Juice", "baby", "moon"] -> "Juice, baby, and moon"
def nice_join(str_list):
    """Join strings into an English-style enumeration.

    Nothing yields "", one item is returned as is, two items are joined with
    " and ", and three or more are comma separated with ", and " before the
    last item.

    :param str_list: an iterable of strings (may be a set).
    :return: the joined string.
    """
    items = list(str_list)  # materialize: the argument may be a set
    count = len(items)
    if count == 0:
        return ""
    if count == 1:
        return items[0]
    if count == 2:
        return " and ".join(items)
    leading = ", ".join(items[:-1])
    return leading + ", and " + items[-1]
0258 
0259 
0260 # Return the full name of the specified user on Github
0261 # If the user doesn't exist, return None
def get_github_name(author, github_client):
    """Look up the ``name`` of a Github user.

    :param author: the username passed to ``github_client.get_user``.
    :param github_client: the client to query; a falsy value skips the lookup.
    :return: the user's ``name`` attribute, or None when no client was given
        or the lookup failed with status 404.
    :raises GithubException: for any failure whose status is not 404.
    """
    if not github_client:
        return None
    try:
        return github_client.get_user(author).name
    except GithubException as e:
        # A 404 status is treated as "no such user"; anything else propagates
        if e.status == 404:
            return None
        raise e
0271 
0272 
0273 # Return the full name of the specified user on JIRA
0274 # If the user doesn't exist, return None
def get_jira_name(author, jira_client):
    """Look up the ``displayName`` of a JIRA user.

    :param author: the username passed to ``jira_client.user``.
    :param jira_client: the client to query; a falsy value skips the lookup.
    :return: the user's ``displayName`` attribute, or None when no client was
        given or the lookup failed with status code 404.
    :raises JIRAError: for any failure whose status code is not 404.
    """
    if not jira_client:
        return None
    try:
        return jira_client.user(author).displayName
    except JIRAError as e:
        # A 404 status code is treated as "no such user"; anything else propagates
        if e.status_code == 404:
            return None
        raise e
0284 
0285 
0286 # Return whether the given name is in the form <First Name><space><Last Name>
def is_valid_author(author):
    """Return whether ``author`` looks like a "<First Name> <Last Name>" pair.

    A name qualifies when it is non-empty, contains at least one space and
    contains no digit from 0 to 9.

    :param author: the name to check; may be None or empty.
    :return: True or False.
    """
    if not author or " " not in author:
        return False
    return re.search("[0-9]", author) is None
0291 
0292 
0293 # Capitalize the first letter of each word in the given author name
def capitalize_author(author):
    """Capitalize the first letter of every word in an author name.

    Words are separated by single spaces; empty pieces produced by repeated
    spaces are dropped, and the rest of each word is left unchanged.

    :param author: the name to transform; may be None or empty.
    :return: the transformed name, or None when ``author`` is falsy.
    """
    if not author:
        return None
    capitalized = []
    for word in author.split(" "):
        if not word:
            continue  # skip the empty strings left by consecutive spaces
        capitalized.append(word[0].capitalize() + word[1:])
    return " ".join(capitalized)