Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env python
0002 
0003 #
0004 # Licensed to the Apache Software Foundation (ASF) under one or more
0005 # contributor license agreements.  See the NOTICE file distributed with
0006 # this work for additional information regarding copyright ownership.
0007 # The ASF licenses this file to You under the Apache License, Version 2.0
0008 # (the "License"); you may not use this file except in compliance with
0009 # the License.  You may obtain a copy of the License at
0010 #
0011 #    http://www.apache.org/licenses/LICENSE-2.0
0012 #
0013 # Unless required by applicable law or agreed to in writing, software
0014 # distributed under the License is distributed on an "AS IS" BASIS,
0015 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 # See the License for the specific language governing permissions and
0017 # limitations under the License.
0018 #
0019 # Utility for updating JIRA's with information about Github pull requests
0020 
0021 import json
0022 import os
0023 import re
0024 import sys
0025 if sys.version < '3':
0026     from urllib2 import urlopen
0027     from urllib2 import Request
0028     from urllib2 import HTTPError
0029 else:
0030     from urllib.request import urlopen
0031     from urllib.request import Request
0032     from urllib.error import HTTPError
0033 
0034 try:
0035     import jira.client
0036 except ImportError:
0037     print("This tool requires the jira-python library")
0038     print("Install using 'sudo pip install jira'")
0039     sys.exit(-1)
0040 
# User-facing settings. Those read from the environment fall back to the
# default shown when the variable is not set.
GITHUB_API_BASE = os.getenv("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
GITHUB_OAUTH_KEY = os.getenv("GITHUB_OAUTH_KEY")
JIRA_PROJECT_NAME = os.getenv("JIRA_PROJECT_NAME", "SPARK")
JIRA_API_BASE = os.getenv("JIRA_API_BASE", "https://issues.apache.org/jira")
JIRA_USERNAME = os.getenv("JIRA_USERNAME", "apachespark")
JIRA_PASSWORD = os.getenv("JIRA_PASSWORD", "XXX")
# Upper bound on the number of updates performed in a single run.
MAX_UPDATES = int(os.getenv("MAX_UPDATES", "100000"))
# Lowest PR number that still receives a JIRA comment. Helps to avoid
# "notification overload" the first time the tool is run.
MIN_COMMENT_PR = int(os.getenv("MIN_COMMENT_PR", "1496"))

# Marker file remembering the highest PR number seen by a previous run.
# It exists purely as an optimization: ASF JIRA is slow to query, so issues
# tied to PRs that were already examined are not checked again.
MAX_FILE = ".github-jira-max"
0058 
0059 
def get_url(url):
    """Open ``url`` and return the urllib response object.

    The GitHub OAuth token is attached only when one is configured and only
    for URLs under GITHUB_API_BASE. This function is also used to query the
    JIRA REST API, which must not receive the GitHub credential, and sending
    the literal header "token None" would make GitHub reject requests that
    could otherwise proceed unauthenticated.

    If the server answers with an HTTP error status, a message is printed and
    the process exits with status -1 (i.e. SystemExit is raised).
    """
    request = Request(url)
    if GITHUB_OAUTH_KEY and url.startswith(GITHUB_API_BASE):
        request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY)
    try:
        return urlopen(request)
    except HTTPError:
        print("Unable to fetch URL, exiting: %s" % url)
        sys.exit(-1)
0068 
0069 
def get_json(urllib_response):
    """Read the whole response body, decode it as UTF-8 and parse it as JSON."""
    raw_body = urllib_response.read()
    text = raw_body.decode("utf-8")
    return json.loads(text)
0072 
0073 
def get_jira_prs():
    """Collect the GitHub pull requests whose title mentions a JIRA issue.

    Walks the pages of the ``/pulls`` listing under GITHUB_API_BASE and scans
    every title for ids of the form ``<JIRA_PROJECT_NAME>-`` followed by 4 or
    5 digits.

    Returns a list of (JIRA id, JSON dict) tuples, e.g.
    [('SPARK-1234', {.. json ..}), ('SPARK-5687', {.. json ..})].
    A pull request that mentions several issues appears once per issue.
    """
    jira_id_pattern = re.compile(JIRA_PROJECT_NAME + "-[0-9]{4,5}")
    result = []
    # GitHub numbers pages starting at 1. Starting at 0 and then moving on to
    # 1 risks fetching (and recording) the first page twice.
    page_num = 1
    while True:
        page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)

        for pull in get_json(page):
            result.extend((jira, pull) for jira in jira_id_pattern.findall(pull['title']))

        # Check if there is another page. Look the header up by name:
        # iterating the headers object yields only header *names* on
        # Python 3, so the value (which carries rel="next") would never be
        # inspected and pagination would stop after the first page.
        link_header = page.headers.get("Link") or ""
        if 'rel="next"' not in link_header:
            break
        page_num += 1
    return result
0096 
0097 
def set_max_pr(max_val):
    """Persist ``max_val`` (the largest PR number seen) to MAX_FILE.

    The file is overwritten. A context manager is used so the handle is
    closed even if the write fails.
    """
    with open(MAX_FILE, 'w') as f:
        f.write("%s" % max_val)
    print("Writing largest PR number seen: %s" % max_val)
0103 
0104 
def get_max_pr():
    """Return the largest PR number recorded in MAX_FILE by a previous run.

    Returns 0 when the file does not exist. An empty or otherwise unparsable
    file (for instance one left behind by an interrupted write) is also
    treated as 0, so that a damaged marker does not make every later run
    fail; the cost is only that already-seen PRs get re-checked.
    """
    if not os.path.exists(MAX_FILE):
        return 0
    with open(MAX_FILE, 'r') as f:
        contents = f.read()
    try:
        result = int(contents)
    except ValueError:
        print("Ignoring unreadable contents of %s" % MAX_FILE)
        return 0
    print("Read largest PR number previously seen: %s" % result)
    return result
0112 
0113 
def build_pr_component_dic(jira_prs):
    """Map every PR number to its GitHub labels and its JIRA components.

    ``jira_prs`` is an iterable of (JIRA issue id, pull request JSON dict)
    pairs. Each issue is fetched from the JIRA REST API to read its
    component names.

    Returns a dict ``{PR number: (PR label names, JIRA component names)}``.
    Both members of the tuple are sets of upper-cased names; when several
    issues refer to the same PR their components are merged into one set.
    """
    print("Build PR dictionary")
    components_by_pr = {}
    for issue_id, pull in jira_prs:
        print(issue_id)
        issue_url = JIRA_API_BASE + "/rest/api/2/issue/" + issue_id
        issue_json = get_json(get_url(issue_url))
        component_names = [comp['name'].upper() for comp in issue_json['fields']['components']]
        number = pull['number']
        if number not in components_by_pr:
            # First issue seen for this PR: record the PR's current labels.
            label_names = set(lbl['name'].upper() for lbl in pull['labels'])
            components_by_pr[number] = (label_names, set(component_names))
        else:
            components_by_pr[number][1].update(component_names)
    return components_by_pr
0127 
0128 
def reset_pr_labels(pr_num, jira_components):
    """Replace the labels of GitHub PR ``pr_num`` with ``jira_components``.

    Issues a PUT to the PR's ``/labels`` endpoint under GITHUB_API_BASE.
    ``jira_components`` is an iterable of label names. If GitHub answers with
    an HTTP error status, a message is printed and the process exits with
    status -1.
    """
    url = '%s/issues/%s/labels' % (GITHUB_API_BASE, pr_num)
    names = sorted(jira_components)
    # Human-readable form, used only for the log line below.
    labels = ', '.join(('"%s"' % c) for c in names)
    # Serialize with json.dumps so that names containing quotes or
    # backslashes still yield a valid JSON request body.
    payload = json.dumps({"labels": names}).encode('utf-8')
    try:
        request = Request(url, data=payload)
        request.add_header('Content-Type', 'application/json')
        request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY)
        # Overriding get_method keeps this working on Python 2, where
        # Request has no ``method`` argument.
        request.get_method = lambda: 'PUT'
        urlopen(request)
        print("Set %s with labels %s" % (pr_num, labels))
    except HTTPError:
        print("Unable to update PR labels, exiting: %s" % url)
        sys.exit(-1)
0142 
0143 
# JIRA client used further down to add remote links and comments.
jira_credentials = (JIRA_USERNAME, JIRA_PASSWORD)
jira_client = jira.client.JIRA({'server': JIRA_API_BASE}, basic_auth=jira_credentials)

# All (JIRA id, PR JSON) pairs currently listed on GitHub.
jira_prs = get_jira_prs()

# Drop every PR at or below the high-water mark left by the previous run.
previous_max = get_max_pr()
print("Retrieved %s JIRA PR's from Github" % len(jira_prs))
jira_prs = list(filter(lambda entry: int(entry[1]['number']) > previous_max, jira_prs))
print("%s PR's remain after excluding visted ones" % len(jira_prs))
0153 
# Link every remaining PR to its JIRA issue (oldest PR first), stopping after
# MAX_UPDATES links have been added.
num_updates = 0
considered = []
for issue, pr in sorted(jira_prs, key=lambda kv: int(kv[1]['number'])):
    if num_updates >= MAX_UPDATES:
        break
    pr_num = int(pr['number'])

    print("Checking issue %s" % issue)
    considered.append(pr_num)

    url = pr['html_url']
    title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login'])
    try:
        page = get_json(get_url(JIRA_API_BASE + "/rest/api/2/issue/" + issue + "/remotelink"))
        # Build the list eagerly so that a malformed entry raises here, inside
        # the try block, rather than later when the result is searched.
        existing_links = [link['object']['url'] for link in page]
    except (Exception, SystemExit):
        # get_url() reports HTTP errors by calling sys.exit(), so SystemExit
        # has to be caught for a missing issue to be skipped instead of
        # ending the run. KeyboardInterrupt is deliberately left alone so
        # that Ctrl-C still stops the script.
        print("Failure reading JIRA %s (does it exist?)" % issue)
        print(sys.exc_info()[0])
        continue

    # Nothing to do if the issue already links to this PR.
    if url in existing_links:
        continue

    icon = {"title": "Pull request #%s" % pr['number'],
            "url16x16": "https://assets-cdn.github.com/favicon.ico"}
    destination = {"title": title, "url": url, "icon": icon}
    # For all possible fields see:
    # https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links
    # application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"}
    jira_client.add_remote_link(issue, destination)

    comment = "User '%s' has created a pull request for this issue:" % pr['user']['login']
    comment += "\n%s" % pr['html_url']
    # Older PRs get the link but no comment, to limit notification noise.
    if pr_num >= MIN_COMMENT_PR:
        jira_client.add_comment(issue, comment)

    print("Added link %s <-> PR #%s" % (issue, pr['number']))
    num_updates += 1

# Remember the highest PR number examined so the next run can skip past it.
if considered:
    set_max_pr(max(considered))
0195 
0196 
# Finally, mirror the JIRA components of each issue onto its PR as labels.
# PRs whose labels already match are left alone; at most MAX_UPDATES PRs are
# relabelled.
num_updates = 0
pr_component_dic = build_pr_component_dic(jira_prs)
for pr_num in pr_component_dic:
    pr_components, jira_components = pr_component_dic[pr_num]
    print(pr_num)
    if pr_components != jira_components:
        if num_updates >= MAX_UPDATES:
            break
        reset_pr_labels(pr_num, jira_components)
        num_updates += 1