Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env python3
0002 # SPDX-License-Identifier: GPL-2.0-only
0003 
0004 """Find Kconfig symbols that are referenced but not defined."""
0005 
0006 # (c) 2014-2017 Valentin Rothberg <valentinrothberg@gmail.com>
0007 # (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
0008 #
0009 
0010 
0011 import argparse
0012 import difflib
0013 import os
0014 import re
0015 import signal
0016 import subprocess
0017 import sys
0018 from multiprocessing import Pool, cpu_count
0019 
0020 
# regex expressions
# Operators that may appear in a Kconfig expression: & ( ) | !
OPERATORS = r"&|\(|\)|\||\!"
# A run of word characters containing at least two characters from [A-Z0-9].
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
# A "config" or "menuconfig" entry; the defined symbol is captured.
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
# A non-empty run of operators, whitespace and symbols.
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
# A "default" statement with an optional trailing "if" condition.
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
# Statements followed by an expression: if, select, imply, depends on, default.
STMT = r"^\s*(?:if|select|imply|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
# A CONFIG_-prefixed reference, optionally preceded by a single "D"
# (presumably to cover -DCONFIG_FOO compiler flags -- TODO confirm); the part
# after "CONFIG_" is captured.
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
# Paths ending in "Kconfig", optionally followed by characters from [.\w+-].
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
# A symbol that both starts and ends on a word boundary.
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
# Accepts only strings whose last character is an ASCII letter or digit.
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
# A hexadecimal (0x...) or decimal literal.
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
# A double-quoted substring (shortest match).
REGEX_QUOTES = re.compile("(\"(.*?)\")")
0040 
0041 
def parse_options():
    """Parse and validate the command line of this module.

    Returns the argparse namespace with the attributes commit, diff, find,
    ignore, sim, force and color.  Exits via sys.exit() with an explanatory
    message if the options are inconsistent, if --diff is malformed, if the
    Git tree is dirty (and --force was not given) or if --ignore is not a
    valid Python regex.
    """
    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
            "not defined in Kconfig.  If no option is specified, "             \
            "checkkconfigsymbols defaults to check your current tree.  "       \
            "Please note that specifying commits will 'git reset --hard\' "    \
            "your current tree!  You may save uncommitted changes to avoid "   \
            "losing data."

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.\^]+\.\.[\w\-\.\^]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "\'commit1..commit2\'")

    # Both modes hard-reset the tree, so refuse to run on uncommitted changes
    # unless the user explicitly asked for it.
    if args.commit or args.diff:
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status').  "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        if args.commit.startswith('HEAD'):
            sys.exit("The --commit option can't use the HEAD ref")

        # --find is only meaningful together with --diff
        args.find = False

    if args.ignore:
        # Only a pattern that fails to compile is a user error; do not swallow
        # unrelated exceptions such as KeyboardInterrupt.
        try:
            re.compile(args.ignore)
        except (re.error, OverflowError, RecursionError):
            sys.exit("Please specify a valid Python regex.")

    return args
0116 
0117 
def main():
    """Main function of this module.

    Depending on the parsed options this either prints symbols similar to
    --sim and exits, compares the undefined symbols of two commits
    (--commit / --diff), or checks the current tree.  For every reported
    symbol the referencing files and similar defined symbols are printed, and
    with --find also the commits touching the symbol.
    """
    args = parse_options()

    global COLOR
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        else:
            split = args.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]

        try:
            # get undefined items before the commit
            reset(commit_a)
            undefined_a, _ = check_symbols(args.ignore)

            # get undefined items for the commit
            reset(commit_b)
            undefined_b, defined = check_symbols(args.ignore)
        finally:
            # Always return to the original HEAD, even if a scan failed or
            # was interrupted, so the tree is not left on another commit.
            reset(head)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            if symbol not in undefined_a:
                # symbol has not been undefined before
                files = sorted(undefined_b[symbol])
            else:
                # only new files that reference the undefined symbol
                files = sorted(undefined_b[symbol] - undefined_a[symbol])
            if files:
                undefined[symbol] = files

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined.get(symbol))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    # "<abbreviated hash> <subject>"; partition() tolerates a
                    # line without a subject.
                    sha, _, subject = commit.partition(" ")
                    print("\t- %s (\"%s\")" % (yel(sha), subject))
            else:
                print("\t- no commit found")
        print()  # new line
0205 
0206 
def reset(commit):
    """Hard-reset the current git tree to %commit."""
    git_cmd = ["git", "reset", "--hard", commit]
    execute(git_cmd)
0210 
0211 
def yel(string):
    """
    Wrap %string in the ANSI escape codes for yellow when the module-level
    COLOR flag is set; otherwise hand %string back untouched.
    """
    if not COLOR:
        return string
    return "\033[33m%s\033[0m" % string
0217 
0218 
def red(string):
    """
    Wrap %string in the ANSI escape codes for red when the module-level
    COLOR flag is set; otherwise hand %string back untouched.
    """
    if not COLOR:
        return string
    return "\033[31m%s\033[0m" % string
0224 
0225 
def execute(cmd):
    """Execute %cmd and return its decoded output.  Exit in case of error.

    %cmd is an argument list that is run without a shell; stderr is merged
    into the returned output.  If the command exits with a non-zero status or
    cannot be started at all, whatever output was captured is written to
    stderr and the script terminates via sys.exit() with the exception.
    """
    try:
        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
    except subprocess.CalledProcessError as fail:
        # The command's own message was captured above; show it so the user
        # sees why the command failed, not only its exit status.
        if fail.output:
            sys.stderr.write(fail.output.decode(errors='replace'))
        # sys.exit() rather than the site-provided exit() helper, which is
        # not guaranteed to exist (e.g. under "python -S").
        sys.exit(fail)
    except OSError as fail:
        # e.g. the executable is not installed
        sys.exit(fail)
    return stdout.decode(errors='replace')
0234 
0235 
def find_commits(symbol, diff):
    """Return the one-line 'git log' entries of the commits in the range
    %diff whose changes match %symbol (git log -G)."""
    git_cmd = ["git", "log", "--pretty=oneline", "--abbrev-commit",
               "-G", symbol, diff]
    log_output = execute(git_cmd)
    # drop the empty strings produced by blank lines / the trailing newline
    return list(filter(None, log_output.split("\n")))
0242 
0243 
def tree_is_dirty():
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed).

    Only the two status columns at the start of each 'git status --porcelain'
    line are inspected, so untracked files ("??") and letters that merely
    occur in file names do not count.
    """
    stdout = execute(["git", "status", "--porcelain"])
    # Iterate over lines; iterating over the string itself would yield
    # single characters and match letters inside path names.
    return any(re.search(r"[URMADC]", line[:2])
               for line in stdout.splitlines())
0252 
0253 
def get_head():
    """Return the commit hash that 'git rev-parse HEAD' reports, without
    surrounding newlines."""
    rev_parse_output = execute(["git", "rev-parse", "HEAD"])
    head_hash = rev_parse_output.strip('\n')
    return head_hash
0258 
0259 
def partition(lst, size):
    """Split @lst into @size interleaved sub-lists.

    Sub-list number i receives the elements at positions i, i + @size,
    i + 2 * @size, ... so the sub-lists differ in length by at most one.
    """
    chunks = []
    for offset in range(size):
        chunks.append(lst[offset::size])
    return chunks
0263 
0264 
def init_worker():
    """Install the 'ignore' handler for SIGINT in the calling process."""
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
0268 
0269 
def find_sims(symbol, ignore, defined=None):
    """Return a list of max. ten Kconfig symbols that are string-similar to
    @symbol.

    If @defined is a non-empty collection of symbol names it is used as the
    candidate set.  Otherwise the Kconfig files of the current git tree are
    parsed (with @ignore passed through to the parser) to collect the defined
    symbols.  @defined is never modified.
    """
    if defined:
        return difflib.get_close_matches(symbol, set(defined), 10)

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    # Collect into a fresh local set: a shared default list would keep
    # growing across calls, and a caller-supplied set has no extend().
    candidates = set()
    pool = Pool(cpu_count(), init_worker)
    try:
        for res in pool.map(parse_kconfig_files, arglist):
            candidates.update(res[0])
    finally:
        # release the worker processes whether or not parsing succeeded
        pool.terminate()
        pool.join()

    return difflib.get_close_matches(symbol, candidates, 10)
0290 
0291 
def get_files():
    """Return the paths listed by 'git ls-files', minus entries that contain
    ".git", "ChangeLog" or ".log", that are directories, or that live below
    "tools/"."""
    listing = execute(["git", "ls-files"])
    # strip a single trailing newline so it does not yield an extra entry
    if listing.endswith("\n"):
        listing = listing[:-1]

    skipped_fragments = (".git", "ChangeLog", ".log")

    def is_wanted(path):
        if any(fragment in path for fragment in skipped_fragments):
            return False
        if os.path.isdir(path):
            return False
        return not path.startswith("tools/")

    return [path for path in listing.split("\n") if is_wanted(path)]
0307 
0308 
def check_symbols(ignore):
    """Find undefined Kconfig symbols.

    Returns the tuple produced by check_symbols_helper(): the undefined
    symbols mapped to their referencing files, and the defined symbols.
    Files matching %ignore are not checked for undefined symbols.

    The worker pool is always shut down before returning; on a keyboard
    interrupt the script exits with status 1.
    """
    pool = Pool(cpu_count(), init_worker)
    try:
        return check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        # Runs on success, interrupt and any other error, so the worker
        # processes never outlive this call.
        pool.terminate()
        pool.join()
0320 
0321 
def check_symbols_helper(pool, ignore):
    """Do the actual work of check_symbols() using the worker @pool.

    Kept separate so that check_symbols() can catch keyboard interrupts and
    shut the workers down.  Returns a tuple of a dict mapping each undefined
    symbol to the set of files referencing it, and the set of defined
    symbols.
    """
    kconfig_files = []
    source_files = []
    for path in get_files():
        if REGEX_FILE_KCONFIG.match(path):
            kconfig_files.append(path)
        elif not (ignore and re.match(ignore, path)):
            # only source files that do not match the ignore pattern
            source_files.append(path)

    # parse source files
    refs_by_file = {}  # {file: [symbols]}
    source_jobs = partition(source_files, cpu_count())
    for file_refs in pool.map(parse_source_files, source_jobs):
        refs_by_file.update(file_refs)

    # parse kconfig files
    defined_symbols = set()
    kconfig_jobs = [(chunk, ignore)
                    for chunk in partition(kconfig_files, cpu_count())]
    for definitions, file_refs in pool.map(parse_kconfig_files, kconfig_jobs):
        defined_symbols.update(definitions)
        refs_by_file.update(file_refs)

    # invert the mapping to {symbol: {files}}
    files_by_symbol = {}
    for path, symbols in refs_by_file.items():
        for symbol in symbols:
            files_by_symbol.setdefault(symbol, set()).add(path)

    placeholder_names = ("FOO", "BAR", "FOO_BAR", "XXX")
    module_suffix = "_MODULE"

    undefined = {}  # {symbol: {files}}
    for symbol in sorted(files_by_symbol):
        # filter some false positives
        if symbol in placeholder_names:
            continue
        if symbol in defined_symbols:
            continue
        # avoid false positives for kernel modules
        if symbol.endswith(module_suffix) and \
                symbol[:-len(module_suffix)] in defined_symbols:
            continue
        undefined[symbol] = files_by_symbol[symbol]
    return undefined, defined_symbols
0374 
0375 
def parse_source_files(source_files):
    """Run parse_source_file() on every entry of @source_files and return a
    dictionary that maps each file to the list of Kconfig symbols it
    references."""
    return {path: parse_source_file(path) for path in source_files}
0383 
0384 
def parse_source_file(sfile):
    """Return the Kconfig symbols referenced in @sfile, in order of
    appearance.  A path that does not exist yields an empty list."""
    if not os.path.exists(sfile):
        return []

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        content = stream.readlines()

    # only lines that mention CONFIG_ can contain a reference
    candidates = (symbol
                  for text in content if "CONFIG_" in text
                  for symbol in REGEX_SOURCE_SYMBOL.findall(text))
    # keep only the candidates accepted by REGEX_FILTER_SYMBOLS
    return [symbol for symbol in candidates
            if REGEX_FILTER_SYMBOLS.search(symbol)]
0406 
0407 
def get_symbols_in_line(line):
    """Collect every match of REGEX_SYMBOL in @line and return them as a
    list."""
    found = REGEX_SYMBOL.findall(line)
    return found
0411 
0412 
def parse_kconfig_files(args):
    """Parse a batch of Kconfig files.

    @args is a tuple of (list of files, @ignore pattern).  Returns a tuple of
    the list of all symbols defined in the files and a dictionary mapping
    each file to its referenced symbols.  Files matching the @ignore pattern
    still contribute definitions but get no entry in the dictionary.
    """
    kconfig_files, ignore = args[0], args[1]
    all_defined = []
    refs_by_file = {}

    for kfile in kconfig_files:
        file_defined, file_refs = parse_kconfig_file(kfile)
        all_defined += file_defined
        if not (ignore and re.match(ignore, kfile)):
            refs_by_file[kfile] = file_refs
    return (all_defined, refs_by_file)
0430 
0431 
def parse_kconfig_file(kfile):
    """Parse @kfile and return a tuple of its symbol definitions and
    references.

    Definitions are the symbols of lines matching REGEX_KCONFIG_DEF.
    References are the symbols found in statements matching
    REGEX_KCONFIG_STMT, including backslash-continued lines, with comments,
    quoted strings and purely numeric values removed.  A path that does not
    exist yields two empty lists.
    """
    defined = []
    references = []

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    # A shared iterator lets the continuation handling below consume lines
    # so that they are not processed a second time by the outer loop.
    line_iter = iter(lines)
    for raw in line_iter:
        line = raw.strip('\n').split("#")[0]  # ignore comments

        definition = REGEX_KCONFIG_DEF.match(line)
        if definition:
            defined.append(definition.group(1))
            continue
        if not REGEX_KCONFIG_STMT.match(line):
            continue

        line = REGEX_QUOTES.sub("", line)
        symbols = get_symbols_in_line(line)
        # multi-line statements
        while line.endswith("\\"):
            raw = next(line_iter, None)
            if raw is None:
                # trailing backslash on the last line of the file
                break
            # clean continuation lines the same way as the first line
            line = REGEX_QUOTES.sub("", raw.strip('\n').split("#")[0])
            symbols.extend(get_symbols_in_line(line))
        for symbol in set(symbols):
            # Ignore numeric values only; fullmatch() keeps symbols that
            # merely start with a digit (e.g. 64BIT).
            if REGEX_NUMERIC.fullmatch(symbol):
                continue
            references.append(symbol)

    return defined, references
0468 
0469 
# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()