Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env python3
0002 # SPDX-License-Identifier: GPL-2.0
0003 #
0004 # Copyright (C) Google LLC, 2018
0005 #
0006 # Author: Tom Roeder <tmroeder@google.com>
0007 #
0008 """A tool for generating compile_commands.json in the Linux kernel."""
0009 
0010 import argparse
0011 import json
0012 import logging
0013 import os
0014 import re
0015 import subprocess
0016 import sys
0017 
# Default value of the -o/--output option.
_DEFAULT_OUTPUT = 'compile_commands.json'
# Default value of the --log_level option.
_DEFAULT_LOG_LEVEL = 'WARNING'

# File names of the form '.<anything>.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# A 'cmd_<target>.o := <command> <source>.c' line. Group 1 is the command up
# to and including the space before the source file, group 2 is the .c file.
_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c) *(;|$)'
# Values accepted by the --log_level option.
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
0027 
def parse_arguments():
    """Build the command-line parser and return the parsed options.

    Returns:
        A tuple (log_level, directory, output, ar, paths) where:
        log_level: Name of the logging level used to filter log output.
        directory: Absolute path of the work directory where the objects
            were built.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The files/directories to inspect for .cmd files. When no
            positional argument is given, a one-element list holding the
            directory exactly as passed on the command line.
    """
    parser = argparse.ArgumentParser(
        description=('Creates a compile_commands.json database from kernel '
                     '.cmd files'))

    parser.add_argument(
        '-d', '--directory', type=str, default='.',
        help=('specify the output directory used for the kernel build '
              '(defaults to the working directory)'))

    parser.add_argument(
        '-o', '--output', type=str, default=_DEFAULT_OUTPUT,
        help=('path to the output command database (defaults to ' +
              _DEFAULT_OUTPUT + ')'))

    parser.add_argument(
        '--log_level', choices=_VALID_LOG_LEVELS, default=_DEFAULT_LOG_LEVEL,
        help=('the level of log messages to produce (defaults to ' +
              _DEFAULT_LOG_LEVEL + ')'))

    parser.add_argument(
        '-a', '--ar', type=str, default='llvm-ar',
        help='command used for parsing .a archives')

    parser.add_argument(
        'paths', type=str, nargs='*',
        help=('directories to search or files to parse '
              '(files should be *.o, *.a, or modules.order). '
              'If nothing is specified, the current directory is searched'))

    options = parser.parse_args()

    # Fall back to the build directory when no explicit path was given.
    search_paths = options.paths
    if not search_paths:
        search_paths = [options.directory]

    return (options.log_level,
            os.path.abspath(options.directory),
            options.output,
            options.ar,
            search_paths)
0071 
0072 
def cmdfiles_in_dir(directory):
    """Yield every .cmd file found while walking the given directory.

    The directories named in _EXCLUDE_DIRS that sit directly under
    'directory' are skipped together with everything beneath them.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """
    is_cmdfile = re.compile(_FILENAME_PATTERN).match
    skipped = {os.path.join(directory, name) for name in _EXCLUDE_DIRS}

    for current, subdirs, files in os.walk(directory, topdown=True):
        if current in skipped:
            # Emptying the list in place keeps os.walk() from descending
            # any further; the files of this directory are ignored as well.
            del subdirs[:]
            continue

        for name in files:
            if is_cmdfile(name):
                yield os.path.join(current, name)
0097 
0098 
def to_cmdfile(path):
    """Return the path of the .cmd file used for the given build artifact.

    The .cmd file lives in the same directory as the artifact and carries
    the artifact's file name wrapped in a leading '.' and a trailing '.cmd'.

    Args:
        path: The file path of the build artifact.

    Returns:
        The path to the .cmd file.
    """
    # Avoid the name 'dir', which would shadow the builtin of the same name.
    dirname, basename = os.path.split(path)
    return os.path.join(dirname, '.' + basename + '.cmd')
0110 
0111 
def cmdfiles_for_o(obj):
    """Yield the single .cmd file that belongs to the given object.

    Args:
        obj: The object path

    Yields:
        The path to the .cmd file used to build the object
    """
    cmdfile = to_cmdfile(obj)
    yield cmdfile
0124 
0125 
def cmdfiles_for_a(archive, ar):
    """Yield the .cmd file of every member listed in the archive.

    Runs '<ar> -t <archive>' and maps each whitespace-separated entry of its
    output to the corresponding .cmd file.

    Args:
        archive: The archive to parse
        ar: The command used to list the archive members

    Yields:
        The path to every .cmd file found
    """
    listing = subprocess.check_output([ar, '-t', archive])
    members = listing.decode().split()
    for member in members:
        yield to_cmdfile(member)
0139 
0140 
def cmdfiles_for_modorder(modorder):
    """Yield the .cmd files of the objects behind each listed module.

    Every line of modules.order must name a .ko file; the script exits with
    an error message otherwise. For each module, the sibling *.mod file is
    read and each of its lines is mapped to a .cmd file.

    Args:
        modorder: The modules.order file to parse

    Yields:
        The path to every .cmd file found
    """
    with open(modorder) as order_file:
        for entry in order_file:
            module = entry.rstrip()
            stem, suffix = os.path.splitext(module)
            if suffix != '.ko':
                sys.exit('{}: module path must end with .ko'.format(module))
            # Read from *.mod, to get a list of objects that compose the module.
            with open(stem + '.mod') as mod_file:
                yield from (to_cmdfile(obj.rstrip()) for obj in mod_file)
0164 
0165 
def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands: a dict with the keys
        'directory', 'file' and 'command'.

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    # A raw string is required here: '\#' in a plain literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }
0198 
0199 
def main():
    """Collect compile commands from .cmd files and write them out as JSON.

    Each path from the command line is resolved to a stream of .cmd files.
    The first line of every .cmd file is matched against _LINE_PATTERN and,
    on a match, turned into a compile_commands entry. Entries whose source
    file cannot be found are dropped with an INFO log message.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=getattr(logging, log_level))

    match_line = re.compile(_LINE_PATTERN).match
    entries = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # Most of built-in objects are linked via archives (built-in.a or lib.a)
        # but some objects are linked to vmlinux directly.
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.o'):
            cmdfiles = cmdfiles_for_o(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is examined.
            with open(cmdfile, 'rt') as f:
                first_line = f.readline()

            matched = match_line(first_line)
            if not matched:
                continue

            try:
                entries.append(process_line(directory, matched.group(1),
                                            matched.group(2)))
            except ValueError as err:
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(entries, f, indent=2, sort_keys=True)
0242 
0243 
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()