Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env python3
0002 # SPDX-License-Identifier: GPL-2.0
0003 # Copyright Thomas Gleixner <tglx@linutronix.de>
0004 
0005 from argparse import ArgumentParser
0006 from ply import lex, yacc
0007 import locale
0008 import traceback
0009 import fnmatch
0010 import sys
0011 import git
0012 import re
0013 import os
0014 
class ParserException(Exception):
    """Error raised while lexing or parsing a license expression.

    Attributes:
        tok: the token the error is attached to, or None when no token
             is available.
        txt: human readable description of the problem.
    """
    def __init__(self, tok, txt):
        super().__init__(tok, txt)
        self.txt = txt
        self.tok = tok
0019 
class SPDXException(Exception):
    """Error raised for inconsistent SPDX data.

    Attributes:
        el:  the element the error is attached to, or None when no
             element is available.
        txt: human readable description of the problem.
    """
    def __init__(self, el, txt):
        super().__init__(el, txt)
        self.txt = txt
        self.el = el
0024 
class SPDXdata(object):
    """Plain container holding license/exception bookkeeping."""
    def __init__(self):
        # Identifier collections, filled in by the code which reads
        # the license files
        self.licenses = list()
        self.exceptions = dict()
        # File counters
        self.license_files = 0
        self.exception_files = 0
0031 
class dirinfo(object):
    """Statistics for one accounted directory.

    Attributes:
        total:   number of files accounted to the directory
        missing: sum of the 'miss' values passed to update()
        files:   './' prefixed names of the missing files which are
                 located directly in the directory
    """
    def __init__(self):
        self.files = []
        self.total = 0
        self.missing = 0

    def update(self, fname, basedir, miss):
        """Account one file.

        fname is the file path without './' prefix, basedir the
        directory the file is accounted to and miss is non-zero when
        the file is to be counted as missing.
        """
        self.total += 1
        self.missing += miss
        if not miss:
            return
        path = './' + fname
        # Only remember files which live directly in basedir
        if os.path.dirname(path) == basedir.rstrip('/'):
            self.files.append(path)
0046 
0047 # Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
    """Collect license and exception identifiers from LICENSES/.

    Walks the 'preferred', 'dual', 'deprecated' and 'exceptions'
    subtrees of LICENSES in the head commit of repo and reads the
    header tags of every entry which exists as a regular file in the
    working directory. Scanning of a file stops at its 'License-Text:'
    line.

    Returns the populated SPDXdata object.

    Raises SPDXException on duplicate license identifiers, on
    exceptions referring to unknown licenses, on exceptions without
    any 'SPDX-Licenses' and on 'SPDX-Licenses' lines which are not
    preceded by an 'SPDX-Exception-Identifier'.
    """

    # The subdirectories of LICENSES in the kernel source
    # Note: exceptions needs to be parsed as last directory.
    license_dirs = [ "preferred", "dual", "deprecated", "exceptions" ]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            exception = None
            # Use a context manager so the file is closed again even
            # when one of the checks below raises.
            with open(el.path, encoding="utf-8") as fd:
                for line in fd:
                    if line.startswith('Valid-License-Identifier:'):
                        lid = line.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                        spdx.licenses.append(lid)

                    elif line.startswith('SPDX-Exception-Identifier:'):
                        exception = line.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif line.startswith('SPDX-Licenses:'):
                        # Without a preceding exception identifier there
                        # is nothing the license list could be attached to.
                        if exception is None:
                            raise SPDXException(el, 'SPDX-Licenses without SPDX-Exception-Identifier')
                        for lic in line.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
                            if lic not in spdx.licenses:
                                raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif line.startswith("License-Text:"):
                        if exception:
                            if not spdx.exceptions[exception]:
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # The tags precede the license text. Stop here.
                        break
    return spdx
0090 
class id_parser(object):
    """SPDX license expression checker based on PLY lex/yacc.

    The instance is handed to PLY as the 'module' which provides the
    token and grammar rules. For PLY the docstrings of the t_*() and
    p_*() methods are the token regular expressions respectively the
    grammar productions, so they must not be turned into prose.

    Besides parsing, the object keeps the scan statistics (files and
    lines checked, valid and broken expressions, per directory
    accounting in spdx_dirs).
    """

    reserved = [ 'AND', 'OR', 'WITH' ]
    tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved

    precedence = ( ('nonassoc', 'AND', 'OR'), )

    t_ignore = ' \t'

    def __init__(self, spdx):
        """Set up lexer, parser and statistics.

        spdx provides the 'licenses' and 'exceptions' collections the
        identifiers are validated against.
        """
        self.spdx = spdx
        self.lasttok = None
        self.lastid = None
        self.lexer = lex.lex(module = self, reflags = re.UNICODE)
        # Initialize the parser. No debug file and no parser rules stored on disk
        # The rules are small enough to be generated on the fly
        self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
        self.lines_checked = 0
        self.checked = 0
        self.excluded = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.spdx_dirs = {}
        self.dirdepth = -1
        self.basedir = '.'
        self.curline = 0
        self.deepest = 0

    def set_dirinfo(self, basedir, dirdepth):
        """Configure the directory accounting.

        A negative dirdepth leaves the current settings untouched.
        Otherwise basedir is stored and the effective depth becomes
        dirdepth plus the number of path components of basedir.
        """
        if dirdepth >= 0:
            self.basedir = basedir
            bdir = basedir.lstrip('./').rstrip('/')
            if bdir != '':
                parts = bdir.split('/')
            else:
                parts = []
            self.dirdepth = dirdepth + len(parts)

    # Validate License and Exception IDs
    def validate(self, tok):
        """Check an ID or EXC token against the SPDX data.

        Raises ParserException for unknown identifiers and for
        exceptions which are not valid for the preceding license.
        """
        ident = tok.value.upper()
        if tok.type == 'ID':
            if ident not in self.spdx.licenses:
                raise ParserException(tok, 'Invalid License ID')
            self.lastid = ident
        elif tok.type == 'EXC':
            if ident not in self.spdx.exceptions:
                raise ParserException(tok, 'Invalid Exception ID')
            if self.lastid not in self.spdx.exceptions[ident]:
                raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
            self.lastid = None
        elif tok.type != 'WITH':
            # WITH keeps the license ID for the check of the exception
            # which follows. Everything else invalidates it.
            self.lastid = None

    # Lexer functions
    def t_RPAR(self, tok):
        r'\)'
        self.lasttok = tok.type
        return tok

    def t_LPAR(self, tok):
        r'\('
        self.lasttok = tok.type
        return tok

    # Matches license IDs, exception IDs and the reserved words. The
    # token type is fixed up from the value and the previous token.
    def t_ID(self, tok):
        r'[A-Za-z.0-9\-+]+'

        if self.lasttok == 'EXC':
            raise ParserException(tok, 'Missing parentheses')

        tok.value = tok.value.strip()
        val = tok.value.upper()

        if val in self.reserved:
            tok.type = val
        elif self.lasttok == 'WITH':
            tok.type = 'EXC'

        self.lasttok = tok.type
        self.validate(tok)
        return tok

    # Called by the lexer for input which matches no token rule
    def t_error(self, tok):
        raise ParserException(tok, 'Invalid token')

    def p_expr(self, p):
        '''expr : ID
                | ID WITH EXC
                | expr AND expr
                | expr OR expr
                | LPAR expr RPAR'''
        pass

    # Called by the parser on syntax errors. p is None at end of input.
    def p_error(self, p):
        if not p:
            raise ParserException(None, 'Unfinished license expression')
        else:
            raise ParserException(p, 'Syntax error')

    def parse(self, expr):
        """Parse one license expression. Raises ParserException."""
        self.lasttok = None
        self.lastid = None
        self.parser.parse(expr, lexer = self.lexer)

    def parse_lines(self, fd, maxlines, fname):
        """Check the first maxlines lines of a file for a SPDX id.

        fd must yield bytes lines. The first line which contains
        'SPDX-License-Identifier:' is parsed; errors are reported on
        stdout as 'fname: line:col reason'. Unless fname is '-' the
        result is accounted in the per directory statistics.
        """
        self.checked += 1
        self.curline = 0
        fail = 1
        # Loop invariant, so look it up only once per file
        encoding = locale.getpreferredencoding(False)
        try:
            for line in fd:
                line = line.decode(encoding, errors='ignore')
                self.curline += 1
                if self.curline > maxlines:
                    break
                self.lines_checked += 1
                if line.find("SPDX-License-Identifier:") < 0:
                    continue
                expr = line.split(':')[1].strip()
                # Remove trailing comment closure
                if line.strip().endswith('*/'):
                    expr = expr.rstrip('*/').strip()
                # Remove trailing xml comment closure
                if line.strip().endswith('-->'):
                    expr = expr.rstrip('-->').strip()
                # Special case for SH magic boot code files
                if line.startswith('LIST \"'):
                    expr = expr.rstrip('\"').strip()
                self.parse(expr)
                self.spdx_valid += 1
                #
                # Should we check for more SPDX ids in the same file and
                # complain if there are any?
                #
                fail = 0
                break

        except ParserException as pe:
            if pe.tok:
                col = line.find(expr) + pe.tok.lexpos
                tok = pe.tok.value
                sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
            else:
                sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
            self.spdx_errors += 1

        # No directory accounting for stdin
        if fname == '-':
            return

        base = os.path.dirname(fname)
        if self.dirdepth > 0:
            # Limited depth: account to the leading dirdepth components
            parts = base.split('/')
            i = 0
            base = '.'
            while i < self.dirdepth and i < len(parts) and len(parts[i]):
                base += '/' + parts[i]
                i += 1
        elif self.dirdepth == 0:
            base = self.basedir
        elif base:
            base = './' + base.rstrip('/')
        else:
            # Top level file: avoid the './/' key which the plain
            # concatenation would produce for an empty dirname
            base = '.'
        base += '/'

        di = self.spdx_dirs.get(base)
        if di is None:
            di = self.spdx_dirs[base] = dirinfo()
        di.update(fname, base, fail)
0258 
class pattern(object):
    """One rule of the exclude file.

    The rule text selects the matcher which is installed as
    self.match:
        '.*'           basename starts with a dot
        trailing '/'   directory rule, stored without the slash
        leading '/'    fnmatch rule on the full path, stored without
                       the slash
        anything else  exact basename comparison
    """
    def __init__(self, line):
        if line == '.*':
            text, matcher = line, self.match_dot
        elif line.endswith('/'):
            text, matcher = line[:-1], self.match_dir
        elif line.startswith('/'):
            text, matcher = line[1:], self.match_fn
        else:
            text, matcher = line, self.match_file
        self.pattern = text
        self.match = matcher

    def match_dot(self, fpath):
        """True when the basename of fpath starts with '.'."""
        name = os.path.basename(fpath)
        return name.startswith('.')

    def match_file(self, fpath):
        """True when the basename of fpath equals the rule text."""
        name = os.path.basename(fpath)
        return name == self.pattern

    def match_fn(self, fpath):
        """True when fpath matches the rule text, case sensitive."""
        return fnmatch.fnmatchcase(fpath, self.pattern)

    def match_dir(self, fpath):
        """True when the directory of fpath matches the rule text or
        fpath starts with it."""
        parent = os.path.dirname(fpath)
        return self.match_fn(parent) or fpath.startswith(self.pattern)
0285 
def exclude_file(fpath):
    """Return True when any rule in exclude_rules matches fpath."""
    return any(rule.match(fpath) for rule in exclude_rules)
0291 
def scan_git_tree(tree, basedir, dirdepth):
    """Run the checker over the entries of a git tree.

    Entries which do not exist as regular file in the working
    directory are skipped, excluded files are only counted.
    """
    parser.set_dirinfo(basedir, dirdepth)
    for entry in tree.traverse():
        path = entry.path
        if os.path.isfile(path):
            if exclude_file(path):
                parser.excluded += 1
            else:
                with open(path, 'rb') as fd:
                    parser.parse_lines(fd, args.maxlines, path)
0302 
def scan_git_subtree(tree, path, dirdepth):
    """Descend from tree to the subtree named by path and scan it."""
    subdir = path.strip('/')
    subtree = tree
    for component in subdir.split('/'):
        subtree = subtree[component]
    scan_git_tree(subtree, subdir, dirdepth)
0307 
def read_exclude_file(fname):
    """Read the exclude rules from fname.

    Returns a list with one pattern object per line which is neither
    empty nor a '#' comment after stripping whitespace. An empty or
    missing file name results in an empty list.
    """
    if not fname:
        return []
    with open(fname) as fd:
        entries = (raw.strip() for raw in fd)
        return [pattern(entry) for entry in entries
                if entry and not entry.startswith('#')]
0321 
# Script entry point: parse the command line, load the SPDX data from
# the git repository in the current directory, scan the requested
# files and print the statistics to stderr. Expression errors are
# reported on stdout by the parser.
if __name__ == '__main__':

    ap = ArgumentParser(description='SPDX expression checker')
    ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
    ap.add_argument('-d', '--dirs', action='store_true',
                    help='Show [sub]directory statistics.')
    ap.add_argument('-D', '--depth', type=int, default=-1,
                    help='Directory depth for -d statistics. Default: unlimited')
    ap.add_argument('-e', '--exclude',
                    help='File containing file patterns to exclude. Default: scripts/spdxexclude')
    ap.add_argument('-f', '--files', action='store_true',
                    help='Show files without SPDX.')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
                    help='Maximum number of lines to scan in a file. Default 15')
    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
    args = ap.parse_args()

    # Sanity check path arguments
    if '-' in args.path and len(args.path) > 1:
        sys.stderr.write('stdin input "-" must be the only path argument\n')
        sys.exit(1)

    try:
        # Use git to get the valid license expressions
        repo = git.Repo(os.getcwd())
        # Explicit check instead of assert, which is stripped with -O
        if repo.bare:
            raise RuntimeError('bare git repositories are not supported')

        # Initialize SPDX data
        spdx = read_spdxdata(repo)

        # Initialize the parser
        parser = id_parser(spdx)

    except SPDXException as se:
        if se.el:
            sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
        else:
            sys.stderr.write('%s\n' %se.txt)
        sys.exit(1)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)

    try:
        fname = args.exclude
        if not fname:
            fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
        exclude_rules = read_exclude_file(fname)
    except Exception as ex:
        sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
            parser.parse_lines(stdin, args.maxlines, '-')
        else:
            if args.path:
                for p in args.path:
                    if os.path.isfile(p):
                        # Close the file again right after the check
                        with open(p, 'rb') as fd:
                            parser.parse_lines(fd, args.maxlines, p)
                    elif os.path.isdir(p):
                        scan_git_subtree(repo.head.reference.commit.tree, p,
                                         args.depth)
                    else:
                        sys.stderr.write('path %s does not exist\n' %p)
                        sys.exit(1)
            else:
                # Full git tree scan
                scan_git_tree(repo.head.commit.tree, '.', args.depth)

            # Directories in which no file is missing the SPDX id
            ndirs = len(parser.spdx_dirs)
            dirsok = sum(1 for di in parser.spdx_dirs.values() if not di.missing)

            if args.verbose:
                sys.stderr.write('\n')
                sys.stderr.write('License files:     %12d\n' %spdx.license_files)
                sys.stderr.write('Exception files:   %12d\n' %spdx.exception_files)
                sys.stderr.write('License IDs        %12d\n' %len(spdx.licenses))
                sys.stderr.write('Exception IDs      %12d\n' %len(spdx.exceptions))
                sys.stderr.write('\n')
                sys.stderr.write('Files excluded:    %12d\n' %parser.excluded)
                sys.stderr.write('Files checked:     %12d\n' %parser.checked)
                sys.stderr.write('Lines checked:     %12d\n' %parser.lines_checked)
                if parser.checked:
                    pc = int(100 * parser.spdx_valid / parser.checked)
                    sys.stderr.write('Files with SPDX:   %12d %3d%%\n' %(parser.spdx_valid, pc))
                sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
                if ndirs:
                    sys.stderr.write('\n')
                    sys.stderr.write('Directories accounted: %8d\n' %ndirs)
                    pc = int(100 * dirsok / ndirs)
                    sys.stderr.write('Directories complete:  %8d %3d%%\n' %(dirsok, pc))

            if ndirs and ndirs != dirsok and args.dirs:
                if args.verbose:
                    sys.stderr.write('\n')
                sys.stderr.write('Incomplete directories: SPDX in Files\n')
                for dname in sorted(parser.spdx_dirs.keys()):
                    di = parser.spdx_dirs[dname]
                    if di.missing:
                        valid = di.total - di.missing
                        pc = int(100 * valid / di.total)
                        sys.stderr.write('    %-80s: %5d of %5d  %3d%%\n' %(dname, valid, di.total, pc))

            if ndirs and ndirs != dirsok and args.files:
                if args.verbose or args.dirs:
                    sys.stderr.write('\n')
                sys.stderr.write('Files without SPDX:\n')
                # Distinct names for the two loop levels
                for dname in sorted(parser.spdx_dirs.keys()):
                    for missing_file in sorted(parser.spdx_dirs[dname].files):
                        sys.stderr.write('    %s\n' %missing_file)

            sys.exit(0)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)