0001
0002
0003
0004
0005 from argparse import ArgumentParser
0006 from ply import lex, yacc
0007 import locale
0008 import traceback
0009 import fnmatch
0010 import sys
0011 import git
0012 import re
0013 import os
0014
class ParserException(Exception):
    """Error raised while lexing or parsing a license expression.

    Attributes:
        tok: The token the error refers to; may be None when no token
            is available.
        txt: Human-readable description of the problem.
    """

    def __init__(self, tok, txt):
        # Initialise the base class explicitly so Exception.args is populated
        # through the regular constructor chain.
        super().__init__(tok, txt)
        self.tok = tok
        self.txt = txt

    def __str__(self):
        # The default would render the (tok, txt) tuple; show the message only.
        return str(self.txt)
0019
class SPDXException(Exception):
    """Error raised while reading the license data.

    Attributes:
        el: The element the error refers to; may be None when the error is
            not tied to a single element.
        txt: Human-readable description of the problem.
    """

    def __init__(self, el, txt):
        # Initialise the base class explicitly so Exception.args is populated
        # through the regular constructor chain.
        super().__init__(el, txt)
        self.el = el
        self.txt = txt

    def __str__(self):
        # The default would render the (el, txt) tuple; show the message only.
        return str(self.txt)
0024
class SPDXdata:
    """Plain container for the license data gathered by read_spdxdata()."""

    def __init__(self):
        # Two counters, both starting at zero
        self.license_files, self.exception_files = 0, 0
        # Known license ids (a list) and exception id -> list mapping (a dict)
        self.licenses = list()
        self.exceptions = dict()
0031
class dirinfo:
    """Per-directory tally: files seen, files flagged missing, and their names."""

    def __init__(self):
        self.total = self.missing = 0
        self.files = []

    def update(self, fname, basedir, miss):
        """Account one file for this directory.

        fname is the file path, basedir the directory key the record is kept
        under, and miss a 0/1 flag added to the missing counter.  The
        './'-prefixed name is remembered only for flagged files that live
        directly in basedir.
        """
        self.total += 1
        self.missing += miss
        if not miss:
            return

        dotted = './' + fname
        # Files from deeper sub-directories are counted but not listed
        if os.path.dirname(dotted) == basedir.rstrip('/'):
            self.files.append(dotted)
0046
0047
def read_spdxdata(repo):
    """Collect license and exception identifiers from the LICENSES tree.

    Walks the entries of the LICENSES sub-directories in the head commit of
    repo, reads each entry that exists as a file on disk and evaluates the
    tag lines at the start of it, up to and including 'License-Text:'.

    Returns:
        An SPDXdata instance filled with the ids and file counters.

    Raises:
        SPDXException: on a duplicate license id, an exception referring to
            an unknown license, an exception without any license, or an
            'SPDX-Licenses:' line that has no exception id before it.
    """
    # 'exceptions' has to stay last: the SPDX-Licenses: check below looks the
    # listed licenses up in spdx.licenses, which the other directories fill.
    license_dirs = [ "preferred", "dual", "deprecated", "exceptions" ]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            exception = None
            # The context manager closes the file on break and on raise
            with open(el.path, encoding="utf-8") as fd:
                for line in fd:
                    if line.startswith('Valid-License-Identifier:'):
                        lid = line.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                        spdx.licenses.append(lid)

                    elif line.startswith('SPDX-Exception-Identifier:'):
                        exception = line.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif line.startswith('SPDX-Licenses:'):
                        if exception is None:
                            # Would otherwise die with KeyError(None) below
                            raise SPDXException(el, 'SPDX-Licenses without SPDX-Exception-Identifier')
                        for lic in line.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
                            if lic not in spdx.licenses:
                                raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif line.startswith("License-Text:"):
                        if exception:
                            if not len(spdx.exceptions[exception]):
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # Nothing of interest follows the license text marker
                        break
    return spdx
0090
class id_parser(object):
    """PLY based checker for SPDX license expressions.

    The instance itself is handed to lex.lex() and yacc.yacc() as the rule
    module, so the t_* methods are lexer rules and the p_* methods are
    grammar rules.  Their docstrings are the regular expressions and
    productions, which is why those methods are documented with comments
    instead.  Besides parsing, the object accumulates the scan statistics:
    files and lines checked, valid ids, errors, and one dirinfo record per
    directory key in spdx_dirs.
    """

    reserved = [ 'AND', 'OR', 'WITH' ]
    tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved

    precedence = ( ('nonassoc', 'AND', 'OR'), )

    t_ignore = ' \t'

    def __init__(self, spdx):
        """Build lexer and parser; spdx supplies the valid ids."""
        self.spdx = spdx
        self.lasttok = None
        self.lastid = None
        self.lexer = lex.lex(module = self, reflags = re.UNICODE)
        # No table file is written and no debug output is produced; the
        # parser is generated on every start.
        self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
        self.lines_checked = 0
        self.checked = 0
        self.excluded = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.spdx_dirs = {}
        self.dirdepth = -1
        self.basedir = '.'
        self.curline = 0
        self.deepest = 0

    def set_dirinfo(self, basedir, dirdepth):
        """Set the base directory and depth used for directory accounting.

        A negative dirdepth leaves the current settings untouched.  Otherwise
        the stored depth is dirdepth plus the number of path components of
        basedir, i.e. dirdepth is counted from basedir downwards.
        """
        if dirdepth >= 0:
            self.basedir = basedir
            # lstrip/rstrip drop leading '.' and '/' characters and trailing
            # '/'; the result is only used to count components.
            bdir = basedir.lstrip('./').rstrip('/')
            if bdir != '':
                parts = bdir.split('/')
            else:
                parts = []
            self.dirdepth = dirdepth + len(parts)

    def validate(self, tok):
        """Check a license or exception id token against the SPDX data.

        Remembers the last license id so that a following exception id can
        be checked for being valid with that license.

        Raises:
            ParserException: for an unknown license id, an unknown exception
                id, or an exception not listed for the preceding license.
        """
        ident = tok.value.upper()
        if tok.type == 'ID':
            if ident not in self.spdx.licenses:
                raise ParserException(tok, 'Invalid License ID')
            self.lastid = ident
        elif tok.type == 'EXC':
            if ident not in self.spdx.exceptions:
                raise ParserException(tok, 'Invalid Exception ID')
            if self.lastid not in self.spdx.exceptions[ident]:
                raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
            self.lastid = None
        elif tok.type != 'WITH':
            # AND / OR end the scope of the last license id; WITH keeps it
            self.lastid = None

    # Lexer rule: closing parenthesis
    def t_RPAR(self, tok):
        r'\)'
        self.lasttok = tok.type
        return tok

    # Lexer rule: opening parenthesis
    def t_LPAR(self, tok):
        r'\('
        self.lasttok = tok.type
        return tok

    # Lexer rule: identifiers and keywords.  A word matching one of the
    # reserved names becomes that keyword token, a word following WITH
    # becomes an EXC token, everything else stays an ID.
    def t_ID(self, tok):
        r'[A-Za-z.0-9\-+]+'

        # Any word directly after an exception id is rejected; only a
        # parenthesis token resets lasttok in between.
        if self.lasttok == 'EXC':
            raise ParserException(tok, 'Missing parentheses')

        tok.value = tok.value.strip()
        val = tok.value.upper()

        if val in self.reserved:
            tok.type = val
        elif self.lasttok == 'WITH':
            tok.type = 'EXC'

        self.lasttok = tok.type
        self.validate(tok)
        return tok

    # Lexer error hook: input that matches no rule
    def t_error(self, tok):
        raise ParserException(tok, 'Invalid token')

    # Grammar rule: the docstring is the production list; no action needed
    def p_expr(self, p):
        '''expr : ID
                | ID WITH EXC
                | expr AND expr
                | expr OR expr
                | LPAR expr RPAR'''
        pass

    # Parser error hook: p is None when the input ended prematurely
    def p_error(self, p):
        if not p:
            raise ParserException(None, 'Unfinished license expression')
        else:
            raise ParserException(p, 'Syntax error')

    def parse(self, expr):
        """Parse one license expression; raises ParserException on error."""
        self.lasttok = None
        self.lastid = None
        self.parser.parse(expr, lexer = self.lexer)

    def parse_lines(self, fd, maxlines, fname):
        """Scan the first maxlines lines of fd for an SPDX identifier.

        fd must yield bytes lines.  The first line containing
        'SPDX-License-Identifier:' is parsed and ends the scan.  Parse errors
        are reported on stdout as 'file: line:col message' and counted.
        Unless fname is '-', the outcome is also booked into the directory
        statistics.
        """
        self.checked += 1
        self.curline = 0
        fail = 1
        # Loop invariant: look the encoding up once, not once per line
        encoding = locale.getpreferredencoding(False)
        try:
            for line in fd:
                line = line.decode(encoding, errors='ignore')
                self.curline += 1
                if self.curline > maxlines:
                    break
                self.lines_checked += 1
                if line.find("SPDX-License-Identifier:") < 0:
                    continue
                expr = line.split(':')[1].strip()
                # Drop a trailing C comment terminator
                if line.strip().endswith('*/'):
                    expr = expr.rstrip('*/').strip()
                # Drop a trailing XML comment terminator
                if line.strip().endswith('-->'):
                    expr = expr.rstrip('-->').strip()
                # Lines of the form LIST "..." carry a trailing quote
                if line.startswith('LIST \"'):
                    expr = expr.rstrip('\"').strip()
                self.parse(expr)
                self.spdx_valid += 1
                # Only the first identifier line of a file is evaluated
                fail = 0
                break

        except ParserException as pe:
            # line and expr are bound here: the exception can only come
            # from self.parse(expr) above.
            if pe.tok:
                col = line.find(expr) + pe.tok.lexpos
                tok = pe.tok.value
                sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
            else:
                sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
            self.spdx_errors += 1

        if fname == '-':
            return

        self._account_dir(fname, fail)

    # Book one scanned file under the directory key selected by dirdepth
    def _account_dir(self, fname, fail):
        base = os.path.dirname(fname)
        if self.dirdepth > 0:
            # Keep at most dirdepth leading components; stop at an empty one
            parts = base.split('/')
            i = 0
            base = '.'
            while i < self.dirdepth and i < len(parts) and len(parts[i]):
                base += '/' + parts[i]
                i += 1
        elif self.dirdepth == 0:
            base = self.basedir
        else:
            base = './' + base.rstrip('/')
        base += '/'

        di = self.spdx_dirs.setdefault(base, dirinfo())
        di.update(fname, base, fail)
0258
class pattern:
    """One exclude rule; self.match is bound to the matcher its form selects.

    '.*' matches dot files, a trailing '/' selects directory matching, a
    leading '/' selects fnmatch matching of the path, and anything else is
    compared with the file's base name.
    """

    def __init__(self, line):
        if line == '.*':
            self.pattern = line
            self.match = self.match_dot
        elif line.endswith('/'):
            self.pattern = line[:-1]
            self.match = self.match_dir
        elif line.startswith('/'):
            self.pattern = line[1:]
            self.match = self.match_fn
        else:
            self.pattern = line
            self.match = self.match_file

    def match_dot(self, fpath):
        """True if the base name of fpath begins with a dot."""
        name = os.path.basename(fpath)
        return name.startswith('.')

    def match_file(self, fpath):
        """True if the base name of fpath equals the pattern."""
        name = os.path.basename(fpath)
        return name == self.pattern

    def match_fn(self, fpath):
        """True if fpath matches the pattern case-sensitively via fnmatch."""
        return fnmatch.fnmatchcase(fpath, self.pattern)

    def match_dir(self, fpath):
        """True if the directory of fpath matches or fpath starts with the pattern."""
        parent = os.path.dirname(fpath)
        return self.match_fn(parent) or fpath.startswith(self.pattern)
0285
def exclude_file(fpath):
    """Return True if any rule in the global exclude_rules matches fpath."""
    return any(rule.match(fpath) for rule in exclude_rules)
0291
def scan_git_tree(tree, basedir, dirdepth):
    """Check every entry of tree that exists as a file and is not excluded.

    Uses the global parser for checking and counting, and the global args
    for the line limit.
    """
    parser.set_dirinfo(basedir, dirdepth)
    for el in tree.traverse():
        if os.path.isfile(el.path):
            if exclude_file(el.path):
                parser.excluded += 1
            else:
                with open(el.path, 'rb') as fd:
                    parser.parse_lines(fd, args.maxlines, el.path)
0302
def scan_git_subtree(tree, path, dirdepth):
    """Descend from tree along path and scan the sub-tree found there."""
    subdir = path.strip('/')
    # Walk down one path component at a time
    for component in subdir.split('/'):
        tree = tree[component]
    scan_git_tree(tree, subdir, dirdepth)
0307
def read_exclude_file(fname):
    """Return the list of pattern rules read from fname.

    An empty or missing fname yields an empty list.  Blank lines and lines
    starting with '#' (after stripping whitespace) are skipped.
    """
    if not fname:
        return []
    with open(fname) as fd:
        entries = (raw.strip() for raw in fd)
        return [pattern(entry) for entry in entries
                if entry and not entry.startswith('#')]
0321
if __name__ == '__main__':
    # Script entry point: parse the command line, load the license data,
    # scan stdin / the given paths / the whole tree and print statistics.
    # repo, spdx, parser, args and exclude_rules are module globals used by
    # the helper functions in this file.

    ap = ArgumentParser(description='SPDX expression checker')
    ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
    ap.add_argument('-d', '--dirs', action='store_true',
                    help='Show [sub]directory statistics.')
    ap.add_argument('-D', '--depth', type=int, default=-1,
                    help='Directory depth for -d statistics. Default: unlimited')
    ap.add_argument('-e', '--exclude',
                    help='File containing file patterns to exclude. Default: scripts/spdxexclude')
    ap.add_argument('-f', '--files', action='store_true',
                    help='Show files without SPDX.')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
                    help='Maximum number of lines to scan in a file. Default 15')
    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
    args = ap.parse_args()

    # Sanity check the path arguments: stdin cannot be mixed with files
    if '-' in args.path and len(args.path) > 1:
        sys.stderr.write('stdin input "-" must be the only path argument\n')
        sys.exit(1)

    try:
        # The repository in the current working directory is the data source
        repo = git.Repo(os.getcwd())
        # Explicit check instead of assert, which is stripped under -O
        if repo.bare:
            raise RuntimeError('git repository is bare')

        # Read the license and exception ids
        spdx = read_spdxdata(repo)

        # Set up lexer and parser
        parser = id_parser(spdx)

    except SPDXException as se:
        if se.el:
            sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
        else:
            sys.stderr.write('%s\n' %se.txt)
        sys.exit(1)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)

    try:
        fname = args.exclude
        if not fname:
            # Default: 'spdxexclude' next to this script
            fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
        exclude_rules = read_exclude_file(fname)
    except Exception as ex:
        sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            # Re-open stdin in binary mode; parse_lines() decodes itself
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
            parser.parse_lines(stdin, args.maxlines, '-')
        else:
            if args.path:
                for p in args.path:
                    if os.path.isfile(p):
                        # Close the file again once it has been checked
                        with open(p, 'rb') as fd:
                            parser.parse_lines(fd, args.maxlines, p)
                    elif os.path.isdir(p):
                        # head.commit also works with a detached HEAD and
                        # matches the full tree scan below
                        scan_git_subtree(repo.head.commit.tree, p,
                                         args.depth)
                    else:
                        sys.stderr.write('path %s does not exist\n' %p)
                        sys.exit(1)
            else:
                # Full git tree scan
                scan_git_tree(repo.head.commit.tree, '.', args.depth)

            ndirs = len(parser.spdx_dirs)
            dirsok = 0
            if ndirs:
                for di in parser.spdx_dirs.values():
                    if not di.missing:
                        dirsok += 1

            if args.verbose:
                sys.stderr.write('\n')
                sys.stderr.write('License files: %12d\n' %spdx.license_files)
                sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
                sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
                sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
                sys.stderr.write('\n')
                sys.stderr.write('Files excluded: %12d\n' %parser.excluded)
                sys.stderr.write('Files checked: %12d\n' %parser.checked)
                sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
                if parser.checked:
                    pc = int(100 * parser.spdx_valid / parser.checked)
                    sys.stderr.write('Files with SPDX: %12d %3d%%\n' %(parser.spdx_valid, pc))
                sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
                if ndirs:
                    sys.stderr.write('\n')
                    sys.stderr.write('Directories accounted: %8d\n' %ndirs)
                    pc = int(100 * dirsok / ndirs)
                    sys.stderr.write('Directories complete: %8d %3d%%\n' %(dirsok, pc))

            if ndirs and ndirs != dirsok and args.dirs:
                if args.verbose:
                    sys.stderr.write('\n')
                sys.stderr.write('Incomplete directories: SPDX in Files\n')
                for dname in sorted(parser.spdx_dirs.keys()):
                    di = parser.spdx_dirs[dname]
                    if di.missing:
                        valid = di.total - di.missing
                        pc = int(100 * valid / di.total)
                        sys.stderr.write(' %-80s: %5d of %5d %3d%%\n' %(dname, valid, di.total, pc))

            if ndirs and ndirs != dirsok and args.files:
                if args.verbose or args.dirs:
                    sys.stderr.write('\n')
                sys.stderr.write('Files without SPDX:\n')
                for dname in sorted(parser.spdx_dirs.keys()):
                    di = parser.spdx_dirs[dname]
                    # Separate name: the outer loop variable is not reused
                    for missing in sorted(di.files):
                        sys.stderr.write(' %s\n' %missing)

        sys.exit(0)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)