Files
codezero/tools/cml2-tools/kxref.py
Bahadir Balban e2b791a3d8 Initial commit
2008-01-13 13:53:52 +00:00

765 lines
27 KiB
Python
Executable File

#!/usr/bin/env python
"""
kxref.py -- generate configuration symbol cross-reference for the kernel tree
This is a report generator intended to catch problems and inconsistencies
in the configuration-symbol namespace. It uses information generated by
the CML2 compiler -- notably, it relies on the compiler's scanning of
help files.
All this does is generate cross-reference reports on configuration
symbols. But they can be filtered and presented in various
interesting ways. Basic usage is like this:
kxref.py [-f filter | -h] [-l] [-x symbol] [-n re] [sourcetree]
You can set a filter using a boolean-expression minilanguage. The predicates
available are as follows:
c -- select all symbols present in code (.c, .h, .S files)
m -- select all symbols present in makefiles
n -- select all symbols defined in CML2 rulesfiles
h -- select all symbols for which help is available (CML1 convention)
H -- select all symbols for which help is available (CML2 convention)
d -- select all symbols that occur in defconfigs
x -- select all symbols that are derived in CML2.
o -- select all symbols present in CML1 configuration files
a -- select all symbols declared in CML1 configuration files
p -- select all symbols for which autoconfigure.py has a probe
D(name) -- select all symbols transitively dependent on name
A(name) -- select all symbols transitively ancestral to name
T(type) -- select type (trit, bool, string, decimal, hexadecimal)
P(property) -- select all symbols with given property
V(symbol) -- select all symbols with given symbol in their
visibility guard.
Operations available are as follows:
& -- and (set intersection)
| -- or (set union)
~ -- not (set complement).
You may use parentheses for expression grouping.
This program caches a cross-reference database in a file named
xref.out, so all reports after the first are generated really fast.
You should remove this file whenever you apply a patch.
The -i option inverts the report so it's keyed by file, rather than
by symbol.
The -g option generates a patch removing file lines containing the
reported (presumably orphaned) symbols. Use with caution...it's
really only safe for hacking defconfigs.
The -x option is for debugging. It generates a report on an individual
symbol specified as an argument to the option. Flag letters are as
above, with f= giving the value of the computed filter predicate.
The -h option checks for duplicate or superfluous file inclusions
in the source tree.
The -l switch suppresses printing of cross-references;
only symbols matching the given filter(s) are listed.
The -n suppresses listing of files with names matching the given regexp.
If all the files a symbol occurs in are excluded, it will be omitted
from the listings.
The -t option produces a listing of symbols which either have
inconsistent CML1 types or types that differ between CML1 and CML2.
The -k option accepts a file of kill-list symbols to be ignored.
The program has some knowledge of file syntax. It ignores the
contents of comments in C, CML1, and CML2 files (e.g. does not
cross-reference symbols in such comments).
Some interesting reports:
n&~p&~a -- identifies CML2 symbols no longer declared or defined in CML1
"""
import sys, os, re, getopt, cPickle, cml, cmlsystem
# Cross-reference database, keyed by symbol name; assigned by load_context().
xrefs = None
# The CML2 rulebase object; assigned by load_context().
rulebase = None
# Regular expressions and the CML1 type table used while scanning the
# source tree; all of these are (re)assigned by makexref().
typefind = None
choicere = None
configre = None
definere = None
mycml1types = None
def suffix(haystack, *needle):
    "Does a filename have any of the given suffixes?"
    # Collect every candidate that equals the tail of the name; one hit
    # is enough for a positive answer.
    hits = [tail for tail in needle if haystack[-len(tail):] == tail]
    if hits:
        return 1
    return 0
def prefix(haystack, *needle):
    """Does a filename have any of the given prefixes?

    Returns 1 if HAYSTACK begins with at least one of the NEEDLE strings
    and 0 otherwise (including when no prefixes are supplied).
    """
    for pre in needle:
        # The comparison must look at the first len(pre) characters of the
        # name; slicing from len(pre) onward would compare the remainder
        # of the string instead.
        if haystack[:len(pre)] == pre:
            return 1
    return 0
# Code for recognizing symbols and stripping out comments
# Matches a CONFIG_ symbol that is not glued onto a preceding uppercase
# letter, digit or underscore.  It's OK that this also matches _MODULE
# symbols; those are filtered out later by the code that uses the matches.
_CONFIG_SYMBOL = "(?<![A-Z0-9_])(CONFIG_[a-zA-Z0-9_][a-zA-Z0-9_]+)"
configpref = re.compile(_CONFIG_SYMBOL)
# Regular expressions for stripping out C comments. We're aided here by the
# fact that we don't care about the contents of most of the file. So by
# first stripping out / and * characters that are not part of comment
# delimiter pairs, we can make detecting comments pretty trivial. This won't
# completely strip comments of the form /* aaaa /* bbbb */, but for this
# application that's OK -- we don't have to be perfect, just reduce the
# exception cases to the point where eyeball checking is feasible. Use
# of lookaheads and lookbehinds avoids nipping off anything that might
# be a nearby bit of symbol.
#
# A slash with a non-star character on both sides, and a star with a
# non-slash character on both sides.  Lookarounds are used so that the
# neighbouring characters themselves are left in place.
randomslash = re.compile(r"(?<=[^*])/(?=[^*])")
randomstar = re.compile(r"(?<=[^/])\*(?=[^/])")
# What a comment looks like once the lone stars inside it are gone.
c_comment = re.compile(r"/\*[^*]*\*/")


def c_comment_strip(str):
    "Return the text with lone slashes, lone stars and C comments deleted."
    no_slashes = randomslash.sub("", str)
    no_stars = randomstar.sub("", no_slashes)
    return c_comment.sub("", no_stars)
# Shell, config-file, and Makefile-style comments.
#
# A '#' and everything after it on the same line, together with the
# line's newline when there is one.  The newline is optional so that a
# comment on an unterminated last line is stripped as well.
hashcomment = re.compile(r"#[^\n]*\n?")


def hash_comment_strip(str):
    """Return the text with shell/Makefile-style '#' comments removed.

    Each comment is deleted from the '#' through the end of its line,
    including the terminating newline if the line has one.
    """
    return hashcomment.sub("", str)
# Code for generating the cross-reference
def ignore(file):
    "Return 1 if the file should be ignored for cross-referencing purposes."
    # CML files are on the list because symbols are looked up directly in
    # the rulebase rather than being scanned out of the rules files.
    skipped_endings = (".bak", ".orig", ".rej", ".cml", ".o", ".a", ".out",
                       "log", "Log", ",v", "~")
    return suffix(file, *skipped_endings)
# Symbols that are never cross-referenced.  The initial entries are names
# used in the language documentation; the -k option adds more.
kill_list = dict([(word, 1) for word in ("CHEER", "BOOM", "BOGUS")])
def makexref(tree):
    """Generate a cross-reference dictionary for the given source tree.

    Walks TREE (with the working directory temporarily changed to it) and
    scans every regular file that ignore() does not reject.

    Returns a pair (xrefdict, mycml1types):

    xrefdict maps each prefix-stripped symbol name to the list of files
    (paths relative to TREE) in which it occurs.

    mycml1types maps a symbol name to the CML1 type declarations found
    for it in files ending in ".in".  A symbol with a single recorded
    declaration keeps the form [(type, filename)]; symbols with several
    are collapsed to [(type, [filenames...]), ...], one entry per type.

    As a side effect the module globals typefind, choicere, configre,
    definere and mycml1types are (re)assigned.
    """
    global typefind, choicere, configre, definere, mycml1types
    typefind = re.compile(r"(?<!define_)(bool|tristate|int|hex|string)\s+'.*'\s+CONFIG_(\w+)")
    choicere = re.compile(r"^\s*choice")
    configre = re.compile(rulebase.prefix + r"(\w*)")
    definere = re.compile(r"^\s+define_([a-z]*)\s+(\w*)")
    mycml1types = {}

    def note_type(symbol, symtype, path):
        "Record one CML1 declaration, ignoring exact repeats."
        declared = mycml1types.setdefault(symbol, [])
        if (symtype, path) not in declared:
            declared.append((symtype, path))

    def filevisitor(xref, path):
        "Visit a file on behalf of the cross-referencer."
        if path[0] == '.':
            return
        fp = open(path)
        try:
            contents = fp.read()
        finally:
            fp.close()
        if suffix(path, ".c", ".h", ".S"):
            contents = c_comment_strip(contents)
        elif suffix(path, ".in", ".cml"):
            contents = hash_comment_strip(contents)
        for match in configpref.findall(contents):
            if suffix(match, "_MODULE"):
                continue
            match = namestrip(match)
            if match in kill_list:
                continue
            occurrences = xref.setdefault(match, [])
            if path not in occurrences:
                occurrences.append(path)
        # Only CML1 configuration files carry type declarations.
        if not suffix(path, ".in"):
            return
        lines = contents.split("\n")
        while lines:
            line = lines.pop(0)
            if not choicere.match(line):
                # First extract type info for ordinary symbols
                m = typefind.search(line)
                if m:
                    note_type(m.group(2), m.group(1), path)
                # CML1 defines count with other symbols of their type
                symdef = definere.search(line)
                if symdef:
                    note_type(namestrip(symdef.group(2)), symdef.group(1), path)
                continue
            # A choice: the lines that follow and mention the prefix each
            # name one alternative.  Checking "lines" first keeps a choice
            # block that runs to the end of the file from raising
            # IndexError.
            while lines and lines[0].find(rulebase.prefix) > -1:
                findit = configre.search(lines[0])
                symbol = namestrip(findit.group(0))
                mycml1types.setdefault(symbol, []).append(("choice", path))
                if lines[0].find('" ') > -1:
                    # This line is left in place, so the outer loop sees
                    # it again as an ordinary line.
                    break
                lines.pop(0)

    def xrefvisit(xref, dir, files):
        "Visit a directory on behalf of the cross-referencer."
        for file in files:
            # The walk starts at ".", so every joined path begins with
            # "./"; slice that off.
            node = os.path.join(dir, file)[2:]
            if os.path.isfile(node) and not ignore(node):
                filevisitor(xref, node)

    xrefdict = {}
    here = os.getcwd()
    # Use the tree we were handed rather than a global set by the caller.
    os.chdir(tree)
    try:
        os.path.walk(".", xrefvisit, xrefdict)
    finally:
        # Put the working directory back even if the scan fails.
        os.chdir(here)
    # Data reduction -- collapse CML1 cross references of identical type
    for (key, value) in mycml1types.items():
        if len(value) <= 1:
            continue        # Only interested in the multiples
        tdict = {}
        for (symtype, path) in value:
            tdict.setdefault(symtype, []).append(path)
        mycml1types[key] = [(symtype, tdict[symtype]) for symtype in tdict.keys()]
    # Second stage of data reduction -- if a symbol has both a choice
    # declaration and another of a different type, suppress the non-choice
    # declaration -- we can assume it came from a CML1 define.
    for (key, value) in mycml1types.items():
        if "choice" in [entry[0] for entry in value]:
            mycml1types[key] = [entry for entry in value if entry[0] == "choice"]
    return (xrefdict, mycml1types)
# Symbols for which a probe exists; filled in by load_probe_table().
probe_table = {}


def load_probe_table():
    "Build a table of symbols for which we have probes."
    from autoconfigure import get_arch
    # Everything bound below stays a local on purpose: execfile() is
    # called without namespace arguments, so the rules file is run in
    # this function's environment and sees these names.
    (ARCH, ARCHSYMBOL) = get_arch()
    TRUE = PRESENT = 1
    FALSE = ABSENT = 0
    y = m = n = 0

    def _probed(symbol):
        "Remember that SYMBOL has a probe."
        probe_table[symbol] = 1

    def DEBUG(str):
        pass

    # Each probe function does nothing but record its symbol argument.
    def PCI(prefix, symbol):
        _probed(symbol)

    def PCI_CLASS(match, symbol):
        _probed(symbol)

    def PNP(match, symbol):
        _probed(symbol)

    def MCA(match, symbol):
        _probed(symbol)

    def USBP(match, symbol):
        _probed(symbol)

    def USBC(match, symbol):
        _probed(symbol)

    def USBI(match, symbol):
        _probed(symbol)

    def FS(match, symbol):
        _probed(symbol)

    def DEV(match, symbol):
        _probed(symbol)

    def DEVM(match, symbol):
        _probed(symbol)

    def CONS(match, symbol):
        _probed(symbol)

    def DMESG(match, symbol, truthval=None):
        _probed(symbol)

    def NET(match, symbol):
        _probed(symbol)

    def IDE(match, symbol):
        _probed(symbol)

    def REQ(match, symbol):
        _probed(symbol)

    def CPUTYPE(match, symbol):
        _probed(symbol)

    def CPUINFO(match, symbol, present=None, truthval=None):
        _probed(symbol)

    def EXISTS(procfile, symbol):
        _probed(symbol)

    def MODULE(name, symbol):
        _probed(symbol)

    def GREP(pattern, file, symbol):
        _probed(symbol)

    # rulesfile is a module global set from the -r option.
    execfile(rulesfile)
# Predicates for filtering the reports
def namestrip(name):
    "Return NAME without the rulebase prefix, if it starts with one."
    stem = rulebase.prefix
    if stem and name.startswith(stem):
        return name[len(stem):]
    return name
def in_code(name):
    "Does a name occur in code?"
    for path in xrefs.get(name, []):
        if suffix(path, ".c", ".S"):
            return 1
        # Headers count too, except for autoconf.h.
        if suffix(path, ".h") and not suffix(path, "autoconf.h"):
            return 1
    return 0
def in_help(name):
    "Is there help for a symbol (CML1 convention)?"
    # Ask the rulebase first; this catches choice names that aren't in
    # Configure.help directly.
    entry = rulebase.dictionary.get(namestrip(name))
    if entry:
        if entry.help():
            return 1
    # Fall back on the cross-reference, which catches names that are in a
    # helpfile but not known to CML2.  (False negative if there is ever a
    # choice name that CML2 doesn't know about.)
    helpfiles = [path for path in xrefs.get(name, []) if suffix(path, ".help")]
    if helpfiles:
        return 1
    return 0
def in_cml2_help(name):
    "Does a name occur in some help file (CML2 rules)?"
    entry = rulebase.dictionary.get(namestrip(name))
    has_text = entry and entry.helptext
    if has_text:
        return 1
    # Not known to CML2, or known without help text: see whether the
    # cross-reference found the name in a helpfile anyway.
    if name in xrefs:
        for path in xrefs[name]:
            if suffix(path, ".help"):
                return 1
    # False negative if there is ever a choice name that CML2
    # doesn't know about.
    return 0
def in_makefile(name):
    "Does a name occur in a makefile?"
    # "akefile" covers both the Makefile and makefile spellings.
    makefiles = [path for path in xrefs.get(name, []) if suffix(path, "akefile")]
    if makefiles:
        return 1
    return 0
def in_cml1(name):
    "Does a name occur in a CML1 file?"
    if name in xrefs:
        # "onfig.in" covers both the Config.in and config.in spellings.
        for path in xrefs[name]:
            if suffix(path, "onfig.in"):
                return 1
    return 0
def cml1_declared(name):
    "Is a name declared (assigned a type) in a CML1 file?"
    declared = name in mycml1types
    return declared
def in_defconfig(name):
    """Does this symbol occur in a defconfig?

    Returns 1 if any file the symbol was found in has "defconfig" or
    "configs/" somewhere in its path, and 0 otherwise (including when
    the symbol is not in the cross-reference at all).
    """
    if name not in xrefs:
        return 0
    for file in xrefs[name]:
        if file.find("defconfig") > -1 or file.find("configs/") > -1:
            return 1
    return 0
def in_cml2(name):
    "Is this a valid CML2 symbol?"
    # The rulebase dictionary is keyed by prefix-stripped names.
    stripped = namestrip(name)
    return rulebase.dictionary.has_key(stripped)
def is_derived(name):
    "Is this a CML2 derived name?"
    entry = rulebase.dictionary.get(namestrip(name))
    if not entry:
        return 0
    if entry.is_derived():
        return 1
    return 0
def dependent_of(ancestor, name):
    """Is given symbol a dependent of given ancestor?

    ANCESTOR and NAME are symbol names, with or without the rulebase
    prefix.  Returns 1 if the rulebase entry for ANCESTOR reports itself
    an ancestor of the entry for NAME, and 0 otherwise -- including when
    either name is unknown to the rulebase.
    """
    ancestor = rulebase.dictionary.get(namestrip(ancestor))
    entry = rulebase.dictionary.get(namestrip(name))
    # An ancestor name that is not in the rulebase comes back as None;
    # without this check the method call below raises AttributeError.
    if ancestor is None:
        return 0
    if entry and ancestor.ancestor_of(entry):
        return 1
    else:
        return 0
def ancestor_of(dependent, name):
    """Is given symbol an ancestor of given dependent?

    DEPENDENT and NAME are symbol names, with or without the rulebase
    prefix.  Returns 1 if the rulebase entry for NAME reports itself an
    ancestor of the entry for DEPENDENT, and 0 otherwise -- including
    when either name is unknown to the rulebase.
    """
    dependent = rulebase.dictionary.get(namestrip(dependent))
    entry = rulebase.dictionary.get(namestrip(name))
    # A dependent name that is not in the rulebase has no entry to hand
    # to ancestor_of(); answer "no" instead of passing None along.
    if dependent is None:
        return 0
    if entry and entry.ancestor_of(dependent):
        return 1
    else:
        return 0
def type_of(typename, name):
    "Is given symbol of given type?"
    entry = rulebase.dictionary.get(namestrip(name))
    if not entry:
        return 0
    if entry.type == typename:
        return 1
    return 0
def has_property(property, name):
    "Does given symbol have given property?"
    entry = rulebase.dictionary.get(namestrip(name))
    if not entry:
        return 0
    if property in entry.properties:
        return 1
    return 0
def is_probed(name):
    "Does given symbol have a probe?"
    entry = rulebase.dictionary.get(namestrip(name))
    # The probe table is loaded the first time it is needed (and again on
    # any later call that finds it still empty).
    if not probe_table:
        load_probe_table()
    if not entry:
        # Hand back the false lookup result itself, as "entry and ..." would.
        return entry
    return entry.name in probe_table
def in_visibility(guard, name):
    "Does the symbol GUARD occur in the visibility predicate of NAME?"
    entry = rulebase.dictionary.get(namestrip(name))
    if not entry:
        return 0
    guard_entry = rulebase.dictionary.get(namestrip(guard))
    predicate = entry.visibility
    if not predicate:
        # No visibility predicate: return the false value itself.
        return predicate
    return guard_entry in cml.flatten_expr(predicate)
# Report generation
def setfilter(filterspec):
    "Set the filter function."
    # Characters that translate directly into a piece of Python source.
    plain = {
        "(": "(",
        ")": ")",
        "a": " cml1_declared(name)",
        "c": " in_code(name)",
        "h": " in_help(name)",
        "H": " in_cml2_help(name)",
        "m": " in_makefile(name)",
        "o": " in_cml1(name)",
        "n": " in_cml2(name)",
        "d": " in_defconfig(name)",
        "x": " is_derived(name)",
        "p": " is_probed(name)",
        "~": " not",
        "&": " and",
        "|": " or",
    }
    # Predicates followed by a parenthesized argument, which is copied
    # into the generated call as a string literal.
    with_argument = {
        "D": " dependent_of",
        "A": " ancestor_of",
        "T": " type_of",
        "P": " has_property",
        "V": " in_visibility",
    }
    if not filterspec:
        function = "def myfilter(name): return 1"
    else:
        in_argument = 0
        pieces = []
        for c in filterspec:
            if in_argument:
                if c == ')':
                    pieces.append('", name)')
                    in_argument = 0
                elif c == '(':
                    pieces.append('("')
                else:
                    pieces.append(c)
            elif c in plain:
                pieces.append(plain[c])
            elif c in with_argument:
                pieces.append(with_argument[c])
                in_argument = 1
            # Any other character, blanks and tabs included, is skipped.
        function = "def myfilter(name): return " + "".join(pieces)
    #sys.stderr.write("Filter function: " + function + "\n")
    # Defines (or redefines) myfilter at module level.
    exec(function, globals())
def report(keys, norefs=0):
    "Generate a filtered report on the cross-references."
    for symbol in keys:
        # Drop references that the -n pattern (the global "suppress")
        # matches; a symbol left with no references is not listed at all.
        refs = [ref for ref in xrefs[symbol]
                if not (suppress and suppress.search(ref))]
        if not refs:
            continue
        if norefs:
            print(symbol)
        else:
            sys.stdout.write(symbol + ":" + "".join([" " + ref for ref in refs]) + "\n")
def generate_patch(file, symbols):
    """Generate a patch deleting the given symbols from the given file.

    Every line of FILE that mentions one of SYMBOLS is dropped from a
    copy written to FILE + ".tweaked"; "diff -u" of the two is then run
    (its output goes wherever the diff command writes it) and the copy
    is removed.  FILE itself is not modified.
    """
    pfp = open(file, "rb")
    try:
        contents = pfp.read()
    finally:
        pfp.close()
    for symbol in symbols:
        # The symbol must not run on into further identifier characters,
        # so that FOO does not also delete lines for FOO_BAR.  Using a
        # lookahead (instead of consuming a character) keeps a symbol at
        # the very end of a line from swallowing the newline and taking
        # the following line with it.  The final alternative lets an
        # unterminated last line be removed too.
        doomed = re.compile("^.*" + re.escape(symbol)
                            + "(?![A-Za-z0-9_]).*(?:\n|\\Z)", re.M)
        contents = doomed.sub("", contents)
    pfp = open(file + ".tweaked", "wb")
    try:
        pfp.write(contents)
    finally:
        pfp.close()
    # NOTE(review): the file name is interpolated into a shell command
    # unquoted, so names containing blanks or shell metacharacters will
    # not work here.
    os.system("diff -u %s %s.tweaked; rm %s.tweaked" % (file, file, file))
# Inclusion checking. This lives here because we use the CML2 rulebase to
# check which CONFIG_ symbols are defined (just checking for a CONFIG_ stem
# isn't reliable as CML2 doesn't completely own that namespace).
# Captures the file name out of an #include line.
includere = re.compile(r'^\s*#\s*include\s*[<"](\S*)[>"]', re.M)


def includecheck(sourcetree):
    """Check the inclusion structure of a source tree.

    Walks SOURCETREE (with the working directory temporarily changed to
    it) and, for each .c, .h and .S file, prints a line when a header is
    included more than once, when the configure header is included but
    no rulebase symbol is used, or when rulebase symbols are used but
    the configure header is not included.
    """
    def filevisitor(path):
        "Visit a file on behalf of the inclusion checker."
        fp = open(path)
        try:
            contents = fp.read()
        finally:
            fp.close()
        # First get the list of included files
        inclusions = includere.findall(contents)
        # This strips slashes, so it has to be done after
        contents = c_comment_strip(contents)
        # Check to see if we have defined CONFIG_ symbols in the file
        matched = []
        for match in configpref.findall(contents):
            if suffix(match, "_MODULE"):
                match = match[:-7]
            match = namestrip(match)        # Strip prefix
            if rulebase.dictionary.has_key(match) and match not in matched:
                matched.append(match)
        # Check for duplicates.  Headers are remembered in first-seen
        # order so that each duplicated header is reported once, not once
        # per occurrence.
        counts = {}
        seen_order = []
        for header in inclusions:
            if header not in counts:
                counts[header] = 0
                seen_order.append(header)
            counts[header] += 1
        for header in seen_order:
            if counts[header] > 1:
                print("%s: %s is included %d times" % (path, header, counts[header]))
        # OK, check to see if we have autoconf inclusion.
        have_autoconf = ("autoconf.h" in inclusions
                         or "linux/config.h" in inclusions)
        if not matched and have_autoconf:
            print("%s: has unnecessary configure file inclusion" % path)
        elif matched and not have_autoconf:
            print("%s: needs configure file inclusion for %s" % (path, matched))

    def includevisit(dummy, dir, files):
        "Visit a directory on behalf of the inclusion checker."
        for file in files:
            if suffix(file, ".c", ".h", ".S"):
                # The walk starts at ".", so every joined path begins
                # with "./"; slice that off.
                node = os.path.join(dir, file)[2:]
                if os.path.isfile(node) and not ignore(node):
                    filevisitor(node)

    here = os.getcwd()
    os.chdir(sourcetree)
    try:
        os.path.walk(".", includevisit, None)
    finally:
        # Put the working directory back even if the scan fails.
        os.chdir(here)
# The main program
def load_context(tree):
    """Load context, including CML2 rulebase and cross-reference database.

    Sets the module globals rulebase, xrefs and mycml1types.  The
    rulebase is read from TREE/rules.out; if that file is missing a
    message is printed and SystemExit(1) is raised.  The cross-reference
    is read from TREE/xref.out when that file exists; otherwise it is
    generated with makexref() and cached there.
    """
    global rulebase, xrefs, mycml1types
    # Get a CML2 rulebase.
    rules_file = os.path.join(tree, "rules.out")
    if not os.path.exists(rules_file):
        print("This program requires a CML2 rulebase in the source tree.")
        raise SystemExit(1)
    rulebase = cmlsystem.CMLSystem(rules_file)
    # Try to find a saved cross-reference database.  If no such database
    # exists, generate one and cache it.
    xref_file = os.path.join(tree, "xref.out")
    if os.path.exists(xref_file):
        sys.stderr.write("Reading cross-reference database...")
        ifp = open(xref_file, "rb")
        try:
            # NOTE: unpickling executes whatever the file describes, so
            # xref.out must come from a trusted source tree.
            (xrefs, mycml1types) = cPickle.load(ifp)
        finally:
            ifp.close()
        sys.stderr.write("done.\n")
    else:
        sys.stderr.write("Regenerating cross-reference database...")
        (xrefs, mycml1types) = makexref(tree)
        # Protocol 1 is a binary pickle format, and the cache is read
        # back in binary mode, so it has to be written in binary mode too.
        ofp = open(xref_file, "wb")
        try:
            cPickle.dump((xrefs, mycml1types), ofp, 1)
        finally:
            ofp.close()
        sys.stderr.write("done.\n")
if __name__ == "__main__":
    # Everything assigned here is a module global; report() reads
    # "suppress" and load_probe_table() reads "rulesfile".
    setfilter(None)
    examine = ""
    norefs = 0
    typecheck = 0
    suppress = None
    rulesfile = None
    invert = genpatch = checkincludes = 0
    (options, arguments) = getopt.getopt(sys.argv[1:], "ef:ghik:ln:r:tx:")
    for (switch, val) in options:
        if switch == '-f':
            setfilter(val)
        elif switch == '-i':
            invert = 1
        elif switch == '-g':
            invert = genpatch = 1
        elif switch == '-h':
            checkincludes = 1
        elif switch == '-k':
            # One kill-list symbol per line; close the file when done.
            fp = open(val, "r")
            try:
                for line in fp.readlines():
                    kill_list[line.strip()] = 1
            finally:
                fp.close()
        elif switch == '-l':
            norefs = 1
        elif switch == '-n':
            suppress = re.compile(val)
        elif switch == '-r':
            rulesfile = val
        elif switch == '-t':
            typecheck = 1
        elif switch == '-x':
            examine = val

    if len(arguments) < 1:
        sourcetree = "."
    else:
        sourcetree = arguments[0]

    # Load or regenerate the cross-reference database
    load_context(sourcetree)

    if not checkincludes:
        # OK, now filter the database
        keys = [symbol for symbol in xrefs.keys() if myfilter(symbol)]
        keys.sort()
        # If invert was specified, invert the database so it's keyed by file
        if invert:
            inverted = {}
            for key in keys:
                for file in xrefs[key]:
                    members = inverted.setdefault(file, [])
                    if key not in members:
                        members.append(key)
            xrefs = inverted
            keys = list(inverted.keys())
            keys.sort()

    # The include check is tested first: "keys" is only computed when it
    # is not requested, so combining -h with -g must not fall into the
    # patch branch.
    if checkincludes:
        includecheck(sourcetree)
    elif genpatch:
        for file in keys:
            generate_patch(file, xrefs[file])
    elif examine:
        shortname = namestrip(examine)
        if not rulebase.dictionary.has_key(shortname) and examine not in mycml1types:
            print("%s: no such symbol" % examine)
        else:
            # The filter may hand back None or some other non-number
            # (the predicates built on "entry and ..." do), which %d
            # cannot format; reduce it to 1 or 0 first.
            filtered = 0
            if myfilter(examine):
                filtered = 1
            print("%s: a=%d c=%d h=%d o=%d n=%d m=%d d=%d x=%s f=%d" % (examine, cml1_declared(examine), in_code(examine), in_help(examine), in_cml1(examine), in_cml2(examine), in_makefile(examine), in_defconfig(examine), is_derived(examine), filtered))
    elif typecheck:
        print("CML1 type consistency report:")
        hits = []
        ok = 0
        for (key, item) in mycml1types.items():
            if len(item) == 1:
                ok += 1
            else:
                hits.append(key)
        print("%d symbols have consistent type declarations." % ok)
        if hits:
            print("Non-declared or multiply-declared symbols:")
            for symbol in hits:
                print("%s:" % symbol)
                for (symtype, locs) in mycml1types[symbol]:
                    print(" %-8s: %s" % (symtype, " ".join(locs)))
        print("CML2 type cross-check:")
        missing = 0
        matching = 0
        # CML2 type name -> the CML1 keyword for the same type.
        typemap = {"bool":"bool", "trit":"tristate", "string":"string", "decimal":"int", "hexadecimal":"hex"}
        for (key, item) in mycml1types.items():
            if not rulebase.dictionary.has_key(namestrip(key)):
                missing += 1
                continue
            elif len(item) != 1:
                continue
            cml2symbol = rulebase.dictionary[namestrip(key)]
            cml1type = item[0][0]
            # .get() so that a CML2 type with no CML1 equivalent is
            # reported as a mismatch below instead of raising KeyError.
            if typemap.get(cml2symbol.type) == cml1type:
                matching += 1
            elif cml2symbol.menu and cml2symbol.menu.type=="choices" and cml1type=="choice":
                matching += 1
            else:
                if cml2symbol.is_derived():
                    derived = "(derived)"
                else:
                    derived = ""
                print('"%s", line %d: %s, %s -> %s %s' % (cml2symbol.file, cml2symbol.lineno, key, item[0][0], cml2symbol.type, derived))
        print("%d CML1 symbols missing, %d type matches" % (missing, matching))
    else:
        # OK, list the filtered symbols
        try:
            report(keys, norefs)
        except (KeyboardInterrupt, IOError):
            pass        # In case we break a pipe by interrupting
# That's all, folks!