codezero/tools/cml2-tools/kxref.py

#!/usr/bin/env python
"""
kxref.py -- generate configuration symbol cross-reference for the kernel tree

This is a report generator intended to catch problems and inconsistencies
in the configuration-symbol namespace.  It uses information generated by
the CML2 compiler -- notably, it relies on the compiler's scanning of
help files.

All this does is generate cross-reference reports on configuration
symbols.  But they can be filtered and presented in various
interesting ways.  Basic usage is like this:

 	kxref.py [-f filter | -h] [-l] [-x symbol] [-n re] [sourcetree]

You can set a filter using a boolean-expression minilanguage.  The predicates
available are as follows:

	c -- select all symbols present in code (.c, .h, .S files)
        m -- select all symbols present in makefiles
        n -- select all symbols defined in CML2 rulesfiles
        h -- select all symbols for which help is available (CML1 convention)
        H -- select all symbols for which help is available (CML2 convention)
        d -- select all symbols that occur in defconfigs
        x -- select all symbols that are derived in CML2.
        o -- select all symbols present in CML1 configuration files
        a -- select all symbols declared in CML1 configuration files
        p -- select all symbols for which autoconfigure.py has a probe

        D(name) -- select all symbols transitively dependent on name
        A(name) -- select all symbols transitively ancestral to name
        T(type) -- select type (trit, bool, string, decimal, hexadecimal)
        P(property) -- select all symbols with given property
        V(symbol) -- select all symbols with given symbol in their
        		visibility guard.

Operations available are as follows:

	& -- and (set intersection)
	| -- or (set union)
        ~ -- not (set complement).

You may use parentheses for expression grouping.

This program caches a cross-reference database in a file named
xref.out, so all reports after the first are generated really fast.
You should remove this file whenever you apply a patch.

The -i option inverts the report so it's keyed by file, rather than
by symbol.

The -g option generates a patch removing file lines containing the
reported (presumably orphaned) symbols.  Use with caution...it's
really only safe for hacking defconfigs.

The -x option is for debugging.  It generates a report on an individual
symbol specified as an argument to the option.  Flag letters are as
above, with f= giving the value of the computed filter predicate.

The -h option checks for duplicate or superfluous file inclusions
in the source tree.

The -l switch suppresses printing of cross-references;
only symbols matching the given filter(s) are listed.

The -n option suppresses listing of files with names matching the given regexp.
If all the files a symbol occurs in are excluded, it will be omitted
from the listings.

The -t option produces a listing of symbols which either have
inconsistent CML1 types or types that differ between CML1 and CML2.

The -k option accepts a file of kill-list symbols to be ignored.

The program has some knowledge of file syntax.  It ignores the
contents of comments in C, CML1, and CML2 files (e.g. does not
cross-reference symbols in such comments).

Some interesting reports:

n&~p&~a	-- identifies CML2 symbols no longer declared or defined in CML1

"""
import sys, os, re, getopt, cPickle, cml, cmlsystem

# Cross-reference database (symbol name -> list of files mentioning it);
# assigned by load_context().
xrefs = None
# The CML2 rulebase object; assigned by load_context().
rulebase = None
# Compiled regexps used while scanning CML1 files, plus the table of CML1
# type declarations; all of these are (re)initialized by makexref().
typefind = choicere = configre = definere = mycml1types = None

def suffix(haystack, *needle):
    "Return 1 if haystack ends with at least one of the given suffixes, else 0."
    # Compare the tail of the name, cut to each candidate's length.
    matching = [ending for ending in needle if haystack[-len(ending):] == ending]
    if matching:
        return 1
    return 0

def prefix(haystack, *needle):
    "Return 1 if haystack starts with at least one of the given prefixes, else 0."
    for pre in needle:
        # Compare the *leading* len(pre) characters.  (The previous code
        # sliced from len(pre) onward, i.e. compared the tail of the name,
        # so a genuine prefix was never recognized.)
        if haystack[:len(pre)] == pre:
            return 1
    return 0

# Code for recognizing symbols and stripping out comments

# It's OK that this matches _MODULE symbols, we'll filter those out later.
configpref = re.compile("(?<![A-Z0-9_])(CONFIG_[a-zA-Z0-9_][a-zA-Z0-9_]+)")

# Regular expressions for stripping out C comments.  We're aided here by the
# fact that we don't care about the contents of most of the file.  So by
# first stripping out / and * characters that are not part of comment
# delimiter pairs, we can make detecting comments pretty trivial.  This won't
# completely strip comments of the form /* aaaa /* bbbb */, but for this
# application that's OK -- we don't have to be perfect, just reduce the
# exception cases to the point where eyeball checking is feasible.  Use
# of lookaheads and lookbehinds avoids nipping off anything that might
# be a nearby bit of symbol.
#
# A slash that has no star on either side, and a star that has no slash on
# either side: neither can belong to a comment delimiter.
randomslash = re.compile("(?<=[^*])/(?=[^*])")
randomstar  = re.compile("(?<=[^/])\*(?=[^/])")
# Once the strays are gone, a comment contains no star before its closer.
c_comment = re.compile("/\*[^*]*\*/")

def c_comment_strip(str):
    "Return the text with C comments (and all stray / and * characters) deleted."
    # Drop the stray characters first so the simple comment pattern suffices.
    for stray in (randomslash, randomstar):
        str = stray.sub("", str)
    return c_comment.sub("", str)

# Shell, config-file, and Makefile-style comments.
#
# A comment runs from "#" to the end of its line.  The terminating newline is
# consumed along with it; the \Z alternative also catches a comment on a final
# line that has no trailing newline (previously such a comment survived).
hashcomment = re.compile(r"#.*(?:\n|\Z)")

def hash_comment_strip(str):
    """Return the text with #-style comments removed.

    Everything from each "#" through the end of that line, including the
    newline when there is one, is deleted.  No attempt is made to recognize
    "#" characters inside quoted strings.
    """
    return hashcomment.sub("", str)

# Code for generating the cross-reference

def ignore(file):
    "Return 1 if this file is to be left out of the cross-reference, else 0."
    # Backup/patch debris, build products, logs and RCS files carry no
    # information; CML files are skipped because symbols in them are looked
    # up directly in the rulebase.
    skipped_endings = (".bak", ".orig", ".rej", ".cml", ".o", ".a", ".out", "log", "Log", ",v", "~")
    return suffix(file, *skipped_endings)

# Symbols the cross-referencer must never record.  The initial entries are
# names used only in the language documentation; the -k option adds more.
kill_list = {"CHEER":1, "BOOM":1, "BOGUS":1}

def makexref(tree):
    """Generate a cross-reference dictionary for the given source tree.

    Walks every file under `tree` (except those rejected by ignore()) and
    returns a pair (xrefdict, mycml1types):

    xrefdict     maps each prefix-stripped symbol name to the list of files,
                 relative to `tree`, in which the symbol occurs.
    mycml1types  maps each symbol typed in a CML1 ".in" file to a list of
                 declarations.  A symbol with a single declaration keeps one
                 (type, file) pair; a symbol with several declarations ends up
                 with (type, [file, ...]) pairs, one per distinct type.

    As side effects the module-level scanning regexps and `mycml1types` are
    (re)initialized.  The working directory is changed to `tree` for the
    duration of the walk and restored afterwards, even on error.
    """
    global typefind, choicere, configre, definere, mycml1types
    typefind = re.compile(r"(?<!define_)(bool|tristate|int|hex|string)\s+'.*'\s+CONFIG_(\w+)")
    choicere = re.compile(r"^\s*choice")
    configre = re.compile(rulebase.prefix + r"(\w*)")
    definere = re.compile(r"^\s+define_([a-z]*)\s+(\w*)")
    mycml1types = {}
    def xrefvisit(dict, dir, files):
        "Visit a directory on behalf of the cross-referencer."
        def filevisitor(dict, file):
            "Visit a file on behalf of the cross-referencer."
            if file[0] == '.':
                return
            fp = open(file)
            try:
                contents = fp.read()
            finally:
                fp.close()
            if suffix(file, ".c", ".h", ".S"):
                contents = c_comment_strip(contents)
            elif suffix(file, ".in", ".cml"):
                contents = hash_comment_strip(contents)
            for match in configpref.findall(contents):
                if suffix(match, "_MODULE"):
                    continue
                match = namestrip(match)
                if kill_list.has_key(match):
                    continue
                elif not dict.has_key(match):
                    dict[match] = []
                if file not in dict[match]:
                    dict[match].append(file)
            # Parse file contents for choice symbols
            if suffix(file, ".in"):
                lines = contents.split("\n")
                while lines:
                    if not choicere.match(lines[0]):
                        # First extract type info for ordinary symbols
                        m = typefind.search(lines[0])
                        if m:
                            symtype = m.group(1)
                            symname = m.group(2)
                            if not mycml1types.has_key(symname):
                                mycml1types[symname] = []
                            if (symtype, file) not in mycml1types[symname]:
                                mycml1types[symname].append((symtype, file))
                        # CML1 defines count with other symbols of their type
                        symdef = definere.search(lines[0])
                        if symdef:
                            symbol = namestrip(symdef.group(2))
                            type = symdef.group(1)
                            if not mycml1types.has_key(symbol):
                                mycml1types[symbol] = []
                            if (type, file) not in mycml1types[symbol]:
                                mycml1types[symbol].append((type, file))
                        lines.pop(0)
                        continue
                    else:
                        # Skip the "choice" line itself, then collect the
                        # alternatives that follow it.  The "lines and" guard
                        # stops cleanly when a choice block is the last thing
                        # in the file instead of indexing an empty list.
                        lines.pop(0)
                        while lines and lines[0].find(rulebase.prefix) > -1:
                            findit = configre.search(lines[0])
                            symbol = namestrip(findit.group(0))
                            if not mycml1types.has_key(symbol):
                                mycml1types[symbol] = []
                            mycml1types[symbol].append(("choice", file))
                            # A closing quote followed by a space marks the
                            # last alternative; leave that line for the outer
                            # loop, as before.
                            if lines[0].find('" ') > -1:
                                break
                            lines.pop(0)

        for file in files:
            # Drop the leading "./" contributed by walking from ".".
            node = os.path.join(dir, file)[2:]
            if os.path.isfile(node) and not ignore(node):
                filevisitor(dict, node)

    xrefdict = {}
    here = os.getcwd()
    # Use the tree we were asked to scan.  (This used to read the global
    # `sourcetree`, which only exists when the file runs as a script.)
    os.chdir(tree)
    try:
        os.path.walk(".", xrefvisit, xrefdict)
    finally:
        os.chdir(here)
    # Data reduction -- collapse CML1 cross references of identical type
    for (key, value) in mycml1types.items():
        if len(value) <= 1:
            continue		# Only interested in the multiples
        else:
            tdict = {}
            for (type, file) in value:
                tdict[type] = []
            for (type, file) in value:
                tdict[type].append(file)
            reslist = []
            for type in tdict.keys():
                reslist.append((type, tdict[type]))
            mycml1types[key] = reslist
    # Second stage of data reduction -- if a symbol has both a choice
    # declaration and another of a different type, suppress the non-choice
    # declaration -- we can assume it came from a CML1 define.
    for (key, value) in mycml1types.items():
        if "choice" in map(lambda x: x[0], value):
            mycml1types[key]=filter(lambda x: x[0]=="choice", mycml1types[key])
    return (xrefdict, mycml1types)

# Symbols for which a probe exists (symbol -> 1).  Filled in on demand by
# load_probe_table(), which is_probed() calls while the table is still empty.
probe_table = {}

def load_probe_table():
    """Build a table of symbols for which we have probes.

    Executes the file named by the global `rulesfile` via execfile().  With no
    explicit namespaces, execfile() runs the file in this function's scope, so
    the names defined below are what that file can see.  Every probe helper
    here ignores its matching criteria and simply records its symbol in
    `probe_table`.
    NOTE(review): presumably the rules file is the autoconfigure probe rules,
    written as calls to these helpers -- confirm against autoconfigure.py.
    """
    from autoconfigure import get_arch
    # Not used in this function itself; presumably referenced by the rules
    # file -- TODO confirm.
    (ARCH, ARCHSYMBOL) = get_arch()

    # Constants likewise unused here; presumably names the rules file expects.
    TRUE = 1
    FALSE = 0
    PRESENT = 1
    ABSENT = 0
    y = m = n = 0

    def DEBUG(str):
        pass

    # Each helper below takes the symbol as its last required argument and
    # marks it as probed; all other arguments are ignored.
    def PCI(prefix, symbol):
        probe_table[symbol] = 1

    def PCI_CLASS(match, symbol):
        probe_table[symbol] = 1

    def PNP(match, symbol):
        probe_table[symbol] = 1

    def MCA(match, symbol):
        probe_table[symbol] = 1

    def USBP(match, symbol):
        probe_table[symbol] = 1

    def USBC(match, symbol):
        probe_table[symbol] = 1

    def USBI(match, symbol):
        probe_table[symbol] = 1

    def FS(match, symbol):
        probe_table[symbol] = 1

    def DEV(match, symbol):
        probe_table[symbol] = 1

    def DEVM(match, symbol):
        probe_table[symbol] = 1

    def CONS(match, symbol):
        probe_table[symbol] = 1

    def DMESG(match, symbol, truthval=None):
        probe_table[symbol] = 1

    def NET(match, symbol):
        probe_table[symbol] = 1

    def IDE(match, symbol):
        probe_table[symbol] = 1

    def REQ(match, symbol):
        probe_table[symbol] = 1

    def CPUTYPE(match, symbol):
        probe_table[symbol] = 1

    def CPUINFO(match, symbol, present=None, truthval=None):
        probe_table[symbol] = 1

    def EXISTS(procfile, symbol):
        probe_table[symbol] = 1

    def MODULE(name, symbol):
        probe_table[symbol] = 1

    def GREP(pattern, file, symbol):
        probe_table[symbol] = 1

    # `rulesfile` is a module global; the main program sets it from -r.
    execfile(rulesfile)

# Predicates for filtering the reports

def namestrip(name):
    "Return name without the rulebase's symbol prefix, if it begins with one."
    stem = rulebase.prefix
    # Nothing to strip when no prefix is configured or the name lacks it.
    if not stem or name[:len(stem)] != stem:
        return name
    return name[len(stem):]

def in_code(name):
    "Return 1 if the symbol is referenced from a .c, .S or .h file, else 0."
    if not xrefs.has_key(name):
        return 0
    for path in xrefs[name]:
        if suffix(path, ".c", ".S"):
            return 1
        # Headers count too, except the generated autoconf.h.
        if suffix(path, ".h") and not suffix(path, "autoconf.h"):
            return 1
    return 0

def in_help(name):
    "Return 1 if help exists for the symbol (CML1 convention), else 0."
    # Ask the rulebase first; this catches choice names that aren't in
    # Configure.help directly.
    sym = rulebase.dictionary.get(namestrip(name))
    if sym and sym.help():
        return 1
    # Otherwise look for the name in a help file; this catches names that
    # have help text but are unknown to CML2.
    if xrefs.has_key(name):
        for path in xrefs[name]:
            if suffix(path, ".help"):
                return 1
    # Still a false negative for any choice name CML2 doesn't know about.
    return 0

def in_cml2_help(name):
    "Return 1 if help exists for the symbol (CML2 convention), else 0."
    # A rulebase entry carrying help text settles it.
    sym = rulebase.dictionary.get(namestrip(name))
    if sym and sym.helptext:
        return 1
    # Otherwise look for the name in a help file; this catches names that
    # have help text but are unknown to CML2.
    if xrefs.has_key(name):
        for path in xrefs[name]:
            if suffix(path, ".help"):
                return 1
    # Still a false negative for any choice name CML2 doesn't know about.
    return 0

def in_makefile(name):
    "Return 1 if the symbol is referenced from a makefile, else 0."
    if not xrefs.has_key(name):
        return 0
    # "akefile" covers both the Makefile and makefile spellings.
    makefiles = [path for path in xrefs[name] if suffix(path, "akefile")]
    if makefiles:
        return 1
    return 0

def in_cml1(name):
    "Return 1 if the symbol is referenced from a CML1 configuration file, else 0."
    if not xrefs.has_key(name):
        return 0
    # "onfig.in" covers both the Config.in and config.in spellings.
    cml1_files = [path for path in xrefs[name] if suffix(path, "onfig.in")]
    if cml1_files:
        return 1
    return 0

def cml1_declared(name):
    "Report whether some CML1 file assigns a type to this name."
    # The CML1 type table is keyed by every symbol that got a declaration.
    declared = mycml1types.has_key(name)
    return declared

def in_defconfig(name):
    "Does this symbol occur in a defconfig?"
    # The docstring has to be the first statement of the function; it used to
    # sit after the early return below, where it was a dead expression and
    # left the function undocumented.
    if not xrefs.has_key(name):
        return 0
    for file in xrefs[name]:
        # Either a file named like a defconfig or anything under a configs/
        # directory counts.
        if file.find("defconfig") > -1 or file.find("configs/") > -1:
            return 1
    return 0

def in_cml2(name):
    "Report whether the CML2 rulebase defines this symbol."
    # The rulebase dictionary is keyed by prefix-stripped names.
    stripped = namestrip(name)
    return rulebase.dictionary.has_key(stripped)

def is_derived(name):
    "Return 1 if the rulebase knows this name as a derived symbol, else 0."
    sym = rulebase.dictionary.get(namestrip(name))
    # Names unknown to the rulebase are never derived.
    if not sym:
        return 0
    if sym.is_derived():
        return 1
    return 0

def dependent_of(ancestor, name):
    """Is given symbol a dependent of given ancestor?

    Both arguments are symbol names, with or without the rulebase prefix.
    Returns 1 when the rulebase entry for `ancestor` reports itself an
    ancestor of the entry for `name`, and 0 otherwise -- including when
    either name is unknown to the rulebase.
    """
    ancestor = rulebase.dictionary.get(namestrip(ancestor))
    # An unknown ancestor (e.g. a mistyped name in a D(...) filter) has no
    # dependents.  Without this guard the lookup result None was dereferenced
    # below, raising AttributeError.
    if ancestor is None:
        return 0
    entry = rulebase.dictionary.get(namestrip(name))
    if entry and ancestor.ancestor_of(entry):
        return 1
    else:
        return 0

def ancestor_of(dependent, name):
    "Return 1 if the symbol `name` is an ancestor of the symbol `dependent`, else 0."
    dependent = rulebase.dictionary.get(namestrip(dependent))
    candidate = rulebase.dictionary.get(namestrip(name))
    # A name the rulebase doesn't know can't be anybody's ancestor.
    if not candidate:
        return 0
    if candidate.ancestor_of(dependent):
        return 1
    return 0

def type_of(typename, name):
    "Return 1 if the rulebase gives this symbol the named type, else 0."
    sym = rulebase.dictionary.get(namestrip(name))
    # Unknown symbols have no type at all.
    if not sym:
        return 0
    if sym.type == typename:
        return 1
    return 0

def has_property(property, name):
    "Return 1 if the rulebase entry for this symbol lists the property, else 0."
    sym = rulebase.dictionary.get(namestrip(name))
    # Unknown symbols have no properties.
    if not sym:
        return 0
    if property in sym.properties:
        return 1
    return 0

def is_probed(name):
    "Report whether a probe exists for the given symbol."
    sym = rulebase.dictionary.get(namestrip(name))
    # Build the probe table on first use (or whenever it is still empty).
    if not probe_table:
        load_probe_table()
    # For a symbol the rulebase doesn't know, hand back the false lookup
    # result itself.
    if not sym:
        return sym
    return probe_table.has_key(sym.name)

def in_visibility(guard, name):
    "Report whether symbol GUARD appears in the visibility predicate of NAME."
    sym = rulebase.dictionary.get(namestrip(name))
    if not sym:
        return 0
    guard = rulebase.dictionary.get(namestrip(guard))
    # No visibility predicate: hand back the false value itself.
    if not sym.visibility:
        return sym.visibility
    return guard in cml.flatten_expr(sym.visibility)

# Report generation

def setfilter(filterspec):
    "Set the filter function."
    if not filterspec:
        function = "def myfilter(name): return 1"
    else:
        state = 0
        expression = ""
        for c in filterspec:
            if state == 0:
                if c == "(" or c == ")":
                    expression += c
                elif c == " " or c == "\t":
                    pass
                elif c == "a":
                    expression += " cml1_declared(name)"
                elif c == "c":
                    expression += " in_code(name)"
                elif c == "h":
                    expression += " in_help(name)"
                elif c == "H":
                    expression += " in_cml2_help(name)"
                elif c == 'm':
                    expression += " in_makefile(name)"
                elif c == "o":
                    expression += " in_cml1(name)"
                elif c == "n":
                    expression += " in_cml2(name)"
                elif c == "d":
                    expression += " in_defconfig(name)"
                elif c == "x":
                    expression += " is_derived(name)"
                elif c == "~":
                    expression += " not"
                elif c == "&":
                    expression += " and"
                elif c == "|":
                    expression += " or"
                elif c == "p":
                    expression += " is_probed(name)"
                elif c == "D":
                    expression += " dependent_of"
                    state = 1
                elif c == "A":
                    expression += " ancestor_of"
                    state = 1
                elif c == "T":
                    expression += " type_of"
                    state = 1
                elif c == "P":
                    expression += " has_property"
                    state = 1
                elif c == "V":
                    expression += " in_visibility"
                    state = 1
            elif state == 1:
                if c == ')':
                    expression += '", name)'
                    state = 0
                elif c == '(':
                    expression += '("'
                else:
                    expression += c
        function = "def myfilter(name): return " + expression
    #sys.stderr.write("Filter function: " +  function + "\n")
    exec function in globals()

def report(keys, norefs=0):
    "Generate a filtered report on the cross-references."
    for symbol in keys:
        refs = filter(lambda x: not (suppress and suppress.search(x)), xrefs[symbol])
        if refs:
            if norefs:
                print symbol
            else:
                sys.stdout.write(symbol + ":")
                for file in refs:
                    sys.stdout.write(" " + file)
                sys.stdout.write("\n")

def generate_patch(file, symbols):
    """Generate a patch deleting the given symbols from the given file.

    Writes a copy of `file`, minus every line that mentions one of `symbols`,
    to `file`.tweaked, prints a unified diff between the two on standard
    output via diff(1), and removes the temporary copy.  `file` itself is
    never modified.
    """
    pfp = open(file, "rb")
    try:
        contents = pfp.read()
    finally:
        pfp.close()
    contents = _strip_symbol_lines(contents, symbols)
    pfp = open(file + ".tweaked", "wb")
    try:
        pfp.write(contents)
    finally:
        pfp.close()
    # NOTE(review): the file name is interpolated into a shell command
    # unquoted, so names containing spaces or shell metacharacters will
    # misbehave.
    os.system("diff -u %s %s.tweaked; rm %s.tweaked" % (file, file, file))

def _strip_symbol_lines(contents, symbols):
    "Return contents with every line mentioning one of the symbols removed."
    for symbol in symbols:
        # The symbol must not be followed by another identifier character.
        # The old test, [^A-Z0-9], accepted "_" (so FOO also deleted FOO_BAR
        # lines) and accepted a newline (so a symbol ending its line took the
        # following line down with it).  A lookahead consumes nothing and
        # avoids both problems.  The left side stays unanchored because the
        # symbols handed in here have already had their prefix stripped.
        pattern = re.compile("^.*" + re.escape(symbol) + r"(?![A-Za-z0-9_]).*\n?", re.M)
        contents = pattern.sub("", contents)
    return contents

# Inclusion checking.  This lives here because we use the CML2 rulebase to
# check which CONFIG_ symbols are defined (just checking for a CONFIG_ stem
# isn't reliable as CML2 doesn't completely own that namespace).

includere = re.compile(r'^\s*#\s*include\s*[<"](\S*)[>"]', re.M)

def includecheck(sourcetree):
    "Check the inclusion structure of a source tree."
    def includevisit(dummy, dir, files):
        "Visit a directory on behalf of the inclusion checker."
        def filevisitor(dummy, file):
            "Visit a file on behalf of the inclusion checker."
            fp = open(file)
            contents = fp.read()
            fp.close()
            # First get the list of included files
            inclusions = includere.findall(contents)
            # This strips slashes, so it has to be done after
            contents = c_comment_strip(contents)
            # Check to see if we have defined CONFIG_ symbols in the file
            matched = []
            for match in configpref.findall(contents):
                if suffix(match, "_MODULE"):
                    match = match[:-7]
                match = namestrip(match)		# Strip prefix
                if rulebase.dictionary.has_key(match) and match not in matched:
                    matched.append(match)
            # Check for duplicates
            dups = {}
            for header in inclusions:
                dups[header] = 0
            for header in inclusions:
                dups[header] += 1
            for header in inclusions:
                if dups[header] > 1:
                    print "%s: %s is included %d times" % (file, header, dups[header])
            # OK, check to see if we have autoconf inclusion.
            have_autoconf = 0
            for header in inclusions:
                if header == "autoconf.h" or header == "linux/config.h":
                    have_autoconf = 1
                    break
            if not matched and have_autoconf:
                print "%s: has unnecessary configure file inclusion" % file
            elif matched and not have_autoconf:
                print "%s: needs configure file inclusion for %s" % (file, matched)

        for file in files:
            if suffix(file, ".c", ".h", ".S"):
                node = os.path.join(dir, file)[2:]
                if os.path.isfile(node) and not ignore(node):
                    filevisitor(None, node)

    here = os.getcwd()
    os.chdir(sourcetree)
    os.path.walk(".", includevisit, None)
    os.chdir(here)

# The main program

def load_context(tree):
    "Load context, including CML2 rulebase and cross-reference database."
    global rulebase, xrefs, mycml1types

    # Get a CML2 rulebase.
    if not os.path.exists(os.path.join(tree, "rules.out")):
        print "This program requires a CML2 rulebase in the source tree."
        raise SystemExit, 1
    else:
        rulebase = cmlsystem.CMLSystem(os.path.join(tree, "rules.out"))

    # Try to find a saved cross-reference database.  If no such database
    # exists, generate one and cache it.
    xref_file = os.path.join(tree, "xref.out")
    if os.path.exists(xref_file):
        sys.stderr.write("Reading cross-reference database...")
        ifp = open(xref_file, "rb")
        (xrefs, mycml1types) = cPickle.load(ifp)
        ifp.close()
        sys.stderr.write("done.\n")
    else:
        sys.stderr.write("Regenerating cross-reference database...")
        (xrefs, mycml1types) = makexref(tree)
        ofp = open(xref_file, "w")
        cPickle.dump((xrefs, mycml1types), ofp, 1)
        ofp.close()
        sys.stderr.write("done.\n")

if __name__ == "__main__":
    setfilter(None)
    examine = ""
    norefs = 0
    typecheck = 0
    suppress = None
    rulesfile = None
    invert = genpatch = checkincludes = 0
    (options, arguments) = getopt.getopt(sys.argv[1:], "ef:ghik:ln:r:tx:")
    for (switch, val) in options:
        if switch == '-f':
            setfilter(val)
        elif switch == '-i':
            invert = 1
        elif switch == '-g':
            invert = genpatch = 1
        elif switch == '-h':
            checkincludes = 1
        elif switch == '-k':
            fp = open(val, "r")
            while 1:
                line = fp.readline()
                if not line:
                    break
                kill_list[line.strip()] = 1
        elif switch == '-l':
            norefs = 1
        elif switch == '-n':
            suppress = re.compile(val)
        elif switch == '-r':
            rulesfile = val
        elif switch == '-t':
            typecheck = 1
        elif switch == '-x':
            examine = val

    if len(arguments) < 1:
        sourcetree = "."
    else:
        sourcetree = arguments[0]

    # Load or regenerate the cross-reference database
    load_context(sourcetree)

    if not checkincludes:
        # OK, now filter the database
        keys = filter(myfilter, xrefs.keys())
        keys.sort()

        # If invert was specified, invert the database so it's keyed by file
        if invert:
            inverted = {}
            for key in keys:
                for file in xrefs[key]:
                    if not inverted.has_key(file):
                        inverted[file] = []
                    if key not in inverted[file]:
                        inverted[file].append(key)
            xrefs = inverted
            keys = inverted.keys()
            keys.sort()

    if genpatch:
        for file in keys:
            generate_patch(file, xrefs[file])
    elif checkincludes:
        includecheck(sourcetree)
    elif examine:
        shortname = namestrip(examine)
        if not rulebase.dictionary.has_key(shortname) and not mycml1types.has_key(examine):
            print "%s: no such symbol" % examine
        else:
            print "%s: a=%d c=%d h=%d o=%d n=%d m=%d d=%d x=%s f=%d" % (examine, cml1_declared(examine), in_code(examine), in_help(examine), in_cml1(examine), in_cml2(examine), in_makefile(examine), in_defconfig(examine), is_derived(examine), myfilter(examine))
    elif typecheck:
        print "CML1 type consistency report:"
        hits = []
        ok = 0
        for (key, item) in mycml1types.items():
            if len(item) == 1:
                ok += 1
            else:
                hits.append(key)
        print "%d symbols have consistent type declarations." % ok
        if hits:
            print "Non-declared or multiply-declared symbols:"
            for symbol in hits:
                print "%s:" % symbol
                for (type, locs) in mycml1types[symbol]:
                    print "    %-8s: %s" % (type, " ".join(locs))
        print "CML2 type cross-check:"
        typematch = 0
        missing = 0
        matching = 0
        typemap = {"bool":"bool", "trit":"tristate", "string":"string", "decimal":"int", "hexadecimal":"hex"}
        for (key, item) in mycml1types.items():
            if not rulebase.dictionary.has_key(namestrip(key)):
                missing += 1
                continue
            elif len(item) != 1:
                continue
            cml2symbol = rulebase.dictionary[namestrip(key)]
            cml1type = item[0][0]
            if typemap[cml2symbol.type] == cml1type:
                matching += 1
            elif cml2symbol.menu and cml2symbol.menu.type=="choices" and cml1type=="choice":
                matching += 1
            else:
                if cml2symbol.is_derived():
                    derived = "(derived)"
                else:
                    derived = ""
                print '"%s", line %d: %s, %s -> %s %s' % (cml2symbol.file, cml2symbol.lineno, key, item[0][0], cml2symbol.type, derived)
        print "%d CML1 symbols missing, %d type matches" % (missing, matching)
    else:
        # OK, list the filtered symbols
        try:
            report(keys, norefs)
        except (KeyboardInterrupt, IOError):
            pass	# In case we break a pipe by interrupting

# That's all, folks!