#! /usr/bin/env python
"""Gather import statistics

Usage:
importstat.py modname [modname ...]

Import the modules named on the command line and print some statistics.
The statistics are meant to provide some arguments in favor of relative
imports for PEP-328.

The statistics are gathered for imports that happen from within modules
that belong to packages.  Three different kinds of imports are
distinguished:

 - Imports of modules in the same package.  E.g. a.b.c imports a.b.x

 - Imports of modules in the same hierarchy but not the same package
   E.g. a.b.c imports a.x.y

 - absolute imports i.e. imports of modules somewhere else on sys.path.
   E.g. a.b.c imports z

All names used above are the absolute names of the modules.  It doesn't
matter for the statistics whether the import in the first case was done
with 'import x' or 'import a.b.x'.
"""

# Revision string of this script.  The "$Revision: ... $" form looks
# like a CVS/RCS keyword that the version control system fills in --
# confirm before editing it by hand.
__version__ = "$Revision: 1.1 $"

import sys, os
import traceback
import __builtin__

# The import data gathered by gather.  A mapping from the name of an
# importing module to a list of the names of the modules it imported.
# gather only adds keys that contain a dot, and it appends one entry
# per import it sees, so a name can occur several times in a list.
stats = {}

# Number of times gather could not walk a dotted name down to the
# module it refers to.  Each such import may have been classified
# wrongly, so this is reported together with the statistics.
miscounts = 0

# The __import__ implementation that gather delegates to.  Bound by
# install(); None until then.
_orig_import = None
def gather(name, globals_dict=None, locals_dict=None, fromlist=None,
           level=-1):
    """Implementation of __import__ that records some of the imports

    The actual import is delegated to _orig_import which is assumed to
    have been bound to the original __import__.

    Two names are important for this function, the name of the importer
    and the name of the imported module.  The name of the importer is
    assumed to be the value of '__name__' in the globals passed to this
    function.  The name of the imported module is the __name__
    attribute of the imported module, and not the name parameter given
    to this function.

    If the importer is a package or a module in a package, an entry is
    added to stats.  Imports whose importer cannot be determined (no
    '__name__' in the globals, or a '__name__' of None) are not
    recorded.

    level is the relative import level which Python 2.5 and later pass
    as a fifth argument for explicit relative imports and for modules
    that use "from __future__ import absolute_import".  It is only
    forwarded to _orig_import when it differs from the default of -1,
    so the hook keeps working with an __import__ that does not know the
    parameter.

    The return value is whatever _orig_import returned.
    """
    global miscounts

    if level == -1:
        return_value = _orig_import(name, globals_dict, locals_dict,
                                    fromlist)
    else:
        return_value = _orig_import(name, globals_dict, locals_dict,
                                    fromlist, level)

    # if name is a dotted name and no fromlist is given, the module
    # returned by the original __import__ is the base module (i.e. email
    # when email.Headers is imported).  Make module the module really
    # referred to by the name.  return_value is the module we have to
    # return to retain interface compatibility
    module = return_value
    if not fromlist:
        for part in name.split(".")[1:]:
            try:
                module = getattr(module, part)
            except AttributeError:
                # It can happen that the part we try to access isn't yet
                # bound.  This usually happens with recursive
                # dependencies.  We will end up counting this case wrong
                # but it shouldn't skew the statistics too much
                miscounts += 1
                break

    if not globals_dict:
        # __import__ may be called with only a name.  Thankfully this
        # doesn't happen when invoked by a normal import statement.
        # FIXME: we could probably handle this case by looking into the
        # caller's frame
        # (a single parenthesized argument prints the same text as the
        # plain print statement)
        print("gather: no globals dict given")
        return return_value

    # For some reason, it can happen that importer is None (only
    # happened in Twisted-1.0.0, and I couldn't figure out why, so
    # far).  Check for that before touching the name, otherwise the
    # string concatenation below would fail.
    importer = globals_dict.get("__name__")
    if importer is None:
        return return_value

    if "__path__" in globals_dict:
        # For the package module itself, pretend it's the __init__
        # module in the package
        importer += ".__init__"

    if "." not in importer:
        # Only packages and modules in packages are of interest for the
        # package import statistics.
        return return_value

    module_name = module.__name__
    if hasattr(module, "__path__"):
        # The imported module is a package.  Pretend it's the __init__
        # module in the package.  This makes counting package imports
        # more accurate.  Otherwise a "from pkg import foo" in pkg.bar
        # would be counted as a hierarchy import.
        module_name += ".__init__"

    # We simply append the new module name.  This may count some names
    # multiple times if the importing module contains multiple import
    # statements for the same module
    stats.setdefault(importer, []).append(module_name)

    return return_value

def install():
    """Install gather as the __import__ hook

    The __import__ function that is current at the time of the call is
    saved in _orig_import so that gather can delegate the real work to
    it.

    Calling install again while gather is already the active hook does
    nothing.  Without that check _orig_import would be rebound to
    gather itself and every later import would recurse forever.
    """
    global _orig_import
    if __builtin__.__import__ is gather:
        return
    _orig_import = __builtin__.__import__
    __builtin__.__import__ = gather


def count_stats(pkgname = None):
    """Tally the recorded imports by kind, optionally for one package

    Only importers whose name starts with pkgname followed by a dot are
    looked at.  If pkgname is None, every importer in stats is used.

    Each import is put into exactly one of three groups:

     - package: importer and imported module share all name parts but
       the last one

     - hierarchy: not a package import, but both names start with the
       same first part

     - absolute: everything else

    The return value is the tuple (absolute, hierarchy, package) of the
    three counts.
    """
    prefix = ""
    if pkgname is not None:
        # The trailing dot keeps "foo" from also selecting "foobar.x"
        prefix = pkgname + "."

    # Positions of the counters in the tally, in return order
    absolute, hierarchy, package = 0, 1, 2
    tally = [0, 0, 0]

    for importer in stats:
        if not importer.startswith(prefix):
            continue
        importer_parts = importer.split(".")
        top_level = importer_parts[0]
        own_package = importer_parts[:-1]

        for imported_name in stats[importer]:
            imported_parts = imported_name.split(".")
            if imported_parts[:-1] == own_package:
                kind = package
            elif imported_parts[0] == top_level:
                kind = hierarchy
            else:
                kind = absolute
            tally[kind] += 1

    return tuple(tally)

def print_stats(packages):
    """Write the import counts as a small table to standard output

    There is one row for each name in packages, in the given order,
    followed by an empty line and a row labelled "all" with the counts
    over everything that was recorded.  The number of potential
    miscounts is printed last, after another empty line.
    """
    # Every print below gets exactly one parenthesized string, which
    # produces the same output as the plain print statement.
    print("               absolute hierarchy package")
    row_format = "%-15s  %4d      %4d    %4d"
    for pkg in packages:
        counts = count_stats(pkg)
        print(row_format % ((pkg,) + counts))
    print("")
    totals = count_stats(None)
    print(row_format % (("all",) + totals))

    print("")
    print("potential miscounts: %s" % (miscounts,))


def import_package(name):
    """Import the package given by name recursively

    Since simply importing a package will often not import all modules
    contained in the package, look at the files in the directories on
    the package's __path__ and import all python modules found.

    If name is not the name of a package but that of a normal module,
    just import that module and return.

    Problems are reported on stderr and do not stop the run: a failed
    import makes the function return without descending any further,
    and a __path__ entry that cannot be listed is skipped.  SystemExit
    and KeyboardInterrupt are passed on to the caller.

    The return value is always None.
    """
    try:
        mod = __import__(name, globals())
    except ImportError:
        sys.stderr.write("can't import %s\n" % repr(name))
        return
    except (SystemExit, KeyboardInterrupt):
        # The parentheses matter: without them only SystemExit is
        # caught (and bound to the name KeyboardInterrupt), so an
        # interrupt would be swallowed by the handler below.
        raise
    except:
        # Importing arbitrary modules can fail in arbitrary ways.  Keep
        # going with the next module instead of aborting the whole run.
        sys.stderr.write("error when importing %s\n" % repr(name))
        traceback.print_exc(None, sys.stderr)
        return

    # __import__ returns the top-level package for a dotted name; walk
    # down to the module that name really refers to.
    for part in name.split(".")[1:]:
        mod = getattr(mod, part)

    for d in getattr(mod, "__path__", []):
        try:
            entries = os.listdir(d)
        except OSError:
            # Not every __path__ entry can be listed, e.g. one that
            # points into a zip archive.
            sys.stderr.write("can't list %s\n" % repr(d))
            continue
        for submodule in entries:
            filename = os.path.join(d, submodule)
            if submodule.endswith(".py") and submodule != "__init__.py":
                modname = os.path.splitext(submodule)[0]
            elif (os.path.isdir(filename)
                  and os.path.isfile(os.path.join(filename, "__init__.py"))):
                modname = submodule
            else:
                modname = None
            if modname is not None:
                import_package(name + "." + modname)

def main():
    """Gather and print import statistics for the command line arguments

    Installs the import hook, recursively imports every module or
    package named in sys.argv[1:] and finally prints the statistics for
    each of those names and for all recorded imports together.
    """
    install()
    names = sys.argv[1:]
    for name in names:
        import_package(name)
    print_stats(names)

# Only gather statistics when run as a script, not when imported.
if __name__ == "__main__":
    main()
