I'm going to hack on the Python import system. Suppose we have the following directory structure:
.
├── main
│   ├── main.py
│   └── parent
│       └── __init__.py
└── pkg1
    ├── __init__.py
    ├── sub
    │   ├── __init__.py
    │   └── import_global.py
    └── success.py
The launch script is main.py, so parent is importable as a top-level package. Now I want to simulate a subpackage whose full name is parent.intermediate.pkg1 but which actually refers to the pkg1 directory.
No intermediate module actually exists on disk; however, I do need to simulate one (in my real project, the name of this intermediate module will be generated dynamically). So I decided to use Python import hooks.
First, let me introduce the contents of pkg1.
pkg1/sub/import_global.py:
from operator import add
Value = add(1, 2)
pkg1/success.py:
Value = 'Success'
In main.py (shown here in part), I wrote some test cases:
class MainTestCase(unittest.TestCase):
    """Exercise imports made through the simulated ``parent.intermediate`` package."""

    def test_success(self):
        # A plain module directly inside pkg1 must be importable under the
        # simulated dotted name.
        from parent.intermediate.pkg1 import success
        self.assertEqual(success.Value, "Success")

    def test_import_global(self):
        # import_global.py itself runs ``from operator import add``, so this
        # also covers an ordinary import made from inside a hooked module.
        from parent.intermediate.pkg1.sub import import_global
        self.assertEqual(import_global.Value, 3)

    def test_not_found(self):
        # A name with no backing file under pkg1 must raise ImportError.
        def F():
            from parent.intermediate.pkg1 import not_found
        self.assertRaises(ImportError, F)


# Run the test cases when main.py is executed.
unittest.main()
All of the __init__.py files are empty. Now to implement the import hooks. I've drafted two versions, each of which has a problem.
The first version:
class PkgLoader(object):
    """First draft: a single object used as both meta-path finder and loader."""

    def install(self):
        # Place this hook at the end of sys.meta_path, dropping any earlier
        # registration of the same object.
        sys.meta_path[:] = [x for x in sys.meta_path if self != x] + [self]

    def find_module(self, fullname, path=None):
        # Every dotted name starting with 'parent.' is claimed here without
        # checking that anything backs it; other names get the implicit None.
        if fullname.startswith('parent.'):
            return self

    def load_module(self, fullname):
        # Reuse a module that has already been imported.
        if fullname in sys.modules:
            return sys.modules[fullname]
        # Name components after the leading 'parent'.
        parts = fullname.split('.')[1:]
        # The directory above this script is where the lookup of the first
        # real component starts.
        path = os.path.join(os.path.dirname(__file__), '..')
        # intermediate module
        m = None
        ns = 'parent.intermediate'
        if ns in sys.modules:
            m = sys.modules[ns]
        elif parts[0] == 'intermediate':
            # Build the package object by hand; the name is hard-coded above.
            m = imp.new_module(ns)
            m.__name__ = ns
            m.__path__ = [ns]
            m.__package__ = '.'.join(ns.rsplit('.', 1)[:-1])
        else:
            raise ImportError("Module %s not found." % fullname)
        # submodules
        for p in parts[1:]:
            ns = '%s.%s' % (ns, p)
            # Raises ImportError when `p` cannot be found under `path`; the
            # exception is not handled in this method.
            fp, filename, options = imp.find_module(p, [path])
            if ns in sys.modules:
                m = sys.modules[ns]
            else:
                m = imp.load_module(ns, fp, filename, options)
                sys.modules[ns] = m
            # The pathname just returned is the search location for the next
            # component.
            path = filename
        return m


# Create the hook and register it.
loader = PkgLoader()
loader.install()
With this version, test_import_global fails:
E..
======================================================================
ERROR: test_import_global (__main__.MainTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "main.py", line 54, in test_import_global
from parent.intermediate.pkg1.sub import import_global
File "main.py", line 39, in load_module
m = imp.load_module(ns, fp, filename, options)
File "../pkg1/sub/import_global.py", line 1, in <module>
from operator import add
File "main.py", line 35, in load_module
fp, filename, options = imp.find_module(p, [path])
ImportError: No module named operator
----------------------------------------------------------------------
Ran 3 tests in 0.005s
FAILED (errors=1)
Now for the second version, I modified load_module:
def load_module(self, fullname):
    # Second draft of the loader method: identical to the first draft except
    # for the marked region inside the submodule loop.
    # Reuse a module that has already been imported.
    if fullname in sys.modules:
        return sys.modules[fullname]
    # Name components after the leading 'parent'.
    parts = fullname.split('.')[1:]
    # The directory above this script is where the lookup of the first real
    # component starts.
    path = os.path.join(os.path.dirname(__file__), '..')
    # intermediate module
    m = None
    ns = 'parent.intermediate'
    if ns in sys.modules:
        m = sys.modules[ns]
    elif parts[0] == 'intermediate':
        # Build the package object by hand; the name is hard-coded above.
        m = imp.new_module(ns)
        m.__name__ = ns
        m.__path__ = [ns]
        m.__package__ = '.'.join(ns.rsplit('.', 1)[:-1])
    else:
        raise ImportError("Module %s not found." % fullname)
    # submodules
    for p in parts[1:]:
        ns = '%s.%s' % (ns, p)
        # ======> The modification starts here <======
        try:
            fp, filename, options = imp.find_module(p, [path])
        except ImportError:
            # A failed lookup is swallowed and None is handed back to the
            # caller instead of an exception.
            return None
        # ======> The modification ends here <======
        if ns in sys.modules:
            m = sys.modules[ns]
        else:
            m = imp.load_module(ns, fp, filename, options)
            sys.modules[ns] = m
        # The pathname just returned is the search location for the next
        # component.
        path = filename
    return m
With this version, test_not_found fails:
.F.
======================================================================
FAIL: test_not_found (__main__.MainTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "main.py", line 65, in test_not_found
self.assertRaises(ImportError, F)
AssertionError: ImportError not raised
----------------------------------------------------------------------
Ran 3 tests in 0.004s
FAILED (failures=1)
So the question is now clear: how can I implement the import hook, so that all these three test cases can pass?
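Oh, I've got a solution, though more test cases may be needed for my real project. The basic idea is to call imp.find_module during the find_module stage rather than the load_module stage, so that the import system never hands a non-existent module to our custom loader. When our finder returns None for a name it cannot locate, Python simply falls through to its regular import machinery, which both lets imports such as `from operator import add` succeed and raises ImportError by itself for names that truly do not exist.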
Here goes the solution:
class ModuleImportUtility(object):
    """Stateless helpers for working with dotted module names."""

    @staticmethod
    def in_namespace(namespace, fullname):
        """
        Whether the given :param:`fullname` is, or lies within, the
        :param:`namespace`.

        The match is made on whole dotted components, so ``'a.bc'`` is not
        considered to be inside ``'a.b'``.
        """
        if not fullname.startswith(namespace):
            return False
        nslen = len(namespace)
        # Either an exact match, or the prefix ends on a component boundary.
        return len(fullname) == nslen or fullname[nslen] == '.'

    @staticmethod
    def parent_name(fullname):
        """
        Get the parent name of :param:`fullname`.

        Returns the empty string when :param:`fullname` has no dot.
        """
        return fullname.rpartition('.')[0]

    @staticmethod
    def find_modules(namespace, name_parts, root_path):
        """
        Find the modules along :param:`name_parts` according to
        :param:`root_path`.

        Each component is looked up with :func:`imp.find_module` inside the
        pathname returned for the previous component, starting at
        :param:`root_path`.

        :return: :class:`list` of (fullname, file, filename, options) as
            returned by :func:`imp.find_module` (with the dotted name
            prepended), or ``None`` if any component is not found.  The
            ``file`` entries that are not ``None`` are open file objects;
            the caller is responsible for closing them.
        """
        # Imported locally, as the loader methods in this file do.
        import imp

        found = []
        ns = namespace
        path = root_path
        try:
            for n in name_parts:
                ns = '%s.%s' % (ns, n)
                fp, filename, options = imp.find_module(n, [path])
                found.append((ns, fp, filename, options))
                path = filename
        except ImportError:
            # The caller only receives None, so it has no way to close the
            # files already opened for earlier components; do it here.
            for _, opened, _, _ in found:
                if opened is not None:
                    opened.close()
            return None
        return found
class NamespaceSplitter(object):
    """
    Drop a fixed namespace prefix from dotted names and return what is left
    as a list of components.
    """

    def __init__(self, namespace):
        self.namespace = namespace
        # Number of dotted components that make up the prefix.
        self.cutoff = namespace.count('.') + 1

    def cut(self, fullname):
        """Return the components of :param:`fullname` after the prefix."""
        components = fullname.split('.')
        return components[self.cutoff:]
class DirModuleFinder(object):
    """
    Find a module under particular namespace in a given directory.

    We assume that :attr:`root_path` is not a package, and that it contains
    the packages to be imported.
    """

    def __init__(self, namespace, root_path):
        self.namespace = namespace
        self.root_path = root_path
        self.ns_splitter = NamespaceSplitter(namespace)

    def install(self):
        """Register this finder at the end of :data:`sys.meta_path`."""
        # Identity comparison: drop only an earlier registration of this very
        # object, without invoking other finders' comparison methods.
        sys.meta_path[:] = [x for x in sys.meta_path if x is not self] + [self]

    def find_module(self, fullname, path=None):
        """
        Return a loader for :param:`fullname`, or ``None`` to let the other
        import mechanisms handle it.
        """
        # We should deal with all the parent packages of namespace, because
        # some of the intermediate packages may not exist, and need to be
        # created manually
        if ModuleImportUtility.in_namespace(fullname, self.namespace):
            return DefaultNewModuleLoader()

        # Names outside the namespace are none of our business.
        if not ModuleImportUtility.in_namespace(self.namespace, fullname):
            return None

        # Otherwise we try to find the requested module under the given
        # :attr:`root_path`
        parts = self.ns_splitter.cut(fullname)
        found = ModuleImportUtility.find_modules(
            self.namespace, parts, self.root_path)
        if not found:
            return None

        # The lookup was only an existence probe; the loader performs its
        # own lookup, so release the file objects opened here.
        for _, fp, _, _ in found:
            if fp is not None:
                fp.close()
        return DirModuleLoader(self.namespace, self.root_path)
class DefaultNewModuleLoader(object):
    """
    Load the requested module via standard import, or create a new, empty
    package if it does not exist.
    """

    def load_module(self, fullname):
        """
        Return the module registered as :param:`fullname` in
        :data:`sys.modules`, importing it or creating a dummy package first
        if necessary.
        """
        import sys

        # If the module has already been loaded, then we just fetch this module
        # from the import cache
        if fullname in sys.modules:
            return sys.modules[fullname]

        import imp

        # Otherwise we try to perform a standard import first, and if not
        # found, we create a new package as the required module
        try:
            m = self._import_existing(fullname)
        except ImportError:
            m = imp.new_module(fullname)
            m.__name__ = fullname
            m.__path__ = [fullname]
            m.__loader__ = self
            m.__file__ = '<dummy package "%s">' % fullname
            # The dummy has a __path__, i.e. it is a package, and for a
            # package __package__ is the package's own name.
            m.__package__ = fullname

        # Now insert the loaded module into the cache, and return the result
        sys.modules[fullname] = m
        return m

    @staticmethod
    def _import_existing(fullname):
        """Import each dotted prefix of ``fullname`` in turn; raise ImportError if one is missing."""
        import imp
        import sys

        m = None
        parts = fullname.split('.')
        for i, p in enumerate(parts, 1):
            ns = '.'.join(parts[:i])
            if ns in sys.modules:
                m = sys.modules[ns]
                continue
            if i == 1:
                # Top-level name: None makes imp.find_module search sys.path.
                search_path = None
            elif hasattr(m, '__path__'):
                search_path = m.__path__
            else:
                # The parent is not a package, so it cannot have children.
                raise ImportError()
            fp, filename, options = imp.find_module(p, search_path)
            try:
                # Load under the full dotted name so that __name__ is right
                # and nothing is registered under the bare component name.
                m = imp.load_module(ns, fp, filename, options)
            finally:
                # imp leaves closing the file to the caller.
                if fp is not None:
                    fp.close()
            sys.modules[ns] = m
        return m
class DirModuleLoader(object):
    """
    Load the requested module under a directory (simulate the system import),
    all the intermediate modules will also be loaded.
    """

    def __init__(self, namespace, root_path):
        self.namespace = namespace
        self.root_path = root_path
        self.ns_splitter = NamespaceSplitter(namespace)

    def load_module(self, fullname):
        """
        Load :param:`fullname` and every not-yet-loaded package between
        :attr:`namespace` and it, registering each in :data:`sys.modules`.

        :raise ImportError: if a component cannot be found under
            :attr:`root_path`, or the module is not registered after loading.
        """
        import imp
        import sys

        name_parts = self.ns_splitter.cut(fullname)
        found = ModuleImportUtility.find_modules(
            self.namespace, name_parts, self.root_path)
        if found is None:
            # The files may have changed since the finder looked.
            raise ImportError("No module named %s" % fullname)

        try:
            for ns, fp, filename, options in found:
                if ns not in sys.modules:
                    sys.modules[ns] = imp.load_module(
                        ns, fp, filename, options)
        finally:
            # imp leaves closing the files to the caller, whether or not the
            # load succeeded or the entry was skipped as already loaded.
            for _, fp, _, _ in found:
                if fp is not None:
                    fp.close()

        try:
            return sys.modules[fullname]
        except KeyError:
            raise ImportError("No module named %s" % fullname)
# Directory one level above this script; passed to the finder as its root.
_pkg_root = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))

# Create the finder for the 'parent.intermediate' namespace and register it.
loader = DirModuleFinder('parent.intermediate', _pkg_root)
loader.install()
Feel free to comment on my solution, and if you find any potential bugs, please let me know.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With