我目前的代码在一些被频繁使用的类中重写了 __getattribute__/__getattr__。性能分析表明,相当多的时间花在了这些重写的方法上。为了单独测量重写本身带来的性能影响,我把代码大幅简化如下:
from timeit import default_timer
class Test(object):
    """Baseline fixture: a plain class with no attribute-access hooks."""

    def __init__(self):
        # The single float attribute that the timing loops read.
        self.a = 1.0
class Test1(object):
    """Fixture whose ``__getattribute__`` hook forwards directly to ``object``.

    Every attribute read on an instance goes through the Python-level hook
    below, which hands the lookup to ``object.__getattribute__`` unchanged.
    """

    def __init__(self):
        # The single float attribute that the timing loops read.
        self.a = 1.0

    def __getattribute__(self, item):
        # Explicit call on the base type; ``super`` is not involved here.
        return object.__getattribute__(self, item)
class Test2(object):
    """Fixture whose ``__getattribute__`` hook delegates through ``super``.

    Every attribute read on an instance goes through the Python-level hook
    below, which resolves the parent implementation with the two-argument
    ``super`` form and calls it.
    """

    def __init__(self):
        # The single float attribute that the timing loops read.
        self.a = 1.0

    def __getattribute__(self, item):
        # Builds a ``super`` proxy on each call, then looks up and invokes
        # the inherited ``__getattribute__``.
        return super(Test2, self).__getattribute__(item)
class ObjectWrapper(object):
    """Proxy that forwards unknown attribute reads to a wrapped object.

    The wrapped object must be ``None`` or an instance of the class supplied
    at construction time.  Only ``__getattr__`` is defined, so forwarding
    happens just for names that normal lookup on the wrapper fails to find.
    """

    def __init__(self, ocls, obj=None):
        """Create a wrapper restricted to instances of ``ocls``.

        Args:
            ocls: Class that every wrapped object has to be an instance of.
            obj: Initial wrapped object, or ``None`` for an empty wrapper.

        Raises:
            RuntimeError: If ``obj`` is neither ``None`` nor an instance of
                ``ocls``.
        """
        self.__ocls = ocls
        # Bind the slot before validating so the attribute exists even when
        # the check below raises.
        self.__obj = None
        self.__obj = self.__checked(obj)

    def set_inner_object(self, obj):
        """Replace the wrapped object.

        Args:
            obj: New wrapped object, or ``None`` to clear the wrapper.

        Raises:
            RuntimeError: If ``obj`` is neither ``None`` nor an instance of
                the class given to the constructor.  The previously wrapped
                object is left in place in that case.
        """
        self.__obj = self.__checked(obj)

    def __checked(self, obj):
        """Return ``obj`` unchanged if it is acceptable, otherwise raise."""
        if obj is None or isinstance(obj, self.__ocls):
            return obj
        raise RuntimeError(
            "The value is %s, but it must be None or instance of %s"
            % (type(obj), self.__ocls.__name__))

    def __getattr__(self, name):
        """Forward a failed attribute lookup to the wrapped object.

        Raises:
            AttributeError: If the wrapped object has no attribute ``name``,
                or if ``name`` is one of the wrapper's own private slots and
                that slot has not been set on this instance.
        """
        # The wrapper's own name-mangled slots reach this hook only when they
        # are missing from the instance (e.g. __init__ never ran).  Reading
        # self.__obj below would then re-enter this method without end, so
        # report the ordinary AttributeError instead.
        if name in ("_ObjectWrapper__obj", "_ObjectWrapper__ocls"):
            raise AttributeError(name)
        return getattr(self.__obj, name)
def _time_attribute_reads(target, iterations):
    """Return the seconds spent reading ``target.a`` ``iterations`` times."""
    try:
        counter = xrange  # Python 2: lazy sequence, no list is built
    except NameError:
        counter = range  # Python 3: ``range`` is already lazy
    total = 0.0
    started = default_timer()
    for _ in counter(0, iterations):
        # Accumulate the value so each pass performs a real attribute read.
        total += target.a
    return default_timer() - started


def main(iterations=100000000):
    """Print how much slower hooked attribute reads are than plain ones.

    Times the same read loop against a plain ``Test`` instance, a ``Test1``
    instance, a ``Test2`` instance and an ``ObjectWrapper`` around the plain
    instance, then prints each timing divided by the plain-instance timing.

    Args:
        iterations: Number of attribute reads performed per timed loop.

    Raises:
        ZeroDivisionError: If the baseline loop is measured as taking zero
            seconds, which can happen when ``iterations`` is very small.
    """
    plain = Test()
    hooked_object = Test1()
    hooked_super = Test2()
    wrapped = ObjectWrapper(Test, plain)

    baseline = _time_attribute_reads(plain, iterations)
    elapsed_object = _time_attribute_reads(hooked_object, iterations)
    elapsed_super = _time_attribute_reads(hooked_super, iterations)
    elapsed_wrapped = _time_attribute_reads(wrapped, iterations)

    # Each call passes one pre-built string, so the text is the same under
    # the Python 2 print statement and the Python 3 print function.  The
    # second space after the colon reproduces the separator that the
    # original two-item Python 2 print statement inserted.
    print("speed factor Test1/Test:  " + str(elapsed_object / baseline))
    print("speed factor Test2/Test:  " + str(elapsed_super / baseline))
    print("speed factor Test wrapped/Test:  " + str(elapsed_wrapped / baseline))
# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
重写带来的影响非常大。输出如下:
speed factor Test1/Test: 6.32820892871
speed factor Test2/Test: 8.4176175507
speed factor Test wrapped/Test: 11.6202852701
这对 Python 来说正常吗(Python 不是我最熟悉的编程语言)?为什么会这样?这是重写和/或委托(转发属性访问)所带来的代价吗?!
如何提高性能?
是的,您使用的抽象越多,速度就越慢。主要的 Python 实现对所有内容都使用哈希表,额外的间接级别通常意味着额外的哈希表查找。
这里有一些反汇编供你细细品味:
>>> dis.dis(Test2.__getattribute__)
5 0 LOAD_GLOBAL 0 (super)
3 LOAD_GLOBAL 1 (Test2)
6 LOAD_FAST 0 (self)
9 CALL_FUNCTION 2 (2 positional, 0 keyword pair)
12 LOAD_ATTR 2 (__getattribute__)
15 LOAD_FAST 1 (item)
18 CALL_FUNCTION 1 (1 positional, 0 keyword pair)
21 RETURN_VALUE
请注意,每个 LOAD_GLOBAL 基本上都要执行一次哈希表查找,随后的 LOAD_ATTR 又是一次哈希表查找(我认为是这样)。要做的工作更多,因此耗时也更多。更重要的是,还要加上一次原本并不需要的函数调用的开销。
我是一名优秀的程序员,十分优秀!