addのほうが速かったよなと思って、色々試してみた。
def str_mul():
    """Build the result with the sequence-repetition operator.

    Reads the module-level ``name`` and ``count`` globals and returns
    ``name`` repeated ``count`` times.
    """
    return name * count
def str_join():
    """Build the result by appending to a list in a loop, then joining.

    Reads the module-level ``name`` and ``count`` globals: ``name`` is
    appended ``count`` times and the pieces are joined with ``''``.
    """
    pieces = []
    # Bind the bound method once so the loop body is a plain local call.
    push = pieces.append
    for _ in xrange(count):
        push(name)
    return ''.join(pieces)
def str_map():
    """Build the result by joining the output of ``map`` with a lambda.

    Reads the module-level ``name`` and ``count`` globals: the lambda
    ignores its argument and yields ``name`` for each of the ``count``
    items, and the mapped pieces are joined with ``''``.
    """
    pieces = map(lambda _: name, xrange(count))
    return ''.join(pieces)
def list_comp():
    """Build the result by joining ``count`` copies of ``name``.

    Reads the module-level ``name`` and ``count`` globals.

    NOTE: despite the function name, the argument passed to ``join`` is a
    generator expression, not a list comprehension.
    """
    return ''.join(name for _ in xrange(count))
def str_add():
    """Build the result by repeated in-place string concatenation.

    Reads the module-level ``name`` and ``count`` globals: starts from an
    empty string and applies ``+= name`` ``count`` times.
    """
    result = ''
    for _ in xrange(count):
        result += name
    return result
- import cStringIO
def cstring_io():
    """Build the result by writing into a ``cStringIO.StringIO`` buffer.

    Reads the module-level ``name`` and ``count`` globals: ``name`` is
    written ``count`` times and the accumulated value is returned.
    """
    buf = cStringIO.StringIO()
    # Bind the bound method once so the loop body is a plain local call.
    emit = buf.write
    for _ in xrange(count):
        emit(name)
    return buf.getvalue()
- import StringIO
def string_io():
    """Build the result by writing into a ``StringIO.StringIO`` buffer.

    Reads the module-level ``name`` and ``count`` globals: ``name`` is
    written ``count`` times and the accumulated value is returned.
    """
    buf = StringIO.StringIO()
    # Bind the bound method once so the loop body is a plain local call.
    emit = buf.write
    for _ in xrange(count):
        emit(name)
    return buf.getvalue()
- from array import array
def str_array():
    """Build the result by extending a character ``array`` and converting it.

    Reads the module-level ``name`` and ``count`` globals: ``name`` is fed
    to ``fromstring`` ``count`` times on an ``array('c')`` and the result
    of ``tostring()`` is returned.
    """
    chars = array('c')
    # Bind the bound method once so the loop body is a plain local call.
    extend = chars.fromstring
    for _ in xrange(count):
        extend(name)
    return chars.tostring()
- from mmap import mmap
def str_mmap():
    """Build the result by writing into an ``mmap`` created with fileno -1.

    Reads the module-level ``name`` and ``count`` globals: the map is sized
    to ``count * len(name)`` bytes, ``name`` is written ``count`` times,
    and the whole region is read back from offset 0.

    The map is not closed explicitly in this function.
    """
    region = mmap(-1, count * len(name))
    # Bind the bound method once so the loop body is a plain local call.
    put = region.write
    for _ in xrange(count):
        put(name)
    # Rewind before reading: the writes left the position at the end.
    region.seek(0)
    return region.read(count * len(name))
- def main():
- global count, name
- func_list = (str_mul, str_join, str_add, string_io, cstring_io,
- str_map, list_comp, str_array, str_mmap)
- # test
- count = 2
- name = 'hello'
- assert all((name * count).__eq__(f()) for f in func_list)
- import timeit
- for c in (10, 1000):
- for b in (1, 1000):
- for f in func_list:
- count = c
- name = 'hello' * b
- print '%10s x %4s, "hello"*%4s: %9.4fms'%(
- f.func_name, c, b, timeit.timeit(f, number=100)/100*1000)
- if __name__ == '__main__':
- main()
str_mul x 10, "hello"* 1: 0.0006ms
str_join x 10, "hello"* 1: 0.0042ms
str_add x 10, "hello"* 1: 0.0033ms
string_io x 10, "hello"* 1: 0.0312ms
cstring_io x 10, "hello"* 1: 0.0072ms
str_map x 10, "hello"* 1: 0.0059ms
list_comp x 10, "hello"* 1: 0.0077ms
str_array x 10, "hello"* 1: 0.0081ms
str_mmap x 10, "hello"* 1: 0.0180ms
str_mul x 10, "hello"*1000: 0.0108ms
str_join x 10, "hello"*1000: 0.0131ms
str_add x 10, "hello"*1000: 0.0126ms
string_io x 10, "hello"*1000: 0.0411ms
cstring_io x 10, "hello"*1000: 0.0364ms
str_map x 10, "hello"*1000: 0.0146ms
list_comp x 10, "hello"*1000: 0.0167ms
str_array x 10, "hello"*1000: 0.0377ms
str_mmap x 10, "hello"*1000: 0.1105ms
str_mul x 1000, "hello"* 1: 0.0017ms
str_join x 1000, "hello"* 1: 0.2352ms
str_add x 1000, "hello"* 1: 0.2235ms
string_io x 1000, "hello"* 1: 2.4272ms
cstring_io x 1000, "hello"* 1: 0.5111ms
str_map x 1000, "hello"* 1: 0.3591ms
list_comp x 1000, "hello"* 1: 0.2296ms
str_array x 1000, "hello"* 1: 0.5338ms
str_mmap x 1000, "hello"* 1: 0.4778ms
str_mul x 1000, "hello"*1000: 8.5971ms
str_join x 1000, "hello"*1000: 8.3908ms
str_add x 1000, "hello"*1000: 29.4590ms
string_io x 1000, "hello"*1000: 10.6351ms
cstring_io x 1000, "hello"*1000: 28.7235ms
str_map x 1000, "hello"*1000: 8.8407ms
list_comp x 1000, "hello"*1000: 8.2300ms
str_array x 1000, "hello"*1000: 38.9228ms
str_mmap x 1000, "hello"*1000: 18.6053ms
addは50kB程度までは、文字列長や回数にかかわらず速い。
'str' * nをjoinが逆転するのは見てびっくり。
StringIOは1MBを超えたあたりで突然加速、理由不明。
cStringIOを余裕で追い抜き、100MBあたりではjoinに並ぶ。
ただし、1MBを超えるようなものはPythonで扱うべきではないかも。
for, map, list内包は勝ったり負けたり。
disってみたところ、速そうなのはfor-join。
psycoも使ってみたけど、速度はあまりかわらず。
結論:
addでもjoinでも扱いやすいほうを使おう。
速度はPythonを使う上では気にしない。