文字列の連結はadd(+=)のほうが速かったよなと思って、色々試してみた。
def str_mul():
    """Build the result with a single sequence-repetition expression.

    Reads the module-level globals ``name`` (the piece to repeat) and
    ``count`` (how many times to repeat it).
    """
    repeated = name * count
    return repeated
def str_join():
    """Collect ``count`` copies of ``name`` in a list, then join them.

    Reads the module-level globals ``name`` and ``count``.
    """
    pieces = []
    # Look the bound method up once so the loop body is a plain local call.
    push = pieces.append
    for _ in xrange(count):
        push(name)
    return ''.join(pieces)
def str_map():
    """Produce ``count`` copies of ``name`` via ``map`` and join them.

    Reads the module-level globals ``name`` and ``count``.
    """
    # The lambda ignores its argument; map is used only to repeat ``name``.
    return ''.join(map(lambda _: name, xrange(count)))
def list_comp():
    """Join ``count`` copies of ``name`` produced by a generator expression.

    Reads the module-level globals ``name`` and ``count``.
    """
    copies = (name for _ in xrange(count))
    return ''.join(copies)
def str_add():
    """Build the result by appending ``name`` to a string ``count`` times.

    Reads the module-level globals ``name`` and ``count``.
    """
    result = ''
    for _ in xrange(count):
        # Augmented assignment is the operation being benchmarked here.
        result += name
    return result
import cStringIO
def cstring_io():
    """Write ``name`` into a ``cStringIO.StringIO`` buffer ``count`` times.

    Reads the module-level globals ``name`` and ``count`` and returns the
    buffer's accumulated contents.
    """
    buf = cStringIO.StringIO()
    # Look the bound method up once so the loop body is a plain local call.
    emit = buf.write
    for _ in xrange(count):
        emit(name)
    return buf.getvalue()
import StringIO
def string_io():
    """Write ``name`` into a ``StringIO.StringIO`` buffer ``count`` times.

    Reads the module-level globals ``name`` and ``count`` and returns the
    buffer's accumulated contents.
    """
    buf = StringIO.StringIO()
    # Look the bound method up once so the loop body is a plain local call.
    emit = buf.write
    for _ in xrange(count):
        emit(name)
    return buf.getvalue()
from array import array
def str_array():
    """Append ``name`` to a character array ``count`` times.

    Reads the module-level globals ``name`` and ``count`` and returns the
    array's contents converted back to a string.
    """
    chars = array('c')
    # Look the bound method up once so the loop body is a plain local call.
    extend_from = chars.fromstring
    for _ in xrange(count):
        extend_from(name)
    return chars.tostring()
from mmap import mmap
def str_mmap():
    """Write ``name`` into an anonymous memory map ``count`` times.

    Reads the module-level globals ``name`` and ``count``.  The map is sized
    up front to ``count * len(name)`` bytes, filled sequentially, then read
    back from the start.

    Returns:
        The concatenated string, or ``''`` when the total size is zero.
    """
    size = count * len(name)
    if not size:
        # A zero-length anonymous map cannot be created (mmap raises), so
        # return the empty result the other builders would produce.
        return ''
    m = mmap(-1, size)
    try:
        # Look the bound method up once so the loop body is a local call.
        write = m.write
        for _ in xrange(count):
            write(name)
        m.seek(0)
        return m.read(size)
    finally:
        # Release the mapping explicitly instead of waiting for collection.
        m.close()
def main():
global count, name
func_list = (str_mul, str_join, str_add, string_io, cstring_io,
str_map, list_comp, str_array, str_mmap)
# test
count = 2
name = 'hello'
assert all((name * count).__eq__(f()) for f in func_list)
import timeit
for c in (10, 1000):
for b in (1, 1000):
for f in func_list:
count = c
name = 'hello' * b
print '%10s x %4s, "hello"*%4s: %9.4fms'%(
f.func_name, c, b, timeit.timeit(f, number=100)/100*1000)
# Run the benchmark only when this file is executed as a script.
if "__main__" == __name__:
    main()
   str_mul x   10, "hello"*   1:    0.0006ms
  str_join x   10, "hello"*   1:    0.0042ms
   str_add x   10, "hello"*   1:    0.0033ms
 string_io x   10, "hello"*   1:    0.0312ms
cstring_io x   10, "hello"*   1:    0.0072ms
   str_map x   10, "hello"*   1:    0.0059ms
 list_comp x   10, "hello"*   1:    0.0077ms
 str_array x   10, "hello"*   1:    0.0081ms
  str_mmap x   10, "hello"*   1:    0.0180ms
   str_mul x   10, "hello"*1000:    0.0108ms
  str_join x   10, "hello"*1000:    0.0131ms
   str_add x   10, "hello"*1000:    0.0126ms
 string_io x   10, "hello"*1000:    0.0411ms
cstring_io x   10, "hello"*1000:    0.0364ms
   str_map x   10, "hello"*1000:    0.0146ms
 list_comp x   10, "hello"*1000:    0.0167ms
 str_array x   10, "hello"*1000:    0.0377ms
  str_mmap x   10, "hello"*1000:    0.1105ms
   str_mul x 1000, "hello"*   1:    0.0017ms
  str_join x 1000, "hello"*   1:    0.2352ms
   str_add x 1000, "hello"*   1:    0.2235ms
 string_io x 1000, "hello"*   1:    2.4272ms
cstring_io x 1000, "hello"*   1:    0.5111ms
   str_map x 1000, "hello"*   1:    0.3591ms
 list_comp x 1000, "hello"*   1:    0.2296ms
 str_array x 1000, "hello"*   1:    0.5338ms
  str_mmap x 1000, "hello"*   1:    0.4778ms
   str_mul x 1000, "hello"*1000:    8.5971ms
  str_join x 1000, "hello"*1000:    8.3908ms
   str_add x 1000, "hello"*1000:   29.4590ms
 string_io x 1000, "hello"*1000:   10.6351ms
cstring_io x 1000, "hello"*1000:   28.7235ms
   str_map x 1000, "hello"*1000:    8.8407ms
 list_comp x 1000, "hello"*1000:    8.2300ms
 str_array x 1000, "hello"*1000:   38.9228ms
  str_mmap x 1000, "hello"*1000:   18.6053ms
addは50kB程度までは、文字列長や回数にかかわらず速い。
joinが'str' * nを逆転するのを見てびっくり。
StringIOは1MBを超えたあたりで突然加速、理由不明。
cStringIOを余裕で追い抜き、100MBあたりではjoinに並ぶ。
ただし、1MBを超えるようなものはPythonで扱うべきではないかも。
for, map, list内包は勝ったり負けたり。
disモジュールでバイトコードを見てみたところ、速そうなのはfor-join。
psycoも使ってみたけど、速度はあまりかわらず。
結論:
addでもjoinでも扱いやすいほうを使おう。
速度はPythonを使う上では気にしない。