addのほうが速かったよなと思って、色々試してみた。
def str_mul(): return name * count def str_join(): lst = [] append = lst.append for i in xrange(count): append(name) return ''.join(lst) def str_map(): lst = map(lambda x:name, xrange(count)) return ''.join(lst) def list_comp(): return ''.join(name for i in xrange(count)) def str_add(): s = '' for i in xrange(count): s += name return s import cStringIO def cstring_io(): io = cStringIO.StringIO() write = io.write for i in xrange(count): write(name) return io.getvalue() import StringIO def string_io(): io = StringIO.StringIO() write = io.write for i in xrange(count): write(name) return io.getvalue() from array import array def str_array(): a = array('c') add = a.fromstring for i in xrange(count): add(name) return a.tostring() from mmap import mmap def str_mmap(): m = mmap(-1, count * len(name)) write = m.write for i in xrange(count): write(name) m.seek(0) return m.read(count * len(name)) def main(): global count, name func_list = (str_mul, str_join, str_add, string_io, cstring_io, str_map, list_comp, str_array, str_mmap) # test count = 2 name = 'hello' assert all((name * count).__eq__(f()) for f in func_list) import timeit for c in (10, 1000): for b in (1, 1000): for f in func_list: count = c name = 'hello' * b print '%10s x %4s, "hello"*%4s: %9.4fms'%( f.func_name, c, b, timeit.timeit(f, number=100)/100*1000) if __name__ == '__main__': main()
str_mul x 10, "hello"* 1: 0.0006ms str_join x 10, "hello"* 1: 0.0042ms str_add x 10, "hello"* 1: 0.0033ms string_io x 10, "hello"* 1: 0.0312ms cstring_io x 10, "hello"* 1: 0.0072ms str_map x 10, "hello"* 1: 0.0059ms list_comp x 10, "hello"* 1: 0.0077ms str_array x 10, "hello"* 1: 0.0081ms str_mmap x 10, "hello"* 1: 0.0180ms str_mul x 10, "hello"*1000: 0.0108ms str_join x 10, "hello"*1000: 0.0131ms str_add x 10, "hello"*1000: 0.0126ms string_io x 10, "hello"*1000: 0.0411ms cstring_io x 10, "hello"*1000: 0.0364ms str_map x 10, "hello"*1000: 0.0146ms list_comp x 10, "hello"*1000: 0.0167ms str_array x 10, "hello"*1000: 0.0377ms str_mmap x 10, "hello"*1000: 0.1105ms str_mul x 1000, "hello"* 1: 0.0017ms str_join x 1000, "hello"* 1: 0.2352ms str_add x 1000, "hello"* 1: 0.2235ms string_io x 1000, "hello"* 1: 2.4272ms cstring_io x 1000, "hello"* 1: 0.5111ms str_map x 1000, "hello"* 1: 0.3591ms list_comp x 1000, "hello"* 1: 0.2296ms str_array x 1000, "hello"* 1: 0.5338ms str_mmap x 1000, "hello"* 1: 0.4778ms str_mul x 1000, "hello"*1000: 8.5971ms str_join x 1000, "hello"*1000: 8.3908ms str_add x 1000, "hello"*1000: 29.4590ms string_io x 1000, "hello"*1000: 10.6351ms cstring_io x 1000, "hello"*1000: 28.7235ms str_map x 1000, "hello"*1000: 8.8407ms list_comp x 1000, "hello"*1000: 8.2300ms str_array x 1000, "hello"*1000: 38.9228ms str_mmap x 1000, "hello"*1000: 18.6053ms
addは50kB程度までは、文字列長や回数にかかわらず速い。
'str' * nをjoinが逆転するのは見てびっくり。
StringIOは1MBを超えたあたりで突然加速、理由不明。
cStringIOを余裕で追い抜き、100MBあたりではjoinに並ぶ。
ただし、1MBを超えるようなものはPythonで扱うべきではないかも。
for, map, list内包は勝ったり負けたり。
disモジュールでバイトコードを見てみたところ、速そうなのはfor-join。
psycoも使ってみたけど、速度はあまりかわらず。
結論:
addでもjoinでも扱いやすいほうを使おう。
速度はPythonを使う上では気にしない。