Vbench performance benchmarks for pandas

stat_ops

stat_ops_series_std

Benchmark setup

from pandas_vb_common import *

s = Series(np.random.randn(100000), index=np.arange(100000))
s[::2] = np.nan

Benchmark statement

s.std()

Performance graph

_images/stat_ops_series_std.png

stats_rank_average

Benchmark setup

from pandas_vb_common import *

values = np.concatenate([np.arange(100000),
                         np.random.randn(100000),
                         np.arange(100000)])
s = Series(values)

Benchmark statement

s.rank()

Performance graph

_images/stats_rank_average.png

stats_rank2d_axis1_average

Benchmark setup

from pandas_vb_common import *

df = DataFrame(np.random.randn(5000, 50))

Benchmark statement

df.rank(1)

Performance graph

_images/stats_rank2d_axis1_average.png

stat_ops_level_frame_sum

Benchmark setup

from pandas_vb_common import *

index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                   labels=[np.arange(10).repeat(10000),
                           np.tile(np.arange(100).repeat(100), 10),
                           np.tile(np.tile(np.arange(100), 100), 10)])
random.shuffle(index.values)
df = DataFrame(np.random.randn(len(index), 4), index=index)
df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1])

Benchmark statement

df.sum(level=1)

Performance graph

_images/stat_ops_level_frame_sum.png

stat_ops_level_series_sum_multiple

Benchmark setup

from pandas_vb_common import *

index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                   labels=[np.arange(10).repeat(10000),
                           np.tile(np.arange(100).repeat(100), 10),
                           np.tile(np.tile(np.arange(100), 100), 10)])
random.shuffle(index.values)
df = DataFrame(np.random.randn(len(index), 4), index=index)
df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1])

Benchmark statement

df[1].sum(level=[0, 1])

Performance graph

_images/stat_ops_level_series_sum_multiple.png

stat_ops_level_frame_sum_multiple

Benchmark setup

from pandas_vb_common import *

index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                   labels=[np.arange(10).repeat(10000),
                           np.tile(np.arange(100).repeat(100), 10),
                           np.tile(np.tile(np.arange(100), 100), 10)])
random.shuffle(index.values)
df = DataFrame(np.random.randn(len(index), 4), index=index)
df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1])

Benchmark statement

df.sum(level=[0, 1])

Performance graph

_images/stat_ops_level_frame_sum_multiple.png

stats_rank2d_axis0_average

Benchmark setup

from pandas_vb_common import *

df = DataFrame(np.random.randn(5000, 50))

Benchmark statement

df.rank()

Performance graph

_images/stats_rank2d_axis0_average.png

stat_ops_level_series_sum

Benchmark setup

from pandas_vb_common import *

index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                   labels=[np.arange(10).repeat(10000),
                           np.tile(np.arange(100).repeat(100), 10),
                           np.tile(np.tile(np.arange(100), 100), 10)])
random.shuffle(index.values)
df = DataFrame(np.random.randn(len(index), 4), index=index)
df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1])

Benchmark statement

df[1].sum(level=1)

Performance graph

_images/stat_ops_level_series_sum.png

stats_rank_average_int

Benchmark setup

from pandas_vb_common import *

values = np.random.randint(0, 100000, size=200000)
s = Series(values)

Benchmark statement

s.rank()

Performance graph

_images/stats_rank_average_int.png