Vbench performance benchmarks for pandas

indexing

dataframe_getitem_scalar

Benchmark setup

from pandas_vb_common import *

# Fixture for the chained scalar lookup df[col][idx]: a 1000 x 30 frame of
# uniform random floats whose row and column labels come from tm.rands(10).
n_rows, n_cols = 1000, 30

index = [tm.rands(10) for _ in xrange(n_rows)]
columns = [tm.rands(10) for _ in xrange(n_cols)]

values = np.random.rand(n_rows, n_cols)
df = DataFrame(values, index=index, columns=columns)

# Fixed row / column labels that the benchmark statement looks up.
idx, col = index[100], columns[10]

Benchmark statement

df[col][idx]

Performance graph

_images/dataframe_getitem_scalar.png

indexing_dataframe_boolean_rows

Benchmark setup

from pandas_vb_common import *

# 10000 x 4 frame of np.random.randn draws with columns labelled A-D.
values = np.random.randn(10000, 4)
df = DataFrame(values, columns=['A', 'B', 'C', 'D'])

# Boolean row mask: True wherever column B is positive.
indexer = df['B'] > 0
# The same mask stored with object dtype.
obj_indexer = indexer.astype('O')

Benchmark statement

df[indexer]

Performance graph

_images/indexing_dataframe_boolean_rows.png

sort_level_zero

Benchmark setup

from pandas_vb_common import *

# Two-level index built from every (outer, inner) pair with
# outer in 0..99 and inner in 0..999 -- 100000 entries in total.
n_outer, n_inner = 100, 1000
a = np.repeat(np.arange(n_outer), n_inner)
b = np.tile(np.arange(n_inner), n_outer)

# Reorder the entries with a random permutation so the index passed to
# sortlevel() does not start out in sorted order.
shuffled = np.random.permutation(np.arange(100000))
midx = MultiIndex.from_arrays([a, b]).take(shuffled)

Benchmark statement

midx.sortlevel(0)

Performance graph

_images/sort_level_zero.png

series_getitem_scalar

Benchmark setup

from pandas_vb_common import *

# Fixture for the scalar lookup ts[dt].
# tm.N is assigned before makeTimeSeries() is called -- presumably it sets the
# length of the generated series; verify against pandas.util.testing.
tm.N = 1000
ts = tm.makeTimeSeries()
# Index label at position 500; the benchmark statement looks it up in ts.
dt = ts.index[500]

Benchmark statement

ts[dt]

Performance graph

_images/series_getitem_scalar.png

sort_level_one

Benchmark setup

from pandas_vb_common import *

# Level values for a 100 x 1000 two-level index: `a` repeats each outer key
# 1000 times, `b` cycles through the inner keys 100 times.
outer_keys = np.arange(100)
inner_keys = np.arange(1000)
a = np.repeat(outer_keys, 1000)
b = np.tile(inner_keys, 100)

# Build the index, then take its entries in a random order.
order = np.random.permutation(np.arange(100000))
ordered_midx = MultiIndex.from_arrays([a, b])
midx = ordered_midx.take(order)

Benchmark statement

midx.sortlevel(1)

Performance graph

_images/sort_level_one.png

dataframe_get_value

Benchmark setup

from pandas_vb_common import *

# Fixture for the scalar lookup df.get_value(idx, col).
#
# Use DataMatrix when the star import above provides that name; otherwise
# fall back to DataFrame. A plain name lookup can only fail with NameError,
# so catch exactly that instead of using a bare ``except:`` (which would
# also swallow KeyboardInterrupt / SystemExit).
try:
    klass = DataMatrix
except NameError:
    klass = DataFrame

# 1000 row labels and 30 column labels generated by tm.rands(10).
index = [tm.rands(10) for _ in xrange(1000)]
columns = [tm.rands(10) for _ in xrange(30)]
df = klass(np.random.rand(1000, 30), index=index,
           columns=columns)
# Fixed row / column labels that the benchmark statement looks up.
idx = index[100]
col = columns[10]

Benchmark statement

df.get_value(idx, col)

Performance graph

_images/dataframe_get_value.png

datamatrix_getitem_scalar

Benchmark setup

from pandas_vb_common import *

# Fixture for the chained scalar lookup df[col][idx].
#
# Prefer DataMatrix if the star import above defines it, else use DataFrame.
# Only NameError is caught: that is the sole failure mode of a bare name
# lookup, and a blanket ``except:`` would also hide KeyboardInterrupt and
# SystemExit.
try:
    klass = DataMatrix
except NameError:
    klass = DataFrame

# 1000 row labels and 30 column labels generated by tm.rands(10).
index = [tm.rands(10) for _ in xrange(1000)]
columns = [tm.rands(10) for _ in xrange(30)]
df = klass(np.random.rand(1000, 30), index=index,
           columns=columns)
# Fixed row / column labels that the benchmark statement looks up.
idx = index[100]
col = columns[10]

Benchmark statement

df[col][idx]

Performance graph

_images/datamatrix_getitem_scalar.png

indexing_dataframe_boolean_rows_object

Benchmark setup

from pandas_vb_common import *

# Frame of np.random.randn draws: 10000 rows, four columns labelled A-D.
n_rows, n_cols = 10000, 4
df = DataFrame(
    np.random.randn(n_rows, n_cols),
    columns=['A', 'B', 'C', 'D'],
)

# Boolean mask over the rows (column B > 0), plus an object-dtype copy of it;
# the object-dtype version is what the benchmark statement indexes with.
indexer = df['B'] > 0
obj_indexer = indexer.astype('O')

Benchmark statement

df[obj_indexer]

Performance graph

_images/indexing_dataframe_boolean_rows_object.png