join_merge¶
join_dataframe_index_single_key_bigger¶
Benchmark setup
from pandas_vb_common import *
# Shared setup for the join benchmarks: builds a 100000-row frame with two
# key columns plus small lookup frames indexed by those keys.

# Object-dtype key levels built from rands(10): 10 and 1000 entries.
level1 = np.array([rands(10) for _ in xrange(10)], dtype='O')
level2 = np.array([rands(10) for _ in xrange(1000)], dtype='O')

# Integer positions into the levels; together they enumerate every
# (level1, level2) combination once (10 * 1000 rows).
label1 = np.arange(10).repeat(1000)
label2 = np.tile(np.arange(1000), 10)

# Key columns: the 10000 combinations repeated 10 times (100000 rows).
key1 = np.tile(level1.take(label1), 10)
key2 = np.tile(level2.take(label2), 10)

# Row positions shuffled in place; used to build df_shuf below.
shuf = np.arange(100000)
random.shuffle(shuf)

try:
    index2 = MultiIndex(levels=[level1, level2], labels=[label1, label2])
    index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                        labels=[np.arange(10).repeat(10000),
                                np.tile(np.arange(100).repeat(100), 10),
                                np.tile(np.tile(np.arange(100), 100), 10)])
    df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2,
                         columns=['A', 'B', 'C', 'D'])
except Exception:  # pre-MultiIndex
    # Best effort: the multi-index objects are simply not created when
    # MultiIndex is unavailable or rejects these arguments.
    pass

try:
    # Use DataMatrix in place of DataFrame when that name exists.
    DataFrame = DataMatrix
except NameError:
    pass

df = DataFrame({'data1' : np.random.randn(100000),
                'data2' : np.random.randn(100000),
                'key1' : key1,
                'key2' : key2})

# Lookup frames indexed by each key level (10 rows and 1000 rows).
df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1,
                    columns=['A', 'B', 'C', 'D'])
df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2,
                    columns=['A', 'B', 'C', 'D'])

# Same rows as df, in shuffled order.
df_shuf = df.reindex(df.index[shuf])
Benchmark statement
df_shuf.join(df_key2, on='key2', sort=True)
Performance graph
append_frame_single_mixed¶
Benchmark setup
from pandas_vb_common import *
# Setup for the append benchmarks: two float-only frames (df1, df2) and two
# mixed-dtype frames (mdf1, mdf2) with non-overlapping indexes.

# Float frame and a copy re-indexed to 10000..19999.
df1 = DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D'])
df2 = df1.copy()
df2.index = np.arange(10000, 20000)

# Mixed variant: the same floats plus two string columns and an int column.
mdf1 = df1.copy()
mdf1['obj1'] = 'bar'
mdf1['obj2'] = 'bar'
mdf1['int1'] = 5
try:
    mdf1.consolidate(inplace=True)
except Exception:
    # Best effort: carry on with the unconsolidated frame when
    # consolidate() is unavailable or fails.
    pass
mdf2 = mdf1.copy()
mdf2.index = df2.index
Benchmark statement
mdf1.append(mdf2)
Performance graph
join_dataframe_index_multi¶
Benchmark setup
from pandas_vb_common import *
# Shared setup for the join benchmarks: builds a 100000-row frame with two
# key columns plus small lookup frames indexed by those keys.

# Object-dtype key levels built from rands(10): 10 and 1000 entries.
level1 = np.array([rands(10) for _ in xrange(10)], dtype='O')
level2 = np.array([rands(10) for _ in xrange(1000)], dtype='O')

# Integer positions into the levels; together they enumerate every
# (level1, level2) combination once (10 * 1000 rows).
label1 = np.arange(10).repeat(1000)
label2 = np.tile(np.arange(1000), 10)

# Key columns: the 10000 combinations repeated 10 times (100000 rows).
key1 = np.tile(level1.take(label1), 10)
key2 = np.tile(level2.take(label2), 10)

# Row positions shuffled in place; used to build df_shuf below.
shuf = np.arange(100000)
random.shuffle(shuf)

try:
    index2 = MultiIndex(levels=[level1, level2], labels=[label1, label2])
    index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                        labels=[np.arange(10).repeat(10000),
                                np.tile(np.arange(100).repeat(100), 10),
                                np.tile(np.tile(np.arange(100), 100), 10)])
    df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2,
                         columns=['A', 'B', 'C', 'D'])
except Exception:  # pre-MultiIndex
    # Best effort: the multi-index objects are simply not created when
    # MultiIndex is unavailable or rejects these arguments.
    pass

try:
    # Use DataMatrix in place of DataFrame when that name exists.
    DataFrame = DataMatrix
except NameError:
    pass

df = DataFrame({'data1' : np.random.randn(100000),
                'data2' : np.random.randn(100000),
                'key1' : key1,
                'key2' : key2})

# Lookup frames indexed by each key level (10 rows and 1000 rows).
df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1,
                    columns=['A', 'B', 'C', 'D'])
df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2,
                    columns=['A', 'B', 'C', 'D'])

# Same rows as df, in shuffled order.
df_shuf = df.reindex(df.index[shuf])
Benchmark statement
df.join(df_multi, on=['key1', 'key2'])
Performance graph
series_align_int64_index¶
Benchmark setup
from pandas_vb_common import *
# Setup for the Series alignment benchmarks: two 500000-row Series whose
# sorted int64 indexes are independent random subsets of one set of stamps.

n = 1000000  # not referenced by the rest of this setup
# indices = Index([rands(10) for _ in xrange(n)])


def sample(values, k):
    """Return up to ``k`` elements of ``values`` taken at distinct random positions.

    Parameters
    ----------
    values : object supporting ``len()`` and ``.take(positions)``
        The pool to draw from (a NumPy array in this setup).
    k : int
        Number of positions to keep from a random permutation of
        ``range(len(values))``.

    Returns
    -------
    The result of ``values.take`` on the first ``k`` permuted positions,
    in permutation (i.e. random) order.
    """
    sampler = np.random.permutation(len(values))
    return values.take(sampler[:k])


sz = 500000
# One million evenly spaced integer offsets.
rng = np.arange(0, 10000000000000, 10000000)
# Offsets shifted by the current time viewed as a 64-bit integer.
stamps = np.datetime64(datetime.now()).view('i8') + rng
idx1 = np.sort(sample(stamps, sz))
idx2 = np.sort(sample(stamps, sz))
ts1 = Series(np.random.randn(sz), idx1)
ts2 = Series(np.random.randn(sz), idx2)
Benchmark statement
ts1 + ts2
Performance graph
join_dataframe_index_single_key_small¶
Benchmark setup
from pandas_vb_common import *
# Shared setup for the join benchmarks: builds a 100000-row frame with two
# key columns plus small lookup frames indexed by those keys.

# Object-dtype key levels built from rands(10): 10 and 1000 entries.
level1 = np.array([rands(10) for _ in xrange(10)], dtype='O')
level2 = np.array([rands(10) for _ in xrange(1000)], dtype='O')

# Integer positions into the levels; together they enumerate every
# (level1, level2) combination once (10 * 1000 rows).
label1 = np.arange(10).repeat(1000)
label2 = np.tile(np.arange(1000), 10)

# Key columns: the 10000 combinations repeated 10 times (100000 rows).
key1 = np.tile(level1.take(label1), 10)
key2 = np.tile(level2.take(label2), 10)

# Row positions shuffled in place; used to build df_shuf below.
shuf = np.arange(100000)
random.shuffle(shuf)

try:
    index2 = MultiIndex(levels=[level1, level2], labels=[label1, label2])
    index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                        labels=[np.arange(10).repeat(10000),
                                np.tile(np.arange(100).repeat(100), 10),
                                np.tile(np.tile(np.arange(100), 100), 10)])
    df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2,
                         columns=['A', 'B', 'C', 'D'])
except Exception:  # pre-MultiIndex
    # Best effort: the multi-index objects are simply not created when
    # MultiIndex is unavailable or rejects these arguments.
    pass

try:
    # Use DataMatrix in place of DataFrame when that name exists.
    DataFrame = DataMatrix
except NameError:
    pass

df = DataFrame({'data1' : np.random.randn(100000),
                'data2' : np.random.randn(100000),
                'key1' : key1,
                'key2' : key2})

# Lookup frames indexed by each key level (10 rows and 1000 rows).
df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1,
                    columns=['A', 'B', 'C', 'D'])
df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2,
                    columns=['A', 'B', 'C', 'D'])

# Same rows as df, in shuffled order.
df_shuf = df.reindex(df.index[shuf])
Benchmark statement
df.join(df_key1, on='key1')
Performance graph
append_frame_single_homogenous¶
Benchmark setup
from pandas_vb_common import *
# Setup for the append benchmarks: two float-only frames (df1, df2) and two
# mixed-dtype frames (mdf1, mdf2) with non-overlapping indexes.

# Float frame and a copy re-indexed to 10000..19999.
df1 = DataFrame(np.random.randn(10000, 4), columns=['A', 'B', 'C', 'D'])
df2 = df1.copy()
df2.index = np.arange(10000, 20000)

# Mixed variant: the same floats plus two string columns and an int column.
mdf1 = df1.copy()
mdf1['obj1'] = 'bar'
mdf1['obj2'] = 'bar'
mdf1['int1'] = 5
try:
    mdf1.consolidate(inplace=True)
except Exception:
    # Best effort: carry on with the unconsolidated frame when
    # consolidate() is unavailable or fails.
    pass
mdf2 = mdf1.copy()
mdf2.index = df2.index
Benchmark statement
df1.append(df2)
Performance graph
join_dataframe_index_single_key_bigger¶
Benchmark setup
from pandas_vb_common import *
# Shared setup for the join benchmarks: builds a 100000-row frame with two
# key columns plus small lookup frames indexed by those keys.

# Object-dtype key levels built from rands(10): 10 and 1000 entries.
level1 = np.array([rands(10) for _ in xrange(10)], dtype='O')
level2 = np.array([rands(10) for _ in xrange(1000)], dtype='O')

# Integer positions into the levels; together they enumerate every
# (level1, level2) combination once (10 * 1000 rows).
label1 = np.arange(10).repeat(1000)
label2 = np.tile(np.arange(1000), 10)

# Key columns: the 10000 combinations repeated 10 times (100000 rows).
key1 = np.tile(level1.take(label1), 10)
key2 = np.tile(level2.take(label2), 10)

# Row positions shuffled in place; used to build df_shuf below.
shuf = np.arange(100000)
random.shuffle(shuf)

try:
    index2 = MultiIndex(levels=[level1, level2], labels=[label1, label2])
    index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                        labels=[np.arange(10).repeat(10000),
                                np.tile(np.arange(100).repeat(100), 10),
                                np.tile(np.tile(np.arange(100), 100), 10)])
    df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2,
                         columns=['A', 'B', 'C', 'D'])
except Exception:  # pre-MultiIndex
    # Best effort: the multi-index objects are simply not created when
    # MultiIndex is unavailable or rejects these arguments.
    pass

try:
    # Use DataMatrix in place of DataFrame when that name exists.
    DataFrame = DataMatrix
except NameError:
    pass

df = DataFrame({'data1' : np.random.randn(100000),
                'data2' : np.random.randn(100000),
                'key1' : key1,
                'key2' : key2})

# Lookup frames indexed by each key level (10 rows and 1000 rows).
df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1,
                    columns=['A', 'B', 'C', 'D'])
df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2,
                    columns=['A', 'B', 'C', 'D'])

# Same rows as df, in shuffled order.
df_shuf = df.reindex(df.index[shuf])
Benchmark statement
df_shuf.join(df_key2, on='key2', sort=True)
Performance graph
join_dataframe_integer_key¶
Benchmark setup
from pandas_vb_common import *
# Shared setup for the join benchmarks: builds a 100000-row frame with two
# key columns plus small lookup frames indexed by those keys.

# Object-dtype key levels built from rands(10): 10 and 1000 entries.
level1 = np.array([rands(10) for _ in xrange(10)], dtype='O')
level2 = np.array([rands(10) for _ in xrange(1000)], dtype='O')

# Integer positions into the levels; together they enumerate every
# (level1, level2) combination once (10 * 1000 rows).
label1 = np.arange(10).repeat(1000)
label2 = np.tile(np.arange(1000), 10)

# Key columns: the 10000 combinations repeated 10 times (100000 rows).
key1 = np.tile(level1.take(label1), 10)
key2 = np.tile(level2.take(label2), 10)

# Row positions shuffled in place; used to build df_shuf below.
shuf = np.arange(100000)
random.shuffle(shuf)

try:
    index2 = MultiIndex(levels=[level1, level2], labels=[label1, label2])
    index3 = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)],
                        labels=[np.arange(10).repeat(10000),
                                np.tile(np.arange(100).repeat(100), 10),
                                np.tile(np.tile(np.arange(100), 100), 10)])
    df_multi = DataFrame(np.random.randn(len(index2), 4), index=index2,
                         columns=['A', 'B', 'C', 'D'])
except Exception:  # pre-MultiIndex
    # Best effort: the multi-index objects are simply not created when
    # MultiIndex is unavailable or rejects these arguments.
    pass

try:
    # Use DataMatrix in place of DataFrame when that name exists.
    DataFrame = DataMatrix
except NameError:
    pass

df = DataFrame({'data1' : np.random.randn(100000),
                'data2' : np.random.randn(100000),
                'key1' : key1,
                'key2' : key2})

# Lookup frames indexed by each key level (10 rows and 1000 rows).
df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1,
                    columns=['A', 'B', 'C', 'D'])
df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2,
                    columns=['A', 'B', 'C', 'D'])

# Same rows as df, in shuffled order.
df_shuf = df.reindex(df.index[shuf])
Benchmark statement
# NOTE(review): the setup listed for this benchmark assigns neither `df2` nor
# a 'key' column (df only gets 'key1'/'key2'); presumably the real setup for
# this benchmark differs from the one shown — confirm against the source.
merge(df, df2, on='key')
Performance graph
series_align_left_monotonic¶
Benchmark setup
from pandas_vb_common import *
# Setup for the Series alignment benchmarks: two 500000-row Series whose
# sorted int64 indexes are independent random subsets of one set of stamps.

n = 1000000  # not referenced by the rest of this setup
# indices = Index([rands(10) for _ in xrange(n)])


def sample(values, k):
    """Return up to ``k`` elements of ``values`` taken at distinct random positions.

    Parameters
    ----------
    values : object supporting ``len()`` and ``.take(positions)``
        The pool to draw from (a NumPy array in this setup).
    k : int
        Number of positions to keep from a random permutation of
        ``range(len(values))``.

    Returns
    -------
    The result of ``values.take`` on the first ``k`` permuted positions,
    in permutation (i.e. random) order.
    """
    sampler = np.random.permutation(len(values))
    return values.take(sampler[:k])


sz = 500000
# One million evenly spaced integer offsets.
rng = np.arange(0, 10000000000000, 10000000)
# Offsets shifted by the current time viewed as a 64-bit integer.
stamps = np.datetime64(datetime.now()).view('i8') + rng
idx1 = np.sort(sample(stamps, sz))
idx2 = np.sort(sample(stamps, sz))
ts1 = Series(np.random.randn(sz), idx1)
ts2 = Series(np.random.randn(sz), idx2)
Benchmark statement
ts1.align(ts2, join='left')
Performance graph
concat_series_axis1¶
Benchmark setup
from pandas_vb_common import *
# Setup for concat along axis=1: many overlapping slices of one Series.
n = 1000
indices = Index([rands(10) for _ in xrange(1000)])
# Every row of s holds the scalar n.
s = Series(n, index=indices)
# Nine slices, each dropping i rows from both ends, repeated 50 times
# (450 pieces in total).
pieces = [s[i:-i] for i in range(1, 10)] * 50
Benchmark statement
concat(pieces, axis=1)
Performance graph