Vbench performance benchmarks for pandas

timeseries

timeseries_period_downsample_mean

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset` (presumably for older pandas releases -- confirm).
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample` so the timed
# statement can call `resample` either way.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; rebound below for this benchmark.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific data: a series of random values indexed by a period
# range with freq='T' running from 1/1/2000 to 1/1/2001.
rng = period_range('1/1/2000', '1/1/2001', freq='T')
ts = Series(np.random.randn(len(rng)), index=rng)

Benchmark statement

# Timed statement: resample the period-indexed `ts` to 'D' frequency,
# aggregating with how='mean'.
ts.resample('D', how='mean')

Performance graph

_images/timeseries_period_downsample_mean.png

timeseries_1min_5min_mean

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the series used by this benchmark.
N = 100000

# Build a range of N points with freq='min'. If `date_range` is not in
# scope (NameError), fall back to `DateRange` and define a `date_range`
# shim that forwards `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample` so the timed
# statement can call `resample` either way.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Random values on the range above; the timed statement slices this series.
ts = Series(np.random.randn(N), index=rng)

Benchmark statement

# Timed statement: take the first 10000 points of `ts` and resample them
# to '5min' frequency, aggregating with how='mean'.
ts[:10000].resample('5min', how='mean')

Performance graph

_images/timeseries_1min_5min_mean.png

timeseries_1min_5min_ohlc

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the series used by this benchmark.
N = 100000

# Build a range of N points with freq='min'. If `date_range` is not in
# scope (NameError), fall back to `DateRange` and define a `date_range`
# shim that forwards `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample` so the timed
# statement can call `resample` either way.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Random values on the range above; the timed statement slices this series.
ts = Series(np.random.randn(N), index=rng)

Benchmark statement

# Timed statement: take the first 10000 points of `ts` and resample them
# to '5min' frequency with how='ohlc' (presumably open/high/low/close
# per bin -- confirm against the pandas resample documentation).
ts[:10000].resample('5min', how='ohlc')

Performance graph

_images/timeseries_1min_5min_ohlc.png

datetimeindex_add_offset

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; the timed statement does not
# read `ts`, only `rng`.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific index: 10000 points with freq='T'.
# NOTE(review): on the fallback path the shim passes the string 'T'
# straight through as `offset` -- confirm DateRange accepts that.
rng = date_range('1/1/2000', periods=10000, freq='T')

Benchmark statement

# Timed statement: add a 2-minute `datetime.timedelta` to the whole index.
rng + timedelta(minutes=2)

Performance graph

_images/datetimeindex_add_offset.png

timeseries_add_irregular

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series that `left` and `right` are sampled from.
ts = Series(np.random.randn(N), index=rng)

# Two independent random permutations of the positions 0..N-1, each cut to
# its first N // 2 entries, give two shuffled half-size position sets.
lindex = np.random.permutation(N)[:N // 2]
rindex = np.random.permutation(N)[:N // 2]
# Build each operand from the values and index labels at those positions,
# so the two series are in shuffled order and their indexes generally
# differ from one another.
left = Series(ts.values.take(lindex), index=ts.index.take(lindex))
right = Series(ts.values.take(rindex), index=ts.index.take(rindex))

Benchmark statement

# Timed statement: add the two shuffled, differently indexed series.
left + right

Performance graph

_images/timeseries_add_irregular.png

timeseries_large_lookup_value

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; rebound below for this benchmark.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific data: a 1,500,000-point index with freq='s' and a
# series holding the constant 1 at every label.
# NOTE(review): on the fallback path the shim passes the string 's'
# straight through as `offset` -- confirm DateRange accepts that.
rng = date_range('1/1/2000', periods=1500000, freq='s')
ts = Series(1, index=rng)

Benchmark statement

# Timed statement: look up the value at the label in the middle of the
# index, then call the index's private `_cleanup()`.
# NOTE(review): `_cleanup()` presumably drops cached lookup state so every
# timed iteration pays the full lookup cost -- confirm.
ts[ts.index[len(ts) // 2]]; ts.index._cleanup()

Performance graph

_images/timeseries_large_lookup_value.png

timeseries_asof_single

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; rebound below for this benchmark.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific data: N is rebound to 10000. `ts` holds random values
# at freq='53s' from 1/1/1990; `dates` holds N * 10 lookup labels at
# freq='5s' from the same start.
# NOTE(review): on the fallback path the shim passes these string
# frequencies straight through as `offset` -- confirm DateRange accepts them.
N = 10000
rng = date_range('1/1/1990', periods=N, freq='53s')
ts = Series(np.random.randn(N), index=rng)
dates = date_range('1/1/1990', periods=N * 10, freq='5s')

Benchmark statement

# Timed statement: `asof` lookup for a single label, the first in `dates`.
ts.asof(dates[0])

Performance graph

_images/timeseries_asof_single.png

timeseries_asof

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; rebound below for this benchmark.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific data: N is rebound to 10000. `ts` holds random values
# at freq='53s' from 1/1/1990; `dates` holds N * 10 lookup labels at
# freq='5s' from the same start.
# NOTE(review): on the fallback path the shim passes these string
# frequencies straight through as `offset` -- confirm DateRange accepts them.
N = 10000
rng = date_range('1/1/1990', periods=N, freq='53s')
ts = Series(np.random.randn(N), index=rng)
dates = date_range('1/1/1990', periods=N * 10, freq='5s')

Benchmark statement

# Timed statement: `asof` lookup for every label in `dates` at once.
ts.asof(dates)

Performance graph

_images/timeseries_asof.png

timeseries_asof_nan

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; rebound below for this benchmark.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific data: N is rebound to 10000. `ts` holds random values
# at freq='53s' from 1/1/1990; `dates` holds N * 10 lookup labels at
# freq='5s' from the same start.
# NOTE(review): on the fallback path the shim passes these string
# frequencies straight through as `offset` -- confirm DateRange accepts them.
N = 10000
rng = date_range('1/1/1990', periods=N, freq='53s')
ts = Series(np.random.randn(N), index=rng)
dates = date_range('1/1/1990', periods=N * 10, freq='5s')
# Overwrite the slice 250:5000 with NaN so the lookup runs over a long
# stretch of missing values.
ts[250:5000] = np.nan

Benchmark statement

# Timed statement: `asof` lookup for every label in `dates` against the
# series that contains a NaN stretch.
ts.asof(dates)

Performance graph

_images/timeseries_asof_nan.png

timeseries_timestamp_tzinfo_cons

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; the timed statement does not
# read `ts`, only `rng`.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific index: a range from 1/1/2000 to 3/1/2000 created
# with tz='US/Eastern'.
# NOTE(review): the fallback shim defined above has no `tz` keyword, so
# this call only works when a real `date_range` is in scope.
rng = date_range('1/1/2000', '3/1/2000', tz='US/Eastern')

Benchmark statement

# Timed statement: fetch the first element of the tz='US/Eastern' index
# (going by the benchmark name, this measures constructing a timestamp
# that carries tzinfo -- confirm).
rng[0]

Performance graph

_images/timeseries_timestamp_tzinfo_cons.png

timeseries_timestamp_downsample_mean

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample` so the timed
# statement can call `resample` either way.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; rebound below for this benchmark.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific data: random values on a `date_range` index with
# freq='T' running from 1/1/2000 to 1/1/2001.
# NOTE(review): on the fallback path the shim passes the string 'T'
# straight through as `offset` -- confirm DateRange accepts that.
rng = date_range('1/1/2000', '1/1/2001', freq='T')
ts = Series(np.random.randn(len(rng)), index=rng)

Benchmark statement

# Timed statement: resample the `date_range`-indexed `ts` to 'D'
# frequency, aggregating with how='mean'.
ts.resample('D', how='mean')

Performance graph

_images/timeseries_timestamp_downsample_mean.png

timeseries_sort_index

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the baseline series built by the common setup.
N = 100000

# Build the baseline range. If `date_range` is not in scope (NameError),
# fall back to `DateRange` and define a `date_range` shim that forwards
# `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Baseline series from the common setup; rebound below for this benchmark.
ts = Series(np.random.randn(N), index=rng)

# Benchmark-specific data: an N-point index with freq='s', reordered by a
# random permutation so the resulting series starts out unsorted.
# NOTE(review): on the fallback path the shim passes the string 's'
# straight through as `offset` -- confirm DateRange accepts that.
N = 100000
rng = date_range('1/1/2000', periods=N, freq='s')
rng = rng.take(np.random.permutation(N))
ts = Series(np.random.randn(N), index=rng)

Benchmark statement

# Timed statement: sort the shuffled series by its index.
ts.sort_index()

Performance graph

_images/timeseries_sort_index.png

timeseries_slice_minutely

Benchmark setup

from pandas_vb_common import *
from datetime import timedelta
# Number of points in the series used by this benchmark.
N = 100000

# Build a range of N points with freq='min'. If `date_range` is not in
# scope (NameError), fall back to `DateRange` and define a `date_range`
# shim that forwards `freq` as `offset`.
try:
    rng = date_range('1/1/2000', periods=N, freq='min')
except NameError:
    rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())
    def date_range(start=None, end=None, periods=None, freq=None):
        return DateRange(start, end, periods=periods, offset=freq)

# When Series exposes `convert`, alias it as `resample`.
if hasattr(Series, 'convert'):
    Series.resample = Series.convert

# Random values on the range above; the timed statement slices this series.
ts = Series(np.random.randn(N), index=rng)

Benchmark statement

# Timed statement: positional slice of the first 10000 points of `ts`.
ts[:10000]

Performance graph

_images/timeseries_slice_minutely.png