parser¶
read_csv_thou_vb¶
Benchmark setup
from pandas_vb_common import *
from pandas import read_csv
import os
# Shape of the random frame written to disk for this benchmark.
N = 10000
K = 8


def _with_thousands(x):
    """Render *x* with ',' as the thousands separator (1234.5 -> '1,234.5')."""
    return '{:,}'.format(x)


# Standard-normal samples scaled by random integers drawn from [100, 10000).
df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)))
# Format every cell as text, one column at a time. Series.map is used instead
# of DataFrame.applymap so the setup also runs on pandas releases where
# applymap is deprecated; the resulting frame of strings is the same.
df = df.apply(lambda column: column.map(_with_thousands))
# Fixture consumed by the benchmark statement: '|'-delimited so the ','
# inside the formatted numbers is not mistaken for a field separator.
df.to_csv('test.csv', sep='|')
Benchmark statement
# Timed statement: parse the '|'-delimited file written by the setup,
# treating ',' inside numeric fields as a thousands separator.
read_csv('test.csv', sep='|', thousands=',')
Performance graph
read_csv_comment_vb¶
Benchmark setup
from pandas_vb_common import *
from pandas import read_csv
import os
# Shape of the random frame written to disk for this benchmark.
N = 10000
K = 8


def _as_fixed_point(x):
    """Render *x* as text using the '%f' conversion."""
    return '%f' % x


# Standard-normal samples scaled by random integers drawn from [100, 10000).
df = DataFrame(np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)))
# Format every cell as text, one column at a time. Series.map is used instead
# of DataFrame.applymap so the setup also runs on pandas releases where
# applymap is deprecated; the resulting frame of strings is the same.
df = df.apply(lambda column: column.map(_as_fixed_point))
# Put the comment character in column 0 of rows labelled 0 through 5.
# .loc is label-based and end-inclusive, which is what the removed .ix
# indexer did on this integer-labelled frame.
df.loc[:5, 0] = '#'
# Fixture consumed by the benchmark statement.
df.to_csv('test.csv', sep='|')
Benchmark statement
# Timed statement: parse the '|'-delimited file written by the setup with
# '#' configured as the comment character.
read_csv('test.csv', sep='|', comment='#')
Performance graph
read_table_multiple_date_baseline¶
Benchmark setup
from pandas_vb_common import *
from pandas import read_table
from cStringIO import StringIO
import os
# Size constants; nothing below in this setup reads them.
N, K = 10000, 8

# Five sample rows, each newline-terminated. Column 1 carries the date and
# time together in a single field.
data = (
    'KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n'
    'KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n'
    'KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n'
    'KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n'
    'KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n'
)
# Repeat the five-row sample 2000 times (10000 rows in total).
data *= 2000
Benchmark statement
# Timed statement: parse the in-memory comma-separated text built by the
# setup, with no header row, converting column 1 (the combined date-and-time
# field) to dates.
read_table(StringIO(data), sep=',', header=None, parse_dates=[1])
Performance graph
read_csv_vb¶
Benchmark setup
from pandas_vb_common import *
from pandas import read_csv
import os
# Frame dimensions: N rows by K columns.
N, K = 10000, 8

# The normal draw is taken before the integer draw, matching the order in
# which the two generators are consumed by a single combined expression.
gaussian = np.random.randn(N, K)
multiplier = np.random.randint(100, 10000, (N, K))
df = DataFrame(gaussian * multiplier)

# Fixture read back by the benchmark statement.
df.to_csv('test.csv', sep='|')
Benchmark statement
# Timed statement: parse the '|'-delimited numeric file written by the setup.
read_csv('test.csv', sep='|')
Performance graph
read_table_multiple_date¶
Benchmark setup
from pandas_vb_common import *
from pandas import read_table
from cStringIO import StringIO
import os
# Size constants; nothing below in this setup reads them.
N, K = 10000, 8

# Five sample rows, each newline-terminated. The date sits in column 1 and
# two time-of-day fields follow in columns 2 and 3.
data = (
    'KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000\n'
    'KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000\n'
    'KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000\n'
    'KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000\n'
    'KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000\n'
)
# Repeat the five-row sample 2000 times (10000 rows in total).
data *= 2000
Benchmark statement
# Timed statement: parse the in-memory comma-separated text built by the
# setup, with no header row. The nested parse_dates list requests two date
# columns, one combined from columns 1 and 2 and one from columns 1 and 3.
read_table(StringIO(data), sep=',', header=None, parse_dates=[[1,2], [1,3]])
Performance graph