- Store large amounts of data, e.g. tick data
- Retrieve subsets of data into memory
- Programming language independent
- Minimal setup requirements, single client
- High performance
21 May 2016
# Create (or open) an HDF5 file and populate it with two groups of matrices.
f <- h5file("test.h5")

# Obtain a handle on "group1" and store two 3x3 integer matrices in it.
g1 <- f["group1"]
g1["mat"] <- matrix(1:9, nrow = 3)
g1["mat2"] <- matrix(11:19, nrow = 3)

# Attach a descriptive attribute to the group itself.
h5attr(g1, "attr1") <- "This is Group 1"

# A dataset can also be addressed by its full path in a single step.
f["group2/mat3"] <- matrix(21:29, nrow = 3)

# Read only the first column of every dataset in the file.
sapply(list.datasets(f), function(x) f[x][, 1])
##      /group1/mat /group1/mat2 /group2/mat3
## [1,]           1           11           21
## [2,]           2           12           22
## [3,]           3           13           23

# Release the file handle once done.
h5close(f)
Python:
from pandas import date_range, DataFrame
from numpy import random

# Daily dates from 2010-01-01 through 2016-01-01, converted to an array of
# datetime.date objects via the `.date` accessor.
t = date_range('2010-01-01', '2016-01-01', freq='D').date

# One row of three standard-normal draws per date.
randmat = random.standard_normal((len(t), 3))
df = DataFrame(randmat, index=t)

# Write the frame to HDF5 under the key 'testset'. The key is passed by
# keyword: passing it positionally is deprecated in recent pandas releases.
df.to_hdf('ex-pandas.h5', key='testset')
R:
# Open the file written by pandas in read-only mode.
f <- h5file("ex-pandas.h5", "r")

# The index values in "testset/axis1" are treated as day counts in which
# 1 corresponds to 0001-01-01, hence the origin and the subtraction of one day.
dates <- as.Date(f["testset/axis1"][1:3], origin = "0001-01-01") - 1

# Pull only the first three rows of the value block into memory.
values <- f["testset/block0_values"][1:3, ]

# Everything needed is now in memory, so release the file handle.
h5close(f)

zoo(values, order.by = dates)