Plotting the results

Convenient and powerful access to the HDF5 output files is provided by the Python packages h5py together with PyLab. An examplary Python script for accessing, post-processing and plotting the output data of HALMD is provided in the sources at examples/plotting/plot_h5md.py; it requires H5Py ≥ 2.0.1.

The various aspects of the script are detailed in the following. It starts with loading some packages, defining command line options, and opening the HDF5 output file.

import argparse
import h5py
from numpy import *
from pylab import *

def main():
    # define and parse command line arguments
    parser = argparse.ArgumentParser(prog='plot_h5md.py')
    parser.add_argument('--range', type=int, nargs=2, help='select range of data points')
    parser.add_argument('--dump', metavar='FILENAME', help='dump plot data to filename')
    parser.add_argument('--no-plot', action='store_true', help='do not produce plots, but do the analysis')
    parser.add_argument('--group', help='particle group (default: %(default)s)', default='all')
    parser.add_argument('input', metavar='INPUT', help='H5MD input file with data for state variables')
    args = parser.parse_args()

    # evaluate option --range
    range = args.range or [0, -1]

    # open and read data file
    H5 = h5py.File(args.input, 'r')
    H5obs = H5['observables']
    H5particle = H5['particles'][args.group]
    H5box = H5particle['box']

The script shows how to extract some of the simulation parameters that are stored along with each HDF5 output file.

    # print some details
    print 'Particles: {0}'.format(H5obs['particle_number'][()])
    print 'Box size:',
    for x in diag(H5box['edges']):
        print ' {0:g}'.format(x),
    print
    print 'Density: {0:g}'.format(H5obs['density'][()])

It illustrates how to computes the average temperature, pressure, and potential energy over the whole simulation run or just over a selected range of data points, i.e., a time window.

    # compute and print some averages
    # the simplest selection of a data set looks like this:
    #     temp, temp_err = compute_average(H5obs['temperature'], 'Temperature')
    # full support for slicing (the second pair of brackets) requires conversion to a NumPy array before
    temp, temp_err = compute_average(array(H5obs['temperature/value'])[range[0]:range[1]], 'Temperature')
    pressure, pressure_err = compute_average(array(H5obs['pressure/value'])[range[0]:range[1]], 'Pressure')
    epot, epot_err = compute_average(array(H5obs['potential_energy/value'])[range[0]:range[1]], 'Potential energy')
def compute_average(data, label, nblocks = 10):
    """ compute and print average of data set
        The error is estimated from grouping the data in shorter blocks """

    # group data in blocks, discard excess data
    data = reshape(data[:(nblocks * (data.shape[0] / nblocks))], (nblocks, -1))

    # compute mean and error
    avg = mean(data)
    err = std(mean(data, axis=1)) / sqrt(nblocks - 1)
    print '{0:s}: {1:.4f} ± {2:.2g}'.format(label, avg, err)

    # return as tuple
    return avg, err

Eventually, the script can dump the fluctuating potential energy as function of time to a text file

    # select data for plotting the potential energy as function of time
    x = array(H5obs['potential_energy/time'])[range[0]:range[1]]
    y = array(H5obs['potential_energy/value'])[range[0]:range[1]]

    # append plot data to file
    if args.dump:
        f = open(args.dump, 'a')
        print >>f, '# time   E_pot(t)'
        savetxt(f, array((x, y)).T)
        print >>f, '\n'
        f.close()

or directly generate a figure from these data

    # generate plot
    if not args.no_plot:
        # plot potential energy versus time
        plot(x, y, '-b', label=args.input)

        # plot mean value for comparison
        x = linspace(min(x), max(x), num=50)
        y = zeros_like(x) + epot
        plot(x, y, ':k')

        # add axes labels and finalise plot
        axis('tight')
        xlabel(r'Time $t$')
        ylabel(r'Potential energy $E_\mathrm{pot}$')
        legend(loc='best', frameon=False)
        show()