Bayesian error estimation functionals

Jess Wellendorff, Keld T. Lundgaard, Karsten W. Jacobsen, and Thomas Bligaard

mBEEF: An accurate semi-local Bayesian error estimation density functional

The Journal of Chemical Physics 140, 144107 (2014)

Key-value pairs

molecules.db

key description
name Name of atom or molecule
ae Atomization energy
xc XC-functional
project Name of the project: “beef”

solids.db

key description
name Name of atom or solid
structure Crystal structure
ce Cohesive energy
bm Bulk modulus
xc XC-functional
project Name of the project: “beef”

Extra data for each row: volumes and energies.

surfaces.db

XXX suggested keys:

key description
name Name of atom or solid
substrate
adsorbate
be Binding energy
xc XC-functional
project Name of the project: “beef”

Performance of functionals

Molecules

# creates: table.csv
from __future__ import print_function
import operator
import ase.db
import numpy as np

con = ase.db.connect('molecules.db')
eref = [row.ae for row in
        con.select(calculator='exp', sort='name')]
eref = np.array(eref)

xcs = [row.xc for row in con.select(name='H', calculator='gpaw')]

data = []
for xc in xcs:
    energies = [row.ae for row in
                con.select('natoms>1', xc=xc, sort='name')]
    de = energies - eref
    data.append((xc, abs(de).mean(), de.min(), de.mean(), de.max()))
    
data.sort(key=operator.itemgetter(1))

fd = open('table.csv', 'w')
print('# XC,    MEANABS,     MIN,    MEAN,     MAX', file=fd)
for row in data:
    print('{:8}{:8.2f},{:8.2f},{:8.2f},{:8.2f}'.format(row[0] + ',', *row[1:]),
          file=fd)

Errors in atomization energy (eV):

# XC MEANABS MIN MEAN MAX
mBEEF 0.15 -1.14 0.04 0.64
oTPSS 0.18 -0.93 -0.08 0.55
revTPSS 0.26 -0.93 0.21 1.17
MS2 0.26 -2.22 -0.01 0.95
TPSS 0.28 -0.92 0.23 1.04
MS1 0.28 -2.51 -0.20 0.56
MS0 0.41 -2.43 0.10 1.18
RPBE 0.59 -2.52 -0.49 0.94
PBE 0.89 -0.54 0.87 3.36
PBEsol 2.45 -0.17 2.45 7.75
LDA 5.14 -0.03 5.14 14.90

Solids

# creates: lp.csv, ce.csv, bm.csv
from __future__ import print_function, division
import operator
import ase.db
import numpy as np

con = ase.db.connect('solids.db')

xcs = [row.xc for row in con.select(name='FeAl', calculator='gpaw')]

for key in ['lp', 'ce', 'bm']:
    fd = open(key + '.csv', 'w')
    if key == 'lp':
        key = 'volume'
    refs = {row.name: row.get(key) for row in
            con.select(calculator='exp')}
        
    data = []
    for xc in xcs:
        errors = []
        for row in con.select(xc=xc, pbc='TTT'):
            value = row.get(key)
            ref = refs.get(row.name)
            if value is not None and ref is not None:
                if key == 'volume':
                    errors.append(100 * (value / ref)**(1 / 3) - 100)
                else:
                    errors.append(value - ref)
        errors = np.array(errors)
        data.append((xc, len(errors), abs(errors).mean(),
                     errors.min(), errors.mean(), errors.max()))
    
    data.sort(key=operator.itemgetter(2))

    print('# XC, NUMBER, MEANABS, MIN, MEAN, MAX', file=fd)
    for row in data:
        print('{},{},{:.3f},{:.3f},{:.3f},{:.3f}'.format(*row), file=fd)
    fd.close()

Errors in lattice parameters (%):

# XC NUMBER MEANABS MIN MEAN MAX
MS2 58 0.599 -1.687 0.248 1.795
PBEsol 58 0.654 -2.143 0.125 2.320
mBEEF 58 0.715 -1.576 0.323 2.824
MS0 58 0.721 -1.487 0.456 2.977
revTPSS 58 0.803 -1.737 0.481 2.702
MS1 58 0.885 -1.343 0.683 3.619
TPSS 58 1.017 -1.513 0.829 3.039
LDA 58 1.075 -4.606 -1.045 0.382
PBE 58 1.273 -0.535 1.209 3.108
oTPSS 57 1.312 -1.183 1.200 6.125
RPBE 58 2.297 0.510 2.297 5.111

Errors in cohesive energies (eV):

# XC NUMBER MEANABS MIN MEAN MAX
TPSS 54 0.237 -0.556 -0.041 0.947
PBE 54 0.240 -0.822 -0.080 0.892
MS2 54 0.256 -0.608 0.046 1.260
mBEEF 54 0.269 -1.083 -0.173 0.953
revTPSS 54 0.274 -0.420 0.105 1.128
MS0 54 0.276 -0.821 -0.079 1.148
MS1 54 0.285 -0.907 -0.142 1.047
oTPSS 53 0.304 -0.924 -0.161 0.791
PBEsol 54 0.395 -0.159 0.377 1.335
RPBE 54 0.528 -1.406 -0.511 0.458
LDA 54 0.879 0.065 0.879 2.261

Errors in bulk moduli (GPa):

# XC NUMBER MEANABS MIN MEAN MAX
MS2 32 4.741 -11.693 1.602 25.008
MS1 32 5.244 -17.660 -1.496 22.476
MS0 32 5.347 -13.208 0.779 26.070
mBEEF 32 5.582 -20.573 -1.152 13.975
PBEsol 32 5.806 -23.772 -2.600 20.026
LDA 32 7.620 -10.484 5.689 43.047
oTPSS 31 8.190 -31.912 -6.238 16.993
revTPSS 32 8.213 -31.409 -2.692 30.533
TPSS 32 8.519 -34.587 -6.163 17.153
PBE 32 11.780 -37.837 -11.754 0.263
RPBE 32 18.895 -51.580 -18.895 -0.244

Ensembles

Rows with xc='mBEEF' also contain data for doing ensembles:

import ase.db
from ase.dft.bee import ensemble

con = ase.db.connect('molecules.db')


row1 = con.get(formula='N', xc='mBEEF')
e1 = ensemble(row1.energy, row1.data.contributions, 'mBEEF')
row2 = con.get(formula='N2', xc='mBEEF')
e2 = ensemble(row2.energy, row2.data.contributions, 'mBEEF')
ae = 2 * e1 - e2

print('AE = {0:.2f} +- {1:.2f} eV'.format(ae.mean(), ae.std()))

This should print:

AE = 9.69 +- 0.37 eV
../_images/hist.svg

The \(8\times 8\) mBEEF energy contributions are calculated from self-consistent mBEEF calculations at mBEEF geometries.

Reaction energies and error bars

../_images/reactions.svg
# creates: reactions.svg
import numpy as np
import matplotlib.pyplot as plt
import ase.db
from ase.dft.bee import ensemble

re42 = (
    'C2H2+H2->C2H4, CH4+CO2->2CO+2H2, 3O2->2O3, CO2+3H2->CH3OH+H2O, '
    '2CH3OH+O2->2CO2+4H2, 4CO+9H2->trans-butane+4H2O, CH3CH2OH->CH3OCH3, '
    'CO+H2O->CO2+H2, C3H6_Cs+H2->C3H8, Cyclohexadiene_1_4+2H2->Cyclohexane, '
    'isobutane->trans-butane, 2CO+2NO->2CO2+N2, CH4+2Cl2->CCl4+2H2, '
    '2N2+O2->2N2O, H2+O2->H2O2, N2+2H2->N2H4, N2+2O2->2NO2, '
    'CH2OCH2+H2->C2H4+H2O, CO+2H2->CH3OH, C3H4_D2d+2H2->C3H8, O2+2H2->2H2O, '
    'N2+3H2->2NH3, CH4+2F2->CF4+2H2, H2CCO+2H2->C2H4+H2O, N2+O2->2NO, '
    'O2+4HCl->2Cl2+2H2O, CO2+4H2->CH4+2H2O, '
    'Cyclohexadiene_1_3->Cyclohexadiene_1_4, CO+3H2->CH4+H2O, '
    'CH4+CO2->CH3COOH, CH4+NH3->HCN+3H2, 2CO+O2->2CO2, C3H4_C3v+H2->C3H6_Cs, '
    'CO+H2O->HCOOH, H3CNH2+H2->CH4+NH3, O2+H2->2OH, '
    'C6H6+H2->Cyclohexadiene_1_4, CH4+H2O->CH3OH+H2, CH4+CO+H2->CH3CH2OH, '
    'CH3CH2SH+H2->SH2+C2H6, SO2+3H2->SH2+2H2O, 2OH+H2->2H2O').split(', ')

reactions = []
for name in re42:
    reaction = []
    sign = 1
    for formula in name.split('->'):
        for mol in formula.split('+'):
            if mol[0].isdigit():
                n = int(mol[0])
                mol = mol[1:]
            else:
                n = 1
            reaction.append((sign * n, mol))
        sign = -1
    reactions.append((name, reaction))

con = ase.db.connect('molecules.db')
errors = []
errorbars = []
for name, reaction in reactions:
    Eref = 0.0
    E = 0.0
    dE = 0.0
    for n, mol in reaction:
        rowref = con.get(name=mol, calculator='exp')
        Eref += n * rowref.energy
        row = con.get(name=mol, xc='mBEEF')
        E += n * row.energy
        dE += n * ensemble(row.energy, row.data.contributions, 'mBEEF')
    errors.append(E - Eref)
    errorbars.append(dE.std())

fig = plt.figure()
ax = fig.add_subplot(111)
x = 1 + np.arange(len(errors))
ax.errorbar(x, errors, yerr=errorbars, fmt='o')
ax.plot(x, 0 * x, '-')
ax.set_xlabel('Reaction #')
ax.set_ylabel('Reaction energy error (eV)')
plt.savefig('reactions.svg')

Running the calculations again

This example show how to run all the molecule systems with PBE:

from ase.db import connect
from gpaw import GPAW, PW, Mixer, FermiDirac

cref = connect('molecules.db')
xc = 'PBE'
names = [row.name for row in cref.select(xc=xc)]

c = connect('pbe.db')
for name in names:
    id = c.reserve(name=name)
    if id is None:
        continue
    atoms = cref.get_atoms(xc=xc, name=name)
    atoms.center(vacuum=3.5)
    atoms.cell += ([0, 0, 0], [0, 0.1, 0], [0, 0, 0.2])
    atoms.calc = GPAW(mode=PW(600),
                      xc=xc,
                      mixer=Mixer(0.25, 3, 1.0),
                      occupations=FermiDirac(0.0, fixmagmom=True),
                      txt=name + '.' + xc + '.txt')
    atoms.get_forces()
    c.write(atoms, name=name, xc=xc)
    del c[id]