Bayesian error estimation functionals

We present a general-purpose meta-generalized gradient approximation (MGGA) exchange-correlation functional generated within the Bayesian error estimation functional framework.

Jess Wellendorff, Keld T. Lundgaard, Karsten W. Jacobsen, and Thomas Bligaard

mBEEF: An accurate semi-local Bayesian error estimation density functional

The Journal of Chemical Physics 140, 144107 (2014)

Key-value pairs

molecules.db

key

description

name

Name of atom or molecule

ae

Atomization energy

xc

XC-functional

project

Name of the project: “beef”

solids.db

key

description

name

Name of atom or solid

structure

Crystal structure

ce

Cohesive energy

bm

Bulk modulus

xc

XC-functional

project

Name of the project: “beef”

Extra data for each row: volumes and energies.

surfaces.db

XXX suggested keys:

key

description

name

Name of atom or solid

substrate

adsorbate

be

Binding energy

xc

XC-functional

project

Name of the project: “beef”

Performance of functionals

Molecules

# creates: table.csv
from __future__ import print_function
import operator
import ase.db
import numpy as np

con = ase.db.connect('molecules.db')
eref = [row.ae for row in
        con.select(calculator='exp', sort='name')]
eref = np.array(eref)

xcs = [row.xc for row in con.select(name='H', calculator='gpaw')]

data = []
for xc in xcs:
    energies = [row.ae for row in
                con.select('natoms>1', xc=xc, sort='name')]
    de = energies - eref
    data.append((xc, abs(de).mean(), de.min(), de.mean(), de.max()))
    
data.sort(key=operator.itemgetter(1))

fd = open('table.csv', 'w')
print('# XC,    MEANABS,     MIN,    MEAN,     MAX', file=fd)
for row in data:
    print('{:8}{:8.2f},{:8.2f},{:8.2f},{:8.2f}'.format(row[0] + ',', *row[1:]),
          file=fd)

Errors in atomization energy (eV):

# XC

MEANABS

MIN

MEAN

MAX

mBEEF

0.15

-1.14

0.04

0.64

oTPSS

0.18

-0.93

-0.08

0.55

revTPSS

0.26

-0.93

0.21

1.17

MS2

0.26

-2.22

-0.01

0.95

TPSS

0.28

-0.92

0.23

1.04

MS1

0.28

-2.51

-0.20

0.56

MS0

0.41

-2.43

0.10

1.18

RPBE

0.59

-2.52

-0.49

0.94

PBE

0.89

-0.54

0.87

3.36

PBEsol

2.45

-0.17

2.45

7.75

LDA

5.14

-0.03

5.14

14.90

Solids

# creates: lp.csv, ce.csv, bm.csv
from __future__ import print_function, division
import operator
import ase.db
import numpy as np

con = ase.db.connect('solids.db')

xcs = [row.xc for row in con.select(name='FeAl', calculator='gpaw')]

for key in ['lp', 'ce', 'bm']:
    fd = open(key + '.csv', 'w')
    if key == 'lp':
        key = 'volume'
    refs = {row.name: row.get(key) for row in
            con.select(calculator='exp')}
        
    data = []
    for xc in xcs:
        errors = []
        for row in con.select(xc=xc, pbc='TTT'):
            value = row.get(key)
            ref = refs.get(row.name)
            if value is not None and ref is not None:
                if key == 'volume':
                    errors.append(100 * (value / ref)**(1 / 3) - 100)
                else:
                    errors.append(value - ref)
        errors = np.array(errors)
        data.append((xc, len(errors), abs(errors).mean(),
                     errors.min(), errors.mean(), errors.max()))
    
    data.sort(key=operator.itemgetter(2))

    print('# XC, NUMBER, MEANABS, MIN, MEAN, MAX', file=fd)
    for row in data:
        print('{},{},{:.3f},{:.3f},{:.3f},{:.3f}'.format(*row), file=fd)
    fd.close()

Errors in lattice parameters (%):

# XC

NUMBER

MEANABS

MIN

MEAN

MAX

MS2

58

0.599

-1.687

0.248

1.795

PBEsol

58

0.654

-2.143

0.125

2.320

mBEEF

58

0.715

-1.576

0.323

2.824

MS0

58

0.721

-1.487

0.456

2.977

revTPSS

58

0.803

-1.737

0.481

2.702

MS1

58

0.885

-1.343

0.683

3.619

TPSS

58

1.017

-1.513

0.829

3.039

LDA

58

1.075

-4.606

-1.045

0.382

PBE

58

1.273

-0.535

1.209

3.108

oTPSS

57

1.312

-1.183

1.200

6.125

RPBE

58

2.297

0.510

2.297

5.111

Errors in cohesive energies (eV):

# XC

NUMBER

MEANABS

MIN

MEAN

MAX

TPSS

54

0.237

-0.556

-0.041

0.947

PBE

54

0.240

-0.822

-0.080

0.892

MS2

54

0.256

-0.608

0.046

1.260

mBEEF

54

0.269

-1.083

-0.173

0.953

revTPSS

54

0.274

-0.420

0.105

1.128

MS0

54

0.276

-0.821

-0.079

1.148

MS1

54

0.285

-0.907

-0.142

1.047

oTPSS

53

0.304

-0.924

-0.161

0.791

PBEsol

54

0.395

-0.159

0.377

1.335

RPBE

54

0.528

-1.406

-0.511

0.458

LDA

54

0.879

0.065

0.879

2.261

Errors in bulk moduli (GPa):

# XC

NUMBER

MEANABS

MIN

MEAN

MAX

MS2

32

4.741

-11.693

1.602

25.008

MS1

32

5.244

-17.660

-1.496

22.476

MS0

32

5.347

-13.208

0.779

26.070

mBEEF

32

5.582

-20.573

-1.152

13.975

PBEsol

32

5.806

-23.772

-2.600

20.026

LDA

32

7.620

-10.484

5.689

43.047

oTPSS

31

8.190

-31.912

-6.238

16.993

revTPSS

32

8.213

-31.409

-2.692

30.533

TPSS

32

8.519

-34.587

-6.163

17.153

PBE

32

11.780

-37.837

-11.754

0.263

RPBE

32

18.895

-51.580

-18.895

-0.244

Ensembles

Rows with xc='mBEEF' also contain data for doing ensembles:

import matplotlib.pyplot as plt
import ase.db
from ase.dft.bee import ensemble

con = ase.db.connect('molecules.db')


row1 = con.get(formula='N', xc='mBEEF')
e1 = ensemble(row1.energy, row1.data.contributions, 'mBEEF')
row2 = con.get(formula='N2', xc='mBEEF')
e2 = ensemble(row2.energy, row2.data.contributions, 'mBEEF')
ae = 2 * e1 - e2

print('AE = {0:.2f} +- {1:.2f} eV'.format(ae.mean(), ae.std()))

This should print:

AE = 9.69 +- 0.37 eV
../_images/hist.svg

The \(8\times 8\) mBEEF energy contributions are calculated from self-consistent mBEEF calculations at mBEEF geometries.

Reaction energies and error bars

../_images/reactions.svg
# creates: reactions.svg
import numpy as np
import matplotlib.pyplot as plt
import ase.db
from ase.dft.bee import ensemble

re42 = (
    'C2H2+H2->C2H4, CH4+CO2->2CO+2H2, 3O2->2O3, CO2+3H2->CH3OH+H2O, '
    '2CH3OH+O2->2CO2+4H2, 4CO+9H2->trans-butane+4H2O, CH3CH2OH->CH3OCH3, '
    'CO+H2O->CO2+H2, C3H6_Cs+H2->C3H8, Cyclohexadiene_1_4+2H2->Cyclohexane, '
    'isobutane->trans-butane, 2CO+2NO->2CO2+N2, CH4+2Cl2->CCl4+2H2, '
    '2N2+O2->2N2O, H2+O2->H2O2, N2+2H2->N2H4, N2+2O2->2NO2, '
    'CH2OCH2+H2->C2H4+H2O, CO+2H2->CH3OH, C3H4_D2d+2H2->C3H8, O2+2H2->2H2O, '
    'N2+3H2->2NH3, CH4+2F2->CF4+2H2, H2CCO+2H2->C2H4+H2O, N2+O2->2NO, '
    'O2+4HCl->2Cl2+2H2O, CO2+4H2->CH4+2H2O, '
    'Cyclohexadiene_1_3->Cyclohexadiene_1_4, CO+3H2->CH4+H2O, '
    'CH4+CO2->CH3COOH, CH4+NH3->HCN+3H2, 2CO+O2->2CO2, C3H4_C3v+H2->C3H6_Cs, '
    'CO+H2O->HCOOH, H3CNH2+H2->CH4+NH3, O2+H2->2OH, '
    'C6H6+H2->Cyclohexadiene_1_4, CH4+H2O->CH3OH+H2, CH4+CO+H2->CH3CH2OH, '
    'CH3CH2SH+H2->SH2+C2H6, SO2+3H2->SH2+2H2O, 2OH+H2->2H2O').split(', ')

reactions = []
for name in re42:
    reaction = []
    sign = 1
    for formula in name.split('->'):
        for mol in formula.split('+'):
            if mol[0].isdigit():
                n = int(mol[0])
                mol = mol[1:]
            else:
                n = 1
            reaction.append((sign * n, mol))
        sign = -1
    reactions.append((name, reaction))

con = ase.db.connect('molecules.db')
errors = []
errorbars = []
for name, reaction in reactions:
    Eref = 0.0
    E = 0.0
    dE = 0.0
    for n, mol in reaction:
        rowref = con.get(name=mol, calculator='exp')
        Eref += n * rowref.energy
        row = con.get(name=mol, xc='mBEEF')
        E += n * row.energy
        dE += n * ensemble(row.energy, row.data.contributions, 'mBEEF')
    errors.append(E - Eref)
    errorbars.append(dE.std())

fig = plt.figure()
ax = fig.add_subplot(111)
x = 1 + np.arange(len(errors))
ax.errorbar(x, errors, yerr=errorbars, fmt='o')
ax.plot(x, 0 * x, '-')
ax.set_xlabel('Reaction #')
ax.set_ylabel('Reaction energy error (eV)')
plt.savefig('reactions.svg')

Running the calculations again

This example show how to run all the molecule systems with PBE:

from ase.db import connect
from gpaw import GPAW, PW, Mixer, FermiDirac

cref = connect('molecules.db')
xc = 'PBE'
names = [row.name for row in cref.select(xc=xc)]

c = connect('pbe.db')
for name in names:
    id = c.reserve(name=name)
    if id is None:
        continue
    atoms = cref.get_atoms(xc=xc, name=name)
    atoms.center(vacuum=3.5)
    atoms.cell += ([0, 0, 0], [0, 0.1, 0], [0, 0, 0.2])
    atoms.calc = GPAW(mode=PW(600),
                      xc=xc,
                      mixer=Mixer(0.25, 3, 1.0),
                      occupations=FermiDirac(0.0, fixmagmom=True),
                      txt=name + '.' + xc + '.txt')
    atoms.get_forces()
    c.write(atoms, name=name, xc=xc)
    del c[id]