Source code for chemparseplot.parse.orca.neb.opi_parser
# SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
#
# SPDX-License-Identifier: MIT
"""Parse ORCA NEB calculations using OPI (ORCA Python Interface).
Requires ORCA 6.1+ and the orca-pi package.
Falls back to legacy parsing for older ORCA versions.
Returns data in format compatible with chemparseplot.plot.neb plotting functions.
```{versionadded} 0.2.0
```
"""
from pathlib import Path
from typing import Any
import numpy as np
# Lazy import - will be loaded on first use
_opi_output = None
[docs]
def _get_opi_output():
"""Get OPI Output class, installing if needed."""
global _opi_output
if _opi_output is None:
from rgpycrumbs._aux import ensure_import
_opi_output = ensure_import("opi.output.core").Output
return _opi_output
HAS_OPI = True # Will fail gracefully at runtime if not installed
[docs]
def parse_orca_neb(basename: str, working_dir: Path | None = None) -> dict[str, Any]:
"""Parse ORCA NEB calculation using OPI.
Returns data compatible with chemparseplot.plot.neb functions:
- energies: array of energies in eV
- rmsd_r, rmsd_p: RMSD from reactant/product (if geometries available)
- grad_r, grad_p: gradients (if forces available)
- converged: bool
- n_images: number of images
Parameters
----------
basename
ORCA job basename (without extension)
working_dir
Working directory containing ORCA output files
Returns
-------
dict
Structured NEB data compatible with plot_neb_profile() and plot_neb_landscape()
Example
-------
>>> from chemparseplot.parse.orca.neb import parse_orca_neb
>>> from chemparseplot.plot.neb import plot_energy_path, plot_landscape_path_overlay
>>> data = parse_orca_neb("job", Path("calc"))
>>> # Use with existing plotting functions
"""
Output = _get_opi_output()
if working_dir is None:
working_dir = Path.cwd()
# Parse ORCA output using OPI
output = Output(basename, working_dir=working_dir)
output.parse()
# Check if calculation terminated normally
converged = output.terminated_normally()
# Extract data for all NEB images
n_images = output.num_results_gbw
energies = []
geometries = []
forces = []
for i in range(n_images):
# Get energy in eV
energy_eh = output.get_final_energy(index=i)
energy_ev = energy_eh * 27.211386245988 # Hartree to eV
energies.append(energy_ev)
# Get geometry
try:
geom = output.get_geometry(index=i)
geometries.append(
{
"coordinates": geom.coordinates.cartesians,
"atomic_numbers": [atom.atomic_number for atom in geom.atoms],
}
)
except (AttributeError, KeyError):
geometries.append(None)
# Get forces if available
try:
grad = output.get_gradient(index=i)
forces.append(grad)
except (AttributeError, KeyError):
forces.append(None)
energies = np.array(energies)
# Calculate RMSD from reactant and product if geometries available
rmsd_r = None
rmsd_p = None
grad_r = None
grad_p = None
if all(g is not None for g in geometries) and len(geometries) >= 2:
try:
from ase import Atoms
# Convert to ASE Atoms
atoms_list = []
for geom in geometries:
atoms = Atoms(
numbers=geom["atomic_numbers"],
positions=geom["coordinates"],
)
atoms_list.append(atoms)
# Calculate RMSD from reactant and product
rmsd_r = np.array(
[_calculate_rmsd(atoms_list[0], atoms) for atoms in atoms_list]
)
rmsd_p = np.array(
[_calculate_rmsd(atoms_list[-1], atoms) for atoms in atoms_list]
)
# Calculate synthetic gradients if forces available
if all(f is not None for f in forces):
grad_r, grad_p = _compute_synthetic_gradients(
rmsd_r, rmsd_p, forces, atoms_list
)
except ImportError:
# ASE not available, skip RMSD calculation
pass
# Get barrier heights
if len(energies) > 1:
e_reactant = energies[0]
e_product = energies[-1]
e_max = energies.max()
barrier_forward = e_max - e_reactant
barrier_reverse = e_max - e_product
else:
barrier_forward = None
barrier_reverse = None
return {
"energies": energies,
"rmsd_r": rmsd_r,
"rmsd_p": rmsd_p,
"grad_r": grad_r,
"grad_p": grad_p,
"forces": forces if all(f is not None for f in forces) else None,
"converged": converged,
"n_images": n_images,
"barrier_forward": barrier_forward,
"barrier_reverse": barrier_reverse,
"source": "opi",
"orca_version": str(output.orca_version)
if hasattr(output, "orca_version")
else "unknown",
}
[docs]
def _calculate_rmsd(ref: Any, mobile: Any) -> float:
"""Calculate RMSD between two ASE Atoms objects (simple, no alignment)."""
pos_ref = ref.get_positions()
pos_mob = mobile.get_positions()
diff = pos_ref - pos_mob
return float(np.sqrt((diff * diff).sum() / len(ref)))
[docs]
def _compute_synthetic_gradients(rmsd_r, rmsd_p, forces, atoms_list):
"""Compute synthetic gradients for landscape plotting."""
# Simple projection of forces onto RMSD coordinates
# This is a simplified version - full implementation would use IRA
grad_r = np.zeros_like(rmsd_r)
grad_p = np.zeros_like(rmsd_p)
if forces[0] is not None:
# Project forces onto RMSD direction
for i, force in enumerate(forces):
if force is not None:
force_norm = np.linalg.norm(force)
grad_r[i] = -force_norm * (rmsd_r[i] / max(rmsd_r.max(), 1e-10))
grad_p[i] = -force_norm * (rmsd_p[i] / max(rmsd_p.max(), 1e-10))
return grad_r, grad_p
[docs]
def parse_orca_neb_fallback(
basename: str, working_dir: Path | None = None
) -> dict[str, Any] | None:
"""Parse ORCA NEB using legacy regex parsing (ORCA < 6.1).
Falls back to parsing .interp files if OPI is not available.
Returns
-------
dict or None
NEB data if successful, None if parsing fails
"""
from chemparseplot.parse.orca.neb.interp import extract_interp_points
if working_dir is None:
working_dir = Path.cwd()
interp_file = working_dir / f"{basename}.interp"
if not interp_file.exists():
return None
try:
text = interp_file.read_text()
data = extract_interp_points(text)
if not data:
return None
# Extract last iteration
last_iter = data[-1]
energies = last_iter.nebpath.energy.magnitude
return {
"energies": energies,
"rmsd_r": None,
"rmsd_p": None,
"grad_r": None,
"grad_p": None,
"forces": None,
"converged": True,
"n_images": len(energies),
"barrier_forward": None,
"barrier_reverse": None,
"source": "legacy_interp",
"orca_version": "<6.1",
}
except Exception:
return None