Source code for chemparseplot.parse.orca.neb.opi_parser

# SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
#
# SPDX-License-Identifier: MIT

"""Parse ORCA NEB calculations using OPI (ORCA Python Interface).

Requires ORCA 6.1+ and the orca-pi package.
Falls back to legacy parsing for older ORCA versions.

Returns data in format compatible with chemparseplot.plot.neb plotting functions.

```{versionadded} 0.2.0
```
"""

from pathlib import Path
from typing import Any

import numpy as np

# Lazy import - will be loaded on first use
_opi_output = None


[docs] def _get_opi_output(): """Get OPI Output class, installing if needed.""" global _opi_output if _opi_output is None: from rgpycrumbs._aux import ensure_import _opi_output = ensure_import("opi.output.core").Output return _opi_output
HAS_OPI = True # Will fail gracefully at runtime if not installed
[docs] def parse_orca_neb(basename: str, working_dir: Path | None = None) -> dict[str, Any]: """Parse ORCA NEB calculation using OPI. Returns data compatible with chemparseplot.plot.neb functions: - energies: array of energies in eV - rmsd_r, rmsd_p: RMSD from reactant/product (if geometries available) - grad_r, grad_p: gradients (if forces available) - converged: bool - n_images: number of images Parameters ---------- basename ORCA job basename (without extension) working_dir Working directory containing ORCA output files Returns ------- dict Structured NEB data compatible with plot_neb_profile() and plot_neb_landscape() Example ------- >>> from chemparseplot.parse.orca.neb import parse_orca_neb >>> from chemparseplot.plot.neb import plot_energy_path, plot_landscape_path_overlay >>> data = parse_orca_neb("job", Path("calc")) >>> # Use with existing plotting functions """ Output = _get_opi_output() if working_dir is None: working_dir = Path.cwd() # Parse ORCA output using OPI output = Output(basename, working_dir=working_dir) output.parse() # Check if calculation terminated normally converged = output.terminated_normally() # Extract data for all NEB images n_images = output.num_results_gbw energies = [] geometries = [] forces = [] for i in range(n_images): # Get energy in eV energy_eh = output.get_final_energy(index=i) energy_ev = energy_eh * 27.211386245988 # Hartree to eV energies.append(energy_ev) # Get geometry try: geom = output.get_geometry(index=i) geometries.append( { "coordinates": geom.coordinates.cartesians, "atomic_numbers": [atom.atomic_number for atom in geom.atoms], } ) except (AttributeError, KeyError): geometries.append(None) # Get forces if available try: grad = output.get_gradient(index=i) forces.append(grad) except (AttributeError, KeyError): forces.append(None) energies = np.array(energies) # Calculate RMSD from reactant and product if geometries available rmsd_r = None rmsd_p = None grad_r = None grad_p = None if all(g is not None for g in geometries) and len(geometries) >= 2: try: from ase import Atoms # Convert to ASE Atoms atoms_list = [] for geom in geometries: atoms = Atoms( numbers=geom["atomic_numbers"], positions=geom["coordinates"], ) atoms_list.append(atoms) # Calculate RMSD from reactant and product rmsd_r = np.array( [_calculate_rmsd(atoms_list[0], atoms) for atoms in atoms_list] ) rmsd_p = np.array( [_calculate_rmsd(atoms_list[-1], atoms) for atoms in atoms_list] ) # Calculate synthetic gradients if forces available if all(f is not None for f in forces): grad_r, grad_p = _compute_synthetic_gradients( rmsd_r, rmsd_p, forces, atoms_list ) except ImportError: # ASE not available, skip RMSD calculation pass # Get barrier heights if len(energies) > 1: e_reactant = energies[0] e_product = energies[-1] e_max = energies.max() barrier_forward = e_max - e_reactant barrier_reverse = e_max - e_product else: barrier_forward = None barrier_reverse = None return { "energies": energies, "rmsd_r": rmsd_r, "rmsd_p": rmsd_p, "grad_r": grad_r, "grad_p": grad_p, "forces": forces if all(f is not None for f in forces) else None, "converged": converged, "n_images": n_images, "barrier_forward": barrier_forward, "barrier_reverse": barrier_reverse, "source": "opi", "orca_version": str(output.orca_version) if hasattr(output, "orca_version") else "unknown", }
[docs] def _calculate_rmsd(ref: Any, mobile: Any) -> float: """Calculate RMSD between two ASE Atoms objects (simple, no alignment).""" pos_ref = ref.get_positions() pos_mob = mobile.get_positions() diff = pos_ref - pos_mob return float(np.sqrt((diff * diff).sum() / len(ref)))
[docs] def _compute_synthetic_gradients(rmsd_r, rmsd_p, forces, atoms_list): """Compute synthetic gradients for landscape plotting.""" # Simple projection of forces onto RMSD coordinates # This is a simplified version - full implementation would use IRA grad_r = np.zeros_like(rmsd_r) grad_p = np.zeros_like(rmsd_p) if forces[0] is not None: # Project forces onto RMSD direction for i, force in enumerate(forces): if force is not None: force_norm = np.linalg.norm(force) grad_r[i] = -force_norm * (rmsd_r[i] / max(rmsd_r.max(), 1e-10)) grad_p[i] = -force_norm * (rmsd_p[i] / max(rmsd_p.max(), 1e-10)) return grad_r, grad_p
[docs] def parse_orca_neb_fallback( basename: str, working_dir: Path | None = None ) -> dict[str, Any] | None: """Parse ORCA NEB using legacy regex parsing (ORCA < 6.1). Falls back to parsing .interp files if OPI is not available. Returns ------- dict or None NEB data if successful, None if parsing fails """ from chemparseplot.parse.orca.neb.interp import extract_interp_points if working_dir is None: working_dir = Path.cwd() interp_file = working_dir / f"{basename}.interp" if not interp_file.exists(): return None try: text = interp_file.read_text() data = extract_interp_points(text) if not data: return None # Extract last iteration last_iter = data[-1] energies = last_iter.nebpath.energy.magnitude return { "energies": energies, "rmsd_r": None, "rmsd_p": None, "grad_r": None, "grad_p": None, "forces": None, "converged": True, "n_images": len(energies), "barrier_forward": None, "barrier_reverse": None, "source": "legacy_interp", "orca_version": "<6.1", } except Exception: return None