Source code for chemparseplot.parse.eon.saddle_search

import configparser
import gzip
import logging
import os
import re
from enum import Enum
from pathlib import Path

import numpy as np

try:
    from rgpycrumbs.basetypes import DimerOpt, MolGeom, SaddleMeasure, SpinID
    from rgpycrumbs.parsers.bless import BLESS_LOG
    from rgpycrumbs.parsers.common import _NUM
    from rgpycrumbs.search.helpers import tail
except ImportError:
    DimerOpt = MolGeom = SaddleMeasure = SpinID = None
    BLESS_LOG = _NUM = tail = None

log = logging.getLogger(__name__)


[docs] class EONSaddleStatus(Enum): """Status codes returned by eOn saddle point searches. ```{versionadded} 0.0.3 ``` """ # See SaddleSearchJob.cpp for the ordering # (numerical_status, descriptive_string) GOOD = 0, "Success" INIT = 1, "Initial" # Should never show up, before run BAD_NO_CONVEX = 2, "Initial displacement unable to reach convex region" BAD_HIGH_ENERGY = 3, "Barrier too high" BAD_MAX_CONCAVE_ITERATIONS = 4, "Too many iterations in concave region" BAD_MAX_ITERATIONS = 5, "Too many iterations" BAD_NOT_CONNECTED = 6, "Saddle is not connected to initial state" BAD_PREFACTOR = 7, "Prefactors not within window" BAD_HIGH_BARRIER = 8, "Energy barrier not within window" BAD_MINIMA = 9, "Minimizations from saddle did not converge" FAILED_PREFACTOR = 10, "Hessian calculation failed" POTENTIAL_FAILED = 11, "Potential failed" NONNEGATIVE_ABORT = 12, "Nonnegative initial mode, aborting" NONLOCAL_ABORT = 13, "Nonlocal abort" NEGATIVE_BARRIER = 14, "Negative barrier detected" BAD_MD_TRAJECTORY_TOO_SHORT = 15, "No reaction found during MD trajectory" BAD_NO_NEGATIVE_MODE_AT_SADDLE = ( 16, "Converged to stationary point with zero negative modes", ) BAD_NO_BARRIER = 17, "No forward barrier was found along minimized band" ZEROMODE_ABORT = 18, "Zero mode abort." OPTIMIZER_ERROR = 19, "Optimizer error." UNKNOWN = -1, "Unknown status" def __new__(cls, value, description): obj = object.__new__(cls) obj._value_ = value obj.description = description return obj
[docs] @classmethod def from_value(cls, value): for member in cls: if member.value == value: return member return cls.UNKNOWN
[docs] def extract_saddle_gprd(log: list[str]): """Extract saddle point geometry from GPRD log lines. ```{versionadded} 0.0.3 ``` """ logdata = [BLESS_LOG.search(x).group("logdata").strip() for x in log] numdat = [] for line in logdata: if line.isdigit() or (line.startswith("-") and line[1:].isdigit()): numdat.append(float(line)) if _NUM.match(line): numdat.append(np.array(_NUM.findall(line), dtype=float)) # TODO(rg): There's got to be a cleaner way.. # assert len(numdat) == 3, f"numdat: {numdat} for log:\n{log}" return MolGeom(pos=numdat[0], energy=numdat[1], forces=numdat[2])
[docs] def _read_results_dat(eresp: Path) -> dict: """Reads and parses the results.dat file. Args: eresp: Path to the eOn results directory. Returns: A dictionary containing the parsed data from results.dat, or None if the file does not exist or the termination reason is not 0. """ respth = eresp / "results.dat" if not respth.exists(): return None rdat = respth.read_text() termination_int = int(re.search(r"(\d+) termination_reason", rdat).group(1)) termination_reason = EONSaddleStatus.from_value(termination_int) term_dict = { "termination_status": str(termination_reason).split(".")[-1], } if termination_reason != EONSaddleStatus.GOOD: return term_dict results_data = { "pes_calls": int(re.search(r"(\d+) total_force_calls", rdat).group(1)), "iter_steps": int(re.search(r"(\d+) iterations", rdat).group(1)), "saddle_energy": float( re.search(r"(-?\d+\.\d+) potential_energy_saddle", rdat).group(1) ), } results_data |= term_dict return results_data
[docs] def _find_log_file(eresp: Path) -> Path | None: """Finds the most recent, valid log file within the eOn results directory. Args: eresp: Path to the eOn results directory. Returns: Path to the chosen log file, or None if no suitable log file is found. """ log_files = list(eresp.glob("*.log.gz")) if not log_files: log.warning("*.log.gz not found for %s", eresp) return None # Sort log files by modification time (most recent first) log_files.sort(key=os.path.getmtime, reverse=True) for f in log_files: last_lines = tail(f, 5) # Get the last 5 lines if not last_lines: # if tail fails, move onto next file. continue fcheck = [x for x in last_lines if "Fail" in x] if not fcheck: # If "Fail" is NOT in any of the last 5 lines return f log.warning("No valid log files found in %s", eresp) return None
[docs] def _extract_total_time(log_data: list[str]) -> float: """Extracts the total run time from the log data. Args: log_data: A list of strings representing the lines of the log file. Returns: The total run time in seconds as a float, or 0.0 if not found. """ try: tot_time = float( re.search( r"INFO] real\s*(?P<realtime>\d*\.+\d*)", " ".join(log_data[-5:]), ).group("realtime") ) except (AttributeError, TypeError): tot_time = 0.0 return tot_time
[docs] def _extract_initial_energy(log_data: list[str]) -> float: """Extracts the initial energy from the log data. Args: log_data: A list of strings representing the lines of the log file. Returns: The initial energy as a float, or None if not found. """ start_saddle_line = next( (line for line in log_data if "Saddle point search started from" in line), None ) if start_saddle_line: init_energy = float( re.search( r"Saddle point search started from reactant with energy (-?\d+\.\d+) eV.", start_saddle_line, ).group(1) ) return init_energy return None
[docs] def _extract_saddle_info( log_data: list[str], eresp: Path, *, is_gprd: bool ) -> tuple[float, str]: """Extracts saddle point information (saddle_fmax and method) from the log data. Args: log_data: A list of strings representing the lines of the log file. eresp: Path to the eOn results directory. is_gprd: Boolean flag indicating whether the GPRD method was used. Returns: A tuple containing: - saddle_fmax: The maximum force at the saddle point. """ start_line = next( (i + 1 for i, line in enumerate(log_data) if "Elapsed time" in line), None ) end_line = next( ( i - 1 for i, line in enumerate(log_data) if "timing information" in line.strip() ), None, ) if is_gprd and start_line is not None and end_line is not None: saddle = extract_saddle_gprd(log_data[start_line:end_line]) saddle_fmax = np.abs(np.max(saddle.forces)) elif not is_gprd: try: # Expected header: Step, Step Size, Delta E, ||Force|| # ||Force|| is the 5th element (index 4) saddle_fmax = float( (eresp / "client_spdlog.log") .read_text() .strip() .split("\n")[-5:][0] .split()[4] ) except (FileNotFoundError, IndexError): saddle_fmax = 0.0 else: saddle_fmax = 0.0 return saddle_fmax
[docs] def _get_methods(eresp: Path) -> DimerOpt: _conf = configparser.ConfigParser() _conf.read(eresp / "config.ini") if _conf["Saddle Search"]["min_mode_method"] == "gprdimer": return DimerOpt( saddle="GPRD", rot=_conf["GPR Dimer"]["rotation_opt_method"], trans=_conf["GPR Dimer"]["translation_opt_method"], ) elif _conf["Saddle Search"]["min_mode_method"] == "dimer": return DimerOpt( saddle="IDimer", # XXX: This is basically the default rot=_conf["Dimer"].get("opt_method", "cg"), trans=_conf["Optimizer"]["opt_method"], ) else: errmsg = "Clearly wrong.." raise ValueError(errmsg)
[docs] def parse_eon_saddle(eresp: Path, rloc: "SpinID") -> "SaddleMeasure": """Parses eOn saddle point search results from a directory. ```{versionadded} 0.0.3 ``` Args: eresp: Path to the directory containing eOn results. rloc: A SpinID object. Returns: A SaddleMeasure object. """ meth = _get_methods(eresp) is_gprd = meth.saddle.lower() == "gprd" # 1. Read results.dat results_data = _read_results_dat(eresp) if results_data is None: return SaddleMeasure( mol_id=getattr(rloc, "mol_id", None), spin=getattr(rloc, "spin", None), success=False, method=meth.saddle, dimer_rot=meth.rot, dimer_trans=meth.trans, ) if list(results_data.keys()) == ["termination_status"]: return SaddleMeasure( mol_id=getattr(rloc, "mol_id", None), spin=getattr(rloc, "spin", None), success=False, method=meth.saddle, dimer_rot=meth.rot, dimer_trans=meth.trans, termination_status=results_data["termination_status"], ) # 2. Find the log file log_file = _find_log_file(eresp) if log_file is None: return SaddleMeasure( pes_calls=results_data["pes_calls"], iter_steps=results_data["iter_steps"], saddle_energy=results_data["saddle_energy"], tot_time=0.0, saddle_fmax=0.0, success=False, method=meth.saddle, dimer_rot=meth.rot, dimer_trans=meth.trans, mol_id=getattr(rloc, "mol_id", None), spin=getattr(rloc, "spin", None), termination_status=results_data["termination_status"], ) # 3. Parse the log file with gzip.open(log_file, "rt") as f: log_data = f.readlines() # 4. Extract data from the log file tot_time = _extract_total_time(log_data) init_energy = _extract_initial_energy(log_data) saddle_fmax = _extract_saddle_info(log_data, eresp, is_gprd=is_gprd) # 5. Construct and return SaddleMeasure if tot_time <= 0.0 and meth.saddle == "IDimer": return SaddleMeasure( mol_id=getattr(rloc, "mol_id", None), spin=getattr(rloc, "spin", None), success=False, method=meth.saddle, dimer_rot=meth.rot, dimer_trans=meth.trans, pes_calls=results_data["pes_calls"], iter_steps=results_data["iter_steps"], saddle_energy=results_data["saddle_energy"], termination_status=results_data["termination_status"], ) return SaddleMeasure( pes_calls=results_data["pes_calls"], iter_steps=results_data["iter_steps"], tot_time=tot_time, saddle_energy=results_data["saddle_energy"], saddle_fmax=saddle_fmax, success=True, method=meth.saddle, dimer_rot=meth.rot, dimer_trans=meth.trans, init_energy=init_energy, barrier=( results_data["saddle_energy"] - init_energy if init_energy is not None else None ), mol_id=getattr(rloc, "mol_id", None), spin=getattr(rloc, "spin", None), termination_status=results_data["termination_status"], )