Source code for esipy.atomicfiles

import os
import re
import numpy as np

from esipy.tools import wf_type, format_short_partition, load_file

# Print the missing .wfx diagnostic only once
[docs]wfxnotfound = False
[docs]def read_aoms(path='.'): """ Reads the AOMs from ESIpy's writeaoms() method or from an AIMAll calculation. :param path: Path of the directory of the files. :type path: str :returns: The AOMs in ESIpy format stored in 'ESI.aom'. :rtype: list """ aom = [] aom_alpha, aom_beta = [], [] start_string = 'The Atomic Overlap Matrix' mul = False working_dir = os.getcwd() if path == '.': path = working_dir else: path = os.path.join(working_dir, path) ints = [intfile for intfile in os.listdir(path) if intfile.endswith('.int') and os.path.isfile(os.path.join(path, intfile))] ordered = sorted(ints, key=lambda x: int(re.search(r'\d+', x).group())) if ordered == []: raise ValueError(f"No *.int files found in the directory '{path}'.") first_file = os.path.join(path, ordered[0]) with open(first_file, 'r') as f: for line in f: if "Restricted, closed" in line or "Restricted Closed" in line: wf = "rest" break elif "Unrestricted" in line: wf = "unrest" break elif "Restricted, natural" in line: wf = "no" break else: raise ValueError("Wavefunction type could not be determined.") if wf == "no": occs = read_occs(first_file) for intfile in ordered: intfile_path = os.path.join(path, intfile) with open(intfile_path, 'r') as f: mat_lines = [] for line in f: if "Mulliken" in line: mul = True if start_string in line: for _ in range(3): next(f) while True: line = next(f).strip() if not line: break mat_lines.extend([float(num) for num in line.split()]) # We first get the number of shape of the alpha-alpha matrix na, nb = read_orbs(intfile_path) if wf == 'unrest': nt = na + nb else: nt = na # Mulliken works on non-symmetric, square AOMs if mul: matrix = np.array(mat_lines).reshape((nt, nt)) # Symmetric AOMs work on lower-triangular matrices else: low_matrix = np.zeros((nt, nt)) low_matrix[np.tril_indices(nt)] = mat_lines matrix = low_matrix + low_matrix.T - np.diag(low_matrix.diagonal()) if wf == "rest" or wf == "no": aom.append(matrix) if wf == "unrest": SCR_alpha = matrix[:na, :na] SCR_beta = matrix[na:, na:] aom_alpha.append(SCR_alpha) aom_beta.append(SCR_beta) if wf == "rest": return aom elif wf == "unrest": return [aom_alpha, aom_beta] elif wf == "no": return [aom, occs] else: raise ValueError("Could not read the AOMs")
########### WRITING THE INPUT FOR THE ESI-3D CODE FROM THE AOMS ###########
[docs]def write_aoms(mol, mf, name, aom, ring=[], partition=None): """ Writes the input for the ESI-3D code from the AOMs. :param mol: Molecule object from PySCF. :type mol: PySCF instance :param mf: Calculation object from PySCF. :type mf: PySCF instance :param name: Name of the calculation. :type name: str :param aom: Concatenated list of Atomic Overlap Matrices (AOMs) in the MO basis. :type aom: list :param ring: Connectivity of the atoms in the ring. Can be more than one ring as a list of lists. :type ring: list of int, optional :param partition: Partition scheme for the AOMs. Options are "mulliken", "lowdin", "meta-lowdin", "nao", "iao". :type partition: str, optional :returns: None :generates: - A '_atomicfiles/' directory with all the files created. - A '.int' file for each atom with its corresponding AOM. - A 'name.files' file with a list of the names of the '.int' files. - A 'name.bad' file with a standard input for the ESI-3D code. - A 'name.wfx' file with the occupancies for the ESI-3D code. """ from copy import deepcopy if isinstance(aom, str): aom = load_file(aom) if ring is None: pass elif isinstance(ring[0], int): ring = [ring] wf = wf_type(aom) if wf == "no": aom, occ = aom # Separating AOMs and orbital occupations # Obtaining information for the files symbols = [mol.atom_symbol(i) for i in range(mol.natm)] atom_numbers = [i + 1 for i in range(mol.natm)] if wf == "unrest": nalpha = [np.trace(aom_alpha) for aom_alpha in aom[0]] nbeta = [np.trace(aom_beta) for aom_beta in aom[1]] elif wf == "rest": nalpha = nbeta = [np.trace(aom) for aom in aom] elif wf == "no": nalpha = nbeta = [float(np.trace(np.dot(occ, aom))) for aom in aom] # Creating a new directory for the calculation symbols = [mol.atom_symbol(i) for i in range(mol.natm)] fragidx = mol.natm + 1 fragmap = {} dofrag = False ringcopy = deepcopy(ring) # Mark fragments in the order they are defined if ring: for i, sublist in enumerate(ringcopy): for j, element in enumerate(sublist): if isinstance(element, set): dofrag = True fragmap[tuple(element)] = fragidx sublist[j] = fragidx fragidx += 1 shortpart = format_short_partition(partition) new_dir_name = name + "_" + shortpart + "_atomicfiles" symbols = [s.lower() for s in symbols] titles = [symbols[i] + str(atom_numbers[i]) for i in range(mol.natm)] # Setting the title of the files new_dir_path = os.path.join(os.getcwd(), new_dir_name) os.makedirs(new_dir_path, exist_ok=True) # Creating and writing the atomic .int files for i, item in enumerate(titles): with open(os.path.join(new_dir_path, item + ".int"), "w+") as f: f.write(" Created by ESIpy\n") if partition == "mulliken": f.write(" Using Mulliken atomic definition\n") elif partition == "lowdin": f.write(" Using Lowdin atomic definition\n") elif partition == "meta-lowdin": f.write(" Using Meta-Lowdin atomic definition\n") elif partition == "nao": f.write(" Using NAO atomic definition\n") elif partition == "iao": f.write(" Using IAO atomic definition\n") f.write(" Single-determinant wave function\n") f.write(" Molecular SCF ENERGY (AU) = {:.11f}\n\n".format(mf.e_tot)) f.write(" INTEGRATION IS OVER ATOM {}\n".format(titles[i])) f.write(" RESULTS OF THE INTEGRATION\n") if wf == "unrest": f.write(" N {:.10E} NET CHARGE 0.0000000000E+00\n".format( np.trace(aom[0][i]) + np.trace(aom[1][i]))) elif wf == "rest": f.write(" N {:.10E} NET CHARGE 0.0000000000E+00\n".format(2 * np.trace(aom[i]))) else: f.write(" N {:.10E} NET CHARGE 0.0000000000E+00\n".format(np.trace(np.dot(occ, aom[i])))) f.write(" G\n") f.write(" K 1.00000000000000E+01 E(ATOM) 1.00000000000000E+00\n") f.write(" L 0.00000000000000E+01\n\n") if wf == "unrest": occ = np.diag([1.0] * (len(aom[0][i]) + len(aom[1][i]))) f.write("\n The Atomic Overlap Matrix:\n\n Unrestricted\n\n") alpha_size = len(aom[0][i]) beta_size = len(aom[1][i]) zeros = np.zeros((beta_size, alpha_size)) fill = np.vstack((aom[0][i], zeros)) fill = np.hstack((fill, np.vstack((zeros.T, aom[1][i])))) if partition == "mulliken": for j in range(alpha_size + beta_size): for k in range(alpha_size + beta_size): f.write("{:.16E} ".format(fill[j][k])) f.write("\n") else: for j in range(alpha_size + beta_size): for k in range(alpha_size + beta_size): if k <= j: f.write("{:.16E} ".format(fill[j][k])) f.write("\n") elif wf == "rest": occ = np.diag([2.0] * len(aom[i])) f.write("\n The Atomic Overlap Matrix:\n\n Restricted, closed-shell\n\n ") if partition == "mulliken": for j in range(len(aom[i])): for k in range(len(aom[i])): f.write("{:.16E} ".format(aom[i][j][k])) f.write("\n") else: for j in range(len(aom[i])): for k in range(len(aom[i])): if k <= j: f.write("{:.16E} ".format(aom[i][j][k])) f.write("\n") elif wf == "no": f.write("\n The Atomic Overlap Matrix:\n\n Restricted, natural orbital wavefunction\n\n ") if partition == "mulliken": for j in range(len(aom[i])): for k in range(len(aom[i])): f.write("{:.16E} ".format(aom[i][j][k])) f.write("\n") else: for j in range(len(aom[i])): for k in range(len(aom[i])): if k <= j: f.write("{:.16E} ".format(aom[i][j][k])) f.write("\n") f.write("\n\n") f.write("Molecular Orbital (MO) Data:\n") f.write("---------------------------\n") f.write(" MO# i = ith MO in AIMAll (internal and output) order\n") f.write(" WMO#(i) = MO# i in wavefunction file order\n") f.write(" Occ_MO(i) = Occupancy of ith MO for Molecule\n") f.write(" Spin_MO(i) = Spin Type of ith MO\n") f.write(" Occ_MO(A,i) = Contribution of Atom A to Occ_MO(i)\n") f.write(" %Occ_MO(A,i) = 100 * Occ_MO(A,i) / Occ_MO(i)\n") f.write(" %N_MO(A,i) = 100 * Occ_MO(A,i) / N(A)\n") f.write( "---------------------------------------------------------------------------------------------------\n") f.write( " MO# i WMO#(i) Occ_MO(i) Spin_MO(i) Occ_MO(A,i) %Occ_MO(A,i) %N_MO(A,i)\n") f.write( "---------------------------------------------------------------------------------------------------\n") if wf == "unrest": for j, occup in enumerate(np.diag(occ[beta_size:])): f.write( f" {j + 1:<8}{j + 1:<12}{float(1.):<15.10f}{'Alpha':<15}{0.0:<15.10f}{0.0:<15.10f}{0.0:<15.10f}\n" ) for j, occup in enumerate(np.diag(occ[:beta_size])): f.write( f" {j + 1 + alpha_size:<8}{j + 1 + alpha_size:<12}{float(occup):<15.10f}{'Beta':<15}{0.0:<15.10f}{0.0:<15.10f}{0.0:<15.10f}\n" ) else: for j, occup in enumerate(np.diag(occ)): f.write( f" {j + 1:<8}{j + 1:<12}{float(occup):<15.10f}{'Alpha,Beta':<15}{0.0:<15.10f}{0.0:<15.10f}{0.0:<15.10f}\n" ) f.write("\n Alpha electrons (NAlpha) {:.10E}".format(nalpha[i])) f.write("\n Beta electrons (NBeta) {:.10E}\n".format(nbeta[i])) f.write(" NORMAL TERMINATION OF PROAIMV") f.close() # Writing the file containing the title of the atomic .int files with open(os.path.join(new_dir_path, name + shortpart + ".files"), "w") as f: for i in titles: f.write(i + ".int\n") f.close() # Always write .wfx file (use occ if available) occ_local = occ if 'occ' in locals() else None # Write the WFX-like file always (extension .wfx) write_wfx(new_dir_path, name, mol, mf, aom, wf, occ_local)
[docs]def read_molinfo(path): """ Reads all *.int files from a directory and builds the molinfo dictionary. :param path: Path to the directory containing *.int files. :type path: str :returns: A dictionary containing molecular information. :rtype: dict """ molinfo = { "method": "Not specified", "basisset": "Not specified", "xc": "Not specified", "energy": "Not specified", "calctype": "Not specified", "geom": None, "partition": "qtaim", } symbs, atm_nums = [], [] found_energy = False ints = [intfile for intfile in os.listdir(path) if intfile.endswith(".int") and os.path.isfile(os.path.join(path, intfile))] int_files = sorted(ints, key=lambda x: int(re.search(r"\d+", x).group())) if not int_files: raise FileNotFoundError(f"No *.int files found in the directory '{path}'.") molinfo["partition"] = get_partition(path, int_files[0]) for int_file in int_files: with open(os.path.join(path, int_file), "r") as file: lines = file.readlines() for line in lines: if "Integration is" in line or "INTEGRATION IS" in line: parts = re.findall(r"[A-Za-z]+|\d+", line.split()[-1].strip()) symbs.append(parts[-2]) atm_nums.append(parts[-1]) if "Restricted, closed-shell" in line or "Restricted Closed-Shell": molinfo["calctype"] = "RHF" if "Unrestricted" in line: molinfo["calctype"] = "UHF" elif "Restricted, natural" in line: molinfo["calctype"] = "NO" if "The molecular energy from the wf" in line or "ENERGY" in line and not found_energy: molinfo["energy"] = float(line.split()[-1]) found_energy = True if "Model:" in line: molinfo["method"] = line.split("Model:")[1].strip() if "Basis Set:" in line: molinfo["basisset"] = line.split("Basis Set:")[1].strip() if "Functional:" in line: molinfo["xc"] = line.split("Functional:")[1].strip() if "Wfx File:" in line and "wfx_filename" not in molinfo: molinfo["wfx_filename"] = line.split(":")[-1].strip() molinfo["symbols"] = symbs molinfo["atom_numbers"] = atm_nums molinfo["geom"] = read_wfx_info(path, molinfo.get("wfx_filename")) return molinfo
[docs]def read_wfx_info(path, wfx_filename=None): """ Searches for a .wfx file in the given path or the previous path, reads the coordinates and charges, and stores them in molinfo["geom"]. :param path: Directory path containing the file. :type path: str :param wfx_filename: Optional filename of the .wfx file. :type wfx_filename: str :returns: NumPy array with coordinates. :rtype: numpy.ndarray """ global wfxnotfound # Normalize paths if not path: path = os.getcwd() path = os.path.normpath(path) abs_path = os.path.abspath(path) parent_path = os.path.dirname(abs_path) # Possible locations to look for the file # Prioritize current working directory and path parent locations = [os.getcwd(), parent_path, path, abs_path] # Remove duplicates while preserving order unique_locations = [] for loc in locations: if loc and loc not in unique_locations: unique_locations.append(loc) wfx_file = None # If we have a specific filename, look for it if wfx_filename: # It might be an absolute path already if os.path.isabs(wfx_filename) and os.path.isfile(wfx_filename): wfx_file = wfx_filename else: # Look for this specific filename in all locations base_wfx = os.path.basename(wfx_filename) for loc in unique_locations: if loc and os.path.isdir(loc): candidate = os.path.join(loc, base_wfx) if os.path.isfile(candidate): wfx_file = candidate break # Fallback to searching for any .wfx file if not found yet if not wfx_file: for loc in unique_locations: if loc and os.path.isdir(loc): files = [f for f in os.listdir(loc) if f.lower().endswith(".wfx")] if len(files) == 1: wfx_file = os.path.join(loc, files[0]) break elif len(files) > 1: # If multiple, maybe one matches the "path" name? basename = os.path.basename(path).replace("_atomicfiles", "") matches = [f for f in files if basename in f] if len(matches) == 1: wfx_file = os.path.join(loc, matches[0]) break if not wfx_file: if not wfxnotfound: print(f" | Could not find .wfx file in any of: {unique_locations}") wfxnotfound = True return None with open(wfx_file, "r") as file: lines = file.readlines() start_coords = False coordinates = [] for line in lines: if "<Nuclear Cartesian Coordinates>" in line: start_coords = True continue if "</Nuclear Cartesian Coordinates>" in line: break if start_coords: parts = line.split() if len(parts) == 3: coordinates.append([float(x) for x in parts]) if not coordinates: raise ValueError(f"No coordinates found in the .wfx file: {wfx_file}") # Combine symbols and coordinates into a NumPy array geom = np.array([coordinates[i] for i in range(len(coordinates))], dtype=object) return geom
[docs]def get_partition(path, int_file): """ Extracts the partition type from the given file. :param path: Directory path containing the file. :param int_file: File name to read. :return: Partition type as a string. """ with open(os.path.join(path, int_file), "r") as file: lines = file.readlines() if "AIMInt" in lines[0]: return "qtaim" elif "ESIpy" in lines[0]: return lines[1].split()[1].strip() return None
[docs]def read_occs(file_path): """ Extracts the NO coefficients from a Restricted, natural orbitals AIMAll calculation. :param file_path: Path to the file to process. :type file_path: str :return: List of extracted Occ_MO(i) values. :rtype: list of float """ occ_mo_values = [] start_processing = False with open(file_path, 'r') as file: for line in file: # Start processing after the header line if "Molecular Orbital (MO) Data" in line: start_processing = True for _ in range(10): next(file) continue # Stop processing at a blank line if start_processing and not line.strip(): break # Extract the third column if processing has started if start_processing: columns = line.split() if len(columns) >= 3: occ_mo_values.append(float(columns[2])) return np.diag(occ_mo_values)
[docs]def read_orbs(file_path): """ Extracts the NO coefficients from a Restricted, natural orbitals AIMAll calculation. :param file_path: Path to the file to process. :type file_path: str :return: List of extracted Occ_MO(i) values. :rtype: list of float """ nalpha, nbeta = 0, 0 start_processing = False with open(file_path, 'r') as file: for line in file: # Fallback for older AIMAll versions or missing MO Data section if "Number of Alpha electrons (from MO Occs)" in line: nalpha = int(float(line.split()[-1])) if "Number of Beta electrons (from MO Occs)" in line: nbeta = int(float(line.split()[-1])) # Start processing after the header line if "Molecular Orbital (MO) Data" in line: nalpha, nbeta = 0, 0 # Reset if we find the detailed section start_processing = True try: for _ in range(10): next(file) except StopIteration: break continue # Stop processing at a blank line if start_processing and not line.strip(): break if start_processing: columns = line.split() if len(columns) >= 4: if columns[3] == "Alpha": nalpha += 1 elif columns[3] == "Beta": nbeta += 1 elif columns[3] == "Alpha,Beta": nalpha += 1 nbeta += 1 # Some versions might have different columns, but usually Spin is at 3 return nalpha, nbeta
[docs]def write_wfx(path, name, mol, mf, aom, wf, occ=None): """ Minimal helper to write a .wfx file for ESIpy. This writes a compact WFX-like file named <name>.wfx into the provided path. If `occ` is provided it will be used for the MO occupations (expected as a diagonal matrix); otherwise a sensible default is built from `wf` and `aom`. """ # Use a custom extension to avoid confusion with other WFX writers filename = os.path.join(path, name + ".wfx") if os.path.exists(filename): return # Determine occupations matrix if wf == 'no' and occ is not None: occ_mat = np.asarray(occ, dtype=float) elif wf == 'rest': # for restricted, assume doubly-occupied MOs if isinstance(aom, list): m = np.asarray(aom[0]).shape[0] else: m = np.asarray(aom).shape[0] occ_mat = np.diag([2.0] * m) elif wf == 'unrest': # for unrestricted, build alpha+beta occupancy vector if isinstance(aom, list) and len(aom) == 2: m = np.asarray(aom[0]).shape[0] + np.asarray(aom[1]).shape[0] else: m = np.asarray(aom).shape[0] occ_mat = np.diag([1.0] * m) else: # fallback: infer size from first AOM block if isinstance(aom, list): m = np.asarray(aom[0]).shape[0] else: m = np.asarray(aom).shape[0] occ_mat = np.diag([2.0] * m) # build titles list locally (same format as write_aoms uses) symbols = [mol.atom_symbol(i) for i in range(mol.natm)] atom_numbers = [i + 1 for i in range(mol.natm)] titles = [symbols[i].lower() + str(atom_numbers[i]) for i in range(mol.natm)] with open(filename, 'w') as f: f.write('<Number of Nuclei>\n') f.write(f" {mol.natm}\n") f.write('</Number of Nuclei>\n') # number of occupied MOs: try mf.mo_coeff then fall back to AOM size nocc = None if mf is not None and hasattr(mf, 'mo_coeff') and getattr(mf, 'mo_coeff') is not None: try: nocc = mf.mo_coeff.shape[1] except Exception: nocc = None if nocc is None: try: nocc = int(np.asarray(occ_mat).shape[0]) except Exception: nocc = 0 f.write('<Number of Occupied Molecular Orbitals>\n') f.write(f" {nocc}\n") f.write('</Number of Occupied Molecular Orbitals>\n') f.write('<Number of Electrons>\n') try: f.write(f" {sum(mol.nelec)}\n") except Exception: f.write(' 0\n') f.write('</Number of Electrons>\n') f.write('<Number of Core Electrons>\n') f.write(f" {0}\n") f.write('</Number of Core Electrons>\n') try: nalpha, nbeta = mol.nelec f.write('<Number of Alpha Electrons>\n') f.write(f" {nalpha}\n") f.write('</Number of Alpha Electrons>\n') f.write('<Number of Beta Electrons>\n') f.write(f" {nbeta}\n") f.write('</Number of Beta Electrons>\n') except Exception: pass f.write('<Electronic Spin Multiplicity>\n') try: mult = 2 * mol.spin + 1 f.write(f" {str(mult)}\n") except Exception: f.write(' 1\n') f.write('</Electronic Spin Multiplicity>\n') f.write('<Nuclear Names>\n') for t in titles: f.write(f" {t.upper()}\n") f.write('</Nuclear Names>\n') f.write('<Nuclear Cartesian Coordinates>\n') try: for coord in mol.atom_coords(): f.write(" {: .12e} {: .12e} {: .12e}\n".format(coord[0], coord[1], coord[2])) except Exception: pass f.write('</Nuclear Cartesian Coordinates>\n') f.write('<Molecular Orbital Occupation Numbers>\n') try: for occupation in np.diag(occ_mat): f.write(f" {occupation:.12e}\n") except Exception: pass f.write('</Molecular Orbital Occupation Numbers>\n') f.write('<Molecular Orbital Spin Types>\n') try: for i in range(0, max(1, int(np.asarray(occ_mat).shape[0]))): f.write(' Alpha and Beta\n') except Exception: pass f.write('</Molecular Orbital Spin Types>\n') f.write('<Molecular Orbital Energies>\n') try: for i in range(0, max(1, int(np.asarray(occ_mat).shape[0]))): f.write(' 0.000000000000e+00\n') except Exception: pass f.write('</Molecular Orbital Energies>\n') f.close()