import os
import re
import numpy as np
from esipy.tools import wf_type, format_short_partition, load_file
# Print the missing .wfx diagnostic only once
[docs]def read_aoms(path='.'):
"""
Reads the AOMs from ESIpy's writeaoms() method or from an AIMAll calculation.
:param path: Path of the directory of the files.
:type path: str
:returns: The AOMs in ESIpy format stored in 'ESI.aom'.
:rtype: list
"""
aom = []
aom_alpha, aom_beta = [], []
start_string = 'The Atomic Overlap Matrix'
mul = False
working_dir = os.getcwd()
if path == '.':
path = working_dir
else:
path = os.path.join(working_dir, path)
ints = [intfile for intfile in os.listdir(path) if
intfile.endswith('.int') and os.path.isfile(os.path.join(path, intfile))]
ordered = sorted(ints, key=lambda x: int(re.search(r'\d+', x).group()))
if ordered == []:
raise ValueError(f"No *.int files found in the directory '{path}'.")
first_file = os.path.join(path, ordered[0])
with open(first_file, 'r') as f:
for line in f:
if "Restricted, closed" in line or "Restricted Closed" in line:
wf = "rest"
break
elif "Unrestricted" in line:
wf = "unrest"
break
elif "Restricted, natural" in line:
wf = "no"
break
else:
raise ValueError("Wavefunction type could not be determined.")
if wf == "no":
occs = read_occs(first_file)
for intfile in ordered:
intfile_path = os.path.join(path, intfile)
with open(intfile_path, 'r') as f:
mat_lines = []
for line in f:
if "Mulliken" in line:
mul = True
if start_string in line:
for _ in range(3):
next(f)
while True:
line = next(f).strip()
if not line:
break
mat_lines.extend([float(num) for num in line.split()])
# We first get the number of shape of the alpha-alpha matrix
na, nb = read_orbs(intfile_path)
if wf == 'unrest':
nt = na + nb
else:
nt = na
# Mulliken works on non-symmetric, square AOMs
if mul:
matrix = np.array(mat_lines).reshape((nt, nt))
# Symmetric AOMs work on lower-triangular matrices
else:
low_matrix = np.zeros((nt, nt))
low_matrix[np.tril_indices(nt)] = mat_lines
matrix = low_matrix + low_matrix.T - np.diag(low_matrix.diagonal())
if wf == "rest" or wf == "no":
aom.append(matrix)
if wf == "unrest":
SCR_alpha = matrix[:na, :na]
SCR_beta = matrix[na:, na:]
aom_alpha.append(SCR_alpha)
aom_beta.append(SCR_beta)
if wf == "rest":
return aom
elif wf == "unrest":
return [aom_alpha, aom_beta]
elif wf == "no":
return [aom, occs]
else:
raise ValueError("Could not read the AOMs")
########### WRITING THE INPUT FOR THE ESI-3D CODE FROM THE AOMS ###########
[docs]def write_aoms(mol, mf, name, aom, ring=[], partition=None):
"""
Writes the input for the ESI-3D code from the AOMs.
:param mol: Molecule object from PySCF.
:type mol: PySCF instance
:param mf: Calculation object from PySCF.
:type mf: PySCF instance
:param name: Name of the calculation.
:type name: str
:param aom: Concatenated list of Atomic Overlap Matrices (AOMs) in the MO basis.
:type aom: list
:param ring: Connectivity of the atoms in the ring. Can be more than one ring as a list of lists.
:type ring: list of int, optional
:param partition: Partition scheme for the AOMs. Options are "mulliken", "lowdin", "meta-lowdin", "nao", "iao".
:type partition: str, optional
:returns: None
:generates:
- A '_atomicfiles/' directory with all the files created.
- A '.int' file for each atom with its corresponding AOM.
- A 'name.files' file with a list of the names of the '.int' files.
- A 'name.bad' file with a standard input for the ESI-3D code.
- A 'name.wfx' file with the occupancies for the ESI-3D code.
"""
from copy import deepcopy
if isinstance(aom, str):
aom = load_file(aom)
if ring is None:
pass
elif isinstance(ring[0], int):
ring = [ring]
wf = wf_type(aom)
if wf == "no":
aom, occ = aom # Separating AOMs and orbital occupations
# Obtaining information for the files
symbols = [mol.atom_symbol(i) for i in range(mol.natm)]
atom_numbers = [i + 1 for i in range(mol.natm)]
if wf == "unrest":
nalpha = [np.trace(aom_alpha) for aom_alpha in aom[0]]
nbeta = [np.trace(aom_beta) for aom_beta in aom[1]]
elif wf == "rest":
nalpha = nbeta = [np.trace(aom) for aom in aom]
elif wf == "no":
nalpha = nbeta = [float(np.trace(np.dot(occ, aom))) for aom in aom]
# Creating a new directory for the calculation
symbols = [mol.atom_symbol(i) for i in range(mol.natm)]
fragidx = mol.natm + 1
fragmap = {}
dofrag = False
ringcopy = deepcopy(ring)
# Mark fragments in the order they are defined
if ring:
for i, sublist in enumerate(ringcopy):
for j, element in enumerate(sublist):
if isinstance(element, set):
dofrag = True
fragmap[tuple(element)] = fragidx
sublist[j] = fragidx
fragidx += 1
shortpart = format_short_partition(partition)
new_dir_name = name + "_" + shortpart + "_atomicfiles"
symbols = [s.lower() for s in symbols]
titles = [symbols[i] + str(atom_numbers[i]) for i in range(mol.natm)] # Setting the title of the files
new_dir_path = os.path.join(os.getcwd(), new_dir_name)
os.makedirs(new_dir_path, exist_ok=True)
# Creating and writing the atomic .int files
for i, item in enumerate(titles):
with open(os.path.join(new_dir_path, item + ".int"), "w+") as f:
f.write(" Created by ESIpy\n")
if partition == "mulliken":
f.write(" Using Mulliken atomic definition\n")
elif partition == "lowdin":
f.write(" Using Lowdin atomic definition\n")
elif partition == "meta-lowdin":
f.write(" Using Meta-Lowdin atomic definition\n")
elif partition == "nao":
f.write(" Using NAO atomic definition\n")
elif partition == "iao":
f.write(" Using IAO atomic definition\n")
f.write(" Single-determinant wave function\n")
f.write(" Molecular SCF ENERGY (AU) = {:.11f}\n\n".format(mf.e_tot))
f.write(" INTEGRATION IS OVER ATOM {}\n".format(titles[i]))
f.write(" RESULTS OF THE INTEGRATION\n")
if wf == "unrest":
f.write(" N {:.10E} NET CHARGE 0.0000000000E+00\n".format(
np.trace(aom[0][i]) + np.trace(aom[1][i])))
elif wf == "rest":
f.write(" N {:.10E} NET CHARGE 0.0000000000E+00\n".format(2 * np.trace(aom[i])))
else:
f.write(" N {:.10E} NET CHARGE 0.0000000000E+00\n".format(np.trace(np.dot(occ, aom[i]))))
f.write(" G\n")
f.write(" K 1.00000000000000E+01 E(ATOM) 1.00000000000000E+00\n")
f.write(" L 0.00000000000000E+01\n\n")
if wf == "unrest":
occ = np.diag([1.0] * (len(aom[0][i]) + len(aom[1][i])))
f.write("\n The Atomic Overlap Matrix:\n\n Unrestricted\n\n")
alpha_size = len(aom[0][i])
beta_size = len(aom[1][i])
zeros = np.zeros((beta_size, alpha_size))
fill = np.vstack((aom[0][i], zeros))
fill = np.hstack((fill, np.vstack((zeros.T, aom[1][i]))))
if partition == "mulliken":
for j in range(alpha_size + beta_size):
for k in range(alpha_size + beta_size):
f.write("{:.16E} ".format(fill[j][k]))
f.write("\n")
else:
for j in range(alpha_size + beta_size):
for k in range(alpha_size + beta_size):
if k <= j:
f.write("{:.16E} ".format(fill[j][k]))
f.write("\n")
elif wf == "rest":
occ = np.diag([2.0] * len(aom[i]))
f.write("\n The Atomic Overlap Matrix:\n\n Restricted, closed-shell\n\n ")
if partition == "mulliken":
for j in range(len(aom[i])):
for k in range(len(aom[i])):
f.write("{:.16E} ".format(aom[i][j][k]))
f.write("\n")
else:
for j in range(len(aom[i])):
for k in range(len(aom[i])):
if k <= j:
f.write("{:.16E} ".format(aom[i][j][k]))
f.write("\n")
elif wf == "no":
f.write("\n The Atomic Overlap Matrix:\n\n Restricted, natural orbital wavefunction\n\n ")
if partition == "mulliken":
for j in range(len(aom[i])):
for k in range(len(aom[i])):
f.write("{:.16E} ".format(aom[i][j][k]))
f.write("\n")
else:
for j in range(len(aom[i])):
for k in range(len(aom[i])):
if k <= j:
f.write("{:.16E} ".format(aom[i][j][k]))
f.write("\n")
f.write("\n\n")
f.write("Molecular Orbital (MO) Data:\n")
f.write("---------------------------\n")
f.write(" MO# i = ith MO in AIMAll (internal and output) order\n")
f.write(" WMO#(i) = MO# i in wavefunction file order\n")
f.write(" Occ_MO(i) = Occupancy of ith MO for Molecule\n")
f.write(" Spin_MO(i) = Spin Type of ith MO\n")
f.write(" Occ_MO(A,i) = Contribution of Atom A to Occ_MO(i)\n")
f.write(" %Occ_MO(A,i) = 100 * Occ_MO(A,i) / Occ_MO(i)\n")
f.write(" %N_MO(A,i) = 100 * Occ_MO(A,i) / N(A)\n")
f.write(
"---------------------------------------------------------------------------------------------------\n")
f.write(
" MO# i WMO#(i) Occ_MO(i) Spin_MO(i) Occ_MO(A,i) %Occ_MO(A,i) %N_MO(A,i)\n")
f.write(
"---------------------------------------------------------------------------------------------------\n")
if wf == "unrest":
for j, occup in enumerate(np.diag(occ[beta_size:])):
f.write(
f" {j + 1:<8}{j + 1:<12}{float(1.):<15.10f}{'Alpha':<15}{0.0:<15.10f}{0.0:<15.10f}{0.0:<15.10f}\n"
)
for j, occup in enumerate(np.diag(occ[:beta_size])):
f.write(
f" {j + 1 + alpha_size:<8}{j + 1 + alpha_size:<12}{float(occup):<15.10f}{'Beta':<15}{0.0:<15.10f}{0.0:<15.10f}{0.0:<15.10f}\n"
)
else:
for j, occup in enumerate(np.diag(occ)):
f.write(
f" {j + 1:<8}{j + 1:<12}{float(occup):<15.10f}{'Alpha,Beta':<15}{0.0:<15.10f}{0.0:<15.10f}{0.0:<15.10f}\n"
)
f.write("\n Alpha electrons (NAlpha) {:.10E}".format(nalpha[i]))
f.write("\n Beta electrons (NBeta) {:.10E}\n".format(nbeta[i]))
f.write(" NORMAL TERMINATION OF PROAIMV")
f.close()
# Writing the file containing the title of the atomic .int files
with open(os.path.join(new_dir_path, name + shortpart + ".files"), "w") as f:
for i in titles:
f.write(i + ".int\n")
f.close()
# Always write .wfx file (use occ if available)
occ_local = occ if 'occ' in locals() else None
# Write the WFX-like file always (extension .wfx)
write_wfx(new_dir_path, name, mol, mf, aom, wf, occ_local)
[docs]def read_molinfo(path):
"""
Reads all *.int files from a directory and builds the molinfo dictionary.
:param path: Path to the directory containing *.int files.
:type path: str
:returns: A dictionary containing molecular information.
:rtype: dict
"""
molinfo = {
"method": "Not specified",
"basisset": "Not specified",
"xc": "Not specified",
"energy": "Not specified",
"calctype": "Not specified",
"geom": None,
"partition": "qtaim",
}
symbs, atm_nums = [], []
found_energy = False
ints = [intfile for intfile in os.listdir(path) if
intfile.endswith(".int") and os.path.isfile(os.path.join(path, intfile))]
int_files = sorted(ints, key=lambda x: int(re.search(r"\d+", x).group()))
if not int_files:
raise FileNotFoundError(f"No *.int files found in the directory '{path}'.")
molinfo["partition"] = get_partition(path, int_files[0])
for int_file in int_files:
with open(os.path.join(path, int_file), "r") as file:
lines = file.readlines()
for line in lines:
if "Integration is" in line or "INTEGRATION IS" in line:
parts = re.findall(r"[A-Za-z]+|\d+", line.split()[-1].strip())
symbs.append(parts[-2])
atm_nums.append(parts[-1])
if "Restricted, closed-shell" in line or "Restricted Closed-Shell":
molinfo["calctype"] = "RHF"
if "Unrestricted" in line:
molinfo["calctype"] = "UHF"
elif "Restricted, natural" in line:
molinfo["calctype"] = "NO"
if "The molecular energy from the wf" in line or "ENERGY" in line and not found_energy:
molinfo["energy"] = float(line.split()[-1])
found_energy = True
if "Model:" in line:
molinfo["method"] = line.split("Model:")[1].strip()
if "Basis Set:" in line:
molinfo["basisset"] = line.split("Basis Set:")[1].strip()
if "Functional:" in line:
molinfo["xc"] = line.split("Functional:")[1].strip()
if "Wfx File:" in line and "wfx_filename" not in molinfo:
molinfo["wfx_filename"] = line.split(":")[-1].strip()
molinfo["symbols"] = symbs
molinfo["atom_numbers"] = atm_nums
molinfo["geom"] = read_wfx_info(path, molinfo.get("wfx_filename"))
return molinfo
[docs]def read_wfx_info(path, wfx_filename=None):
"""
Searches for a .wfx file in the given path or the previous path, reads the coordinates and charges,
and stores them in molinfo["geom"].
:param path: Directory path containing the file.
:type path: str
:param wfx_filename: Optional filename of the .wfx file.
:type wfx_filename: str
:returns: NumPy array with coordinates.
:rtype: numpy.ndarray
"""
global wfxnotfound
# Normalize paths
if not path:
path = os.getcwd()
path = os.path.normpath(path)
abs_path = os.path.abspath(path)
parent_path = os.path.dirname(abs_path)
# Possible locations to look for the file
# Prioritize current working directory and path parent
locations = [os.getcwd(), parent_path, path, abs_path]
# Remove duplicates while preserving order
unique_locations = []
for loc in locations:
if loc and loc not in unique_locations:
unique_locations.append(loc)
wfx_file = None
# If we have a specific filename, look for it
if wfx_filename:
# It might be an absolute path already
if os.path.isabs(wfx_filename) and os.path.isfile(wfx_filename):
wfx_file = wfx_filename
else:
# Look for this specific filename in all locations
base_wfx = os.path.basename(wfx_filename)
for loc in unique_locations:
if loc and os.path.isdir(loc):
candidate = os.path.join(loc, base_wfx)
if os.path.isfile(candidate):
wfx_file = candidate
break
# Fallback to searching for any .wfx file if not found yet
if not wfx_file:
for loc in unique_locations:
if loc and os.path.isdir(loc):
files = [f for f in os.listdir(loc) if f.lower().endswith(".wfx")]
if len(files) == 1:
wfx_file = os.path.join(loc, files[0])
break
elif len(files) > 1:
# If multiple, maybe one matches the "path" name?
basename = os.path.basename(path).replace("_atomicfiles", "")
matches = [f for f in files if basename in f]
if len(matches) == 1:
wfx_file = os.path.join(loc, matches[0])
break
if not wfx_file:
if not wfxnotfound:
print(f" | Could not find .wfx file in any of: {unique_locations}")
wfxnotfound = True
return None
with open(wfx_file, "r") as file:
lines = file.readlines()
start_coords = False
coordinates = []
for line in lines:
if "<Nuclear Cartesian Coordinates>" in line:
start_coords = True
continue
if "</Nuclear Cartesian Coordinates>" in line:
break
if start_coords:
parts = line.split()
if len(parts) == 3:
coordinates.append([float(x) for x in parts])
if not coordinates:
raise ValueError(f"No coordinates found in the .wfx file: {wfx_file}")
# Combine symbols and coordinates into a NumPy array
geom = np.array([coordinates[i] for i in range(len(coordinates))], dtype=object)
return geom
[docs]def get_partition(path, int_file):
"""
Extracts the partition type from the given file.
:param path: Directory path containing the file.
:param int_file: File name to read.
:return: Partition type as a string.
"""
with open(os.path.join(path, int_file), "r") as file:
lines = file.readlines()
if "AIMInt" in lines[0]:
return "qtaim"
elif "ESIpy" in lines[0]:
return lines[1].split()[1].strip()
return None
[docs]def read_occs(file_path):
"""
Extracts the NO coefficients from a Restricted, natural orbitals AIMAll calculation.
:param file_path: Path to the file to process.
:type file_path: str
:return: List of extracted Occ_MO(i) values.
:rtype: list of float
"""
occ_mo_values = []
start_processing = False
with open(file_path, 'r') as file:
for line in file:
# Start processing after the header line
if "Molecular Orbital (MO) Data" in line:
start_processing = True
for _ in range(10):
next(file)
continue
# Stop processing at a blank line
if start_processing and not line.strip():
break
# Extract the third column if processing has started
if start_processing:
columns = line.split()
if len(columns) >= 3:
occ_mo_values.append(float(columns[2]))
return np.diag(occ_mo_values)
[docs]def read_orbs(file_path):
"""
Extracts the NO coefficients from a Restricted, natural orbitals AIMAll calculation.
:param file_path: Path to the file to process.
:type file_path: str
:return: List of extracted Occ_MO(i) values.
:rtype: list of float
"""
nalpha, nbeta = 0, 0
start_processing = False
with open(file_path, 'r') as file:
for line in file:
# Fallback for older AIMAll versions or missing MO Data section
if "Number of Alpha electrons (from MO Occs)" in line:
nalpha = int(float(line.split()[-1]))
if "Number of Beta electrons (from MO Occs)" in line:
nbeta = int(float(line.split()[-1]))
# Start processing after the header line
if "Molecular Orbital (MO) Data" in line:
nalpha, nbeta = 0, 0 # Reset if we find the detailed section
start_processing = True
try:
for _ in range(10):
next(file)
except StopIteration:
break
continue
# Stop processing at a blank line
if start_processing and not line.strip():
break
if start_processing:
columns = line.split()
if len(columns) >= 4:
if columns[3] == "Alpha":
nalpha += 1
elif columns[3] == "Beta":
nbeta += 1
elif columns[3] == "Alpha,Beta":
nalpha += 1
nbeta += 1
# Some versions might have different columns, but usually Spin is at 3
return nalpha, nbeta
[docs]def write_wfx(path, name, mol, mf, aom, wf, occ=None):
"""
Minimal helper to write a .wfx file for ESIpy. This writes a compact WFX-like
file named <name>.wfx into the provided path. If `occ` is provided it will be
used for the MO occupations (expected as a diagonal matrix); otherwise a sensible
default is built from `wf` and `aom`.
"""
# Use a custom extension to avoid confusion with other WFX writers
filename = os.path.join(path, name + ".wfx")
if os.path.exists(filename):
return
# Determine occupations matrix
if wf == 'no' and occ is not None:
occ_mat = np.asarray(occ, dtype=float)
elif wf == 'rest':
# for restricted, assume doubly-occupied MOs
if isinstance(aom, list):
m = np.asarray(aom[0]).shape[0]
else:
m = np.asarray(aom).shape[0]
occ_mat = np.diag([2.0] * m)
elif wf == 'unrest':
# for unrestricted, build alpha+beta occupancy vector
if isinstance(aom, list) and len(aom) == 2:
m = np.asarray(aom[0]).shape[0] + np.asarray(aom[1]).shape[0]
else:
m = np.asarray(aom).shape[0]
occ_mat = np.diag([1.0] * m)
else:
# fallback: infer size from first AOM block
if isinstance(aom, list):
m = np.asarray(aom[0]).shape[0]
else:
m = np.asarray(aom).shape[0]
occ_mat = np.diag([2.0] * m)
# build titles list locally (same format as write_aoms uses)
symbols = [mol.atom_symbol(i) for i in range(mol.natm)]
atom_numbers = [i + 1 for i in range(mol.natm)]
titles = [symbols[i].lower() + str(atom_numbers[i]) for i in range(mol.natm)]
with open(filename, 'w') as f:
f.write('<Number of Nuclei>\n')
f.write(f" {mol.natm}\n")
f.write('</Number of Nuclei>\n')
# number of occupied MOs: try mf.mo_coeff then fall back to AOM size
nocc = None
if mf is not None and hasattr(mf, 'mo_coeff') and getattr(mf, 'mo_coeff') is not None:
try:
nocc = mf.mo_coeff.shape[1]
except Exception:
nocc = None
if nocc is None:
try:
nocc = int(np.asarray(occ_mat).shape[0])
except Exception:
nocc = 0
f.write('<Number of Occupied Molecular Orbitals>\n')
f.write(f" {nocc}\n")
f.write('</Number of Occupied Molecular Orbitals>\n')
f.write('<Number of Electrons>\n')
try:
f.write(f" {sum(mol.nelec)}\n")
except Exception:
f.write(' 0\n')
f.write('</Number of Electrons>\n')
f.write('<Number of Core Electrons>\n')
f.write(f" {0}\n")
f.write('</Number of Core Electrons>\n')
try:
nalpha, nbeta = mol.nelec
f.write('<Number of Alpha Electrons>\n')
f.write(f" {nalpha}\n")
f.write('</Number of Alpha Electrons>\n')
f.write('<Number of Beta Electrons>\n')
f.write(f" {nbeta}\n")
f.write('</Number of Beta Electrons>\n')
except Exception:
pass
f.write('<Electronic Spin Multiplicity>\n')
try:
mult = 2 * mol.spin + 1
f.write(f" {str(mult)}\n")
except Exception:
f.write(' 1\n')
f.write('</Electronic Spin Multiplicity>\n')
f.write('<Nuclear Names>\n')
for t in titles:
f.write(f" {t.upper()}\n")
f.write('</Nuclear Names>\n')
f.write('<Nuclear Cartesian Coordinates>\n')
try:
for coord in mol.atom_coords():
f.write(" {: .12e} {: .12e} {: .12e}\n".format(coord[0], coord[1], coord[2]))
except Exception:
pass
f.write('</Nuclear Cartesian Coordinates>\n')
f.write('<Molecular Orbital Occupation Numbers>\n')
try:
for occupation in np.diag(occ_mat):
f.write(f" {occupation:.12e}\n")
except Exception:
pass
f.write('</Molecular Orbital Occupation Numbers>\n')
f.write('<Molecular Orbital Spin Types>\n')
try:
for i in range(0, max(1, int(np.asarray(occ_mat).shape[0]))):
f.write(' Alpha and Beta\n')
except Exception:
pass
f.write('</Molecular Orbital Spin Types>\n')
f.write('<Molecular Orbital Energies>\n')
try:
for i in range(0, max(1, int(np.asarray(occ_mat).shape[0]))):
f.write(' 0.000000000000e+00\n')
except Exception:
pass
f.write('</Molecular Orbital Energies>\n')
f.close()