Source code for cgexplore._internal.atomistic.utilities
"""Definition of conversion utilities."""
import logging
import pathlib
import bbprep
import numpy as np
import stk
import stko
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s | %(levelname)s | %(message)s",
)
logger = logging.getLogger(__name__)
[docs]
def extract_ditopic_ensemble(
molecule: stk.BuildingBlock,
crest_run: pathlib.Path,
) -> dict:
"""Extract and save an ensemble from a crest run.
TODO: Turn the ensemble into a dataclass so that typing makes sense, remove
all mypy ignore statements.
"""
ensemble_dir = crest_run / "ensemble"
num_atoms = molecule.get_num_atoms()
ensemble: dict[
int, dict[str, float | int | str | stk.BuildingBlock | tuple]
] = {}
ensemble_dir.mkdir(exist_ok=True, parents=True)
# Calculate geometrical properties.
conformer_file = crest_run / "crest_conformers.xyz"
with conformer_file.open("r") as f:
linex = f.readlines()
split_line = linex[0].rstrip()
for i, conformers in enumerate("".join(linex).split(split_line)):
lines = conformers.split("\n")
if len(lines) != num_atoms + 3:
continue
energy = lines[1]
position_matrix = []
for line in lines[2:]:
splits = line.rstrip().split()
if len(splits) != 4: # noqa: PLR2004
continue
_, x, y, z = splits
x = float(x) # type: ignore[assignment]
y = float(y) # type: ignore[assignment]
z = float(z) # type: ignore[assignment]
position_matrix.append(np.array((x, y, z)))
conf_molecule = molecule.with_position_matrix(
np.array(position_matrix)
)
calc = stko.molecule_analysis.DitopicThreeSiteAnalyser()
adjacent_centroids = calc.get_adjacent_centroids(conf_molecule)
adjacent_distance = float(
np.linalg.norm(adjacent_centroids[0] - adjacent_centroids[1])
)
ensemble[i] = {
"energy": float(energy),
"molecule": conf_molecule, # type: ignore[dict-item]
"binder_angles": calc.get_binder_angles(conf_molecule),
"binder_binder_angle": calc.get_binder_binder_angle(conf_molecule),
"binder_distance": calc.get_binder_distance(conf_molecule),
"binder_adjacent_torsion": calc.get_binder_adjacent_torsion(
conf_molecule
),
"adjacent_distance": adjacent_distance,
"binder_com_angle": calc.get_binder_centroid_angle(conf_molecule),
}
ensemble[i]["molecule"].write( # type: ignore[union-attr]
ensemble_dir / f"conf_{i}.mol"
)
return ensemble
[docs]
def cgx_optimisation_sequence(
cage: stk.Molecule,
name: str,
calculation_dir: pathlib.Path,
gulp_path: pathlib.Path,
) -> stk.Molecule:
"""Cage optimisation sequence.
TODO: This is a placeholder for a more general atomistic cage sequence.
"""
gulp1_output = calculation_dir / f"{name}_gulp1.mol"
gulp2_output = calculation_dir / f"{name}_gulp2.mol"
if not gulp1_output.exists():
output_dir = calculation_dir / f"{name}_gulp1"
logger.info(" UFF4MOF optimisation 1 of %s", name)
gulp_opt = stko.GulpUFFOptimizer(
gulp_path=gulp_path,
maxcyc=1000,
metal_FF={46: "Pd4+2"},
metal_ligand_bond_order="",
output_dir=output_dir,
conjugate_gradient=True,
)
gulp_opt.assign_FF(cage)
gulp1_mol = gulp_opt.optimize(mol=cage)
gulp1_mol.write(gulp1_output)
else:
logger.info(" loading %s", gulp1_output)
gulp1_mol = cage.with_structure_from_file(gulp1_output)
if not gulp2_output.exists():
output_dir = calculation_dir / f"{name}_gulp2"
logger.info(" UFF4MOF optimisation 2 of %s", name)
gulp_opt = stko.GulpUFFOptimizer(
gulp_path=gulp_path,
maxcyc=1000,
metal_FF={46: "Pd4+2"},
metal_ligand_bond_order="",
output_dir=output_dir,
conjugate_gradient=False,
)
gulp_opt.assign_FF(gulp1_mol)
gulp2_mol = gulp_opt.optimize(mol=gulp1_mol)
gulp2_mol.write(gulp2_output)
else:
logger.info(" loading %s", gulp2_output)
gulp2_mol = cage.with_structure_from_file(gulp2_output)
return cage.with_structure_from_file(gulp2_output)
[docs]
def get_ditopic_aligned_bb(
path: pathlib.Path,
optl_path: pathlib.Path,
) -> stk.BuildingBlock:
"""Get building block for the target ligand and prepare for cage model.
TODO: move into bbprepared.recipes.
"""
if not path.exists():
temp = stk.BuildingBlock.init_from_file(
path=optl_path,
functional_groups=(
stko.functional_groups.ThreeSiteFactory("[#6]~[#7X2]~[#6]"),
),
)
# Handle if not ditopic.
if temp.get_num_functional_groups() != 2: # noqa: PLR2004
temp = bbprep.FurthestFGs().modify(
building_block=temp,
desired_functional_groups=2,
)
generator = bbprep.generators.ETKDG(num_confs=100)
ensemble = generator.generate_conformers(temp)
process = bbprep.DitopicFitter(ensemble=ensemble)
min_molecule = process.get_minimum()
min_molecule.molecule.write(path)
molecule = stk.BuildingBlock.init_from_file(
path=path,
functional_groups=(
stko.functional_groups.ThreeSiteFactory("[#6]~[#7X2]~[#6]"),
),
)
# Handle if not ditopic.
if molecule.get_num_functional_groups() != 2: # noqa: PLR2004
molecule = bbprep.FurthestFGs().modify(
building_block=molecule,
desired_functional_groups=2,
)
return molecule