Source code for cgexplore._internal.utilities.generation_utilities

# Distributed under the terms of the MIT License.

"""Module for structure generation utilities.

Author: Andrew Tarzia

"""

import logging
import pathlib
from collections import abc
from typing import TYPE_CHECKING

import numpy as np
import stk
from openmm import OpenMMException, openmm

from cgexplore._internal.forcefields.assigned_system import (
    AssignedSystem,
    MartiniSystem,
)
from cgexplore._internal.forcefields.forcefield import ForceField
from cgexplore._internal.molecular.beads import string_to_atom_number
from cgexplore._internal.molecular.conformer import Conformer
from cgexplore._internal.molecular.ensembles import Ensemble
from cgexplore._internal.optimisation.openmm_optimizer import (
    CGOMMDynamics,
    CGOMMOptimizer,
    OMMTrajectory,
)
from cgexplore._internal.terms.angles import Angle, CosineAngle
from cgexplore._internal.terms.bonds import Bond

if TYPE_CHECKING:
    from cgexplore._internal.terms.nonbonded import Nonbonded
    from cgexplore._internal.terms.torsions import Torsion

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
)
logger = logging.getLogger(__name__)


[docs] def optimise_ligand( molecule: stk.Molecule, name: str, output_dir: pathlib.Path, forcefield: ForceField, platform: str | None, ) -> stk.Molecule: """Optimise a building block. Parameters: molecule: The molecule to optimise. name: Name to use for naming output files. E.g. produces a file `{name}_opted1.mol` in `output_dir`. output_dir: Directory to save outputs of optimisation process. forcefield: Define the forces used in the molecule. platform: Which platform to use with OpenMM optimisation. Options are `CPU` or `CUDA`. More are available but may not work well out of the box. Returns: An stk molecule. """ opt1_mol_file = pathlib.Path(output_dir) / f"{name}_opted1.mol" if opt1_mol_file.exists(): molecule = molecule.with_structure_from_file(str(opt1_mol_file)) else: logger.info("optimising %s, no max_iterations", name) assigned_system = forcefield.assign_terms( molecule=molecule, name=name, output_dir=output_dir, ) opt = CGOMMOptimizer( fileprefix=name, output_dir=output_dir, platform=platform, ) molecule = opt.optimize(assigned_system) molecule = molecule.with_centroid(np.array((0, 0, 0))) molecule.write(opt1_mol_file) energy_decomp = opt.read_final_energy_decomposition() logger.info("optimised with energy:") for i in energy_decomp: e, u = energy_decomp[i] logger.info("%s: %s [%s]", i, round(e, 2), u) return molecule
[docs] def soften_forcefield( assigned_system: AssignedSystem | MartiniSystem, bond_ff_scale: float, angle_ff_scale: float, new_name: str, ) -> AssignedSystem: """Soften force field by scaling parameters and turning off torsions. Parameters: assigned_system: Molecule with force field terms assigned that will be modified. bond_ff_scale: Scale (divide) the bond terms in the model by this value. angle_ff_scale: Scale (divide) the angle terms in the model by this value. new_name: New name for system xml. Returns: New assigned system. """ if isinstance(assigned_system, MartiniSystem): msg = "soften not available for Martini yet." raise TypeError(msg) angles = [] for i in assigned_system.forcefield_terms["angle"]: if isinstance(i, Angle): angles.append( Angle( atom_names=i.atom_names, atom_ids=i.atom_ids, angle=i.angle, angle_k=i.angle_k / angle_ff_scale, atoms=i.atoms, force=i.force, ) ) elif isinstance(i, CosineAngle): angles.append( CosineAngle( # type: ignore[arg-type] atom_names=i.atom_names, atom_ids=i.atom_ids, n=i.n, b=i.b, angle_k=i.angle_k / angle_ff_scale, atoms=i.atoms, force=i.force, ) ) else: msg = f"soften not available for {i} angle type." raise TypeError(msg) new_forcefield_terms: dict[ str, abc.Sequence[Bond | Angle | CosineAngle | Torsion | Nonbonded] ] = { "bond": tuple( Bond( atom_names=i.atom_names, # type:ignore[union-attr] atom_ids=i.atom_ids, # type:ignore[union-attr] bond_r=i.bond_r, # type:ignore[union-attr] bond_k=i.bond_k / bond_ff_scale, # type:ignore[union-attr] atoms=i.atoms, # type:ignore[union-attr] force=i.force, ) for i in assigned_system.forcefield_terms["bond"] ), "angle": tuple(angles), "torsion": (), "nonbonded": assigned_system.forcefield_terms["nonbonded"], } new_system_xml = pathlib.Path( str(assigned_system.system_xml).replace( "_syst.xml", f"_{new_name}syst.xml" ) ) return AssignedSystem( molecule=assigned_system.molecule, forcefield_terms=new_forcefield_terms, system_xml=new_system_xml, topology_xml=assigned_system.topology_xml, bead_set=assigned_system.bead_set, vdw_bond_cutoff=assigned_system.vdw_bond_cutoff, )
[docs] def run_soft_md_cycle( # noqa: PLR0913 name: str, assigned_system: AssignedSystem, output_dir: pathlib.Path, num_steps: int, suffix: str, bond_ff_scale: float, angle_ff_scale: float, temperature: openmm.unit.Quantity, time_step: openmm.unit.Quantity, friction: openmm.unit.Quantity, reporting_freq: float, traj_freq: float, platform: str | None, ) -> OMMTrajectory | None: """Run MD exploration with soft potentials. Parameters: name: Name to use for naming output files. E.g. produces a file `{name}_opted1.mol` in `output_dir`. assigned_system: The system to optimise with force field assigned. output_dir: Directory to save outputs of optimisation process. num_steps: The number of time steps to run the MD for. suffix: Suffix to use for naming output files. E.g. produces a file `{name}_{suffix}_ff.xml` in `output_dir`. Used to define step in process. bond_ff_scale: Scale (divide) the bond terms in the model by this value. angle_ff_scale: Scale (divide) the angle terms in the model by this value. temperature: Simulation temperature. Should be an openmm.unit.Quantity with units of openmm.unit.kelvin. time_step: Simulation timestep to use in Integrator. Should be an openmm.unit.Quantity with units of [time], e.g., openmm.unit.femtoseconds. friction: Friction constant to use in LangevinIntegrator. Should be an openmm.unit.Quantity with units of [time^-1], e.g., / openmm.unit.picosecond. reporting_freq: How often the simulation properties should be written in time steps. traj_freq: How often the trajectory should be written in time steps. platform: Which platform to use with OpenMM optimisation. Options are `CPU` or `CUDA`. More are available but may not work well out of the box. Returns: An OMMTrajectory containing the data and conformers. """ soft_assigned_system = soften_forcefield( assigned_system=assigned_system, bond_ff_scale=bond_ff_scale, angle_ff_scale=angle_ff_scale, new_name="softmd", ) md = CGOMMDynamics( fileprefix=f"{name}_{suffix}", output_dir=output_dir, temperature=temperature, random_seed=1000, num_steps=num_steps, time_step=time_step, friction=friction, reporting_freq=reporting_freq, traj_freq=traj_freq, platform=platform, ) try: return md.run_dynamics(soft_assigned_system) except OpenMMException: return None
[docs] def run_constrained_optimisation( # noqa: PLR0913 assigned_system: AssignedSystem, name: str, output_dir: pathlib.Path, bond_ff_scale: float, angle_ff_scale: float, max_iterations: int, platform: str | None, ) -> stk.Molecule: """Run optimisation with constraints and softened potentials. Parameters: assigned_system: The system to optimise with force field assigned. name: Name to use for naming output files. E.g. produces a file `{name}_constrained_ff.xml` in `output_dir`. output_dir: Directory to save outputs of optimisation process. bond_ff_scale: Scale (divide) the bond terms in the model by this value. angle_ff_scale: Scale (divide) the angle terms in the model by this value. max_iterations: Number of iterations to use in optimisation. Can be None, for until convergence is met. platform: Which platform to use with OpenMM optimisation. Options are `CPU` or `CUDA`. More are available but may not work well out of the box. Returns: An stk molecule. """ soft_assigned_system = soften_forcefield( assigned_system=assigned_system, bond_ff_scale=bond_ff_scale, angle_ff_scale=angle_ff_scale, new_name="const", ) intra_bb_bonds = [] if isinstance(assigned_system.molecule, stk.ConstructedMolecule): for bond_info in assigned_system.molecule.get_bond_infos(): if bond_info.get_building_block_id() is not None: bond = bond_info.get_bond() intra_bb_bonds.append( (bond.get_atom1().get_id(), bond.get_atom2().get_id()) ) constrained_opt = CGOMMOptimizer( fileprefix=f"{name}_constrained", output_dir=output_dir, max_iterations=max_iterations, atom_constraints=intra_bb_bonds, platform=platform, ) return constrained_opt.optimize(soft_assigned_system)
[docs] def run_optimisation( # noqa: PLR0913 assigned_system: AssignedSystem, name: str, file_suffix: str, output_dir: pathlib.Path, platform: str | None, max_iterations: int | None = None, ensemble: Ensemble | None = None, ) -> Conformer: """Run optimisation. Parameters: assigned_system: The system to optimise with force field assigned. name: Name to use for naming output files. E.g. produces a file `{name}_{suffix}_ff.xml` in `output_dir`. file_suffix: Suffix to use for naming output files. E.g. produces a file `{name}_{suffix}_ff.xml` in `output_dir`. Used to define step in process. output_dir: Directory to save outputs of optimisation process. platform: Which platform to use with OpenMM optimisation. Options are `CPU` or `CUDA`. More are available but may not work well out of the box. max_iterations: Number of iterations to use in optimisation. Can be None, for until convergence is met. ensemble: Ensemble to get the conformer id from. Returns: A Conformer. """ opt = CGOMMOptimizer( fileprefix=f"{name}_{file_suffix}", output_dir=output_dir, max_iterations=max_iterations, platform=platform, ) molecule = opt.optimize(assigned_system) energy_decomp = opt.read_final_energy_decomposition() confid = None if ensemble is None else ensemble.get_num_conformers() return Conformer( molecule=molecule, conformer_id=confid, energy_decomposition=energy_decomp, )
[docs] def yield_near_models( molecule: stk.Molecule, name: str, output_dir: pathlib.Path | str, neighbour_library: list, ) -> abc.Iterator[stk.Molecule]: """Yield structures of models with neighbouring force field parameters. Parameters: molecule: The molecule to modify the position matrix of. name: Name of molecule, holding force field ID. output_dir: Directory with optimisation outputs saved. neighbour_library: IDs of force fields with nearby parameters, defined in `define_forcefields.py`. Returns: An stk molecule. """ ff_name = [i for i in name.split("_") if "f" in i][-1] for new_ff_id in neighbour_library: new_name = name.replace(ff_name, f"f{new_ff_id}") new_fina_mol_file = pathlib.Path(output_dir) / f"{new_name}_final.mol" if new_fina_mol_file.exists(): logger.info("found neigh: %s", new_fina_mol_file) yield molecule.with_structure_from_file(str(new_fina_mol_file))
[docs] def shift_beads( molecule: stk.Molecule, atomic_number: int, kick: float, ) -> stk.Molecule: """Shift beads away from cage centroid. Parameters: molecule: The molecule to manipulate. atomic_number: Atomic number of beads to manipulate. kick: Scale determining size of manipulation. Returns: An stk molecule. """ old_pos_mat = molecule.get_position_matrix() centroid = molecule.get_centroid() new_pos_mat = [] for atom, pos in zip(molecule.get_atoms(), old_pos_mat, strict=True): if atom.get_atomic_number() == atomic_number: c_v = centroid - pos c_v = c_v / np.linalg.norm(c_v) move = c_v * kick new_pos = pos - move else: new_pos = pos new_pos_mat.append(new_pos) return molecule.with_position_matrix(np.array(new_pos_mat))
[docs] def yield_shifted_models( molecule: stk.Molecule, forcefield: ForceField, kicks: abc.Sequence[int], ) -> abc.Iterator[stk.Molecule]: """Yield conformers with atom positions of particular beads shifted. Parameters: molecule: The molecule to manipulate. forcefield: Defines the force field. kicks: Defines the kicks in Angstrom to apply. Yields: An stk molecule. """ for bead in forcefield.get_present_beads(): atom_number = string_to_atom_number(bead.element_string) for kick in kicks: yield shift_beads(molecule, atom_number, kick)
[docs] def rattle( molecule: stk.Molecule, stdev: float = 0.001, seed: int | None = None, ) -> stk.Molecule: """Randomly displace atoms. This code is mimicking what is done in ase.Atoms.rattle(). Thank you to them! Parameters: molecule: The molecule to manipulate. stdev: The standard deviation of the displacement. seed: The seed for the random number generator. Returns: An stk molecule. """ if seed is None: seed = 42 rng = np.random.RandomState(seed) positions = molecule.get_position_matrix() return molecule.with_position_matrix( positions + rng.normal(scale=stdev, size=positions.shape) )