Systems Optimisation

A package of the classes for optimising CG models in cgexplore.

Note

Examples of these functions and the use of this package can be found in recipe 1.

Inputs

A cgexplore.systems_optimisation.ChromosomeGenerator is used to add genes to an optimisation problem and automatically provide a list of cgexplore.systems_optimisation.Chromosome for modelling (plus helpful methods for exploring this library through optimisation algorithms, such as a genetic algorithm).

Generation Handling

Once you have a library of chromosomes, you can iterate over them to produce generations, which are handled here:

Fitness and Structure Calculation

A cgexplore.systems_optimisation.Generation requires the definition of how a cgexplore.systems_optimisation.Chromosome is translated into a model and how to calculate that models fitness. These are provided as functions by the user (described below) to:

Note

The options argument allows the user to provide custom information or functions in a dictionary.

import cgexplore as cgx

def fitness_function(
  chromosome,
  chromosome_generator,
  database_path,
  calculation_output,
  structure_output,
  options={},
):
  """Calculate the fitness of a chromosome.

  All arguments are needed, even if not used.

  Returns:
    float

  """
  database = cgx.utilities.AtomliteDatabase(database_path)
  target_pore = 2
  # Use the separated string to distinguish between genes!
  name = f"{chromosome.prefix}_{chromosome.get_separated_string()}"

  # Extract needed properties from database (done in `structure_calculator`).
  entry = database.get_entry(name)
  tstr = entry.properties["topology"]
  pore = entry.properties["opt_pore_data"]["min_distance"]
  energy = entry.properties["energy_per_bb"]
  pore_diff = abs(target_pore - pore) / target_pore

  fitness = 1 / (pore_diff + energy)

  # Add fitness to database.
  database.add_properties(
      key=name,
      property_dict={"fitness": fitness},
  )

  return fitness
def structure_function(
  chromosome,
  database_path,
  calculation_output,
  structure_output,
  options={},
):
  """Define model and calculate its properties.

  All arguments are needed, even if not used.

  """
  database = cgx.utilities.AtomliteDatabase(database_path)
  # Build structure.
  topology_str, topology_fun = chromosome.get_topology_information()
  building_blocks = chromosome.get_building_blocks()
  cage = stk.ConstructedMolecule(topology_fun(building_blocks))
  # Use the separated string to distinguish between genes!
  name = f"{chromosome.prefix}_{chromosome.get_separated_string()}"

  # Select forcefield by chromosome.
  forcefield = chromosome.get_forcefield()

  # Optimise with some procedure.
  conformer = optimise_cage(
      molecule=cage,
      name=name,
      output_dir=calculation_output,
      forcefield=forcefield,
      platform=None,
      database=database,
      chromosome=chromosome,
  )

  # Analyse cage.
  analyse_cage(
      name=name,
      output_dir=calculation_output,
      forcefield=forcefield,
      node_element="Ag",
      database=database,
      chromosome=chromosome,
  )

Anatomies of a script

Important

All of the below can be found in the script optimisation_test.py.

First you must define the ChromosomeGenerator, which holds all the changeable gene information and allows iteration and selection of Chromosome from it. You define the object, and then use the add_gene() or add_forcefield_dict() methods to add the genes, which the generator should analyse automatically to find the changeable features.

# Define beads.
bead_library = cgx.molecular.BeadLibrary.from_bead_types(
    # Type and coordination.
    {"a": 3, "b": 2, "c": 2, "o": 2}
)

# Define the chromosome generator, holding all the changeable genes.
chromo_it = cgx.systems_optimisation.ChromosomeGenerator(
    prefix="syst_opt_test",
    present_beads=bead_library.get_present_beads(),
    vdw_bond_cutoff=2,
)
chromo_it.add_gene(
    iteration=(
        ("2P3", stk.cage.TwoPlusThree),
        ("4P6", stk.cage.FourPlusSix),
        ("4P62", stk.cage.FourPlusSix2),
        ("6P9", stk.cage.SixPlusNine),
        ("8P12", stk.cage.EightPlusTwelve),
    ),
    gene_type="topology",
)
# Set some basic building blocks up. This should be run by an algorithm
# later.
chromo_it.add_gene(
    iteration=(
        cgx.molecular.TwoC1Arm(
            bead=bead_library.get_from_type("b"),
            abead1=bead_library.get_from_type("c"),
        ),
    ),
    gene_type="precursor",
)
chromo_it.add_gene(
    iteration=(
        cgx.molecular.ThreeC1Arm(
            bead=bead_library.get_from_type("a"),
            abead1=bead_library.get_from_type("o"),
        ),
    ),
    gene_type="precursor",
)

# Define the forcefield terms.
definer_dict = {
    # Bonds.
    "ao": ("bond", 1.5, 1e5),
    "bc": ("bond", 1.5, 1e5),
    "co": ("bond", 1.0, 1e5),
    "cc": ("bond", 1.0, 1e5),
    "oo": ("bond", 1.0, 1e5),
    # Angles.
    "ccb": ("angle", 180.0, 1e2),
    "ooc": ("angle", 180.0, 1e2),
    "occ": ("angle", 180.0, 1e2),
    "ccc": ("angle", 180.0, 1e2),
    "oco": ("angle", 180.0, 1e2),
    "aoc": ("angle", 180.0, 1e2),
    "aoo": ("angle", 180.0, 1e2),
    "bco": ("angle", tuple(i for i in range(90, 181, 5)), 1e2),
    "cbc": ("angle", 180.0, 1e2),
    "oao": ("angle", tuple(i for i in range(50, 121, 5)), 1e2),
    # Torsions.
    "ocbco": ("tors", "0134", 180, 50, 1),
    # Nonbondeds.
    "a": ("nb", 10.0, 1.0),
    "b": ("nb", 10.0, 1.0),
    "c": ("nb", 10.0, 1.0),
    "o": ("nb", 10.0, 1.0),
}
chromo_it.add_forcefield_dict(definer_dict=definer_dict)

Then, you can run the genetic algorithm (I will not show all the information for brevity):

# Define the structure and fitness calculators.
fitness_calculator = cgx.systems_optimisation.FitnessCalculator(...)
structure_calculator = cgx.systems_optimisation.StructureCalculator(...)

# Set a random number generator for consistency. Normally, run over multiple
# seeds.
generator = np.random.default_rng(seed)

initial_population = chromo_it.select_random_population(
    generator,
    size=selection_size,
)

# This holds the generational information.
generations = []
# Define a generation!
generation = cgx.systems_optimisation.Generation(
    chromosomes=initial_population,
    fitness_calculator=fitness_calculator,
    structure_calculator=structure_calculator,
    num_processes=num_processes,
)
# Run the structures, and get its fitness.
generation.run_structures()
_ = generation.calculate_fitness_values()
generations.append(generation)

# Now we can iterate over mutations and generations.
for generation_id in range(1, num_generations + 1):
    # Extend the list of new chromosomes with mutations.
    merged_chromosomes = []
    merged_chromosomes.extend(
        chromo_it.mutate_population(
            list_of_chromosomes=generation.chromosomes,
            generator=generator,
            gene_range=chromo_it.get_term_ids(),
            selection="random",
            num_to_select=5,
            database=database,
        )
    )
    merged_chromosomes.extend(chromo_it.mutate_population(...))

    # Extend the list of new chromosomes with crossovers.
    merged_chromosomes.extend(
        chromo_it.crossover_population(
            list_of_chromosomes=generation.chromosomes,
            generator=generator,
            selection="random",
            num_to_select=5,
            database=database,
        )
    )
    merged_chromosomes.extend(chromo_it.crossover_population(...))

    # Extend the list of new chromosomes with the best from the last
    # generation.
    merged_chromosomes.extend(generation.select_best(selection_size=5))

    # Define the new generation and run its structures and fitness.
    generation = cgx.systems_optimisation.Generation(
        chromosomes=chromo_it.dedupe_population(merged_chromosomes),
        ...
    )

    # Build, optimise and analyse each structure.
    generation.run_structures()
    _ = generation.calculate_fitness_values()

    # Add final state to generations.
    generations.append(generation)

    # Select the best of the generation for the next generation.
    best = generation.select_best(selection_size=selection_size)
    generation = cgx.systems_optimisation.Generation(
        chromosomes=chromo_it.dedupe_population(best),
        ...
    )