# Distributed under the terms of the MIT License.
"""Optimisation inputs module.
Author: Andrew Tarzia
"""
import itertools as it
import logging
from collections import abc
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
import numpy as np
from cgexplore._internal.forcefields.forcefield import ForceField
from cgexplore._internal.molecular.beads import CgBead
from cgexplore.utilities import AtomliteDatabase
from .utilities import (
define_angle,
define_bond,
define_cosine_angle,
define_lennardjones,
define_nonbonded,
define_pyramid_angle,
define_torsion,
)
if TYPE_CHECKING:
from cgexplore._internal.terms.angles import TargetAngle, TargetCosineAngle
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s | %(levelname)s | %(message)s",
)
logger = logging.getLogger(__name__)
[docs]
@dataclass
class Chromosome:
"""Define the genes and properties of a chromosome."""
name: abc.Sequence[int]
present_beads: abc.Sequence[CgBead]
vdw_bond_cutoff: int
prefix: str
gene_dict: dict[int, tuple]
definer_dict: dict[str, tuple]
chromosomed_terms: dict[str, list[int]]
[docs]
def get_separated_string(self) -> str:
"""Get chromosome name separated by `-` as string."""
return "-".join(tuple(str(i) for i in self.name))
[docs]
def get_string(self) -> str:
"""Get chromosome name as string."""
return "".join(str(i) for i in self.name)
[docs]
def get_building_block_configurations(self) -> tuple:
"""Get the chromosomes building block configuration."""
return tuple(
self.gene_dict[i][1]
for i in self.gene_dict
if self.gene_dict[i][2] == "building_block_configuration"
)
[docs]
def get_vertex_alignments(self) -> tuple:
"""Get the chromosomes vertex alignments."""
return tuple(
self.gene_dict[i][1]
for i in self.gene_dict
if self.gene_dict[i][2] == "vertex_alignment"
)
[docs]
def get_building_blocks(self) -> tuple:
"""Get the chromosomes building blocks."""
return tuple(
self.gene_dict[i][1].get_building_block()
for i in self.gene_dict
if self.gene_dict[i][2] == "precursor"
)
[docs]
def get_precursors(self) -> tuple:
"""Get the chromosomes precursors."""
return tuple(
self.gene_dict[i][1]
for i in self.gene_dict
if self.gene_dict[i][2] == "precursor"
)
[docs]
def get_forcefield(self) -> ForceField: # noqa: C901, PLR0912
"""Get chromosome forcefield."""
changed_tuples = tuple(
self.gene_dict[i][1]
for i in self.gene_dict
if self.gene_dict[i][2] == "term"
)
# Change to term: (new value, index in interaction list).
changed_terms = {i[0]: (i[1], i[2]) for i in changed_tuples}
bond_terms: list = []
angle_terms: list[TargetAngle | TargetCosineAngle] = []
torsion_terms: list = []
nonbonded_terms: list = []
for key_ in self.definer_dict:
if key_ in changed_terms:
ranged_term = self.definer_dict[key_]
# Get terms based on all changes to this term in chromosome.
term = [0] * len(ranged_term)
term[0] = ranged_term[0]
c_ids_part_of = self.chromosomed_terms[key_]
# Get the id of chromosomes (key to map), that this term is in.
ranged_indices = set()
for c_id in c_ids_part_of:
ranged_id_definition = self.gene_dict[c_id][1]
gene_value = ranged_id_definition[1]
gene_index = ranged_id_definition[2]
term[gene_index] = gene_value
ranged_indices.add(gene_index)
for i in range(len(ranged_term[1:])):
if i + 1 in ranged_indices:
continue
term[i + 1] = ranged_term[i + 1]
term = tuple(term) # type: ignore[assignment]
else:
term = self.definer_dict[key_] # type: ignore[assignment]
if term[0] == "bond":
bond_terms.append( # type: ignore[unreachable]
define_bond(
interaction_key=key_,
interaction_list=term,
present_beads=self.present_beads,
)
)
elif term[0] == "angle":
angle_terms.append( # type: ignore[unreachable]
define_angle(
interaction_key=key_,
interaction_list=term,
present_beads=self.present_beads,
)
)
elif term[0] == "pyramid":
angle_terms.append( # type: ignore[unreachable]
define_pyramid_angle(
interaction_key=key_,
interaction_list=term,
present_beads=self.present_beads,
)
)
elif term[0] == "cosine":
angle_terms.append( # type: ignore[unreachable]
define_cosine_angle(
interaction_key=key_,
interaction_list=term,
present_beads=self.present_beads,
)
)
elif term[0] == "tors":
torsion_terms.append( # type: ignore[unreachable]
define_torsion(
interaction_key=key_,
interaction_list=term,
present_beads=self.present_beads,
)
)
elif term[0] == "nb":
nonbonded_terms.append( # type: ignore[unreachable]
define_nonbonded(
interaction_key=key_,
interaction_list=term,
present_beads=self.present_beads,
)
)
elif term[0] == "custom-lj":
nonbonded_terms.append( # type: ignore[unreachable]
define_lennardjones(
interaction_key=key_,
interaction_list=term,
present_beads=self.present_beads,
)
)
else:
msg = f"{term[0]} not found in known terms."
raise RuntimeError(msg)
return ForceField(
identifier=self.get_string(),
prefix=self.prefix,
present_beads=self.present_beads,
bond_targets=tuple(bond_terms),
angle_targets=tuple(angle_terms),
torsion_targets=tuple(torsion_terms),
nonbonded_targets=tuple(nonbonded_terms),
vdw_bond_cutoff=self.vdw_bond_cutoff,
verbose=False,
)
def __str__(self) -> str:
"""Return a string representation of the Chromosome."""
return f"{self.__class__.__name__}({self.get_string()})"
def __repr__(self) -> str:
"""Return a string representation of the Chromosome."""
return str(self)
[docs]
@dataclass
class ChromosomeGenerator:
"""Hold all information for chromosome iteration."""
present_beads: abc.Sequence[CgBead]
vdw_bond_cutoff: int
prefix: str
chromosome_map: dict[int, dict] = field(default_factory=dict)
chromosome_types: dict[int, str] = field(default_factory=dict)
chromosomed_terms: dict[str, list[int]] = field(default_factory=dict)
definer_dict: dict[str, tuple] = field(default_factory=dict)
[docs]
def get_num_chromosomes(self) -> int:
"""Return the number of chromosomes."""
num_per_genes = [
len(self.chromosome_map[gene]) for gene in self.chromosome_map
]
num_chromosomes = np.prod(num_per_genes)
return int(num_chromosomes)
[docs]
def add_gene(self, iteration: abc.Iterable, gene_type: str) -> None:
"""Add a gene to the chromosome generator, to be iterated over.
Parameters:
iteration:
The range of values to use in the chromosome.
For term: use the `add_forcefield_dict`
gene_type:
A string defining the gene type.
Can be `term`, `topology`, `precursor`, `forcefield`,
`vertex_alignment`, `building_block_configuration`.
"""
_known_terms = (
"term",
"topology",
"precursor",
"forcefield",
"vertex_alignment",
"building_block_configuration",
)
if gene_type not in _known_terms:
msg = f"gene_type not in {_known_terms}"
raise RuntimeError(msg)
known_types = set(self.chromosome_types.values())
if "term" in known_types and gene_type == "forcefield":
msg = "cannot add `forcefield` and `term` in the same chromosome."
raise RuntimeError(msg)
if "forcefield" in known_types and gene_type == "term":
msg = "cannot add `forcefield` and `term` in the same chromosome."
raise RuntimeError(msg)
chromo_index = len(self.chromosome_map)
self.chromosome_types[chromo_index] = gene_type
self.chromosome_map[chromo_index] = dict(enumerate(iteration))
if gene_type == "term":
term_key = next(
i[0] for i in self.chromosome_map[chromo_index].values()
)
if term_key not in self.chromosomed_terms:
self.chromosomed_terms[term_key] = []
self.chromosomed_terms[term_key].append(chromo_index)
[docs]
def add_forcefield_dict(self, definer_dict: dict[str, tuple]) -> None:
"""Add genes based on a forcefield dictionary.
Parameters:
definer_dict:
The forcefield dictionary.
Format:
"""
self.definer_dict = definer_dict
for key, definer in definer_dict.items():
for comp_id, comp in enumerate(definer[1:]):
if not isinstance(comp, float | int | str):
self.add_gene(
iteration=tuple((key, i, comp_id + 1) for i in comp),
gene_type="term",
)
[docs]
def yield_chromosomes(self) -> abc.Iterable[Chromosome]:
"""Yield chromosomes."""
chromosome_ids = sorted(self.chromosome_map.keys())
iteration = it.product(
*(self.chromosome_map[i] for i in chromosome_ids)
)
known_types = set(self.chromosome_types.values())
for chromosome_name in iteration:
gene_dict = {}
for gene_id, gene in enumerate(chromosome_name):
gene_value = self.chromosome_map[gene_id][gene]
gene_type = self.chromosome_types[gene_id]
gene_dict[gene_id] = (gene, gene_value, gene_type)
if "forcefield" in known_types:
# In this case, the definer dict changes per chromosome!
ff_id = next(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "forcefield"
)
definer_dict = gene_dict[ff_id][1]
else:
definer_dict = self.definer_dict
yield Chromosome(
name=chromosome_name,
prefix=self.prefix,
present_beads=self.present_beads,
vdw_bond_cutoff=self.vdw_bond_cutoff,
gene_dict=gene_dict,
definer_dict=definer_dict,
chromosomed_terms=self.chromosomed_terms,
)
[docs]
def define_chromosomes(self) -> None:
"""Get all chromosomes that have been defined.
If the chromosome space is large, this can be super expensive!
"""
all_chromosomes = {}
for chromosome in self.yield_chromosomes():
all_chromosomes[chromosome.name] = chromosome
self.chromosomes = all_chromosomes
logger.info("there are %s chromosomes", len(self.chromosomes))
[docs]
def get_term_ids(self) -> abc.Sequence[int]:
"""Get chromosome indices associated with terms."""
return tuple(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "term"
)
[docs]
def get_topo_ids(self) -> abc.Sequence[int]:
"""Get chromosome indices associated with topology."""
return tuple(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "topology"
)
[docs]
def get_va_ids(self) -> abc.Sequence[int]:
"""Get chromosome indices associated with vertex alignments."""
return tuple(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "vertex_alignment"
)
[docs]
def get_bc_ids(self) -> abc.Sequence[int]:
"""Get chromosome indices associated with building block configs."""
return tuple(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "building_block_configuration"
)
[docs]
def get_prec_ids(self) -> abc.Sequence[int]:
"""Get chromosome indices associated with precursors."""
return tuple(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "precursor"
)
[docs]
def select_chromosome(self, chromosome: abc.Sequence[int]) -> Chromosome:
"""Get chromosome."""
known_types = set(self.chromosome_types.values())
gene_dict = {}
for gene_id in self.chromosome_map:
gene = chromosome[gene_id]
gene_value = self.chromosome_map[gene_id][gene]
gene_type = self.chromosome_types[gene_id]
gene_dict[gene_id] = (gene, gene_value, gene_type)
if "forcefield" in known_types:
# In this case, the definer dict changes per chromosome!
ff_id = next(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "forcefield"
)
definer_dict = gene_dict[ff_id][1]
else:
definer_dict = self.definer_dict
return Chromosome(
name=tuple(chromosome),
prefix=self.prefix,
present_beads=self.present_beads,
vdw_bond_cutoff=self.vdw_bond_cutoff,
gene_dict=gene_dict,
definer_dict=definer_dict,
chromosomed_terms=self.chromosomed_terms,
)
[docs]
def select_random_population(
self,
generator: np.random.Generator,
size: int,
) -> abc.Sequence[Chromosome]:
"""Select `size` instances from population."""
selected = []
known_types = set(self.chromosome_types.values())
for _ in range(size):
gene_dict = {}
chromosome_name = [0 for i in range(len(self.chromosome_map))]
for gene_id in self.chromosome_map:
chromosome_options = range(len(self.chromosome_map[gene_id]))
gene = generator.choice(chromosome_options)
gene_value = self.chromosome_map[gene_id][gene]
gene_type = self.chromosome_types[gene_id]
gene_dict[gene_id] = (gene, gene_value, gene_type)
chromosome_name[gene_id] = gene
if "forcefield" in known_types:
# In this case, the definer dict changes per chromosome!
ff_id = next(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "forcefield"
)
definer_dict = gene_dict[ff_id][1]
else:
definer_dict = self.definer_dict
selected.append(
Chromosome(
name=tuple(chromosome_name),
prefix=self.prefix,
present_beads=self.present_beads,
vdw_bond_cutoff=self.vdw_bond_cutoff,
gene_dict=gene_dict,
definer_dict=definer_dict,
chromosomed_terms=self.chromosomed_terms,
)
)
return selected
[docs]
def dedupe_population(
self,
list_of_chromosomes: abc.Sequence[Chromosome],
) -> abc.Sequence[Chromosome]:
"""Deduplicate the list of chromosomes."""
tuples = {i.name for i in list_of_chromosomes}
return [self.select_chromosome(i) for i in tuples]
[docs]
def select_similar_chromosome(
self,
chromosome: Chromosome,
free_gene_id: int,
) -> abc.Sequence[Chromosome]:
"""Select chromosomes where only one gene is allowed to change."""
selected = []
filter_range = tuple(
i for i in self.chromosome_map if i != free_gene_id
)
for new_chromosome in self.yield_chromosomes():
if any(
new_chromosome.name[k] != chromosome.name[k]
for k in filter_range
):
continue
if chromosome.name == new_chromosome.name:
continue
selected.append(new_chromosome)
return selected
[docs]
def mutate_population( # noqa: PLR0913
self,
chromosomes: dict[str, Chromosome],
generator: np.random.Generator,
gene_range: abc.Sequence[int],
selection: str,
num_to_select: int,
database: AtomliteDatabase,
) -> abc.Sequence[Chromosome]:
"""Mutate chromosomes in the gene range only.
Available selections for which chromosomes to mutate:
random:
uses generator.choice()
roulette:
adds weight to generator.choice() based on
fitness/sum(fitness)
Parameters:
chromosomes:
Dictionary of chromosomes with keys corresponding to their keys
in the provided database.
generator:
Random number generator.
gene_range:
Range of genes, by their indices in the chromosome, to mutate.
selection:
How to select chromosomes to mutate (random or roulette).
num_to_select:
Number of chromosomes to mutate.
database:
atomlite database containing the chromosome data.
Returns:
List of chromosomes to add to population.
"""
# Select chromosomes to mutate.
if selection == "random":
selected = generator.choice(
np.asarray(list(chromosomes.values())),
size=num_to_select,
)
elif selection == "roulette":
fitness_values: list[float | int] = [
database.get_property_entry(key).properties["fitness"] # type: ignore[misc]
for key in chromosomes
]
# Handle if all fitness values are 0.
try:
weights = [i / sum(fitness_values) for i in fitness_values]
except ZeroDivisionError:
weights = [1 / len(fitness_values) for i in fitness_values]
selected = generator.choice(
np.asarray(list(chromosomes.values())),
size=num_to_select,
p=weights,
)
else:
msg = f"{selection} is not defined."
raise RuntimeError(msg)
mutated = []
known_types = set(self.chromosome_types.values())
for chromosome in selected:
gene_dict = {}
chromosome_name = [0 for i in range(len(self.chromosome_map))]
for gene_id in self.chromosome_map:
if gene_id not in gene_range:
gene = chromosome.name[gene_id]
else:
chromosome_options = tuple(
range(len(self.chromosome_map[gene_id]))
)
gene = generator.choice(chromosome_options)
gene_value = self.chromosome_map[gene_id][gene]
gene_type = self.chromosome_types[gene_id]
gene_dict[gene_id] = (gene, gene_value, gene_type)
chromosome_name[gene_id] = gene
if "forcefield" in known_types:
# In this case, the definer dict changes per chromosome!
ff_id = next(
i
for i in self.chromosome_types
if self.chromosome_types[i] == "forcefield"
)
definer_dict = gene_dict[ff_id][1]
else:
definer_dict = self.definer_dict
mutated.append(
Chromosome(
name=tuple(chromosome_name),
prefix=self.prefix,
present_beads=self.present_beads,
vdw_bond_cutoff=self.vdw_bond_cutoff,
gene_dict=gene_dict,
definer_dict=definer_dict,
chromosomed_terms=self.chromosomed_terms,
)
)
return mutated
[docs]
def crossover_population(
self,
chromosomes: dict[str, Chromosome],
generator: np.random.Generator,
selection: str,
num_to_select: int,
database: AtomliteDatabase,
) -> abc.Sequence[Chromosome]:
"""Crossover chromosomes.
Available selections for which chromosomes to mutate:
random:
uses generator.choice()
roulette:
adds weight to generator.choice() based on
fitness/sum(fitness)
Parameters:
chromosomes:
Dictionary of chromosomes with keys corresponding to their keys
in the provided database.
generator:
Random number generator.
selection:
How to select chromosomes to mutate (random or roulette).
num_to_select:
Number of chromosomes to mutate.
database:
atomlite database containing the chromosome data.
Returns:
List of chromosomes to add to population.
"""
# Select chromosomes to cross.
if selection == "random":
selected = generator.choice(
np.asarray(list(chromosomes.values())),
size=(num_to_select, 2),
)
elif selection == "roulette":
fitness_values: list[float | int] = [
database.get_property_entry(key).properties["fitness"] # type: ignore[misc]
for key in chromosomes
]
# Handle if all fitness values are 0.
try:
weights = [i / sum(fitness_values) for i in fitness_values]
except ZeroDivisionError:
weights = [1 / len(fitness_values) for i in fitness_values]
selected = generator.choice(
np.asarray(list(chromosomes.values())),
size=(num_to_select, 2),
p=weights,
)
else:
msg = f"{selection} is not defined."
raise RuntimeError(msg)
crossed = []
for chromosome1, chromosome2 in selected:
# Randomly select the genes to cross.
nums_to_select_from = range(len(chromosome1.name))
num_to_cross = generator.choice(nums_to_select_from, size=1)
genes_to_cross = set(
generator.choice(nums_to_select_from, size=num_to_cross[0])
)
# Cross them.
new_chromosome1 = tuple(
val if i not in genes_to_cross else chromosome2.name[i]
for i, val in enumerate(chromosome1.name)
)
new_chromosome2 = tuple(
val if i not in genes_to_cross else chromosome1.name[i]
for i, val in enumerate(chromosome2.name)
)
# Append the new chromosomes.
crossed.append(self.select_chromosome(new_chromosome1))
crossed.append(self.select_chromosome(new_chromosome2))
return crossed
[docs]
def get_population_neighbours(
self,
chromosomes: dict[str, Chromosome],
gene_range: abc.Sequence[int],
selection: str,
) -> abc.Sequence[Chromosome]:
"""Get all nearest neighbours of provided chromosomes.
Available selections for which chromosomes to mutate:
all:
selects all chromosomes.
Parameters:
chromosomes:
Dictionary of chromosomes with keys corresponding to their keys
in the provided database.
gene_range:
Range of genes, by their indices in the chromosome, to mutate.
selection:
How to select chromosomes to mutate (all).
Returns:
List of chromosomes to add to population.
"""
# Select chromosomes to mutate.
if selection == "all":
selected = np.asarray(list(chromosomes.values()))
else:
msg = f"{selection} is not defined."
raise RuntimeError(msg)
neighbours = []
known_types = set(self.chromosome_types.values())
for chromosome in selected:
if "forcefield" in known_types:
msg = (
"Forcefield chromosomes are not supported for neighbour"
" collation."
)
raise NotImplementedError(msg)
definer_dict = self.definer_dict
gene_dict = {}
chromosome_name = [0 for i in range(len(self.chromosome_map))]
# Add known ones to chromosome name.
to_modify = []
for gene_id in self.chromosome_map:
if gene_id not in gene_range:
gene = chromosome.name[gene_id]
else:
to_modify.append(gene_id)
continue
gene_value = self.chromosome_map[gene_id][gene]
gene_type = self.chromosome_types[gene_id]
gene_dict[gene_id] = (gene, gene_value, gene_type)
chromosome_name[gene_id] = gene
for gene_id in to_modify:
current_gene = chromosome.name[gene_id]
for option in (
current_gene + 1,
current_gene - 1,
current_gene + 2,
current_gene - 2,
current_gene + 3,
current_gene - 3,
current_gene + 4,
current_gene - 4,
):
try:
gene_value = self.chromosome_map[gene_id][option]
except KeyError:
continue
gene_type = self.chromosome_types[gene_id]
gene_dict[gene_id] = (option, gene_value, gene_type)
chromosome_name[gene_id] = option
neighbours.append(
Chromosome(
name=tuple(chromosome_name),
prefix=self.prefix,
present_beads=self.present_beads,
vdw_bond_cutoff=self.vdw_bond_cutoff,
gene_dict=gene_dict,
definer_dict=definer_dict,
chromosomed_terms=self.chromosomed_terms,
)
)
return neighbours
def __str__(self) -> str:
"""Return a string representation of the ChromosomeGenerator."""
_num_chromosomes = self.get_num_chromosomes()
_num_genes = len(self.chromosome_map)
return f"{self.__class__.__name__}(num_genes={_num_genes})"
def __repr__(self) -> str:
"""Return a string representation of the Chromosome."""
return str(self)