# Distributed under the terms of the MIT License.
"""Module for shape analysis.
Author: Andrew Tarzia
"""
import logging
import os
import pathlib
import shutil
import subprocess as sp
from collections.abc import Sequence
import numpy as np
import stk
from cgexplore._internal.molecular.beads import string_to_atom_number
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s | %(levelname)s | %(message)s",
)
logger = logging.getLogger(__name__)
[docs]
class ShapeMeasure:
"""Uses Shape to calculate the shape of coordinates.
Shape: http://www.ee.ub.edu/
"""
def __init__(
self,
output_dir: pathlib.Path | str,
shape_path: pathlib.Path | str,
shape_string: str | None = None,
) -> None:
"""Initialize shape measure."""
self._output_dir = output_dir
self._shape_path = shape_path
self._shape_dict: dict[str, dict]
if shape_string is None:
self._shape_dict = self.reference_shape_dict()
else:
self._shape_dict = {
shape_string: self.reference_shape_dict()[shape_string]
}
self._num_vertex_options = tuple(
{int(self._shape_dict[i]["vertices"]) for i in self._shape_dict}
)
def _test_shape_mol(self, expected_points: int, atoms: list) -> None:
num_atoms = len(atoms)
if num_atoms != expected_points:
msg = (
f"Expected num points not met ({expected_points}); has "
f"{num_atoms}"
)
raise ValueError(msg)
[docs]
def fill_position_matrix_molecule(
self,
molecule: stk.Molecule,
elements: str | Sequence[str],
old_position_matrix: np.ndarray,
) -> tuple[list, list]:
"""Make position matrix from filtering molecule based on element."""
position_matrix = []
atoms: list[stk.Atom] = []
if isinstance(elements, str):
elements = (elements,)
target_anum = tuple(string_to_atom_number(i) for i in elements)
for atom in molecule.get_atoms():
atomic_number = atom.get_atomic_number() # type: ignore[union-attr]
if atomic_number in target_anum:
new_atom = stk.Atom(
id=len(atoms),
atomic_number=atom.get_atomic_number(),
charge=atom.get_charge(),
)
atoms.append(new_atom)
position_matrix.append(old_position_matrix[atom.get_id()])
return position_matrix, atoms
[docs]
def get_shape_molecule_byelements(
self,
molecule: stk.Molecule,
elements: str | Sequence[str],
expected_points: int,
) -> stk.Molecule | None:
"""Get molecule to analyse by filtering by element."""
old_position_matrix = molecule.get_position_matrix()
position_matrix, atoms = self.fill_position_matrix_molecule(
molecule=molecule,
elements=elements,
old_position_matrix=old_position_matrix,
)
self._test_shape_mol(expected_points, atoms)
return stk.BuildingBlock.init(
atoms=atoms,
bonds=(),
position_matrix=np.array(position_matrix),
)
def _collect_all_shape_values(self, output_file: pathlib.Path) -> dict:
"""Collect shape values from output."""
with output_file.open("r") as f:
lines = f.readlines()
label_idx_map = {}
values = None
for line in reversed(lines):
if "Structure" in line:
values = [
i.strip()
for i in line.rstrip().split("]")[1].split(" ")
if i.strip()
]
label_idx_map = {symb: idx for idx, symb in enumerate(values)}
break
float_values = [i.strip() for i in line.rstrip().split(",")]
if values is None:
logger.info("no shapes found due to overlapping atoms")
shapes = {}
else:
shapes = {
i: float(float_values[1 + label_idx_map[i]])
for i in label_idx_map
}
return shapes
def _write_input_file(
self,
input_file: pathlib.Path,
structure_string: str,
) -> None:
"""Write input file for shape."""
num_vertices = len(structure_string.split("\n")) - 2
possible_shapes = self._get_possible_shapes(num_vertices)
shape_numbers = tuple(i["code"] for i in possible_shapes)
title = "$shape run by Andrew Tarzia - central atom=0 always.\n"
fix_perm = (
"%fixperm 0\n" if num_vertices > 12 else "\n" # noqa: PLR2004
)
size_of_poly = f"{num_vertices} 0\n"
codes = " ".join(shape_numbers) + "\n"
string = title + fix_perm + size_of_poly + codes + structure_string
with input_file.open("w") as f:
f.write(string)
def _run_calculation(self, structure_string: str) -> dict:
"""Calculate the shape of a molecule."""
input_file = pathlib.Path("shp.dat")
std_out = pathlib.Path("shp.out")
output_file = pathlib.Path("shp.tab")
self._write_input_file(
input_file=input_file,
structure_string=structure_string,
)
# Note that sp.call will hold the program until completion
# of the calculation.
captured_output = sp.run( # noqa: S603
[f"{self._shape_path}", f"{input_file}"],
stdin=sp.PIPE,
capture_output=True,
check=True,
# Shell is required to run complex arguments.
)
with std_out.open("w") as f:
f.write(str(captured_output.stdout))
return self._collect_all_shape_values(output_file)
def _get_centroids(
self,
molecule: stk.ConstructedMolecule,
target_atmnums: Sequence[int],
) -> list[np.ndarray]:
bb_ids: dict[int | None, list] = {}
for ai in molecule.get_atom_infos():
aibbid = ai.get_building_block_id()
if ai.get_atom().get_atomic_number() in target_atmnums:
if aibbid not in bb_ids:
bb_ids[aibbid] = []
bb_ids[aibbid].append(ai.get_atom().get_id())
centroids = [molecule.get_centroid(atom_ids=bb_ids[n]) for n in bb_ids]
with pathlib.Path("cents.xyz").open("w") as f:
f.write(f"{len(centroids)}\n\n")
f.writelines(f"Zn {c[0]} {c[1]} {c[2]}\n" for c in centroids)
return centroids
def _get_possible_shapes(self, num_vertices: int) -> Sequence[dict]:
ref_dict = self.reference_shape_dict()
return tuple(
ref_dict[i]
for i in ref_dict
if int(ref_dict[i]["vertices"]) == num_vertices
)
[docs]
def calculate(self, molecule: stk.Molecule) -> dict:
"""Calculate shape measures for a molecule."""
output_dir = pathlib.Path(self._output_dir).resolve()
if output_dir.exists():
shutil.rmtree(output_dir)
output_dir.mkdir()
init_dir = pathlib.Path.cwd()
try:
os.chdir(output_dir)
structure_string = "shape run by AT\n"
num_centroids = 0
pos_mat = molecule.get_position_matrix()
for a in molecule.get_atoms():
c = pos_mat[a.get_id()]
structure_string += (
f"{a.__class__.__name__} {c[0]} {c[1]} {c[2]}\n"
)
num_centroids += 1
if num_centroids not in self._num_vertex_options:
msg = (
f"you gave {num_centroids} vertices, but expected to "
f"calculate shapes with {self._num_vertex_options} options"
)
raise ValueError(msg)
shapes = self._run_calculation(structure_string)
finally:
os.chdir(init_dir)
return shapes
[docs]
def calculate_from_centroids(
self,
constructed_molecule: stk.ConstructedMolecule,
target_atmnums: Sequence[int],
) -> dict:
"""Calculate shape from the centroids of building blocks.
Currently not implemented well.
"""
output_dir = pathlib.Path(self._output_dir).resolve()
if output_dir.exists():
shutil.rmtree(output_dir)
output_dir.mkdir()
init_dir = pathlib.Path.cwd()
try:
os.chdir(output_dir)
centroids = self._get_centroids(
constructed_molecule, target_atmnums
)
structure_string = "shape run by AT\n"
num_centroids = 0
for c in centroids:
structure_string += f"Zn {c[0]} {c[1]} {c[2]}\n"
num_centroids += 1
if num_centroids not in self._num_vertex_options:
msg = (
f"you gave {num_centroids} vertices, but expected to "
f"calculate shapes with {self._num_vertex_options} options"
)
raise ValueError(msg)
shapes = self._run_calculation(structure_string)
finally:
os.chdir(init_dir)
return shapes
[docs]
def reference_shape_dict(self) -> dict[str, dict]:
"""Reference shapes as dictionary."""
return {
"L-2": {
"code": "1",
"label": "L-2",
"vertices": "2",
"shape": "Linear D∞h",
},
"vT-2": {
"code": "2",
"label": "vT-2",
"vertices": "2",
"shape": "Divacant tetrahedron (V-shape, 109.47º) C2v",
},
"vOC-2": {
"code": "3",
"label": "vOC-2",
"vertices": "2",
"shape": "Tetravacant octahedron (L-shape, 90º) C2v",
},
"TP-3": {
"vertices": "3",
"code": "1",
"label": "TP-3",
"shape": "Trigonal planar D3h",
},
"vT-3": {
"vertices": "3",
"code": "2",
"label": "vT-3",
"shape": "Pyramid‡ (vacant tetrahedron) C3v",
},
"fac-vOC-3": {
"vertices": "3",
"code": "3",
"label": "fac-vOC-3",
"shape": "fac-Trivacant octahedron C3v",
},
"mer-vOC-3": {
"vertices": "3",
"code": "4",
"label": "mer-vOC-3",
"shape": "mer-Trivacant octahedron (T-shape) C2v",
},
"SP-4": {
"code": "1",
"label": "SP-4",
"vertices": "4",
"shape": "Square D4h",
},
"T-4": {
"code": "2",
"label": "T-4",
"vertices": "4",
"shape": "Tetrahedron Td",
},
"SS-4": {
"code": "3",
"label": "SS-4",
"vertices": "4",
"shape": "Seesaw or sawhorse‡ (cis-divacant octahedron) C2v",
},
"vTBPY-4": {
"code": "4",
"label": "vTBPY-4",
"vertices": "4",
"shape": "Axially vacant trigonal bipyramid C3v",
},
"PP-5": {
"code": "1",
"vertices": "5",
"label": "PP-5",
"shape": "Pentagon D5h",
},
"vOC-5": {
"code": "2",
"vertices": "5",
"label": "vOC-5",
"shape": "Vacant octahedron‡ (Johnson square pyramid, J1) C4v",
},
"TBPY-5": {
"code": "3",
"vertices": "5",
"label": "TBPY-5",
"shape": "Trigonal bipyramid D3h",
},
"SPY-5": {
"code": "4",
"vertices": "5",
"label": "SPY-5",
"shape": "Square pyramid § C4v",
},
"JTBPY-5": {
"code": "5",
"vertices": "5",
"label": "JTBPY-5",
"shape": "Johnson trigonal bipyramid (J12) D3h",
},
"HP-6": {
"code": "1",
"label": "HP-6",
"vertices": "6",
"shape": "Hexagon D6h",
},
"PPY-6": {
"code": "2",
"label": "PPY-6",
"vertices": "6",
"shape": "Pentagonal pyramid C5v",
},
"OC-6": {
"code": "3",
"label": "OC-6",
"vertices": "6",
"shape": "Octahedron Oh",
},
"TPR-6": {
"code": "4",
"label": "TPR-6",
"vertices": "6",
"shape": "Trigonal prism D3h",
},
"JPPY-5": {
"code": "5",
"label": "JPPY-5",
"vertices": "6",
"shape": "Johnson pentagonal pyramid (J2) C5v",
},
"HP-7": {
"code": "1",
"vertices": "7",
"label": "HP-7",
"shape": "Heptagon D7h",
},
"HPY-7": {
"code": "2",
"vertices": "7",
"label": "HPY-7",
"shape": "Hexagonal pyramid C6v",
},
"PBPY-7": {
"code": "3",
"vertices": "7",
"label": "PBPY-7",
"shape": "Pentagonal bipyramid D5h",
},
"COC-7": {
"code": "4",
"vertices": "7",
"label": "COC-7",
"shape": "Capped octahedron * C3v",
},
"CTPR-7": {
"code": "5",
"vertices": "7",
"label": "CTPR-7",
"shape": "Capped trigonal prism * C2v",
},
"JPBPY-7": {
"code": "6",
"vertices": "7",
"label": "JPBPY-7",
"shape": "Johnson pentagonal bipyramid (J13) D5h",
},
"JETPY-7": {
"code": "7",
"vertices": "7",
"label": "JETPY-7",
"shape": "Elongated triangular pyramid (J7) C3v",
},
"OP-8": {
"code": "1",
"label": "OP-8",
"vertices": "8",
"shape": "Octagon D8h",
},
"HPY-8": {
"code": "2",
"label": "HPY-8",
"vertices": "8",
"shape": "Heptagonal pyramid C7v",
},
"HBPY-8": {
"code": "3",
"label": "HBPY-8",
"vertices": "8",
"shape": "Hexagonal bipyramid D6h",
},
"CU-8": {
"code": "4",
"label": "CU-8",
"vertices": "8",
"shape": "Cube Oh",
},
"SAPR-8": {
"code": "5",
"label": "SAPR-8",
"vertices": "8",
"shape": "Square antiprism D4d",
},
"TDD-8": {
"code": "6",
"label": "TDD-8",
"vertices": "8",
"shape": "Triangular dodecahedron D2d",
},
"JGBF-8": {
"code": "7",
"label": "JGBF-8",
"vertices": "8",
"shape": "Johnson - Gyrobifastigium (J26) D2d",
},
"JETBPY-8": {
"code": "8",
"label": "JETBPY-8",
"vertices": "8",
"shape": "Johnson - Elongated triangular bipyramid (J14) D3h",
},
"JBTP-8": {
"code": "9",
"label": "JBTP-8",
"vertices": "8",
"shape": "Johnson - Biaugmented trigonal prism (J50) C2v",
},
"BTPR-8": {
"code": "10",
"label": "BTPR-8",
"vertices": "8",
"shape": "Biaugmented trigonal prism C2v",
},
"JSD-8": {
"code": "11",
"label": "JSD-8",
"vertices": "8",
"shape": "Snub disphenoid (J84) D2d",
},
"TT-8": {
"code": "12",
"label": "TT-8",
"vertices": "8",
"shape": "Triakis tetrahedron Td",
},
"ETBPY-8": {
"code": "13",
"label": "ETBPY-8",
"vertices": "8",
"shape": "Elongated trigonal bipyramid (see 8) D3h",
},
"EP-9": {
"code": "1",
"vertices": "9",
"label": "EP-9",
"shape": "Enneagon D9h",
},
"OPY-9": {
"code": "2",
"vertices": "9",
"label": "OPY-9",
"shape": "Octagonal pyramid C8v",
},
"HBPY-9": {
"code": "3",
"vertices": "9",
"label": "HBPY-9",
"shape": "Heptagonal bipyramid D7h",
},
"JTC-9": {
"code": "4",
"vertices": "9",
"label": "JTC-9",
"shape": (
"Triangular cupola (J3) = trivacant cuboctahedron C3v"
),
},
"JCCU-9": {
"code": "5",
"vertices": "9",
"label": "JCCU-9",
"shape": ("Capped cube (Elongated square pyramid, J8) C4v"),
},
"CCU-9": {
"code": "6",
"vertices": "9",
"label": "CCU-9",
"shape": "Capped cube C4v",
},
"JCSAPR-9": {
"code": "7",
"vertices": "9",
"label": "JCSAPR-9",
"shape": (
"Capped sq. antiprism (Gyroelongated square "
"pyramid J10) C4v"
),
},
"CSAPR-9": {
"code": "8",
"vertices": "9",
"label": "CSAPR-9",
"shape": "Capped square antiprism C4v",
},
"JTCTPR-9": {
"code": "9",
"vertices": "9",
"label": "JTCTPR-9",
"shape": "Tricapped trigonal prism (J51) D3h",
},
"TCTPR-9": {
"code": "10",
"vertices": "9",
"label": "TCTPR-9",
"shape": "Tricapped trigonal prism D3h",
},
"JTDIC-9": {
"code": "11",
"vertices": "9",
"label": "JTDIC-9",
"shape": "Tridiminished icosahedron (J63) C3v",
},
"HH-9": {
"code": "12",
"vertices": "9",
"label": "HH-9",
"shape": "Hula-hoop C2v",
},
"MFF-9": {
"code": "13",
"vertices": "9",
"label": "MFF-9",
"shape": "Muffin Cs",
},
"DP-10": {
"code": "1",
"vertices": "10",
"label": "DP-10",
"shape": "Decagon D10h",
},
"EPY-10": {
"code": "2",
"vertices": "10",
"label": "EPY-10",
"shape": "Enneagonal pyramid C9v",
},
"OBPY-10": {
"code": "3",
"vertices": "10",
"label": "OBPY-10",
"shape": "Octagonal bipyramid D8h",
},
"PPR-10": {
"code": "4",
"vertices": "10",
"label": "PPR-10",
"shape": "Pentagonal prism D5h",
},
"PAPR-10": {
"code": "5",
"vertices": "10",
"label": "PAPR-10",
"shape": "Pentagonal antiprism D5d",
},
"JBCCU-10": {
"code": "6",
"vertices": "10",
"label": "JBCCU-10",
"shape": (
"Bicapped cube (Elongated square bipyramid J15) D4h"
),
},
"JBCSAPR-10": {
"code": "7",
"vertices": "10",
"label": "JBCSAPR-10",
"shape": (
"Bicapped square antiprism (Gyroelongated square "
"bipyramid J17) D4d"
),
},
"JMBIC-10": {
"code": "8",
"vertices": "10",
"label": "JMBIC-10",
"shape": "Metabidiminished icosahedron (J62) C2v",
},
"JATDI-10": {
"code": "9",
"vertices": "10",
"label": "JATDI-10",
"shape": "Augmented tridiminished icosahedron (J64) C3v",
},
"JSPC-10": {
"code": "10",
"vertices": "10",
"label": "JSPC-10",
"shape": "Sphenocorona (J87) C2v",
},
"SDD-10": {
"code": "11",
"vertices": "10",
"label": "SDD-10",
"shape": "Staggered dodecahedron (2:6:2) # D2",
},
"TD-10": {
"code": "12",
"vertices": "10",
"label": "TD-10",
"shape": "Tetradecahedron (2:6:2) C2v",
},
"HD-10": {
"code": "13",
"vertices": "10",
"label": "HD-10",
"shape": "Hexadecahedron (2:6:2, or 1:4:4:1) D4h",
},
"HP-11": {
"code": "1",
"vertices": "11",
"label": "HP-11",
"shape": "Hendecagon D11h",
},
"DPY-11": {
"code": "2",
"vertices": "11",
"label": "DPY-11",
"shape": "Decagonal pyramid C10v",
},
"EBPY-11": {
"code": "3",
"vertices": "11",
"label": "EBPY-11",
"shape": "Enneagonal bipyramid D9h",
},
"JCPPR-11": {
"code": "4",
"vertices": "11",
"label": "JCPPR-11",
"shape": (
"Capped pent. Prism (Elongated pentagonal pyramid J9) C5v"
),
},
"JCPAPR-11": {
"code": "5",
"vertices": "11",
"label": "JCPAPR-11",
"shape": (
"Capped pent. antiprism (Gyroelongated pentagonal "
"pyramid J11) C5v"
),
},
"JAPPR-11": {
"code": "6",
"vertices": "11",
"label": "JAPPR-11",
"shape": "Augmented pentagonal prism (J52) C2v",
},
"JASPC-11": {
"code": "7",
"vertices": "11",
"label": "JASPC-11",
"shape": "Augmented sphenocorona (J87) Cs",
},
"DP-12": {
"code": "1",
"vertices": "12",
"label": "DP-12",
"shape": "Dodecagon D12h",
},
"HPY-12": {
"code": "2",
"vertices": "12",
"label": "HPY-12",
"shape": "Hendecagonal pyramid C11v",
},
"DBPY-12": {
"code": "3",
"vertices": "12",
"label": "DBPY-12",
"shape": "Decagonal bipyramid D10h",
},
"HPR-12": {
"code": "4",
"vertices": "12",
"label": "HPR-12",
"shape": "Hexagonal prism D6h",
},
"HAPR-12": {
"code": "5",
"vertices": "12",
"label": "HAPR-12",
"shape": "Hexagonal antiprism D6d",
},
"TT-12": {
"code": "6",
"vertices": "12",
"label": "TT-12",
"shape": "Truncated tetrahedron Td",
},
"COC-12": {
"code": "7",
"vertices": "12",
"label": "COC-12",
"shape": "Cuboctahedron Oh",
},
"ACOC-12": {
"code": "8",
"vertices": "12",
"label": "ACOC-12",
"shape": (
"Anticuboctahedron (Triangular orthobicupola J27) D3h"
),
},
"IC-12": {
"code": "9",
"vertices": "12",
"label": "IC-12",
"shape": "Icosahedron Ih",
},
"JSC-12": {
"code": "10",
"vertices": "12",
"label": "JSC-12",
"shape": "Square cupola (J4) C4v",
},
"JEPBPY-12": {
"code": "11",
"vertices": "12",
"label": "JEPBPY-12",
"shape": "Elongated pentagonal bipyramid (J16) D6h",
},
"JBAPPR-12": {
"code": "12",
"vertices": "12",
"label": "JBAPPR-12",
"shape": "Biaugmented pentagonal prism (J53) C2v",
},
"JSPMC-12": {
"code": "13",
"vertices": "12",
"label": "JSPMC-12",
"shape": "Sphenomegacorona (J88) Cs",
},
"DD-20": {
"code": "1",
"vertices": "20",
"label": "DD-20",
"shape": "Dodecahedron † Ih",
},
"TCU-24": {
"code": "1",
"vertices": "24",
"label": "TCU-24",
"shape": "Truncated cube Oh",
},
"TOC-24": {
"code": "2",
"vertices": "24",
"label": "TOC-24",
"shape": "Truncated octahedron Oh",
},
}
[docs]
def known_shape_vectors() -> dict[str, dict]:
"""Printed from shape_map.py output.
May not include all of them, only included ones I deemed relevant.
"""
return {
"TP-3": {
"TP-3": 0.0,
"vT-3": 0.0,
"fvOC-3": 0.0,
"mvOC-3": 6.699,
},
"SP-4": {
"SP-4": 0.0,
"T-4": 33.333,
"SS-4": 16.737,
"vTBPY-4": 34.007,
},
"T-4": {
"SP-4": 33.333,
"T-4": 0.0,
"SS-4": 7.213,
"vTBPY-4": 2.288,
},
"TBPY-5": {
"PP-5": 37.069,
"vOC-5": 6.699,
"TBPY-5": 0.0,
"SPY-5": 5.384,
"JTBPY-5": 2.941,
},
"TPR-6": {
"HP-6": 33.675,
"PPY-6": 16.678,
"OC-6": 16.737,
"TPR-6": 0.0,
"JPPY-6": 20.913,
},
"OC-6": {
"HP-6": 33.333,
"PPY-6": 30.153,
"OC-6": 0.0,
"TPR-6": 16.737,
"JPPY-6": 33.916,
},
"CU-8": {
"OP-8": 38.311,
"HPY-8": 30.489,
"HBPY-8": 8.395,
"CU-8": 0.0,
"SAPR-8": 10.989,
"TDD-8": 7.952,
"JGBF-8": 18.811,
"JETBPY-8": 25.086,
"JBTPR-8": 13.285,
"BTPR-8": 12.725,
"JSD-8": 14.257,
"TT-8": 0.953,
"ETBPY-8": 23.162,
},
"SAPR-8": {
"OP-8": 26.12,
"HPY-8": 24.402,
"HBPY-8": 18.458,
"CU-8": 10.989,
"SAPR-8": 0.0,
"TDD-8": 2.848,
"JGBF-8": 17.259,
"JETBPY-8": 28.516,
"JBTPR-8": 2.593,
"BTPR-8": 2.095,
"JSD-8": 5.362,
"TT-8": 11.838,
"ETBPY-8": 24.048,
},
}