"""Module for databasing, with AtomLite.
Author: Andrew Tarzia
"""
import logging
import pathlib
import sqlite3
from collections import abc
import atomlite
import polars as pl
import stk
# Apply the basic logging configuration when this module is imported:
# INFO level, with a "time | level | message" record layout.
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(message)s",
    level=logging.INFO,
)
# Logger named after this module, for use by the code below.
logger = logging.getLogger(name=__name__)
class AtomliteDatabase:
    """Holds an atomlite database with some useful methods.

    Every method delegates to the wrapped :class:`atomlite.Database`,
    which is created from the file path given at construction.

    """

    def __init__(self, db_file: pathlib.Path) -> None:
        """Initialize database.

        Parameters:
            db_file:
                The path handed to :class:`atomlite.Database`.

        """
        self._db_file = db_file
        self._db = atomlite.Database(db_file)

    def get_database(self) -> atomlite.Database:
        """Access the atomlite Database object."""
        return self._db

    def get_num_entries(self) -> int:
        """Get the number of molecular entries in the database."""
        return self._db.num_entries()

    def get_num_property_entries(self) -> int:
        """Get the number of property entries in the database."""
        return self._db.num_property_entries()

    def _add_or_update(
        self,
        entries: atomlite.Entry | abc.Sequence[atomlite.Entry],
    ) -> None:
        """Insert entries, switching to an update if the insert fails."""
        try:
            self._db.add_entries(entries)
        except sqlite3.IntegrityError:
            # NOTE(review): presumably raised because a key is already
            # stored -- confirm against the atomlite schema.
            self._db.update_entries(entries)

    def add_molecule(self, molecule: stk.Molecule, key: str) -> None:
        """Add molecule to database as entry.

        The molecule is converted through its rdkit representation.
        If inserting raises :class:`sqlite3.IntegrityError`, the entry
        is passed to ``update_entries`` instead.

        Parameters:
            molecule:
                The molecule to store.
            key:
                The key of the new entry.

        """
        entry = atomlite.Entry.from_rdkit(
            key=key,
            molecule=molecule.to_rdkit_mol(),
        )
        self._add_or_update(entry)

    def add_entries(self, entries: abc.Sequence[atomlite.Entry]) -> None:
        """Add existing atomlite entries to the database.

        If inserting raises :class:`sqlite3.IntegrityError`, the same
        entries are passed to ``update_entries`` instead.

        Parameters:
            entries:
                The entries to store.

        """
        self._add_or_update(entries)

    def get_entries(self) -> abc.Iterator[atomlite.Entry]:
        """Get all entries as an iterator."""
        return self._db.get_entries()

    def get_entry_list(self) -> list[atomlite.Entry]:
        """Get all entries as a list."""
        return list(self._db.get_entries())

    def get_property_entries(self) -> abc.Iterator[atomlite.PropertyEntry]:
        """Get all property entries."""
        return self._db.get_property_entries()

    def get_entry(self, key: str) -> atomlite.Entry:
        """Get specific entry.

        Parameters:
            key:
                The key of the entry.

        Returns:
            The entry stored under ``key``.

        Raises:
            RuntimeError: If no entry is stored under ``key``.

        """
        # Single lookup; assumes the database hands back ``None`` for
        # an absent key -- TODO confirm against the atomlite docs.
        entry = self._db.get_entry(key)
        if entry is None:
            msg = f"{key} not in database"
            raise RuntimeError(msg)
        return entry

    def get_property_entry(self, key: str) -> atomlite.PropertyEntry:
        """Get specific property entry.

        Parameters:
            key:
                The key of the property entry.

        Returns:
            The property entry stored under ``key``.

        Raises:
            RuntimeError: If no property entry is stored under ``key``.

        """
        # Single lookup; assumes the database hands back ``None`` for
        # an absent key -- TODO confirm against the atomlite docs.
        entry = self._db.get_property_entry(key)
        if entry is None:
            msg = f"{key} not in database"
            raise RuntimeError(msg)
        return entry

    def get_molecule(self, key: str) -> stk.Molecule:
        """Get a molecule.

        Parameters:
            key:
                The key of the entry holding the molecule.

        Returns:
            An :class:`stk.BuildingBlock` initialised from the rdkit
            form of the stored molecule.

        Raises:
            RuntimeError: If no entry is stored under ``key``.

        """
        rdkit_molecule = atomlite.json_to_rdkit(self.get_entry(key).molecule)
        return stk.BuildingBlock.init_from_rdkit_mol(rdkit_molecule)

    def add_properties(
        self,
        key: str,
        property_dict: dict[str, atomlite.Json],
    ) -> None:
        """Add properties to an entry by key.

        Parameters:
            key:
                The key of the entry.
            property_dict:
                The properties sent to ``update_properties``.

        """
        self._db.update_properties(
            atomlite.PropertyEntry(key=key, properties=property_dict)
        )

    def remove_property(self, key: str, property_path: str) -> None:
        """Remove a property from an entry by key.

        Parameters:
            key:
                The key of the entry.
            property_path:
                The path of the property to remove.

        """
        self._db.remove_property(key=key, path=property_path)

    def set_property(
        self,
        key: str,
        property_path: str,
        value: float | str | bool | None,
    ) -> None:
        """Set a single property of an entry by key.

        Parameters:
            key:
                The key of the entry.
            property_path:
                The path of the property to set.
            value:
                The value to store at ``property_path``.

        """
        self._db.set_property(key=key, path=property_path, property=value)

    def get_property(
        self,
        key: str,
        property_key: str,
        property_type: type,
    ) -> atomlite.Json:
        """Get a property of an entry.

        Parameters:
            key:
                The key of the entry.
            property_key:
                The name of the property. For ``bool``, ``float``,
                ``str`` and ``int`` it is queried as the path
                ``$.<property_key>``; for ``dict`` it is looked up in
                the properties of the entry.
            property_type:
                The expected type of the property: one of ``bool``,
                ``float``, ``str``, ``int`` or ``dict``.

        Returns:
            The value of the property.

        Raises:
            RuntimeError: If ``property_type`` is not supported, if
                the value is ``None``, or (for ``dict``) if no entry
                is stored under ``key``.
            KeyError: If the lookup raises it; a note naming the key,
                property and type is attached before re-raising.

        """
        # One typed getter per supported scalar type.
        typed_getters = {
            bool: self._db.get_bool_property,
            float: self._db.get_float_property,
            str: self._db.get_str_property,
            int: self._db.get_int_property,
        }
        try:
            if property_type in typed_getters:
                value = typed_getters[property_type](
                    key=key,
                    path=f"$.{property_key}",
                )
            elif property_type is dict:
                value = self.get_entry(key).properties[property_key]
            else:
                msg = f"{property_key} has unexpected type"
                raise RuntimeError(msg)
        except KeyError as ex:
            ex.add_note(
                f"{key} does not have {property_key} of {property_type}"
            )
            raise

        if value is None:
            msg = f"{property_key} has no value"
            raise RuntimeError(msg)
        return value  # type: ignore[return-value]

    def has_molecule(self, key: str) -> bool:
        """Check if database has a molecule by key."""
        return bool(self._db.has_entry(key))

    def remove_entry(self, key: str) -> None:
        """Remove an entry by key."""
        self._db.remove_entries(keys=key)

    def keep_if(
        self,
        column: str,
        value: str | float,
    ) -> abc.Iterator[atomlite.Entry]:
        """Filter database entries by properties.

        Parameters:
            column:
                The name of the property to compare.
            value:
                The value the property must equal.

        Yields:
            Each entry whose ``column`` property equals ``value``.
            Entries without that property are skipped.

        """
        # A private sentinel keeps "property absent" distinct from any
        # value that could actually be stored.
        missing = object()
        for entry in self.get_entries():
            if entry.properties.get(column, missing) == value:
                yield entry

    def get_property_df(
        self,
        properties: abc.Sequence[str],
        allow_missing: bool = False,
    ) -> pl.DataFrame:
        """Get a DataFrame of the properties in the database.

        Parameters:
            properties:
                The paths of the properties to retrieve.
                Valid paths are described in the :mod:`atomlite` docs.
            allow_missing:
                If ``True``, rows with some missing properties will be
                included in the DataFrame and hold ``null`` values.

        Returns:
            A DataFrame of the property entries in the database.

        """
        return self._db.get_property_df(
            properties=properties,
            allow_missing=allow_missing,
        )