"""
A module encompassing various settings of lXtractor objects.
"""
from __future__ import annotations
import json
import typing as t
from collections import UserDict, abc
from contextlib import contextmanager
from copy import deepcopy
from enum import IntFlag
from pathlib import Path
# Structure file extensions, each including the leading dot.
STRUCTURE_EXT = (".cif", ".pdb", ".mmtf")
# The same entries with the leading dot stripped.
STRUCTURE_FMT = tuple(ext[1:] for ext in STRUCTURE_EXT)

# JSON config files are looked up in the "resources" directory located
# two levels above this file.
_RESOURCES = Path(__file__).parent.parent.joinpath("resources")
_DEFAULT_CONFIG_PATH = _RESOURCES.joinpath("default_config.json")
_USER_CONFIG_PATH = _RESOURCES.joinpath("user_config.json")

# NOTE(review): presumably the alternative-location identifiers that are
# regarded as "no altloc" -- confirm against the code that consumes this.
EMPTY_ALTLOC = ("", " ", ".")
MetaColumns = (
    # Field names grouped by category; the grouping below follows the
    # section comments and has no effect on the resulting flat tuple.
    # TODO: move to docs
    # Taken from https://bioservices.readthedocs.io/en/main/_modules/bioservices/uniprot.html#UniProt
    # Names & Taxonomy =================================================
    "accession",
    "id",
    "gene_names",
    "gene_primary",
    "gene_synonym",
    "gene_oln",
    "gene_orf",
    "organism_name",
    "organism_id",
    "protein_name",
    "xref_proteomes",
    "lineage",
    "virus_hosts",
    # Sequences ========================================================
    "fragment",
    "sequence",
    "length",
    "mass",
    "organelle",
    "cc_alternative_products",
    "error_gmodel_pred",
    "cc_mass_spectrometry",
    "cc_polymorphism",
    "cc_rna_editing",
    "cc_sequence_caution",
    "ft_var_seq",
    "ft_variant",
    "ft_non_cons",
    "ft_non_std",
    "ft_non_ter",
    "ft_conflict",
    "ft_unsure",
    "sequence_version",
    # Family and Domains ===============================================
    "ft_coiled",
    "ft_compbias",
    "cc_domain",
    "ft_domain",
    "ft_motif",
    "protein_families",
    "ft_region",
    "ft_repeat",
    "ft_zn_fing",
    # Function =========================================================
    "absorption",
    "ft_act_site",
    "cc_activity_regulation",
    "ft_binding",
    "ft_ca_bind",
    "cc_catalytic_activity",
    "cc_cofactor",
    "ft_dna_bind",
    "ec",
    "cc_function",
    "kinetics",
    "ft_metal",
    "ft_np_bind",
    "cc_pathway",
    "ph_dependence",
    "redox_potential",
    # Deliberately excluded (left commented out):
    # 'rhea_id',
    "ft_site",
    "temp_dependence",
    # Gene Ontology ====================================================
    "go",
    "go_p",
    "go_f",
    "go_c",
    "go_id",
    # Interaction ======================================================
    "cc_interaction",
    "cc_subunit",
    # Expression =======================================================
    "cc_developmental_stage",
    "cc_induction",
    "cc_tissue_specificity",
    # Publications =====================================================
    "lit_pubmed_id",
    # Dates and versions ===============================================
    "date_created",
    "date_modified",
    "date_sequence_modified",
    "version",
    # Structure ========================================================
    "structure_3d",
    "ft_strand",
    "ft_helix",
    "ft_turn",
    # Subcellular location =============================================
    "cc_subcellular_location",
    "ft_intramem",
    "ft_topo_dom",
    "ft_transmem",
    # Miscellaneous ====================================================
    "annotation_score",
    "cc_caution",
    "comment_count",
    # Deliberately excluded (left commented out):
    # "feature",
    "feature_count",
    "keyword",
    "keywordid",
    "cc_miscellaneous",
    "protein_existence",
    "tools",
    "reviewed",
    "uniparc_id",
    # Pathology ========================================================
    "cc_allergen",
    "cc_biotechnology",
    "cc_disruption_phenotype",
    "cc_disease",
    "ft_mutagen",
    "cc_pharmaceutical",
    "cc_toxic_dose",
    # PTM / Processing =================================================
    "ft_chain",
    "ft_crosslnk",
    "ft_disulfid",
    "ft_carbohyd",
    "ft_init_met",
    "ft_lipid",
    "ft_mod_res",
    "ft_peptide",
    "cc_ptm",
    "ft_propep",
    "ft_signal",
    "ft_transit",
    # Not documented in the source listed above ========================
    "xref_pdb",
)
[docs]
def serialize_json_value(obj: t.Any) -> t.Any:
    """
    Recursively convert an object to a JSON-serializable form.

    Conversion rules:

    - ``str``, ``int``, ``float``, ``bool`` and ``None`` are returned as is;
    - lists and tuples become lists with each item converted recursively;
    - dicts become dicts with each value converted recursively. Keys of the
      primitive types above are kept unchanged, any other key is replaced
      by its string representation, since JSON objects cannot have, e.g.,
      tuple keys;
    - any other object is replaced by its string representation.

    :param obj: An arbitrary object.
    :return: A structure composed of JSON-compatible types only.
    :raises TypeError: If an object cannot be converted to a string.
    """
    primitives = (str, int, float, bool, type(None))

    # Directly serializable types
    if isinstance(obj, primitives):
        return obj
    if isinstance(obj, (list, tuple)):
        return [serialize_json_value(item) for item in obj]
    if isinstance(obj, dict):
        # ``json`` never calls the ``default`` hook for keys, so unsupported
        # keys have to be stringified here.
        return {
            (key if isinstance(key, primitives) else str(key)): (
                serialize_json_value(value)
            )
            for key, value in obj.items()
        }
    try:
        return str(obj)
    except Exception as e:
        raise TypeError(
            f"Failed to serialize object of type {type(obj)}. Cannot convert to string."
        ) from e
[docs]
class Config(UserDict):
    """
    A configuration management class.

    This class facilitates the loading and saving of configuration settings,
    with a user-specified configuration overriding the default settings.

    :param default_config_path: The path to the default config file. This is a
        reference default settings, which can be used to reset user settings
        if needed.
    :param user_config_path: The path to the user configuration file. This file
        is stored internally and can be modified by a user to provide permanent
        settings.

    Loading and modifying the config:

    >>> cfg = Config()
    >>> list(cfg.keys())[:2]
    ['bonds', 'colnames']
    >>> cfg['bonds']['non_covalent_upper']
    5.0
    >>> cfg['bonds']['non_covalent_upper'] = 6

    Equivalently, one can update the config by a local JSON file or dict:

    >>> cfg.update_with({'bonds': {'non_covalent_upper': 4}})
    >>> assert cfg['bonds']['non_covalent_upper'] == 4

    The changes can be stored internally and loaded automatically in the future:

    >>> cfg.save()
    >>> cfg = Config()
    >>> assert cfg['bonds']['non_covalent_upper'] == 4

    To restore default settings:

    >>> cfg.reset_to_defaults()
    >>> cfg.clear_user_config()
    """

    def __init__(
        self,
        default_config_path: str | Path = _DEFAULT_CONFIG_PATH,
        user_config_path: str | Path = _USER_CONFIG_PATH,
    ):
        self.default_config_path = Path(default_config_path)
        self.user_config_path = Path(user_config_path)
        super().__init__()
        # Populate ``self.data`` right away; this reads both config files.
        self.reload()

    @staticmethod
    def _read_json(path: Path) -> t.Any:
        """Parse and return the contents of a JSON file at ``path``."""
        with path.open("r", encoding="utf-8") as f:
            return json.load(f)

    @contextmanager
    def temporary_namespace(self):
        """
        A context manager for a temporary config namespace.

        Within this context, changes to the config are allowed, but will be
        reverted back to the original config once the context is exited.

        Example:

        >>> cfg = Config()
        >>> with cfg.temporary_namespace():
        ...     cfg['bonds']['non_covalent_upper'] = 10
        ...     # Do some stuff with the temporary config...
        ... # Config is reverted back to original state here
        >>> assert cfg['bonds']['non_covalent_upper'] != 10
        """
        # A deep copy is needed since the nested sections are mutable.
        original_config = deepcopy(self.data)
        try:
            yield self  # This allows access to the Config object within the context
        finally:
            self.data = original_config  # Revert config back to original state

    def reload(self):
        """
        Load the configuration from files.

        The default config is applied first, then the user config is applied
        on top of it via :meth:`update_with`. Existing entries are updated
        in place, the config is not cleared beforehand.
        """
        # Load true default config
        self.data.update(self._read_json(self.default_config_path))
        # Update with default user config
        self.update_with(self._read_json(self.user_config_path))

    def save(self, user_config_path: str | Path | None = None):
        """
        Save the current configuration. By default, will store the configuration
        internally. This stored configuration will be loaded automatically on
        top of the default configuration.

        :param user_config_path: The path where to save the user configuration
            file. If not provided, the ``user_config_path`` of this instance
            is used, i.e., the same file :meth:`reload` and
            :meth:`clear_user_config` operate on.
        :raises TypeError: If some value cannot be serialized.
        """
        if user_config_path is None:
            path = self.user_config_path
        else:
            path = Path(user_config_path)
        # Serialize before opening the file: opening in the "w" mode truncates
        # it, so a serialization failure must not leave a corrupted config.
        payload = json.dumps(self.data, indent=4, default=serialize_json_value)
        with path.open("w", encoding="utf-8") as f:
            f.write(payload)

    def reset_to_defaults(self):
        """
        Reset the configuration to the default settings.

        Only the in-memory config is affected; the user config file is left
        untouched.
        """
        # Read the defaults first so the current config survives a failed read.
        defaults = self._read_json(self.default_config_path)
        self.data.clear()
        self.data.update(defaults)

    def clear_user_config(self):
        """Clear the contents of the locally stored user config file."""
        with self.user_config_path.open("w", encoding="utf-8") as f:
            json.dump({}, f, indent=4)

    def update_with(self, other: abc.Mapping[str, t.Any] | Path):
        """
        Update the config using a mapping or a JSON file.

        For each key of ``other``: if both the stored and the provided values
        are mappings, the stored section is updated with the provided entries
        (one level deep), keeping the entries absent from ``other``. In any
        other case, the provided value is stored under the key as is.

        :param other: A mapping with settings or a path to a JSON file holding
            such a mapping. Both are processed in the same way.
        """
        if isinstance(other, Path):
            other = self._read_json(other)
        for key, value in other.items():
            current = self.data.get(key)
            if isinstance(current, abc.MutableMapping) and isinstance(
                value, abc.Mapping
            ):
                current.update(value)
            else:
                self[key] = value

    def __repr__(self):
        return f"{type(self).__name__}({self.data})"
# Module-level ``Config`` instance built from the default paths. Note that it
# is created at import time, and that creating a ``Config`` reads both the
# default and the user JSON config files.
DefaultConfig = Config()
[docs]
class AtomMark(IntFlag):
    """
    Bit flags denoting atom categories.

    The flags may be OR-ed to obtain composite categories. For instance,
    ``LIGAND | PEP`` is a valid category standing for the atoms of a
    peptide ligand.
    """

    #: An atom of unknown category.
    UNK: int = 1 << 0
    #: An atom belonging to solvent.
    SOLVENT: int = 1 << 1
    #: An atom belonging to a ligand. Without PEP, NUC, or CARB, it stands
    #: for a single-residue non-polymer (small molecule) ligand.
    LIGAND: int = 1 << 2
    #: An atom of a peptide polymer.
    PEP: int = 1 << 3
    #: An atom of a nucleotide polymer.
    NUC: int = 1 << 4
    #: An atom of a carbohydrate polymer.
    CARB: int = 1 << 5
    #: An atom of a covalent polymer modification, ligands included.
    COVALENT: int = 1 << 6
# Pairs of (one-letter code, ``AtomMark`` flag) for the three polymer
# categories: peptide, nucleotide, and carbohydrate.
# NOTE(review): where the one-letter codes are consumed is not visible here.
POL_MARKS = (("p", AtomMark.PEP), ("n", AtomMark.NUC), ("c", AtomMark.CARB))
# Running this module as a script is an error: it only provides settings
# to be imported.
if __name__ == "__main__":
    raise RuntimeError