Source code for lXtractor.core.config

"""
A module encompassing various settings of lXtractor objects.
"""
from __future__ import annotations

import json
import typing as t
from collections import UserDict, abc
from contextlib import contextmanager
from copy import deepcopy
from enum import IntFlag
from pathlib import Path

# Structure file extensions, each written with its leading dot.
STRUCTURE_EXT = (".cif", ".pdb", ".mmtf")
# The same entries with the first character (the dot) stripped.
STRUCTURE_FMT = tuple(ext[1:] for ext in STRUCTURE_EXT)

# The "resources" directory sits two levels above this file; both JSON
# config files are resolved relative to it.
_RESOURCES = Path(__file__).parent.parent.joinpath("resources")
_DEFAULT_CONFIG_PATH = _RESOURCES.joinpath("default_config.json")
_USER_CONFIG_PATH = _RESOURCES.joinpath("user_config.json")

# NOTE(review): presumably the alternate-location identifiers that count as
# "no altloc" -- confirm against the code consuming this constant.
EMPTY_ALTLOC = ("", " ", ".")


# Flat tuple of UniProt field names, grouped below by category. The names were
# copied from the bioservices UniProt module (see the link inside the tuple).
# Entries that are commented out are intentionally excluded from the tuple.
MetaColumns = (
    # TODO: move to docs
    # Taken from https://bioservices.readthedocs.io/en/main/_modules/bioservices/uniprot.html#UniProt
    # Names & Taxonomy ================================================
    "accession",
    "id",
    "gene_names",
    "gene_primary",
    "gene_synonym",
    "gene_oln",
    "gene_orf",
    "organism_name",
    "organism_id",
    "protein_name",
    "xref_proteomes",
    "lineage",
    "virus_hosts",
    # Sequences ========================================================
    "fragment",
    "sequence",
    "length",
    "mass",
    "organelle",
    "cc_alternative_products",
    "error_gmodel_pred",
    "cc_mass_spectrometry",
    "cc_polymorphism",
    "cc_rna_editing",
    "cc_sequence_caution",
    "ft_var_seq",
    "ft_variant",
    "ft_non_cons",
    "ft_non_std",
    "ft_non_ter",
    "ft_conflict",
    "ft_unsure",
    "sequence_version",
    # Family and Domains ========================================
    "ft_coiled",
    "ft_compbias",
    "cc_domain",
    "ft_domain",
    "ft_motif",
    "protein_families",
    "ft_region",
    "ft_repeat",
    "ft_zn_fing",
    # Function ===================================================
    "absorption",
    "ft_act_site",
    "cc_activity_regulation",
    "ft_binding",
    "ft_ca_bind",
    "cc_catalytic_activity",
    "cc_cofactor",
    "ft_dna_bind",
    "ec",
    "cc_function",
    "kinetics",
    "ft_metal",
    "ft_np_bind",
    "cc_pathway",
    "ph_dependence",
    "redox_potential",
    # 'rhea_id',
    "ft_site",
    "temp_dependence",
    # Gene Ontology ==================================
    "go",
    "go_p",
    "go_f",
    "go_c",
    "go_id",
    # Interaction ======================================
    "cc_interaction",
    "cc_subunit",
    # Expression =======================================
    "cc_developmental_stage",
    "cc_induction",
    "cc_tissue_specificity",
    # Publications =====================================
    "lit_pubmed_id",
    # Dates and versions ===============================
    "date_created",
    "date_modified",
    "date_sequence_modified",
    "version",
    # Structure ========================================
    "structure_3d",
    "ft_strand",
    "ft_helix",
    "ft_turn",
    # Subcellular location =============================
    "cc_subcellular_location",
    "ft_intramem",
    "ft_topo_dom",
    "ft_transmem",
    # Miscellaneous ==========================
    "annotation_score",
    "cc_caution",
    "comment_count",
    # "feature",
    "feature_count",
    "keyword",
    "keywordid",
    "cc_miscellaneous",
    "protein_existence",
    "tools",
    "reviewed",
    "uniparc_id",
    # Pathology ========================================
    "cc_allergen",
    "cc_biotechnology",
    "cc_disruption_phenotype",
    "cc_disease",
    "ft_mutagen",
    "cc_pharmaceutical",
    "cc_toxic_dose",
    # PTM / Processing =================================
    "ft_chain",
    "ft_crosslnk",
    "ft_disulfid",
    "ft_carbohyd",
    "ft_init_met",
    "ft_lipid",
    "ft_mod_res",
    "ft_peptide",
    "cc_ptm",
    "ft_propep",
    "ft_signal",
    "ft_transit",
    # Not documented ===================================
    "xref_pdb",
)



[docs]
def _stringify(obj: t.Any) -> str:
    """Return ``str(obj)``, re-raising any failure as a ``TypeError``."""
    try:
        return str(obj)
    except Exception as e:
        raise TypeError(
            f"Failed to serialize object of type {type(obj)}. Cannot convert to string."
        ) from e


def serialize_json_value(obj: t.Any) -> t.Any:
    """
    Recursively convert an object to a JSON-serializable form.

    Conversion rules, applied in order:

    #. ``str``, ``int``, ``float``, ``bool`` and ``None`` are returned as is.
    #. Lists and tuples become lists with every item converted.
    #. Dicts become dicts with every value converted. Keys of the types
       listed in the first rule are kept; any other key is replaced by its
       string form, since the ``json`` module rejects such keys.
    #. Anything else is replaced by its string form.

    :param obj: An arbitrary object.
    :return: A structure composed of JSON-compatible types only.
    :raises TypeError: If an object has to be converted to a string and the
        conversion fails.
    """
    # Types the ``json`` module accepts both as values and as dict keys.
    primitives = (str, int, float, bool, type(None))

    if isinstance(obj, primitives):
        return obj
    if isinstance(obj, (list, tuple)):
        return [serialize_json_value(item) for item in obj]
    if isinstance(obj, dict):
        return {
            (key if isinstance(key, primitives) else _stringify(key)): (
                serialize_json_value(value)
            )
            for key, value in obj.items()
        }

    # Last resort: fall back to the object's string representation.
    return _stringify(obj)




[docs]
class Config(UserDict):
    """
    A configuration management class.

    This class facilitates the loading and saving of configuration settings,
    with a user-specified configuration overriding the default settings.

    :param default_config_path: The path to the default config file. This is a
        reference default settings, which can be used to reset user settings
        if needed.
    :param user_config_path: The path to the user configuration file. This file
        is stored internally and can be modified by a user to provide permanent
        settings.

    Loading and modifying the config:

    >>> cfg = Config()
    >>> list(cfg.keys())[:2]
    ['bonds', 'colnames']
    >>> cfg['bonds']['non_covalent_upper']
    5.0
    >>> cfg['bonds']['non_covalent_upper'] = 6

    Equivalently, one can update the config by a local JSON file or dict:

    >>> cfg.update_with({'bonds': {'non_covalent_upper': 4}})
    >>> assert cfg['bonds']['non_covalent_upper'] == 4

    The changes can be stored internally and loaded automatically in the future:

    >>> cfg.save()
    >>> cfg = Config()
    >>> assert cfg['bonds']['non_covalent_upper'] == 4

    To restore default settings:

    >>> cfg.reset_to_defaults()
    >>> cfg.clear_user_config()
    """

    def __init__(
        self,
        default_config_path: str | Path = _DEFAULT_CONFIG_PATH,
        user_config_path: str | Path = _USER_CONFIG_PATH,
    ):
        """
        Remember both config locations and load them immediately.

        :param default_config_path: The path to the JSON file with defaults.
        :param user_config_path: The path to the JSON file with user settings.
        """
        self.default_config_path = Path(default_config_path)
        self.user_config_path = Path(user_config_path)

        super().__init__()
        self.reload()

    @staticmethod
    def _read_json(path: Path) -> t.Any:
        """Parse and return the contents of the JSON file at ``path``."""
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with path.open("r", encoding="utf-8") as f:
            return json.load(f)

    @contextmanager
    def temporary_namespace(self):
        """
        A context manager for a temporary config namespace.

        Within this context, changes to the config are allowed, but will be
        reverted back to the original config once the context is exited.

        The original state is a deep copy taken on entry. On exit, the
        config's underlying ``data`` dict is replaced by that copy, also when
        the body raised. Consequently, nested sections referenced from outside
        the config are no longer the objects stored in it after the exit.

        Example:

        >>> cfg = Config()
        >>> with cfg.temporary_namespace():
        ...     cfg['bonds']['non_covalent_upper'] = 10
        ...     # Do some stuff with the temporary config...
        ... # Config is reverted back to original state here
        >>> assert cfg['bonds']['non_covalent_upper'] != 10
        """
        original_config = deepcopy(self.data)
        try:
            yield self  # This allows access to the Config object within the context
        finally:
            self.data = original_config  # Revert config back to original state

    def reload(self) -> None:
        """
        Load the configuration from files.

        The default config is applied first, replacing top-level keys as a
        whole. The user config is then merged on top of it using the mapping
        rules of :meth:`update_with`. Existing keys that occur in neither file
        are left untouched.

        :raises OSError: If either of the files can't be opened.
        """
        # Load true default config
        self.data.update(self._read_json(self.default_config_path))

        # Update with default user config
        self.update_with(self._read_json(self.user_config_path))

    def save(self, user_config_path: str | Path | None = None) -> None:
        """
        Save the current configuration. By default, will store the configuration
        internally, i.e., under the user config path this object was created
        with. This stored configuration will be loaded automatically on
        top of the default configuration.

        Values the ``json`` module can't encode are converted by
        :func:`serialize_json_value`.

        :param user_config_path: The path where to save the user configuration
            file. If not provided, the instance's ``user_config_path`` is used.
        """
        target = (
            self.user_config_path
            if user_config_path is None
            else Path(user_config_path)
        )
        with target.open("w", encoding="utf-8") as f:
            json.dump(self.data, f, indent=4, default=serialize_json_value)

    def reset_to_defaults(self) -> None:
        """
        Reset the configuration to the default settings.

        Drops all current entries and reloads the default config file. The
        user config file is neither read nor modified.
        """
        self.data.clear()
        self.data.update(self._read_json(self.default_config_path))

    def clear_user_config(self) -> None:
        """
        Clear the contents of the locally stored user config file.

        The file is overwritten with an empty JSON object; the in-memory
        configuration is not changed.
        """
        with self.user_config_path.open("w", encoding="utf-8") as f:
            json.dump({}, f, indent=4)

    def update_with(self, other: abc.Mapping[str, t.Any] | str | Path) -> None:
        """
        Update the configuration from a JSON file or a mapping.

        The two kinds of input are applied differently:

        - A path (``str`` or ``Path``) is parsed as JSON and applied with
          plain ``dict.update`` semantics: top-level keys are added or
          replaced as a whole.
        - A mapping only touches keys already present in the config; unknown
          keys are ignored. If both the current and the new value are
          mappings, the new one is merged into the current one (one level
          deep). Otherwise, the current value is replaced.

        :param other: A path to a JSON file or a mapping with new values.
        """
        # NOTE(review): the asymmetry between file and mapping input is kept
        # as found; confirm whether files should follow the mapping rules.
        if isinstance(other, (str, Path)):
            self.update(self._read_json(Path(other)))
            return

        for key, value in other.items():
            if key not in self:
                continue
            current = self[key]
            if isinstance(current, abc.Mapping) and isinstance(value, abc.Mapping):
                current.update(value)
            else:
                self[key] = value

    def __repr__(self):
        return f"{self.__class__.__name__}({self.data})"



# Module-level configuration instance built with the default file locations.
# It is created when the module is imported, so both config files are read
# at import time.
DefaultConfig = Config()



[docs]
class AtomMark(IntFlag):
    """
    Bit flags for atom categories.

    Every member occupies a single bit, so categories can be combined with
    ``|``. For instance, ``LIGAND | PEP`` is a valid category that denotes
    ligand peptide atoms.
    """

    #: Unknown atom.
    UNK: int = 1 << 0
    #: Solvent atom.
    SOLVENT: int = 1 << 1
    #: Ligand atom. If not combined with PEP, NUC, or CARB, this category
    #: denotes non-polymer (small molecule) single-residue ligands.
    LIGAND: int = 1 << 2
    #: Peptide polymer atoms.
    PEP: int = 1 << 3
    #: Nucleotide polymer atoms.
    NUC: int = 1 << 4
    #: Carbohydrate polymer atoms.
    CARB: int = 1 << 5
    #: Covalent polymer modifications including ligands.
    COVALENT: int = 1 << 6



# Pairs of a one-letter code and the AtomMark flag of the matching polymer
# category: "p" for peptide, "n" for nucleotide, "c" for carbohydrate.
POL_MARKS = (("p", AtomMark.PEP), ("n", AtomMark.NUC), ("c", AtomMark.CARB))


# Running this file as a script fails right away with a RuntimeError.
if __name__ == "__main__":
    raise RuntimeError