Source code for radical.utils.config

'''
Config file parsing.

We provide a json based config file parser with following properties

  - system config files will be merged with user configs (if those exist)
  - python style comments are filtered out before parsing
  - after parsing, `${ABC:default}`-style values are set or expanded via
    `os.environ`
  - the returned class exposes settings as dicts or attributes
    cfg['foo']['bar'] == cfg.foo.bar


Config Names and Locations
--------------------------

We assume two basic locations for config files: one is installed within the
scope of a Python module, the other one is under user control, and usually in
the users home directory.  The  config reader accepts the following parameters
to derive the exact locations:

  - module: name of module under which the config is installed
  - path  : config file path relative to the module home
  - name  : config file name relative to the path

The `module` string is interpreted as follows:

    module         = 'module'
    module_path    = radical.utils.debug.find_module(module)
    sys_config_dir = '%s/configs' % module_path
    usr_base_dir   = os.getenv('RADICAL_CONFIG_USER_DIR') or \
                     os.getenv('HOME')
    if usr_base_dir:
        usr_config_dir = '%s/.%s/configs' % \
            (usr_base_dir, module.replace('.', '/'))

so the location of the module's `__init__.py` is used to derive the location
of the installed system config files, and the module name is used to derive
the location of the user provided config files.

For example, the module `radical.utils` will have the following config dirs:

    sys_config_dir = /tmp/ve/lib/python3.7/site-packages/radical/utils/configs
    usr_config_dir = /home/merzky/.radical/utils/configs

After loading the system level config file, any existing user level config
file is merged into it, via

    radical.utils.dict_merge(user_cfg, system_cfg, mode='overwrite')

so that the user config settings supersede the system config settings.

Both path and name specifiers can contain `*` as wildcard, which is then
interpreted as by `glob()`.  If that wildcard exists, then all matching config
files are read into *one* configuration dict, where each root key is set to
the value the '*' expands to (minus the `.json` extension).

For example, the name `radical.pilot.resource_*` with the following config
files:

    /tmp/ve/[...]/radical/pilot/configs/resource_xsede.json
    /tmp/ve/[...]/radical/pilot/configs/resource_ncsa.json

will result in a config dict like::

    {'xsede' : { 'foo' : 'bar' },
     'ncsa'  : { 'fiz' : 'baz' }}


Queries
-------

We support two types of queries on the resulting parsed configs:

  - `dict` like queries
  - the `_query(key)` method returns a single value, or 'None' if not found.

In the latter `_query()` case, the `key` can be specified as dot-separated
path, so that the following two snippets are equivalent (assuming that a
`foo.bar` section exists):

  val = cfg['foo']['bar'].get('baz')
  val = cfg._query('foo.bar.baz')


Environment
-----------

Towards `os.environ` completion, we support the following syntax in all string
*values* (not keys):

    `${RADICAL_UTILS_ENV:default_value}`

which will be replaced by

    `os.environ.get('RADICAL_UTILS_ENV', 'default_value')`

The default value is optional, an empty string is used if no default value is
given.  Env evaluation is only performed at time of parsing, not at time of
query.  RU attempts to convert env variables to float and int - if that fails,
values are stored as strings.


Validation
----------

It probably makes sense to switch to a json schema validator at some point,
see for example https://pypi.python.org/pypi/json-schema-validator. For now
this implementation remains schema-less, and will thus, in a very pythonesque
way, only fail once values are queried or used.


Implementation
--------------

This implementation is based on typed dictionaries which are accessed as
`TypedDict`'ed object hierarchy.
'''

__author__    = 'RADICAL-Cybertools Team'
__copyright__ = 'Copyright 2016-2022, The RADICAL-Cybertools Team'
__license__   = 'MIT'

import glob
import os

from .modules    import find_module
from .misc       import expand_env as ru_expand_env
from .json_io    import read_json, write_json
from .dict_mixin import dict_merge
from .typeddict  import TypedDict, TypedDictMeta

from .singleton  import Singleton


# ------------------------------------------------------------------------------
#
[docs]class Config(TypedDict): """Contents of a config (json) file from a module's config tree. Attributes: module: used to determine the module's config file location (default: `radical.utils`) category: name of config to be loaded from module's config path name: specify a specific configuration to be used path: path to app config json to be used for initialization cfg: application config dict to be used for initialization from_dict: alias for cfg, to satisfy base class constructor expand: enable / disable environment var expansion (default: True) env: environment dictionary to be used for expansion (default: `os.environ`) The naming of config files follows this rule: `<category>_<name>.json` For example, if the following is used in a system python installation: ru.Config('radical.pilot', category='session', name='minimal') it would attempt to load (depending on system details): /usr/lib/python3/site-packages/ \ radical/pilot/configs/session_minimal.json NOTE: Keys containing an underscore are not exposed via the API. Keys containing dots are split and interpreted as paths in the configuration hierarchy. """ _self_default = True # FIXME: we should cache config files after reading, so that repeated # instance creations do not trigger a new (identical) round of # parsing. # -------------------------------------------------------------------------- # def __init__(self, from_dict=None, module=None, category=None, name=None, cfg=None, path=None, expand=True, env=None, _internal=False): """ Load a config (json) file from the module's config tree, and overload any user specific config settings if found. Parameters ---------- from_dict: alias for cfg, to satisfy base class constructor module: used to determine the module's config file location - default: `radical.utils` category: name of config to be loaded from module's config path name: specify a specific configuration to be used path: path to app config json to be used for initialization cfg: application config dict to be used for initialization expand: enable / disable environment var expansion - default: True env: environment dictionary to be used for expansion - default: `os.environ` The naming of config files follows this rule: `<category>_<name>.json` For example, if the following is used in a system python installation: ru.Config('radical.pilot', category='session', name='minimal') it would attempt to load (depending on system details): /usr/lib/python3/site-packages/\ radical/pilot/configs/session_minimal.json NOTE: Keys containing an underscore are not exposed via the API. Keys containing dots are split and interpreted as paths in the configuration hierarchy. """ # if the `from_dict` is given but is a string, we interpret it as # `module` parameter. if from_dict and isinstance(from_dict, str): module = from_dict from_dict = None if from_dict: # if we could only overload constructors by signature... :-/ # As it is, we have to emulate that... assert (not module and not category and not name and not cfg and not path and not env and not _internal), 'from_dict must be exclusive' super().__init__(from_dict=from_dict) return if path and cfg: raise ValueError('conflicting initializers (path, cfg)') if path: cfg = read_json(path) if not cfg: # just use config files cfg = dict() # if a category has dot limited elements and no module is given, # interpret the first part as module # radical.pilot.session -> [radical.pilot, session] if not module and category and '.' in category: elems = category.split('.') module = '.'.join(elems[:-1]) category = elems[-1] # do the same if module is given but not the category if not category and module and '.' in module: elems = module.split('.') module = '.'.join(elems[:-1]) category = elems[-1] # if a category starts with a module prefix, strip that prefix if category and category.startswith('%s.' % module): category = category[len(module) + 1:] name_orig = name if not name: # by default, load the default config name = 'default' # purge module and category parts from name if module and name.startswith(module + '.'): name = name[len(module) + 1:] if category and name.startswith(category + '.'): name = name[len(category) + 1:] if name.startswith('/'): # load config from abs path sys_fspec = name usr_fspec = None if '*' in name: starred = True else : starred = False elif module and category: # construct cfg file paths modpath = find_module(module) if not modpath: raise ValueError("Cannot find module %s" % module) fname = '%s_%s.json' % (category.replace('.', '/'), name) sys_dir = '%s/configs' % modpath sys_fspec = '%s/%s' % (sys_dir, fname) home = os.getenv('RADICAL_CONFIG_USER_DIR') or os.getenv('HOME') if home: usr_dir = '%s/.%s/configs' % (home, module.replace('.', '/')) usr_fspec = '%s/%s' % (usr_dir, fname) else: usr_dir = None usr_fspec = None if '*' in fname: starred = True else : starred = False else: # we can't load a config file - just use the app config sys_fspec = None usr_fspec = None starred = False sys_cfg = dict() usr_cfg = dict() app_cfg = cfg if _internal: # no need to look at the FS, just convert the given cfg dict sys_fspec = None usr_fspec = None nfiles = 0 if not starred: if sys_fspec: sys_fname = sys_fspec if not os.path.isfile(sys_fname): sys_fname += '.json' if os.path.isfile(sys_fname): sys_cfg = read_json(sys_fname) nfiles += 1 if usr_fspec: usr_fname = usr_fspec if not os.path.isfile(usr_fname): usr_fname += '.json' if os.path.isfile(usr_fname): usr_cfg = read_json(usr_fname) nfiles += 1 else: # wildcard mode: whatever the '*' expands into is used as root dict # entry, and the respective content of the config file is stored # underneath it. if sys_fspec: postfix_len = len('.json') # ' .json' prefix_len = len(sys_fspec) - postfix_len - 1 # '*.json' for sys_fname in glob.glob(sys_fspec): base = sys_fname[prefix_len:-postfix_len] scfg = read_json(sys_fname) sys_cfg[base] = scfg nfiles += 1 if usr_fspec: postfix_len = len('.json') # ' .json' prefix_len = len(usr_fspec) - postfix_len - 1 # '*.json' for usr_fname in glob.glob(usr_fspec): base = usr_fname[prefix_len:-postfix_len] ucfg = read_json(usr_fname) usr_cfg[base] = ucfg nfiles += 1 # if we did not find *any* file, and the original `name` was None, # then try to load config files w/o name # Example: if there is no `registry_default.json`, then try to load # `registry.json`. if nfiles == 0 and name_orig is None and not _internal and category: fname = '%s.json' % (category.replace('.', '/')) sys_fname = '%s/%s' % (sys_dir, fname) if os.path.isfile(sys_fname): sys_cfg = read_json(sys_fname) if usr_dir: usr_fname = '%s/%s' % (usr_dir, fname) if os.path.isfile(usr_fname): usr_cfg = read_json(usr_fname) # merge sys, usr and app cfg before expansion cfg_dict = dict() cfg_dict = dict_merge(cfg_dict, sys_cfg, policy='overwrite') cfg_dict = dict_merge(cfg_dict, usr_cfg, policy='overwrite') cfg_dict = dict_merge(cfg_dict, app_cfg, policy='overwrite') if expand: cfg_dict = ru_expand_env(cfg_dict, env=env) super().__init__(from_dict=cfg_dict) # -------------------------------------------------------------------------- #
[docs] def write(self, fname): write_json(self.as_dict(), fname)
# ------------------------------------------------------------------------------ #
[docs]class DefaultConfigMeta(TypedDictMeta, Singleton): """ Metaclass inherited from `TypedDict`'s metaclass along with `Singleton` to avoid metaclass conflict (the metaclass of a derived class must be a subclass of the metaclasses of all its bases). `Singleton` metaclass allows to have only one instance of a corresponding class. """ pass
# ------------------------------------------------------------------------------ #
[docs]class DefaultConfig(Config, metaclass=DefaultConfigMeta): """ The settings in this default config are, unsurprisingly, used as default values for various RU classes and methods, as for example for log file locations, log levels, profile locations, etc. """ _schema = { 'log_lvl' : str, 'log_tgt' : str, 'log_dir' : str, 'report' : bool, 'report_tgt' : str, 'report_dir' : str, 'profile' : bool, 'profile_dir': str, } # -------------------------------------------------------------------------- # def __init__(self): super(DefaultConfig, self).__init__(module='radical.utils', category='utils', name='default')
# ------------------------------------------------------------------------------