Source code for schedy.experiments

# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals
from builtins import *
from six import raise_from

import requests
from requests.compat import urljoin
import functools
import logging

from . import errors, encoding
from .random import _DISTRIBUTION_TYPES
from .pbt import _EXPLOIT_STRATEGIES, _EXPLORE_STRATEGIES
from .jobs import Job, _make_job, _job_from_response
from .pagination import PageObjectsIterator
from .compat import json_dumps

logger = logging.getLogger(__name__)

def _check_status(status):
    return status in (Experiment.RUNNING, Experiment.DONE)

class Experiment(object):
    #: Status of a running experiment.
    RUNNING = 'RUNNING'
    #: Status of a completed (or paused) experiment.
    DONE = 'DONE'

    def __init__(self, name, status=RUNNING):
        '''
        Base class for all experiments.

        Args:
            name (str): Name of the experiment. An experiment is uniquely
                identified by its name.
            status (str): Status of the experiment. See
                :ref:`experiment_status`.
        '''
        self.name = name
        self.status = status
        self._db = None

    def add_job(self, **kwargs):
        '''
        Adds a new job to this experiment.

        Args:
            hyperparameters (dict): A dictionary of hyperparameter values.
            status (str): Job status. See :ref:`job_status`. Default: QUEUED.
            results (dict): A dictionary of result values. Default: no
                results (empty dictionary).

        Returns:
            schedy.Job: The instance of the new job.
        '''
        partial_job = Job(
            job_id=None,
            experiment=None,
            **kwargs)
        assert self._db is not None, 'Experiment was not added to a database'
        url = self._jobs_url()
        map_def = partial_job._to_map_definition()
        data = json_dumps(map_def, cls=encoding.SchedyJSONEncoder)
        response = self._db._authenticated_request('POST', url, data=data)
        errors._handle_response_errors(response)
        return _job_from_response(self, response)

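    # Usage sketch (illustrative, not part of the original source): queuing a
    # job by hand. The database handle ``db``, the experiment name and the
    # hyperparameter names are hypothetical.
    #
    #     exp = db.get_experiment('mnist-tuning')
    #     job = exp.add_job(hyperparameters={'learning_rate': 0.01,
    #                                        'momentum': 0.9})
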
    def next_job(self):
        '''
        Returns a new job to be worked on. This job will be set in the
        ``RUNNING`` state. This function handles everything so that two
        workers never start working on the same job.

        Returns:
            schedy.Job: The instance of the requested job.
        '''
        assert self._db is not None, 'Experiment was not added to a database'
        url = urljoin(self._db._experiment_url(self.name), 'nextjob/')
        job = None
        # Keep requesting a job until we get hold of one we can actually run:
        # concurrent attempts to start the same job can make us fail, so try
        # and try again.
        while job is None:
            response = self._db._authenticated_request('GET', url)
            if response.status_code == requests.codes.no_content:
                raise errors.NoJobError('No job left for experiment {}.'.format(self.name), None)
            errors._handle_response_errors(response)
            job = _job_from_response(self, response)
            try:
                job.try_run()
            except errors.UnsafeUpdateError:
                job = None
                logger.debug('Two workers tried to start working on the same job, retrying.', exc_info=True)
        return job

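    # Usage sketch (illustrative): a typical worker loop. ``train`` is a
    # hypothetical user-supplied function, and using the job as a context
    # manager (so it is marked finished, or crashed, on exit) is an
    # assumption about schedy.Job.
    #
    #     while True:
    #         try:
    #             with exp.next_job() as job:
    #                 job.results['loss'] = train(job.hyperparameters)
    #         except schedy.errors.NoJobError:
    #             break
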
    def all_jobs(self):
        '''
        Retrieves all the jobs belonging to this experiment.

        Returns:
            iterator of :py:class:`schedy.Job`: An iterator over all the jobs
            of this experiment.
        '''
        assert self._db is not None, 'Experiment was not added to a database'
        url = self._jobs_url()
        return PageObjectsIterator(
            reqfunc=functools.partial(self._db._authenticated_request, 'GET', url),
            obj_creation_func=functools.partial(_make_job, self),
        )

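    # Usage sketch (illustrative): jobs are fetched lazily, page by page,
    # through PageObjectsIterator, so iterating stays cheap even for large
    # experiments.
    #
    #     for job in exp.all_jobs():
    #         print(job.job_id, job.status)
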
    def get_job(self, job_id):
        '''
        Retrieves a job by id.

        Args:
            job_id (str): Id of the job to retrieve.

        Returns:
            schedy.Job: Instance of the requested job.
        '''
        assert self._db is not None, 'Experiment was not added to a database'
        url = self._db._job_url(self.name, job_id)
        response = self._db._authenticated_request('GET', url)
        errors._handle_response_errors(response)
        job = _job_from_response(self, response)
        return job

    def __str__(self):
        try:
            return '{}(name={!r}, params={})'.format(self.__class__.__name__, self.name, self._get_params())
        except NotImplementedError:
            return '{}(name={!r})'.format(self.__class__.__name__, self.name)

    def push_updates(self):
        '''
        Pushes all the updates made to this experiment to the service.
        '''
        assert self._db is not None, 'Experiment was not added to a database'
        url = self._db._experiment_url(self.name)
        content = self._to_map_definition()
        data = json_dumps(content, cls=encoding.SchedyJSONEncoder)
        response = self._db._authenticated_request('PUT', url, data=data)
        errors._handle_response_errors(response)

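    # Usage sketch (illustrative): pausing an experiment amounts to flipping
    # its status and pushing the change.
    #
    #     exp.status = Experiment.DONE
    #     exp.push_updates()
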
    def delete(self, ensure=True):
        '''
        Deletes this experiment.

        Args:
            ensure (bool): If true, an exception will be raised if the
                experiment was deleted before this call.
        '''
        assert self._db is not None, 'Experiment was not added to a database'
        url = self._db._experiment_url(self.name)
        if ensure:
            headers = {'If-Match': '*'}
        else:
            headers = dict()
        response = self._db._authenticated_request('DELETE', url, headers=headers)
        errors._handle_response_errors(response)

    @classmethod
    def _create_from_params(cls, name, status, params):
        raise NotImplementedError()

    def _get_params(self):
        raise NotImplementedError()

    def _to_map_definition(self):
        try:
            scheduler = self._SCHEDULER_NAME
        except AttributeError as e:
            raise_from(AttributeError('Experiment implementations should define a _SCHEDULER_NAME attribute'), e)
        return {
            'status': self.status,
            'scheduler': {scheduler: self._get_params()},
        }

    @staticmethod
    def _from_map_definition(schedulers, map_def):
        try:
            name = str(map_def['name'])
            status = str(map_def['status'])
            scheduler_def_map = dict(map_def['scheduler'])
            if len(scheduler_def_map) != 1:
                raise ValueError('Invalid scheduler definition: {}.'.format(scheduler_def_map))
            sched_def_key, sched_def_val = next(iter(scheduler_def_map.items()))
            scheduler = str(sched_def_key)
            params = sched_def_val
        except (ValueError, KeyError) as e:
            raise_from(ValueError('Invalid map definition for experiment.'), e)
        if not _check_status(status):
            raise ValueError('Invalid or unknown status value: {}.'.format(status))
        try:
            exp_type = schedulers[scheduler]
        except KeyError as e:
            raise_from(ValueError('Invalid or unregistered scheduler name: {}.'.format(scheduler)), e)
        return exp_type._create_from_params(
            name=name,
            status=status,
            params=params)

    def _jobs_url(self):
        return urljoin(self._db._experiment_url(self.name), 'jobs/')

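# Wire-format sketch (inferred from _to_map_definition and
# _from_map_definition above; the values are hypothetical): an experiment is
# read back from the service as a map with exactly one key under 'scheduler',
# naming the scheduler type:
#
#     {
#         'name': 'mnist-tuning',
#         'status': 'RUNNING',
#         'scheduler': {'RandomSearch': {...}},
#     }
#
# Note that _to_map_definition omits 'name' when serializing: the name is
# carried by the resource URL instead.
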
class ManualSearch(Experiment):
    '''
    Represents a manual search, that is to say an experiment for which the
    only jobs returned by :py:meth:`schedy.Experiment.next_job` are jobs that
    were queued beforehand (by using :py:meth:`schedy.Experiment.add_job` for
    example).
    '''
    _SCHEDULER_NAME = 'Manual'

    @classmethod
    def _create_from_params(cls, name, status, params):
        if params is not None:
            raise ValueError('Expected no parameters for manual search, found {}.'.format(type(params)))
        return cls(name=name, status=status)

    def _get_params(self):
        return None

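# Usage sketch (illustrative; assumes a SchedyDB-like client exposing an
# add_experiment method): with a manual search, every candidate is queued
# explicitly.
#
#     exp = ManualSearch('grid-by-hand')
#     db.add_experiment(exp)
#     for lr in (0.1, 0.01, 0.001):
#         exp.add_job(hyperparameters={'lr': lr})
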
class RandomSearch(Experiment):
    _SCHEDULER_NAME = 'RandomSearch'

    def __init__(self, name, distributions, status=Experiment.RUNNING):
        '''
        Represents a random search, that is to say an experiment that returns
        jobs with random hyperparameters when no job was queued manually
        using :py:meth:`schedy.Experiment.add_job`.

        If you create a job manually for this experiment, it must have
        exactly the hyperparameters specified in the ``distributions``
        parameter, no more and no fewer.

        Args:
            name (str): Name of the experiment. An experiment is uniquely
                identified by its name.
            distributions (dict): A dictionary of distributions (see
                :py:mod:`schedy.random`), whose keys are the names of the
                hyperparameters.
            status (str): Status of the experiment. See
                :ref:`experiment_status`.
        '''
        super().__init__(name, status)
        self.distributions = distributions

    @classmethod
    def _create_from_params(cls, name, status, params):
        try:
            items = params.items()
        except AttributeError as e:
            raise_from(ValueError('Expected parameters as a dict, found {}.'.format(type(params))), e)
        distributions = dict()
        for key, dist_def in items:
            try:
                dist_name_raw, dist_args = next(iter(dist_def.items()))
                dist_name = str(dist_name_raw)
            except (KeyError, TypeError) as e:
                raise_from(ValueError('Invalid distribution definition.'), e)
            try:
                dist_type = _DISTRIBUTION_TYPES[dist_name]
            except KeyError as e:
                raise_from(ValueError('Invalid or unknown distribution type: {}.'.format(dist_name)), e)
            distributions[key] = dist_type._from_args(dist_args)
        return cls(name=name, distributions=distributions, status=status)

    def _get_params(self):
        return {key: {dist._FUNC_NAME: dist._args()} for key, dist in self.distributions.items()}

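# Usage sketch (illustrative): configuring a random search. The distribution
# classes Uniform and LogUniform are assumed to exist in schedy.random with
# (low, high) constructors; check that module for the names actually
# available.
#
#     from schedy.random import Uniform, LogUniform
#     exp = RandomSearch('random-tuning', distributions={
#         'learning_rate': LogUniform(1e-5, 1e-1),
#         'dropout': Uniform(0.0, 0.5),
#     })
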
class PopulationBasedTraining(Experiment):
    _SCHEDULER_NAME = 'PBT'

    def __init__(self, name, objective, result_name, exploit, explore=dict(),
                 initial_distributions=dict(), population_size=None,
                 status=Experiment.RUNNING, max_generations=None):
        '''
        Implements Population Based Training (see the `paper
        <https://arxiv.org/pdf/1711.09846.pdf>`_).

        You have two ways to specify the initial jobs for Population Based
        Training. You can create them manually using
        :py:meth:`schedy.Experiment.add_job`, or you can specify the
        ``initial_distributions`` and ``population_size`` parameters.

        If you create a job manually for this experiment, it must have at
        least the hyperparameters specified in the ``explore`` parameter.

        Args:
            name (str): Name of the experiment. An experiment is uniquely
                identified by its name.
            objective (str): The objective of the training, either
                :py:attr:`schedy.pbt.MINIMIZE` (to minimize a result) or
                :py:attr:`schedy.pbt.MAXIMIZE` (to maximize a result).
            result_name (str): The name of the result to optimize. This
                result must be present in the results of all
                :py:attr:`~schedy.Experiment.RUNNING` jobs of this
                experiment.
            exploit (schedy.pbt.ExploitStrategy): Strategy to use to exploit
                the results (i.e. to focus on the most promising jobs).
            explore (dict): Strategy to use to explore new hyperparameter
                values. The keys of the dictionary are the names of the
                hyperparameters (str), and the values are the strategies
                associated with the hyperparameters
                (:py:class:`schedy.pbt.ExploreStrategy`). Values for the
                omitted hyperparameters will not be explored. This parameter
                is optional: if you do not specify any explore strategy, only
                exploitation will be used.
            initial_distributions (dict): The initial distributions for the
                hyperparameters, as a dictionary of distributions (see
                :py:mod:`schedy.random`) whose keys are the names of the
                hyperparameters. This parameter is optional; you can also
                create the initial jobs manually. If you use this parameter,
                make sure to use ``population_size`` as well.
            population_size (int): Number of initial jobs to create before
                starting to exploit/explore (i.e. the size of the
                population). It does **not** have to be the number of jobs
                you can process in parallel. The original paper used values
                between 10 and 80.
            status (str): Status of the experiment. See
                :ref:`experiment_status`.
            max_generations (int): Maximum number of generations to run
                before marking the experiment as done (see
                :ref:`experiment_status`). When the maximum number of
                generations is reached, subsequent calls to
                :py:meth:`schedy.Experiment.next_job` will raise
                :py:class:`schedy.errors.NoJobError`, to indicate that the
                job queue is empty.
        '''
        super().__init__(name, status)
        self.objective = objective
        self.result_name = result_name
        self.exploit = exploit
        self.explore = explore
        self.initial_distributions = initial_distributions
        self.population_size = population_size
        self.max_generations = max_generations

    @classmethod
    def _create_from_params(cls, name, status, params):
        kwargs = {
            'name': name,
            'status': status,
            'objective': params['objective'],
            'result_name': params['qualityResultName'],
        }
        exploit_type, exploit_params = next(iter(params['exploit'].items()))
        kwargs['exploit'] = _EXPLOIT_STRATEGIES[exploit_type]._from_params(exploit_params)
        population_size = params.get('numParallel')
        if population_size is not None:
            kwargs['population_size'] = population_size
        initial_distributions = params.get('init')
        if initial_distributions is not None:
            init_param = dict()
            for hp, dist_map in initial_distributions.items():
                dist_name, dist_params = next(iter(dist_map.items()))
                init_param[hp] = _DISTRIBUTION_TYPES[dist_name]._from_args(dist_params)
            kwargs['initial_distributions'] = init_param
        explore = params.get('explore')
        if explore is not None:
            explore_map = dict()
            for hp, strat_map in explore.items():
                strat_name, strat_params = next(iter(strat_map.items()))
                explore_map[hp] = _EXPLORE_STRATEGIES[strat_name]._from_params(strat_params)
            kwargs['explore'] = explore_map
        # The key must match the one written by _get_params below.
        max_generations = params.get('maxGenerations')
        if max_generations is not None:
            kwargs['max_generations'] = max_generations
        return cls(**kwargs)

    def _get_params(self):
        params = {
            'objective': self.objective,
            'qualityResultName': self.result_name,
        }
        if self.population_size:
            params['numParallel'] = self.population_size
        if self.initial_distributions:
            params['init'] = {
                name: {dist._FUNC_NAME: dist._args()}
                for name, dist in self.initial_distributions.items()
            }
        params['exploit'] = {self.exploit._EXPLOIT_STRATEGY_NAME: self.exploit._get_params()}
        if self.explore:
            params['explore'] = {
                name: {strat._EXPLORE_STRATEGY_NAME: strat._get_params()}
                for name, strat in self.explore.items()
            }
        if self.max_generations:
            params['maxGenerations'] = self.max_generations
        return params

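# Usage sketch (illustrative): a PBT experiment seeded from initial
# distributions. The Truncate exploit strategy, the Perturb explore strategy
# and the Uniform distribution are assumed names; see schedy.pbt and
# schedy.random for the strategies and distributions actually available.
#
#     from schedy import pbt
#     from schedy.random import Uniform
#     exp = PopulationBasedTraining(
#         'pbt-tuning',
#         objective=pbt.MAXIMIZE,
#         result_name='accuracy',
#         exploit=pbt.Truncate(),
#         explore={'learning_rate': pbt.Perturb()},
#         initial_distributions={'learning_rate': Uniform(0.001, 0.1)},
#         population_size=20,
#     )
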
def _make_experiment(db, data):
    try:
        exp_data = dict(data)
    except ValueError as e:
        raise_from(errors.UnhandledResponseError('Expected experiment data as a dict, received {}.'.format(type(data)), None), e)
    try:
        exp = Experiment._from_map_definition(db._schedulers, exp_data)
    except ValueError as e:
        raise_from(errors.UnhandledResponseError('Response contains an invalid experiment', None), e)
    exp._db = db
    return exp